EP13 product/metadata type resource updates (#1536)
* New MDT get_with_fields() method.

* Product resource cleanup (including methods moving in and out); see EP13.

* Get tests passing as they are.

* Porting tests, hopefully keeping coverage.

* More test porting and migrating internal usages of deprecated/moved methods.

* What's new

* What's new - update PR number

* Minor change to whats_new - mostly to kick off checks again.

* Lintage.

* tweak list_products to consider load_hints (#1535)

* tweak list_products to check for crs and resolution in load_hints

* update whats_new

* get osgeo/gdal from ghcr

---------

Authored-by: Ariana Barzinpour <ariana.barzinpour@ga.gov.au>

* Fix query API test fixture.

* Fix virtual product test fixture.

* Nice cleanup - thanks @Ariana-B

---------

Co-authored-by: Ariana-B <40238244+ariana-b@users.noreply.github.com>
SpacemanPaul and Ariana-B committed Jan 18, 2024
1 parent 2671f2c commit bb42559
Showing 24 changed files with 137 additions and 141 deletions.
14 changes: 11 additions & 3 deletions datacube/api/core.py
@@ -143,6 +143,14 @@ def list_products(self, with_pandas=True, dataset_count=False):
:return: A table or list of every product in the datacube.
:rtype: pandas.DataFrame or list(dict)
"""
def _get_non_default(product, col):
load_hints = product.load_hints()
if load_hints:
if col == 'crs':
return load_hints.get('output_crs', None)
return load_hints.get(col, None)
return getattr(product.grid_spec, col, None)

# Read properties from each datacube product
cols = [
'name',
@@ -155,10 +163,10 @@ def list_products(self, with_pandas=True, dataset_count=False):
getattr(pr, col, None)
# if 'default_crs' and 'default_resolution' are not None
# return 'default_crs' and 'default_resolution'
-if getattr(pr, col, None) and 'default' not in col
-# else try 'grid_spec.crs' and 'grid_spec.resolution'
+if getattr(pr, col, None) or 'default' not in col
+# else get crs and resolution from load_hints or grid_spec
+# as per output_geobox() handling logic
-else getattr(pr.grid_spec, col.replace('default_', ''), None)
+else _get_non_default(pr, col.replace('default_', ''))
for col in cols]
for pr in self.index.products.get_all()]

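For orientation, a short sketch of the fallback order that the new
_get_non_default helper gives the default_crs/default_resolution columns;
the function below is illustrative only, not part of the datacube API:

def crs_resolution_for_listing(product, col):
    # col is 'default_crs' or 'default_resolution'
    value = getattr(product, col, None)
    if value:  # an explicit default on the product wins
        return value
    bare = col.replace('default_', '')  # 'crs' or 'resolution'
    hints = product.load_hints()
    if hints:  # then load hints; 'crs' is keyed as 'output_crs' there
        return hints.get('output_crs' if bare == 'crs' else bare, None)
    # finally fall back to the legacy grid_spec attribute
    return getattr(product.grid_spec, bare, None)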
2 changes: 1 addition & 1 deletion datacube/api/query.py
@@ -118,7 +118,7 @@ def __init__(self, index=None, product=None, geopolygon=None, like=None, **searc

remaining_keys -= known_dim_keys

-unknown_keys = remaining_keys - set(index.datasets.get_field_names())
+unknown_keys = remaining_keys - set(index.products.get_field_names())
# TODO: What about keys source filters, and what if the keys don't match up with this product...
if unknown_keys:
raise LookupError('Unknown arguments: ', unknown_keys)
55 changes: 52 additions & 3 deletions datacube/index/abstract.py
@@ -17,8 +17,8 @@
from urllib.parse import urlparse, ParseResult
from uuid import UUID
from datetime import timedelta

from deprecat import deprecat

from datacube.cfg.api import ODCEnvironment, ODCOptionHandler
from datacube.index.exceptions import TransactionException
from datacube.index.fields import Field
@@ -295,6 +295,17 @@ def update_document(self,
"""
return self.update(self.from_doc(definition), allow_unsafe_updates=allow_unsafe_updates)

def get_with_fields(self, field_names: Iterable[str]) -> Iterable[MetadataType]:
"""
Return all metadata types that have all of the named search fields.
:param field_names: Iterable of search field names
:return: Iterable of matching metadata types.
"""
for mdt in self.get_all():
if all(field in mdt.dataset_fields for field in field_names):
yield mdt

def get(self, id_: int) -> Optional[MetadataType]:
"""
Fetch metadata type by id.
@@ -671,14 +682,47 @@ def get_by_name_unsafe(self, name: str) -> Product:
:raises KeyError: if not found
"""

-@abstractmethod
def get_with_fields(self, field_names: Iterable[str]) -> Iterable[Product]:
"""
Return products that have all of the given fields.
:param field_names: names of fields that returned products must have
:returns: Matching product models
"""
+return self.get_with_types(self.metadata_type_resource.get_with_fields(field_names))

def get_with_types(self, types: Iterable[MetadataType]) -> Iterable[Product]:
"""
Return all products for given metadata types
:param types: An iterable of MetadataType models
:return: An iterable of Product models
"""
mdts = set(mdt.name for mdt in types)
for prod in self.get_all():
if prod.metadata_type.name in mdts:
yield prod

def get_field_names(self, product: Optional[str | Product] = None) -> Iterable[str]:
"""
Get the list of possible search fields for a Product (or all products)
:param product: Name of product, a Product object, or None for all products
:return: All possible search field names
"""
if product is None:
prods = self.get_all()
else:
if isinstance(product, str):
product = self.get_by_name(product)
if product is None:
prods = []
else:
prods = [product]
out = set()
for prod in prods:
out.update(prod.metadata_type.dataset_fields)
return out

def search(self, **query: QueryField) -> Iterator[Product]:
"""
@@ -1287,14 +1331,19 @@ def get_all_dataset_ids(self, archived: bool) -> Iterable[UUID]:
:return: Iterable of dataset ids
"""

-@abstractmethod
+@deprecat(
+    reason="This method has been moved to the Product resource (i.e. dc.index.products.get_field_names)",
+    version="1.9.0",
+    category=ODC2DeprecationWarning
+)
def get_field_names(self, product_name: Optional[str] = None) -> Iterable[str]:
"""
Get the list of possible search fields for a Product (or all products)
:param product_name: Name of product, or None for all products
:return: All possible search field names
"""
+return self._index.products.get_field_names(product_name)

@abstractmethod
def get_locations(self, id_: DSID) -> Iterable[str]:
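Taken together, a hedged usage sketch of the relocated API, assuming a
configured datacube environment; the search-field names are placeholders:

from datacube import Datacube

dc = Datacube()

# New on the metadata type resource: types carrying all named search fields.
mdts = list(dc.index.metadata_types.get_with_fields(['platform', 'lat']))

# Product resource: the default implementation filters products through the
# matching metadata types via get_with_types().
prods = list(dc.index.products.get_with_fields(['platform', 'lat']))

# Search-field names now live on the product resource; pass a product name,
# a Product object, or nothing for the union over all products.
fields = dc.index.products.get_field_names()

# The old location still works, but emits an ODC2DeprecationWarning and
# delegates to dc.index.products.get_field_names().
fields = dc.index.datasets.get_field_names()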
14 changes: 0 additions & 14 deletions datacube/index/memory/_datasets.py
@@ -296,20 +296,6 @@ def get_all_dataset_ids(self, archived: bool) -> Iterable[UUID]:
else:
return (id_ for id_ in self.active_by_id.keys())

def get_field_names(self, product_name=None) -> Iterable[str]:
if product_name is None:
prods = self._index.products.get_all()
else:
prod = self._index.products.get_by_name(product_name)
if prod:
prods = [prod]
else:
prods = []
out: Set[str] = set()
for prod in prods:
out.update(prod.metadata_type.dataset_fields)
return out

def get_locations(self, id_: DSID) -> Iterable[str]:
uuid = dsid_to_uuid(id_)
return (s for s in self.locations[uuid])
8 changes: 0 additions & 8 deletions datacube/index/memory/_products.py
@@ -119,14 +119,6 @@ def get_unsafe(self, id_: int) -> Product:
def get_by_name_unsafe(self, name: str) -> Product:
return self.clone(self.by_name[name])

def get_with_fields(self, field_names: Iterable[str]) -> Iterable[Product]:
for prod in self.get_all():
for name in field_names:
if name not in prod.metadata_type.dataset_fields:
break
else:
yield prod

def search_robust(self, **query: QueryField) -> Iterator[Tuple[Product, Mapping[str, QueryField]]]:
def listify(v):
if isinstance(v, tuple):
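The per-driver for/else loops removed here (and from the postgis and
postgres drivers below) are behaviourally equivalent to the all() form used
by the new shared default in abstract.py; a side-by-side sketch:

def with_fields_for_else(products, field_names):
    # the removed driver-level pattern
    for prod in products:
        for name in field_names:
            if name not in prod.metadata_type.dataset_fields:
                break
        else:
            yield prod

def with_fields_all(products, field_names):
    # the equivalent all() form, as in the new shared default
    for prod in products:
        if all(name in prod.metadata_type.dataset_fields for name in field_names):
            yield prod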
7 changes: 2 additions & 5 deletions datacube/index/null/_datasets.py
@@ -9,8 +9,8 @@


class DatasetResource(AbstractDatasetResource):
-def __init__(self, product_resource):
-    self.types = product_resource
+def __init__(self, index):
+    super().__init__(index)

def get(self, id_: DSID, include_sources: bool = False, include_deriveds: bool = False, max_depth: int = 0):
return None
@@ -53,9 +53,6 @@ def purge(self, ids: Iterable[DSID]):
def get_all_dataset_ids(self, archived: bool):
return []

def get_field_names(self, product_name=None):
return []

def get_locations(self, id_):
return []

3 changes: 0 additions & 3 deletions datacube/index/null/_metadata_types.py
@@ -8,9 +8,6 @@


class MetadataTypeResource(AbstractMetadataTypeResource):
def __init__(self):
pass

def from_doc(self, definition):
raise NotImplementedError

7 changes: 2 additions & 5 deletions datacube/index/null/_products.py
@@ -13,8 +13,8 @@


class ProductResource(AbstractProductResource):
-def __init__(self, metadata_type_resource):
-    self.metadata_type_resource = metadata_type_resource
+def __init__(self, mdtr):
+    self.metadata_type_resource = mdtr

def add(self, product, allow_table_lock=False):
raise NotImplementedError()
@@ -31,9 +31,6 @@ def get_unsafe(self, id_):
def get_by_name_unsafe(self, name):
raise KeyError(name)

def get_with_fields(self, field_names):
return []

def search_robust(self, **query):
return []

4 changes: 2 additions & 2 deletions datacube/index/null/index.py
@@ -36,9 +36,9 @@ def __init__(self, env: ODCEnvironment) -> None:
self._env = env
self._users = UserResource()
self._metadata_types = MetadataTypeResource()
-self._products = ProductResource(self.metadata_types)
+self._products = ProductResource(self._metadata_types)
self._lineage = NoLineageResource(self)
-self._datasets = DatasetResource(self.products)
+self._datasets = DatasetResource(self)

@property
def environment(self) -> ODCEnvironment:
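A toy sketch of why the DatasetResource constructors now take the whole
index rather than a sibling resource: shared base-class defaults, such as
the deprecated get_field_names(), can then delegate across resources. All
class names here are stand-ins, not the real implementations:

class StubProducts:
    def get_field_names(self, product_name=None):
        return {'id', 'product', 'time'}

class StubDatasets:
    def __init__(self, index):
        self._index = index  # hold the whole index, not just products

    def get_field_names(self, product_name=None):  # deprecated shim
        return self._index.products.get_field_names(product_name)

class StubIndex:
    def __init__(self):
        self.products = StubProducts()
        self.datasets = StubDatasets(self)

assert StubIndex().datasets.get_field_names() == {'id', 'product', 'time'}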
29 changes: 0 additions & 29 deletions datacube/index/postgis/_datasets.py
@@ -409,23 +409,6 @@ def get_all_dataset_ids(self, archived: bool):
with self._db_connection(transaction=True) as transaction:
return [dsid[0] for dsid in transaction.all_dataset_ids(archived)]

def get_field_names(self, product_name=None):
"""
Get the list of possible search fields for a Product
:param str product_name:
:rtype: set[str]
"""
if product_name is None:
products = self.products.get_all()
else:
products = [self.products.get_by_name(product_name)]

out = set()
for prod_ in products:
out.update(prod_.metadata_type.dataset_fields)
return out

def get_locations(self, id_):
"""
Get the list of storage locations for the given dataset id
@@ -659,18 +642,6 @@ def count_product_through_time(self, period, **query):
"""
return next(self._do_time_count(period, query, ensure_single=True))[1]

def _get_products(self, q):
products = set()
if 'product' in q.keys():
products.add(self.products.get_by_name(q['product']))
else:
# Otherwise search any metadata type that has all the given search fields.
products = self.products.get_with_fields(tuple(q.keys()))
if not products:
raise ValueError('No type of dataset has fields: {}'.format(q.keys()))

return products

def _get_product_queries(self, query):
for product, q in self.products.search_robust(**query):
q['product_id'] = product.id
14 changes: 0 additions & 14 deletions datacube/index/postgis/_products.py
@@ -253,20 +253,6 @@ def get_by_name_unsafe(self, name): # type: ignore
raise KeyError('"%s" is not a valid Product name' % name)
return self._make(result)

def get_with_fields(self, field_names):
"""
Return dataset types that have all the given fields.
:param tuple[str] field_names:
:rtype: __generator[Product]
"""
for type_ in self.get_all():
for name in field_names:
if name not in type_.metadata_type.dataset_fields:
break
else:
yield type_

def search_robust(self, **query):
"""
Return dataset types that match match-able fields and dict of remaining un-matchable fields.
17 changes: 0 additions & 17 deletions datacube/index/postgres/_datasets.py
@@ -391,23 +391,6 @@ def get_all_dataset_ids(self, archived: bool):
with self._db_connection(transaction=True) as transaction:
return [dsid[0] for dsid in transaction.all_dataset_ids(archived)]

def get_field_names(self, product_name=None):
"""
Get the list of possible search fields for a Product
:param str product_name:
:rtype: set[str]
"""
if product_name is None:
types = self.types.get_all()
else:
types = [self.types.get_by_name(product_name)]

out = set()
for type_ in types:
out.update(type_.metadata_type.dataset_fields)
return out

def get_locations(self, id_):
"""
Get the list of storage locations for the given dataset id
14 changes: 0 additions & 14 deletions datacube/index/postgres/_products.py
@@ -244,20 +244,6 @@ def get_by_name_unsafe(self, name): # type: ignore
raise KeyError('"%s" is not a valid Product name' % name)
return self._make(result)

def get_with_fields(self, field_names):
"""
Return dataset types that have all the given fields.
:param tuple[str] field_names:
:rtype: __generator[Product]
"""
for type_ in self.get_all():
for name in field_names:
if name not in type_.metadata_type.dataset_fields:
break
else:
yield type_

def search_robust(self, **query):
"""
Return dataset types that match match-able fields and dict of remaining un-matchable fields.
9 changes: 9 additions & 0 deletions datacube/model/__init__.py
@@ -411,6 +411,15 @@ def validate_eo3(cls, doc):
cls.validate(doc)
validate_eo3_compatible_type(doc)

def __eq__(self, other: Any) -> bool:
if self is other:
return True

if self.__class__ != other.__class__:
return False

return self.name == other.name

def __str__(self) -> str:
return "MetadataType(name={name!r}, id_={id!r})".format(id=self.id, name=self.name)

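A small sketch of the equality contract added above, using a stand-in class
(the real MetadataType is constructed from a definition document, so the
constructor here is schematic):

class MT:
    def __init__(self, name):
        self.name = name

    # mirrors the added MetadataType.__eq__: identity fast path, strict
    # class check, then compare by name; note that defining __eq__ without
    # __hash__ leaves a class unhashable in Python
    def __eq__(self, other):
        if self is other:
            return True
        if self.__class__ != other.__class__:
            return False
        return self.name == other.name

class SubMT(MT):
    pass

assert MT('eo3') == MT('eo3')  # same name, distinct instances
assert MT('eo3') != MT('eo3_landsat')  # different names
assert MT('eo3') != SubMT('eo3')  # strict class check: subclasses unequal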
2 changes: 1 addition & 1 deletion datacube/scripts/search_tool.py
@@ -94,7 +94,7 @@ def datasets(ctx, index, expressions):
Search available Datasets
"""
ctx.obj['write_results'](
-sorted(index.datasets.get_field_names()),
+sorted(index.products.get_field_names()),
index.datasets.search_summaries(**expressions)
)

2 changes: 1 addition & 1 deletion docker/Dockerfile
@@ -4,7 +4,7 @@
## Copyright (c) 2015-2020 ODC Contributors
## SPDX-License-Identifier: Apache-2.0
##
-FROM osgeo/gdal:ubuntu-small-latest
+FROM ghcr.io/osgeo/gdal:ubuntu-small-latest
ARG V_PG=14
ARG V_PGIS=14-postgis-3

3 changes: 3 additions & 0 deletions docs/about/whats_new.rst
@@ -28,6 +28,7 @@ v1.9.next
- Alembic migrations for postgis driver (:pull:`1520`)
- EP08 lineage extensions/changes to datasets.get(). (:pull:`1530`)
- EP13 API changes to Index and IndexDriver. (:pull:`1534`)
- EP13 API changes to metadata and product resources. (:pull:`1536`)


v1.8.next
@@ -37,6 +38,8 @@ v1.8.next
- Warn if non-eo3 dataset has eo3 metadata type (:pull:`1523`)
- Update pandas version in docker image to be consistent with conda environment and default to stdlib
timezone instead of pytz when converting timestamps; automatically update copyright years (:pull:`1527`)
- Update github-Dockerhub credential-passing mechanism. (:pull:`1528`)
- Tweak ``list_products`` logic for getting crs and resolution values (:pull:`1535`)

v1.8.17 (8th November 2023)
===========================
