From 9585230bf216e481e522ab2465b664e8d0729e94 Mon Sep 17 00:00:00 2001 From: Paul Haesler Date: Tue, 6 Feb 2024 07:22:29 +1100 Subject: [PATCH] Spatial index by product (#1539) * Implement spatial_index(product) as per EP13 - should have been in last PR. * Update whats_new.rst * Trivial text change to retrigger workflows after changing base branch. * Lintage. sigh. * Split temporal and spatial extent methods across dataset and product resources. * Close coverage gaps. * Base class docstring copypasta. * Remove separate metadata resource from product resource - we have whole index. * Fix null driver method signatures. * More docstring copypasta. * lintage --- datacube/drivers/postgis/_api.py | 15 +++-- datacube/drivers/postgres/_api.py | 25 ++++++++ datacube/index/abstract.py | 57 +++++++++++++------ datacube/index/memory/_datasets.py | 19 +------ datacube/index/memory/_products.py | 23 +++++--- datacube/index/memory/index.py | 5 +- datacube/index/null/_datasets.py | 15 +---- datacube/index/null/_products.py | 10 +++- datacube/index/null/index.py | 2 +- datacube/index/postgis/_datasets.py | 32 +++-------- datacube/index/postgis/_products.py | 41 +++++++++---- datacube/index/postgres/_datasets.py | 54 ++---------------- datacube/index/postgres/_products.py | 40 +++++++++---- docs/about/whats_new.rst | 1 + integration_tests/index/test_memory_index.py | 12 ++-- integration_tests/index/test_null_index.py | 9 +-- integration_tests/index/test_postgis_index.py | 38 +++++++++---- integration_tests/index/test_search_legacy.py | 2 + .../test_index_datasets_search.py | 15 ++--- 19 files changed, 220 insertions(+), 195 deletions(-) diff --git a/datacube/drivers/postgis/_api.py b/datacube/drivers/postgis/_api.py index 1e4636760..3bfa26306 100644 --- a/datacube/drivers/postgis/_api.py +++ b/datacube/drivers/postgis/_api.py @@ -396,15 +396,14 @@ def spatial_extent(self, ids, crs): SpatialIndex = self._db.spatial_index(crs) # noqa: N806 if SpatialIndex is None: return None - result = self._connection.execute( - select( - func.ST_AsGeoJSON(func.ST_Union(SpatialIndex.extent)) - ).select_from( - SpatialIndex - ).where( - SpatialIndex.dataset_ref.in_(ids) - ) + query = select( + func.ST_AsGeoJSON(func.ST_Union(SpatialIndex.extent)) + ).select_from( + SpatialIndex + ).where( + SpatialIndex.dataset_ref.in_(ids) ) + result = self._connection.execute(query) for r in result: extent_json = r[0] if extent_json is None: diff --git a/datacube/drivers/postgres/_api.py b/datacube/drivers/postgres/_api.py index c1236637b..17dd830fd 100644 --- a/datacube/drivers/postgres/_api.py +++ b/datacube/drivers/postgres/_api.py @@ -13,6 +13,7 @@ Persistence API implementation for postgres. 
""" +import datetime import logging import uuid # noqa: F401 from typing import Iterable, Tuple @@ -1151,6 +1152,30 @@ def get_all_metadata_defs(self): ).fetchall() ] + def temporal_extent_by_product(self, product_id: int, + min_time_offset, max_time_offset) -> tuple[datetime.datetime, datetime.datetime]: + time_min = DateDocField('aquisition_time_min', + 'Min of time when dataset was acquired', + DATASET.c.metadata, + False, # is it indexed + offset=min_time_offset, + selection='least') + + time_max = DateDocField('aquisition_time_max', + 'Max of time when dataset was acquired', + DATASET.c.metadata, + False, # is it indexed + offset=max_time_offset, + selection='greatest') + + return self._connection.execute( + select( + func.min(time_min.alchemy_expression), func.max(time_max.alchemy_expression) + ).where( + DATASET.c.dataset_type_ref == product_id + ) + ).first() + def get_locations(self, dataset_id): return [ record[0] diff --git a/datacube/index/abstract.py b/datacube/index/abstract.py index afd7a6319..c2490b385 100644 --- a/datacube/index/abstract.py +++ b/datacube/index/abstract.py @@ -405,7 +405,8 @@ class and implement all abstract methods. (If a particular abstract method is not applicable for a particular implementation raise a NotImplementedError) """ - metadata_type_resource: AbstractMetadataTypeResource + def __init__(self, index: "AbstractIndex"): + self._index = index def from_doc(self, definition: Mapping[str, Any], metadata_type_cache: Optional[MutableMapping[str, MetadataType]] = None) -> Product: @@ -430,14 +431,14 @@ def from_doc(self, definition: Mapping[str, Any], if metadata_type_cache is not None and metadata_type in metadata_type_cache: metadata_type = metadata_type_cache[metadata_type] else: - metadata_type = self.metadata_type_resource.get_by_name(metadata_type) + metadata_type = self._index.metadata_types.get_by_name(metadata_type) if (metadata_type is not None and metadata_type_cache is not None and metadata_type.name not in metadata_type_cache): metadata_type_cache[metadata_type.name] = metadata_type else: # Otherwise they embedded a document, add it if needed: - metadata_type = self.metadata_type_resource.from_doc(metadata_type) + metadata_type = self._index.metadata_types.from_doc(metadata_type) definition = dict(definition) definition['metadata_type'] = metadata_type.name @@ -689,7 +690,7 @@ def get_with_fields(self, field_names: Iterable[str]) -> Iterable[Product]: :param field_names: names of fields that returned products must have :returns: Matching product models """ - return self.get_with_types(self.metadata_type_resource.get_with_fields(field_names)) + return self.get_with_types(self._index.metadata_types.get_with_fields(field_names)) def get_with_types(self, types: Iterable[MetadataType]) -> Iterable[Product]: """ @@ -779,6 +780,33 @@ def get_all_docs(self) -> Iterable[Mapping[str, Any]]: for prod in self.get_all(): yield prod.definition + @abstractmethod + def spatial_extent(self, product: str | Product, crs: CRS = CRS("EPSG:4326")) -> Optional[Geometry]: + """ + Return the combined spatial extent of the nominated product + + Uses spatial index. + + Returns None if no index for the CRS, or if no datasets for the product in the relevant spatial index, + or if the driver does not support the spatial index api. + + Result will not include extents of datasets that cannot be validly projected into the CRS. + + :param product: A Product or product name. 
+        :param crs: A CRS (defaults to EPSG:4326)
+        :return: The combined spatial extents of the product.
+        """
+
+    @abstractmethod
+    def temporal_extent(self, product: str | Product) -> tuple[datetime.datetime, datetime.datetime]:
+        """
+        Returns the minimum and maximum acquisition time of a product.
+        Raises KeyError if the product has no datasets in the index.
+
+        :param product: Product or name of product
+        :return: minimum and maximum acquisition times
+        """
+
 
 # Non-strict Dataset ID representation
 DSID = Union[str, UUID]
@@ -1696,24 +1724,18 @@ def search_eager(self, **query: QueryField) -> List[Dataset]:
         return list(self.search(**query))  # type: ignore[arg-type]   # mypy isn't being very smart here :(
 
     @abstractmethod
-    def temporal_extent(self,
-                        product: str | Product | None,
-                        ids: Iterable[DSID] | None
-                        ) -> tuple[datetime.datetime, datetime.datetime]:
+    def temporal_extent(self, ids: Iterable[DSID]) -> tuple[datetime.datetime, datetime.datetime]:
         """
-        Returns the minimum and maximum acquisition time of a product or an iterable of dataset ids.
+        Returns the minimum and maximum acquisition time of an iterable of dataset ids.
 
-        Only one ids or products can be passed - the other should be None. Raises ValueError if
-        both or neither of ids and products is passed. Raises KeyError if no datasets in the index
-        match the input argument.
+        Raises KeyError if none of the datasets are in the index.
 
-        :param product: Product or name of product
         :param ids: Iterable of dataset ids.
         :return: minimum and maximum acquisition times
         """
 
     @deprecat(
-        reason="This method has been renamed 'temporal_extent'",
+        reason="This method has been moved to the Product Resource and renamed 'temporal_extent()'",
         version="1.9.0",
         category=ODC2DeprecationWarning
     )
@@ -1726,7 +1748,7 @@ def get_product_time_bounds(self,
         :param product: Product or name of product
         :return: minimum and maximum acquisition times
         """
-        return self.temporal_extent(product=product)
+        return self._index.products.temporal_extent(product=product)
 
     @abstractmethod
     def search_returning_datasets_light(self,
@@ -1762,11 +1784,12 @@ def search_returning_datasets_light(self,
         """
 
     @abstractmethod
-    def spatial_extent(self, ids: Iterable[DSID], crs: CRS = CRS("EPSG:4326")) -> Optional[Geometry]:
+    def spatial_extent(self, ids: Iterable[DSID], crs: CRS = CRS("EPSG:4326")) -> Geometry | None:
         """
-        Return the combined spatial extent of the nominated datasets.
+        Return the combined spatial extent of the nominated datasets
 
         Uses spatial index.
+
         Returns None if no index for the CRS, or if no identified datasets are indexed in the relevant spatial index.
 
         Result will not include extents of datasets that cannot be validly projected into the CRS.
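
The abstract interface changes above complete the EP13 split: product-wide extents move to the product resource, while the dataset resource keeps extent queries by dataset id only. As a rough sketch of how the reshaped API is called from user code (the environment and product names below are hypothetical, and, per the docstrings above, spatial_extent may return None on drivers without spatial index support):

    import datacube
    from odc.geo.geom import CRS

    dc = datacube.Datacube(env="datacube")   # hypothetical environment name
    product = "ga_ls8c_ard_3"                # hypothetical product name

    # Product-level extents now live on the product resource:
    t_min, t_max = dc.index.products.temporal_extent(product)
    footprint = dc.index.products.spatial_extent(product, crs=CRS("EPSG:4326"))

    # Dataset-level extents take only an iterable of dataset ids:
    ids = [ds.id for ds in dc.index.datasets.search(product=product)]
    t_min, t_max = dc.index.datasets.temporal_extent(ids)
    footprint = dc.index.datasets.spatial_extent(ids, crs=CRS("EPSG:4326"))  # None without a spatial index

Both temporal_extent variants raise KeyError when nothing matches, as the docstrings above specify.
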
diff --git a/datacube/index/memory/_datasets.py b/datacube/index/memory/_datasets.py index 2e27a2603..eb0bbd1a5 100755 --- a/datacube/index/memory/_datasets.py +++ b/datacube/index/memory/_datasets.py @@ -642,20 +642,10 @@ def make_summary(ds: Dataset) -> Mapping[str, Any]: for ds in self.search(**query): # type: ignore[arg-type] yield make_summary(ds) - def temporal_extent( - self, - product: str | Product | None = None, - ids: Iterable[DSID] | None = None - ) -> tuple[datetime.datetime, datetime.datetime]: - if product is None and ids is None: - raise ValueError("Must supply product or ids") - elif product is not None and ids is not None: - raise ValueError("Cannot supply both product and ids") - elif product is not None: - if isinstance(product, str): - product = self._index.products.get_by_name_unsafe(product) - ids = self.by_product.get(product.name, []) + def spatial_extent(self, ids, crs=None): + return None + def temporal_extent(self, ids: Iterable[DSID]) -> tuple[datetime.datetime, datetime.datetime]: min_time: Optional[datetime.datetime] = None max_time: Optional[datetime.datetime] = None for dsid in ids: @@ -727,9 +717,6 @@ def clone(self, orig: Dataset, for_save=False, lookup_locations=True) -> Dataset archived_time=None if for_save else orig.archived_time ) - def spatial_extent(self, ids, crs=None): - return None - # Lineage methods need to be implemented on the dataset resource as that is where the relevant indexes # currently live. def _get_all_lineage(self) -> Iterable[LineageRelation]: diff --git a/datacube/index/memory/_products.py b/datacube/index/memory/_products.py index 419a4c25c..78df07d7d 100644 --- a/datacube/index/memory/_products.py +++ b/datacube/index/memory/_products.py @@ -2,13 +2,13 @@ # # Copyright (c) 2015-2024 ODC Contributors # SPDX-License-Identifier: Apache-2.0 +import datetime import logging from typing import Iterable, Iterator, Mapping, Tuple, cast from datacube.index.fields import as_expression from datacube.index.abstract import AbstractProductResource, QueryField -from datacube.index.memory._metadata_types import MetadataTypeResource from datacube.model import Product from datacube.utils import changes, jsonify_document, _readable_offset from datacube.utils.changes import AllowPolicy, Change, Offset, check_doc_unchanged, get_doc_changes, classify_changes @@ -19,8 +19,8 @@ class ProductResource(AbstractProductResource): - def __init__(self, metadata_type_resource): - self.metadata_type_resource: MetadataTypeResource = metadata_type_resource + def __init__(self, index): + self._index = index self.by_id = {} self.by_name = {} self.next_id = 1 @@ -36,11 +36,11 @@ def add(self, product: Product, allow_table_lock: bool = False) -> Product: f'Metadata Type {product.name}' ) else: - mdt = self.metadata_type_resource.get_by_name(product.metadata_type.name) + mdt = self._index.metadata_types.get_by_name(product.metadata_type.name) if mdt is None: _LOG.warning(f'Adding metadata_type "{product.metadata_type.name}" as it doesn\'t exist') - product.metadata_type = self.metadata_type_resource.add(product.metadata_type, - allow_table_lock=allow_table_lock) + product.metadata_type = self._index.metadata_types.add(product.metadata_type, + allow_table_lock=allow_table_lock) clone = self.clone(product) clone.id = self.next_id self.next_id += 1 @@ -168,7 +168,16 @@ def get_all(self) -> Iterable[Product]: def clone(self, orig: Product) -> Product: return Product( - self.metadata_type_resource.clone(orig.metadata_type), + 
self._index.metadata_types.clone(orig.metadata_type), jsonify_document(orig.definition), id_=orig.id ) + + def spatial_extent(self, product, crs=None): + return None + + def temporal_extent(self, product: str | Product) -> tuple[datetime.datetime, datetime.datetime]: + if isinstance(product, str): + product = self._index.products.get_by_name_unsafe(product) + ids = self._index.datasets.by_product.get(product.name, []) + return self._index.datasets.temporal_extent(ids) diff --git a/datacube/index/memory/index.py b/datacube/index/memory/index.py index bb839f8d1..6fe5cdeec 100644 --- a/datacube/index/memory/index.py +++ b/datacube/index/memory/index.py @@ -47,7 +47,7 @@ def __init__(self, env: ODCEnvironment) -> None: self._env = env self._users = UserResource() self._metadata_types = MetadataTypeResource() - self._products = ProductResource(self.metadata_types) + self._products = ProductResource(self) self._lineage = LineageResource(self) self._datasets = DatasetResource(self) global counter @@ -128,8 +128,7 @@ def metadata_type_from_doc(definition: dict) -> MetadataType: :param definition: """ MetadataType.validate(definition) # type: ignore - return MetadataType(definition, - dataset_search_fields=Index.get_dataset_fields(definition)) + return MetadataType(definition, dataset_search_fields=Index.get_dataset_fields(definition)) def index_driver_init(): diff --git a/datacube/index/null/_datasets.py b/datacube/index/null/_datasets.py index e4339c170..b2ecbf8c0 100755 --- a/datacube/index/null/_datasets.py +++ b/datacube/index/null/_datasets.py @@ -106,19 +106,8 @@ def count_product_through_time(self, period, **query): def search_summaries(self, **query): return [] - def temporal_extent( - self, - product: str | Product = None, - ids: Iterable[DSID] | None = None - ) -> tuple[datetime.datetime, datetime.datetime]: - if product is None and ids is None: - raise ValueError("Must specify product or ids") - elif ids is not None and product is not None: - raise ValueError("Cannot specify both product and ids") - elif ids is not None: - raise KeyError(str(ids)) - else: - raise KeyError(str(product)) + def temporal_extent(self, ids: Iterable[DSID]) -> tuple[datetime.datetime, datetime.datetime]: + raise KeyError(str(ids)) # pylint: disable=redefined-outer-name def search_returning_datasets_light(self, field_names: tuple, custom_offsets=None, limit=None, **query): diff --git a/datacube/index/null/_products.py b/datacube/index/null/_products.py index b7ef47141..68e906d87 100644 --- a/datacube/index/null/_products.py +++ b/datacube/index/null/_products.py @@ -3,6 +3,7 @@ # Copyright (c) 2015-2024 ODC Contributors # SPDX-License-Identifier: Apache-2.0 import logging +import datetime from datacube.index.abstract import AbstractProductResource from datacube.model import Product @@ -13,9 +14,6 @@ class ProductResource(AbstractProductResource): - def __init__(self, mdtr): - self.metadata_type_resource = mdtr - def add(self, product, allow_table_lock=False): raise NotImplementedError() @@ -39,3 +37,9 @@ def search_by_metadata(self, metadata): def get_all(self) -> Iterable[Product]: return [] + + def temporal_extent(self, product: str | Product) -> tuple[datetime.datetime, datetime.datetime]: + raise KeyError(str(product)) + + def spatial_extent(self, product, crs=None): + raise KeyError(str(product)) diff --git a/datacube/index/null/index.py b/datacube/index/null/index.py index e222ad8d7..0e5c284a6 100644 --- a/datacube/index/null/index.py +++ b/datacube/index/null/index.py @@ -36,7 +36,7 @@ def __init__(self, 
env: ODCEnvironment) -> None: self._env = env self._users = UserResource() self._metadata_types = MetadataTypeResource() - self._products = ProductResource(self._metadata_types) + self._products = ProductResource(self) self._lineage = NoLineageResource(self) self._datasets = DatasetResource(self) diff --git a/datacube/index/postgis/_datasets.py b/datacube/index/postgis/_datasets.py index 1e7f2c0f8..4127d9848 100755 --- a/datacube/index/postgis/_datasets.py +++ b/datacube/index/postgis/_datasets.py @@ -747,29 +747,6 @@ def search_summaries(self, **query): _LOG.warning("search results: %s (%s)", output["id"], output["product"]) yield output - def temporal_extent( - self, - product: str | Product | None = None, - ids: Iterable[DSID] | None = None - ) -> tuple[datetime.datetime, datetime.datetime]: - """ - Returns the minimum and maximum acquisition time of the product. - """ - if product is None and ids is None: - raise ValueError("Must supply product or ids") - elif product is not None and ids is not None: - raise ValueError("Cannot supply both product and ids") - elif product is not None: - if isinstance(product, str): - product = self._index.products.get_by_name_unsafe(product) - with self._db_connection() as connection: - result = connection.temporal_extent_by_prod(product.id) - else: - with self._db_connection() as connection: - result = connection.temporal_extent_by_ids(ids) - - return result - # pylint: disable=redefined-outer-name def search_returning_datasets_light(self, field_names: tuple, custom_offsets=None, limit=None, **query): """ @@ -910,7 +887,14 @@ def get_custom_query_expressions(self, custom_query, custom_offsets): return custom_exprs - def spatial_extent(self, ids: Iterable[DSID], crs: CRS = CRS("EPSG:4326")) -> Optional[Geometry]: + def temporal_extent(self, ids: Iterable[DSID]) -> tuple[datetime.datetime, datetime.datetime]: + """ + Returns the minimum and maximum acquisition time of the specified datasets. 
+ """ + with self._db_connection() as connection: + return connection.temporal_extent_by_ids(ids) + + def spatial_extent(self, ids: Iterable[DSID], crs: CRS = CRS("EPSG:4326")) -> Geometry | None: with self._db_connection() as connection: return connection.spatial_extent(ids, crs) diff --git a/datacube/index/postgis/_products.py b/datacube/index/postgis/_products.py index 4774bac5c..989a23370 100644 --- a/datacube/index/postgis/_products.py +++ b/datacube/index/postgis/_products.py @@ -2,11 +2,13 @@ # # Copyright (c) 2015-2024 ODC Contributors # SPDX-License-Identifier: Apache-2.0 +import datetime import logging from time import monotonic from cachetools.func import lru_cache +from odc.geo.geom import CRS, Geometry from datacube.index import fields from datacube.index.abstract import AbstractProductResource, BatchStatus from datacube.index.postgis._transaction import IndexResourceAddIn @@ -21,19 +23,16 @@ class ProductResource(AbstractProductResource, IndexResourceAddIn): """ - :type _db: datacube.drivers.postgis._connections.PostgresDb - :type metadata_type_resource: datacube.index._metadata_types.MetadataTypeResource + Postgis driver product resource implementation """ def __init__(self, db, index): """ :type db: datacube.drivers.postgis._connections.PostgresDb - :type metadata_type_resource: datacube.index._metadata_types.MetadataTypeResource + :type index: datacube.index.postgis.index.Index """ + super().__init__(index) self._db = db - self._index = index - self.metadata_type_resource = self._index.metadata_types - self.get_unsafe = lru_cache()(self.get_unsafe) self.get_by_name_unsafe = lru_cache()(self.get_by_name_unsafe) @@ -41,7 +40,7 @@ def __getstate__(self): """ We define getstate/setstate to avoid pickling the caches """ - return self._db, self.metadata_type_resource + return self._db, self._index.metadata_types def __setstate__(self, state): """ @@ -72,11 +71,11 @@ def add(self, product, allow_table_lock=False): 'Metadata Type {}'.format(product.name) ) else: - metadata_type = self.metadata_type_resource.get_by_name(product.metadata_type.name) + metadata_type = self._index.metadata_types.get_by_name(product.metadata_type.name) if metadata_type is None: _LOG.warning('Adding metadata_type "%s" as it doesn\'t exist.', product.metadata_type.name) - metadata_type = self.metadata_type_resource.add(product.metadata_type, - allow_table_lock=allow_table_lock) + metadata_type = self._index.metadata_types.add(product.metadata_type, + allow_table_lock=allow_table_lock) with self._db_connection() as connection: connection.insert_product( name=product.name, @@ -199,7 +198,7 @@ def update(self, product: Product, allow_unsafe_updates=False, allow_table_lock= # name, field.sql_expression, new_field.sql_expression # ) # ) - metadata_type = self.metadata_type_resource.get_by_name(product.metadata_type.name) + metadata_type = self._index.metadata_types.get_by_name(product.metadata_type.name) # TODO: should we add metadata type here? assert metadata_type, "TODO: should we add metadata type here?" 
with self._db_connection() as conn: @@ -335,6 +334,24 @@ def _make_many(self, query_rows): def _make(self, query_row) -> Product: return Product( definition=query_row.definition, - metadata_type=cast(MetadataType, self.metadata_type_resource.get(query_row.metadata_type_ref)), + metadata_type=cast(MetadataType, self._index.metadata_types.get(query_row.metadata_type_ref)), id_=query_row.id, ) + + def temporal_extent(self, product: str | Product) -> tuple[datetime.datetime, datetime.datetime]: + """ + Returns the minimum and maximum acquisition time of the product. + """ + if isinstance(product, str): + product = self.get_by_name_unsafe(product) + with self._db_connection() as connection: + result = connection.temporal_extent_by_prod(product.id) + + return result + + def spatial_extent(self, product: str | Product, crs: CRS = CRS("EPSG:4326")) -> Geometry | None: + if isinstance(product, str): + product = self._index.products.get_by_name_unsafe(product) + ids = [ds.id for ds in self._index.datasets.search(product=product.name)] + with self._db_connection() as connection: + return connection.spatial_extent(ids, crs) diff --git a/datacube/index/postgres/_datasets.py b/datacube/index/postgres/_datasets.py index 49163cacd..783b258fd 100755 --- a/datacube/index/postgres/_datasets.py +++ b/datacube/index/postgres/_datasets.py @@ -14,9 +14,7 @@ from typing import Iterable, List, Union, Mapping, Any, Optional from uuid import UUID -from sqlalchemy import select, func - -from datacube.drivers.postgres._fields import SimpleDocField, DateDocField +from datacube.drivers.postgres._fields import SimpleDocField from datacube.drivers.postgres._schema import DATASET from datacube.index.abstract import (AbstractDatasetResource, DatasetSpatialMixin, DSID, DatasetTuple, BatchStatus) @@ -738,54 +736,17 @@ def search_summaries(self, **query): for columns in results: yield columns._asdict() + def spatial_extent(self, ids, crs=None): + return None + def temporal_extent( self, - product: str | Product | None = None, ids: Iterable[DSID] | None = None ) -> tuple[datetime.datetime, datetime.datetime]: """ - Returns the minimum and maximum acquisition time of the product. + Returns the minimum and maximum acquisition time of the specified datasets. """ - if product is None and ids is None: - raise ValueError("Must supply product or ids") - elif product is not None and ids is not None: - raise ValueError("Cannot supply both product and ids") - elif product is not None: - if isinstance(product, str): - product = self._index.products.get_by_name_unsafe(product) - else: - raise NotImplementedError("Sorry ids not supported in postgres driver.") - - # This implementation violates architecture - should not be SQLAlchemy code at this level. 
- # Get the offsets from dataset doc - dataset_section = product.metadata_type.definition['dataset'] - min_offset = dataset_section['search_fields']['time']['min_offset'] - max_offset = dataset_section['search_fields']['time']['max_offset'] - - time_min = DateDocField('aquisition_time_min', - 'Min of time when dataset was acquired', - DATASET.c.metadata, - False, # is it indexed - offset=min_offset, - selection='least') - - time_max = DateDocField('aquisition_time_max', - 'Max of time when dataset was acquired', - DATASET.c.metadata, - False, # is it indexed - offset=max_offset, - selection='greatest') - - with self._db_connection() as connection: - result = connection.execute( - select( - func.min(time_min.alchemy_expression), func.max(time_max.alchemy_expression) - ).where( - DATASET.c.dataset_type_ref == product.id - ) - ).first() - - return result + raise NotImplementedError("Sorry Temporal Extent by dataset ids is not supported in postgres driver.") # pylint: disable=redefined-outer-name def search_returning_datasets_light(self, field_names: tuple, custom_offsets=None, limit=None, **query): @@ -928,9 +889,6 @@ def get_custom_query_expressions(self, custom_query, custom_offsets): return custom_exprs - def spatial_extent(self, ids, crs=None): - return None - def get_all_docs_for_product(self, product: Product, batch_size: int = 1000) -> Iterable[DatasetTuple]: product_search_key = [product.name] with self._db_connection(transaction=True) as connection: diff --git a/datacube/index/postgres/_products.py b/datacube/index/postgres/_products.py index 0ddb1e021..6f8536793 100644 --- a/datacube/index/postgres/_products.py +++ b/datacube/index/postgres/_products.py @@ -2,6 +2,7 @@ # # Copyright (c) 2015-2024 ODC Contributors # SPDX-License-Identifier: Apache-2.0 +import datetime import logging from cachetools.func import lru_cache @@ -20,18 +21,16 @@ class ProductResource(AbstractProductResource, IndexResourceAddIn): """ - :type _db: datacube.drivers.postgres._connections.PostgresDb - :type metadata_type_resource: datacube.index._metadata_types.MetadataTypeResource + Legacy driver product resource implementation """ def __init__(self, db, index): """ :type db: datacube.drivers.postgres._connections.PostgresDb - :type metadata_type_resource: datacube.index._metadata_types.MetadataTypeResource + :type index: datacube.index.postgres.index.Index """ + super().__init__(index) self._db = db - self._index = index - self.metadata_type_resource = self._index.metadata_types self.get_unsafe = lru_cache()(self.get_unsafe) self.get_by_name_unsafe = lru_cache()(self.get_by_name_unsafe) @@ -40,7 +39,7 @@ def __getstate__(self): """ We define getstate/setstate to avoid pickling the caches """ - return self._db, self.metadata_type_resource + return self._db, self._index.metadata_types def __setstate__(self, state): """ @@ -72,11 +71,11 @@ def add(self, product, allow_table_lock=False): 'Metadata Type {}'.format(product.name) ) else: - metadata_type = self.metadata_type_resource.get_by_name(product.metadata_type.name) + metadata_type = self._index.metadata_types.get_by_name(product.metadata_type.name) if metadata_type is None: _LOG.warning('Adding metadata_type "%s" as it doesn\'t exist.', product.metadata_type.name) - metadata_type = self.metadata_type_resource.add(product.metadata_type, - allow_table_lock=allow_table_lock) + metadata_type = self._index.metadata_types.add(product.metadata_type, + allow_table_lock=allow_table_lock) with self._db_connection() as connection: if connection.in_transaction and not 
allow_table_lock: raise ValueError("allow_table_lock must be True if called inside a transaction.") @@ -188,7 +187,7 @@ def update(self, product: Product, allow_unsafe_updates=False, allow_table_lock= # name, field.sql_expression, new_field.sql_expression # ) # ) - metadata_type = cast(MetadataType, self.metadata_type_resource.get_by_name(product.metadata_type.name)) + metadata_type = cast(MetadataType, self._index.metadata_types.get_by_name(product.metadata_type.name)) # Given we cannot change metadata type because of the check above, and this is an # update method, the metadata type is guaranteed to already exist. with self._db_connection(transaction=allow_table_lock) as conn: @@ -326,6 +325,25 @@ def _make_many(self, query_rows): def _make(self, query_row) -> Product: return Product( definition=query_row.definition, - metadata_type=cast(MetadataType, self.metadata_type_resource.get(query_row.metadata_type_ref)), + metadata_type=cast(MetadataType, self._index.metadata_types.get(query_row.metadata_type_ref)), id_=query_row.id, ) + + def spatial_extent(self, product, crs=None): + return None + + def temporal_extent(self, product: str | Product) -> tuple[datetime.datetime, datetime.datetime]: + """ + Returns the minimum and maximum acquisition time of the product. + """ + if isinstance(product, str): + product = self._index.products.get_by_name_unsafe(product) + + # This implementation violates architecture - should not be SQLAlchemy code at this level. + # Get the offsets from dataset doc + dataset_section = product.metadata_type.definition['dataset'] + min_offset = dataset_section['search_fields']['time']['min_offset'] + max_offset = dataset_section['search_fields']['time']['max_offset'] + + with self._db_connection() as connection: + return connection.temporal_extent_by_product(product.id, min_offset, max_offset) diff --git a/docs/about/whats_new.rst b/docs/about/whats_new.rst index 16d7132b5..1b0a18666 100644 --- a/docs/about/whats_new.rst +++ b/docs/about/whats_new.rst @@ -30,6 +30,7 @@ v1.9.next - EP13 API changes to Index and IndexDriver. (:pull:`1534`) - EP13 API changes to metadata and product resources. (:pull:`1536`) - Phase 1 of EP13 API changes to dataset resource - get_unsafe, get_derived, temporal_extent. (:pull:`1538`) +- Add product argument to spatial_extent method, as per EP13. 
(:pull:`1539`) v1.8.next diff --git a/integration_tests/index/test_memory_index.py b/integration_tests/index/test_memory_index.py index 14f5db479..dea12e2e7 100644 --- a/integration_tests/index/test_memory_index.py +++ b/integration_tests/index/test_memory_index.py @@ -380,14 +380,10 @@ def test_mem_ds_expand_periods(mem_index_fresh: ODCEnvironment): ] -def test_temporal_extent(mem_eo3_data: ODCEnvironment): +def test_spatiotemporal_extent(mem_eo3_data: ODCEnvironment): dc, ls8_id, wo_id = mem_eo3_data ls8 = dc.index.datasets.get(ls8_id) wo = dc.index.datasets.get(wo_id) - with pytest.raises(ValueError) as e: - dc.index.datasets.temporal_extent() - with pytest.raises(ValueError) as e: - dc.index.datasets.temporal_extent(product="product1", ids=[ls8.id]) with pytest.raises(KeyError) as e: dc.index.datasets.temporal_extent(ids=[uuid4()]) @@ -398,12 +394,16 @@ def test_temporal_extent(mem_eo3_data: ODCEnvironment): for ds in (ls8, wo): tmin, tmax = dc.index.datasets.get_product_time_bounds(ds.product.name) assert (tmin is None and tmax is None) or tmin < tmax - tmin2, tmax2 = dc.index.datasets.temporal_extent(product=ds.product) + tmin2, tmax2 = dc.index.products.temporal_extent(product=ds.product) assert tmin2 == tmin and tmax2 == tmax ids = [doc["id"] for prod, doc, uris in dc.index.datasets.get_all_docs_for_product(product=ds.product)] tmin2, tmax2 = dc.index.datasets.temporal_extent(ids=ids) assert tmin2 == tmin and tmax2 == tmax + # Spatial extent + assert dc.index.products.spatial_extent(ls8.product) is None + assert dc.index.datasets.spatial_extent([ls8_id, wo_id]) is None + def test_mem_ds_archive_purge(mem_eo3_data: ODCEnvironment): dc, ls8_id, wo_id = mem_eo3_data diff --git a/integration_tests/index/test_null_index.py b/integration_tests/index/test_null_index.py index d4b79c060..acf661070 100644 --- a/integration_tests/index/test_null_index.py +++ b/integration_tests/index/test_null_index.py @@ -66,6 +66,10 @@ def test_null_product_resource(null_config: ODCEnvironment): assert dc.index.products.search_robust(foo="bar", baz=12) == [] assert empty(dc.index.products.get_with_fields(["foo", "bar"])) assert empty(dc.index.products.get_field_names()) + with pytest.raises(KeyError) as e: + dc.index.products.spatial_extent("a_prod") + with pytest.raises(KeyError) as e: + dc.index.products.temporal_extent("a_prod") with pytest.raises(KeyError) as e: dc.index.products.get_unsafe(1) with pytest.raises(KeyError) as e: @@ -113,12 +117,9 @@ def test_null_dataset_resource(null_config: ODCEnvironment): dc.index.datasets.archive_location(test_uuid, "http://a.uri/test") with pytest.raises(NotImplementedError) as e: dc.index.datasets.restore_location(test_uuid, "http://a.uri/test") - with pytest.raises(ValueError) as e: - dc.index.datasets.temporal_extent() - with pytest.raises(ValueError) as e: - dc.index.datasets.temporal_extent(product="product1", ids=[test_uuid]) with pytest.raises(KeyError) as e: dc.index.datasets.temporal_extent(ids=[test_uuid]) + assert dc.index.datasets.spatial_extent(ids=[test_uuid]) is None with pytest.raises(KeyError) as e: dc.index.datasets.get_product_time_bounds("product1") diff --git a/integration_tests/index/test_postgis_index.py b/integration_tests/index/test_postgis_index.py index 4ca64b6ed..b9e1424f7 100644 --- a/integration_tests/index/test_postgis_index.py +++ b/integration_tests/index/test_postgis_index.py @@ -134,9 +134,13 @@ def test_spatial_extent(index, epsg3577 = CRS("EPSG:3577") index.create_spatial_index(epsg3577) 
index.update_spatial_index(crses=[epsg3577]) - ext1 = index.datasets.spatial_extent([ls8_eo3_dataset.id], epsg4326) - ext2 = index.datasets.spatial_extent([ls8_eo3_dataset2.id], epsg4326) - ext12 = index.datasets.spatial_extent([ls8_eo3_dataset.id, ls8_eo3_dataset2.id], epsg4326) + + with pytest.raises(KeyError): + index.products.spatial_extent("spaghetti_product") + + ext1 = index.datasets.spatial_extent([ls8_eo3_dataset.id], crs=epsg4326) + ext2 = index.datasets.spatial_extent([ls8_eo3_dataset2.id], crs=epsg4326) + ext12 = index.datasets.spatial_extent([ls8_eo3_dataset.id, ls8_eo3_dataset2.id], crs=epsg4326) assert ext1 is not None and ext2 is not None and ext12 is not None assert ext1 == ext2 assert ext12.difference(ext1).area < 0.001 @@ -144,23 +148,35 @@ def test_spatial_extent(index, assert ls8_eo3_dataset.extent.to_crs(epsg4326).intersects(ext12) assert ls8_eo3_dataset2.extent.to_crs(epsg4326).intersects(ext2) assert ls8_eo3_dataset2.extent.to_crs(epsg4326).intersects(ext12) - extau12 = index.datasets.spatial_extent([ls8_eo3_dataset.id, ls8_eo3_dataset2.id], epsg3577) + extau12 = index.datasets.spatial_extent([ls8_eo3_dataset.id, ls8_eo3_dataset2.id], crs=epsg3577) extau12africa = index.datasets.spatial_extent( [ls8_eo3_dataset.id, ls8_eo3_dataset2.id, africa_eo3_dataset.id], - epsg3577 + crs=epsg3577 ) assert extau12 == extau12africa - ext3 = index.datasets.spatial_extent([ls8_eo3_dataset3.id], epsg4326) + ext3 = index.datasets.spatial_extent(ids=[ls8_eo3_dataset3.id], crs=epsg4326) ext1234 = index.datasets.spatial_extent( [ ls8_eo3_dataset.id, ls8_eo3_dataset2.id, ls8_eo3_dataset3.id, ls8_eo3_dataset4.id - ], epsg4326) + ], + crs=epsg4326) assert ext1.difference(ext1234).area < 0.001 assert ext3.difference(ext1234).area < 0.001 - ext1_3577 = index.datasets.spatial_extent([ls8_eo3_dataset.id], epsg3577) + ext1_3577 = index.datasets.spatial_extent([ls8_eo3_dataset.id], crs=epsg3577) assert ext1_3577.intersects(ls8_eo3_dataset.extent._to_crs(epsg3577)) + ext_ls8 = index.products.spatial_extent( + ls8_eo3_dataset.product, + crs=epsg4326 + ) + assert ext_ls8 == ext1234 + ext_ls8 = index.products.spatial_extent( + ls8_eo3_dataset.product.name, + crs=epsg4326 + ) + assert ext_ls8 == ext1234 + @pytest.mark.parametrize('datacube_env_name', ('experimental',)) def test_spatial_search(index, @@ -207,16 +223,16 @@ def test_spatial_search(index, def test_temporal_extents(index, ls8_eo3_dataset, ls8_eo3_dataset2, ls8_eo3_dataset3, ls8_eo3_dataset4): - start, end = index.datasets.temporal_extent(product=ls8_eo3_dataset.product) + start, end = index.products.temporal_extent(ls8_eo3_dataset.product) assert start == datetime.datetime( 2013, 4, 4, 0, 58, 34, 682275, tzinfo=datetime.timezone.utc) assert end == datetime.datetime( 2016, 5, 28, 23, 50, 59, 149573, tzinfo=datetime.timezone.utc) - start2, end2 = index.datasets.temporal_extent(product=ls8_eo3_dataset.product.name) + start2, end2 = index.products.temporal_extent(ls8_eo3_dataset.product.name) assert start == start2 and end == end2 - start2, end2 = index.datasets.temporal_extent(ids=[ + start2, end2 = index.datasets.temporal_extent([ ls8_eo3_dataset.id, ls8_eo3_dataset2.id, ls8_eo3_dataset3.id, ls8_eo3_dataset4.id, ]) diff --git a/integration_tests/index/test_search_legacy.py b/integration_tests/index/test_search_legacy.py index 5efbfffba..211dcea80 100644 --- a/integration_tests/index/test_search_legacy.py +++ b/integration_tests/index/test_search_legacy.py @@ -1085,3 +1085,5 @@ def test_spatial_index_api_defaults(index: Index): with 
pytest.raises(NotImplementedError) as e: index.drop_spatial_index(CRS("epsg:3577")) assert "does not support the Spatial Index API" in str(e.value) + assert index.products.spatial_extent("a_product") is None + assert index.datasets.spatial_extent([uuid.uuid4(), uuid.uuid4()]) is None diff --git a/integration_tests/test_index_datasets_search.py b/integration_tests/test_index_datasets_search.py index 1e38ab71e..42b150a70 100644 --- a/integration_tests/test_index_datasets_search.py +++ b/integration_tests/test_index_datasets_search.py @@ -139,27 +139,20 @@ def index_products(): def test_temporal_extent( index, ls8_eo3_dataset, ls8_eo3_dataset2, ls8_eo3_dataset3, ls8_eo3_dataset4 ): - with pytest.raises(ValueError): - start, end = index.datasets.temporal_extent() - with pytest.raises(ValueError): - start, end = index.datasets.temporal_extent( - product=ls8_eo3_dataset.product, - ids=[ls8_eo3_dataset.id, ls8_eo3_dataset2.id, ls8_eo3_dataset3.id, ls8_eo3_dataset4.id] - ) with pytest.raises(KeyError): - start, end = index.datasets.temporal_extent(product="orthentick_produckt") + start, end = index.products.temporal_extent("orthentick_produckt") - start, end = index.datasets.temporal_extent(product=ls8_eo3_dataset.product) + start, end = index.products.temporal_extent(ls8_eo3_dataset.product) assert start == datetime.datetime( 2013, 4, 4, 0, 58, 34, 682275, tzinfo=datetime.timezone.utc) assert end == datetime.datetime( 2016, 5, 28, 23, 50, 59, 149573, tzinfo=datetime.timezone.utc) - start2, end2 = index.datasets.temporal_extent(product=ls8_eo3_dataset.product.name) + start2, end2 = index.products.temporal_extent(ls8_eo3_dataset.product.name) assert start == start2 and end == end2 try: - start2, end2 = index.datasets.temporal_extent(ids=[ + start2, end2 = index.datasets.temporal_extent([ ls8_eo3_dataset.id, ls8_eo3_dataset2.id, ls8_eo3_dataset3.id, ls8_eo3_dataset4.id, ])
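
The assertions above capture the migration path for callers (the patch text is truncated at this point). A short sketch of the same behaviour, under the same hypothetical names as the earlier example; the NotImplementedError branch mirrors the try block the last test opens, since the legacy postgres driver no longer supports id-based temporal extents:

    import datacube

    dc = datacube.Datacube(env="datacube")   # hypothetical environment name
    prod = "ga_ls8c_ard_3"                   # hypothetical product name

    # Deprecated 1.8-style call: still works, but now delegates to the
    # product resource and emits an ODC2DeprecationWarning.
    start, end = dc.index.datasets.get_product_time_bounds(prod)

    # Preferred 1.9 replacement:
    start, end = dc.index.products.temporal_extent(prod)

    # Id-based temporal extent is driver-dependent: postgis computes it in
    # the database, memory in Python, and postgres raises NotImplementedError.
    ids = [ds.id for ds in dc.index.datasets.search(product=prod)]
    try:
        start, end = dc.index.datasets.temporal_extent(ids)
    except NotImplementedError:
        pass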