From c9c92f829691c35958f5346315bbb59d7738fbe2 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Thu, 7 Mar 2024 03:12:06 +0000 Subject: [PATCH 1/8] refine _time_to_search_dims logic so that behaviour when time is passed as an int/float is the same as when it is a string --- datacube/api/query.py | 35 ++++++----------------------------- tests/api/test_query.py | 4 +++- 2 files changed, 9 insertions(+), 30 deletions(-) diff --git a/datacube/api/query.py b/datacube/api/query.py index a81701ab6..115c592f6 100644 --- a/datacube/api/query.py +++ b/datacube/api/query.py @@ -14,7 +14,6 @@ from typing import Optional, Union import pandas -from dateutil import tz from pandas import to_datetime as pandas_to_datetime import numpy as np @@ -126,11 +125,7 @@ def __init__(self, index=None, product=None, geopolygon=None, like=None, **searc if 'time' not in self.search: time_coord = like.coords.get('time') if time_coord is not None: - self.search['time'] = _time_to_search_dims( - (pandas_to_datetime(time_coord.values[0]).to_pydatetime(), - pandas_to_datetime(time_coord.values[-1]).to_pydatetime() - + datetime.timedelta(milliseconds=1)) # TODO: inclusive time searches - ) + self.search['time'] = _time_to_search_dims((time_coord.values[0], time_coord.values[-1])) @property def search_terms(self): @@ -304,23 +299,6 @@ def _values_to_search(**kwargs): return search -def _to_datetime(t): - if isinstance(t, (float, int)): - t = datetime.datetime.fromtimestamp(t, tz=tz.tzutc()) - - if isinstance(t, tuple): - t = datetime.datetime(*t, tzinfo=tz.tzutc()) - elif isinstance(t, str): - try: - t = datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%S.%fZ") - except ValueError: - pass - elif isinstance(t, datetime.datetime): - return tz_aware(t) - - return pandas_to_datetime(t, utc=True, infer_datetime_format=True).to_pydatetime() - - def _time_to_search_dims(time_range): with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) @@ -343,15 +321,14 @@ def _time_to_search_dims(time_range): tr_end = tr_end.isoformat() if tr_start is None: - tr_start = datetime.datetime.fromtimestamp(0) - start = _to_datetime(tr_start) + start = datetime.datetime.fromtimestamp(0) + else: + start = pandas_to_datetime(str(tr_start)).to_pydatetime() if tr_end is None: tr_end = datetime.datetime.now().strftime("%Y-%m-%d") - end = _to_datetime(pandas.Period(tr_end) - .end_time - .to_pydatetime()) + end = pandas.Period(tr_end).end_time.to_pydatetime() - tr = Range(start, end) + tr = Range(tz_aware(start), tz_aware(end)) if start == end: return tr[0] diff --git a/tests/api/test_query.py b/tests/api/test_query.py index a2ac72276..42cfc431f 100644 --- a/tests/api/test_query.py +++ b/tests/api/test_query.py @@ -144,7 +144,9 @@ def format_test(start_out, end_out): ((datetime.date(2008, 1, 1), None), format_test('2008-01-01T00:00:00', datetime.datetime.now().strftime("%Y-%m-%dT23:59:59.999999"))), ((None, '2008'), - format_test(datetime.datetime.fromtimestamp(0).strftime("%Y-%m-%dT%H:%M:%S"), '2008-12-31T23:59:59.999999')) + format_test(datetime.datetime.fromtimestamp(0).strftime("%Y-%m-%dT%H:%M:%S"), '2008-12-31T23:59:59.999999')), + ((2008), + format_test('2008-01-01T00:00:00', '2008-12-31T23:59:59.999999')), ] From 57dccd2e41bbed110f41c914811efe68419220b9 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Thu, 7 Mar 2024 03:29:29 +0000 Subject: [PATCH 2/8] update expected tzinfo in query docstring --- datacube/api/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datacube/api/query.py b/datacube/api/query.py index 115c592f6..6c53117dd 100644 --- a/datacube/api/query.py +++ b/datacube/api/query.py @@ -68,7 +68,7 @@ def __init__(self, index=None, product=None, geopolygon=None, like=None, **searc Use by accessing :attr:`search_terms`: >>> query.search_terms['time'] # doctest: +NORMALIZE_WHITESPACE - Range(begin=datetime.datetime(2001, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), \ + Range(begin=datetime.datetime(2001, 1, 1, 0, 0, tzinfo=tzutc()), \ end=datetime.datetime(2002, 1, 1, 23, 59, 59, 999999, tzinfo=tzutc())) By passing in an ``index``, the search parameters will be validated as existing on the ``product``. From 91cd8f2d7dcfb3ec91fe9251f12258c4dda470e7 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Thu, 7 Mar 2024 04:32:02 +0000 Subject: [PATCH 3/8] convert np.datetime64 to datetime.datetime --- datacube/api/query.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/datacube/api/query.py b/datacube/api/query.py index 6c53117dd..b069fd90e 100644 --- a/datacube/api/query.py +++ b/datacube/api/query.py @@ -125,7 +125,11 @@ def __init__(self, index=None, product=None, geopolygon=None, like=None, **searc if 'time' not in self.search: time_coord = like.coords.get('time') if time_coord is not None: - self.search['time'] = _time_to_search_dims((time_coord.values[0], time_coord.values[-1])) + self.search['time'] = _time_to_search_dims( + # convert from np.datetime64 to datetime.datetime + (pandas_to_datetime(time_coord.values[0]).to_pydatetime(), + pandas_to_datetime(time_coord.values[-1]).to_pydatetime()) + ) @property def search_terms(self): @@ -302,7 +306,6 @@ def _values_to_search(**kwargs): def _time_to_search_dims(time_range): with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) - tr_start, tr_end = time_range, time_range if hasattr(time_range, '__iter__') and not isinstance(time_range, str): From 7c221c38f782156c43d986d7b0a6721ca7302f90 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Thu, 7 Mar 2024 04:34:05 +0000 Subject: [PATCH 4/8] update whats_new --- docs/about/whats_new.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/about/whats_new.rst b/docs/about/whats_new.rst index f8ecdc76a..243ece996 100644 --- a/docs/about/whats_new.rst +++ b/docs/about/whats_new.rst @@ -16,6 +16,7 @@ v1.8.next - Tweak ``list_products`` logic for getting crs and resolution values (:pull:`1535`) - Add new ODC Cheatsheet reference doc to Data Access & Analysis documentation page (:pull:`1543`) - Fix broken codecov github action. (:pull:`1554`) +- Fix handling of date value as int in Query construction (:pull:`1561`) v1.8.17 (8th November 2023) =========================== From fd5e1163305ed9ad4b650100d13ba2250cb464c6 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Tue, 12 Mar 2024 00:42:03 +0000 Subject: [PATCH 5/8] error on int/float, less back and forth dt conversion --- datacube/api/query.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/datacube/api/query.py b/datacube/api/query.py index b069fd90e..d290b8501 100644 --- a/datacube/api/query.py +++ b/datacube/api/query.py @@ -57,7 +57,7 @@ def __init__(self, group_by_func, dimension, units, sort_key=None, group_key=Non 'source_filter') -class Query(object): +class Query(): def __init__(self, index=None, product=None, geopolygon=None, like=None, **search_terms): """Parses search terms in preparation for querying the Data Cube Index. @@ -315,21 +315,30 @@ def _time_to_search_dims(time_range): tr_start, tr_end = tmp[0], tmp[-1] - # Attempt conversion to isoformat - # allows pandas.Period to handle - # date and datetime objects - if hasattr(tr_start, 'isoformat'): - tr_start = tr_start.isoformat() - if hasattr(tr_end, 'isoformat'): - tr_end = tr_end.isoformat() + if isinstance(tr_start, (int, float)) or isinstance(tr_end, (int, float)): + raise TypeError("Time dimension must be provided as a datetime or a string") if tr_start is None: start = datetime.datetime.fromtimestamp(0) + elif not isinstance(tr_start, datetime.datetime): + # ensure consistency between different datetime types + if hasattr(tr_start, 'isoformat'): + tr_start = tr_start.isoformat() + start = pandas_to_datetime(tr_start).to_pydatetime() else: - start = pandas_to_datetime(str(tr_start)).to_pydatetime() + start = tr_start + if tr_end is None: tr_end = datetime.datetime.now().strftime("%Y-%m-%d") - end = pandas.Period(tr_end).end_time.to_pydatetime() + if not isinstance(tr_end, datetime.datetime): + # Attempt conversion to isoformat + # allows pandas.Period to handle date objects + if hasattr(tr_end, 'isoformat'): + tr_end = tr_end.isoformat() + end = pandas.Period(tr_end).end_time.to_pydatetime() + else: + # if it's already a datetime, no need to extrapolate the period end + end = tr_end tr = Range(tz_aware(start), tz_aware(end)) if start == end: From c03d2419a065cc347282f5a19a45b1bb83ee28ad Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Tue, 12 Mar 2024 01:16:48 +0000 Subject: [PATCH 6/8] fix end datetime handling --- datacube/api/query.py | 17 +++++++---------- tests/api/test_query.py | 7 +++++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/datacube/api/query.py b/datacube/api/query.py index d290b8501..b07aa98f1 100644 --- a/datacube/api/query.py +++ b/datacube/api/query.py @@ -321,7 +321,7 @@ def _time_to_search_dims(time_range): if tr_start is None: start = datetime.datetime.fromtimestamp(0) elif not isinstance(tr_start, datetime.datetime): - # ensure consistency between different datetime types + # convert to datetime.datetime if hasattr(tr_start, 'isoformat'): tr_start = tr_start.isoformat() start = pandas_to_datetime(tr_start).to_pydatetime() @@ -330,15 +330,12 @@ def _time_to_search_dims(time_range): if tr_end is None: tr_end = datetime.datetime.now().strftime("%Y-%m-%d") - if not isinstance(tr_end, datetime.datetime): - # Attempt conversion to isoformat - # allows pandas.Period to handle date objects - if hasattr(tr_end, 'isoformat'): - tr_end = tr_end.isoformat() - end = pandas.Period(tr_end).end_time.to_pydatetime() - else: - # if it's already a datetime, no need to extrapolate the period end - end = tr_end + # Attempt conversion to isoformat + # allows pandas.Period to handle datetime objects + if hasattr(tr_end, 'isoformat'): + tr_end = tr_end.isoformat() + # get end of period to ensure range is inclusive + end = pandas.Period(tr_end).end_time.to_pydatetime() tr = Range(tz_aware(start), tz_aware(end)) if start == end: diff --git a/tests/api/test_query.py b/tests/api/test_query.py index 42cfc431f..7b6c1e406 100644 --- a/tests/api/test_query.py +++ b/tests/api/test_query.py @@ -145,8 +145,6 @@ def format_test(start_out, end_out): format_test('2008-01-01T00:00:00', datetime.datetime.now().strftime("%Y-%m-%dT23:59:59.999999"))), ((None, '2008'), format_test(datetime.datetime.fromtimestamp(0).strftime("%Y-%m-%dT%H:%M:%S"), '2008-12-31T23:59:59.999999')), - ((2008), - format_test('2008-01-01T00:00:00', '2008-12-31T23:59:59.999999')), ] @@ -157,6 +155,11 @@ def test_time_handling(time_param, expected): assert query.search_terms['time'] == expected +def test_time_handling_int(): + with pytest.raises(TypeError): + Query(time=2008) + + def test_solar_day(): _s = SimpleNamespace ds = _s(center_time=parse_time('1987-05-22 23:07:44.2270250Z'), From 811283e09092f46e0ac2f351a75d45941f604a8b Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Tue, 12 Mar 2024 04:50:24 +0000 Subject: [PATCH 7/8] more informative description in whats_new --- docs/about/whats_new.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/about/whats_new.rst b/docs/about/whats_new.rst index 243ece996..b083c38a3 100644 --- a/docs/about/whats_new.rst +++ b/docs/about/whats_new.rst @@ -16,7 +16,8 @@ v1.8.next - Tweak ``list_products`` logic for getting crs and resolution values (:pull:`1535`) - Add new ODC Cheatsheet reference doc to Data Access & Analysis documentation page (:pull:`1543`) - Fix broken codecov github action. (:pull:`1554`) -- Fix handling of date value as int in Query construction (:pull:`1561`) +- Throw error if ``time`` dimension is provided as an int or float to Query construction + instead of assuming it to be seconds since epoch (:pull:`1561`) v1.8.17 (8th November 2023) =========================== From 4bf8cb8dc4fcca681665e1e688e810b407034d4d Mon Sep 17 00:00:00 2001 From: Ariana-B <40238244+Ariana-B@users.noreply.github.com> Date: Tue, 12 Mar 2024 15:51:38 +1100 Subject: [PATCH 8/8] Update Query constructor syntax Co-authored-by: Damien Ayers --- datacube/api/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datacube/api/query.py b/datacube/api/query.py index b07aa98f1..62d9804a4 100644 --- a/datacube/api/query.py +++ b/datacube/api/query.py @@ -57,7 +57,7 @@ def __init__(self, group_by_func, dimension, units, sort_key=None, group_key=Non 'source_filter') -class Query(): +class Query: def __init__(self, index=None, product=None, geopolygon=None, like=None, **search_terms): """Parses search terms in preparation for querying the Data Cube Index.