Revert "Fix group_by on load inconsistently fusing data"
This reverts commit 15a84e4.
v0lat1le committed Nov 1, 2016
1 parent f451de5 commit 070a281
Showing 7 changed files with 15 additions and 20 deletions.
4 changes: 2 additions & 2 deletions datacube/api/_api.py
@@ -68,8 +68,8 @@ def _get_descriptor_for_dataset(self, dataset_type, datasets, group_by, geopolygon
         if not geopolygon:
             geopolygon = get_bounds(datasets, dataset_type.grid_spec.crs)
 
-        datasets.sort(key=group_by.grouping_key)
-        groups = [Group(key, list(group)) for key, group in groupby(datasets, group_by.grouping_key)]
+        datasets.sort(key=group_by.group_by_func)
+        groups = [Group(key, list(group)) for key, group in groupby(datasets, group_by.group_by_func)]
 
         dataset_descriptor['result_min'] = tuple()
         dataset_descriptor['result_max'] = tuple()
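The pattern restored here — sort by a key, then `itertools.groupby` with the same key — only fuses datasets whose keys end up adjacent. A minimal sketch of why the sort is required, with plain dicts standing in for datacube `Dataset` objects (names here are illustrative only):

```python
# Minimal sketch of the sort-then-group pattern above; plain dicts stand in
# for datacube Dataset objects, so everything here is illustrative.
import datetime
from collections import namedtuple
from itertools import groupby

Group = namedtuple('Group', ['key', 'datasets'])

datasets = [
    {'center_time': datetime.datetime(2016, 1, 2), 'value': 'b'},
    {'center_time': datetime.datetime(2016, 1, 1), 'value': 'a'},
    {'center_time': datetime.datetime(2016, 1, 1), 'value': 'c'},
]

group_by_func = lambda ds: ds['center_time']

# groupby only merges *adjacent* equal keys, so the list must be sorted by
# the same key first; otherwise equal keys surface as separate groups.
datasets.sort(key=group_by_func)
groups = [Group(key, list(group)) for key, group in groupby(datasets, group_by_func)]

assert len(groups) == 2              # two distinct timestamps
assert len(groups[0].datasets) == 2  # both 2016-01-01 datasets fused together
```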
6 changes: 3 additions & 3 deletions datacube/api/core.py
@@ -369,9 +369,9 @@ def product_sources(datasets, group_by):
         .. seealso:: :meth:`product_observations` :meth:`product_data`
         """
-        dimension, grouping_key, units, sorting_key, reverse_sort = group_by
-        datasets.sort(key=sorting_key, reverse=reverse_sort)
-        groups = [Group(key, tuple(group)) for key, group in groupby(datasets, grouping_key)]
+        dimension, group_func, units = group_by
+        datasets.sort(key=group_func)
+        groups = [Group(key, tuple(group)) for key, group in groupby(datasets, group_func)]
 
         data = numpy.empty(len(groups), dtype=object)
         for index, group in enumerate(groups):
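The context lines at the bottom of this hunk start packing each group into a 1-d object array, one cell per group; the loop body is cut off here, so the assignment below is an assumption about what follows, sketched with placeholder strings instead of real datasets:

```python
# Hedged sketch of the object-array packing begun in the context lines above;
# the group tuples are placeholders, and the loop body is assumed, not shown
# in this hunk.
import numpy

groups = [('2016-01-01', ('ds_a', 'ds_c')),
          ('2016-01-02', ('ds_b',))]

data = numpy.empty(len(groups), dtype=object)
for index, (key, group_datasets) in enumerate(groups):
    # Assigning to a single cell of an object array stores the tuple itself,
    # rather than numpy broadcasting its elements.
    data[index] = group_datasets

assert data.shape == (2,)
assert data[0] == ('ds_a', 'ds_c')
```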
4 changes: 2 additions & 2 deletions datacube/api/grid_workflow.py
@@ -223,8 +223,8 @@ def tile_sources(observations, group_by):
         """
         tiles = {}
         for cell_index, observation in observations.items():
-            observation['datasets'].sort(key=group_by.sorting_key, reverse=group_by.reversed)
-            groups = [(key, tuple(group)) for key, group in groupby(observation['datasets'], group_by.grouping_key)]
+            observation['datasets'].sort(key=group_by.group_by_func)
+            groups = [(key, tuple(group)) for key, group in groupby(observation['datasets'], group_by.group_by_func)]
 
             for key, datasets in groups:
                 data = numpy.empty(1, dtype=object)
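The removed lines are the one place the five-field form did something the restored three-field form cannot: sorting by one key (`center_time`, reversed) while grouping by another (`solar_day`). A sketch of that behaviour, using a simplified stand-in for `solar_day` (the real `datacube.api.query.solar_day` also corrects for longitude):

```python
# Sketch of sorting by one key while grouping by another, as the removed
# lines above did; solar_day here is a simplified stand-in for
# datacube.api.query.solar_day.
import datetime
from itertools import groupby

def solar_day(ds):
    return ds['center_time'].date()

datasets = [
    {'center_time': datetime.datetime(2016, 1, 1, 0, 10)},
    {'center_time': datetime.datetime(2016, 1, 1, 23, 50)},
]

sorting_key = lambda ds: ds['center_time']
datasets.sort(key=sorting_key, reverse=True)   # reversed=True in the old tuple

groups = [(key, tuple(group)) for key, group in groupby(datasets, solar_day)]

# Both acquisitions share a solar day, so they fuse into one group, ordered
# latest-first by the reverse sort.
assert len(groups) == 1
assert groups[0][1][0]['center_time'].hour == 23
```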
14 changes: 5 additions & 9 deletions datacube/api/query.py
@@ -33,7 +33,7 @@
 _LOG = logging.getLogger(__name__)
 
 
-GroupBy = collections.namedtuple('GroupBy', ['dimension', 'grouping_key', 'units', 'sorting_key', 'reversed'])
+GroupBy = collections.namedtuple('GroupBy', ['dimension', 'group_by_func', 'units'])
 
 FLOAT_TOLERANCE = 0.0000001  # TODO: For DB query, use some sort of 'contains' query, rather than range overlap.
 SPATIAL_KEYS = ('latitude', 'lat', 'y', 'longitude', 'lon', 'long', 'x')
@@ -171,16 +171,12 @@ def query_geopolygon(geopolygon=None, **kwargs):
 
 def query_group_by(group_by='time', **kwargs):
     time_grouper = GroupBy(dimension='time',
-                           grouping_key=lambda ds: ds.center_time,
-                           units='seconds since 1970-01-01 00:00:00',
-                           sorting_key=lambda ds: ds.center_time,
-                           reversed=True)
+                           group_by_func=lambda ds: ds.center_time,
+                           units='seconds since 1970-01-01 00:00:00')
 
     solar_day_grouper = GroupBy(dimension='time',
-                                grouping_key=solar_day,
-                                units='seconds since 1970-01-01 00:00:00',
-                                sorting_key=lambda ds: ds.center_time,
-                                reversed=True)
+                                group_by_func=solar_day,
+                                units='seconds since 1970-01-01 00:00:00')
 
     group_by_map = {
         None: time_grouper,
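After this revert, `GroupBy` is back to three fields, and consumers unpack it positionally (as in the `product_sources` hunk above). A quick sketch of constructing it the way the updated tests below do:

```python
# The restored three-field GroupBy and the positional unpack used by
# product_sources; mirrors the test changes below.
import collections

GroupBy = collections.namedtuple('GroupBy', ['dimension', 'group_by_func', 'units'])

group_by = GroupBy('time', lambda ds: ds.center_time,
                   'seconds since 1970-01-01 00:00:00')

dimension, group_func, units = group_by
assert dimension == 'time'
assert callable(group_func)
```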
3 changes: 1 addition & 2 deletions integration_tests/test_full_ingestion.py
@@ -173,8 +173,7 @@ def check_open_with_api(index):
 
     geobox = GeoBox(200, 200, Affine(25, 0.0, 1500000, 0.0, -25, -3900000), CRS('EPSG:3577'))
     observations = datacube.product_observations(product='ls5_nbar_albers', geopolygon=geobox.extent)
-    group_by = GroupBy('time', lambda ds: ds.center_time, 'seconds since 1970-01-01 00:00:00',
-                       lambda ds: ds.center_time, True)
+    group_by = GroupBy('time', lambda ds: ds.center_time, 'seconds since 1970-01-01 00:00:00')
     sources = datacube.product_sources(observations, group_by)
     data = datacube.product_data(sources, geobox, input_type.measurements.values())
     assert data.blue.shape == (1, 200, 200)
2 changes: 1 addition & 1 deletion tests/api/test_core.py
@@ -15,7 +15,7 @@ def group_func(d):
         {'time': datetime.datetime(2016, 2, 1), 'value': 'bar'}
     ]
 
-    group_by = GroupBy(dimension, group_func, units, group_func, reversed=False)
+    group_by = GroupBy(dimension, group_func, units)
     grouped = Datacube.product_sources(datasets, group_by)
 
     assert str(grouped.time.dtype) == 'datetime64[ns]'
2 changes: 1 addition & 1 deletion tests/api/test_query.py
@@ -86,7 +86,7 @@ def test_convert_descriptor_query_to_search_query_with_groupby():
     }
     query = DescriptorQuery(descriptor_query)
     assert query.group_by
-    assert callable(query.group_by.grouping_key)
+    assert callable(query.group_by.group_by_func)
     assert query.group_by.dimension == 'time'
     assert query.group_by.units == 'seconds since 1970-01-01 00:00:00'

