Skip to content

Commit

Permalink
Fix the issue caused by MultiIndex/coords in xarray (#1397)
Browse files Browse the repository at this point in the history
* fix broken parts due to xarray updates

* figure dims from coords xarray

* add more test of xarray

* please github action

* code format

* update whats_new.rst

---------

Co-authored-by: Emma Ai <emma.ai@ga.gov.au>
  • Loading branch information
emmaai and Emma Ai committed Feb 5, 2023
1 parent 34f4da8 commit 42e2726
Show file tree
Hide file tree
Showing 12 changed files with 85 additions and 54 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-docs.yml
Expand Up @@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v3
with:
fetch-depth: 0

Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/doc-qa.yaml
Expand Up @@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
steps:
# Spellcheck
- uses: actions/checkout@v1
- uses: actions/checkout@v3
with:
fetch-depth: 0

Expand All @@ -28,7 +28,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: "Checkout"
uses: actions/checkout@v2
uses: actions/checkout@v3

- name: "Create cache dir"
run: mkdir .cache
Expand All @@ -38,7 +38,7 @@ jobs:
id: extract_base_branch

- name: "Cache DOCtor-RST"
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: .cache
key: ${{ runner.os }}-doctor-rst-${{ steps.extract_base_branch.outputs.branch }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/lint.yaml
Expand Up @@ -23,7 +23,7 @@ jobs:
name: Pylint
steps:
- name: checkout git
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Setup conda
Expand All @@ -47,7 +47,7 @@ jobs:
python-version: [3.8]
steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v3
- name: Setup conda
uses: s-weigand/setup-conda@v1
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Expand Up @@ -32,7 +32,7 @@ jobs:
- opendatacube/datacube-tests:latest

steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v3
with:
fetch-depth: 0

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-conda-build.yml
Expand Up @@ -17,10 +17,10 @@ jobs:
python-version: ["3.8", "3.9"]

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Cache conda
uses: actions/cache@v2
uses: actions/cache@v3
env:
# Increase this value to reset cache if setup.py has not changed
CACHE_NUMBER: 0
Expand Down
2 changes: 1 addition & 1 deletion conda-environment.yml
Expand Up @@ -33,5 +33,5 @@ dependencies:
- ruamel.yaml
- sqlalchemy <2.0
- GeoAlchemy2
- xarray >=0.9,!=2022.6.0
- xarray >=0.9
- toolz
13 changes: 11 additions & 2 deletions datacube/api/core.py
Expand Up @@ -593,8 +593,17 @@ def empty_func(m, shape):
# - 3D dims must fit between (t) and (y, x) or (lat, lon)

# 2D defaults
dims_default = tuple(coords) + geobox.dimensions
shape_default = tuple(c.size for c in coords.values()) + geobox.shape
# retrieve dims from coords if DataArray
dims_default = None
if coords != {}:
coords_value = next(iter(coords.values()))
if isinstance(coords_value, xarray.DataArray):
dims_default = coords_value.dims + geobox.dimensions

if dims_default is None:
dims_default = tuple(coords) + geobox.dimensions

shape_default = tuple(c.size for k, c in coords.items() if k in dims_default) + geobox.shape
coords_default = OrderedDict(**coords, **geobox.xr_coords(with_crs=spatial_ref))

arrays = []
Expand Down
1 change: 1 addition & 0 deletions docs/about/whats_new.rst
Expand Up @@ -12,6 +12,7 @@ v1.8.next
- Update conda environment file and add notes to release process to ensure pip and conda
dependencies are in sync and up-to-date. (:pull:`1395`)
- Update docker constraints (:pull:`1396`)
- Compatible with the changes w.r.t. `MultiIndex` and `coord/dims` introduced since `xarray>2022.3.0` (:pull:`1397`)


v1.8.10 (30 January 2023)
Expand Down
6 changes: 3 additions & 3 deletions tests/conftest.py
Expand Up @@ -297,15 +297,15 @@ def tmpnetcdf_filename(tmpdir):


@pytest.fixture
def odc_style_xr_dataset():
"""An xarray.Dataset with ODC style coordinates and CRS, and no time dimension.
def odc_style_xr_dataset(request):
"""An xarray.Dataset with ODC style coordinates and CRS
Contains an EPSG:4326, single variable 'B10' of 100x100 int16 pixels."""
affine = Affine.scale(0.1, 0.1) * Affine.translation(20, 30)
geobox = geometry.GeoBox(100, 100, affine, geometry.CRS(GEO_PROJ))

return Datacube.create_storage(
{}, geobox, [Measurement(name="B10", dtype="int16", nodata=0, units="1")]
request.param, geobox, [Measurement(name="B10", dtype="int16", nodata=0, units="1")]
)


Expand Down
1 change: 1 addition & 0 deletions tests/storage/test_netcdfwriter.py
Expand Up @@ -292,6 +292,7 @@ def test_useful_error_on_write_empty_dataset(tmpnetcdf_filename):
assert 'geobox' in str(excinfo.value)


@pytest.mark.parametrize("odc_style_xr_dataset", [{}], indirect=True)
def test_write_dataset_to_netcdf(tmpnetcdf_filename, odc_style_xr_dataset):
write_dataset_to_netcdf(odc_style_xr_dataset, tmpnetcdf_filename, global_attributes={'foo': 'bar'},
variable_params={'B10': {'attrs': {'abc': 'xyz'}}})
Expand Down
97 changes: 58 additions & 39 deletions tests/test_xarray_extension.py
Expand Up @@ -4,6 +4,8 @@
# SPDX-License-Identifier: Apache-2.0
import pytest
import xarray as xr
import pandas as pd
import numpy as np
from datacube.testutils.geom import epsg4326, epsg3857
from datacube.testutils import mk_sample_xr_dataset, remove_crs
from datacube.utils.geometry import assign_crs
Expand All @@ -16,7 +18,23 @@
_get_crs_from_coord,
)


multi_coords = xr.DataArray(
np.zeros(1),
[
(
"spec",
pd.MultiIndex.from_arrays(
np.array([["2001-01-01"], ["2001-01-01"]]), names=("time", "solar_day")
),
)
],
).coords
single_coord = dict(time=np.array(["2001-01-01"]))


@pytest.mark.parametrize(
"odc_style_xr_dataset", [single_coord, multi_coords], indirect=True
)
def test_xr_extension(odc_style_xr_dataset):
xx = odc_style_xr_dataset
assert _norm_crs(None) is None
Expand All @@ -26,7 +44,7 @@ def test_xr_extension(odc_style_xr_dataset):
with pytest.raises(ValueError):
_norm_crs([])

assert xx.geobox.shape == xx.B10.shape
assert (1,) + xx.geobox.shape == xx.B10.shape

(sx, zz0, tx, zz1, sy, ty) = xx.affine[:6]
assert (zz0, zz1) == (0, 0)
Expand All @@ -39,8 +57,8 @@ def test_xr_extension(odc_style_xr_dataset):
# affine should still be valid
A = _xarray_affine(xx)
assert A is not None
assert A*(0.5, 0.5) == (xx.longitude[0], xx.latitude[0])
assert A*(0.5, 1.5) == (xx.longitude[0], xx.latitude[1])
assert A * (0.5, 0.5) == (xx.longitude[0], xx.latitude[0])
assert A * (0.5, 1.5) == (xx.longitude[0], xx.latitude[1])


def test_xr_geobox():
Expand All @@ -52,11 +70,11 @@ def test_xr_geobox():

assert ds.geobox.crs == epsg3857
assert ds.band.geobox.crs == epsg3857
assert ds.band.affine*(0, 0) == xy
assert ds.band.affine*(1, 1) == tuple(a+b for a, b in zip(xy, rxy))
assert ds.band.affine * (0, 0) == xy
assert ds.band.affine * (1, 1) == tuple(a + b for a, b in zip(xy, rxy))

assert ds.band[:, :2, :2].affine*(0, 0) == xy
assert ds.band[:, :2, :2].affine*(1, 1) == tuple(a+b for a, b in zip(xy, rxy))
assert ds.band[:, :2, :2].affine * (0, 0) == xy
assert ds.band[:, :2, :2].affine * (1, 1) == tuple(a + b for a, b in zip(xy, rxy))
assert ds.band.isel(time=0, x=0).affine is None

xx = ds.band + 1000
Expand All @@ -74,26 +92,26 @@ def test_xr_geobox_unhappy():
xx = mk_sample_xr_dataset(crs=None)

# test duplicates behaviour in get_crs_from_{coord,attrs} are caught
xx.band.attrs.update(grid_mapping='x') # force exception in coord
xx.x.attrs.update(crs='EPSG:4326') # force exception in attr
xx.y.attrs.update(crs='EPSG:3857')
xx.band.attrs.update(grid_mapping="x") # force exception in coord
xx.x.attrs.update(crs="EPSG:4326") # force exception in attr
xx.y.attrs.update(crs="EPSG:3857")
with pytest.warns(UserWarning):
assert xx.band.geobox is not None

# test crs not being a string
xx = mk_sample_xr_dataset(crs=None)
xx.attrs['crs'] = ['this will not parse']
xx.attrs["crs"] = ["this will not parse"]
with pytest.warns(UserWarning):
assert xx.geobox is None

xx.attrs['crs'] = 'this will fail CRS() constructor'
xx.attrs["crs"] = "this will fail CRS() constructor"
with pytest.warns(UserWarning):
assert xx.geobox is None

xx = mk_sample_xr_dataset(crs=epsg3857)
xx.attrs.pop('crs', None)
xx.band.attrs.pop('crs', None)
xx.spatial_ref.attrs['spatial_ref'] = 'this will fail CRS() constructor'
xx.attrs.pop("crs", None)
xx.band.attrs.pop("crs", None)
xx.spatial_ref.attrs["spatial_ref"] = "this will fail CRS() constructor"
with pytest.warns(UserWarning):
assert xx.geobox is None

Expand All @@ -102,8 +120,8 @@ def test_crs_from_coord():
xx_none = mk_sample_xr_dataset(crs=None)
xx_3857 = mk_sample_xr_dataset(crs=epsg3857)
xx_4326 = mk_sample_xr_dataset(crs=epsg4326)
cc_4326 = xx_4326.geobox.xr_coords(with_crs='epsg_4326')['epsg_4326']
cc_3857 = xx_3857.geobox.xr_coords(with_crs='epsg_3857')['epsg_3857']
cc_4326 = xx_4326.geobox.xr_coords(with_crs="epsg_4326")["epsg_4326"]
cc_3857 = xx_3857.geobox.xr_coords(with_crs="epsg_3857")["epsg_3857"]

assert _get_crs_from_coord(xx_none.band) is None
assert _get_crs_from_coord(xx_none) is None
Expand All @@ -112,10 +130,10 @@ def test_crs_from_coord():
assert _get_crs_from_coord(xx_4326.band) == epsg4326
assert _get_crs_from_coord(xx_4326) == epsg4326

xx = xx_none.band.assign_attrs(grid_mapping='x')
xx = xx_none.band.assign_attrs(grid_mapping="x")
with pytest.raises(ValueError):
_get_crs_from_coord(xx)
xx = xx_none.band.assign_attrs(grid_mapping='no_such_coord')
xx = xx_none.band.assign_attrs(grid_mapping="no_such_coord")
assert _get_crs_from_coord(xx) is None

xx_2crs = xx_none.assign_coords(cc_4326=cc_4326, cc_3857=cc_3857)
Expand All @@ -128,62 +146,63 @@ def test_crs_from_coord():
_get_crs_from_coord(xx_2crs.band)

# any should just return "first" guess, we not sure which one
crs = _get_crs_from_coord(xx_2crs, 'any')
crs = _get_crs_from_coord(xx_2crs, "any")
assert epsg4326 == crs or epsg3857 == crs

# all should return a list of length 2
crss = _get_crs_from_coord(xx_2crs, 'all')
crss = _get_crs_from_coord(xx_2crs, "all")
assert len(crss) == 2
assert any(crs == epsg3857 for crs in crss)
assert any(crs == epsg4326 for crs in crss)

with pytest.raises(ValueError):
_get_crs_from_coord(xx_2crs, 'no-such-mode')
_get_crs_from_coord(xx_2crs, "no-such-mode")


def test_crs_from_attrs():
xx_none = mk_sample_xr_dataset(crs=None)
xx_3857 = mk_sample_xr_dataset(crs=epsg3857)
xx_4326 = mk_sample_xr_dataset(crs=epsg4326)

assert _get_crs_from_attrs(xx_none, ('y', 'x')) is None
assert _get_crs_from_attrs(xx_none.band, ('y', 'x')) is None
assert _get_crs_from_attrs(xx_3857.band, ('y', 'x')) == epsg3857
assert _get_crs_from_attrs(xx_3857, ('y', 'x')) == epsg3857
assert _get_crs_from_attrs(xx_4326.band, ('latitude', 'longitude')) == epsg4326
assert _get_crs_from_attrs(xx_4326, ('latitude', 'longitude')) == epsg4326
assert _get_crs_from_attrs(xx_none, ("y", "x")) is None
assert _get_crs_from_attrs(xx_none.band, ("y", "x")) is None
assert _get_crs_from_attrs(xx_3857.band, ("y", "x")) == epsg3857
assert _get_crs_from_attrs(xx_3857, ("y", "x")) == epsg3857
assert _get_crs_from_attrs(xx_4326.band, ("latitude", "longitude")) == epsg4326
assert _get_crs_from_attrs(xx_4326, ("latitude", "longitude")) == epsg4326

assert _get_crs_from_attrs(xr.Dataset(), None) is None

# check inconsistent CRSs
xx = xx_3857.copy()
xx.x.attrs['crs'] = xx_4326.attrs['crs']
xx.x.attrs["crs"] = xx_4326.attrs["crs"]
with pytest.warns(UserWarning):
_get_crs_from_attrs(xx, ('y', 'x'))
_get_crs_from_attrs(xx, ("y", "x"))

xx = xx_none.copy()
xx.attrs['crs'] = epsg3857
xx.attrs["crs"] = epsg3857
assert xx.geobox is not None
assert xx.geobox.crs is epsg3857


@pytest.mark.parametrize("odc_style_xr_dataset", [{}, multi_coords], indirect=True)
def test_assign_crs(odc_style_xr_dataset):
xx = odc_style_xr_dataset
assert xx.geobox is not None

xx_nocrs = remove_crs(xx)
assert xx_nocrs.geobox is None
yy = assign_crs(xx_nocrs, 'epsg:4326')
assert xx_nocrs.geobox is None # verify source is not modified in place
assert yy.geobox.crs == 'epsg:4326'
yy = assign_crs(xx_nocrs, "epsg:4326")
assert xx_nocrs.geobox is None # verify source is not modified in place
assert yy.geobox.crs == "epsg:4326"

yy = assign_crs(xx_nocrs.B10, 'epsg:4326')
assert yy.geobox.crs == 'epsg:4326'
yy = assign_crs(xx_nocrs.B10, "epsg:4326")
assert yy.geobox.crs == "epsg:4326"

xx_xr_style_crs = xx_nocrs.copy()
xx_xr_style_crs.attrs.update(crs='epsg:3857')
xx_xr_style_crs.attrs.update(crs="epsg:3857")
yy = assign_crs(xx_xr_style_crs)
assert yy.geobox.crs == 'epsg:3857'
assert yy.geobox.crs == "epsg:3857"

with pytest.raises(ValueError):
assign_crs(xx_nocrs)
Expand Down
1 change: 1 addition & 0 deletions wordlist.txt
Expand Up @@ -271,6 +271,7 @@ mongolia
MTL
multiband
multigeom
MultiIndex
multiline
multipoint
multipolygon
Expand Down

0 comments on commit 42e2726

Please sign in to comment.