Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't allow overwriting indexes with region writes #8877

Merged
merged 3 commits into from Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Expand Up @@ -38,6 +38,8 @@ New Features
Breaking changes
~~~~~~~~~~~~~~~~

- Don't allow overwriting index variables with ``to_zarr`` region writes. (:issue:`8589`, :pull:`8876`).
By `Deepak Cherian <https://github.com/dcherian>`_.

Deprecations
~~~~~~~~~~~~
Expand Down
14 changes: 4 additions & 10 deletions xarray/backends/api.py
Expand Up @@ -1572,14 +1572,11 @@ def _validate_and_autodetect_region(
raise TypeError(f"``region`` must be a dict, got {type(region)}")

if any(v == "auto" for v in region.values()):
region_was_autodetected = True
if mode != "r+":
raise ValueError(
f"``mode`` must be 'r+' when using ``region='auto'``, got {mode}"
)
region = _auto_detect_regions(ds, region, open_kwargs)
else:
region_was_autodetected = False

for k, v in region.items():
if k not in ds.dims:
Expand Down Expand Up @@ -1612,7 +1609,7 @@ def _validate_and_autodetect_region(
f".drop_vars({non_matching_vars!r})"
)

return region, region_was_autodetected
return region


def _validate_datatypes_for_zarr_append(zstore, dataset):
Expand Down Expand Up @@ -1784,12 +1781,9 @@ def to_zarr(
storage_options=storage_options,
zarr_version=zarr_version,
)
region, region_was_autodetected = _validate_and_autodetect_region(
dataset, region, mode, open_kwargs
)
# drop indices to avoid potential race condition with auto region
if region_was_autodetected:
dataset = dataset.drop_vars(dataset.indexes)
region = _validate_and_autodetect_region(dataset, region, mode, open_kwargs)
# can't modify indexed with region writes
dataset = dataset.drop_vars(dataset.indexes)
if append_dim is not None and append_dim in region:
raise ValueError(
f"cannot list the same dimension in both ``append_dim`` and "
Expand Down
32 changes: 17 additions & 15 deletions xarray/tests/test_backends.py
Expand Up @@ -5641,24 +5641,26 @@ def test_zarr_region_index_write(self, tmp_path):
}
)

ds_region = 1 + ds.isel(x=slice(2, 4), y=slice(6, 8))
region_slice = dict(x=slice(2, 4), y=slice(6, 8))
ds_region = 1 + ds.isel(region_slice)

ds.to_zarr(tmp_path / "test.zarr")

with patch.object(
ZarrStore,
"set_variables",
side_effect=ZarrStore.set_variables,
autospec=True,
) as mock:
ds_region.to_zarr(tmp_path / "test.zarr", region="auto", mode="r+")

# should write the data vars but never the index vars with auto mode
for call in mock.call_args_list:
written_variables = call.args[1].keys()
assert "test" in written_variables
assert "x" not in written_variables
assert "y" not in written_variables
for region in [region_slice, "auto"]:
with patch.object(
ZarrStore,
"set_variables",
side_effect=ZarrStore.set_variables,
autospec=True,
) as mock:
ds_region.to_zarr(tmp_path / "test.zarr", region=region, mode="r+")

# should write the data vars but never the index vars with auto mode
for call in mock.call_args_list:
written_variables = call.args[1].keys()
assert "test" in written_variables
assert "x" not in written_variables
assert "y" not in written_variables

def test_zarr_region_append(self, tmp_path):
x = np.arange(0, 50, 10)
Expand Down