diff --git a/src/zarr/__init__.py b/src/zarr/__init__.py
index b3c1e05b7..601b1295a 100644
--- a/src/zarr/__init__.py
+++ b/src/zarr/__init__.py
@@ -31,7 +31,6 @@
 from zarr.errors import CopyError, MetadataError
 from zarr.hierarchy import Group, group, open_group
 from zarr.n5 import N5Store, N5FSStore
-from zarr._storage.store import v3_api_available
 from zarr.storage import (
     ABSStore,
     DBMStore,
@@ -53,18 +52,3 @@
 
 # in case setuptools scm screw up and find version to be 0.0.0
 assert not __version__.startswith("0.0.0")
-
-if v3_api_available:
-    from zarr._storage.v3 import (
-        ABSStoreV3,
-        DBMStoreV3,
-        KVStoreV3,
-        DirectoryStoreV3,
-        LMDBStoreV3,
-        LRUStoreCacheV3,
-        MemoryStoreV3,
-        MongoDBStoreV3,
-        RedisStoreV3,
-        SQLiteStoreV3,
-        ZipStoreV3,
-    )
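Annotation: the hunk above removes the env-gated v3 exports from the top-level namespace. As a minimal sketch of how downstream code can cope with both sides of this change — the `hasattr` probe is a hypothetical compatibility shim, not part of this PR; `MemoryStoreV3`/`MemoryStore` are the names shown in this diff:

```python
# Hypothetical compatibility probe for code that previously relied on the
# gated v3 exports (which only existed when ZARR_V3_EXPERIMENTAL_API was set).
import zarr

if hasattr(zarr, "MemoryStoreV3"):
    store = zarr.MemoryStoreV3()  # pre-change, experimental v3 API
else:
    store = zarr.MemoryStore()    # post-change: only the v2 stores remain
```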
diff --git a/src/zarr/_storage/absstore.py b/src/zarr/_storage/absstore.py
index c9a113148..d8e292535 100644
--- a/src/zarr/_storage/absstore.py
+++ b/src/zarr/_storage/absstore.py
@@ -3,7 +3,7 @@
 import warnings
 from numcodecs.compat import ensure_bytes
 from zarr.util import normalize_storage_path
-from zarr._storage.store import _get_metadata_suffix, data_root, meta_root, Store, StoreV3
+from zarr._storage.store import Store
 
 __doctest_requires__ = {
     ("ABSStore", "ABSStore.*"): ["azure.storage.blob"],
@@ -222,56 +222,3 @@ def getsize(self, path=None):
 
     def clear(self):
         self.rmdir()
-
-
-class ABSStoreV3(ABSStore, StoreV3):
-    def list(self):
-        return list(self.keys())
-
-    def __eq__(self, other):
-        return (
-            isinstance(other, ABSStoreV3)
-            and self.client == other.client
-            and self.prefix == other.prefix
-        )
-
-    def __setitem__(self, key, value):
-        self._validate_key(key)
-        super().__setitem__(key, value)
-
-    def rmdir(self, path=None):
-        if not path:
-            # Currently allowing clear to delete everything as in v2
-
-            # If we disallow an empty path then we will need to modify
-            # TestABSStoreV3 to have the create_store method use a prefix.
-            ABSStore.rmdir(self, "")
-            return
-
-        meta_dir = meta_root + path
-        meta_dir = meta_dir.rstrip("/")
-        ABSStore.rmdir(self, meta_dir)
-
-        # remove data folder
-        data_dir = data_root + path
-        data_dir = data_dir.rstrip("/")
-        ABSStore.rmdir(self, data_dir)
-
-        # remove metadata files
-        sfx = _get_metadata_suffix(self)
-        array_meta_file = meta_dir + ".array" + sfx
-        if array_meta_file in self:
-            del self[array_meta_file]
-        group_meta_file = meta_dir + ".group" + sfx
-        if group_meta_file in self:
-            del self[group_meta_file]
-
-    # TODO: adapt the v2 getsize method to work for v3
-    # For now, calling the generic keys-based _getsize
-    def getsize(self, path=None):
-        from zarr.storage import _getsize  # avoid circular import
-
-        return _getsize(self, path)
-
-
-ABSStoreV3.__doc__ = ABSStore.__doc__
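Annotation: the removed `rmdir` above encodes the v3 on-disk key layout. As a rough illustration — using the `meta_root`/`data_root` constants from this diff and `.json`, the default suffix returned by the removed `_get_metadata_suffix` — the keys it cleaned up looked like this:

```python
# Illustration of the v3 key layout handled by the removed ABSStoreV3.rmdir.
meta_root = "meta/root/"
data_root = "data/root/"
sfx = ".json"  # default metadata suffix

path = "group_a/array_b"
meta_dir = (meta_root + path).rstrip("/")    # 'meta/root/group_a/array_b'
data_dir = (data_root + path).rstrip("/")    # 'data/root/group_a/array_b'

array_meta_file = meta_dir + ".array" + sfx  # 'meta/root/group_a/array_b.array.json'
group_meta_file = meta_dir + ".group" + sfx  # 'meta/root/group_a/array_b.group.json'
```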
- """ - if not self._valid_key(key): - raise ValueError( - f"Keys must be ascii strings and may only contain the " - f"characters {''.join(sorted(self._valid_key_characters))}" - ) - - if ( - not key.startswith("data/") - and (not key.startswith("meta/")) - and (not key == "zarr.json") - # TODO: Possibly allow key == ".zmetadata" too if we write a - # consolidated metadata spec corresponding to this? - ): - raise ValueError("keys starts with unexpected value: `{}`".format(key)) - - if key.endswith("/"): - raise ValueError("keys may not end in /") - - def list_prefix(self, prefix): - if prefix.startswith("/"): - raise ValueError("prefix must not begin with /") - # TODO: force prefix to end with /? - return [k for k in self.list() if k.startswith(prefix)] - - def erase(self, key): - self.__delitem__(key) - - def erase_prefix(self, prefix): - assert prefix.endswith("/") - - if prefix == "/": - all_keys = self.list() - else: - all_keys = self.list_prefix(prefix) - for key in all_keys: - self.erase(key) - - def list_dir(self, prefix): - """ - TODO: carefully test this with trailing/leading slashes - """ - if prefix: # allow prefix = "" ? - assert prefix.endswith("/") - - all_keys = self.list_prefix(prefix) - len_prefix = len(prefix) - keys = [] - prefixes = [] - for k in all_keys: - trail = k[len_prefix:] - if "/" not in trail: - keys.append(prefix + trail) - else: - prefixes.append(prefix + trail.split("/", maxsplit=1)[0] + "/") - return keys, list(set(prefixes)) - - def list(self): - return list(self.keys()) - - def __contains__(self, key): - return key in self.list() - - @abc.abstractmethod - def __setitem__(self, key, value): - """Set a value.""" - - @abc.abstractmethod - def __getitem__(self, key): - """Get a value.""" - - @abc.abstractmethod - def rmdir(self, path=None): - """Remove a data path and all its subkeys and related metadata. - Expects a path without the data or meta root prefix.""" - - @property - def supports_efficient_get_partial_values(self): - return False - - def get_partial_values( - self, key_ranges: Sequence[Tuple[str, Tuple[int, Optional[int]]]] - ) -> List[Union[bytes, memoryview, bytearray]]: - """Get multiple partial values. - key_ranges can be an iterable of key, range pairs, - where a range specifies two integers range_start and range_length - as a tuple, (range_start, range_length). - range_length may be None to indicate to read until the end. - range_start may be negative to start reading range_start bytes - from the end of the file. - A key may occur multiple times with different ranges. - Inserts None for missing keys into the returned list.""" - results: List[Union[bytes, memoryview, bytearray]] = [None] * len(key_ranges) # type: ignore[list-item] # noqa: E501 - indexed_ranges_by_key: Dict[str, List[Tuple[int, Tuple[int, Optional[int]]]]] = defaultdict( - list - ) - for i, (key, range_) in enumerate(key_ranges): - indexed_ranges_by_key[key].append((i, range_)) - for key, indexed_ranges in indexed_ranges_by_key.items(): - try: - value = self[key] - except KeyError: # pragma: no cover - continue - for i, (range_from, range_length) in indexed_ranges: - if range_length is None: - results[i] = value[range_from:] - else: - results[i] = value[range_from : range_from + range_length] - return results - - def supports_efficient_set_partial_values(self): - return False - - def set_partial_values(self, key_start_values): - """Set multiple partial values. 
-
-    def supports_efficient_set_partial_values(self):
-        return False
-
-    def set_partial_values(self, key_start_values):
-        """Set multiple partial values.
-        key_start_values can be an iterable of key, start and value triplets
-        as tuples, (key, start, value), where start defines the offset in bytes.
-        A key may occur multiple times with different starts and non-overlapping values.
-        Also, start may only be beyond the current value if other values fill the gap.
-        start may be negative to start writing start bytes from the current
-        end of the file, ending the file with the new value."""
-        unique_keys = set(next(zip(*key_start_values)))
-        values = {}
-        for key in unique_keys:
-            old_value = self.get(key)
-            values[key] = None if old_value is None else bytearray(old_value)
-        for key, start, value in key_start_values:
-            if values[key] is None:
-                assert start == 0
-                values[key] = value
-            else:
-                if start > len(values[key]):  # pragma: no cover
-                    raise ValueError(
-                        f"Cannot set value at start {start}, "
-                        + f"since it is beyond the data at key {key}, "
-                        + f"having length {len(values[key])}."
-                    )
-                if start < 0:
-                    values[key][start:] = value
-                else:
-                    values[key][start : start + len(value)] = value
-        for key, value in values.items():
-            self[key] = value
-
-    def clear(self):
-        """Remove all items from store."""
-        self.erase_prefix("/")
-
-    def __eq__(self, other):
-        return NotImplemented
-
-    @staticmethod
-    def _ensure_store(store):
-        """
-        We want to make sure internally that zarr stores are always a class
-        with a specific interface derived from ``Store``, which is slightly
-        different than ``MutableMapping``.
-
-        We'll do this conversion in a few places automatically
-        """
-        from zarr._storage.v3 import KVStoreV3  # avoid circular import
-
-        if store is None:
-            return None
-        elif isinstance(store, StoreV3):
-            return store
-        elif isinstance(store, Store):
-            raise ValueError(f"cannot initialize a v3 store with a v{store._store_version} store")
-        elif isinstance(store, MutableMapping):
-            return KVStoreV3(store)
-        else:
-            for attr in [
-                "keys",
-                "values",
-                "get",
-                "__setitem__",
-                "__getitem__",
-                "__delitem__",
-                "__contains__",
-            ]:
-                if not hasattr(store, attr):
-                    break
-            else:
-                return KVStoreV3(store)
-
-        raise ValueError(
-            "v3 stores must be subclasses of StoreV3, "
-            "if your store exposes the MutableMapping interface wrap it in "
-            f"Zarr.storage.KVStoreV3. Got {store}"
-        )
-
-
-class StorageTransformer(MutableMapping, abc.ABC):
-    """Base class for storage transformers. The methods simply pass on the data as-is
-    and should be overwritten by sub-classes."""
-
-    _store_version = 3
-    _metadata_class = Metadata3
-
-    def __init__(self, _type) -> None:
-        if _type not in self.valid_types:  # pragma: no cover
-            raise ValueError(
-                f"Storage transformer cannot be initialized with type {_type}, "
-                + f"must be one of {list(self.valid_types)}."
-            )
-        self.type = _type
-        self._inner_store = None
-
-    def _copy_for_array(self, array, inner_store):
-        transformer_copy = copy(self)
-        transformer_copy._inner_store = inner_store
-        return transformer_copy
-
-    @abc.abstractproperty
-    def extension_uri(self):
-        pass  # pragma: no cover
-
-    @abc.abstractproperty
-    def valid_types(self):
-        pass  # pragma: no cover
-
-    def get_config(self):
-        """Return a dictionary holding configuration parameters for this
-        storage transformer. All values must be compatible with JSON encoding."""
-        # Override in sub-class if need special encoding of config values.
-        # By default, assume all non-private members are configuration
-        # parameters except for type .
-        return {k: v for k, v in self.__dict__.items() if not k.startswith("_") and k != "type"}
-
-    @classmethod
-    def from_config(cls, _type, config):
-        """Instantiate storage transformer from a configuration object."""
-        # override in sub-class if need special decoding of config values
-
-        # by default, assume constructor accepts configuration parameters as
-        # keyword arguments without any special decoding
-        return cls(_type, **config)
-
-    @property
-    def inner_store(self) -> Union["StorageTransformer", StoreV3]:
-        assert (
-            self._inner_store is not None
-        ), "inner_store is not initialized, first get a copy via _copy_for_array."
-        return self._inner_store
-
-    # The following implementations are usually fine to keep as-is:
-
-    def __eq__(self, other):
-        return (
-            type(self) == type(other)
-            and self._inner_store == other._inner_store
-            and self.get_config() == other.get_config()
-        )
-
-    def erase(self, key):
-        self.__delitem__(key)
-
-    def list(self):
-        return list(self.keys())
-
-    def list_dir(self, prefix):
-        return StoreV3.list_dir(self, prefix)
-
-    def is_readable(self):
-        return self.inner_store.is_readable()
-
-    def is_writeable(self):
-        return self.inner_store.is_writeable()
-
-    def is_listable(self):
-        return self.inner_store.is_listable()
-
-    def is_erasable(self):
-        return self.inner_store.is_erasable()
-
-    def clear(self):
-        return self.inner_store.clear()
-
-    def __enter__(self):
-        return self.inner_store.__enter__()
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        return self.inner_store.__exit__(exc_type, exc_value, traceback)
-
-    def close(self) -> None:
-        return self.inner_store.close()
-
-    # The following implementations might need to be re-implemented
-    # by subclasses implementing storage transformers:
-
-    def rename(self, src_path: str, dst_path: str) -> None:
-        return self.inner_store.rename(src_path, dst_path)
-
-    def list_prefix(self, prefix):
-        return self.inner_store.list_prefix(prefix)
-
-    def erase_prefix(self, prefix):
-        return self.inner_store.erase_prefix(prefix)
-
-    def rmdir(self, path=None):
-        return self.inner_store.rmdir(path)
-
-    def __contains__(self, key):
-        return self.inner_store.__contains__(key)
-
-    def __setitem__(self, key, value):
-        return self.inner_store.__setitem__(key, value)
-
-    def __getitem__(self, key):
-        return self.inner_store.__getitem__(key)
-
-    def __delitem__(self, key):
-        return self.inner_store.__delitem__(key)
-
-    def __iter__(self):
-        return self.inner_store.__iter__()
-
-    def __len__(self):
-        return self.inner_store.__len__()
-
-    @property
-    def supports_efficient_get_partial_values(self):
-        return self.inner_store.supports_efficient_get_partial_values
-
-    def get_partial_values(self, key_ranges):
-        return self.inner_store.get_partial_values(key_ranges)
-
-    def supports_efficient_set_partial_values(self):
-        return self.inner_store.supports_efficient_set_partial_values()
-
-    def set_partial_values(self, key_start_values):
-        return self.inner_store.set_partial_values(key_start_values)
-
-
 # allow MutableMapping for backwards compatibility
 StoreLike = Union[BaseStore, MutableMapping]
 
@@ -560,40 +177,6 @@ def _path_to_prefix(path: Optional[str]) -> str:
         prefix = ""
     return prefix
 
 
-def _get_hierarchy_metadata(store: StoreV3) -> Mapping[str, Any]:
-    version = getattr(store, "_store_version", 2)
-    if version < 3:
-        raise ValueError("zarr.json hierarchy metadata not stored for " f"zarr v{version} stores")
-    if "zarr.json" not in store:
-        raise ValueError("zarr.json metadata not found in store")
-    return store._metadata_class.decode_hierarchy_metadata(store["zarr.json"])
-
-
-def _get_metadata_suffix(store: StoreV3) -> str:
-    if "zarr.json" in store:
-        return _get_hierarchy_metadata(store)["metadata_key_suffix"]
-    return ".json"
-
-
-def _rename_metadata_v3(store: StoreV3, src_path: str, dst_path: str) -> bool:
-    """Rename source or group metadata file associated with src_path."""
-    any_renamed = False
-    sfx = _get_metadata_suffix(store)
-    src_path = src_path.rstrip("/")
-    dst_path = dst_path.rstrip("/")
-    _src_array_json = meta_root + src_path + ".array" + sfx
-    if _src_array_json in store:
-        new_key = meta_root + dst_path + ".array" + sfx
-        store[new_key] = store.pop(_src_array_json)
-        any_renamed = True
-    _src_group_json = meta_root + src_path + ".group" + sfx
-    if _src_group_json in store:
-        new_key = meta_root + dst_path + ".group" + sfx
-        store[new_key] = store.pop(_src_group_json)
-        any_renamed = True
-    return any_renamed
-
-
 def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None:
     # assume path already normalized
     src_prefix = _path_to_prefix(src_path)
@@ -605,19 +188,7 @@ def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None:
             new_key = dst_prefix + key.lstrip(src_prefix)
             store[new_key] = store.pop(key)
     else:
-        any_renamed = False
-        for root_prefix in [meta_root, data_root]:
-            _src_prefix = root_prefix + src_prefix
-            _dst_prefix = root_prefix + dst_prefix
-            for key in store.list_prefix(_src_prefix):  # type: ignore
-                new_key = _dst_prefix + key[len(_src_prefix) :]
-                store[new_key] = store.pop(key)
-                any_renamed = True
-        any_meta_renamed = _rename_metadata_v3(store, src_path, dst_path)  # type: ignore
-        any_renamed = any_meta_renamed or any_renamed
-
-        if not any_renamed:
-            raise ValueError(f"no item {src_path} found to rename")
+        raise NotImplementedError("This function only supports Zarr version 2.")
 
 
 def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None:
@@ -628,26 +199,6 @@ def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None:
             del store[key]
 
 
-def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None:
-    meta_dir = meta_root + path
-    meta_dir = meta_dir.rstrip("/")
-    _rmdir_from_keys(store, meta_dir)
-
-    # remove data folder
-    data_dir = data_root + path
-    data_dir = data_dir.rstrip("/")
-    _rmdir_from_keys(store, data_dir)
-
-    # remove metadata files
-    sfx = _get_metadata_suffix(store)
-    array_meta_file = meta_dir + ".array" + sfx
-    if array_meta_file in store:
-        store.erase(array_meta_file)
-    group_meta_file = meta_dir + ".group" + sfx
-    if group_meta_file in store:
-        store.erase(group_meta_file)
-
-
 def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str]:
     # assume path already normalized
     prefix = _path_to_prefix(path)
@@ -661,37 +212,15 @@ def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str]:
 
 
 def _prefix_to_array_key(store: StoreLike, prefix: str) -> str:
-    if getattr(store, "_store_version", 2) == 3:
-        sfx = _get_metadata_suffix(store)  # type: ignore
-        if prefix:
-            key = meta_root + prefix.rstrip("/") + ".array" + sfx
-        else:
-            key = meta_root[:-1] + ".array" + sfx
-    else:
-        key = prefix + array_meta_key
+    key = prefix + array_meta_key
     return key
 
 
 def _prefix_to_group_key(store: StoreLike, prefix: str) -> str:
-    if getattr(store, "_store_version", 2) == 3:
-        sfx = _get_metadata_suffix(store)  # type: ignore
-        if prefix:
-            key = meta_root + prefix.rstrip("/") + ".group" + sfx
-        else:
-            key = meta_root[:-1] + ".group" + sfx
-    else:
-        key = prefix + group_meta_key
+    key = prefix + group_meta_key
     return key
 
 
 def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str:
-    if getattr(store, "_store_version", 2) == 3:
-        # for v3, attributes are stored in the array metadata
-        sfx = _get_metadata_suffix(store)  # type: ignore
-        if prefix:
-            key = meta_root + prefix.rstrip("/") + ".array" + sfx
-        else:
-            key = meta_root[:-1] + ".array" + sfx
-    else:
-        key = prefix + attrs_key
+    key = prefix + attrs_key
    return key
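Annotation: the removed `StoreV3.get_partial_values` docstring above defines `(key, (range_start, range_length))` semantics that are easy to miss. A self-contained sketch of those semantics, using a plain dict as a stand-in store (not zarr API):

```python
# Standalone sketch of the (key, (range_start, range_length)) semantics from
# the removed StoreV3.get_partial_values; `data` is a plain-dict stand-in.
def get_partial_values(data, key_ranges):
    results = []
    for key, (start, length) in key_ranges:
        value = data.get(key)
        if value is None:
            results.append(None)                    # missing keys yield None
        elif length is None:
            results.append(value[start:])           # None length reads to the end
        else:
            results.append(value[start : start + length])
    return results

data = {"data/root/a/c0": b"0123456789"}
assert get_partial_values(data, [("data/root/a/c0", (2, 3))]) == [b"234"]
assert get_partial_values(data, [("data/root/a/c0", (-4, None))]) == [b"6789"]
assert get_partial_values(data, [("missing", (0, 1))]) == [None]
```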
diff --git a/src/zarr/_storage/v3.py b/src/zarr/_storage/v3.py
deleted file mode 100644
index d3cbc5823..000000000
--- a/src/zarr/_storage/v3.py
+++ /dev/null
@@ -1,625 +0,0 @@
-import os
-import shutil
-from collections import OrderedDict
-from collections.abc import MutableMapping
-from threading import Lock
-from typing import Union, Dict, Any
-
-from zarr.errors import (
-    MetadataError,
-    ReadOnlyError,
-)
-from zarr.util import buffer_size, json_loads, normalize_storage_path
-
-from zarr._storage.absstore import ABSStoreV3  # noqa: F401
-from zarr._storage.store import (  # noqa: F401
-    _get_hierarchy_metadata,
-    _get_metadata_suffix,
-    _listdir_from_keys,
-    _rename_from_keys,
-    _rename_metadata_v3,
-    _rmdir_from_keys,
-    _rmdir_from_keys_v3,
-    _path_to_prefix,
-    _prefix_to_array_key,
-    _prefix_to_group_key,
-    array_meta_key,
-    attrs_key,
-    data_root,
-    group_meta_key,
-    meta_root,
-    BaseStore,
-    Store,
-    StoreV3,
-)
-from zarr.storage import (
-    DBMStore,
-    ConsolidatedMetadataStore,
-    DirectoryStore,
-    FSStore,
-    KVStore,
-    LMDBStore,
-    LRUStoreCache,
-    MemoryStore,
-    MongoDBStore,
-    RedisStore,
-    SQLiteStore,
-    ZipStore,
-    _getsize,
-)
-
-__doctest_requires__ = {
-    ("RedisStore", "RedisStore.*"): ["redis"],
-    ("MongoDBStore", "MongoDBStore.*"): ["pymongo"],
-    ("LRUStoreCache", "LRUStoreCache.*"): ["s3fs"],
-}
-
-
-try:
-    # noinspection PyUnresolvedReferences
-    from zarr.codecs import Blosc
-
-    default_compressor = Blosc()
-except ImportError:  # pragma: no cover
-    from zarr.codecs import Zlib
-
-    default_compressor = Zlib()
-
-
-Path = Union[str, bytes, None]
-# allow MutableMapping for backwards compatibility
-StoreLike = Union[BaseStore, MutableMapping]
-
-
-class RmdirV3:
-    """Mixin class that can be used to ensure override of any existing v2 rmdir class."""
-
-    def rmdir(self, path: str = "") -> None:
-        path = normalize_storage_path(path)
-        _rmdir_from_keys_v3(self, path)  # type: ignore
-
-
-class KVStoreV3(RmdirV3, KVStore, StoreV3):
-    def list(self):
-        return list(self._mutable_mapping.keys())
-
-    def __setitem__(self, key, value):
-        self._validate_key(key)
-        super().__setitem__(key, value)
-
-    def __eq__(self, other):
-        return isinstance(other, KVStoreV3) and self._mutable_mapping == other._mutable_mapping
-
-
-KVStoreV3.__doc__ = KVStore.__doc__
-
-
-def _get_files_and_dirs_from_path(store, path):
-    path = normalize_storage_path(path)
-
-    files = []
-    # add array metadata file if present
-    array_key = _prefix_to_array_key(store, path)
-    if array_key in store:
-        files.append(os.path.join(store.path, array_key))
-
-    # add group metadata file if present
-    group_key = _prefix_to_group_key(store, path)
-    if group_key in store:
-        files.append(os.path.join(store.path, group_key))
-
-    dirs = []
-    # add array and group folders if present
-    for d in [data_root + path, meta_root + path]:
-        dir_path = os.path.join(store.path, d)
-        if os.path.exists(dir_path):
-            dirs.append(dir_path)
-    return files, dirs
-
-
-class FSStoreV3(FSStore, StoreV3):
-    # FSStoreV3 doesn't use this (FSStore uses it within _normalize_key)
-    _META_KEYS = ()
-
-    def __setitem__(self, key, value):
-        self._validate_key(key)
-        super().__setitem__(key, value)
-
-    def _default_key_separator(self):
-        if self.key_separator is None:
-            self.key_separator = "/"
-
-    def list(self):
-        return list(self.keys())
-
-    def _normalize_key(self, key):
-        key = normalize_storage_path(key).lstrip("/")
-        return key.lower() if self.normalize_keys else key
-
-    def getsize(self, path=None):
-        size = 0
-        if path is None or path == "":
-            # size of both the data and meta subdirs
-            dirs = []
-            for d in ["data/root", "meta/root"]:
-                dir_path = os.path.join(self.path, d)
-                if os.path.exists(dir_path):
-                    dirs.append(dir_path)
-        elif path in self:
-            # access individual element by full path
-            return buffer_size(self[path])
-        else:
-            files, dirs = _get_files_and_dirs_from_path(self, path)
-            for file in files:
-                size += os.path.getsize(file)
-        for d in dirs:
-            size += self.fs.du(d, total=True, maxdepth=None)
-        return size
-
-    def setitems(self, values):
-        if self.mode == "r":
-            raise ReadOnlyError()
-        values = {self._normalize_key(key): val for key, val in values.items()}
-
-        # initialize the /data/root/... folder corresponding to the array!
-        # Note: tests.test_core_v3.TestArrayWithFSStoreV3PartialRead fails
-        # without this explicit creation of directories
-        subdirectories = set(os.path.dirname(v) for v in values.keys())
-        for subdirectory in subdirectories:
-            data_dir = os.path.join(self.path, subdirectory)
-            if not self.fs.exists(data_dir):
-                self.fs.mkdir(data_dir)
-
-        self.map.setitems(values)
-
-    def rmdir(self, path=None):
-        if self.mode == "r":
-            raise ReadOnlyError()
-        if path:
-            for base in [meta_root, data_root]:
-                store_path = self.dir_path(base + path)
-                if self.fs.isdir(store_path):
-                    self.fs.rm(store_path, recursive=True)
-
-            # remove any associated metadata files
-            sfx = _get_metadata_suffix(self)
-            meta_dir = (meta_root + path).rstrip("/")
-            array_meta_file = meta_dir + ".array" + sfx
-            self.pop(array_meta_file, None)
-            group_meta_file = meta_dir + ".group" + sfx
-            self.pop(group_meta_file, None)
-        else:
-            store_path = self.dir_path(path)
-            if self.fs.isdir(store_path):
-                self.fs.rm(store_path, recursive=True)
-
-    @property
-    def supports_efficient_get_partial_values(self):
-        return True
-
-    def get_partial_values(self, key_ranges):
-        """Get multiple partial values.
-        key_ranges can be an iterable of key, range pairs,
-        where a range specifies two integers range_start and range_length
-        as a tuple, (range_start, range_length).
-        range_length may be None to indicate to read until the end.
-        range_start may be negative to start reading range_start bytes
-        from the end of the file.
-        A key may occur multiple times with different ranges.
-        Inserts None for missing keys into the returned list."""
-        results = []
-        for key, (range_start, range_length) in key_ranges:
-            key = self._normalize_key(key)
-            path = self.dir_path(key)
-            try:
-                if range_start is None or range_length is None:
-                    end = None
-                else:
-                    end = range_start + range_length
-                result = self.fs.cat_file(path, start=range_start, end=end)
-            except self.map.missing_exceptions:
-                result = None
-            results.append(result)
-        return results
-
-
-class MemoryStoreV3(MemoryStore, StoreV3):
-    def __init__(self, root=None, cls=dict, dimension_separator=None):
-        if root is None:
-            self.root = cls()
-        else:
-            self.root = root
-        self.cls = cls
-        self.write_mutex = Lock()
-        self._dimension_separator = dimension_separator  # TODO: modify for v3?
-
-    def __eq__(self, other):
-        return (
-            isinstance(other, MemoryStoreV3) and self.root == other.root and self.cls == other.cls
-        )
-
-    def __setitem__(self, key, value):
-        self._validate_key(key)
-        super().__setitem__(key, value)
-
-    def list(self):
-        return list(self.keys())
-
-    def getsize(self, path: Path = None):
-        return _getsize(self, path)
-
-    def rename(self, src_path: Path, dst_path: Path):
-        src_path = normalize_storage_path(src_path)
-        dst_path = normalize_storage_path(dst_path)
-
-        any_renamed = False
-        for base in [meta_root, data_root]:
-            if self.list_prefix(base + src_path):
-                src_parent, src_key = self._get_parent(base + src_path)
-                dst_parent, dst_key = self._require_parent(base + dst_path)
-
-                if src_key in src_parent:
-                    dst_parent[dst_key] = src_parent.pop(src_key)
-
-                if base == meta_root:
-                    # check for and move corresponding metadata
-                    sfx = _get_metadata_suffix(self)
-                    src_meta = src_key + ".array" + sfx
-                    if src_meta in src_parent:
-                        dst_meta = dst_key + ".array" + sfx
-                        dst_parent[dst_meta] = src_parent.pop(src_meta)
-                    src_meta = src_key + ".group" + sfx
-                    if src_meta in src_parent:
-                        dst_meta = dst_key + ".group" + sfx
-                        dst_parent[dst_meta] = src_parent.pop(src_meta)
-                any_renamed = True
-        any_renamed = _rename_metadata_v3(self, src_path, dst_path) or any_renamed
-        if not any_renamed:
-            raise ValueError(f"no item {src_path} found to rename")
-
-    def rmdir(self, path: Path = None):
-        path = normalize_storage_path(path)
-        if path:
-            for base in [meta_root, data_root]:
-                try:
-                    parent, key = self._get_parent(base + path)
-                    value = parent[key]
-                except KeyError:
-                    continue
-                else:
-                    if isinstance(value, self.cls):
-                        del parent[key]
-
-            # remove any associated metadata files
-            sfx = _get_metadata_suffix(self)
-            meta_dir = (meta_root + path).rstrip("/")
-            array_meta_file = meta_dir + ".array" + sfx
-            self.pop(array_meta_file, None)
-            group_meta_file = meta_dir + ".group" + sfx
-            self.pop(group_meta_file, None)
-        else:
-            # clear out root
-            self.root = self.cls()
-
-
-MemoryStoreV3.__doc__ = MemoryStore.__doc__
-
-
-class DirectoryStoreV3(DirectoryStore, StoreV3):
-    def list(self):
-        return list(self.keys())
-
-    def __eq__(self, other):
-        return isinstance(other, DirectoryStoreV3) and self.path == other.path
-
-    def __setitem__(self, key, value):
-        self._validate_key(key)
-        super().__setitem__(key, value)
-
-    def getsize(self, path: Path = None):
-        return _getsize(self, path)
-
-    def rename(self, src_path, dst_path, metadata_key_suffix=".json"):
-        store_src_path = normalize_storage_path(src_path)
-        store_dst_path = normalize_storage_path(dst_path)
-
-        dir_path = self.path
-        any_existed = False
-        for root_prefix in ["meta", "data"]:
-            src_path = os.path.join(dir_path, root_prefix, "root", store_src_path)
-            if os.path.exists(src_path):
-                any_existed = True
-                dst_path = os.path.join(dir_path, root_prefix, "root", store_dst_path)
-                os.renames(src_path, dst_path)
-
-        for suffix in [".array" + metadata_key_suffix, ".group" + metadata_key_suffix]:
-            src_meta = os.path.join(dir_path, "meta", "root", store_src_path + suffix)
-            if os.path.exists(src_meta):
-                any_existed = True
-                dst_meta = os.path.join(dir_path, "meta", "root", store_dst_path + suffix)
-                dst_dir = os.path.dirname(dst_meta)
-                if not os.path.exists(dst_dir):
-                    os.makedirs(dst_dir)
-                os.rename(src_meta, dst_meta)
-        if not any_existed:
-            raise FileNotFoundError("nothing found at src_path")
-
-    def rmdir(self, path=None):
-        store_path = normalize_storage_path(path)
-        dir_path = self.path
-        if store_path:
-            for base in [meta_root, data_root]:
-                dir_path = os.path.join(dir_path, base + store_path)
-                if os.path.isdir(dir_path):
-                    shutil.rmtree(dir_path)
-
-            # remove any associated metadata files
-            sfx = _get_metadata_suffix(self)
-            meta_dir = (meta_root + path).rstrip("/")
-            array_meta_file = meta_dir + ".array" + sfx
-            self.pop(array_meta_file, None)
-            group_meta_file = meta_dir + ".group" + sfx
-            self.pop(group_meta_file, None)
-
-        elif os.path.isdir(dir_path):
-            shutil.rmtree(dir_path)
-
-
-DirectoryStoreV3.__doc__ = DirectoryStore.__doc__
-
-
-class ZipStoreV3(ZipStore, StoreV3):
-    def list(self):
-        return list(self.keys())
-
-    def __eq__(self, other):
-        return (
-            isinstance(other, ZipStore)
-            and self.path == other.path
-            and self.compression == other.compression
-            and self.allowZip64 == other.allowZip64
-        )
-
-    def __setitem__(self, key, value):
-        self._validate_key(key)
-        super().__setitem__(key, value)
-
-    def getsize(self, path=None):
-        path = normalize_storage_path(path)
-        with self.mutex:
-            children = self.list_prefix(data_root + path)
-            children += self.list_prefix(meta_root + path)
-            print(f"path={path}, children={children}")
-            if children:
-                size = 0
-                for name in children:
-                    info = self.zf.getinfo(name)
-                    size += info.compress_size
-                return size
-            elif path in self:
-                info = self.zf.getinfo(path)
-                return info.compress_size
-            else:
-                return 0
-
-
-ZipStoreV3.__doc__ = ZipStore.__doc__
-
-
-class RedisStoreV3(RmdirV3, RedisStore, StoreV3):
-    def list(self):
-        return list(self.keys())
-
-    def __setitem__(self, key, value):
-        self._validate_key(key)
-        super().__setitem__(key, value)
-
-
-RedisStoreV3.__doc__ = RedisStore.__doc__
-
-
-class MongoDBStoreV3(RmdirV3, MongoDBStore, StoreV3):
-    def list(self):
-        return list(self.keys())
-
-    def __setitem__(self, key, value):
-        self._validate_key(key)
-        super().__setitem__(key, value)
-
-
-MongoDBStoreV3.__doc__ = MongoDBStore.__doc__
-
-
-class DBMStoreV3(RmdirV3, DBMStore, StoreV3):
-    def list(self):
-        return list(self.keys())
-
-    def __setitem__(self, key, value):
-        self._validate_key(key)
-        super().__setitem__(key, value)
-
-
-DBMStoreV3.__doc__ = DBMStore.__doc__
-
-
-class LMDBStoreV3(RmdirV3, LMDBStore, StoreV3):
-    def list(self):
-        return list(self.keys())
-
-    def __setitem__(self, key, value):
-        self._validate_key(key)
-        super().__setitem__(key, value)
-
-
-LMDBStoreV3.__doc__ = LMDBStore.__doc__
-
-
-class SQLiteStoreV3(SQLiteStore, StoreV3):
-    def list(self):
-        return list(self.keys())
-
-    def getsize(self, path=None):
-        # TODO: why does the query below not work in this case?
-        # For now fall back to the default _getsize implementation
-        # size = 0
-        # for _path in [data_root + path, meta_root + path]:
-        #     c = self.cursor.execute(
-        #         '''
-        #         SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr
-        #         WHERE k LIKE (? || "%") AND
-        #               0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), "/"), "/")
-        #         ''',
-        #         (_path, _path)
-        #     )
-        #     for item_size, in c:
-        #         size += item_size
-        # return size
-
-        # fallback to default implementation for now
-        return _getsize(self, path)
-
-    def __setitem__(self, key, value):
-        self._validate_key(key)
-        super().__setitem__(key, value)
-
-    def rmdir(self, path=None):
-        path = normalize_storage_path(path)
-        if path:
-            for base in [meta_root, data_root]:
-                with self.lock:
-                    self.cursor.execute('DELETE FROM zarr WHERE k LIKE (? || "/%")', (base + path,))
|| "/%")', (base + path,)) - # remove any associated metadata files - sfx = _get_metadata_suffix(self) - meta_dir = (meta_root + path).rstrip("/") - array_meta_file = meta_dir + ".array" + sfx - self.pop(array_meta_file, None) - group_meta_file = meta_dir + ".group" + sfx - self.pop(group_meta_file, None) - else: - self.clear() - - -SQLiteStoreV3.__doc__ = SQLiteStore.__doc__ - - -class LRUStoreCacheV3(RmdirV3, LRUStoreCache, StoreV3): - def __init__(self, store, max_size: int): - self._store = StoreV3._ensure_store(store) - self._max_size = max_size - self._current_size = 0 - self._keys_cache = None - self._contains_cache = {} - self._listdir_cache: Dict[Path, Any] = dict() - self._values_cache: Dict[Path, Any] = OrderedDict() - self._mutex = Lock() - self.hits = self.misses = 0 - - def list(self): - return list(self.keys()) - - def __setitem__(self, key, value): - self._validate_key(key) - super().__setitem__(key, value) - - -LRUStoreCacheV3.__doc__ = LRUStoreCache.__doc__ - - -class ConsolidatedMetadataStoreV3(ConsolidatedMetadataStore, StoreV3): - """A layer over other storage, where the metadata has been consolidated into - a single key. - - The purpose of this class, is to be able to get all of the metadata for - a given array in a single read operation from the underlying storage. - See :func:`zarr.convenience.consolidate_metadata` for how to create this - single metadata key. - - This class loads from the one key, and stores the data in a dict, so that - accessing the keys no longer requires operations on the backend store. - - This class is read-only, and attempts to change the array metadata will - fail, but changing the data is possible. If the backend storage is changed - directly, then the metadata stored here could become obsolete, and - :func:`zarr.convenience.consolidate_metadata` should be called again and the class - re-invoked. The use case is for write once, read many times. - - .. note:: This is an experimental feature. - - Parameters - ---------- - store: Store - Containing the zarr array. - metadata_key: str - The target in the store where all of the metadata are stored. We - assume JSON encoding. 
-
-    See Also
-    --------
-    zarr.convenience.consolidate_metadata, zarr.convenience.open_consolidated
-
-    """
-
-    def __init__(self, store: StoreLike, metadata_key=meta_root + "consolidated/.zmetadata"):
-        self.store = StoreV3._ensure_store(store)
-
-        # retrieve consolidated metadata
-        meta = json_loads(self.store[metadata_key])
-
-        # check format of consolidated metadata
-        consolidated_format = meta.get("zarr_consolidated_format", None)
-        if consolidated_format != 1:
-            raise MetadataError(
-                "unsupported zarr consolidated metadata format: %s" % consolidated_format
-            )
-
-        # decode metadata
-        self.meta_store: Store = KVStoreV3(meta["metadata"])
-
-    def rmdir(self, key):
-        raise ReadOnlyError()
-
-
-def _normalize_store_arg_v3(store: Any, storage_options=None, mode="r") -> BaseStore:
-    # default to v2 store for backward compatibility
-    zarr_version = getattr(store, "_store_version", 3)
-    if zarr_version != 3:
-        raise ValueError("store must be a version 3 store")
-    if store is None:
-        store = KVStoreV3(dict())
-        # add default zarr.json metadata
-        store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None)
-        return store
-    if isinstance(store, os.PathLike):
-        store = os.fspath(store)
-    if FSStore._fsspec_installed():
-        import fsspec
-
-        if isinstance(store, fsspec.FSMap):
-            return FSStoreV3(
-                store.root,
-                fs=store.fs,
-                mode=mode,
-                check=store.check,
-                create=store.create,
-                missing_exceptions=store.missing_exceptions,
-                **(storage_options or {}),
-            )
-    if isinstance(store, str):
-        if "://" in store or "::" in store:
-            store = FSStoreV3(store, mode=mode, **(storage_options or {}))
-        elif storage_options:
-            raise ValueError("storage_options passed with non-fsspec path")
-        elif store.endswith(".zip"):
-            store = ZipStoreV3(store, mode=mode)
-        elif store.endswith(".n5"):
-            raise NotImplementedError("N5Store not yet implemented for V3")
-            # return N5StoreV3(store)
-        else:
-            store = DirectoryStoreV3(store)
-    else:
-        store = StoreV3._ensure_store(store)
-
-    if "zarr.json" not in store:
-        # add default zarr.json metadata
-        store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None)
-    return store
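Annotation: the deleted `_normalize_store_arg_v3` above is what made a bare dict usable as a v3 store. For reference, the pre-change behaviour amounted to roughly the following sketch; it requires the experimental flag and the `zarr._storage.v3` module this diff deletes, so it no longer runs after this change:

```python
# Pre-change behaviour only: requires ZARR_V3_EXPERIMENTAL_API=1 and the
# zarr._storage.v3 module that this diff removes.
import os
os.environ["ZARR_V3_EXPERIMENTAL_API"] = "1"  # must be set before importing zarr

from zarr._storage.v3 import KVStoreV3

store = KVStoreV3(dict())
# seed the hierarchy metadata, as _normalize_store_arg_v3 did for empty stores
store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None)
```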
diff --git a/src/zarr/_storage/v3_storage_transformers.py b/src/zarr/_storage/v3_storage_transformers.py
deleted file mode 100644
index cb11cea52..000000000
--- a/src/zarr/_storage/v3_storage_transformers.py
+++ /dev/null
@@ -1,382 +0,0 @@
-import functools
-import itertools
-import os
-from typing import NamedTuple, Tuple, Optional, Union, Iterator
-
-from numcodecs.compat import ensure_bytes
-import numpy as np
-
-from zarr._storage.store import StorageTransformer, StoreV3, _rmdir_from_keys_v3
-from zarr.util import normalize_storage_path
-
-
-MAX_UINT_64 = 2**64 - 1
-
-
-v3_sharding_available = os.environ.get("ZARR_V3_SHARDING", "0").lower() not in ["0", "false"]
-
-
-def assert_zarr_v3_sharding_available():
-    if not v3_sharding_available:
-        raise NotImplementedError(
-            "Using V3 sharding is experimental and not yet finalized! To enable support, set:\n"
-            "ZARR_V3_SHARDING=1"
-        )  # pragma: no cover
-
-
-class _ShardIndex(NamedTuple):
-    store: "ShardingStorageTransformer"
-    # dtype uint64, shape (chunks_per_shard_0, chunks_per_shard_1, ..., 2)
-    offsets_and_lengths: np.ndarray
-
-    def __localize_chunk__(self, chunk: Tuple[int, ...]) -> Tuple[int, ...]:
-        return tuple(
-            chunk_i % shard_i for chunk_i, shard_i in zip(chunk, self.store.chunks_per_shard)
-        )
-
-    def is_all_empty(self) -> bool:
-        return np.array_equiv(self.offsets_and_lengths, MAX_UINT_64)
-
-    def get_chunk_slice(self, chunk: Tuple[int, ...]) -> Optional[slice]:
-        localized_chunk = self.__localize_chunk__(chunk)
-        chunk_start, chunk_len = self.offsets_and_lengths[localized_chunk]
-        if (chunk_start, chunk_len) == (MAX_UINT_64, MAX_UINT_64):
-            return None
-        else:
-            return slice(int(chunk_start), int(chunk_start + chunk_len))
-
-    def set_chunk_slice(self, chunk: Tuple[int, ...], chunk_slice: Optional[slice]) -> None:
-        localized_chunk = self.__localize_chunk__(chunk)
-        if chunk_slice is None:
-            self.offsets_and_lengths[localized_chunk] = (MAX_UINT_64, MAX_UINT_64)
-        else:
-            self.offsets_and_lengths[localized_chunk] = (
-                chunk_slice.start,
-                chunk_slice.stop - chunk_slice.start,
-            )
-
-    def to_bytes(self) -> bytes:
-        return self.offsets_and_lengths.tobytes(order="C")
-
-    @classmethod
-    def from_bytes(
-        cls, buffer: Union[bytes, bytearray], store: "ShardingStorageTransformer"
-    ) -> "_ShardIndex":
-        try:
-            return cls(
-                store=store,
-                offsets_and_lengths=np.frombuffer(bytearray(buffer), dtype="<u8").reshape(
-                    *store.chunks_per_shard, 2, order="C"
-                ),
-            )
-        except ValueError as e:  # pragma: no cover
-            raise RuntimeError from e
-
-    @classmethod
-    def create_empty(cls, store: "ShardingStorageTransformer"):
-        # reserving 2*64bit per chunk for offset and length:
-        return cls(
-            store=store,
-            offsets_and_lengths=np.full(store.chunks_per_shard + (2,), MAX_UINT_64, dtype="<u8"),
-        )
-
-
-class DummyStorageTransfomer(StorageTransformer):
-    TEST_CONSTANT = "test1234"
-
-    extension_uri = "https://purl.org/zarr/spec/storage_transformers/dummy/1.0"
-    valid_types = ["dummy_type"]
-
-    def __init__(self, _type, test_value) -> None:
-        super().__init__(_type)
-        assert test_value == self.TEST_CONSTANT
-        self.test_value = test_value
-
-
-class ShardingStorageTransformer(StorageTransformer):  # lgtm[py/missing-equals]
-    """Implements sharding as a storage transformer, as described in the spec:
-    https://zarr-specs.readthedocs.io/en/latest/extensions/storage-transformers/sharding/v1.0.html
-    https://purl.org/zarr/spec/storage_transformers/sharding/1.0
-    """
-
-    extension_uri = "https://purl.org/zarr/spec/storage_transformers/sharding/1.0"
-    valid_types = ["indexed"]
-
-    def __init__(self, _type, chunks_per_shard) -> None:
-        assert_zarr_v3_sharding_available()
-        super().__init__(_type)
-        if isinstance(chunks_per_shard, int):
-            chunks_per_shard = (chunks_per_shard,)
-        else:
-            chunks_per_shard = tuple(int(i) for i in chunks_per_shard)
-            if chunks_per_shard == ():
-                chunks_per_shard = (1,)
-        self.chunks_per_shard = chunks_per_shard
-        self._num_chunks_per_shard = functools.reduce(lambda x, y: x * y, chunks_per_shard, 1)
-        self._dimension_separator = None
-        self._data_key_prefix = None
-
-    def _copy_for_array(self, array, inner_store):
-        transformer_copy = super()._copy_for_array(array, inner_store)
-        transformer_copy._dimension_separator = array._dimension_separator
-        transformer_copy._data_key_prefix = array._data_key_prefix
-        if len(array._shape) > len(self.chunks_per_shard):
-            # The array shape might be longer when initialized with subdtypes.
-            # subdtypes dimensions come last, therefore padding chunks_per_shard
-            # with ones, effectively disabling sharding on the unlisted dimensions.
-            transformer_copy.chunks_per_shard += (1,) * (
-                len(array._shape) - len(self.chunks_per_shard)
-            )
-        return transformer_copy
-
-    @property
-    def dimension_separator(self) -> str:
-        assert (
-            self._dimension_separator is not None
-        ), "dimension_separator is not initialized, first get a copy via _copy_for_array."
-        return self._dimension_separator
-
-    def _is_data_key(self, key: str) -> bool:
-        assert (
-            self._data_key_prefix is not None
-        ), "data_key_prefix is not initialized, first get a copy via _copy_for_array."
-        return key.startswith(self._data_key_prefix)
-
-    def _key_to_shard(self, chunk_key: str) -> Tuple[str, Tuple[int, ...]]:
-        prefix, _, chunk_string = chunk_key.rpartition("c")
-        chunk_subkeys = (
-            tuple(map(int, chunk_string.split(self.dimension_separator))) if chunk_string else (0,)
-        )
-        shard_key_tuple = (
-            subkey // shard_i for subkey, shard_i in zip(chunk_subkeys, self.chunks_per_shard)
-        )
-        shard_key = prefix + "c" + self.dimension_separator.join(map(str, shard_key_tuple))
-        return shard_key, chunk_subkeys
-
-    def _get_index_from_store(self, shard_key: str) -> _ShardIndex:
-        # At the end of each shard 2*64bit per chunk for offset and length define the index:
-        index_bytes = self.inner_store.get_partial_values(
-            [(shard_key, (-16 * self._num_chunks_per_shard, None))]
-        )[0]
-        if index_bytes is None:
-            raise KeyError(shard_key)
-        return _ShardIndex.from_bytes(
-            index_bytes,
-            self,
-        )
-
-    def _get_index_from_buffer(self, buffer: Union[bytes, bytearray]) -> _ShardIndex:
-        # At the end of each shard 2*64bit per chunk for offset and length define the index:
-        return _ShardIndex.from_bytes(buffer[-16 * self._num_chunks_per_shard :], self)
-
-    def _get_chunks_in_shard(self, shard_key: str) -> Iterator[Tuple[int, ...]]:
-        _, _, chunk_string = shard_key.rpartition("c")
-        shard_key_tuple = (
-            tuple(map(int, chunk_string.split(self.dimension_separator))) if chunk_string else (0,)
-        )
-        for chunk_offset in itertools.product(*(range(i) for i in self.chunks_per_shard)):
-            yield tuple(
-                shard_key_i * shards_i + offset_i
-                for shard_key_i, offset_i, shards_i in zip(
-                    shard_key_tuple, chunk_offset, self.chunks_per_shard
-                )
-            )
-
-    def __getitem__(self, key):
-        if self._is_data_key(key):
-            if self.supports_efficient_get_partial_values:
-                # Use the partial implementation, which fetches the index separately
-                value = self.get_partial_values([(key, (0, None))])[0]
-                if value is None:
-                    raise KeyError(key)
-                else:
-                    return value
-            shard_key, chunk_subkey = self._key_to_shard(key)
-            try:
-                full_shard_value = self.inner_store[shard_key]
-            except KeyError:
-                raise KeyError(key)
-            index = self._get_index_from_buffer(full_shard_value)
-            chunk_slice = index.get_chunk_slice(chunk_subkey)
-            if chunk_slice is not None:
-                return full_shard_value[chunk_slice]
-            else:
-                raise KeyError(key)
-        else:
-            return self.inner_store.__getitem__(key)
-
-    def __setitem__(self, key, value):
-        value = ensure_bytes(value)
-        if self._is_data_key(key):
-            shard_key, chunk_subkey = self._key_to_shard(key)
-            chunks_to_read = set(self._get_chunks_in_shard(shard_key))
-            chunks_to_read.remove(chunk_subkey)
-            new_content = {chunk_subkey: value}
-            try:
-                if self.supports_efficient_get_partial_values:
-                    index = self._get_index_from_store(shard_key)
-                    full_shard_value = None
-                else:
-                    full_shard_value = self.inner_store[shard_key]
-                    index = self._get_index_from_buffer(full_shard_value)
-            except KeyError:
-                index = _ShardIndex.create_empty(self)
-            else:
-                chunk_slices = [
-                    (chunk_to_read, index.get_chunk_slice(chunk_to_read))
-                    for chunk_to_read in chunks_to_read
-                ]
-                valid_chunk_slices = [
-                    (chunk_to_read, chunk_slice)
-                    for chunk_to_read, chunk_slice in chunk_slices
-                    if chunk_slice is not None
-                ]
-                # use get_partial_values if less than half of the available chunks must be read:
-                # (This can be changed when set_partial_values can be used efficiently.)
-                use_partial_get = (
-                    self.supports_efficient_get_partial_values
-                    and len(valid_chunk_slices) < len(chunk_slices) / 2
-                )
-
-                if use_partial_get:
-                    chunk_values = self.inner_store.get_partial_values(
-                        [
-                            (
-                                shard_key,
-                                (
-                                    chunk_slice.start,
-                                    chunk_slice.stop - chunk_slice.start,
-                                ),
-                            )
-                            for _, chunk_slice in valid_chunk_slices
-                        ]
-                    )
-                    for chunk_value, (chunk_to_read, _) in zip(chunk_values, valid_chunk_slices):
-                        new_content[chunk_to_read] = chunk_value
-                else:
-                    if full_shard_value is None:
-                        full_shard_value = self.inner_store[shard_key]
-                    for chunk_to_read, chunk_slice in valid_chunk_slices:
-                        if chunk_slice is not None:
-                            new_content[chunk_to_read] = full_shard_value[chunk_slice]
-
-            shard_content = b""
-            for chunk_subkey, chunk_content in new_content.items():
-                chunk_slice = slice(len(shard_content), len(shard_content) + len(chunk_content))
-                index.set_chunk_slice(chunk_subkey, chunk_slice)
-                shard_content += chunk_content
-            # Appending the index at the end of the shard:
-            shard_content += index.to_bytes()
-            self.inner_store[shard_key] = shard_content
-        else:  # pragma: no cover
-            self.inner_store[key] = value
-
-    def __delitem__(self, key):
-        if self._is_data_key(key):
-            shard_key, chunk_subkey = self._key_to_shard(key)
-            try:
-                index = self._get_index_from_store(shard_key)
-            except KeyError:
-                raise KeyError(key)
-
-            index.set_chunk_slice(chunk_subkey, None)
-
-            if index.is_all_empty():
-                del self.inner_store[shard_key]
-            else:
-                index_bytes = index.to_bytes()
-                self.inner_store.set_partial_values([(shard_key, -len(index_bytes), index_bytes)])
-        else:  # pragma: no cover
-            del self.inner_store[key]
-
-    def _shard_key_to_original_keys(self, key: str) -> Iterator[str]:
-        if self._is_data_key(key):
-            index = self._get_index_from_store(key)
-            prefix, _, _ = key.rpartition("c")
-            for chunk_tuple in self._get_chunks_in_shard(key):
-                if index.get_chunk_slice(chunk_tuple) is not None:
-                    yield prefix + "c" + self.dimension_separator.join(map(str, chunk_tuple))
-        else:
-            yield key
-
-    def __iter__(self) -> Iterator[str]:
-        for key in self.inner_store:
-            yield from self._shard_key_to_original_keys(key)
-
-    def __len__(self):
-        return sum(1 for _ in self.keys())
-
-    def get_partial_values(self, key_ranges):
-        if self.supports_efficient_get_partial_values:
-            transformed_key_ranges = []
-            cached_indices = {}
-            none_indices = []
-            for i, (key, range_) in enumerate(key_ranges):
-                if self._is_data_key(key):
-                    shard_key, chunk_subkey = self._key_to_shard(key)
-                    try:
-                        index = cached_indices[shard_key]
-                    except KeyError:
-                        try:
-                            index = self._get_index_from_store(shard_key)
-                        except KeyError:
-                            none_indices.append(i)
-                            continue
-                        cached_indices[shard_key] = index
-                    chunk_slice = index.get_chunk_slice(chunk_subkey)
-                    if chunk_slice is None:
-                        none_indices.append(i)
-                        continue
-                    range_start, range_length = range_
-                    if range_length is None:
-                        range_length = chunk_slice.stop - chunk_slice.start
-                    transformed_key_ranges.append(
-                        (shard_key, (range_start + chunk_slice.start, range_length))
-                    )
-                else:  # pragma: no cover
-                    transformed_key_ranges.append((key, range_))
-            values = self.inner_store.get_partial_values(transformed_key_ranges)
-            for i in none_indices:
-                values.insert(i, None)
-            return values
-        else:
-            return StoreV3.get_partial_values(self, key_ranges)
-
-    def supports_efficient_set_partial_values(self):
-        return False
-
-    def set_partial_values(self, key_start_values):
-        # This does not yet implement efficient set_partial_values
-        StoreV3.set_partial_values(self, key_start_values)
-
-    def rename(self, src_path: str, dst_path: str) -> None:
-        StoreV3.rename(self, src_path, dst_path)  # type: ignore[arg-type]
-
-    def list_prefix(self, prefix):
-        return StoreV3.list_prefix(self, prefix)
-
-    def erase_prefix(self, prefix):
-        if self._is_data_key(prefix):
-            StoreV3.erase_prefix(self, prefix)
-        else:
-            self.inner_store.erase_prefix(prefix)
-
-    def rmdir(self, path=None):
-        path = normalize_storage_path(path)
-        _rmdir_from_keys_v3(self, path)
-
-    def __contains__(self, key):
-        if self._is_data_key(key):
-            shard_key, chunk_subkeys = self._key_to_shard(key)
-            try:
-                index = self._get_index_from_store(shard_key)
-            except KeyError:
-                return False
-            chunk_slice = index.get_chunk_slice(chunk_subkeys)
-            return chunk_slice is not None
-        else:
-            return self._inner_store.__contains__(key)
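Annotation: the shard layout driven by `_ShardIndex` above is compact but implicit. A numpy-only sketch of the index arithmetic, mirroring the `-16 * num_chunks` read offsets used in the deleted code:

```python
# Sketch of the shard-index layout used by the deleted ShardingStorageTransformer:
# one uint64 (offset, length) pair per chunk, appended at the end of each shard,
# with 2**64 - 1 sentinels marking empty chunks.
import functools
import numpy as np

MAX_UINT_64 = 2**64 - 1
chunks_per_shard = (2, 2)
num_chunks = functools.reduce(lambda x, y: x * y, chunks_per_shard, 1)

index = np.full(chunks_per_shard + (2,), MAX_UINT_64, dtype="<u8")
index[0, 0] = (0, 100)  # chunk (0, 0) occupies bytes [0, 100) of the shard

index_bytes = index.tobytes(order="C")
assert len(index_bytes) == 16 * num_chunks  # matches the -16 * n read offset above
```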
stringified") from ex - if self._version == 2: - d = d_to_check - else: - d["attributes"] = d_to_check + d = d_to_check - if self._version == 2: - self.store[self.key] = json_dumps(d) - if self.cache: - self._cached_asdict = d - else: - if self.key in self.store: - # Cannot write the attributes directly to JSON, but have to - # store it within the pre-existing attributes key of the v3 - # metadata. - - # Note: this changes the store.counter result in test_caching_on! - - meta = self.store._metadata_class.parse_metadata(self.store[self.key]) - if "attributes" in meta and "filters" in meta["attributes"]: - # need to preserve any existing "filters" attribute - d["attributes"]["filters"] = meta["attributes"]["filters"] - meta["attributes"] = d["attributes"] - else: - meta = d - self.store[self.key] = json_dumps(meta) - if self.cache: - self._cached_asdict = d["attributes"] + self.store[self.key] = json_dumps(d) + if self.cache: + self._cached_asdict = d # noinspection PyMethodOverriding def update(self, *args, **kwargs): @@ -178,10 +140,7 @@ def _update_nosync(self, *args, **kwargs): d = self._get_nosync() # update - if self._version == 2: - d.update(*args, **kwargs) - else: - d["attributes"].update(*args, **kwargs) + d.update(*args, **kwargs) # _put modified data self._put_nosync(d) diff --git a/src/zarr/convenience.py b/src/zarr/convenience.py index 9c0deeea4..615a019dc 100644 --- a/src/zarr/convenience.py +++ b/src/zarr/convenience.py @@ -3,8 +3,6 @@ import os import re from collections.abc import Mapping, MutableMapping - -from zarr._storage.store import data_root, meta_root, assert_zarr_v3_api_available from zarr.core import Array from zarr.creation import array as _create_array from zarr.creation import open_array @@ -14,14 +12,12 @@ from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads from zarr.storage import ( - _get_metadata_suffix, contains_array, contains_group, normalize_store_arg, BaseStore, ConsolidatedMetadataStore, ) -from zarr._storage.v3 import ConsolidatedMetadataStoreV3 from zarr.util import TreeViewer, buffer_size, normalize_storage_path from typing import Union @@ -38,7 +34,7 @@ def _check_and_update_path(store: BaseStore, path): # noinspection PyShadowingBuiltins -def open(store: StoreLike = None, mode: str = "a", *, zarr_version=None, path=None, **kwargs): +def open(store: StoreLike = None, mode: str = "a", *, path=None, **kwargs): """Convenience function to open a group or array using file-mode-like semantics. Parameters @@ -50,10 +46,6 @@ def open(store: StoreLike = None, mode: str = "a", *, zarr_version=None, path=No read/write (must exist); 'a' means read/write (create if doesn't exist); 'w' means create (overwrite if exists); 'w-' means create (fail if exists). - zarr_version : {2, 3, None}, optional - The zarr protocol version to use. The default value of None will attempt - to infer the version from `store` if possible, otherwise it will fall - back to 2. path : str or None, optional The path within the store to open. 
diff --git a/src/zarr/convenience.py b/src/zarr/convenience.py
index 9c0deeea4..615a019dc 100644
--- a/src/zarr/convenience.py
+++ b/src/zarr/convenience.py
@@ -3,8 +3,6 @@
 import os
 import re
 from collections.abc import Mapping, MutableMapping
-
-from zarr._storage.store import data_root, meta_root, assert_zarr_v3_api_available
 from zarr.core import Array
 from zarr.creation import array as _create_array
 from zarr.creation import open_array
@@ -14,14 +12,12 @@
 from zarr.hierarchy import open_group
 from zarr.meta import json_dumps, json_loads
 from zarr.storage import (
-    _get_metadata_suffix,
     contains_array,
     contains_group,
     normalize_store_arg,
     BaseStore,
     ConsolidatedMetadataStore,
 )
-from zarr._storage.v3 import ConsolidatedMetadataStoreV3
 from zarr.util import TreeViewer, buffer_size, normalize_storage_path
 
 from typing import Union
@@ -38,7 +34,7 @@ def _check_and_update_path(store: BaseStore, path):
 
 
 # noinspection PyShadowingBuiltins
-def open(store: StoreLike = None, mode: str = "a", *, zarr_version=None, path=None, **kwargs):
+def open(store: StoreLike = None, mode: str = "a", *, path=None, **kwargs):
     """Convenience function to open a group or array using file-mode-like semantics.
 
     Parameters
@@ -50,10 +46,6 @@
         read/write (must exist); 'a' means read/write (create if doesn't
         exist); 'w' means create (overwrite if exists); 'w-' means create
         (fail if exists).
-    zarr_version : {2, 3, None}, optional
-        The zarr protocol version to use. The default value of None will attempt
-        to infer the version from `store` if possible, otherwise it will fall
-        back to 2.
     path : str or None, optional
         The path within the store to open.
     **kwargs
@@ -101,10 +93,7 @@
     # we pass storage options explicitly, since normalize_store_arg might construct
     # a store if the input is a fsspec-compatible URL
     _store: BaseStore = normalize_store_arg(
-        store,
-        storage_options=kwargs.pop("storage_options", {}),
-        mode=mode,
-        zarr_version=zarr_version,
+        store, storage_options=kwargs.pop("storage_options", {}), mode=mode
     )
     # path = _check_and_update_path(_store, path)
     path = normalize_storage_path(path)
@@ -135,7 +124,7 @@ def _might_close(path):
     return isinstance(path, (str, os.PathLike))
 
 
-def save_array(store: StoreLike, arr, *, zarr_version=None, path=None, **kwargs):
+def save_array(store: StoreLike, arr, *, path=None, **kwargs):
     """Convenience function to save a NumPy array to the local file system, following a
     similar API to the NumPy save() function.
 
     Parameters
@@ -145,10 +134,6 @@
         Store or path to directory in file system or name of zip file.
     arr : ndarray
         NumPy array with data to save.
-    zarr_version : {2, 3, None}, optional
-        The zarr protocol version to use when saving. The default value of None
-        will attempt to infer the version from `store` if possible, otherwise
-        it will fall back to 2.
     path : str or None, optional
         The path within the store where the array will be saved.
     kwargs
@@ -173,19 +158,17 @@
     """
     may_need_closing = _might_close(store)
-    _store: BaseStore = normalize_store_arg(store, mode="w", zarr_version=zarr_version)
+    _store: BaseStore = normalize_store_arg(store, mode="w")
     path = _check_and_update_path(_store, path)
     try:
-        _create_array(
-            arr, store=_store, overwrite=True, zarr_version=zarr_version, path=path, **kwargs
-        )
+        _create_array(arr, store=_store, overwrite=True, path=path, **kwargs)
     finally:
         if may_need_closing:
             # needed to ensure zip file records are written
             _store.close()
 
 
-def save_group(store: StoreLike, *args, zarr_version=None, path=None, **kwargs):
+def save_group(store: StoreLike, *args, path=None, **kwargs):
     """Convenience function to save several NumPy arrays to the local file system, following a
     similar API to the NumPy savez()/savez_compressed() functions.
 
     Parameters
@@ -195,10 +178,6 @@
         Store or path to directory in file system or name of zip file.
     args : ndarray
         NumPy arrays with data to save.
-    zarr_version : {2, 3, None}, optional
-        The zarr protocol version to use when saving. The default value of None
-        will attempt to infer the version from `store` if possible, otherwise
-        it will fall back to 2.
     path : str or None, optional
         Path within the store where the group will be saved.
kwargs @@ -253,22 +232,22 @@ def save_group(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): raise ValueError("at least one array must be provided") # handle polymorphic store arg may_need_closing = _might_close(store) - _store: BaseStore = normalize_store_arg(store, mode="w", zarr_version=zarr_version) + _store: BaseStore = normalize_store_arg(store, mode="w") path = _check_and_update_path(_store, path) try: - grp = _create_group(_store, path=path, overwrite=True, zarr_version=zarr_version) + grp = _create_group(_store, path=path, overwrite=True) for i, arr in enumerate(args): k = "arr_{}".format(i) - grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) + grp.create_dataset(k, data=arr, overwrite=True) for k, arr in kwargs.items(): - grp.create_dataset(k, data=arr, overwrite=True, zarr_version=zarr_version) + grp.create_dataset(k, data=arr, overwrite=True) finally: if may_need_closing: # needed to ensure zip file records are written _store.close() -def save(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): +def save(store: StoreLike, *args, path=None, **kwargs): """Convenience function to save an array or group of arrays to the local file system. Parameters @@ -277,10 +256,6 @@ def save(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): Store or path to directory in file system or name of zip file. args : ndarray NumPy arrays with data to save. - zarr_version : {2, 3, None}, optional - The zarr protocol version to use when saving. The default value of None - will attempt to infer the version from `store` if possible, otherwise - it will fall back to 2. path : str or None, optional The path within the group where the arrays will be saved. kwargs @@ -349,9 +324,9 @@ def save(store: StoreLike, *args, zarr_version=None, path=None, **kwargs): if len(args) == 0 and len(kwargs) == 0: raise ValueError("at least one array must be provided") if len(args) == 1 and len(kwargs) == 0: - save_array(store, args[0], zarr_version=zarr_version, path=path) + save_array(store, args[0], path=path) else: - save_group(store, *args, zarr_version=zarr_version, path=path, **kwargs) + save_group(store, *args, path=path, **kwargs) class LazyLoader(Mapping): @@ -383,17 +358,13 @@ def __repr__(self): return r -def load(store: StoreLike, zarr_version=None, path=None): +def load(store: StoreLike, path=None): """Load data from an array or group into memory. Parameters ---------- store : MutableMapping or string Store or path to directory in file system or name of zip file. - zarr_version : {2, 3, None}, optional - The zarr protocol version to use when loading. The default value of - None will attempt to infer the version from `store` if possible, - otherwise it will fall back to 2. path : str or None, optional The path within the store from which to load. @@ -415,7 +386,7 @@ def load(store: StoreLike, zarr_version=None, path=None): """ # handle polymorphic store arg - _store = normalize_store_arg(store, zarr_version=zarr_version) + _store = normalize_store_arg(store) path = _check_and_update_path(_store, path) if contains_array(_store, path=path): return Array(store=_store, path=path)[...] 
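[Editor's note] With the zarr_version plumbing removed, the convenience functions above (open, save_array, save_group, save, load) always take the v2 code path. A minimal sketch of the surviving API, assuming local directory stores; the paths and values are illustrative, not taken from the patch:

    import numpy as np
    import zarr

    a = np.arange(12).reshape(3, 4)

    # one positional array dispatches to save_array()
    zarr.save("data/example.zarr", a)
    assert (zarr.load("data/example.zarr") == a).all()

    # several arrays dispatch to save_group(); load() then returns a LazyLoader
    zarr.save_group("data/group.zarr", x=a, y=a * 2)
    arrays = zarr.load("data/group.zarr")
    assert (arrays["y"] == a * 2).all()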
@@ -669,9 +640,7 @@ def copy_store(
         raise ValueError("zarr stores must share the same protocol version")
 
     if source_store_version > 2:
-        nchar_root = len(meta_root)
-        # code below assumes len(meta_root) === len(data_root)
-        assert len(data_root) == nchar_root
+        raise NotImplementedError("This function only supports Zarr version 2.")
 
     # setup logging
     with _LogWriter(log) as log:
@@ -682,10 +651,5 @@
                 if not source_key.startswith(source_path):
                     continue
-            elif source_store_version == 3:
-                # skip 'meta/root/' or 'data/root/' at start of source_key
-                if not source_key[nchar_root:].startswith(source_path):
-                    continue
-
             # process excludes and includes
             exclude = False
             for prog in excludes:
@@ -705,10 +669,5 @@
                 key_suffix = source_key[len(source_path) :]
                 dest_key = dest_path + key_suffix
-            elif source_store_version == 3:
-                # nchar_root is length of 'meta/root/' or 'data/root/'
-                key_suffix = source_key[nchar_root + len(source_path) :]
-                dest_key = source_key[:nchar_root] + dest_path + key_suffix
-
             # create a descriptive label for this operation
             descr = source_key
             if dest_key != source_key:
@@ -1177,8 +1136,6 @@ def copy_all(
     # setup counting variables
     n_copied = n_skipped = n_bytes_copied = 0
 
-    zarr_version = getattr(source, "_version", 2)
-
     # setup logging
     with _LogWriter(log) as log:
         for k in source.keys():
@@ -1197,8 +1154,8 @@
             n_copied += c
             n_skipped += s
             n_bytes_copied += b
-        if zarr_version == 2:
-            dest.attrs.update(**source.attrs)
+
+        dest.attrs.update(**source.attrs)
 
         # log a final message with a summary of what happened
         _log_copy_summary(log, dry_run, n_copied, n_skipped, n_bytes_copied)
@@ -1253,23 +1210,7 @@ def is_zarr_key(key):
             return key.endswith(".zarray") or key.endswith(".zgroup") or key.endswith(".zattrs")
 
     else:
-        assert_zarr_v3_api_available()
-
-        sfx = _get_metadata_suffix(store)  # type: ignore
-
-        def is_zarr_key(key):
-            return (
-                key.endswith(".array" + sfx) or key.endswith(".group" + sfx) or key == "zarr.json"
-            )
-
-        # cannot create a group without a path in v3
-        # so create /meta/root/consolidated group to store the metadata
-        if "consolidated" not in store:
-            _create_group(store, path="consolidated")
-        if not metadata_key.startswith("meta/root/"):
-            metadata_key = "meta/root/consolidated/" + metadata_key
-        # path = 'consolidated'
-
+        raise NotImplementedError("This function only supports Zarr version 2.")
 
     out = {
         "zarr_consolidated_format": 1,
         "metadata": {key: json_loads(store[key]) for key in store if is_zarr_key(key)},
@@ -1321,10 +1262,7 @@ def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", **
     """
     # normalize parameters
-    zarr_version = kwargs.get("zarr_version")
-    store = normalize_store_arg(
-        store, storage_options=kwargs.get("storage_options"), mode=mode, zarr_version=zarr_version
-    )
+    store = normalize_store_arg(store, storage_options=kwargs.get("storage_options"), mode=mode)
 
     if mode not in {"r", "r+"}:
         raise ValueError("invalid mode, expected either 'r' or 'r+'; found {!r}".format(mode))
 
@@ -1332,11 +1270,7 @@ **
     if store._store_version == 2:
         ConsolidatedStoreClass = ConsolidatedMetadataStore
     else:
-        assert_zarr_v3_api_available()
-        ConsolidatedStoreClass = ConsolidatedMetadataStoreV3
-        # default is to store within 'consolidated' group on v3
-        if not
metadata_key.startswith("meta/root/"): - metadata_key = "meta/root/consolidated/" + metadata_key + raise NotImplementedError("This function only supports Zarr version 2.") # setup metadata store meta_store = ConsolidatedStoreClass(store, metadata_key=metadata_key) diff --git a/src/zarr/core.py b/src/zarr/core.py index d22a9d79c..06dcb3206 100644 --- a/src/zarr/core.py +++ b/src/zarr/core.py @@ -10,7 +10,7 @@ import numpy as np from numcodecs.compat import ensure_bytes -from zarr._storage.store import _prefix_to_attrs_key, assert_zarr_v3_api_available +from zarr._storage.store import _prefix_to_attrs_key from zarr.attrs import Attributes from zarr.codecs import AsType, get_codec from zarr.context import Context @@ -36,7 +36,6 @@ pop_fields, ) from zarr.storage import ( - _get_hierarchy_metadata, _prefix_to_array_key, KVStore, getsize, @@ -45,6 +44,7 @@ ) from zarr.util import ( ConstantMap, + UncompressedPartialReadBufferV3, all_equal, InfoReporter, check_array_shape, @@ -56,7 +56,6 @@ normalize_shape, normalize_storage_path, PartialReadBuffer, - UncompressedPartialReadBufferV3, ensure_ndarray_like, ) @@ -125,21 +124,14 @@ def __init__( cache_attrs=True, partial_decompress=False, write_empty_chunks=True, - zarr_version=None, meta_array=None, ): # N.B., expect at this point store is fully initialized with all # configuration metadata fully specified and normalized - - store = normalize_store_arg(store, zarr_version=zarr_version) - if zarr_version is None: - zarr_version = store._store_version - - if zarr_version != 2: - assert_zarr_v3_api_available() + store = normalize_store_arg(store) if chunk_store is not None: - chunk_store = normalize_store_arg(chunk_store, zarr_version=zarr_version) + chunk_store = normalize_store_arg(chunk_store) self._store = store self._chunk_store = chunk_store @@ -159,12 +151,6 @@ def __init__( self._meta_array = np.empty_like(meta_array, shape=()) else: self._meta_array = np.empty(()) - self._version = zarr_version - if self._version == 3: - self._data_key_prefix = "data/root/" + self._key_prefix - self._data_path = "data/root/" + self._path - self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store) - self._metadata_key_suffix = self._hierarchy_metadata["metadata_key_suffix"] # initialize metadata self._load_metadata() @@ -205,26 +191,19 @@ def _load_metadata_nosync(self): self._shape = meta["shape"] self._fill_value = meta["fill_value"] dimension_separator = meta.get("dimension_separator", None) - if self._version == 2: - self._chunks = meta["chunks"] - self._dtype = meta["dtype"] - self._order = meta["order"] - if dimension_separator is None: - try: - dimension_separator = self._store._dimension_separator - except (AttributeError, KeyError): - pass - - # Fallback for any stores which do not choose a default - if dimension_separator is None: - dimension_separator = "." - else: - self._chunks = meta["chunk_grid"]["chunk_shape"] - self._dtype = meta["data_type"] - self._order = meta["chunk_memory_layout"] - chunk_separator = meta["chunk_grid"]["separator"] + + self._chunks = meta["chunks"] + self._dtype = meta["dtype"] + self._order = meta["order"] + if dimension_separator is None: + try: + dimension_separator = self._store._dimension_separator + except (AttributeError, KeyError): + pass + + # Fallback for any stores which do not choose a default if dimension_separator is None: - dimension_separator = meta.get("dimension_separator", chunk_separator) + dimension_separator = "." 
self._dimension_separator = dimension_separator @@ -232,32 +211,17 @@ def _load_metadata_nosync(self): compressor = meta.get("compressor", None) if compressor is None: self._compressor = None - elif self._version == 2: - self._compressor = get_codec(compressor) else: - self._compressor = compressor + self._compressor = get_codec(compressor) # setup filters - if self._version == 2: - filters = meta.get("filters", []) - else: - # TODO: storing filters under attributes for now since the v3 - # array metadata does not have a 'filters' attribute. - filters = meta["attributes"].get("filters", []) + + filters = meta.get("filters", []) + if filters: filters = [get_codec(config) for config in filters] self._filters = filters - if self._version == 3: - storage_transformers = meta.get("storage_transformers", []) - if storage_transformers: - transformed_store = self._chunk_store or self._store - for storage_transformer in storage_transformers[::-1]: - transformed_store = storage_transformer._copy_for_array( - self, transformed_store - ) - self._transformed_chunk_store = transformed_store - def _refresh_metadata(self): if not self._cache_metadata: self._load_metadata() @@ -278,35 +242,22 @@ def _flush_metadata_nosync(self): filters_config = [f.get_config() for f in self._filters] else: filters_config = None - _compressor = compressor_config if self._version == 2 else self._compressor + _compressor = compressor_config meta = dict( shape=self._shape, compressor=_compressor, fill_value=self._fill_value, filters=filters_config, ) - if getattr(self._store, "_store_version", 2) == 2: - meta.update( - dict( - chunks=self._chunks, - dtype=self._dtype, - order=self._order, - dimension_separator=self._dimension_separator, - ) - ) - else: - meta.update( - dict( - chunk_grid=dict( - type="regular", - chunk_shape=self._chunks, - separator=self._dimension_separator, - ), - data_type=self._dtype, - chunk_memory_layout=self._order, - attributes=self.attrs.asdict(), - ) + + meta.update( + dict( + chunks=self._chunks, + dtype=self._dtype, + order=self._order, + dimension_separator=self._dimension_separator, ) + ) mkey = _prefix_to_array_key(self._store, self._key_prefix) self._store[mkey] = self._store._metadata_class.encode_array_metadata(meta) @@ -496,28 +447,11 @@ def nchunks(self): def nchunks_initialized(self): """The number of chunks that have been initialized with some data.""" - # count chunk keys - if self._version == 3: - # # key pattern for chunk keys - # prog = re.compile(r'\.'.join([r'c\d+'] * min(1, self.ndim))) - # # get chunk keys, excluding the prefix - # members = self.chunk_store.list_prefix(self._data_path) - # members = [k.split(self._data_key_prefix)[1] for k in members] - # # count the chunk keys - # return sum(1 for k in members if prog.match(k)) - - # key pattern for chunk keys - prog = re.compile(self._data_key_prefix + r"c\d+") # TODO: ndim == 0 case? 
- # get chunk keys, excluding the prefix - members = self.chunk_store.list_prefix(self._data_path) - # count the chunk keys - return sum(1 for k in members if prog.match(k)) - else: - # key pattern for chunk keys - prog = re.compile(r"\.".join([r"\d+"] * min(1, self.ndim))) + # key pattern for chunk keys + prog = re.compile(r"\.".join([r"\d+"] * min(1, self.ndim))) - # count chunk keys - return sum(1 for k in listdir(self.chunk_store, self._path) if prog.match(k)) + # count chunk keys + return sum(1 for k in listdir(self.chunk_store, self._path) if prog.match(k)) # backwards compatibility initialized = nchunks_initialized @@ -2044,8 +1978,6 @@ def _process_chunk( cdata = cdata.read_full() self._compressor.decode(cdata, dest) else: - if isinstance(cdata, UncompressedPartialReadBufferV3): - cdata = cdata.read_full() chunk = ensure_ndarray_like(cdata).view(self._dtype) chunk = chunk.reshape(self._chunks, order=self._order) np.copyto(dest, chunk) @@ -2065,21 +1997,13 @@ def _process_chunk( else dim for i, dim in enumerate(self.chunks) ] - if isinstance(cdata, UncompressedPartialReadBufferV3): - chunk_partial = self._decode_chunk( - cdata.read_part(start, nitems), - start=start, - nitems=nitems, - expected_shape=expected_shape, - ) - else: - cdata.read_part(start, nitems) - chunk_partial = self._decode_chunk( - cdata.buff, - start=start, - nitems=nitems, - expected_shape=expected_shape, - ) + cdata.read_part(start, nitems) + chunk_partial = self._decode_chunk( + cdata.buff, + start=start, + nitems=nitems, + expected_shape=expected_shape, + ) tmp[partial_out_selection] = chunk_partial out[out_selection] = tmp[chunk_selection] return @@ -2318,19 +2242,7 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None): return chunk def _chunk_key(self, chunk_coords): - if self._version == 3: - # _chunk_key() corresponds to data_key(P, i, j, ...) example in the spec - # where P = self._key_prefix, i, j, ... = chunk_coords - # e.g. c0/2/3 for 3d array with chunk index (0, 2, 3) - # https://zarr-specs.readthedocs.io/en/core-protocol-v3.0-dev/protocol/core/v3.0.html#regular-grids - return ( - "data/root/" - + self._key_prefix - + "c" - + self._dimension_separator.join(map(str, chunk_coords)) - ) - else: - return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) + return self._key_prefix + self._dimension_separator.join(map(str, chunk_coords)) def _decode_chunk(self, cdata, start=None, nitems=None, expected_shape=None): # decompress @@ -2552,7 +2464,6 @@ def __getstate__(self): "cache_attrs": self._attrs.cache, "partial_decompress": self._partial_decompress, "write_empty_chunks": self._write_empty_chunks, - "zarr_version": self._version, "meta_array": self._meta_array, } @@ -2860,7 +2771,6 @@ def view( read_only=read_only, synchronizer=synchronizer, cache_metadata=True, - zarr_version=self._version, ) a._is_view = True diff --git a/src/zarr/creation.py b/src/zarr/creation.py index 6227f90b7..c93178c0e 100644 --- a/src/zarr/creation.py +++ b/src/zarr/creation.py @@ -4,7 +4,6 @@ import numpy as np from numcodecs.registry import codec_registry -from zarr._storage.store import DEFAULT_ZARR_VERSION from zarr.core import Array from zarr.errors import ( ArrayNotFoundError, @@ -42,9 +41,7 @@ def create( dimension_separator=None, write_empty_chunks=True, *, - zarr_version=None, meta_array=None, - storage_transformers=(), **kwargs, ): """Create an array. @@ -109,21 +106,6 @@ def create( .. 
versionadded:: 2.11 - storage_transformers : sequence of StorageTransformers, optional - Setting storage transformers, changes the storage structure and behaviour - of data coming from the underlying store. The transformers are applied in the - order of the given sequence. Supplying an empty sequence is the same as omitting - the argument or setting it to None. May only be set when using zarr_version 3. - - .. versionadded:: 2.13 - - zarr_version : {None, 2, 3}, optional - The zarr protocol version of the created array. If None, it will be - inferred from ``store`` or ``chunk_store`` if they are provided, - otherwise defaulting to 2. - - .. versionadded:: 2.12 - meta_array : array-like, optional An array instance to use for determining arrays to create and return to users. Use `numpy.empty(())` by default. @@ -173,12 +155,9 @@ def create( """ - if zarr_version is None and store is None: - zarr_version = getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION) # handle polymorphic store arg - store = normalize_store_arg(store, zarr_version=zarr_version, mode="w") - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) + store = normalize_store_arg(store, mode="w") # API compatibility with h5py compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs) @@ -196,9 +175,6 @@ def create( ) dimension_separator = normalize_dimension_separator(dimension_separator) - if zarr_version > 2 and path is None: - path = "/" - # initialize array metadata init_array( store, @@ -214,7 +190,6 @@ def create( filters=filters, object_codec=object_codec, dimension_separator=dimension_separator, - storage_transformers=storage_transformers, ) # instantiate array @@ -463,7 +438,6 @@ def open_array( partial_decompress=False, write_empty_chunks=True, *, - zarr_version=None, dimension_separator=None, meta_array=None, **kwargs, @@ -531,15 +505,10 @@ def open_array( .. versionadded:: 2.11 - zarr_version : {None, 2, 3}, optional - The zarr protocol version of the array to be opened. If None, it will - be inferred from ``store`` or ``chunk_store`` if they are provided, - otherwise defaulting to 2. dimension_separator : {None, '.', '/'}, optional Can be used to specify whether the array is in a flat ('.') or nested ('/') format. If None, the appropriate value will be read from `store` - when present. Otherwise, defaults to '.' when ``zarr_version == 2`` - and `/` otherwise. + when present. Otherwise, defaults to '.'. meta_array : array-like, optional An array instance to use for determining arrays to create and return to users. Use `numpy.empty(())` by default. 
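[Editor's note] The docstring change above means a nested chunk layout is no longer implied by a protocol version: when the store does not declare a separator, open_array falls back to ".". A sketch of the observable effect on chunk keys; the shapes and the MemoryStore choice are illustrative:

    import zarr
    from zarr.storage import MemoryStore

    # default flat layout: chunk keys are joined with "."
    flat = MemoryStore()
    z = zarr.open_array(flat, mode="w", shape=(4, 4), chunks=(2, 2), dtype="i4")
    z[:] = 42
    assert "0.0" in flat

    # nested layout must now be requested explicitly
    nested = MemoryStore()
    z2 = zarr.open_array(
        nested, mode="w", shape=(4, 4), chunks=(2, 2), dtype="i4", dimension_separator="/"
    )
    z2[:] = 42
    assert "0/0" in nested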
@@ -579,28 +548,18 @@ def open_array(
     #  w- or x : create, fail if exists
     #  a : read/write if exists, create otherwise (default)
 
-    if zarr_version is None and store is None:
-        zarr_version = getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION)
-
     # handle polymorphic store arg
-    store = normalize_store_arg(
-        store, storage_options=storage_options, mode=mode, zarr_version=zarr_version
-    )
-    zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION)
+    store = normalize_store_arg(store, storage_options=storage_options, mode=mode)
+
     if chunk_store is not None:
-        chunk_store = normalize_store_arg(
-            chunk_store, storage_options=storage_options, mode=mode, zarr_version=zarr_version
-        )
+        chunk_store = normalize_store_arg(chunk_store, storage_options=storage_options, mode=mode)
 
     # respect the dimension separator specified in a store, if present
     if dimension_separator is None:
         if hasattr(store, "_dimension_separator"):
             dimension_separator = store._dimension_separator
         else:
-            dimension_separator = "." if zarr_version == 2 else "/"
-
-    if zarr_version == 3 and path is None:
-        path = "array"  # TODO: raise ValueError instead?
+            dimension_separator = "."
 
     path = normalize_storage_path(path)
@@ -709,7 +668,6 @@ def _like_args(a, kwargs):
         kwargs.setdefault("compressor", a.compressor)
         kwargs.setdefault("order", a.order)
         kwargs.setdefault("filters", a.filters)
-        kwargs.setdefault("zarr_version", a._version)
     else:
         kwargs.setdefault("compressor", "default")
         kwargs.setdefault("order", "C")
diff --git a/src/zarr/hierarchy.py b/src/zarr/hierarchy.py
index 1c9848e64..e30d2d799 100644
--- a/src/zarr/hierarchy.py
+++ b/src/zarr/hierarchy.py
@@ -3,13 +3,6 @@
 
 import numpy as np
 
-from zarr._storage.store import (
-    _get_metadata_suffix,
-    data_root,
-    meta_root,
-    DEFAULT_ZARR_VERSION,
-    assert_zarr_v3_api_available,
-)
 from zarr.attrs import Attributes
 from zarr.core import Array
 from zarr.creation import (
@@ -31,21 +24,20 @@
     ReadOnlyError,
 )
 from zarr.storage import (
-    _get_hierarchy_metadata,
     _prefix_to_group_key,
     BaseStore,
     MemoryStore,
     attrs_key,
     contains_array,
     contains_group,
     group_meta_key,
     init_group,
     listdir,
     normalize_store_arg,
     rename,
     rmdir,
 )
-from zarr._storage.v3 import MemoryStoreV3
+
 from zarr.util import (
     InfoReporter,
     TreeViewer,
@@ -143,19 +135,12 @@ def __init__(
         chunk_store=None,
         cache_attrs=True,
         synchronizer=None,
-        zarr_version=None,
         *,
         meta_array=None,
     ):
-        store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version)
-        if zarr_version is None:
-            zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION)
-
-        if zarr_version != 2:
-            assert_zarr_v3_api_available()
-
+        store: BaseStore = _normalize_store_arg(store)
         if chunk_store is not None:
-            chunk_store: BaseStore = _normalize_store_arg(chunk_store, zarr_version=zarr_version)
+            chunk_store: BaseStore = _normalize_store_arg(chunk_store)
         self._store = store
         self._chunk_store = chunk_store
         self._path = normalize_storage_path(path)
@@ -169,12 +154,6 @@
             self._meta_array = np.empty_like(meta_array, shape=())
         else:
             self._meta_array = np.empty(())
-        self._version = zarr_version
-        if self._version == 3:
-            self._data_key_prefix = data_root + self._key_prefix
-            self._data_path = data_root + self._path
-            self._hierarchy_metadata = _get_hierarchy_metadata(store=self._store)
-            self._metadata_key_suffix = _get_metadata_suffix(store=self._store)
 
         # guard conditions
         if contains_array(store, path=self._path):
@@ -187,25 +166,13 @@
             assert not mkey.endswith("root/.group")
meta_bytes = store[mkey] except KeyError: - if self._version == 2: - raise GroupNotFoundError(path) - else: - implicit_prefix = meta_root + self._key_prefix - if self._store.list_prefix(implicit_prefix): - # implicit group does not have any metadata - self._meta = None - else: - raise GroupNotFoundError(path) + raise GroupNotFoundError(path) else: self._meta = self._store._metadata_class.decode_group_metadata(meta_bytes) # setup attributes - if self._version == 2: - akey = self._key_prefix + attrs_key - else: - # Note: mkey doesn't actually exist for implicit groups, but the - # object can still be created. - akey = mkey + akey = self._key_prefix + attrs_key + self._attrs = Attributes( store, key=akey, read_only=read_only, cache=cache_attrs, synchronizer=synchronizer ) @@ -304,35 +271,11 @@ def __iter__(self): quux """ - if getattr(self._store, "_store_version", 2) == 2: - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if contains_array(self._store, path) or contains_group(self._store, path): - yield key - else: - # TODO: Should this iterate over data folders and/or metadata - # folders and/or metadata files - - dir_path = meta_root + self._key_prefix - name_start = len(dir_path) - keys, prefixes = self._store.list_dir(dir_path) - - # yield any groups or arrays - sfx = self._metadata_key_suffix - for key in keys: - len_suffix = len(".group") + len(sfx) # same for .array - if key.endswith((".group" + sfx, ".array" + sfx)): - yield key[name_start:-len_suffix] - - # also yield any implicit groups - for prefix in prefixes: - prefix = prefix.rstrip("/") - # only implicit if there is no .group.sfx file - if prefix + ".group" + sfx not in self._store: - yield prefix[name_start:] - - # Note: omit data/root/ to avoid duplicate listings - # any group in data/root/ must has an entry in meta/root/ + + for key in sorted(listdir(self._store, self._path)): + path = self._key_prefix + key + if contains_array(self._store, path) or contains_group(self._store, path): + yield key def __len__(self): """Number of members.""" @@ -400,7 +343,6 @@ def __getstate__(self): "chunk_store": self._chunk_store, "cache_attrs": self._attrs.cache, "synchronizer": self._synchronizer, - "zarr_version": self._version, "meta_array": self._meta_array, } @@ -466,7 +408,6 @@ def __getitem__(self, item): chunk_store=self._chunk_store, synchronizer=self._synchronizer, cache_attrs=self.attrs.cache, - zarr_version=self._version, meta_array=self._meta_array, ) elif contains_group(self._store, path, explicit_only=True): @@ -477,25 +418,8 @@ def __getitem__(self, item): chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, synchronizer=self._synchronizer, - zarr_version=self._version, meta_array=self._meta_array, ) - elif self._version == 3: - implicit_group = meta_root + path + "/" - # non-empty folder in the metadata path implies an implicit group - if self._store.list_prefix(implicit_group): - return Group( - self._store, - read_only=self._read_only, - path=path, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, - zarr_version=self._version, - meta_array=self._meta_array, - ) - else: - raise KeyError(item) else: raise KeyError(item) @@ -546,29 +470,11 @@ def group_keys(self): ['bar', 'foo'] """ - if self._version == 2: - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if contains_group(self._store, path): - yield key - else: - dir_name = meta_root + self._path - group_sfx = ".group" + 
self._metadata_key_suffix - # The fact that we call sorted means this can't be a streaming generator. - # The keys are already in memory. - all_keys = sorted(listdir(self._store, dir_name)) - for key in all_keys: - if key.endswith(group_sfx): - key = key[: -len(group_sfx)] - if key in all_keys: - # otherwise we will double count this group - continue - path = self._key_prefix + key - if path.endswith(".array" + self._metadata_key_suffix): - # skip array keys - continue - if contains_group(self._store, path, explicit_only=False): - yield key + + for key in sorted(listdir(self._store, self._path)): + path = self._key_prefix + key + if contains_group(self._store, path): + yield key def groups(self): """Return an iterator over (name, value) pairs for groups only. @@ -587,26 +493,10 @@ def groups(self): foo """ - if self._version == 2: - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if contains_group(self._store, path, explicit_only=False): - yield ( - key, - Group( - self._store, - path=path, - read_only=self._read_only, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, - zarr_version=self._version, - ), - ) - else: - for key in self.group_keys(): - path = self._key_prefix + key + for key in sorted(listdir(self._store, self._path)): + path = self._key_prefix + key + if contains_group(self._store, path, explicit_only=False): yield ( key, Group( @@ -616,7 +506,6 @@ def groups(self): chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, synchronizer=self._synchronizer, - zarr_version=self._version, ), ) @@ -671,34 +560,14 @@ def arrays(self, recurse=False): return self._array_iter(keys_only=False, method="arrays", recurse=recurse) def _array_iter(self, keys_only, method, recurse): - if self._version == 2: - for key in sorted(listdir(self._store, self._path)): - path = self._key_prefix + key - if contains_array(self._store, path): - _key = key.rstrip("/") - yield _key if keys_only else (_key, self[key]) - elif recurse and contains_group(self._store, path): - group = self[key] - yield from getattr(group, method)(recurse=recurse) - else: - dir_name = meta_root + self._path - array_sfx = ".array" + self._metadata_key_suffix - group_sfx = ".group" + self._metadata_key_suffix - - for key in sorted(listdir(self._store, dir_name)): - if key.endswith(array_sfx): - key = key[: -len(array_sfx)] - _key = key.rstrip("/") - yield _key if keys_only else (_key, self[key]) - - path = self._key_prefix + key - assert not path.startswith("meta/") - if key.endswith(group_sfx): - # skip group metadata keys - continue - elif recurse and contains_group(self._store, path): - group = self[key] - yield from getattr(group, method)(recurse=recurse) + for key in sorted(listdir(self._store, self._path)): + path = self._key_prefix + key + if contains_array(self._store, path): + _key = key.rstrip("/") + yield _key if keys_only else (_key, self[key]) + elif recurse and contains_group(self._store, path): + group = self[key] + yield from getattr(group, method)(recurse=recurse) def visitvalues(self, func): """Run ``func`` on each object. 
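[Editor's note] After the rewrite above, every member-iteration method on Group (__iter__, group_keys, groups, arrays, _array_iter) funnels through listdir() on the v2 store. A small usage sketch; the member names are illustrative:

    import zarr

    root = zarr.group()  # in-memory store
    root.create_group("foo")
    root.create_dataset("bar", shape=(10,), chunks=(5,), dtype="i4")

    assert sorted(root) == ["bar", "foo"]            # Group.__iter__
    assert list(root.group_keys()) == ["foo"]        # groups only
    assert [k for k, _ in root.arrays()] == ["bar"]  # (name, Array) pairs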
@@ -978,7 +847,6 @@ def _create_group_nosync(self, name, overwrite=False): chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, synchronizer=self._synchronizer, - zarr_version=self._version, ) def create_groups(self, *names, **kwargs): @@ -1028,7 +896,6 @@ def _require_group_nosync(self, name, overwrite=False): chunk_store=self._chunk_store, cache_attrs=self.attrs.cache, synchronizer=self._synchronizer, - zarr_version=self._version, ) def require_groups(self, *names): @@ -1340,18 +1207,10 @@ def move(self, source, dest): self._write_op(self._move_nosync, source, dest) -def _normalize_store_arg(store, *, storage_options=None, mode="r", zarr_version=None): - if zarr_version is None: - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) - - if zarr_version != 2: - assert_zarr_v3_api_available() - +def _normalize_store_arg(store, *, storage_options=None, mode="r"): if store is None: - return MemoryStore() if zarr_version == 2 else MemoryStoreV3() - return normalize_store_arg( - store, storage_options=storage_options, mode=mode, zarr_version=zarr_version - ) + return MemoryStore() + return normalize_store_arg(store, storage_options=storage_options, mode=mode) def group( @@ -1362,7 +1221,6 @@ def group( synchronizer=None, path=None, *, - zarr_version=None, meta_array=None, ): """Create a group. @@ -1414,20 +1272,11 @@ def group( """ # handle polymorphic store arg - store = _normalize_store_arg(store, zarr_version=zarr_version, mode="w") - if zarr_version is None: - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) - - if zarr_version != 2: - assert_zarr_v3_api_available() + store = _normalize_store_arg(store, mode="w") path = normalize_storage_path(path) - requires_init = None - if zarr_version == 2: - requires_init = overwrite or not contains_group(store) - elif zarr_version == 3: - requires_init = overwrite or not contains_group(store, path) + requires_init = overwrite or not contains_group(store) if requires_init: init_group(store, overwrite=overwrite, chunk_store=chunk_store, path=path) @@ -1439,7 +1288,6 @@ def group( cache_attrs=cache_attrs, synchronizer=synchronizer, path=path, - zarr_version=zarr_version, meta_array=meta_array, ) @@ -1453,7 +1301,6 @@ def open_group( chunk_store=None, storage_options=None, *, - zarr_version=None, meta_array=None, ): """Open a group using file-mode-like semantics. 
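[Editor's note] _normalize_store_arg above is reduced to a single code path: store=None yields an in-memory MemoryStore, and anything else goes through normalize_store_arg. A quick sketch; the on-disk path is illustrative:

    import zarr
    from zarr.storage import DirectoryStore, MemoryStore

    g = zarr.group()  # store=None -> MemoryStore
    assert isinstance(g.store, MemoryStore)

    g2 = zarr.open_group("data/example_group.zarr", mode="a")
    assert isinstance(g2.store, DirectoryStore)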
@@ -1507,21 +1354,10 @@ def open_group( """ # handle polymorphic store arg - store = _normalize_store_arg( - store, storage_options=storage_options, mode=mode, zarr_version=zarr_version - ) - if zarr_version is None: - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) - - if zarr_version != 2: - assert_zarr_v3_api_available() + store = _normalize_store_arg(store, storage_options=storage_options, mode=mode) if chunk_store is not None: - chunk_store = _normalize_store_arg( - chunk_store, storage_options=storage_options, mode=mode, zarr_version=zarr_version - ) - if getattr(chunk_store, "_store_version", DEFAULT_ZARR_VERSION) != zarr_version: - raise ValueError("zarr_version of store and chunk_store must match") # pragma: no cover + chunk_store = _normalize_store_arg(chunk_store, storage_options=storage_options, mode=mode) path = normalize_storage_path(path) @@ -1560,6 +1396,5 @@ def open_group( synchronizer=synchronizer, path=path, chunk_store=chunk_store, - zarr_version=zarr_version, meta_array=meta_array, ) diff --git a/src/zarr/meta.py b/src/zarr/meta.py index 3a5435a17..7cca228a1 100644 --- a/src/zarr/meta.py +++ b/src/zarr/meta.py @@ -2,31 +2,21 @@ import itertools from collections.abc import Mapping -import numcodecs import numpy as np -from numcodecs.abc import Codec from zarr.errors import MetadataError from zarr.util import json_dumps, json_loads -from typing import cast, Union, Any, List, Mapping as MappingType, Optional, TYPE_CHECKING +from typing import cast, Union, Any, List, Mapping as MappingType, TYPE_CHECKING if TYPE_CHECKING: # pragma: no cover - from zarr._storage.store import StorageTransformer + pass ZARR_FORMAT = 2 -ZARR_FORMAT_v3 = 3 # FLOAT_FILLS = {"NaN": np.nan, "Infinity": np.PINF, "-Infinity": np.NINF} -_default_entry_point_metadata_v3 = { - "zarr_format": "https://purl.org/zarr/spec/protocol/core/3.0", - "metadata_encoding": "https://purl.org/zarr/spec/protocol/core/3.0", - "metadata_key_suffix": ".json", - "extensions": [], -} - _v3_core_types = set("".join(d) for d in itertools.product("<>", ("u", "i", "f"), ("2", "4", "8"))) _v3_core_types = {"bool", "i1", "u1"} | _v3_core_types @@ -301,271 +291,6 @@ def encode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> return v -class Metadata3(Metadata2): - ZARR_FORMAT = ZARR_FORMAT_v3 - - @classmethod - def decode_dtype(cls, d, validate=True): - if isinstance(d, dict): - # extract the type from the extension info - try: - d = d["type"] - except KeyError: - raise KeyError("Extended dtype info must provide a key named 'type'.") - d = cls._decode_dtype_descr(d) - dtype = np.dtype(d) - if validate: - if dtype.str in (_v3_core_types | {"|b1", "|u1", "|i1"}): - # it is a core dtype of the v3 spec - pass - else: - # will raise if this is not a recognized extended dtype - get_extended_dtype_info(dtype) - return dtype - - @classmethod - def encode_dtype(cls, d): - s = d.str - if s == "|b1": - return "bool" - elif s == "|u1": - return "u1" - elif s == "|i1": - return "i1" - elif s in _v3_core_types: - return Metadata2.encode_dtype(d) - else: - # Check if this dtype corresponds to a supported extension to - # the v3 protocol. 
- return get_extended_dtype_info(np.dtype(d)) - - @classmethod - def decode_group_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - meta = cls.parse_metadata(s) - # 1 / 0 - # # check metadata format version - # zarr_format = meta.get("zarr_format", None) - # if zarr_format != cls.ZARR_FORMAT: - # raise MetadataError("unsupported zarr format: %s" % zarr_format) - - assert "attributes" in meta - # meta = dict(attributes=meta['attributes']) - return meta - - # return json.loads(s) - - @classmethod - def encode_group_metadata(cls, meta=None) -> bytes: - # The ZARR_FORMAT should not be in the group metadata, but in the - # entry point metadata instead - # meta = dict(zarr_format=cls.ZARR_FORMAT) - if meta is None: - meta = {"attributes": {}} - meta = dict(attributes=meta.get("attributes", {})) - return json_dumps(meta) - - @classmethod - def encode_hierarchy_metadata(cls, meta=None) -> bytes: - if meta is None: - meta = _default_entry_point_metadata_v3 - elif set(meta.keys()) != { - "zarr_format", - "metadata_encoding", - "metadata_key_suffix", - "extensions", - }: - raise ValueError(f"Unexpected keys in metadata. meta={meta}") - return json_dumps(meta) - - @classmethod - def decode_hierarchy_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - meta = cls.parse_metadata(s) - # check metadata format - # zarr_format = meta.get("zarr_format", None) - # if zarr_format != "https://purl.org/zarr/spec/protocol/core/3.0": - # raise MetadataError("unsupported zarr format: %s" % zarr_format) - if set(meta.keys()) != { - "zarr_format", - "metadata_encoding", - "metadata_key_suffix", - "extensions", - }: - raise ValueError(f"Unexpected keys in metadata. meta={meta}") - return meta - - @classmethod - def _encode_codec_metadata(cls, codec: Codec) -> Optional[Mapping]: - if codec is None: - return None - - # only support gzip for now - config = codec.get_config() - del config["id"] - uri = "https://purl.org/zarr/spec/codec/" - if isinstance(codec, numcodecs.GZip): - uri = uri + "gzip/1.0" - elif isinstance(codec, numcodecs.Zlib): - uri = uri + "zlib/1.0" - elif isinstance(codec, numcodecs.Blosc): - uri = uri + "blosc/1.0" - elif isinstance(codec, numcodecs.BZ2): - uri = uri + "bz2/1.0" - elif isinstance(codec, numcodecs.LZ4): - uri = uri + "lz4/1.0" - elif isinstance(codec, numcodecs.LZMA): - uri = uri + "lzma/1.0" - meta = { - "codec": uri, - "configuration": config, - } - return meta - - @classmethod - def _decode_codec_metadata(cls, meta: Optional[Mapping]) -> Optional[Codec]: - if meta is None: - return None - - uri = "https://purl.org/zarr/spec/codec/" - conf = meta["configuration"] - if meta["codec"].startswith(uri + "gzip/"): - conf["id"] = "gzip" - elif meta["codec"].startswith(uri + "zlib/"): - conf["id"] = "zlib" - elif meta["codec"].startswith(uri + "blosc/"): - conf["id"] = "blosc" - elif meta["codec"].startswith(uri + "bz2/"): - conf["id"] = "bz2" - elif meta["codec"].startswith(uri + "lz4/"): - conf["id"] = "lz4" - elif meta["codec"].startswith(uri + "lzma/"): - conf["id"] = "lzma" - else: - raise NotImplementedError - - codec = numcodecs.get_codec(conf) - - return codec - - @classmethod - def _encode_storage_transformer_metadata( - cls, storage_transformer: "StorageTransformer" - ) -> Optional[Mapping]: - return { - "extension": storage_transformer.extension_uri, - "type": storage_transformer.type, - "configuration": storage_transformer.get_config(), - } - - @classmethod - def _decode_storage_transformer_metadata(cls, meta: Mapping) -> 
"StorageTransformer": - from zarr._storage.v3_storage_transformers import ( - ShardingStorageTransformer, - DummyStorageTransfomer, - ) - - # This might be changed to a proper registry in the future - KNOWN_STORAGE_TRANSFORMERS = [DummyStorageTransfomer, ShardingStorageTransformer] - - conf = meta.get("configuration", {}) - extension_uri = meta["extension"] - transformer_type = meta["type"] - - for StorageTransformerCls in KNOWN_STORAGE_TRANSFORMERS: - if StorageTransformerCls.extension_uri == extension_uri: - break - else: # pragma: no cover - raise NotImplementedError - - return StorageTransformerCls.from_config(transformer_type, conf) - - @classmethod - def decode_array_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - meta = cls.parse_metadata(s) - - # extract array metadata fields - try: - dtype = cls.decode_dtype(meta["data_type"]) - if dtype.hasobject: - import numcodecs - - object_codec = numcodecs.get_codec(meta["attributes"]["filters"][0]) - else: - object_codec = None - fill_value = cls.decode_fill_value(meta["fill_value"], dtype, object_codec) - # TODO: remove dimension_separator? - - compressor = cls._decode_codec_metadata(meta.get("compressor", None)) - storage_transformers = meta.get("storage_transformers", ()) - storage_transformers = [ - cls._decode_storage_transformer_metadata(i) for i in storage_transformers - ] - extensions = meta.get("extensions", []) - meta = dict( - shape=tuple(meta["shape"]), - chunk_grid=dict( - type=meta["chunk_grid"]["type"], - chunk_shape=tuple(meta["chunk_grid"]["chunk_shape"]), - separator=meta["chunk_grid"]["separator"], - ), - data_type=dtype, - fill_value=fill_value, - chunk_memory_layout=meta["chunk_memory_layout"], - attributes=meta["attributes"], - extensions=extensions, - ) - # compressor field should be absent when there is no compression - if compressor: - meta["compressor"] = compressor - if storage_transformers: - meta["storage_transformers"] = storage_transformers - - except Exception as e: - raise MetadataError("error decoding metadata: %s" % e) - else: - return meta - - @classmethod - def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: - dtype = meta["data_type"] - sdshape = () - if dtype.subdtype is not None: - dtype, sdshape = dtype.subdtype - dimension_separator = meta.get("dimension_separator") - if dtype.hasobject: - import numcodecs - - object_codec = numcodecs.get_codec(meta["attributes"]["filters"][0]) - else: - object_codec = None - - compressor = cls._encode_codec_metadata(meta.get("compressor", None)) - storage_transformers = meta.get("storage_transformers", ()) - storage_transformers = [ - cls._encode_storage_transformer_metadata(i) for i in storage_transformers - ] - extensions = meta.get("extensions", []) - meta = dict( - shape=meta["shape"] + sdshape, - chunk_grid=dict( - type=meta["chunk_grid"]["type"], - chunk_shape=tuple(meta["chunk_grid"]["chunk_shape"]), - separator=meta["chunk_grid"]["separator"], - ), - data_type=cls.encode_dtype(dtype), - fill_value=encode_fill_value(meta["fill_value"], dtype, object_codec), - chunk_memory_layout=meta["chunk_memory_layout"], - attributes=meta.get("attributes", {}), - extensions=extensions, - ) - if compressor: - meta["compressor"] = compressor - if dimension_separator: - meta["dimension_separator"] = dimension_separator - if storage_transformers: - meta["storage_transformers"] = storage_transformers - return json_dumps(meta) - - parse_metadata = Metadata2.parse_metadata decode_array_metadata = Metadata2.decode_array_metadata 
encode_array_metadata = Metadata2.encode_array_metadata diff --git a/src/zarr/storage.py b/src/zarr/storage.py index e3a43d26c..a7bd22a6b 100644 --- a/src/zarr/storage.py +++ b/src/zarr/storage.py @@ -35,7 +35,6 @@ import uuid import time -from numcodecs.abc import Codec from numcodecs.compat import ensure_bytes, ensure_text, ensure_contiguous_ndarray_like from numcodecs.registry import codec_registry from zarr.context import Context @@ -66,21 +65,15 @@ from zarr._storage.absstore import ABSStore # noqa: F401 from zarr._storage.store import ( # noqa: F401 - _get_hierarchy_metadata, - _get_metadata_suffix, _listdir_from_keys, _rename_from_keys, - _rename_metadata_v3, _rmdir_from_keys, - _rmdir_from_keys_v3, _path_to_prefix, _prefix_to_array_key, _prefix_to_group_key, array_meta_key, attrs_key, - data_root, group_meta_key, - meta_root, DEFAULT_ZARR_VERSION, BaseStore, Store, @@ -122,28 +115,10 @@ def contains_group(store: StoreLike, path: Path = None, explicit_only=True) -> b path = normalize_storage_path(path) prefix = _path_to_prefix(path) key = _prefix_to_group_key(store, prefix) - store_version = getattr(store, "_store_version", 2) - if store_version == 2 or explicit_only: - return key in store - else: - if key in store: - return True - # for v3, need to also handle implicit groups - - sfx = _get_metadata_suffix(store) # type: ignore - implicit_prefix = key.replace(".group" + sfx, "") - if not implicit_prefix.endswith("/"): - implicit_prefix += "/" - if store.list_prefix(implicit_prefix): # type: ignore - return True - return False + return key in store -def _normalize_store_arg_v2(store: Any, storage_options=None, mode="r") -> BaseStore: - # default to v2 store for backward compatibility - zarr_version = getattr(store, "_store_version", 2) - if zarr_version != 2: - raise ValueError("store must be a version 2 store") +def normalize_store_arg(store: Any, storage_options=None, mode="r") -> BaseStore: if store is None: store = KVStore(dict()) return store @@ -180,38 +155,17 @@ def _normalize_store_arg_v2(store: Any, storage_options=None, mode="r") -> BaseS return store -def normalize_store_arg( - store: Any, storage_options=None, mode="r", *, zarr_version=None -) -> BaseStore: - if zarr_version is None: - # default to v2 store for backward compatibility - zarr_version = getattr(store, "_store_version", DEFAULT_ZARR_VERSION) - if zarr_version == 2: - normalize_store = _normalize_store_arg_v2 - elif zarr_version == 3: - from zarr._storage.v3 import _normalize_store_arg_v3 - - normalize_store = _normalize_store_arg_v3 - else: - raise ValueError("zarr_version must be either 2 or 3") - return normalize_store(store, storage_options, mode) - - def rmdir(store: StoreLike, path: Path = None): """Remove all items under the given path. 
If `store` provides a `rmdir` method, this will be called, otherwise will fall back to implementation via the `Store` interface.""" path = normalize_storage_path(path) - store_version = getattr(store, "_store_version", 2) if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore # pass through store.rmdir(path) else: # slow version, delete one key at a time - if store_version == 2: - _rmdir_from_keys(store, path) - else: - _rmdir_from_keys_v3(store, path) # type: ignore + _rmdir_from_keys(store, path) def rename(store: Store, src_path: Path, dst_path: Path): @@ -254,21 +208,10 @@ def _getsize(store: BaseStore, path: Path = None) -> int: else: path = "" if path is None else normalize_storage_path(path) size = 0 - store_version = getattr(store, "_store_version", 2) - if store_version == 3: - if path == "": - # have to list the root folders without trailing / in this case - members = store.list_prefix(data_root.rstrip("/")) # type: ignore - members += store.list_prefix(meta_root.rstrip("/")) # type: ignore - else: - members = store.list_prefix(data_root + path) # type: ignore - members += store.list_prefix(meta_root + path) # type: ignore - # also include zarr.json? - # members += ['zarr.json'] - else: - members = listdir(store, path) - prefix = _path_to_prefix(path) - members = [prefix + k for k in members] + + members = listdir(store, path) + prefix = _path_to_prefix(path) + members = [prefix + k for k in members] for k in members: try: v = store[k] @@ -437,13 +380,8 @@ def init_array( path = normalize_storage_path(path) # ensure parent group initialized - store_version = getattr(store, "_store_version", 2) - if store_version < 3: - _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) - if store_version == 3 and "zarr.json" not in store: - # initialize with default zarr.json entry level metadata - store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore + _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) if not compressor: # compatibility with legacy tests using compressor=[] @@ -482,50 +420,20 @@ def _init_array_metadata( dimension_separator=None, storage_transformers=(), ): - store_version = getattr(store, "_store_version", 2) - path = normalize_storage_path(path) # guard conditions if overwrite: - if store_version == 2: - # attempt to delete any pre-existing array in store - rmdir(store, path) - if chunk_store is not None: - rmdir(chunk_store, path) - else: - group_meta_key = _prefix_to_group_key(store, _path_to_prefix(path)) - array_meta_key = _prefix_to_array_key(store, _path_to_prefix(path)) - data_prefix = data_root + _path_to_prefix(path) - - # attempt to delete any pre-existing array in store - if array_meta_key in store: - store.erase(array_meta_key) # type: ignore - if group_meta_key in store: - store.erase(group_meta_key) # type: ignore - store.erase_prefix(data_prefix) # type: ignore - if chunk_store is not None: - chunk_store.erase_prefix(data_prefix) # type: ignore - - if "/" in path: - # path is a subfolder of an existing array, remove that array - parent_path = "/".join(path.split("/")[:-1]) - sfx = _get_metadata_suffix(store) # type: ignore - array_key = meta_root + parent_path + ".array" + sfx - if array_key in store: - store.erase(array_key) # type: ignore + # attempt to delete any pre-existing array in store + rmdir(store, path) + if chunk_store is not None: + rmdir(chunk_store, path) if not overwrite: if contains_array(store, path): raise 
ContainsArrayError(path) - elif contains_group(store, path, explicit_only=False): + if contains_group(store, path, explicit_only=False): raise ContainsGroupError(path) - elif store_version == 3: - if "/" in path: - # cannot create an array within an existing array path - parent_path = "/".join(path.split("/")[:-1]) - if contains_array(store, parent_path): - raise ContainsArrayError(path) # normalize metadata dtype, object_codec = normalize_dtype(dtype, object_codec) @@ -536,7 +444,7 @@ def _init_array_metadata( fill_value = normalize_fill_value(fill_value, dtype) # optional array metadata - if dimension_separator is None and store_version == 2: + if dimension_separator is None: dimension_separator = getattr(store, "_dimension_separator", None) dimension_separator = normalize_dimension_separator(dimension_separator) @@ -553,16 +461,10 @@ def _init_array_metadata( # obtain compressor config compressor_config = None if compressor: - if store_version == 2: - try: - compressor_config = compressor.get_config() - except AttributeError as e: - raise BadCompressorError(compressor) from e - elif not isinstance(compressor, Codec): - raise ValueError("expected a numcodecs Codec for compressor") - # TODO: alternatively, could autoconvert str to a Codec - # e.g. 'zlib' -> numcodec.Zlib object - # compressor = numcodecs.get_codec({'id': compressor}) + try: + compressor_config = compressor.get_config() + except AttributeError as e: + raise BadCompressorError(compressor) from e # obtain filters config if filters: @@ -596,33 +498,16 @@ def _init_array_metadata( filters_config = None # type: ignore # initialize metadata - # TODO: don't store redundant dimension_separator for v3? - _compressor = compressor_config if store_version == 2 else compressor + _compressor = compressor_config meta = dict( shape=shape, compressor=_compressor, fill_value=fill_value, dimension_separator=dimension_separator, ) - if store_version < 3: - meta.update(dict(chunks=chunks, dtype=dtype, order=order, filters=filters_config)) - assert not storage_transformers - else: - if dimension_separator is None: - dimension_separator = "/" - if filters_config: - attributes = {"filters": filters_config} - else: - attributes = {} - meta.update( - dict( - chunk_grid=dict(type="regular", chunk_shape=chunks, separator=dimension_separator), - chunk_memory_layout=order, - data_type=dtype, - attributes=attributes, - storage_transformers=storage_transformers, - ) - ) + + meta.update(dict(chunks=chunks, dtype=dtype, order=order, filters=filters_config)) + assert not storage_transformers key = _prefix_to_array_key(store, _path_to_prefix(path)) if hasattr(store, "_metadata_class"): @@ -661,24 +546,11 @@ def init_group( # normalize path path = normalize_storage_path(path) - store_version = getattr(store, "_store_version", 2) - if store_version < 3: - # ensure parent group initialized - _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) - - if store_version == 3 and "zarr.json" not in store: - # initialize with default zarr.json entry level metadata - store["zarr.json"] = store._metadata_class.encode_hierarchy_metadata(None) # type: ignore + _require_parent_group(path, store=store, chunk_store=chunk_store, overwrite=overwrite) # initialise metadata _init_group_metadata(store=store, overwrite=overwrite, path=path, chunk_store=chunk_store) - if store_version == 3: - # TODO: Should initializing a v3 group also create a corresponding - # empty folder under data/root/? 
I think probably not until there - # is actual data written there. - pass - def _init_group_metadata( store: StoreLike, @@ -686,50 +558,25 @@ def _init_group_metadata( path: Optional[str] = None, chunk_store: Optional[StoreLike] = None, ): - store_version = getattr(store, "_store_version", 2) path = normalize_storage_path(path) # guard conditions if overwrite: - if store_version == 2: - # attempt to delete any pre-existing items in store - rmdir(store, path) - if chunk_store is not None: - rmdir(chunk_store, path) - else: - group_meta_key = _prefix_to_group_key(store, _path_to_prefix(path)) - array_meta_key = _prefix_to_array_key(store, _path_to_prefix(path)) - data_prefix = data_root + _path_to_prefix(path) - meta_prefix = meta_root + _path_to_prefix(path) - - # attempt to delete any pre-existing array in store - if array_meta_key in store: - store.erase(array_meta_key) # type: ignore - if group_meta_key in store: - store.erase(group_meta_key) # type: ignore - store.erase_prefix(data_prefix) # type: ignore - store.erase_prefix(meta_prefix) # type: ignore - if chunk_store is not None: - chunk_store.erase_prefix(data_prefix) # type: ignore + # attempt to delete any pre-existing items in store + rmdir(store, path) + if chunk_store is not None: + rmdir(chunk_store, path) if not overwrite: if contains_array(store, path): raise ContainsArrayError(path) elif contains_group(store, path): raise ContainsGroupError(path) - elif store_version == 3 and "/" in path: - # cannot create a group overlapping with an existing array name - parent_path = "/".join(path.split("/")[:-1]) - if contains_array(store, parent_path): - raise ContainsArrayError(path) # initialize metadata # N.B., currently no metadata properties are needed, however there may # be in future - if store_version == 3: - meta = {"attributes": {}} # type: ignore - else: - meta = {} + meta: dict[str, Any] = {} key = _prefix_to_group_key(store, _path_to_prefix(path)) if hasattr(store, "_metadata_class"): store[key] = store._metadata_class.encode_group_metadata(meta) diff --git a/tests/test_attrs.py b/tests/test_attrs.py index 7e3377f66..257516384 100644 --- a/tests/test_attrs.py +++ b/tests/test_attrs.py @@ -4,34 +4,24 @@ import pytest import zarr -from zarr._storage.store import meta_root from zarr.attrs import Attributes from zarr.storage import KVStore, DirectoryStore -from zarr._storage.v3 import KVStoreV3 -from .util import CountingDict, CountingDictV3 +from .util import CountingDict from zarr.hierarchy import group -@pytest.fixture(params=[2, 3]) -def zarr_version(request): - return request.param - - -def _init_store(version): - """Use a plain dict() for v2, but KVStoreV3 otherwise.""" - if version == 2: - return dict() - return KVStoreV3(dict()) +def _init_store(): + return dict() class TestAttributes: - def init_attributes(self, store, read_only=False, cache=True, zarr_version=2): - root = ".z" if zarr_version == 2 else meta_root + def init_attributes(self, store, read_only=False, cache=True): + root = ".z" return Attributes(store, key=root + "attrs", read_only=read_only, cache=cache) - def test_storage(self, zarr_version): - store = _init_store(zarr_version) - root = ".z" if zarr_version == 2 else meta_root + def test_storage(self): + store = _init_store() + root = ".z" attrs_key = root + "attrs" a = Attributes(store=store, key=attrs_key) assert isinstance(a.store, KVStore) @@ -44,11 +34,9 @@ def test_storage(self, zarr_version): assert attrs_key in store assert isinstance(store[attrs_key], bytes) d = json.loads(str(store[attrs_key], 
"utf-8")) - if zarr_version == 3: - d = d["attributes"] assert dict(foo="bar", baz=42) == d - def test_utf8_encoding(self, zarr_version): + def test_utf8_encoding(self): project_root = pathlib.Path(zarr.__file__).resolve().parent.parent fixdir = project_root / "fixture" testdir = fixdir / "utf8attrs" @@ -64,9 +52,9 @@ def test_utf8_encoding(self, zarr_version): fixture = group(store=DirectoryStore(str(fixdir))) assert fixture["utf8attrs"].attrs.asdict() == dict(foo="た") - def test_get_set_del_contains(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, zarr_version=zarr_version) + def test_get_set_del_contains(self): + store = _init_store() + a = self.init_attributes(store) assert "foo" not in a a["foo"] = "bar" a["baz"] = 42 @@ -80,9 +68,9 @@ def test_get_set_del_contains(self, zarr_version): # noinspection PyStatementEffect a["foo"] - def test_update_put(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, zarr_version=zarr_version) + def test_update_put(self): + store = _init_store() + a = self.init_attributes(store) assert "foo" not in a assert "bar" not in a assert "baz" not in a @@ -97,9 +85,9 @@ def test_update_put(self, zarr_version): assert a["bar"] == 84 assert "baz" not in a - def test_iterators(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, zarr_version=zarr_version) + def test_iterators(self): + store = _init_store() + a = self.init_attributes(store) assert 0 == len(a) assert set() == set(a) assert set() == set(a.keys()) @@ -115,15 +103,10 @@ def test_iterators(self, zarr_version): assert {"bar", 42} == set(a.values()) assert {("foo", "bar"), ("baz", 42)} == set(a.items()) - def test_read_only(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, read_only=True, zarr_version=zarr_version) - if zarr_version == 2: - store[".zattrs"] = json.dumps(dict(foo="bar", baz=42)).encode("ascii") - else: - store["meta/root/attrs"] = json.dumps(dict(attributes=dict(foo="bar", baz=42))).encode( - "ascii" - ) + def test_read_only(self): + store = _init_store() + a = self.init_attributes(store, read_only=True) + store[".zattrs"] = json.dumps(dict(foo="bar", baz=42)).encode("ascii") assert a["foo"] == "bar" assert a["baz"] == 42 with pytest.raises(PermissionError): @@ -133,9 +116,9 @@ def test_read_only(self, zarr_version): with pytest.raises(PermissionError): a.update(foo="quux") - def test_key_completions(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, zarr_version=zarr_version) + def test_key_completions(self): + store = _init_store() + a = self.init_attributes(store) d = a._ipython_key_completions_() assert "foo" not in d assert "123" not in d @@ -150,23 +133,20 @@ def test_key_completions(self, zarr_version): assert "asdf;" in d assert "baz" not in d - def test_caching_on(self, zarr_version): + def test_caching_on(self): # caching is turned on by default # setup store - store = CountingDict() if zarr_version == 2 else CountingDictV3() - attrs_key = ".zattrs" if zarr_version == 2 else "meta/root/attrs" + store = CountingDict() + attrs_key = ".zattrs" assert 0 == store.counter["__getitem__", attrs_key] assert 0 == store.counter["__setitem__", attrs_key] - if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") - else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") + store[attrs_key] = json.dumps(dict(foo="xxx", 
bar=42)).encode("ascii") assert 0 == store.counter["__getitem__", attrs_key] assert 1 == store.counter["__setitem__", attrs_key] # setup attributes - a = self.init_attributes(store, zarr_version=zarr_version) + a = self.init_attributes(store) # test __getitem__ causes all attributes to be cached assert a["foo"] == "xxx" @@ -178,7 +158,7 @@ def test_caching_on(self, zarr_version): # test __setitem__ updates the cache a["foo"] = "yyy" - get_cnt = 2 if zarr_version == 2 else 3 + get_cnt = 2 assert get_cnt == store.counter["__getitem__", attrs_key] assert 2 == store.counter["__setitem__", attrs_key] assert a["foo"] == "yyy" @@ -187,7 +167,7 @@ def test_caching_on(self, zarr_version): # test update() updates the cache a.update(foo="zzz", bar=84) - get_cnt = 3 if zarr_version == 2 else 5 + get_cnt = 3 assert get_cnt == store.counter["__getitem__", attrs_key] assert 3 == store.counter["__setitem__", attrs_key] assert a["foo"] == "zzz" @@ -205,7 +185,7 @@ def test_caching_on(self, zarr_version): # test __delitem__ updates the cache del a["bar"] - get_cnt = 4 if zarr_version == 2 else 7 + get_cnt = 4 assert get_cnt == store.counter["__getitem__", attrs_key] assert 4 == store.counter["__setitem__", attrs_key] assert "bar" not in a @@ -213,35 +193,28 @@ def test_caching_on(self, zarr_version): assert 4 == store.counter["__setitem__", attrs_key] # test refresh() - if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") - else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") + store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") assert get_cnt == store.counter["__getitem__", attrs_key] a.refresh() - get_cnt = 5 if zarr_version == 2 else 8 + get_cnt = 5 assert get_cnt == store.counter["__getitem__", attrs_key] assert a["foo"] == "xxx" assert get_cnt == store.counter["__getitem__", attrs_key] assert a["bar"] == 42 assert get_cnt == store.counter["__getitem__", attrs_key] - def test_caching_off(self, zarr_version): + def test_caching_off(self): # setup store - store = CountingDict() if zarr_version == 2 else CountingDictV3() - attrs_key = ".zattrs" if zarr_version == 2 else "meta/root/attrs" + store = CountingDict() + attrs_key = ".zattrs" assert 0 == store.counter["__getitem__", attrs_key] assert 0 == store.counter["__setitem__", attrs_key] - - if zarr_version == 2: - store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") - else: - store[attrs_key] = json.dumps(dict(attributes=dict(foo="xxx", bar=42))).encode("ascii") + store[attrs_key] = json.dumps(dict(foo="xxx", bar=42)).encode("ascii") assert 0 == store.counter["__getitem__", attrs_key] assert 1 == store.counter["__setitem__", attrs_key] # setup attributes - a = self.init_attributes(store, cache=False, zarr_version=zarr_version) + a = self.init_attributes(store, cache=False) # test __getitem__ assert a["foo"] == "xxx" @@ -253,38 +226,38 @@ def test_caching_off(self, zarr_version): # test __setitem__ a["foo"] = "yyy" - get_cnt = 4 if zarr_version == 2 else 5 + get_cnt = 4 assert get_cnt == store.counter["__getitem__", attrs_key] assert 2 == store.counter["__setitem__", attrs_key] assert a["foo"] == "yyy" - get_cnt = 5 if zarr_version == 2 else 6 + get_cnt = 5 assert get_cnt == store.counter["__getitem__", attrs_key] assert 2 == store.counter["__setitem__", attrs_key] # test update() a.update(foo="zzz", bar=84) - get_cnt = 6 if zarr_version == 2 else 8 + get_cnt = 6 assert get_cnt == store.counter["__getitem__", attrs_key] assert 3 == 
store.counter["__setitem__", attrs_key] assert a["foo"] == "zzz" assert a["bar"] == 84 - get_cnt = 8 if zarr_version == 2 else 10 + get_cnt = 8 assert get_cnt == store.counter["__getitem__", attrs_key] assert 3 == store.counter["__setitem__", attrs_key] # test __contains__ assert "foo" in a - get_cnt = 9 if zarr_version == 2 else 11 + get_cnt = 9 assert get_cnt == store.counter["__getitem__", attrs_key] assert 3 == store.counter["__setitem__", attrs_key] assert "spam" not in a - get_cnt = 10 if zarr_version == 2 else 12 + get_cnt = 10 assert get_cnt == store.counter["__getitem__", attrs_key] assert 3 == store.counter["__setitem__", attrs_key] - def test_wrong_keys(self, zarr_version): - store = _init_store(zarr_version) - a = self.init_attributes(store, zarr_version=zarr_version) + def test_wrong_keys(self): + store = _init_store() + a = self.init_attributes(store) warning_msg = "only attribute keys of type 'string' will be allowed in the future" diff --git a/tests/test_convenience.py b/tests/test_convenience.py index 7cb4db7a3..d50533e84 100644 --- a/tests/test_convenience.py +++ b/tests/test_convenience.py @@ -27,53 +27,29 @@ from zarr.storage import ( ConsolidatedMetadataStore, FSStore, - KVStore, MemoryStore, atexit_rmtree, - data_root, - meta_root, getsize, ) -from zarr._storage.store import v3_api_available -from zarr._storage.v3 import ( - ConsolidatedMetadataStoreV3, - DirectoryStoreV3, - FSStoreV3, - KVStoreV3, - MemoryStoreV3, - SQLiteStoreV3, -) -from .util import have_fsspec - -_VERSIONS = (2, 3) if v3_api_available else (2,) - - -def _init_creation_kwargs(zarr_version): - kwargs = {"zarr_version": zarr_version} - if zarr_version == 3: - kwargs["path"] = "dataset" - return kwargs -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_open_array(path_type, zarr_version): +def test_open_array(path_type): store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) store = path_type(store) - kwargs = _init_creation_kwargs(zarr_version) # open array, create if doesn't exist - z = open(store, mode="a", shape=100, **kwargs) + z = open(store, mode="a", shape=100) assert isinstance(z, Array) assert z.shape == (100,) # open array, overwrite - z = open(store, mode="w", shape=200, **kwargs) + z = open(store, mode="w", shape=200) assert isinstance(z, Array) assert z.shape == (200,) # open array, read-only - z = open(store, mode="r", **kwargs) + z = open(store, mode="r") assert isinstance(z, Array) assert z.shape == (200,) assert z.read_only @@ -83,79 +59,46 @@ def test_open_array(path_type, zarr_version): open("doesnotexist", mode="r") -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_open_group(path_type, zarr_version): +def test_open_group(path_type): store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) store = path_type(store) - kwargs = _init_creation_kwargs(zarr_version) # open group, create if doesn't exist - g = open(store, mode="a", **kwargs) + g = open(store, mode="a") g.create_group("foo") assert isinstance(g, Group) assert "foo" in g # open group, overwrite - g = open(store, mode="w", **kwargs) + g = open(store, mode="w") assert isinstance(g, Group) assert "foo" not in g # open group, read-only - g = open(store, mode="r", **kwargs) + g = open(store, mode="r") assert isinstance(g, Group) assert g.read_only -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_save_errors(zarr_version): +def test_save_errors(): with pytest.raises(ValueError): # no arrays provided - save_group("data/group.zarr", zarr_version=zarr_version) + 
save_group("data/group.zarr") with pytest.raises(TypeError): # no array provided - save_array("data/group.zarr", zarr_version=zarr_version) + save_array("data/group.zarr") with pytest.raises(ValueError): # no arrays provided - save("data/group.zarr", zarr_version=zarr_version) - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -def test_zarr_v3_save_multiple_unnamed(): - x = np.ones(8) - y = np.zeros(8) - store = KVStoreV3(dict()) - # no path provided - save_group(store, x, y, path="dataset", zarr_version=3) - # names become arr_{i} for unnamed *args - assert data_root + "dataset/arr_0/c0" in store - assert data_root + "dataset/arr_1/c0" in store - assert meta_root + "dataset/arr_0.array.json" in store - assert meta_root + "dataset/arr_1.array.json" in store - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -def test_zarr_v3_save_errors(): - x = np.ones(8) - with pytest.raises(ValueError): - # no path provided - save_group("data/group.zr3", x, zarr_version=3) - with pytest.raises(ValueError): - # no path provided - save_array("data/group.zr3", x, zarr_version=3) - with pytest.raises(ValueError): - # no path provided - save("data/group.zr3", x, zarr_version=3) + save("data/group.zarr") -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_lazy_loader(zarr_version): +def test_lazy_loader(): foo = np.arange(100) bar = np.arange(100, 0, -1) - store = "data/group.zarr" if zarr_version == 2 else "data/group.zr3" - kwargs = _init_creation_kwargs(zarr_version) - save(store, foo=foo, bar=bar, **kwargs) - loader = load(store, **kwargs) + store = "data/group.zarr" + save(store, foo=foo, bar=bar) + loader = load(store) assert "foo" in loader assert "bar" in loader assert "baz" not in loader @@ -166,18 +109,16 @@ def test_lazy_loader(zarr_version): assert "LazyLoader: " in repr(loader) -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_load_array(zarr_version): +def test_load_array(): foo = np.arange(100) bar = np.arange(100, 0, -1) - store = "data/group.zarr" if zarr_version == 2 else "data/group.zr3" - kwargs = _init_creation_kwargs(zarr_version) - save(store, foo=foo, bar=bar, **kwargs) + store = "data/group.zarr" + save(store, foo=foo, bar=bar) # can also load arrays directly into a numpy array for array_name in ["foo", "bar"]: - array_path = "dataset/" + array_name if zarr_version == 3 else array_name - array = load(store, path=array_path, zarr_version=zarr_version) + array_path = array_name + array = load(store, path=array_path) assert isinstance(array, np.ndarray) if array_name == "foo": assert_array_equal(foo, array) @@ -185,10 +126,8 @@ def test_load_array(zarr_version): assert_array_equal(bar, array) -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_tree(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) - g1 = zarr.group(**kwargs) +def test_tree(): + g1 = zarr.group() g1.create_group("foo") g3 = g1.create_group("bar") g3.create_group("baz") @@ -198,16 +137,13 @@ def test_tree(zarr_version): assert str(zarr.tree(g1)) == str(g1.tree()) -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("stores_from_path", [False, True]) @pytest.mark.parametrize( "with_chunk_store,listable", [(False, True), (True, True), (False, False)], ids=["default-listable", "with_chunk_store-listable", "default-unlistable"], ) -def test_consolidate_metadata( - with_chunk_store, zarr_version, listable, monkeypatch, stores_from_path -): +def test_consolidate_metadata(with_chunk_store, listable, monkeypatch, 
stores_from_path): # setup initial data if stores_from_path: store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) if with_chunk_store: chunk_store = tempfile.mkdtemp() atexit.register(atexit_rmtree, chunk_store) else: chunk_store = None - version_kwarg = {"zarr_version": zarr_version} else: - if zarr_version == 2: - store = MemoryStore() - chunk_store = MemoryStore() if with_chunk_store else None - elif zarr_version == 3: - store = MemoryStoreV3() - chunk_store = MemoryStoreV3() if with_chunk_store else None - version_kwarg = {} - path = "dataset" if zarr_version == 3 else None - z = group(store, chunk_store=chunk_store, path=path, **version_kwarg) + store = MemoryStore() + chunk_store = MemoryStore() if with_chunk_store else None + path = None + z = group(store, chunk_store=chunk_store, path=path) # Reload the actual store implementation in case str store_to_copy = z.store @@ -248,41 +178,22 @@ def test_consolidate_metadata( else: store_class = store - if zarr_version == 3: - # error on v3 if path not provided - with pytest.raises(ValueError): - consolidate_metadata(store_class, path=None) - - with pytest.raises(ValueError): - consolidate_metadata(store_class, path="") - # perform consolidation out = consolidate_metadata(store_class, path=path) assert isinstance(out, Group) assert ["g1", "g2"] == list(out) if not stores_from_path: - if zarr_version == 2: - assert isinstance(out._store, ConsolidatedMetadataStore) - assert ".zmetadata" in store - meta_keys = [ - ".zgroup", - "g1/.zgroup", - "g2/.zgroup", - "g2/.zattrs", - "g2/arr/.zarray", - "g2/arr/.zattrs", - ] - else: - assert isinstance(out._store, ConsolidatedMetadataStoreV3) - assert "meta/root/consolidated/.zmetadata" in store - meta_keys = [ - "zarr.json", - meta_root + "dataset.group.json", - meta_root + "dataset/g1.group.json", - meta_root + "dataset/g2.group.json", - meta_root + "dataset/g2/arr.array.json", - "meta/root/consolidated.group.json", - ] + assert isinstance(out._store, ConsolidatedMetadataStore) + assert ".zmetadata" in store + meta_keys = [ + ".zgroup", + "g1/.zgroup", + "g2/.zgroup", + "g2/.zattrs", + "g2/arr/.zarray", + "g2/arr/.zattrs", + ] + for key in meta_keys: del store[key] @@ -293,11 +204,7 @@ def test_consolidate_metadata( monkeypatch.setattr(fs_memory.MemoryFileSystem, "isdir", lambda x, y: False) monkeypatch.delattr(fs_memory.MemoryFileSystem, "ls") fs = fs_memory.MemoryFileSystem() - if zarr_version == 2: - store_to_open = FSStore("", fs=fs) - else: - store_to_open = FSStoreV3("", fs=fs) - + store_to_open = FSStore("", fs=fs) # copy original store to new unlistable store store_to_open.update(store_to_copy) @@ -305,7 +212,7 @@ def test_consolidate_metadata( store_to_open = store # open consolidated - z2 = open_consolidated(store_to_open, chunk_store=chunk_store, path=path, **version_kwarg) + z2 = open_consolidated(store_to_open, chunk_store=chunk_store, path=path) assert ["g1", "g2"] == list(z2) assert "world" == z2.g2.attrs["hello"] assert 1 == z2.g2.arr.attrs["data"] @@ -320,26 +227,17 @@ def test_consolidate_metadata( if stores_from_path: # path string is not a BaseStore subclass so cannot be used to # initialize a ConsolidatedMetadataStore.
- if zarr_version == 2: - with pytest.raises(ValueError): - cmd = ConsolidatedMetadataStore(store) - elif zarr_version == 3: - with pytest.raises(ValueError): - cmd = ConsolidatedMetadataStoreV3(store) + + with pytest.raises(ValueError): + cmd = ConsolidatedMetadataStore(store) else: # tests del/write on the store - if zarr_version == 2: - cmd = ConsolidatedMetadataStore(store) - with pytest.raises(PermissionError): - del cmd[".zgroup"] - with pytest.raises(PermissionError): - cmd[".zgroup"] = None - else: - cmd = ConsolidatedMetadataStoreV3(store) - with pytest.raises(PermissionError): - del cmd[meta_root + "dataset.group.json"] - with pytest.raises(PermissionError): - cmd[meta_root + "dataset.group.json"] = None + + cmd = ConsolidatedMetadataStore(store) + with pytest.raises(PermissionError): + del cmd[".zgroup"] + with pytest.raises(PermissionError): + cmd[".zgroup"] = None # test getsize on the store assert isinstance(getsize(cmd), Integral) @@ -377,7 +275,6 @@ def test_consolidate_metadata( path=path, cache_attrs=True, synchronizer=None, - **version_kwarg, ) @@ -469,7 +366,7 @@ def test_excludes_includes(self): copy_store(source, dest, excludes=excludes) assert len(dest) == 2 - root = "" if self._version == 2 else meta_root + root = "" assert root + "foo" not in dest # multiple excludes @@ -500,7 +397,7 @@ def test_dry_run(self): def test_if_exists(self): source = self.source dest = self._get_dest_store() - root = "" if self._version == 2 else meta_root + root = "" dest[root + "bar/baz"] = b"mmm" # default ('raise') @@ -530,27 +427,6 @@ def test_if_exists(self): copy_store(source, dest, if_exists="foobar") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestCopyStoreV3(TestCopyStore): - _version = 3 - - def setUp(self): - source = KVStoreV3(dict()) - source["meta/root/foo"] = b"xxx" - source["meta/root/bar/baz"] = b"yyy" - source["meta/root/bar/qux"] = b"zzz" - self.source = source - - def _get_dest_store(self): - return KVStoreV3(dict()) - - def test_mismatched_store_versions(self): - # cannot copy between stores of mixed Zarr versions - dest = KVStore(dict()) - with pytest.raises(ValueError): - copy_store(self.source, dest) - - def check_copied_array(original, copied, without_attrs=False, expect_props=None): # setup source_h5py = original.__module__.startswith("h5py.") @@ -672,28 +548,6 @@ def test_copy_all(): assert destination_group.subgroup.attrs["info"] == "sub attrs" -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -def test_copy_all_v3(): - """ - https://github.com/zarr-developers/zarr-python/issues/269 - - copy_all used to not copy attributes as `.keys()` - - """ - original_group = zarr.group(store=MemoryStoreV3(), path="group1", overwrite=True) - original_group.create_group("subgroup") - - destination_group = zarr.group(store=MemoryStoreV3(), path="group2", overwrite=True) - - # copy from memory to directory store - copy_all( - original_group, - destination_group, - dry_run=False, - ) - assert "subgroup" in destination_group - - class TestCopy: @pytest.fixture(params=[False, True], ids=["zarr", "hdf5"]) def source(self, request, tmpdir): @@ -948,100 +802,3 @@ def test_logging(self, source, dest, tmpdir): # bad option with pytest.raises(TypeError): copy(source["foo"], dest, dry_run=True, log=True) - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestCopyV3(TestCopy): - @pytest.fixture(params=["zarr", "hdf5"]) - def source(self, request, tmpdir): - def prep_source(source): - foo = 
source.create_group("foo") - foo.attrs["experiment"] = "weird science" - baz = foo.create_dataset("bar/baz", data=np.arange(100), chunks=(50,)) - baz.attrs["units"] = "metres" - if request.param == "hdf5": - extra_kws = dict( - compression="gzip", - compression_opts=3, - fillvalue=84, - shuffle=True, - fletcher32=True, - ) - else: - extra_kws = dict(compressor=Zlib(3), order="F", fill_value=42, filters=[Adler32()]) - source.create_dataset( - "spam", - data=np.arange(100, 200).reshape(20, 5), - chunks=(10, 2), - dtype="i2", - **extra_kws, - ) - return source - - if request.param == "hdf5": - h5py = pytest.importorskip("h5py") - fn = tmpdir.join("source.h5") - with h5py.File(str(fn), mode="w") as h5f: - yield prep_source(h5f) - elif request.param == "zarr": - yield prep_source(group(path="group1", zarr_version=3)) - - # Test with various destination StoreV3 types as TestCopyV3 covers rmdir - destinations = ["hdf5", "zarr", "zarr_kvstore", "zarr_directorystore", "zarr_sqlitestore"] - if have_fsspec: - destinations += ["zarr_fsstore"] - - @pytest.fixture(params=destinations) - def dest(self, request, tmpdir): - if request.param == "hdf5": - h5py = pytest.importorskip("h5py") - fn = tmpdir.join("dest.h5") - with h5py.File(str(fn), mode="w") as h5f: - yield h5f - elif request.param == "zarr": - yield group(path="group2", zarr_version=3) - elif request.param == "zarr_kvstore": - store = KVStoreV3(dict()) - yield group(store, path="group2", zarr_version=3) - elif request.param == "zarr_fsstore": - fn = tmpdir.join("dest.zr3") - store = FSStoreV3(str(fn), auto_mkdir=True) - yield group(store, path="group2", zarr_version=3) - elif request.param == "zarr_directorystore": - fn = tmpdir.join("dest.zr3") - store = DirectoryStoreV3(str(fn)) - yield group(store, path="group2", zarr_version=3) - elif request.param == "zarr_sqlitestore": - fn = tmpdir.join("dest.db") - store = SQLiteStoreV3(str(fn)) - yield group(store, path="group2", zarr_version=3) - - def test_copy_array_create_options(self, source, dest): - dest_h5py = dest.__module__.startswith("h5py.") - - # copy array, provide creation options - compressor = Zlib(9) - create_kws = dict(chunks=(10,)) - if dest_h5py: - create_kws.update( - compression="gzip", compression_opts=9, shuffle=True, fletcher32=True, fillvalue=42 - ) - else: - # v3 case has no filters argument in zarr create_kws - create_kws.update(compressor=compressor, fill_value=42, order="F") - copy(source["foo/bar/baz"], dest, without_attrs=True, **create_kws) - check_copied_array( - source["foo/bar/baz"], dest["baz"], without_attrs=True, expect_props=create_kws - ) - - def test_copy_group_no_name(self, source, dest): - if source.__module__.startswith("h5py"): - with pytest.raises(TypeError): - copy(source, dest) - else: - # For v3, dest.name will be inferred from source.name - copy(source, dest) - check_copied_group(source, dest[source.name.lstrip("/")]) - - copy(source, dest, name="root") - check_copied_group(source, dest["root"]) diff --git a/tests/test_core.py b/tests/test_core.py index e8d527c4e..630337179 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -33,15 +33,9 @@ import zarr from zarr._storage.store import ( BaseStore, - v3_api_available, -) -from zarr._storage.v3_storage_transformers import ( - DummyStorageTransfomer, - ShardingStorageTransformer, - v3_sharding_available, ) + from zarr.core import Array -from zarr.errors import ArrayNotFoundError, ContainsGroupError from zarr.meta import json_loads from zarr.n5 import N5Store, N5FSStore, n5_keywords from 
zarr.storage import ( @@ -56,24 +50,10 @@ SQLiteStore, atexit_rmglob, atexit_rmtree, - data_root, init_array, init_group, - meta_root, normalize_store_arg, ) -from zarr._storage.v3 import ( - ABSStoreV3, - DBMStoreV3, - DirectoryStoreV3, - FSStoreV3, - KVStoreV3, - LMDBStoreV3, - LRUStoreCacheV3, - RmdirV3, - SQLiteStoreV3, - StoreV3, -) from zarr.util import buffer_size from .util import abs_container, skip_test_env_var, have_fsspec, mktemp @@ -82,7 +62,6 @@ class TestArray: - version = 2 root = "" path = "" compressor = Zlib(level=1) @@ -139,7 +118,7 @@ def test_array_init(self): # normal initialization store = self.create_store() init_array(store, shape=100, chunks=10, dtype=" 2: - # in v3, attributes are in a sub-dictionary of the metadata - attrs = attrs["attributes"] assert "foo" in attrs and attrs["foo"] == "bar" a.attrs["bar"] = "foo" assert a.attrs.key in a.store attrs = json_loads(a.store[a.attrs.key]) - if self.version > 2: - # in v3, attributes are in a sub-dictionary of the metadata - attrs = attrs["attributes"] assert "foo" in attrs and attrs["foo"] == "bar" assert "bar" in attrs and attrs["bar"] == "foo" a.store.close() @@ -2298,7 +2256,7 @@ def test_nbytes_stored(self): class TestArrayNoCache(TestArray): def test_cache_metadata(self): a1 = self.create_array(shape=100, chunks=10, dtype="i1", cache_metadata=False) - path = None if self.version == 2 else a1.path + path = None a2 = Array(a1.store, path=path, cache_metadata=True) assert a1.shape == a2.shape assert a1.size == a2.size @@ -2339,7 +2297,7 @@ def test_cache_metadata(self): def test_cache_attrs(self): a1 = self.create_array(shape=100, chunks=10, dtype="i1", cache_attrs=False) - path = None if self.version == 2 else "arr1" + path = None a2 = Array(a1.store, path=path, cache_attrs=True) assert a1.attrs.asdict() == a2.attrs.asdict() @@ -2460,7 +2418,7 @@ def test_read_nitems_less_than_blocksize_from_multiple_chunks(self): """ z = self.create_array(shape=1000000, chunks=100_000) z[40_000:80_000] = 1 - path = None if self.version == 2 else z.path + path = None b = Array(z.store, path=path, read_only=True, partial_decompress=True) assert (b[40_000:80_000] == 1).all() @@ -2470,7 +2428,7 @@ def test_read_from_all_blocks(self): """ z = self.create_array(shape=1000000, chunks=100_000) z[2:99_000] = 1 - path = None if self.version == 2 else z.path + path = None b = Array(z.store, path=path, read_only=True, partial_decompress=True) assert (b[2:99_000] == 1).all() @@ -2517,7 +2475,7 @@ def test_read_nitems_less_than_blocksize_from_multiple_chunks(self): """ z = self.create_array(shape=1000000, chunks=100_000) z[40_000:80_000] = 1 - path = None if self.version == 2 else z.path + path = None b = Array(z.store, path=path, read_only=True, partial_decompress=True) assert (b[40_000:80_000] == 1).all() @@ -2527,607 +2485,11 @@ def test_read_from_all_blocks(self): """ z = self.create_array(shape=1000000, chunks=100_000) z[2:99_000] = 1 - path = None if self.version == 2 else z.path + path = None b = Array(z.store, path=path, read_only=True, partial_decompress=True) assert (b[2:99_000] == 1).all() -#### -# StoreV3 test classes inheriting from the above below this point -#### - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayV3(TestArray): - version = 3 - root = meta_root - path = "arr1" - - def create_store(self): - return KVStoreV3(dict()) - - def expected(self): - # tests for array without path will not be run for v3 stores - assert self.version == 3 - return [ - 
"73ab8ace56719a5c9308c3754f5e2d57bc73dc20", - "5fb3d02b8f01244721582929b3cad578aec5cea5", - "26b098bedb640846e18dc2fbc1c27684bb02b532", - "799a458c287d431d747bec0728987ca4fe764549", - "c780221df84eb91cb62f633f12d3f1eaa9cee6bd", - ] - - # TODO: fix test_nbytes_stored - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithPathV3(TestArrayV3): - def test_array_init(self): - store = self.create_store() - # can initialize an array without a path - init_array(store, shape=100, chunks=10, dtype=" BaseStore: - path = mkdtemp() - atexit.register(shutil.rmtree, path) - return DirectoryStoreV3(path) - - def test_nbytes_stored(self): - # dict as store - z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - - -@skip_test_env_var("ZARR_TEST_ABS") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithABSStoreV3(TestArrayV3): - def create_store(self) -> ABSStoreV3: - client = abs_container() - store = ABSStoreV3(client=client) - store.rmdir() - return store - - -# TODO: TestArrayWithN5StoreV3 -# class TestArrayWithN5StoreV3(TestArrayWithDirectoryStoreV3): - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithDBMStoreV3(TestArrayV3): - def create_store(self) -> DBMStoreV3: - path = mktemp(suffix=".anydbm") - atexit.register(atexit_rmglob, path + "*") - store = DBMStoreV3(path, flag="n") - return store - - def test_nbytes_stored(self): - pass # not implemented - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithDBMStoreV3BerkeleyDB(TestArrayV3): - def create_store(self) -> DBMStoreV3: - bsddb3 = pytest.importorskip("bsddb3") - path = mktemp(suffix=".dbm") - atexit.register(os.remove, path) - store = DBMStoreV3(path, flag="n", open=bsddb3.btopen) - return store - - def test_nbytes_stored(self): - pass # not implemented - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithLMDBStoreV3(TestArrayV3): - lmdb_buffers = True - - def create_store(self) -> LMDBStoreV3: - pytest.importorskip("lmdb") - path = mktemp(suffix=".lmdb") - atexit.register(atexit_rmtree, path) - store = LMDBStoreV3(path, buffers=self.lmdb_buffers) - return store - - def test_store_has_bytes_values(self): - pass # returns values as memoryviews/buffers instead of bytes - - def test_nbytes_stored(self): - pass # not implemented - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithLMDBStoreV3NoBuffers(TestArrayWithLMDBStoreV3): - lmdb_buffers = False - - def test_nbytes_stored(self): - pass # not implemented - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithSQLiteStoreV3(TestArrayV3): - def create_store(self): - pytest.importorskip("sqlite3") - path = mktemp(suffix=".db") - atexit.register(atexit_rmtree, path) - store = SQLiteStoreV3(path) - return store - - def test_nbytes_stored(self): - pass # not implemented - - -# skipped adding V3 equivalents for compressors (no change in v3): -# TestArrayWithNoCompressor -# TestArrayWithBZ2Compressor -# TestArrayWithBloscCompressor -# TestArrayWithLZMACompressor - -# skipped test with filters (v3 protocol removed filters) -# TestArrayWithFilters - - -# 
custom store, does not support getsize() -# Note: this custom mapping doesn't actually have all methods in the -# v3 spec (e.g. erase), but they aren't needed here. - - -class CustomMappingV3(RmdirV3, StoreV3): - def __init__(self): - self.inner = KVStoreV3(dict()) - - def __iter__(self): - return iter(self.keys()) - - def __len__(self): - return len(self.inner) - - def keys(self): - return self.inner.keys() - - def values(self): - return self.inner.values() - - def get(self, item, default=None): - try: - return self.inner[item] - except KeyError: - return default - - def __getitem__(self, item): - return self.inner[item] - - def __setitem__(self, item, value): - self.inner[item] = ensure_bytes(value) - - def __delitem__(self, key): - del self.inner[key] - - def __contains__(self, item): - return item in self.inner - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithCustomMappingV3(TestArrayV3): - def create_store(self): - store = CustomMappingV3() - return store - - def test_nbytes_stored(self): - z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) for k, v in z.store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - - def test_len(self): - # dict as store - z = self.create_array(shape=1000, chunks=100) - assert len(z._store) == 2 - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayNoCacheV3(TestArrayWithPathV3): - def create_store(self): - store = KVStoreV3(dict()) - return store - - def test_object_arrays_danger(self): - # skip this one as it only works if metadata are cached - pass - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithStoreCacheV3(TestArrayV3): - def create_store(self): - store = LRUStoreCacheV3(dict(), max_size=None) - return store - - def test_store_has_bytes_values(self): - # skip as the cache has no control over how the store provides values - pass - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3(TestArrayV3): - compressor = Blosc() - - def create_store(self): - path = mkdtemp() - atexit.register(shutil.rmtree, path) - key_separator = self.dimension_separator - store = FSStoreV3( - path, - key_separator=key_separator, - auto_mkdir=True, - create=True, - check=True, - missing_exceptions=None, - ) - return store - - def expected(self): - return [ - "1509abec4285494b61cd3e8d21f44adc3cf8ddf6", - "7cfb82ec88f7ecb7ab20ae3cb169736bc76332b8", - "b663857bb89a8ab648390454954a9cdd453aa24b", - "21e90fa927d09cbaf0e3b773130e2dc05d18ff9b", - "e8c1fdd18b5c2ee050b59d0c8c95d07db642459c", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3FromFilesystem(TestArrayWithFSStoreV3): - def create_store(self): - from fsspec.implementations.local import LocalFileSystem - - fs = LocalFileSystem(auto_mkdir=True) - path = mkdtemp() - atexit.register(shutil.rmtree, path) - key_separator = self.dimension_separator - store = FSStoreV3( - path, - fs=fs, - key_separator=key_separator, - create=True, - check=True, - missing_exceptions=None, - ) - return store - - def expected(self): - return [ - 
"1509abec4285494b61cd3e8d21f44adc3cf8ddf6", - "7cfb82ec88f7ecb7ab20ae3cb169736bc76332b8", - "b663857bb89a8ab648390454954a9cdd453aa24b", - "21e90fa927d09cbaf0e3b773130e2dc05d18ff9b", - "e8c1fdd18b5c2ee050b59d0c8c95d07db642459c", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3PartialRead(TestArrayWithFSStoreV3): - partial_decompress = True - - def expected(self): - return [ - "1509abec4285494b61cd3e8d21f44adc3cf8ddf6", - "7cfb82ec88f7ecb7ab20ae3cb169736bc76332b8", - "b663857bb89a8ab648390454954a9cdd453aa24b", - "21e90fa927d09cbaf0e3b773130e2dc05d18ff9b", - "e8c1fdd18b5c2ee050b59d0c8c95d07db642459c", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -@pytest.mark.skipif(not v3_sharding_available, reason="sharding is disabled") -class TestArrayWithFSStoreV3PartialReadUncompressedSharded(TestArrayWithFSStoreV3): - partial_decompress = True - compressor = None - - def create_storage_transformers(self, shape) -> Tuple[Any]: - num_dims = 1 if isinstance(shape, int) else len(shape) - sharding_transformer = ShardingStorageTransformer( - "indexed", chunks_per_shard=(2,) * num_dims - ) - return (sharding_transformer,) - - def test_nbytes_stored(self): - z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - - def test_supports_efficient_get_set_partial_values(self): - z = self.create_array(shape=100, chunks=10) - assert z.chunk_store.supports_efficient_get_partial_values - assert not z.chunk_store.supports_efficient_set_partial_values() - - def expected(self): - return [ - "90109fc2a4e17efbcb447003ea1c08828b91f71e", - "2b73519f7260dba3ddce0d2b70041888856fec6b", - "bca5798be2ed71d444f3045b05432d937682b7dd", - "9ff1084501e28520e577662a6e3073f1116c76a2", - "882a97cad42417f90f111d0cb916a21579650467", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3Nested(TestArrayWithFSStoreV3): - dimension_separator = "/" - - def expected(self): - return [ - "1509abec4285494b61cd3e8d21f44adc3cf8ddf6", - "7cfb82ec88f7ecb7ab20ae3cb169736bc76332b8", - "b663857bb89a8ab648390454954a9cdd453aa24b", - "21e90fa927d09cbaf0e3b773130e2dc05d18ff9b", - "e8c1fdd18b5c2ee050b59d0c8c95d07db642459c", - ] - - -@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithFSStoreV3NestedPartialRead(TestArrayWithFSStoreV3): - dimension_separator = "/" - - def expected(self): - return [ - "1509abec4285494b61cd3e8d21f44adc3cf8ddf6", - "7cfb82ec88f7ecb7ab20ae3cb169736bc76332b8", - "b663857bb89a8ab648390454954a9cdd453aa24b", - "21e90fa927d09cbaf0e3b773130e2dc05d18ff9b", - "e8c1fdd18b5c2ee050b59d0c8c95d07db642459c", - ] - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestArrayWithStorageTransformersV3(TestArrayWithChunkStoreV3): - def create_storage_transformers(self, shape) -> Tuple[Any]: - return ( - DummyStorageTransfomer("dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT), - ) - - def 
expected(self): - return [ - "3fb9a4f8233b09ad02067b6b7fc9fd5caa405c7d", - "89c8eb364beb84919fc9153d2c1ed2696274ec18", - "73307055c3aec095dd1232c38d793ef82a06bd97", - "6152c09255a5efa43b1a115546e35affa00c138c", - "2f8802fc391f67f713302e84fad4fd8f1366d6c2", - ] - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -@pytest.mark.skipif(not v3_sharding_available, reason="sharding is disabled") -class TestArrayWithShardingStorageTransformerV3(TestArrayV3): - compressor = None - - def create_storage_transformers(self, shape) -> Tuple[Any]: - num_dims = 1 if isinstance(shape, int) else len(shape) - return (ShardingStorageTransformer("indexed", chunks_per_shard=(2,) * num_dims),) - - def test_nbytes_stored(self): - z = self.create_array(shape=1000, chunks=100) - expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - z[:] = 42 - expect_nbytes_stored = sum(buffer_size(v) for k, v in z._store.items() if k != "zarr.json") - assert expect_nbytes_stored == z.nbytes_stored - - # mess with store - z.store[data_root + z._key_prefix + "foo"] = list(range(10)) - assert -1 == z.nbytes_stored - - def test_keys_inner_store(self): - z = self.create_array(shape=1000, chunks=100) - assert z.chunk_store.keys() == z._store.keys() - meta_keys = set(z.store.keys()) - z[:] = 42 - assert len(z.chunk_store.keys() - meta_keys) == 10 - # inner store should have half the data keys, - # since chunks_per_shard is 2: - assert len(z._store.keys() - meta_keys) == 5 - - def test_supports_efficient_get_set_partial_values(self): - z = self.create_array(shape=100, chunks=10) - assert not z.chunk_store.supports_efficient_get_partial_values - assert not z.chunk_store.supports_efficient_set_partial_values() - - def expected(self): - return [ - "90109fc2a4e17efbcb447003ea1c08828b91f71e", - "2b73519f7260dba3ddce0d2b70041888856fec6b", - "bca5798be2ed71d444f3045b05432d937682b7dd", - "9ff1084501e28520e577662a6e3073f1116c76a2", - "882a97cad42417f90f111d0cb916a21579650467", - ] - - -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -def test_array_mismatched_store_versions(): - store_v3 = KVStoreV3(dict()) - store_v2 = KVStore(dict()) - - # separate chunk store - chunk_store_v2 = KVStore(dict()) - chunk_store_v3 = KVStoreV3(dict()) - - init_kwargs = dict(shape=100, chunks=10, dtype="<f8") diff --git a/tests/test_creation.py b/tests/test_creation.py index 27ce00bc8..369d75570 100644 --- a/tests/test_creation.py +++ b/tests/test_creation.py @@ -7,8 +7,6 @@ import pytest from numpy.testing import assert_array_equal -from zarr._storage.store import DEFAULT_ZARR_VERSION -from zarr._storage.v3_storage_transformers import DummyStorageTransfomer from zarr.codecs import Zlib from zarr.core import Array from zarr.creation import ( @@ -28,14 +26,12 @@ from zarr.hierarchy import open_group from zarr.n5 import N5Store from zarr.storage import DirectoryStore, KVStore -from zarr._storage.store import v3_api_available -from zarr._storage.v3 import DirectoryStoreV3, KVStoreV3 from zarr.sync import ThreadSynchronizer from .util import mktemp, have_fsspec -_VERSIONS = (None, 2, 3) if v3_api_available else (None, 2) -_VERSIONS2 = (2, 3) if v3_api_available else (2,) +_VERSIONS = (None, 2) +_VERSIONS2 = (2,) # something bcolz-like @@ -64,25 +60,22 @@ def __getitem__(self, item): return self.data[item] -def _init_creation_kwargs(zarr_version, at_root=True): - kwargs = {"zarr_version": zarr_version} +def _init_creation_kwargs(at_root=True): +
kwargs = {} if not at_root: kwargs["path"] = "array" return kwargs -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_array(zarr_version, at_root): - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version - kwargs = _init_creation_kwargs(zarr_version, at_root) +def test_array(at_root): + kwargs = _init_creation_kwargs(at_root) # with numpy array a = np.arange(100) z = array(a, chunks=10, **kwargs) assert a.shape == z.shape assert a.dtype == z.dtype - assert z._store._store_version == expected_zarr_version assert_array_equal(a, z[:]) # with array-like @@ -131,39 +124,35 @@ def test_array(zarr_version, at_root): assert np.dtype("i8") == z.dtype -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_empty(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) +def test_empty(at_root): + kwargs = _init_creation_kwargs(at_root) z = empty(100, chunks=10, **kwargs) assert (100,) == z.shape assert (10,) == z.chunks -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_zeros(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) +def test_zeros(at_root): + kwargs = _init_creation_kwargs(at_root) z = zeros(100, chunks=10, **kwargs) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.zeros(100), z[:]) -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_ones(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) +def test_ones(at_root): + kwargs = _init_creation_kwargs(at_root) z = ones(100, chunks=10, **kwargs) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.ones(100), z[:]) -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_full(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) +def test_full(at_root): + kwargs = _init_creation_kwargs(at_root) z = full(100, chunks=10, fill_value=42, dtype="i4", **kwargs) assert (100,) == z.shape assert (10,) == z.chunks @@ -174,10 +163,9 @@ def test_full(zarr_version, at_root): assert np.all(np.isnan(z[:])) -@pytest.mark.parametrize("zarr_version", [None, 2]) # TODO -def test_full_additional_dtypes(zarr_version): +def test_full_additional_dtypes(): """Test additional types that aren't part of the base v3 spec.""" - kwargs = _init_creation_kwargs(zarr_version) + kwargs = _init_creation_kwargs() # NaT z = full(100, chunks=10, fill_value="NaT", dtype="M8[s]", **kwargs) assert np.all(np.isnat(z[:])) @@ -209,11 +197,10 @@ def test_full_additional_dtypes(zarr_version): @pytest.mark.parametrize("dimension_separator", [".", "/", None]) -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_open_array(zarr_version, at_root, dimension_separator): +def test_open_array(at_root, dimension_separator): store = "data/array.zarr" - kwargs = _init_creation_kwargs(zarr_version, at_root) + kwargs = _init_creation_kwargs(at_root) # mode == 'w' z = open_array( @@ -221,23 +208,19 @@ def test_open_array(zarr_version, at_root, dimension_separator): ) z[:] = 42 assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, DirectoryStoreV3) + + assert isinstance(z.store, 
DirectoryStore) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) if dimension_separator is None: - assert z._dimension_separator == "/" if zarr_version == 3 else "." + assert z._dimension_separator == "." else: assert z._dimension_separator == dimension_separator # mode in 'r', 'r+' group_kwargs = kwargs.copy() - if zarr_version == 3: - group_kwargs["path"] = "group" open_group("data/group.zarr", mode="w", **group_kwargs) for mode in "r", "r+": with pytest.raises(ValueError): @@ -246,10 +229,7 @@ def test_open_array(zarr_version, at_root, dimension_separator): open_array("data/group.zarr", mode=mode) z = open_array(store, mode="r", **kwargs) assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, DirectoryStoreV3) + assert isinstance(z.store, DirectoryStore) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) @@ -257,10 +237,7 @@ def test_open_array(zarr_version, at_root, dimension_separator): z[:] = 43 z = open_array(store, mode="r+", **kwargs) assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, DirectoryStoreV3) + assert isinstance(z.store, DirectoryStore) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) @@ -272,18 +249,12 @@ def test_open_array(zarr_version, at_root, dimension_separator): z = open_array(store, mode="a", shape=100, chunks=10, **kwargs) z[:] = 42 assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, DirectoryStoreV3) + assert isinstance(z.store, DirectoryStore) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) - expected_error = TypeError if zarr_version == 3 else ValueError - # v3 path does not conflict, but will raise TypeError without shape kwarg - with pytest.raises(expected_error): - # array would end up at data/group.zarr/meta/root/array.array.json + with pytest.raises(ValueError): open_array("data/group.zarr", mode="a", **kwargs) # mode in 'w-', 'x' @@ -292,18 +263,14 @@ def test_open_array(zarr_version, at_root, dimension_separator): z = open_array(store, mode=mode, shape=100, chunks=10, **kwargs) z[:] = 42 assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, DirectoryStoreV3) + assert isinstance(z.store, DirectoryStore) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) with pytest.raises(ValueError): open_array(store, mode=mode, **kwargs) - expected_error = TypeError if zarr_version == 3 else ValueError - # v3 path does not conflict, but will raise TypeError without shape kwarg - with pytest.raises(expected_error): + + with pytest.raises(ValueError): open_array("data/group.zarr", mode=mode, **kwargs) # with synchronizer @@ -327,21 +294,15 @@ def test_open_array(zarr_version, at_root, dimension_separator): def test_open_array_none(): - # open with both store and zarr_version = None + # open with store = None z = open_array(mode="w", shape=100, chunks=10) assert isinstance(z, Array) - assert z._version == 2 @pytest.mark.parametrize("dimension_separator", [".", "/", None]) -@pytest.mark.parametrize("zarr_version", _VERSIONS2) -def 
test_open_array_infer_separator_from_store(zarr_version, dimension_separator): - if zarr_version == 3: - StoreClass = DirectoryStoreV3 - path = "data" - else: - StoreClass = DirectoryStore - path = None +def test_open_array_infer_separator_from_store(dimension_separator): + StoreClass = DirectoryStore + path = None store = StoreClass("data/array.zarr", dimension_separator=dimension_separator) # Note: no dimension_separator kwarg to open_array @@ -349,25 +310,20 @@ def test_open_array_infer_separator_from_store(zarr_version, dimension_separator z = open_array(store, path=path, mode="w", shape=100, chunks=10) z[:] = 42 assert isinstance(z, Array) - if z._store._store_version == 2: - assert isinstance(z.store, DirectoryStore) - else: - assert isinstance(z.store, DirectoryStoreV3) + assert isinstance(z.store, DirectoryStore) assert (100,) == z.shape assert (10,) == z.chunks assert_array_equal(np.full(100, fill_value=42), z[:]) if dimension_separator is None: - assert z._dimension_separator == "/" if zarr_version == 3 else "." + assert z._dimension_separator == "." else: assert z._dimension_separator == dimension_separator -# TODO: N5 support for v3 -@pytest.mark.parametrize("zarr_version", [None, 2]) -def test_open_array_n5(zarr_version): +def test_open_array_n5(): store = "data/array.zarr" - kwargs = _init_creation_kwargs(zarr_version) + kwargs = _init_creation_kwargs() # for N5 store store = "data/array.n5" @@ -381,8 +337,6 @@ def test_open_array_n5(zarr_version): store = "data/group.n5" group_kwargs = kwargs.copy() - # if zarr_version == 3: - # group_kwargs['path'] = 'group' z = open_group(store, mode="w", **group_kwargs) i = z.create_group("inner") a = i.zeros("array", shape=100, chunks=10) @@ -401,13 +355,12 @@ def test_open_array_n5(zarr_version): assert_array_equal(np.full(100, fill_value=42), a[:]) -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_open_array_dict_store(zarr_version, at_root): +def test_open_array_dict_store(at_root): # dict will become a KVStore store = dict() - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_store_type = KVStoreV3 if zarr_version == 3 else KVStore + kwargs = _init_creation_kwargs(at_root) + expected_store_type = KVStore # mode == 'w' z = open_array(store, mode="w", shape=100, chunks=10, **kwargs) @@ -419,11 +372,10 @@ def test_open_array_dict_store(zarr_version, at_root): assert_array_equal(np.full(100, fill_value=42), z[:]) -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_create_in_dict(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_store_type = KVStoreV3 if zarr_version == 3 else KVStore +def test_create_in_dict(at_root): + kwargs = _init_creation_kwargs(at_root) + expected_store_type = KVStore for func in [empty, zeros, ones]: a = func(100, store=dict(), **kwargs) @@ -434,27 +386,23 @@ def test_create_in_dict(zarr_version, at_root): @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec") -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_create_writeable_mode(zarr_version, at_root, tmp_path): +def test_create_writeable_mode(at_root, tmp_path): # Regression test for https://github.com/zarr-developers/zarr-python/issues/1306 import fsspec - kwargs = _init_creation_kwargs(zarr_version, at_root) + kwargs = _init_creation_kwargs(at_root) store = fsspec.get_mapper(str(tmp_path)) z = create(100, 
store=store, **kwargs) assert z.store.map == store -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_empty_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version +def test_empty_like(at_root): + kwargs = _init_creation_kwargs(at_root) # zarr array z = empty(100, chunks=10, dtype="f4", compressor=Zlib(5), order="F", **kwargs) - # zarr_version will be inferred from z, but have to specify a path in v3 z2 = empty_like(z, path=kwargs.get("path")) assert z.shape == z2.shape assert z.chunks == z2.chunks @@ -462,7 +410,6 @@ def test_empty_like(zarr_version, at_root): assert z.compressor.get_config() == z2.compressor.get_config() assert z.fill_value == z2.fill_value assert z.order == z2.order - assert z._store._store_version == z2._store._store_version == expected_zarr_version # numpy array a = np.empty(100, dtype="f4") @@ -471,7 +418,6 @@ def test_empty_like(zarr_version, at_root): assert (100,) == z3.chunks assert a.dtype == z3.dtype assert z3.fill_value is None - assert z3._store._store_version == expected_zarr_version # something slightly silly a = [0] * 100 @@ -494,11 +440,9 @@ def test_empty_like(zarr_version, at_root): assert isinstance(z.chunks, tuple) -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_zeros_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version +def test_zeros_like(at_root): + kwargs = _init_creation_kwargs(at_root) # zarr array z = zeros(100, chunks=10, dtype="f4", compressor=Zlib(5), order="F", **kwargs) @@ -509,7 +453,7 @@ def test_zeros_like(zarr_version, at_root): assert z.compressor.get_config() == z2.compressor.get_config() assert z.fill_value == z2.fill_value assert z.order == z2.order - assert z._store._store_version == z2._store._store_version == expected_zarr_version + # numpy array a = np.empty(100, dtype="f4") z3 = zeros_like(a, chunks=10, **kwargs) @@ -519,11 +463,9 @@ def test_zeros_like(zarr_version, at_root): assert 0 == z3.fill_value -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_ones_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version +def test_ones_like(at_root): + kwargs = _init_creation_kwargs(at_root) # zarr array z = ones(100, chunks=10, dtype="f4", compressor=Zlib(5), order="F", **kwargs) @@ -534,7 +476,7 @@ def test_ones_like(zarr_version, at_root): assert z.compressor.get_config() == z2.compressor.get_config() assert z.fill_value == z2.fill_value assert z.order == z2.order - assert z._store._store_version == z2._store._store_version == expected_zarr_version + # numpy array a = np.empty(100, dtype="f4") z3 = ones_like(a, chunks=10, **kwargs) @@ -542,14 +484,11 @@ def test_ones_like(zarr_version, at_root): assert (10,) == z3.chunks assert a.dtype == z3.dtype assert 1 == z3.fill_value - assert z3._store._store_version == expected_zarr_version -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_full_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version 
is None else zarr_version +def test_full_like(at_root): + kwargs = _init_creation_kwargs(at_root) z = full(100, chunks=10, dtype="f4", compressor=Zlib(5), fill_value=42, order="F", **kwargs) z2 = full_like(z, path=kwargs.get("path")) @@ -559,7 +498,7 @@ def test_full_like(zarr_version, at_root): assert z.compressor.get_config() == z2.compressor.get_config() assert z.fill_value == z2.fill_value assert z.order == z2.order - assert z._store._store_version == z2._store._store_version == expected_zarr_version + # numpy array a = np.empty(100, dtype="f4") z3 = full_like(a, chunks=10, fill_value=42, **kwargs) @@ -567,17 +506,15 @@ def test_full_like(zarr_version, at_root): assert (10,) == z3.chunks assert a.dtype == z3.dtype assert 42 == z3.fill_value - assert z3._store._store_version == expected_zarr_version + with pytest.raises(TypeError): # fill_value missing full_like(a, chunks=10, **kwargs) -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_open_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version +def test_open_like(at_root): + kwargs = _init_creation_kwargs(at_root) # zarr array path = mktemp() @@ -590,24 +527,21 @@ def test_open_like(zarr_version, at_root): assert z.compressor.get_config() == z2.compressor.get_config() assert z.fill_value == z2.fill_value assert z.order == z2.order - assert z._store._store_version == z2._store._store_version == expected_zarr_version + # numpy array path = mktemp() atexit.register(shutil.rmtree, path) a = np.empty(100, dtype="f4") - z3 = open_like(a, path, chunks=10, zarr_version=zarr_version) + z3 = open_like(a, path, chunks=10) assert a.shape == z3.shape assert (10,) == z3.chunks assert a.dtype == z3.dtype assert 0 == z3.fill_value - assert z3._store._store_version == expected_zarr_version -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_create(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version +def test_create(at_root): + kwargs = _init_creation_kwargs(at_root) # defaults z = create(100, **kwargs) @@ -617,7 +551,6 @@ def test_create(zarr_version, at_root): assert np.dtype(None) == z.dtype assert "blosc" == z.compressor.codec_id assert 0 == z.fill_value - assert z._store._store_version == expected_zarr_version # all specified z = create(100, chunks=10, dtype="i4", compressor=Zlib(1), fill_value=42, order="F", **kwargs) @@ -629,7 +562,6 @@ def test_create(zarr_version, at_root): assert 1 == z.compressor.level assert 42 == z.fill_value assert "F" == z.order - assert z._store._store_version == expected_zarr_version # with synchronizer synchronizer = ThreadSynchronizer() @@ -638,7 +570,6 @@ def test_create(zarr_version, at_root): assert (100,) == z.shape assert (10,) == z.chunks assert synchronizer is z.synchronizer - assert z._store._store_version == expected_zarr_version # don't allow string as compressor arg with pytest.raises(ValueError): @@ -671,9 +602,8 @@ def test_create(zarr_version, at_root): assert z.chunks == z.shape -@pytest.mark.parametrize("zarr_version", _VERSIONS) -def test_compression_args(zarr_version): - kwargs = _init_creation_kwargs(zarr_version) +def test_compression_args(): + kwargs = _init_creation_kwargs() with warnings.catch_warnings(): warnings.simplefilter("default") @@ 
-704,12 +634,11 @@ def test_compression_args(zarr_version): create(100, compressor=Zlib(9), compression_opts=1, **kwargs) -@pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) -def test_create_read_only(zarr_version, at_root): +def test_create_read_only(at_root): # https://github.com/alimanfoo/zarr/issues/151 - kwargs = _init_creation_kwargs(zarr_version, at_root) + kwargs = _init_creation_kwargs(at_root) # create an array initially read-only, then enable writing z = create(100, read_only=True, **kwargs) @@ -738,18 +667,6 @@ def test_json_dumps_chunks_numpy_dtype(): assert np.all(z[...] == 0) -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -@pytest.mark.parametrize("at_root", [False, True]) -def test_create_with_storage_transformers(at_root): - kwargs = _init_creation_kwargs(zarr_version=3, at_root=at_root) - transformer = DummyStorageTransfomer( - "dummy_type", test_value=DummyStorageTransfomer.TEST_CONSTANT - ) - z = create(1000000000, chunks=True, storage_transformers=[transformer], **kwargs) - assert isinstance(z.chunk_store, DummyStorageTransfomer) - assert z.chunk_store.test_value == DummyStorageTransfomer.TEST_CONSTANT - - @pytest.mark.parametrize( ("init_shape", "init_chunks", "shape", "chunks"), ( diff --git a/tests/test_hierarchy.py b/tests/test_hierarchy.py index 6d4b1ff54..8cd51cc94 100644 --- a/tests/test_hierarchy.py +++ b/tests/test_hierarchy.py @@ -18,7 +18,6 @@ from numcodecs import Zlib from numpy.testing import assert_array_equal -from zarr._storage.store import _get_metadata_suffix, v3_api_available from zarr.attrs import Attributes from zarr.core import Array from zarr.creation import open_array @@ -38,29 +37,13 @@ array_meta_key, atexit_rmglob, atexit_rmtree, - data_root, group_meta_key, init_array, init_group, - meta_root, ) -from zarr._storage.v3 import ( - ABSStoreV3, - KVStoreV3, - DirectoryStoreV3, - MemoryStoreV3, - FSStoreV3, - ZipStoreV3, - DBMStoreV3, - LMDBStoreV3, - SQLiteStoreV3, - LRUStoreCacheV3, -) -from zarr.util import InfoReporter, buffer_size -from .util import skip_test_env_var, have_fsspec, abs_container, mktemp - -_VERSIONS = (2, 3) if v3_api_available else (2,) +from zarr.util import InfoReporter +from .util import skip_test_env_var, have_fsspec, abs_container, mktemp # noinspection PyStatementEffect @@ -148,10 +131,7 @@ def _subgroup_path(self, group, path): def test_create_group(self): g1 = self.create_group() - if g1._version == 2: - path, name = "", "/" - else: - path, name = "group", "/group" + path, name = "", "/" # check root group assert path == g1.path assert name == g1.name @@ -205,12 +185,8 @@ def __str__(self): # test bad keys with pytest.raises(ValueError): g1.create_group("foo") # already exists - if g1._version == 2: - with pytest.raises(ValueError): - g1.create_group("a/b/c") # already exists - elif g1._version == 3: - # for v3 'group/a/b/c' does not already exist - g1.create_group("a/b/c") + with pytest.raises(ValueError): + g1.create_group("a/b/c") # already exists with pytest.raises(ValueError): g4.create_group("/a/b/c") # already exists with pytest.raises(ValueError): @@ -260,16 +236,7 @@ def test_require_group(self): assert g5.store is g5a.store # test path normalization - if g1._version == 2: - assert g1.require_group("quux") == g1.require_group("/quux/") - elif g1._version: - # These are not equal in v3! 
diff --git a/tests/test_hierarchy.py b/tests/test_hierarchy.py
index 6d4b1ff54..8cd51cc94 100644
--- a/tests/test_hierarchy.py
+++ b/tests/test_hierarchy.py
@@ -18,7 +18,6 @@
 from numcodecs import Zlib
 from numpy.testing import assert_array_equal

-from zarr._storage.store import _get_metadata_suffix, v3_api_available
 from zarr.attrs import Attributes
 from zarr.core import Array
 from zarr.creation import open_array
@@ -38,29 +37,13 @@
     array_meta_key,
     atexit_rmglob,
     atexit_rmtree,
-    data_root,
     group_meta_key,
     init_array,
     init_group,
-    meta_root,
 )
-from zarr._storage.v3 import (
-    ABSStoreV3,
-    KVStoreV3,
-    DirectoryStoreV3,
-    MemoryStoreV3,
-    FSStoreV3,
-    ZipStoreV3,
-    DBMStoreV3,
-    LMDBStoreV3,
-    SQLiteStoreV3,
-    LRUStoreCacheV3,
-)
-from zarr.util import InfoReporter, buffer_size
-from .util import skip_test_env_var, have_fsspec, abs_container, mktemp
-
-_VERSIONS = (2, 3) if v3_api_available else (2,)
+from zarr.util import InfoReporter
+from .util import skip_test_env_var, have_fsspec, abs_container, mktemp


 # noinspection PyStatementEffect
@@ -148,10 +131,7 @@ def _subgroup_path(self, group, path):

     def test_create_group(self):
         g1 = self.create_group()

-        if g1._version == 2:
-            path, name = "", "/"
-        else:
-            path, name = "group", "/group"
+        path, name = "", "/"

         # check root group
         assert path == g1.path
         assert name == g1.name
@@ -205,12 +185,8 @@ def __str__(self):
         # test bad keys
         with pytest.raises(ValueError):
             g1.create_group("foo")  # already exists
-        if g1._version == 2:
-            with pytest.raises(ValueError):
-                g1.create_group("a/b/c")  # already exists
-        elif g1._version == 3:
-            # for v3 'group/a/b/c' does not already exist
-            g1.create_group("a/b/c")
+        with pytest.raises(ValueError):
+            g1.create_group("a/b/c")  # already exists
         with pytest.raises(ValueError):
             g4.create_group("/a/b/c")  # already exists
         with pytest.raises(ValueError):
@@ -260,16 +236,7 @@ def test_require_group(self):
         assert g5.store is g5a.store

         # test path normalization
-        if g1._version == 2:
-            assert g1.require_group("quux") == g1.require_group("/quux/")
-        elif g1._version:
-            # These are not equal in v3!
-            # 'quux' will be within the group:
-            # meta/root/group/quux.group.json
-            # '/quux/' will be outside of the group at:
-            # meta/root/quux.group.json
-            assert g1.require_group("quux") != g1.require_group("/quux/")
-
+        assert g1.require_group("quux") == g1.require_group("/quux/")
     # multi
     g6, g7 = g1.require_groups("y", "z")
     assert isinstance(g6, Group)
@@ -289,24 +256,9 @@ def test_rmdir_group_and_array_metadata_files(self):
         g1.create_dataset("arr1", shape=(100,), chunks=(10,), dtype=np.uint8)

         # create level 1 child group
-        g2 = g1.create_group("foo")
+        _ = g1.create_group("foo")
         g1.create_dataset("arr2", shape=(100,), chunks=(10,), dtype=np.uint8)

-        if g1._version > 2 and g1.store.is_erasable():
-            arr_path = g1.path + "/arr1"
-            sfx = _get_metadata_suffix(g1.store)
-            array_meta_file = meta_root + arr_path + ".array" + sfx
-            assert array_meta_file in g1.store
-            group_meta_file = meta_root + g2.path + ".group" + sfx
-            assert group_meta_file in g1.store
-
-            # rmdir on the array path should also remove the metadata file
-            g1.store.rmdir(arr_path)
-            assert array_meta_file not in g1.store
-            # rmdir on the group path should also remove its metadata file
-            g1.store.rmdir(g2.path)
-            assert group_meta_file not in g1.store
-
     def _dataset_path(self, group, path):
         path = path.rstrip("/")
         absolute = path.startswith("/")
@@ -541,12 +493,9 @@ def test_getitem_contains_iterators(self):
         # setup
         g1 = self.create_group()
         g2 = g1.create_group("foo/bar")
-        if g1._version == 2:
-            d1 = g2.create_dataset("/a/b/c", shape=1000, chunks=100)
-        else:
-            # v3: cannot create a dataset at the root by starting with /
-            # instead, need to create the dataset on g1 directly
-            d1 = g1.create_dataset("a/b/c", shape=1000, chunks=100)
+
+        d1 = g2.create_dataset("/a/b/c", shape=1000, chunks=100)
+
         d1[:] = np.arange(1000)
         d2 = g1.create_dataset("foo/baz", shape=3000, chunks=300)
         d2[:] = np.arange(3000)
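# ---------------------------------------------------------------------------
# Editor's sketch (not part of the patch). The restored assertions in the next
# hunk rely on v2 item access normalizing leading and trailing slashes, and on
# an absolute path creating a dataset relative to the root rather than the
# calling group:

import zarr

g1 = zarr.group()
g2 = g1.create_group("foo/bar")
assert g1["/foo/bar/"] == g2  # slashes are stripped during lookup in v2
d1 = g2.create_dataset("/a/b/c", shape=1000, chunks=100)  # lands under the root
assert "a" in g1
# ---------------------------------------------------------------------------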
@@ -555,13 +504,7 @@ def test_getitem_contains_iterators(self):
         assert isinstance(g1["foo"], Group)
         assert isinstance(g1["foo"]["bar"], Group)
         assert isinstance(g1["foo/bar"], Group)
-        if g1._version == 2:
-            assert isinstance(g1["/foo/bar/"], Group)
-        else:
-            # start or end with / raises KeyError
-            # TODO: should we allow stripping of these on v3?
-            with pytest.raises(KeyError):
-                assert isinstance(g1["/foo/bar/"], Group)
+        assert isinstance(g1["/foo/bar/"], Group)
         assert isinstance(g1["foo/baz"], Array)
         assert g2 == g1["foo/bar"]
         assert g1["foo"]["bar"] == g1["foo/bar"]
@@ -604,18 +547,12 @@ def test_getitem_contains_iterators(self):

         # test __iter__, keys()

-        if g1._version == 2:
-            # currently assumes sorted by key
-            assert ["a", "foo"] == list(g1)
-            assert ["a", "foo"] == list(g1.keys())
-            assert ["bar", "baz"] == list(g1["foo"])
-            assert ["bar", "baz"] == list(g1["foo"].keys())
-        else:
-            # v3 is not necessarily sorted by key
-            assert ["a", "foo"] == sorted(list(g1))
-            assert ["a", "foo"] == sorted(list(g1.keys()))
-            assert ["bar", "baz"] == sorted(list(g1["foo"]))
-            assert ["bar", "baz"] == sorted(list(g1["foo"].keys()))
+        # currently assumes sorted by key
+        assert ["a", "foo"] == list(g1)
+        assert ["a", "foo"] == list(g1.keys())
+        assert ["bar", "baz"] == list(g1["foo"])
+        assert ["bar", "baz"] == list(g1["foo"].keys())
+
         assert [] == sorted(g1["foo/bar"])
         assert [] == sorted(g1["foo/bar"].keys())
@@ -624,9 +561,6 @@ def test_getitem_contains_iterators(self):
         items = list(g1.items())
         values = list(g1.values())
-        if g1._version == 3:
-            # v3 are not automatically sorted by key
-            items, values = zip(*sorted(zip(items, values), key=lambda x: x[0]))
         assert "a" == items[0][0]
         assert g1["a"] == items[0][1]
         assert g1["a"] == values[0]
@@ -636,9 +570,6 @@ def test_getitem_contains_iterators(self):
         items = list(g1["foo"].items())
         values = list(g1["foo"].values())
-        if g1._version == 3:
-            # v3 are not automatically sorted by key
-            items, values = zip(*sorted(zip(items, values), key=lambda x: x[0]))
         assert "bar" == items[0][0]
         assert g1["foo"]["bar"] == items[0][1]
         assert g1["foo"]["bar"] == values[0]
@@ -650,13 +581,8 @@ def test_getitem_contains_iterators(self):

         groups = list(g1.groups())
         arrays = list(g1.arrays())
-        if g1._version == 2:
-            # currently assumes sorted by key
-            assert ["a", "foo"] == list(g1.group_keys())
-        else:
-            assert ["a", "foo"] == sorted(list(g1.group_keys()))
-            groups = sorted(groups)
-            arrays = sorted(arrays)
+        # currently assumes sorted by key
+        assert ["a", "foo"] == list(g1.group_keys())
         assert "a" == groups[0][0]
         assert g1["a"] == groups[0][1]
         assert "foo" == groups[1][0]
@@ -668,9 +594,6 @@ def test_getitem_contains_iterators(self):
         assert ["baz"] == list(g1["foo"].array_keys())
         groups = list(g1["foo"].groups())
         arrays = list(g1["foo"].arrays())
-        if g1._version == 3:
-            groups = sorted(groups)
-            arrays = sorted(arrays)
         assert "bar" == groups[0][0]
         assert g1["foo"]["bar"] == groups[0][1]
         assert "baz" == arrays[0][0]
@@ -699,8 +622,6 @@ def visitor4(name, obj):
             "foo/bar",
             "foo/baz",
         ]
-        if g1._version == 3:
-            expected_items = [g1.path + "/" + i for i in expected_items]
         assert expected_items == items

         del items[:]
@@ -709,8 +630,6 @@ def visitor4(name, obj):
             "foo/bar",
             "foo/baz",
         ]
-        if g1._version == 3:
-            expected_items = [g1.path + "/" + i for i in expected_items]
         assert expected_items == items

         del items[:]
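# ---------------------------------------------------------------------------
# Editor's sketch (not part of the patch). With only v2 left, group iteration
# is backed by a sorted key listing, which is why the assertions above can
# compare against sorted literals without calling sorted() first:

import zarr

g = zarr.group()
g.create_group("foo")
g.create_group("a")
assert list(g) == ["a", "foo"]  # keys come back sorted
assert list(g.group_keys()) == ["a", "foo"]
# ---------------------------------------------------------------------------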
- assert "bar" not in g - assert "meta/root/bar.array.json" in g._store - if g._chunk_store: - assert "data/root/bar/c0" in g._chunk_store - else: - assert "data/root/bar/c0" in g._store + assert "bar" in g assert isinstance(g["foo2"], Group) - if g2._version == 2: - assert_array_equal(data, g["bar"]) - else: - # TODO: How to access element created outside of group.path in v3? - # One option is to make a Hierarchy class representing the - # root. Currently Group requires specification of `path`, - # but the path of the root would be just '' which is not - # currently allowed. - pass + + assert_array_equal(data, g["bar"]) with pytest.raises(ValueError): g2.move("bar", "bar2") @@ -1035,39 +936,19 @@ def test_paths(self): g1 = self.create_group() g2 = g1.create_group("foo/bar") - if g1._version == 2: - assert g1 == g1["/"] - assert g1 == g1["//"] - assert g1 == g1["///"] - assert g1 == g2["/"] - assert g1 == g2["//"] - assert g1 == g2["///"] - assert g2 == g1["foo/bar"] - assert g2 == g1["/foo/bar"] - assert g2 == g1["foo/bar/"] - assert g2 == g1["//foo/bar"] - assert g2 == g1["//foo//bar//"] - assert g2 == g1["///foo///bar///"] - assert g2 == g2["/foo/bar"] - else: - # the expected key format gives a match - assert g2 == g1["foo/bar"] - - # TODO: Should presence of a trailing slash raise KeyError? - # The spec says "the final character is not a / character" - # but we currently strip trailing '/' as done for v2. - assert g2 == g1["foo/bar/"] - - # double slash also currently works (spec doesn't mention this - # case, but have kept it for v2 behavior compatibility) - assert g2 == g1["foo//bar"] - - # TODO, root: fix these cases - # v3: leading / implies we are at the root, not within a group, - # so these all raise KeyError - for path in ["/foo/bar", "//foo/bar", "//foo//bar//", "///fooo///bar///"]: - with pytest.raises(KeyError): - g1[path] + assert g1 == g1["/"] + assert g1 == g1["//"] + assert g1 == g1["///"] + assert g1 == g2["/"] + assert g1 == g2["//"] + assert g1 == g2["///"] + assert g2 == g1["foo/bar"] + assert g2 == g1["/foo/bar"] + assert g2 == g1["foo/bar/"] + assert g2 == g1["//foo/bar"] + assert g2 == g1["//foo//bar//"] + assert g2 == g1["///foo///bar///"] + assert g2 == g2["/foo/bar"] with pytest.raises(ValueError): g1["."] @@ -1133,77 +1014,12 @@ def test_group_init_from_dict(chunk_dict): assert chunk_store is not g.chunk_store -# noinspection PyStatementEffect -@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") -class TestGroupV3(TestGroup, unittest.TestCase): - @staticmethod - def create_store(): - # can be overridden in sub-classes - return KVStoreV3(dict()), None - - def create_group( - self, store=None, path="group", read_only=False, chunk_store=None, synchronizer=None - ): - # can be overridden in sub-classes - if store is None: - store, chunk_store = self.create_store() - init_group(store, path=path, chunk_store=chunk_store) - g = Group( - store, - path=path, - read_only=read_only, - chunk_store=chunk_store, - synchronizer=synchronizer, - ) - return g - - def test_group_init_1(self): - store, chunk_store = self.create_store() - g = self.create_group(store, chunk_store=chunk_store) - assert store is g.store - if chunk_store is None: - assert store is g.chunk_store - else: - assert chunk_store is g.chunk_store - assert not g.read_only - # different path/name in v3 case - assert "group" == g.path - assert "/group" == g.name - assert "group" == g.basename - - assert isinstance(g.attrs, Attributes) - g.attrs["foo"] = "bar" - assert g.attrs["foo"] == "bar" - - 
@@ -1133,77 +1014,12 @@ def test_group_init_from_dict(chunk_dict):
     assert chunk_store is not g.chunk_store


-# noinspection PyStatementEffect
-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3(TestGroup, unittest.TestCase):
-    @staticmethod
-    def create_store():
-        # can be overridden in sub-classes
-        return KVStoreV3(dict()), None
-
-    def create_group(
-        self, store=None, path="group", read_only=False, chunk_store=None, synchronizer=None
-    ):
-        # can be overridden in sub-classes
-        if store is None:
-            store, chunk_store = self.create_store()
-        init_group(store, path=path, chunk_store=chunk_store)
-        g = Group(
-            store,
-            path=path,
-            read_only=read_only,
-            chunk_store=chunk_store,
-            synchronizer=synchronizer,
-        )
-        return g
-
-    def test_group_init_1(self):
-        store, chunk_store = self.create_store()
-        g = self.create_group(store, chunk_store=chunk_store)
-        assert store is g.store
-        if chunk_store is None:
-            assert store is g.chunk_store
-        else:
-            assert chunk_store is g.chunk_store
-        assert not g.read_only
-        # different path/name in v3 case
-        assert "group" == g.path
-        assert "/group" == g.name
-        assert "group" == g.basename
-
-        assert isinstance(g.attrs, Attributes)
-        g.attrs["foo"] = "bar"
-        assert g.attrs["foo"] == "bar"
-
-        assert isinstance(g.info, InfoReporter)
-        assert isinstance(repr(g.info), str)
-        assert isinstance(g.info._repr_html_(), str)
-        store.close()
-
-    def test_group_init_errors_2(self):
-        store, chunk_store = self.create_store()
-        path = "tmp"
-        init_array(store, path=path, shape=1000, chunks=100, chunk_store=chunk_store)
-        # array blocks group
-        with pytest.raises(ValueError):
-            Group(store, path=path, chunk_store=chunk_store)
-        store.close()
-
-
 class TestGroupWithMemoryStore(TestGroup):
     @staticmethod
     def create_store():
         return MemoryStore(), None


-# noinspection PyStatementEffect
-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithMemoryStore(TestGroupWithMemoryStore, TestGroupV3):
-    @staticmethod
-    def create_store():
-        return MemoryStoreV3(), None
-
-
 class TestGroupWithDirectoryStore(TestGroup):
     @staticmethod
     def create_store():
@@ -1213,16 +1029,6 @@ def create_store():
         return store, None


-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithDirectoryStore(TestGroupWithDirectoryStore, TestGroupV3):
-    @staticmethod
-    def create_store():
-        path = tempfile.mkdtemp()
-        atexit.register(atexit_rmtree, path)
-        store = DirectoryStoreV3(path)
-        return store, None
-
-
 @skip_test_env_var("ZARR_TEST_ABS")
 class TestGroupWithABSStore(TestGroup):
     @staticmethod
@@ -1238,22 +1044,6 @@ def test_pickle(self):
         super().test_pickle()


-@skip_test_env_var("ZARR_TEST_ABS")
-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithABSStore(TestGroupV3):
-    @staticmethod
-    def create_store():
-        container_client = abs_container()
-        store = ABSStoreV3(client=container_client)
-        store.rmdir()
-        return store, None
-
-    @pytest.mark.skipif(sys.version_info < (3, 7), reason="attr not serializable in py36")
-    def test_pickle(self):
-        # internal attribute on ContainerClient isn't serializable for py36 and earlier
-        super().test_pickle()
-
-
 class TestGroupWithNestedDirectoryStore(TestGroup):
     @staticmethod
     def create_store():
@@ -1284,39 +1074,6 @@ def test_round_trip_nd(self):
         np.testing.assert_array_equal(h[name][:], data)


-@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec")
-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithFSStore(TestGroupWithFSStore, TestGroupV3):
-    @staticmethod
-    def create_store():
-        path = tempfile.mkdtemp()
-        atexit.register(atexit_rmtree, path)
-        store = FSStoreV3(path)
-        return store, None
-
-    def test_round_trip_nd(self):
-        data = np.arange(1000).reshape(10, 10, 10)
-        name = "raw"
-
-        store, _ = self.create_store()
-        f = open_group(store, path="group", mode="w")
-        f.create_dataset(name, data=data, chunks=(5, 5, 5), compressor=None)
-        h = open_group(store, path="group", mode="r")
-        np.testing.assert_array_equal(h[name][:], data)
-
-        f = open_group(store, path="group2", mode="w")
-
-        data_size = data.nbytes
-        group_meta_size = buffer_size(store[meta_root + "group.group.json"])
-        group2_meta_size = buffer_size(store[meta_root + "group2.group.json"])
-        array_meta_size = buffer_size(store[meta_root + "group/raw.array.json"])
-        assert store.getsize() == data_size + group_meta_size + group2_meta_size + array_meta_size
-        # added case with path to complete coverage
-        assert store.getsize("group") == data_size + group_meta_size + array_meta_size
-        assert store.getsize("group2") == group2_meta_size
-        assert store.getsize("group/raw") == data_size + array_meta_size
-
-
 @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec")
 class TestGroupWithNestedFSStore(TestGroupWithFSStore):
     @staticmethod
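# ---------------------------------------------------------------------------
# Editor's sketch (not part of the patch). Every TestGroupWith*Store class kept
# by this patch only overrides create_store(); a hypothetical new store would
# be wired into the suite the same way (TestGroup and atexit_rmtree come from
# earlier in tests/test_hierarchy.py):

import atexit
import tempfile

from zarr.storage import DirectoryStore


class TestGroupWithMyStore(TestGroup):  # hypothetical subclass for illustration
    @staticmethod
    def create_store():
        path = tempfile.mkdtemp()
        atexit.register(atexit_rmtree, path)
        return DirectoryStore(path), None  # (store, chunk_store)
# ---------------------------------------------------------------------------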
@@ -1340,30 +1097,6 @@ def test_inconsistent_dimension_separator(self):
         )


-@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec")
-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithNestedFSStore(TestGroupV3WithFSStore):
-    @staticmethod
-    def create_store():
-        path = tempfile.mkdtemp()
-        atexit.register(atexit_rmtree, path)
-        store = FSStoreV3(path, key_separator="/", auto_mkdir=True)
-        return store, None
-
-    def test_inconsistent_dimension_separator(self):
-        data = np.arange(1000).reshape(10, 10, 10)
-        name = "raw"
-
-        store, _ = self.create_store()
-        f = open_group(store, path="group", mode="w")
-
-        # cannot specify dimension_separator that conflicts with the store
-        with pytest.raises(ValueError):
-            f.create_dataset(
-                name, data=data, chunks=(5, 5, 5), compressor=None, dimension_separator="."
-            )
-
-
 class TestGroupWithZipStore(TestGroup):
     @staticmethod
     def create_store():
@@ -1389,16 +1122,6 @@ def test_move(self):
         pass


-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithZipStore(TestGroupWithZipStore, TestGroupV3):
-    @staticmethod
-    def create_store():
-        path = mktemp(suffix=".zip")
-        atexit.register(os.remove, path)
-        store = ZipStoreV3(path)
-        return store, None
-
-
 class TestGroupWithDBMStore(TestGroup):
     @staticmethod
     def create_store():
@@ -1408,16 +1131,6 @@ def create_store():
         return store, None


-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithDBMStore(TestGroupWithDBMStore, TestGroupV3):
-    @staticmethod
-    def create_store():
-        path = mktemp(suffix=".anydbm")
-        atexit.register(atexit_rmglob, path + "*")
-        store = DBMStoreV3(path, flag="n")
-        return store, None
-
-
 class TestGroupWithDBMStoreBerkeleyDB(TestGroup):
     @staticmethod
     def create_store():
@@ -1428,17 +1141,6 @@ def create_store():
         return store, None


-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithDBMStoreBerkeleyDB(TestGroupWithDBMStoreBerkeleyDB, TestGroupV3):
-    @staticmethod
-    def create_store():
-        bsddb3 = pytest.importorskip("bsddb3")
-        path = mktemp(suffix=".dbm")
-        atexit.register(os.remove, path)
-        store = DBMStoreV3(path, flag="n", open=bsddb3.btopen)
-        return store, None
-
-
 class TestGroupWithLMDBStore(TestGroup):
     @staticmethod
     def create_store():
@@ -1449,17 +1151,6 @@ def create_store():
         return store, None


-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithLMDBStore(TestGroupWithLMDBStore, TestGroupV3):
-    @staticmethod
-    def create_store():
-        pytest.importorskip("lmdb")
-        path = mktemp(suffix=".lmdb")
-        atexit.register(atexit_rmtree, path)
-        store = LMDBStoreV3(path)
-        return store, None
-
-
 class TestGroupWithSQLiteStore(TestGroup):
     def create_store(self):
         pytest.importorskip("sqlite3")
@@ -1469,16 +1160,6 @@ def create_store(self):
         return store, None


-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithSQLiteStore(TestGroupWithSQLiteStore, TestGroupV3):
-    def create_store(self):
-        pytest.importorskip("sqlite3")
-        path = mktemp(suffix=".db")
-        atexit.register(atexit_rmtree, path)
-        store = SQLiteStoreV3(path)
-        return store, None
-
-
 class TestGroupWithChunkStore(TestGroup):
     @staticmethod
     def create_store():
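# ---------------------------------------------------------------------------
# Editor's sketch (not part of the patch). The v2 chunk-store behaviour that
# the remaining test_chunk_store exercises: metadata keys stay in `store`,
# while chunk keys go to the separate `chunk_store`:

import numpy as np
import zarr
from zarr.storage import KVStore

store, chunk_store = KVStore(dict()), KVStore(dict())
g = zarr.group(store=store, chunk_store=chunk_store)
a = g.zeros("foo", shape=100, chunks=10)
a[:] = np.arange(100)
assert ".zgroup" in store and "foo/.zarray" in store  # metadata in store
assert all(k.startswith("foo/") for k in chunk_store)  # chunks in chunk_store
# ---------------------------------------------------------------------------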
@@ -1509,41 +1190,6 @@ def test_chunk_store(self):
         assert expect == actual


-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithChunkStore(TestGroupWithChunkStore, TestGroupV3):
-    @staticmethod
-    def create_store():
-        return KVStoreV3(dict()), KVStoreV3(dict())
-
-    def test_chunk_store(self):
-        # setup
-        store, chunk_store = self.create_store()
-        path = "group1"
-        g = self.create_group(store, path=path, chunk_store=chunk_store)
-
-        # check attributes
-        assert store is g.store
-        assert chunk_store is g.chunk_store
-
-        # create array
-        a = g.zeros("foo", shape=100, chunks=10)
-        assert store is a.store
-        assert chunk_store is a.chunk_store
-        a[:] = np.arange(100)
-        assert_array_equal(np.arange(100), a[:])
-
-        # check store keys
-        group_key = meta_root + path + ".group.json"
-        array_key = meta_root + path + "/foo" + ".array.json"
-        expect = sorted([group_key, array_key, "zarr.json"])
-        actual = sorted(store.keys())
-        assert expect == actual
-        expect = [data_root + path + "/foo/c" + str(i) for i in range(10)]
-        expect += ["zarr.json"]
-        actual = sorted(chunk_store.keys())
-        assert expect == actual
-
-
 class TestGroupWithStoreCache(TestGroup):
     @staticmethod
     def create_store():
@@ -1551,58 +1197,8 @@ def create_store():
         return store, None


-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-class TestGroupV3WithStoreCache(TestGroupWithStoreCache, TestGroupV3):
-    @staticmethod
-    def create_store():
-        store = LRUStoreCacheV3(dict(), max_size=None)
-        return store, None
-
-
-@pytest.mark.parametrize("zarr_version", _VERSIONS)
-def test_group(zarr_version):
-    # test the group() convenience function
-
-    # basic usage
-    if zarr_version == 2:
-        g = group()
-        assert "" == g.path
-        assert "/" == g.name
-    else:
-        g = group(path="group1", zarr_version=zarr_version)
-        assert "group1" == g.path
-        assert "/group1" == g.name
-    assert isinstance(g, Group)
-
-    # usage with custom store
-    if zarr_version == 2:
-        store = KVStore(dict())
-        path = None
-    else:
-        store = KVStoreV3(dict())
-        path = "foo"
-    g = group(store=store, path=path)
-    assert isinstance(g, Group)
-    assert store is g.store
-
-    # overwrite behaviour
-    if zarr_version == 2:
-        store = KVStore(dict())
-        path = None
-    else:
-        store = KVStoreV3(dict())
-        path = "foo"
-    init_array(store, path=path, shape=100, chunks=10)
-    with pytest.raises(ValueError):
-        group(store, path=path)
-    g = group(store, path=path, overwrite=True)
-    assert isinstance(g, Group)
-    assert store is g.store
-
-
 @pytest.mark.skipif(have_fsspec is False, reason="needs fsspec")
-@pytest.mark.parametrize("zarr_version", _VERSIONS)
-def test_group_writeable_mode(zarr_version, tmp_path):
+def test_group_writeable_mode(tmp_path):
     # Regression test for https://github.com/zarr-developers/zarr-python/issues/1353
     import fsspec

@@ -1611,17 +1207,16 @@ def test_group_writeable_mode(zarr_version, tmp_path):
     assert zg.store.map == store


-@pytest.mark.parametrize("zarr_version", _VERSIONS)
-def test_open_group(zarr_version):
+def test_open_group():
     # test the open_group() convenience function
     store = "data/group.zarr"
-    expected_store_type = DirectoryStore if zarr_version == 2 else DirectoryStoreV3
+    expected_store_type = DirectoryStore

     # mode == 'w'
-    path = None if zarr_version == 2 else "group1"
-    g = open_group(store, path=path, mode="w", zarr_version=zarr_version)
+    path = None
+    g = open_group(store, path=path, mode="w")
     assert isinstance(g, Group)
     assert isinstance(g.store, expected_store_type)
     assert 0 == len(g)
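# ---------------------------------------------------------------------------
# Editor's sketch (not part of the patch). The mode handling exercised by the
# next hunk, in plain v2 terms: 'w' creates or overwrites, 'a' opens existing
# data read/write, and 'w-'/'x' refuse to clobber:

import tempfile
import zarr

path = tempfile.mkdtemp() + "/example.zarr"  # hypothetical scratch location
g = zarr.open_group(path, mode="w")
g.create_groups("foo", "bar")
g2 = zarr.open_group(path, mode="a")  # reopens without wiping
assert sorted(g2) == ["bar", "foo"]
# ---------------------------------------------------------------------------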
@@ -1648,44 +1243,39 @@ def test_open_group(zarr_version):

     # mode == 'a'
     shutil.rmtree(store)
-    g = open_group(store, path=path, mode="a", zarr_version=zarr_version)
+    g = open_group(store, path=path, mode="a")
     assert isinstance(g, Group)
     assert isinstance(g.store, expected_store_type)
     assert 0 == len(g)
     g.create_groups("foo", "bar")
     assert 2 == len(g)
-    if zarr_version == 2:
-        with pytest.raises(ValueError):
-            open_group("data/array.zarr", mode="a", zarr_version=zarr_version)
-    else:
-        # TODO, root: should this raise an error?
-        open_group("data/array.zarr", mode="a", zarr_version=zarr_version)
+
+    with pytest.raises(ValueError):
+        open_group("data/array.zarr", mode="a")

     # mode in 'w-', 'x'
     for mode in "w-", "x":
         shutil.rmtree(store)
-        g = open_group(store, path=path, mode=mode, zarr_version=zarr_version)
+        g = open_group(store, path=path, mode=mode)
         assert isinstance(g, Group)
         assert isinstance(g.store, expected_store_type)
         assert 0 == len(g)
         g.create_groups("foo", "bar")
         assert 2 == len(g)
         with pytest.raises(ValueError):
-            open_group(store, path=path, mode=mode, zarr_version=zarr_version)
-        if zarr_version == 2:
-            with pytest.raises(ValueError):
-                open_group("data/array.zarr", mode=mode)
+            open_group(store, path=path, mode=mode)
+        with pytest.raises(ValueError):
+            open_group("data/array.zarr", mode=mode)

     # open with path
-    g = open_group(store, path="foo/bar", zarr_version=zarr_version)
+    g = open_group(store, path="foo/bar")
     assert isinstance(g, Group)
     assert "foo/bar" == g.path


-@pytest.mark.parametrize("zarr_version", _VERSIONS)
-def test_group_completions(zarr_version):
-    path = None if zarr_version == 2 else "group1"
-    g = group(path=path, zarr_version=zarr_version)
+def test_group_completions():
+    path = None
+    g = group(path=path)
     d = dir(g)
     assert "foo" not in d
     assert "bar" not in d
@@ -1713,10 +1303,9 @@ def test_group_completions(zarr_version):
     assert "456" not in d  # not valid identifier


-@pytest.mark.parametrize("zarr_version", _VERSIONS)
-def test_group_key_completions(zarr_version):
-    path = None if zarr_version == 2 else "group1"
-    g = group(path=path, zarr_version=zarr_version)
+def test_group_key_completions():
+    path = None
+    g = group(path=path)
     d = dir(g)
     # noinspection PyProtectedMember
     k = g._ipython_key_completions_()
@@ -1750,12 +1339,7 @@ def test_group_key_completions(zarr_version):
     g.zeros("yyy", shape=100)
     g.zeros("zzz", shape=100)
     g.zeros("456", shape=100)
-    if zarr_version == 2:
-        g.zeros("asdf;", shape=100)
-    else:
-        # cannot have ; in key name for v3
-        with pytest.raises(ValueError):
-            g.zeros("asdf;", shape=100)
+    g.zeros("asdf;", shape=100)

     d = dir(g)
     # noinspection PyProtectedMember
@@ -1770,8 +1354,7 @@ def test_group_key_completions(zarr_version):
     assert "zzz" in d
     assert "123" not in d  # not valid identifier
     assert "456" not in d  # not valid identifier
-    if zarr_version == 2:
-        assert "asdf;" not in d  # not valid identifier
+    assert "asdf;" not in d  # not valid identifier

     assert "foo" in k
     assert "bar" in k
@@ -1782,8 +1365,7 @@ def test_group_key_completions(zarr_version):
     assert "zzz" in k
     assert "123" in k
     assert "456" in k
-    if zarr_version == 2:
-        assert "asdf;" in k
+    assert "asdf;" in k


 def _check_tree(g, expect_bytes, expect_text):
@@ -1797,12 +1379,11 @@ def _check_tree(g, expect_bytes, expect_text):
         isinstance(widget, ipytree.Tree)


-@pytest.mark.parametrize("zarr_version", _VERSIONS)
 @pytest.mark.parametrize("at_root", [False, True])
-def test_tree(zarr_version, at_root):
+def test_tree(at_root):
     # setup
     path = None if at_root else "group1"
-    g1 = group(path=path, zarr_version=zarr_version)
+    g1 = group(path=path)
     g2 = g1.create_group("foo")
     g3 = g1.create_group("bar")
     g3.create_group("baz")
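# ---------------------------------------------------------------------------
# Editor's sketch (not part of the patch). The completion tests above now take
# the v2 branch unconditionally: any string is a legal v2 key ("asdf;"), but
# only valid Python identifiers surface through attribute-style dir()
# completion, while item completion lists every key:

import zarr

g = zarr.group()
g.zeros("xxx", shape=100)
g.zeros("asdf;", shape=100)  # legal v2 key, not a Python identifier
assert "xxx" in dir(g)  # attribute access: g.xxx
assert "asdf;" not in dir(g)
assert "asdf;" in g._ipython_key_completions_()  # item access: g["asdf;"]
# ---------------------------------------------------------------------------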
@@ -1811,46 +1392,25 @@ def test_tree(zarr_version, at_root):
     tree_path = "/" if at_root else path

     # test root group
-    if zarr_version == 2:
-        expect_bytes = textwrap.dedent(
-            f"""\
-        {tree_path}
-         +-- bar
-         |   +-- baz
-         |   +-- quux
-         |       +-- baz (100,) float64
-         +-- foo"""
-        ).encode()
-        expect_text = textwrap.dedent(
-            f"""\
-        {tree_path}
-         ├── bar
-         │   ├── baz
-         │   └── quux
-         │       └── baz (100,) float64
-         └── foo"""
-        )
-    else:
-        # Almost the same as for v2, but has a path name and the
-        # subgroups are not necessarily sorted alphabetically.
-        expect_bytes = textwrap.dedent(
-            f"""\
-        {tree_path}
-         +-- foo
-         +-- bar
-             +-- baz
-             +-- quux
-                 +-- baz (100,) float64"""
-        ).encode()
-        expect_text = textwrap.dedent(
-            f"""\
-        {tree_path}
-         ├── foo
-         └── bar
-             ├── baz
-             └── quux
-                 └── baz (100,) float64"""
-        )
+
+    expect_bytes = textwrap.dedent(
+        f"""\
+    {tree_path}
+     +-- bar
+     |   +-- baz
+     |   +-- quux
+     |       +-- baz (100,) float64
+     +-- foo"""
+    ).encode()
+    expect_text = textwrap.dedent(
+        f"""\
+    {tree_path}
+     ├── bar
+     │   ├── baz
+     │   └── quux
+     │       └── baz (100,) float64
+     └── foo"""
+    )
     _check_tree(g1, expect_bytes, expect_text)

     # test different group
@@ -1882,47 +1442,11 @@ def test_tree(zarr_version, at_root):
     _check_tree(g3, expect_bytes, expect_text)


-@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
-def test_group_mismatched_store_versions():
-    store_v3 = KVStoreV3(dict())
-    store_v2 = KVStore(dict())
-
-    # separate chunk store
-    chunk_store_v2 = KVStore(dict())
-    chunk_store_v3 = KVStoreV3(dict())
-
-    init_group(store_v2, path="group1", chunk_store=chunk_store_v2)
-    init_group(store_v3, path="group1", chunk_store=chunk_store_v3)
-
-    g1_v3 = Group(store_v3, path="group1", read_only=True, chunk_store=chunk_store_v3)
-    assert isinstance(g1_v3._store, KVStoreV3)
-    g1_v2 = Group(store_v2, path="group1", read_only=True, chunk_store=chunk_store_v2)
-    assert isinstance(g1_v2._store, KVStore)
-
-    # store and chunk_store must have the same zarr protocol version
-    with pytest.raises(ValueError):
-        Group(store_v3, path="group1", read_only=False, chunk_store=chunk_store_v2)
-    with pytest.raises(ValueError):
-        Group(store_v2, path="group1", read_only=False, chunk_store=chunk_store_v3)
-    with pytest.raises(ValueError):
-        open_group(store_v2, path="group1", chunk_store=chunk_store_v3)
-    with pytest.raises(ValueError):
-        open_group(store_v3, path="group1", chunk_store=chunk_store_v2)
-
-    # raises ValueError if read_only and path is not a pre-existing group
-    with pytest.raises(ValueError):
-        Group(store_v3, path="group2", read_only=True, chunk_store=chunk_store_v3)
-    with pytest.raises(ValueError):
-        Group(store_v3, path="group2", read_only=True, chunk_store=chunk_store_v3)
-
-
-@pytest.mark.parametrize("zarr_version", _VERSIONS)
-def test_open_group_from_paths(zarr_version):
-    """Verify zarr_version is applied to both the store and chunk_store."""
+def test_open_group_from_paths():
+    """Verify store and chunk_store both accept plain directory paths."""
     store = tempfile.mkdtemp()
     chunk_store = tempfile.mkdtemp()
     atexit.register(atexit_rmtree, store)
     atexit.register(atexit_rmtree, chunk_store)
     path = "g1"
-    g = open_group(store, path=path, chunk_store=chunk_store, zarr_version=zarr_version)
-    assert g._store._store_version == g._chunk_store._store_version == zarr_version
+    _ = open_group(store, path=path, chunk_store=chunk_store)
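# ---------------------------------------------------------------------------
# Editor's sketch (not part of the patch). After the version assertion is
# dropped, test_open_group_from_paths only checks that plain directory paths
# are accepted for both stores; in v2 each path is wrapped in a DirectoryStore:

import tempfile
import zarr
from zarr.storage import DirectoryStore

store, chunk_store = tempfile.mkdtemp(), tempfile.mkdtemp()
g = zarr.open_group(store, path="g1", chunk_store=chunk_store)
assert isinstance(g.store, DirectoryStore)
assert isinstance(g.chunk_store, DirectoryStore)
# ---------------------------------------------------------------------------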
diff --git a/tests/test_meta.py b/tests/test_meta.py
index 50f51929e..089afec78 100644
--- a/tests/test_meta.py
+++ b/tests/test_meta.py
@@ -1,5 +1,4 @@
 import base64
-import copy
 import json

 import numpy as np
@@ -16,11 +15,6 @@
     encode_dtype,
     encode_fill_value,
     decode_fill_value,
-    get_extended_dtype_info,
-    _v3_complex_types,
-    _v3_datetime_types,
-    _default_entry_point_metadata_v3,
-    Metadata3,
 )
 from zarr.util import normalize_dtype, normalize_fill_value
@@ -285,77 +279,6 @@ def test_encode_decode_array_dtype_shape():
     assert meta_dec["filters"] is None


-def test_encode_decode_array_dtype_shape_v3():
-    meta = dict(
-        shape=(100,),
-        chunk_grid=dict(type="regular", chunk_shape=(10,), separator=("/")),
-        data_type=np.dtype("(10, 10)U4", "