
Commit

Merge branch 'master' of https://github.com/telegraphic/hickle
telegraphic committed Aug 31, 2022
2 parents a0fe714 + 33f3f0a commit 1072e56
Showing 10 changed files with 78 additions and 64 deletions.
22 changes: 11 additions & 11 deletions README.md
@@ -125,16 +125,16 @@ def create_MyClass_dataset(py_obj, h_group, name, **kwargs):
**kwargs ... the compression keyword arguments passed to hickle.dump
# if content of MyClass can be represented as single matrix, vector or scalar
# values than created a dataset of approriate size. and either set its shape and
# dtype parameters # to the approriate size and tyoe . or directly pass the data
# using the data parmameter
# values than created a dataset of appropriate size. and either set its shape and
# dtype parameters # to the appropriate size and tyoe . or directly pass the data
# using the data parameter
ds = h_group.create_dataset(name,data = py_obj.value,**kwargs)
## NOTE: if your class represents a scalar using empty tuple for shape
## than kwargs have to be filtered by no_compression
# ds = h_group.create_dataset(name,data = py_obj.value,shape=(),**no_compression(kwargs))
# set addtional attributes providing additional specialisation of content
# set additional attributes providing additional specialisation of content
ds.attrs['name'] = py_obj.name
# when done return the new dataset object and an empty tuple or list
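For context (not part of the diff): the load function that pairs with this create function, referenced later in the README as load_MyClass but not shown in this hunk, would roughly invert it as in the sketch below; the MyClass(value, name) constructor is assumed for illustration.

# Sketch of the counterpart load function (assumes MyClass(value, name)):
def load_MyClass(h_node, base_type, py_obj_type):
    # h_node is the h5py.Dataset written by create_MyClass_dataset above;
    # rebuild the instance from the stored data and its 'name' attribute
    return py_obj_type(h_node[()], h_node.attrs['name'])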
@@ -172,7 +172,7 @@ def create_MyClass_dataset(py_obj, h_group, name, **kwargs):
ds = h_group.create_group(name)
# set addtional attributes providing additional specialisation of content
# set additional attributes providing additional specialisation of content
ds.attrs['name'] = py_obj.name
# when done return the new dataset object and a tuple, list or generator function
@@ -206,7 +206,7 @@ class MyClassContainer(PyContainer):
def filter(self,h_parent): # optional overload
"""
generator member functoin which can be overloaded to reorganize subitems
of h_parent h5py.Group before beeing restored by hickle. Its default
of h_parent h5py.Group before being restored by hickle. Its default
implementation simply yields from h_parent.items().
"""
yield from super().filter(h_parent)
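For orientation, the rest of the container that this filter method belongs to follows the README's group example. The sketch below reconstructs its likely shape; the import path and the __init__/append/convert signatures follow hickle's documented PyContainer loader API as far as it can be reconstructed here, and building the final object from the collected subitems is an illustrative assumption.

from hickle.helpers import PyContainer

class MyClassContainer(PyContainer):
    def __init__(self, h5_attrs, base_type, object_type):
        # collect subitems in a dict and defer object creation to convert()
        super().__init__(h5_attrs, base_type, object_type, _content=dict())

    def append(self, name, item, h5_attrs):
        # store each restored subitem under the name it was dumped with
        self._content[name] = item

    def convert(self):
        # assemble the final instance from the collected subitems (sketch)
        return self.object_type(**self._content)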
@@ -248,7 +248,7 @@ LoaderManager.register_class(
create_MyClass_Dataset, # the create dataset function defined in first example above
load_MyClass, # the load dataset function defined in first example above
None, # usually None
True, # Set to False to force explcit storage of MyClass instances in any case
True, # Set to False to force explicit storage of MyClass instances in any case
'custom' # Loader is only used when custom loaders are enabled on calling hickle.dump
)
@@ -259,7 +259,7 @@ LoaderManager.register_class(
create_MyClass_Dataset, # the create dataset function defined in first example above
None, # usually None
MyClassContainer # the PyContainer to be used to restore content of MyClass
True, # Set to False to force explcit storage of MyClass instances in any case
True, # Set to False to force explicit storage of MyClass instances in any case
None # if set to None loader is enabled unconditionally
)
@@ -299,15 +299,15 @@ class_register = [
create_MyClass_Dataset, # the create dataset function defined in first example above
load_MyClass, # the load dataset function defined in first example above
None, # usually None
True, # Set to False to force explcit storage of MyClass instances in any case
True, # Set to False to force explicit storage of MyClass instances in any case
'custom' # Loader is only used when custom loaders are enabled on calling hickle.dump
],
[ MyClass, # MyClass type object this loader handles
b'MyClass', # byte string representing the name of the loader
create_MyClass_Dataset, # the create dataset function defined in first example above
None, # usually None
MyClassContainer # the PyContainer to be used to restore content of MyClass
True, # Set to False to force explcit storage of MyClass instances in any case
True, # Set to False to force explicit storage of MyClass instances in any case
None # if set to None loader is enabled unconditionally
]
]
@@ -325,7 +325,7 @@ such that they can be compressed when stored see default loader modules in `hic

### Note: storing complex objects in HDF5 file
The HDF5 file format is designed to store several big matrices, images and vectors efficiently
and attache some metadata and to provide a convenient way access the data through a tree structure.
and attach some metadata and to provide a convenient way access the data through a tree structure.
It is not designed like python pickle format for efficiently mapping the in memory object structure
to a file. Therefore mindlessly storing plenty of tiny objects and scalar values without combining
them into a single datataset will cause the HDF5 and thus the file created by hickle explode. File
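The practical upshot of this note, as a rough sketch (the file names are illustrative and the exact overhead depends on HDF5 internals):

import numpy as np
import hickle as hkl

# one array-valued entry maps to a single HDF5 dataset ...
compact = {'samples': np.arange(10000, dtype=np.float64)}
# ... while many separate scalars force one tiny HDF5 node per value
scattered = {'sample_{}'.format(i): float(i) for i in range(10000)}

hkl.dump(compact, 'compact.hkl')
hkl.dump(scattered, 'scattered.hkl')  # expected to be much larger and slower to write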
14 changes: 7 additions & 7 deletions conftest.py
@@ -68,7 +68,7 @@ def _get_trace_function(trace_function):

# keyword arguments to yield from compression_kwargs fixture below
# may in future become a list of dictionaries to be yieled for
# running same test with different sets of compression keywors
# running same test with different sets of compression keywords
# (implizit parametrization of tests)
_compression_args = dict(
compression='gzip',
@@ -86,7 +86,7 @@ def pytest_configure(config):
global _test_compression

config.addinivalue_line(
"markers","no_compression: do not enforce h5py comression hardening testing"
"markers","no_compression: do not enforce h5py compression hardening testing"
)
if _test_compression is not None:
return
@@ -115,9 +115,9 @@ def compression_kwargs(request):
# is called
_trace_register_class = {}

# list of dump_functions to be traced with respect to beeing
# passed the compression related keywords provided throug compression_kwargs
# fixture above. In case a call to any of theses does not include at least these
# list of dump_functions to be traced with respect to being
# passed the compression related keywords provided through compression_kwargs
# fixture above. In case a call to any of these does not include at least these
# keywords an AssertionError Exception is raised.
_trace_functions = collections.OrderedDict()

@@ -135,7 +135,7 @@ def _chain_profile_call(frame,event,arg):
if next_call:
_trace_profile_call = next_call

# argument names which correspond to argument beeing passed dump_function
# argument names which correspond to argument being passed dump_function
# object
_trace_function_arg_names = {'dump_function'}

@@ -254,7 +254,7 @@ def traceback_from_frame(frame,stopafter):

def pytest_collection_finish(session):
"""
collect all test functions for which comression related keyword monitoring
collect all test functions for which compression related keyword monitoring
shall be disabled.
"""
if not sys.getprofile() == _trace_loader_funcs:
4 changes: 2 additions & 2 deletions hickle/hickle.py
@@ -106,7 +106,7 @@ def _dump(py_obj, h_group, name, memo, loader,attrs={} , **kwargs):
py_obj_ref = memo.get(py_obj_id,None)
if py_obj_ref is not None:

# py_object already dumped to hdf5 file store a refrence to it instead
# py_object already dumped to hdf5 file store a reference to it instead
# instead of dumping it again.
#
# Note: reference dataset share their base_type and py_obj_type with the
@@ -318,7 +318,7 @@ def load(file_obj, path='/', safe=True, filename = None):
"""

# Try to read the provided file_obj as a hickle file
h5f, path, close_flag = file_opener(file_obj, path, 'r')
h5f, path, close_flag = file_opener(file_obj, path, 'r', filename)
try:
h_root_group = h5f.get(path,None) # only used by v4
if not isinstance(h_root_group,h5.Group):
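The memoisation described in the first hunk above can be seen from the outside roughly like this (a sketch; the identity check reflects the intended behaviour of reference datasets, not something asserted by the diff):

import numpy as np
import hickle as hkl

shared = np.arange(1000)
hkl.dump({'a': shared, 'b': shared}, 'memo_demo.hkl')  # 'b' stored as a reference to 'a'

restored = hkl.load('memo_demo.hkl')
print(restored['a'] is restored['b'])  # expected True when the reference is restored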
2 changes: 1 addition & 1 deletion hickle/legacy_v3/lookup.py
@@ -187,7 +187,7 @@ def register_class_list(class_list):
register_class(*class_item)

def register_class_exclude(hkl_str_to_ignore):
""" Tell loading funciton to ignore any HDF5 dataset with attribute 'type=XYZ'
""" Tell loading function to ignore any HDF5 dataset with attribute 'type=XYZ'
Args:
hkl_str_to_ignore (str): attribute type=string to ignore and exclude from loading.
12 changes: 8 additions & 4 deletions hickle/loaders/load_astropy.py
@@ -127,15 +127,17 @@ def create_astropy_time(py_obj, h_group, name, **kwargs):
tuple containing h5py.Dataset representing astropy time and empty subitems
"""

# Need to catch string times
# Need to catch string times, e.g. 1999-01-01T00:00:00.123
# Must be encoded into bytes.
if 'str' in py_obj.value.dtype.name:
bytes_dtype_str = py_obj.value.dtype.str.replace('<U', '|S')
d = h_group.create_dataset(
name,
data = np.array([item.encode('ascii') for item in py_obj.value ]),
data = np.array(py_obj.value.astype(bytes_dtype_str)),
**kwargs
)
else:
d = h_group.create_dataset(name,data = py_obj.value,dtype = py_obj.value.dtype)
d = h_group.create_dataset(name, data=py_obj.value, dtype=py_obj.value.dtype)
d.attrs['np_dtype'] = py_obj.value.dtype.str.encode('ascii')

d.attrs['format'] = str(py_obj.format).encode('ascii')
@@ -258,7 +260,9 @@ def load_astropy_time_dataset(h_node,base_type,py_obj_type):
if dtype:
dtype = np.dtype(dtype)
if 'str' in dtype.name:
return py_obj_type(np.array([item.decode('ascii') for item in h_node[()]],dtype=dtype), format=fmt, scale=scale)
bytes_dtype_str = dtype.str.replace('|S', '<U')
time_data = np.array(h_node[()]).astype(bytes_dtype_str)
return py_obj_type(time_data, format=fmt, scale=scale)
return py_obj_type(np.array(h_node[()],dtype=dtype), format=fmt, scale=scale)
return py_obj_type(np.array(h_node[()],dtype = h_node.dtype), format=fmt, scale=scale)

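The dtype swap these two hunks rely on, isolated from astropy (the example timestamps are illustrative):

import numpy as np

values = np.array(['1999-01-01T00:00:00.123456789', '2010-01-01T00:00:00'])
bytes_dtype = values.dtype.str.replace('<U', '|S')    # e.g. '<U29' -> '|S29'
stored = values.astype(bytes_dtype)                   # ASCII bytes, as written to the HDF5 dataset
restored = stored.astype(bytes_dtype.replace('|S', '<U'))
assert (restored == values).all()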
2 changes: 1 addition & 1 deletion hickle/loaders/load_builtins.py
@@ -193,7 +193,7 @@ def create_listlike_dataset(py_obj, h_group, name,list_len = -1,item_dtype = Non
dataset[index] = item_dtype.type(item)
return dataset,()

# crate group and provide generator yielding all subitems to be stored within
# create group and provide generator yielding all subitems to be stored within
item_name = "data{:d}"
def provide_listlike_items():
for index,item in enumerate(py_obj,0):
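The two branches around this comment translate into observable behaviour roughly as follows (a sketch; file names are illustrative):

import hickle as hkl

hkl.dump([1.0, 2.0, 3.0], 'homogeneous.hkl')  # representable as one vector -> single dataset
hkl.dump([1, 'two', 3.0], 'mixed.hkl')        # mixed item types -> group with one "data{:d}" subitem per entry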
2 changes: 1 addition & 1 deletion hickle/lookup.py
@@ -605,7 +605,7 @@ def store_type(self, h_node, py_obj_type, base_type = None, attr_name = 'type',
base_type (bytes):
the base-type bytes string of the loader used to create the h_node and
restore an object instance form on load. If None no 'hickle_types_table'
will be crated for py_obj_type if not already present and a LookupError
will be created for py_obj_type if not already present and a LookupError
exception is raised instead.
attr_name (str):
2 changes: 1 addition & 1 deletion hickle/tests/test_02_hickle_lookup.py
@@ -628,7 +628,7 @@ def test_LoaderManager_load_loader(loader_table,h5_data,monkeypatch):
moc_import_lib.setattr("hickle.lookup.find_spec",patch_importlib_util_find_spec)
sys.modules['hickle.loaders.load_builtins'] = backup_load_builtins
loader.types_dict[dict] = backup_py_obj_type
# not added by missing legacy .pyc test readd manually here
# not added by missing legacy .pyc test re-add manually here
lookup.LoaderManager.__loaded_loaders__.add('hickle.loaders.load_builtins')
lookup._custom_loader_enabled_builtins.pop(py_obj_type.__class__.__module__,None)

80 changes: 45 additions & 35 deletions hickle/tests/test_06_load_astropy.py 100644 → 100755
@@ -130,43 +130,53 @@ def test_astropy_time_array(h5_data,compression_kwargs):
"""
test proper storage and loading of astropy time representations
"""

loop_counter = 0

times = ['1999-01-01T00:00:00.123456789', '2010-01-01T00:00:00']
t1 = Time(times, format='isot', scale='utc')

for times in ([58264, 58265, 58266], [[58264, 58265, 58266], [58264, 58265, 58266]]):
t1 = Time(times, format='mjd', scale='utc')

h_dataset, subitems = load_astropy.create_astropy_time(t1,h5_data, f'time_{loop_counter}',**compression_kwargs)
assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems)
assert h_dataset.attrs['format'] in( str(t1.format).encode('ascii'),str(t1.format))
assert h_dataset.attrs['scale'] in ( str(t1.scale).encode('ascii'),str(t1.scale))
assert h_dataset.attrs['np_dtype'] in( t1.value.dtype.str.encode('ascii'),t1.value.dtype.str)
reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert np.allclose(reloaded.value[index], t1.value[index])
loop_counter += 1

t_strings = ['1999-01-01T00:00:00.123456789', '2010-01-01T00:00:00']

# Check that 2D time arrays work as well (github issue #162)
for times in (t_strings, [t_strings, t_strings]):
t1 = Time(times, format='isot', scale='utc')

h_dataset,subitems = load_astropy.create_astropy_time(t1,h5_data,f'time_{loop_counter}',**compression_kwargs)
assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems)
assert h_dataset.attrs['format'] in (str(t1.format).encode('ascii'),str(t1.format))
assert h_dataset.attrs['scale'] in (str(t1.scale).encode('ascii'),str(t1.scale))
assert h_dataset.attrs['np_dtype'] in ( t1.value.dtype.str.encode('ascii'),t1.value.dtype.str)
reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert reloaded.value[index].tostring() == t1.value[index].tostring()
del h_dataset.attrs['np_dtype']

reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert reloaded.value[index].tostring() == t1.value[index].tostring()
loop_counter += 1

h_dataset,subitems = load_astropy.create_astropy_time(t1,h5_data,'time1',**compression_kwargs)
assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems)
assert h_dataset.attrs['format'] in (str(t1.format).encode('ascii'),str(t1.format))
assert h_dataset.attrs['scale'] in (str(t1.scale).encode('ascii'),str(t1.scale))
assert h_dataset.attrs['np_dtype'] in ( t1.value.dtype.str.encode('ascii'),t1.value.dtype.str)
reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert reloaded.value[index] == t1.value[index]
del h_dataset.attrs['np_dtype']

reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert reloaded.value[index] == t1.value[index]

times = [58264, 58265, 58266]
t1 = Time(times, format='mjd', scale='utc')
h_dataset,subitems = load_astropy.create_astropy_time(t1,h5_data,'time2',**compression_kwargs)
assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems)
assert h_dataset.attrs['format'] in( str(t1.format).encode('ascii'),str(t1.format))
assert h_dataset.attrs['scale'] in ( str(t1.scale).encode('ascii'),str(t1.scale))
assert h_dataset.attrs['np_dtype'] in( t1.value.dtype.str.encode('ascii'),t1.value.dtype.str)
reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert reloaded.value[index] == t1.value[index]


def test_astropy_angle(h5_data,compression_kwargs):
2 changes: 1 addition & 1 deletion tox.ini
@@ -6,7 +6,7 @@ skip_missing_interpreters=true
#do i need change here to trigger wf

[gh-actions]
# needed to match gh-action python versoin numbers with tox mnemonic
# needed to match gh-action python version numbers with tox mnemonic
python =
3.5: py35
3.6: py36
