
Commit

Merge branch 'master' of https://github.com/telegraphic/hickle
telegraphic committed Aug 31, 2022
2 parents a0fe714 + 33f3f0a commit 1072e56
Showing 10 changed files with 78 additions and 64 deletions.
22 changes: 11 additions & 11 deletions README.md
@@ -125,16 +125,16 @@ def create_MyClass_dataset(py_obj, h_group, name, **kwargs):
**kwargs ... the compression keyword arguments passed to hickle.dump
# if content of MyClass can be represented as single matrix, vector or scalar
# values than created a dataset of approriate size. and either set its shape and
# dtype parameters # to the approriate size and tyoe . or directly pass the data
# using the data parmameter
# values than created a dataset of appropriate size. and either set its shape and
# dtype parameters # to the appropriate size and tyoe . or directly pass the data
# using the data parameter
ds = h_group.create_dataset(name,data = py_obj.value,**kwargs)
## NOTE: if your class represents a scalar using empty tuple for shape
## than kwargs have to be filtered by no_compression
# ds = h_group.create_dataset(name,data = py_obj.value,shape=(),**no_compression(kwargs))
# set addtional attributes providing additional specialisation of content
# set additional attributes providing additional specialisation of content
ds.attrs['name'] = py_obj.name
# when done return the new dataset object and an empty tuple or list
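For context (not part of the diff): the load function that pairs with this create function, referenced later in the README as load_MyClass but not shown in this hunk, would roughly invert it as in the sketch below; the MyClass(value, name) constructor is assumed for illustration.

# Sketch of the counterpart load function (assumes MyClass(value, name)):
def load_MyClass(h_node, base_type, py_obj_type):
    # h_node is the h5py.Dataset written by create_MyClass_dataset above;
    # rebuild the instance from the stored data and its 'name' attribute
    return py_obj_type(h_node[()], h_node.attrs['name'])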
@@ -172,7 +172,7 @@ def create_MyClass_dataset(py_obj, h_group, name, **kwargs):
ds = h_group.create_group(name)
# set addtional attributes providing additional specialisation of content
# set additional attributes providing additional specialisation of content
ds.attrs['name'] = py_obj.name
# when done return the new dataset object and a tuple, list or generator function
@@ -206,7 +206,7 @@ class MyClassContainer(PyContainer):
def filter(self,h_parent): # optional overload
"""
generator member functoin which can be overloaded to reorganize subitems
of h_parent h5py.Group before beeing restored by hickle. Its default
of h_parent h5py.Group before being restored by hickle. Its default
implementation simply yields from h_parent.items().
"""
yield from super().filter(h_parent)
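For orientation, the rest of the container that this filter method belongs to follows the README's group example. The sketch below reconstructs its likely shape; the import path and the __init__/append/convert signatures follow hickle's documented PyContainer loader API as far as it can be reconstructed here, and building the final object from the collected subitems is an illustrative assumption.

from hickle.helpers import PyContainer

class MyClassContainer(PyContainer):
    def __init__(self, h5_attrs, base_type, object_type):
        # collect subitems in a dict and defer object creation to convert()
        super().__init__(h5_attrs, base_type, object_type, _content=dict())

    def append(self, name, item, h5_attrs):
        # store each restored subitem under the name it was dumped with
        self._content[name] = item

    def convert(self):
        # assemble the final instance from the collected subitems (sketch)
        return self.object_type(**self._content)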
@@ -248,7 +248,7 @@ LoaderManager.register_class(
create_MyClass_Dataset, # the create dataset function defined in first example above
load_MyClass, # the load dataset function defined in first example above
None, # usually None
True, # Set to False to force explcit storage of MyClass instances in any case
True, # Set to False to force explicit storage of MyClass instances in any case
'custom' # Loader is only used when custom loaders are enabled on calling hickle.dump
)
@@ -259,7 +259,7 @@ LoaderManager.register_class(
create_MyClass_Dataset, # the create dataset function defined in first example above
None, # usually None
MyClassContainer # the PyContainer to be used to restore content of MyClass
True, # Set to False to force explcit storage of MyClass instances in any case
True, # Set to False to force explicit storage of MyClass instances in any case
None # if set to None loader is enabled unconditionally
)
@@ -299,15 +299,15 @@ class_register = [
create_MyClass_Dataset, # the create dataset function defined in first example above
load_MyClass, # the load dataset function defined in first example above
None, # usually None
True, # Set to False to force explcit storage of MyClass instances in any case
True, # Set to False to force explicit storage of MyClass instances in any case
'custom' # Loader is only used when custom loaders are enabled on calling hickle.dump
],
[ MyClass, # MyClass type object this loader handles
b'MyClass', # byte string representing the name of the loader
create_MyClass_Dataset, # the create dataset function defined in first example above
None, # usually None
MyClassContainer # the PyContainer to be used to restore content of MyClass
True, # Set to False to force explcit storage of MyClass instances in any case
True, # Set to False to force explicit storage of MyClass instances in any case
None # if set to None loader is enabled unconditionally
]
]
@@ -325,7 +325,7 @@ such that they can be compressed when stored see default loader modules in `hic

### Note: storing complex objects in HDF5 file
The HDF5 file format is designed to store several big matrices, images and vectors efficiently
and attache some metadata and to provide a convenient way access the data through a tree structure.
and attach some metadata and to provide a convenient way access the data through a tree structure.
It is not designed like python pickle format for efficiently mapping the in memory object structure
to a file. Therefore mindlessly storing plenty of tiny objects and scalar values without combining
them into a single datataset will cause the HDF5 and thus the file created by hickle explode. File
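The practical upshot of this note, as a rough sketch (the file names are illustrative and the exact overhead depends on HDF5 internals):

import numpy as np
import hickle as hkl

# one array-valued entry maps to a single HDF5 dataset ...
compact = {'samples': np.arange(10000, dtype=np.float64)}
# ... while many separate scalars force one tiny HDF5 node per value
scattered = {'sample_{}'.format(i): float(i) for i in range(10000)}

hkl.dump(compact, 'compact.hkl')
hkl.dump(scattered, 'scattered.hkl')  # expected to be much larger and slower to write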
14 changes: 7 additions & 7 deletions conftest.py
@@ -68,7 +68,7 @@ def _get_trace_function(trace_function):

# keyword arguments to yield from compression_kwargs fixture below
# may in future become a list of dictionaries to be yieled for
# running same test with different sets of compression keywors
# running same test with different sets of compression keywords
# (implizit parametrization of tests)
_compression_args = dict(
compression='gzip',
@@ -86,7 +86,7 @@ def pytest_configure(config):
global _test_compression

config.addinivalue_line(
"markers","no_compression: do not enforce h5py comression hardening testing"
"markers","no_compression: do not enforce h5py compression hardening testing"
)
if _test_compression is not None:
return
@@ -115,9 +115,9 @@ def compression_kwargs(request):
# is called
_trace_register_class = {}

# list of dump_functions to be traced with respect to beeing
# passed the compression related keywords provided throug compression_kwargs
# fixture above. In case a call to any of theses does not include at least these
# list of dump_functions to be traced with respect to being
# passed the compression related keywords provided through compression_kwargs
# fixture above. In case a call to any of these does not include at least these
# keywords an AssertionError Exception is raised.
_trace_functions = collections.OrderedDict()

@@ -135,7 +135,7 @@ def _chain_profile_call(frame,event,arg):
if next_call:
_trace_profile_call = next_call

# argument names which correspond to argument beeing passed dump_function
# argument names which correspond to argument being passed dump_function
# object
_trace_function_arg_names = {'dump_function'}

@@ -254,7 +254,7 @@ def traceback_from_frame(frame,stopafter):

def pytest_collection_finish(session):
"""
collect all test functions for which comression related keyword monitoring
collect all test functions for which compression related keyword monitoring
shall be disabled.
"""
if not sys.getprofile() == _trace_loader_funcs:
4 changes: 2 additions & 2 deletions hickle/hickle.py
@@ -106,7 +106,7 @@ def _dump(py_obj, h_group, name, memo, loader,attrs={} , **kwargs):
py_obj_ref = memo.get(py_obj_id,None)
if py_obj_ref is not None:

# py_object already dumped to hdf5 file store a refrence to it instead
# py_object already dumped to hdf5 file store a reference to it instead
# instead of dumping it again.
#
# Note: reference dataset share their base_type and py_obj_type with the
@@ -318,7 +318,7 @@ def load(file_obj, path='/', safe=True, filename = None):
"""

# Try to read the provided file_obj as a hickle file
h5f, path, close_flag = file_opener(file_obj, path, 'r')
h5f, path, close_flag = file_opener(file_obj, path, 'r', filename)
try:
h_root_group = h5f.get(path,None) # only used by v4
if not isinstance(h_root_group,h5.Group):
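The memoisation described in the first hunk above can be seen from the outside roughly like this (a sketch; the identity check reflects the intended behaviour of reference datasets, not something asserted by the diff):

import numpy as np
import hickle as hkl

shared = np.arange(1000)
hkl.dump({'a': shared, 'b': shared}, 'memo_demo.hkl')  # 'b' stored as a reference to 'a'

restored = hkl.load('memo_demo.hkl')
print(restored['a'] is restored['b'])  # expected True when the reference is restored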
2 changes: 1 addition & 1 deletion hickle/legacy_v3/lookup.py
@@ -187,7 +187,7 @@ def register_class_list(class_list):
register_class(*class_item)

def register_class_exclude(hkl_str_to_ignore):
""" Tell loading funciton to ignore any HDF5 dataset with attribute 'type=XYZ'
""" Tell loading function to ignore any HDF5 dataset with attribute 'type=XYZ'
Args:
hkl_str_to_ignore (str): attribute type=string to ignore and exclude from loading.
12 changes: 8 additions & 4 deletions hickle/loaders/load_astropy.py
@@ -127,15 +127,17 @@ def create_astropy_time(py_obj, h_group, name, **kwargs):
tuple containing h5py.Dataset representing astropy time and empty subitems
"""

# Need to catch string times
# Need to catch string times, e.g. 1999-01-01T00:00:00.123
# Must be encoded into bytes.
if 'str' in py_obj.value.dtype.name:
bytes_dtype_str = py_obj.value.dtype.str.replace('<U', '|S')
d = h_group.create_dataset(
name,
data = np.array([item.encode('ascii') for item in py_obj.value ]),
data = np.array(py_obj.value.astype(bytes_dtype_str)),
**kwargs
)
else:
d = h_group.create_dataset(name,data = py_obj.value,dtype = py_obj.value.dtype)
d = h_group.create_dataset(name, data=py_obj.value, dtype=py_obj.value.dtype)
d.attrs['np_dtype'] = py_obj.value.dtype.str.encode('ascii')

d.attrs['format'] = str(py_obj.format).encode('ascii')
@@ -258,7 +260,9 @@ def load_astropy_time_dataset(h_node,base_type,py_obj_type):
if dtype:
dtype = np.dtype(dtype)
if 'str' in dtype.name:
return py_obj_type(np.array([item.decode('ascii') for item in h_node[()]],dtype=dtype), format=fmt, scale=scale)
bytes_dtype_str = dtype.str.replace('|S', '<U')
time_data = np.array(h_node[()]).astype(bytes_dtype_str)
return py_obj_type(time_data, format=fmt, scale=scale)
return py_obj_type(np.array(h_node[()],dtype=dtype), format=fmt, scale=scale)
return py_obj_type(np.array(h_node[()],dtype = h_node.dtype), format=fmt, scale=scale)

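The dtype swap these two hunks rely on, isolated from astropy (the example timestamps are illustrative):

import numpy as np

values = np.array(['1999-01-01T00:00:00.123456789', '2010-01-01T00:00:00'])
bytes_dtype = values.dtype.str.replace('<U', '|S')    # e.g. '<U29' -> '|S29'
stored = values.astype(bytes_dtype)                   # ASCII bytes, as written to the HDF5 dataset
restored = stored.astype(bytes_dtype.replace('|S', '<U'))
assert (restored == values).all()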
2 changes: 1 addition & 1 deletion hickle/loaders/load_builtins.py
@@ -193,7 +193,7 @@ def create_listlike_dataset(py_obj, h_group, name,list_len = -1,item_dtype = Non
dataset[index] = item_dtype.type(item)
return dataset,()

# crate group and provide generator yielding all subitems to be stored within
# create group and provide generator yielding all subitems to be stored within
item_name = "data{:d}"
def provide_listlike_items():
for index,item in enumerate(py_obj,0):
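The two branches around this comment translate into observable behaviour roughly as follows (a sketch; file names are illustrative):

import hickle as hkl

hkl.dump([1.0, 2.0, 3.0], 'homogeneous.hkl')  # representable as one vector -> single dataset
hkl.dump([1, 'two', 3.0], 'mixed.hkl')        # mixed item types -> group with one "data{:d}" subitem per entry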
2 changes: 1 addition & 1 deletion hickle/lookup.py
@@ -605,7 +605,7 @@ def store_type(self, h_node, py_obj_type, base_type = None, attr_name = 'type',
base_type (bytes):
the base-type bytes string of the loader used to create the h_node and
restore an object instance form on load. If None no 'hickle_types_table'
will be crated for py_obj_type if not already present and a LookupError
will be created for py_obj_type if not already present and a LookupError
exception is raised instead.
attr_name (str):
2 changes: 1 addition & 1 deletion hickle/tests/test_02_hickle_lookup.py
@@ -628,7 +628,7 @@ def test_LoaderManager_load_loader(loader_table,h5_data,monkeypatch):
moc_import_lib.setattr("hickle.lookup.find_spec",patch_importlib_util_find_spec)
sys.modules['hickle.loaders.load_builtins'] = backup_load_builtins
loader.types_dict[dict] = backup_py_obj_type
# not added by missing legacy .pyc test readd manually here
# not added by missing legacy .pyc test re-add manually here
lookup.LoaderManager.__loaded_loaders__.add('hickle.loaders.load_builtins')
lookup._custom_loader_enabled_builtins.pop(py_obj_type.__class__.__module__,None)

80 changes: 45 additions & 35 deletions hickle/tests/test_06_load_astropy.py 100644 → 100755
@@ -130,43 +130,53 @@ def test_astropy_time_array(h5_data,compression_kwargs):
"""
test proper storage and loading of astropy time representations
"""

loop_counter = 0

times = ['1999-01-01T00:00:00.123456789', '2010-01-01T00:00:00']
t1 = Time(times, format='isot', scale='utc')

for times in ([58264, 58265, 58266], [[58264, 58265, 58266], [58264, 58265, 58266]]):
t1 = Time(times, format='mjd', scale='utc')

h_dataset, subitems = load_astropy.create_astropy_time(t1,h5_data, f'time_{loop_counter}',**compression_kwargs)
assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems)
assert h_dataset.attrs['format'] in( str(t1.format).encode('ascii'),str(t1.format))
assert h_dataset.attrs['scale'] in ( str(t1.scale).encode('ascii'),str(t1.scale))
assert h_dataset.attrs['np_dtype'] in( t1.value.dtype.str.encode('ascii'),t1.value.dtype.str)
reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert np.allclose(reloaded.value[index], t1.value[index])
loop_counter += 1

t_strings = ['1999-01-01T00:00:00.123456789', '2010-01-01T00:00:00']

# Check that 2D time arrays work as well (github issue #162)
for times in (t_strings, [t_strings, t_strings]):
t1 = Time(times, format='isot', scale='utc')

h_dataset,subitems = load_astropy.create_astropy_time(t1,h5_data,f'time_{loop_counter}',**compression_kwargs)
assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems)
assert h_dataset.attrs['format'] in (str(t1.format).encode('ascii'),str(t1.format))
assert h_dataset.attrs['scale'] in (str(t1.scale).encode('ascii'),str(t1.scale))
assert h_dataset.attrs['np_dtype'] in ( t1.value.dtype.str.encode('ascii'),t1.value.dtype.str)
reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert reloaded.value[index].tostring() == t1.value[index].tostring()
del h_dataset.attrs['np_dtype']

reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert reloaded.value[index].tostring() == t1.value[index].tostring()
loop_counter += 1

h_dataset,subitems = load_astropy.create_astropy_time(t1,h5_data,'time1',**compression_kwargs)
assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems)
assert h_dataset.attrs['format'] in (str(t1.format).encode('ascii'),str(t1.format))
assert h_dataset.attrs['scale'] in (str(t1.scale).encode('ascii'),str(t1.scale))
assert h_dataset.attrs['np_dtype'] in ( t1.value.dtype.str.encode('ascii'),t1.value.dtype.str)
reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert reloaded.value[index] == t1.value[index]
del h_dataset.attrs['np_dtype']

reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert reloaded.value[index] == t1.value[index]

times = [58264, 58265, 58266]
t1 = Time(times, format='mjd', scale='utc')
h_dataset,subitems = load_astropy.create_astropy_time(t1,h5_data,'time2',**compression_kwargs)
assert isinstance(h_dataset,h5.Dataset) and not subitems and iter(subitems)
assert h_dataset.attrs['format'] in( str(t1.format).encode('ascii'),str(t1.format))
assert h_dataset.attrs['scale'] in ( str(t1.scale).encode('ascii'),str(t1.scale))
assert h_dataset.attrs['np_dtype'] in( t1.value.dtype.str.encode('ascii'),t1.value.dtype.str)
reloaded = load_astropy.load_astropy_time_dataset(h_dataset,b'astropy_time',t1.__class__)
assert reloaded.value.shape == t1.value.shape
assert reloaded.format == t1.format
assert reloaded.scale == t1.scale
for index in range(len(t1)):
assert reloaded.value[index] == t1.value[index]


def test_astropy_angle(h5_data,compression_kwargs):
2 changes: 1 addition & 1 deletion tox.ini
@@ -6,7 +6,7 @@ skip_missing_interpreters=true
#do i need change here to trigger wf

[gh-actions]
# needed to match gh-action python versoin numbers with tox mnemonic
# needed to match gh-action python version numbers with tox mnemonic
python =
3.5: py35
3.6: py36
