Skip to content

Commit

Permalink
Merge pull request #134 from 1313e/1313e_dev
Browse files Browse the repository at this point in the history
Last few changes
  • Loading branch information
telegraphic committed Jun 24, 2020
2 parents 7bc9965 + 350f744 commit 2224889
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 216 deletions.
124 changes: 50 additions & 74 deletions hickle/hickle.py
Expand Up @@ -68,43 +68,8 @@ def __str__(self):
return "Error: this functionality hasn't been implemented yet."


######################
# H5PY file wrappers #
######################

class H5GroupWrapper(h5.Group):
    """h5py Group subclass that propagates a ``track_times`` flag.

    When ``track_times`` is False, datasets created through this wrapper
    omit creation timestamps, so two files written at different times can
    have identical MD5 hashes. The flag defaults to True when it has not
    been set on the instance.
    """

    def create_dataset(self, *args, **kwargs):
        # Force the group's own track_times setting onto every dataset.
        flag = getattr(self, 'track_times', True)
        kwargs['track_times'] = flag
        return super().create_dataset(*args, **kwargs)

    def create_group(self, *args, **kwargs):
        # Re-class the child group as a wrapper and hand the flag down,
        # so the setting propagates through arbitrarily nested groups.
        child = super().create_group(*args, **kwargs)
        child.__class__ = H5GroupWrapper
        child.track_times = getattr(self, 'track_times', True)
        return child


class H5FileWrapper(h5.File):
    """h5py File subclass that propagates a ``track_times`` flag.

    When ``track_times`` is False, datasets created under this file omit
    creation timestamps, so two files written at different times can have
    identical MD5 hashes. The flag defaults to True when unset.
    """

    def create_group(self, *args, **kwargs):
        # Wrap the new top-level group so the flag flows down the tree.
        child = super().create_group(*args, **kwargs)
        child.__class__ = H5GroupWrapper
        child.track_times = getattr(self, 'track_times', True)
        return child


# %% FUNCTION DEFINITIONS
def file_opener(f, path, mode='r', track_times=True):
def file_opener(f, path, mode='r'):
"""
A file opener helper function with some error handling.
This can open files through a file object, an h5py file, or just the
Expand All @@ -124,9 +89,6 @@ def file_opener(f, path, mode='r', track_times=True):
Accepted values are 'r' (read only), 'w' (write; default) or 'a'
(append).
Ignored if file is a file object.
track_times : bool, optional
If set to *True* (default), repeated hickling will produce different
files.
"""

Expand Down Expand Up @@ -154,7 +116,7 @@ def file_opener(f, path, mode='r', track_times=True):
"either a filename string, a file object, or"
"an open HDF5-file")
path = ''.join([f.name, path])
h5f = f
h5f = f.file

if path.endswith('/'):
path = path[:-1]
Expand All @@ -167,11 +129,6 @@ def file_opener(f, path, mode='r', track_times=True):
raise FileError("Cannot open file. Please pass either a filename "
"string, a file object, or a h5py.File")

if isinstance(h5f, h5._hl.files.File):
h5f.__class__ = H5FileWrapper
else:
h5f.__class__ = H5GroupWrapper
h5f.track_times = track_times
return(h5f, path, close_flag)


Expand Down Expand Up @@ -211,7 +168,7 @@ def _dump(py_obj, h_group, call_id=None, **kwargs):
item_type = check_iterable_item_type(py_obj)

# item_type == False implies multiple types. Create a dataset
if item_type is False:
if not item_type:
h_subgroup = create_hkl_group(py_obj, h_group, call_id)
for ii, py_subobj in enumerate(py_obj):
_dump(py_subobj, h_subgroup, call_id=ii, **kwargs)
Expand All @@ -232,7 +189,7 @@ def _dump(py_obj, h_group, call_id=None, **kwargs):
create_hkl_dataset(py_obj, h_group, call_id, **kwargs)


def dump(py_obj, file_obj, mode='w', path='/', track_times=True, **kwargs):
def dump(py_obj, file_obj, mode='w', path='/', **kwargs):
"""
Write a hickled representation of `py_obj` to the provided `file_obj`.
Expand All @@ -253,9 +210,6 @@ def dump(py_obj, file_obj, mode='w', path='/', track_times=True, **kwargs):
path : str, optional
Path within HDF5-file or group to save data to.
Defaults to root ('/').
track_times : bool, optional
If set to *True* (default), repeated hickling will produce different
files.
kwargs : keyword arguments
Additional keyword arguments that must be provided to the
:meth:`~h5py._hl.group.Group.create_dataset` method.
Expand All @@ -268,7 +222,7 @@ def dump(py_obj, file_obj, mode='w', path='/', track_times=True, **kwargs):

try:
# Open the file
h5f, path, close_flag = file_opener(file_obj, path, mode, track_times)
h5f, path, close_flag = file_opener(file_obj, path, mode)

# Log which version of python was used to generate the hickle file
pv = sys.version_info
Expand All @@ -294,7 +248,7 @@ def dump(py_obj, file_obj, mode='w', path='/', track_times=True, **kwargs):
# Close the file if requested.
# Closing a file twice will not cause any problems
if close_flag:
h5f.file.close()
h5f.close()


def create_dataset_lookup(py_obj):
Expand Down Expand Up @@ -338,10 +292,31 @@ def create_hkl_dataset(py_obj, h_group, call_id=None, **kwargs):
# Set the name of this dataset
name = 'data%s' % ("_%i" % (call_id) if call_id is not None else '')

# do the creation
h_subgroup = create_dataset(py_obj, h_group, name, **kwargs)
# Try to create the dataset
try:
h_subgroup = create_dataset(py_obj, h_group, name, **kwargs)
# If that fails, pickle the object instead
except Exception as error:
# Make sure builtins loader is loaded
load_loader(object)

# Obtain the proper dataset creator and base type
create_dataset, base_type = types_dict[object]

# Make sure that a group/dataset with name 'name' does not exist
try:
del h_group[name]
except Exception:
pass

# Create the pickled dataset
h_subgroup = create_dataset(py_obj, h_group, name, error, **kwargs)

# Save base type of py_obj
h_subgroup.attrs['base_type'] = base_type
if base_type != b'pickle':

# Save a pickled version of the true type of py_obj if necessary
if base_type != b'pickle' and 'type' not in h_subgroup.attrs:
h_subgroup.attrs['type'] = np.array(pickle.dumps(py_obj.__class__))


Expand All @@ -356,7 +331,10 @@ def create_hkl_group(py_obj, h_group, call_id=None):
"""

# Set the name of this group
name = 'data%s' % ("_%i" % (call_id) if call_id is not None else '')
if isinstance(call_id, str):
name = call_id
else:
name = 'data%s' % ("_%i" % (call_id) if call_id is not None else '')

h_subgroup = h_group.create_group(name)
h_subgroup.attrs['type'] = np.array(pickle.dumps(py_obj.__class__))
Expand All @@ -379,20 +357,18 @@ def create_dict_dataset(py_obj, h_group, name, **kwargs):
call_id (int): index to identify object's relative location in the
iterable.
"""

h_dictgroup = h_group.create_group(name)

for idx, (key, py_subobj) in enumerate(py_obj.items()):
# Obtain the string representation of this key
if isinstance(key, str):
# Get raw string format of string
subgroup_key = "%r" % (key)

# Make sure that the '\\\\' is not in the key, or raise error if so
if '\\\\' in subgroup_key:
raise ValueError("Dict item keys containing the '\\\\' string "
"are not supported!")
else:
subgroup_key = str(key)
# Obtain the raw string representation of this key
subgroup_key = "%r" % (key)

# Make sure that the '\\\\' is not in the key, or raise error if so
if '\\\\' in subgroup_key:
del h_group[name]
raise ValueError("Dict item keys containing the '\\\\' string are "
"not supported!")

# Replace any forward slashes with double backslashes
subgroup_key = subgroup_key.replace('/', '\\\\')
Expand Down Expand Up @@ -531,7 +507,7 @@ def load(file_obj, path='/', safe=True):

# Try to read the provided file_obj as a hickle file
try:
h5f, path, close_flag = file_opener(file_obj, path)
h5f, path, close_flag = file_opener(file_obj, path, 'r')
h_root_group = h5f.get(path)

# Define attributes h_root_group must have
Expand All @@ -541,7 +517,7 @@ def load(file_obj, path='/', safe=True):
# Check if the proper attributes for v3 loading are available
if all(map(h_root_group.attrs.get, v3_attrs)):
# Check if group attribute 'CLASS' has value 'hickle'
if(h_root_group.attrs.get('CLASS') != b'hickle'):
if(h_root_group.attrs['CLASS'] != b'hickle'): # pragma: no cover
# If not, raise error
raise AttributeError("HDF5-file attribute 'CLASS' does not "
"have value 'hickle'!")
Expand All @@ -551,13 +527,13 @@ def load(file_obj, path='/', safe=True):
major_version = int(h_root_group.attrs['VERSION'][0])

# If this cannot be done, then this is not a v3 file
except Exception:
except Exception: # pragma: no cover
raise Exception("This file does not appear to be a hickle v3 "
"file.")

# Else, if the major version is not 3, it is not a v3 file either
else:
if(major_version != 3):
if(major_version != 3): # pragma: no cover
raise Exception("This file does not appear to be a hickle "
"v3 file.")

Expand All @@ -575,18 +551,18 @@ def load(file_obj, path='/', safe=True):
return(py_container[0])

# Else, raise error
else:
else: # pragma: no cover
raise FileError("HDF5-file does not have the proper attributes!")

# If this fails, raise error and provide user with caught error message
except Exception as error:
except Exception as error: # pragma: no cover
raise ValueError("Provided argument 'file_obj' does not appear to be a"
" valid hickle file! (%s)" % (error))
finally:
# Close the file if requested.
# Closing a file twice will not cause any problems
if close_flag:
h5f.file.close()
h5f.close()


def load_dataset(h_node):
Expand Down
13 changes: 8 additions & 5 deletions hickle/loaders/load_astropy.py
Expand Up @@ -23,7 +23,8 @@ def create_astropy_quantity(py_obj, h_group, name, **kwargs):
iterable.
"""

d = h_group.create_dataset(name, data=py_obj.value, dtype='float64')
d = h_group.create_dataset(name, data=py_obj.value, dtype='float64',
**kwargs)
unit = bytes(str(py_obj.unit), 'ascii')
d.attrs['unit'] = unit
return(d)
Expand All @@ -40,7 +41,8 @@ def create_astropy_angle(py_obj, h_group, name, **kwargs):
iterable.
"""

d = h_group.create_dataset(name, data=py_obj.value, dtype='float64')
d = h_group.create_dataset(name, data=py_obj.value, dtype='float64',
**kwargs)
unit = str(py_obj.unit).encode('ascii')
d.attrs['unit'] = unit
return(d)
Expand All @@ -61,7 +63,7 @@ def create_astropy_skycoord(py_obj, h_group, name, **kwargs):
lon = py_obj.data.lon.value
dd = np.column_stack((lon, lat))

d = h_group.create_dataset(name, data=dd, dtype='float64')
d = h_group.create_dataset(name, data=dd, dtype='float64', **kwargs)
lon_unit = str(py_obj.data.lon.unit).encode('ascii')
lat_unit = str(py_obj.data.lat.unit).encode('ascii')
d.attrs['lon_unit'] = lon_unit
Expand Down Expand Up @@ -91,7 +93,7 @@ def create_astropy_time(py_obj, h_group, name, **kwargs):
for item in py_obj.value:
data.append(str(item).encode('ascii'))

d = h_group.create_dataset(name, data=data, dtype=dtype)
d = h_group.create_dataset(name, data=data, dtype=dtype, **kwargs)
fmt = str(py_obj.format).encode('ascii')
scale = str(py_obj.scale).encode('ascii')
d.attrs['format'] = fmt
Expand All @@ -111,7 +113,8 @@ def create_astropy_constant(py_obj, h_group, name, **kwargs):
iterable.
"""

d = h_group.create_dataset(name, data=py_obj.value, dtype='float64')
d = h_group.create_dataset(name, data=py_obj.value, dtype='float64',
**kwargs)
d.attrs["unit"] = str(py_obj.unit)
d.attrs["abbrev"] = str(py_obj.abbrev)
d.attrs["name"] = str(py_obj.name)
Expand Down

0 comments on commit 2224889

Please sign in to comment.