Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multi recording archive #1

Draft
wants to merge 29 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
208d74b
add support for multiple recordings in archives
jhazentia Apr 27, 2023
44462f1
fix SigMFArchiveReader error
jhazentia May 1, 2023
832b731
support single or multiple sigmffiles in archive __init__()
jhazentia May 3, 2023
8d25adf
renamed archive "name" to "path", allow os.PathLike
jhazentia May 3, 2023
4f58453
Fixed bug in checking sigmffiles type
jhazentia May 3, 2023
89242c8
add test for missing name
jhazentia May 3, 2023
0c503ab
require name in SigMFFile constructor
jhazentia May 5, 2023
d234ddf
return single or list of SigMFFiles in fromarchive
jhazentia May 5, 2023
348bed8
fix some formatting, unused imports, docstrings, rename archivereader…
jhazentia May 8, 2023
b6df262
add support for collections in archives, check for path and fileobj i…
jhazentia May 11, 2023
4cfc8c2
rename collectionfile to collection
jhazentia May 12, 2023
ea4e633
make json end of file new line consistent, add support for collection…
jhazentia May 12, 2023
68c6825
add README examples for archives with multiple recordings
jhazentia May 12, 2023
454dd34
fix archive docstring, remove unneeded variables from archivereader
jhazentia May 15, 2023
af9002d
simplify SigMFCollection archive tests
jhazentia May 15, 2023
f1d108b
organize SigMFFile constructor doc string
jhazentia May 15, 2023
a631eb3
clarify different ways to do the same thing in README
jhazentia May 26, 2023
74a7b86
fix typo
jhazentia May 26, 2023
ae4c424
Merge branch 'main' of https://github.com/NTIA/sigmf-python into mult…
jhazentia May 26, 2023
93ab02b
add support for passing SigMFFile objects to SigMFCollection to impro…
jhazentia May 30, 2023
5376ece
fix SigMFCollection docstring
jhazentia Jun 1, 2023
46e7d8f
SigMFCollection set_streams() will check type for each element of met…
jhazentia Jun 1, 2023
660ba82
break up and simplify archive examples in README
jhazentia Jun 1, 2023
e2919d8
fix docstring, add ability to control pretty print JSON for archive
jhazentia Jun 1, 2023
e4e1775
update docstrings, formatting
jhazentia Jun 2, 2023
3131683
improve docstrings, remove duplicative test, add test for fromarchive…
jhazentia Jun 2, 2023
29827af
fix error message
jhazentia Jun 5, 2023
b81289b
make archives work when using folders
jhazentia Jun 6, 2023
15ca451
folders in archives are no longer created by default to maintain cons…
jhazentia Jun 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
145 changes: 85 additions & 60 deletions sigmf/archive.py
Expand Up @@ -6,10 +6,15 @@

"""Create and extract SigMF archives."""

import collections
import os
import shutil
import tarfile
import tempfile
from typing import BinaryIO, Iterable, Union

import sigmf


from .error import SigMFFileError

Expand All @@ -21,108 +26,128 @@


class SigMFArchive():
"""Archive a SigMFFile.
"""Archive one or more `SigMFFile`s.

A `.sigmf` file must include both valid metadata and data.
If `self.data_file` is not set or the requested output file
is not writable, raise `SigMFFileError`.

Parameters:

sigmffile -- A SigMFFile object with valid metadata and data_file
sigmffile -- An iterable of SigMFFile objects with valid metadata and data_files

path -- path to archive file to create. If file exists, overwrite.
If `path` doesn't end in .sigmf, it will be appended. The
`self.path` instance variable will be updated upon
successful writing of the archive to point to the final
archive path.

name -- path to archive file to create. If file exists, overwrite.
If `name` doesn't end in .sigmf, it will be appended.
For example: if `name` == "/tmp/archive1", then the
following archive will be created:
/tmp/archive1.sigmf
- archive1/
- archive1.sigmf-meta
- archive1.sigmf-data

fileobj -- If `fileobj` is specified, it is used as an alternative to
a file object opened in binary mode for `name`. It is
supposed to be at position 0. `name` is not required, but
if specified will be used to determine the directory and
file names within the archive. `fileobj` won't be closed.
For example: if `name` == "archive1" and fileobj is given,
a tar archive will be written to fileobj with the
following structure:
- archive1/
- archive1.sigmf-meta
- archive1.sigmf-data
a file object opened in binary mode for `path`. If
`fileobj` is an open tarfile, it will be appended to. It is
supposed to be at position 0. `fileobj` won't be closed. If
`fileobj` is given, `path` has no effect.
"""
def __init__(self, sigmffile, name=None, fileobj=None):
self.sigmffile = sigmffile
self.name = name
def __init__(self, sigmffiles : Union["SigMFFile", Iterable["SigMFFile"]], path : Union[str, os.PathLike] = None, fileobj : BinaryIO =None):

if isinstance(sigmffiles, sigmf.sigmffile.SigMFFile):
self.sigmffiles = [sigmffiles]
elif hasattr(collections, "Iterable") and isinstance(sigmffiles, collections.Iterable):
self.sigmffiles = sigmffiles
elif isinstance(sigmffiles, collections.abc.Iterable): # python 3.10
self.sigmffiles = sigmffiles
else:
raise SigMFFileError("Unknown type for sigmffiles argument!")


self.path = str(path)
self.fileobj = fileobj

self._check_input()

archive_name = self._get_archive_name()
mode = "a" if fileobj is not None else "w"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventual PR should call out this change in behavior and note that it could be changed to preserve the original behavior of writing over the archive.

sigmf_fileobj = self._get_output_fileobj()
sigmf_archive = tarfile.TarFile(mode="w",
fileobj=sigmf_fileobj,
format=tarfile.PAX_FORMAT)
tmpdir = tempfile.mkdtemp()
sigmf_md_filename = archive_name + SIGMF_METADATA_EXT
sigmf_md_path = os.path.join(tmpdir, sigmf_md_filename)
sigmf_data_filename = archive_name + SIGMF_DATASET_EXT
sigmf_data_path = os.path.join(tmpdir, sigmf_data_filename)

with open(sigmf_md_path, "w") as mdfile:
self.sigmffile.dump(mdfile, pretty=True)

shutil.copy(self.sigmffile.data_file, sigmf_data_path)

try:
sigmf_archive = tarfile.TarFile(mode=mode,
fileobj=sigmf_fileobj,
format=tarfile.PAX_FORMAT)
except tarfile.ReadError:
# fileobj doesn't contain any archives yet, so reopen in 'w' mode
sigmf_archive = tarfile.TarFile(mode='w',
fileobj=sigmf_fileobj,
format=tarfile.PAX_FORMAT)

def chmod(tarinfo):
if tarinfo.isdir():
tarinfo.mode = 0o755 # drwxr-xr-x
else:
tarinfo.mode = 0o644 # -rw-r--r--
return tarinfo

sigmf_archive.add(tmpdir, arcname=archive_name, filter=chmod)
for sigmffile in self.sigmffiles:
with tempfile.TemporaryDirectory() as tmpdir:
sigmf_md_filename = sigmffile.name + SIGMF_METADATA_EXT
sigmf_md_path = os.path.join(tmpdir, sigmf_md_filename)
sigmf_data_filename = sigmffile.name + SIGMF_DATASET_EXT
sigmf_data_path = os.path.join(tmpdir, sigmf_data_filename)

with open(sigmf_md_path, "w") as mdfile:
sigmffile.dump(mdfile, pretty=True)

shutil.copy(sigmffile.data_file, sigmf_data_path)
sigmf_archive.add(tmpdir, arcname=sigmffile.name, filter=chmod)

sigmf_archive.close()
if not fileobj:
sigmf_fileobj.close()

shutil.rmtree(tmpdir)
else:
sigmf_fileobj.seek(0) # ensure next open can read this as a tar

self.path = sigmf_archive.name

def _check_input(self):
self._ensure_name_has_correct_extension()
self._ensure_data_file_set()
self._validate_sigmffile_metadata()

def _ensure_name_has_correct_extension(self):
name = self.name
if name is None:
self._ensure_path_has_correct_extension()
for sigmffile in self.sigmffiles:
self._ensure_sigmffile_name_set(sigmffile)
self._ensure_data_file_set(sigmffile)
self._validate_sigmffile_metadata(sigmffile)

def _ensure_path_has_correct_extension(self):
path = self.path
if path is None:
return

has_extension = "." in name
has_correct_extension = name.endswith(SIGMF_ARCHIVE_EXT)
has_extension = "." in path
has_correct_extension = path.endswith(SIGMF_ARCHIVE_EXT)
if has_extension and not has_correct_extension:
apparent_ext = os.path.splitext(name)[-1]
apparent_ext = os.path.splitext(path)[-1]
err = "extension {} != {}".format(apparent_ext, SIGMF_ARCHIVE_EXT)
raise SigMFFileError(err)

self.name = name if has_correct_extension else name + SIGMF_ARCHIVE_EXT
self.path = path if has_correct_extension else path + SIGMF_ARCHIVE_EXT

def _ensure_data_file_set(self):
if not self.sigmffile.data_file:
@staticmethod
def _ensure_sigmffile_name_set(sigmffile):
if not sigmffile.name:
err = "the `name` attribute must be set to pass to `SigMFArchive`"
raise SigMFFileError(err)

@staticmethod
def _ensure_data_file_set(sigmffile):
if not sigmffile.data_file:
err = "no data file - use `set_data_file`"
raise SigMFFileError(err)

def _validate_sigmffile_metadata(self):
self.sigmffile.validate()
@staticmethod
def _validate_sigmffile_metadata(sigmffile):
sigmffile.validate()

def _get_archive_name(self):
if self.fileobj and not self.name:
if self.fileobj and not self.path:
pathname = self.fileobj.name
else:
pathname = self.name
pathname = self.path

filename = os.path.split(pathname)[-1]
archive_name, archive_ext = os.path.splitext(filename)
Expand All @@ -135,7 +160,7 @@ def _get_output_fileobj(self):
if self.fileobj:
err = "fileobj {!r} is not byte-writable".format(self.fileobj)
else:
err = "can't open {!r} for writing".format(self.name)
err = "can't open {!r} for writing".format(self.path)

raise SigMFFileError(err)

Expand All @@ -146,6 +171,6 @@ def _get_open_fileobj(self):
fileobj = self.fileobj
fileobj.write(bytes()) # force exception if not byte-writable
else:
fileobj = open(self.name, "wb")
fileobj = open(self.path, "wb")

return fileobj
112 changes: 64 additions & 48 deletions sigmf/archivereader.py
Expand Up @@ -28,65 +28,81 @@ class SigMFArchiveReader():
"""
def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None):
self.name = name
if self.name is not None:
if not name.endswith(SIGMF_ARCHIVE_EXT):
err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT)
raise SigMFFileError(err)
tar_obj = None
try:
if self.name is not None:
if not name.endswith(SIGMF_ARCHIVE_EXT):
err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT)
raise SigMFFileError(err)

tar_obj = tarfile.open(self.name)
tar_obj = tarfile.open(self.name)

elif archive_buffer is not None:
tar_obj = tarfile.open(fileobj=archive_buffer, mode='r:')
elif archive_buffer is not None:
tar_obj = tarfile.open(fileobj=archive_buffer, mode='r:')

else:
raise ValueError('In sigmf.archivereader.__init__(), either `name` or `archive_buffer` must be not None')

json_contents = None
data_offset_size = None

for memb in tar_obj.getmembers():
if memb.isdir(): # memb.type == tarfile.DIRTYPE:
# the directory structure will be reflected in the member name
continue

elif memb.isfile(): # memb.type == tarfile.REGTYPE:
if memb.name.endswith(SIGMF_METADATA_EXT):
json_contents = memb.name
if data_offset_size is None:
# consider a warnings.warn() here; the datafile should be earlier in the
# archive than the metadata, so that updating it (like, adding an annotation)
# is fast.
pass
with tar_obj.extractfile(memb) as memb_fid:
json_contents = memb_fid.read()

elif memb.name.endswith(SIGMF_DATASET_EXT):
data_offset_size = memb.offset_data, memb.size

else:
print('A regular file', memb.name, 'was found but ignored in the archive')
else:
print('A member of type', memb.type, 'and name', memb.name, 'was found but not handled, just FYI.')

if data_offset_size is None:
raise SigMFFileError('No .sigmf-data file found in archive!')
raise ValueError('In sigmf.archivereader.__init__(), either `name` or `archive_buffer` must be not None')

json_contents = None
data_offset_size = None
sigmffile_name = None
self.sigmffiles = []
data_found = False

for memb in tar_obj.getmembers():
if memb.isdir(): # memb.type == tarfile.DIRTYPE:
# the directory structure will be reflected in the member name
continue

elif memb.isfile(): # memb.type == tarfile.REGTYPE:
if memb.name.endswith(SIGMF_METADATA_EXT):
json_contents = memb.name
if data_offset_size is None:
# consider a warnings.warn() here; the datafile should be earlier in the
# archive than the metadata, so that updating it (like, adding an annotation)
# is fast.
pass
with tar_obj.extractfile(memb) as memb_fid:
json_contents = memb_fid.read()

_, sigmffile_name = os.path.split(memb.name)
sigmffile_name, _ = os.path.splitext(sigmffile_name)


elif memb.name.endswith(SIGMF_DATASET_EXT):
data_offset_size = memb.offset_data, memb.size
data_found = True

else:
print('A regular file', memb.name, 'was found but ignored in the archive')
else:
print('A member of type', memb.type, 'and name', memb.name, 'was found but not handled, just FYI.')

self.sigmffile = SigMFFile(metadata=json_contents)
valid_md = self.sigmffile.validate()
if data_offset_size is not None and json_contents is not None:
sigmffile = SigMFFile(sigmffile_name, metadata=json_contents)
valid_md = sigmffile.validate()

self.sigmffile.set_data_file(self.name, data_buffer=archive_buffer, skip_checksum=skip_checksum, offset=data_offset_size[0],
size_bytes=data_offset_size[1], map_readonly=map_readonly)
sigmffile.set_data_file(self.name, data_buffer=archive_buffer, skip_checksum=skip_checksum, offset=data_offset_size[0],
size_bytes=data_offset_size[1], map_readonly=map_readonly)

self.ndim = self.sigmffile.ndim
self.shape = self.sigmffile.shape
self.ndim = sigmffile.ndim
self.shape = sigmffile.shape
self.sigmffiles.append(sigmffile)
data_offset_size = None
json_contents = None
sigmffile_name = None


tar_obj.close()
if not data_found:
raise SigMFFileError('No .sigmf-data file found in archive!')
finally:
if tar_obj: tar_obj.close()

def __len__(self):
return self.sigmffile.__len__()
return len(self.sigmffiles)

def __iter__(self):
return self.sigmffile.__iter__()
return self.sigmffiles.__iter__()

def __getitem__(self, sli):
return self.sigmffile.__getitem__(sli)
return self.sigmffiles.__getitem__(sli)