Multi recording archive #1

Draft · wants to merge 29 commits into base: main
Changes from 4 commits

Commits (29)
208d74b
add support for multiple recordings in archives
jhazentia Apr 27, 2023
44462f1
fix SigMFArchiveReader error
jhazentia May 1, 2023
832b731
support single or multiple sigmffiles in archive __init__()
jhazentia May 3, 2023
8d25adf
renamed archive "name" to "path", allow os.PathLike
jhazentia May 3, 2023
4f58453
Fixed bug in checking sigmffiles type
jhazentia May 3, 2023
89242c8
add test for missing name
jhazentia May 3, 2023
0c503ab
require name in SigMFFile constructor
jhazentia May 5, 2023
d234ddf
return single or list of SigMFFiles in fromarchive
jhazentia May 5, 2023
348bed8
fix some formatting, unused imports, docstrings, rename archivereader…
jhazentia May 8, 2023
b6df262
add support for collections in archives, check for path and fileobj i…
jhazentia May 11, 2023
4cfc8c2
rename collectionfile to collection
jhazentia May 12, 2023
ea4e633
make json end of file new line consistent, add support for collection…
jhazentia May 12, 2023
68c6825
add README examples for archives with multiple recordings
jhazentia May 12, 2023
454dd34
fix archive docstring, remove unneeded variables from archivereader
jhazentia May 15, 2023
af9002d
simplify SigMFCollection archive tests
jhazentia May 15, 2023
f1d108b
organize SigMFFile constructor doc string
jhazentia May 15, 2023
a631eb3
clarify different ways to do the same thing in README
jhazentia May 26, 2023
74a7b86
fix typo
jhazentia May 26, 2023
ae4c424
Merge branch 'main' of https://github.com/NTIA/sigmf-python into mult…
jhazentia May 26, 2023
93ab02b
add support for passing SigMFFile objects to SigMFCollection to impro…
jhazentia May 30, 2023
5376ece
fix SigMFCollection docstring
jhazentia Jun 1, 2023
46e7d8f
SigMFCollection set_streams() will check type for each element of met…
jhazentia Jun 1, 2023
660ba82
break up and simplify archive examples in README
jhazentia Jun 1, 2023
e2919d8
fix docstring, add ability to control pretty print JSON for archive
jhazentia Jun 1, 2023
e4e1775
update docstrings, formatting
jhazentia Jun 2, 2023
3131683
improve docstrings, remove duplicative test, add test for fromarchive…
jhazentia Jun 2, 2023
29827af
fix error message
jhazentia Jun 5, 2023
b81289b
make archives work when using folders
jhazentia Jun 6, 2023
15ca451
folders in archives are no longer created by default to maintain cons…
jhazentia Jun 8, 2023
98 changes: 98 additions & 0 deletions README.md
@@ -180,6 +180,104 @@ ci16_sigmffile = collection.get_SigMFFile(stream_name='example_ci16')
cf32_sigmffile = collection.get_SigMFFile(stream_name='example_cf32')
```

### Create and Read SigMF Archives with Multiple Recordings

```python
import numpy as np
from sigmf.archivereader import SigMFArchiveReader

from sigmf.sigmffile import (SigMFFile,
SigMFArchive,
SigMFCollection,
fromarchive,
fromfile)


# create data file
random_data1 = np.random.rand(128)
data1_path = "recording1.sigmf-data"
random_data1.tofile(data1_path)

# create metadata
sigmf_file_1 = SigMFFile(name='recording1')
sigmf_file_1.set_global_field("core:datatype", "rf32_le")
sigmf_file_1.add_annotation(start_index=0, length=len(random_data1))
sigmf_file_1.add_capture(start_index=0)
sigmf_file_1.set_data_file(data1_path)

# create archive using SigMFArchive
archive1 = SigMFArchive(sigmffiles=sigmf_file_1,
path="single_recording_archive1.sigmf")

# create archive using SigMFFile archive()
archive1_path = sigmf_file_1.archive(file_path="single_recording_archive2.sigmf")

# create archive using tofile
sigmf_file_1.tofile(file_path="single_recording_archive3.sigmf",
toarchive=True)

# multiple recordings
random_data2 = np.random.rand(128)
data2_path = "recording2.sigmf-data"
random_data2.tofile(data2_path)

# create metadata
sigmf_file_2 = SigMFFile(name='recording2')
sigmf_file_2.set_global_field("core:datatype", "rf32_le")
sigmf_file_2.add_annotation(start_index=0, length=len(random_data2))
sigmf_file_2.add_capture(start_index=0)
sigmf_file_2.set_data_file(data2_path)

# create multi-recording archive using SigMFArchive
sigmffiles = [sigmf_file_1, sigmf_file_2]
archive2 = SigMFArchive(sigmffiles=sigmffiles,
path="multi_recording_archive1.sigmf")

# write metadata files and build a collection from them
sigmf_file_1.tofile("recording1.sigmf-meta")
sigmf_file_2.tofile("recording2.sigmf-meta")
metafiles = ["recording1.sigmf-meta", "recording2.sigmf-meta"]
collection = SigMFCollection(metafiles=metafiles)

# create archive containing the collection using SigMFArchive
archive3 = SigMFArchive(sigmffiles=sigmffiles,
collection=collection,
path="multi_recording_archive2.sigmf")

# create archive using collection archive
archive3_path = collection.archive(file_path="multi_recording_archive3.sigmf")

# create archive using collection tofile
collection.tofile(file_path="multi_recording_archive4.sigmf", toarchive=True)

# read multirecording archives using archive reader
reader = SigMFArchiveReader("multi_recording_archive1.sigmf")
print(len(reader)) # equal to 2 for 2 sigmffiles

# read multirecording archives using fromarchive
sigmffiles = fromarchive("multi_recording_archive1.sigmf")
print(len(sigmffiles)) # equal to 2 for 2 sigmffiles

# read multirecording archives using fromfile
sigmffiles = fromfile("multi_recording_archive1.sigmf")
print(len(sigmffiles)) # equal to 2 for 2 sigmffiles

# read multirecording archives using archive reader with collection
reader = SigMFArchiveReader("multi_recording_archive2.sigmf")
print(len(reader)) # equal to 2 for 2 sigmffiles
print(reader.collection)

# read multirecording archives using fromarchive with collection
sigmffiles, collection = fromarchive("multi_recording_archive2.sigmf")
print(len(sigmffiles)) # equal to 2 for 2 sigmffiles
print(collection)

# read multirecording archives using fromfile with collection
sigmffiles, collection = fromfile("multi_recording_archive2.sigmf")
print(len(sigmffiles)) # equal to 2 for 2 sigmffiles
print(collection)
```
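
Note (illustrative, not part of the PR's README changes): reading back one of the single-recording archives created above should return a single `SigMFFile` rather than a list, per the `fromarchive` behavior in this branch. A minimal sketch, assuming `single_recording_archive1.sigmf` exists on disk:

```python
from sigmf.sigmffile import SigMFFile, fromarchive

# a single-recording archive yields one SigMFFile, not a list
single_recording = fromarchive("single_recording_archive1.sigmf")
assert isinstance(single_recording, SigMFFile)
print(single_recording.name)  # "recording1"
```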

### Load a SigMF Archive and slice its data without untaring it

Since an *archive* is merely a tarball (uncompressed), and since there are many
47 changes: 45 additions & 2 deletions sigmf/archive.py
@@ -16,7 +16,7 @@
import sigmf


from .error import SigMFFileError
from .error import SigMFFileError, SigMFValidationError


SIGMF_ARCHIVE_EXT = ".sigmf"
@@ -53,9 +53,14 @@ class SigMFArchive():
def __init__(self,
sigmffiles: Union["sigmf.sigmffile.SigMFFile",
Iterable["sigmf.sigmffile.SigMFFile"]],
collection: "sigmf.sigmffile.SigMFCollection" = None,
path: Union[str, os.PathLike] = None,
fileobj: BinaryIO = None):

if (not path) and (not fileobj):
raise SigMFFileError("'path' or 'fileobj' required for creating "
"SigMF archive!")

if isinstance(sigmffiles, sigmf.sigmffile.SigMFFile):
self.sigmffiles = [sigmffiles]
elif (hasattr(collections, "Iterable") and
@@ -66,11 +71,16 @@ def __init__(self,
else:
raise SigMFFileError("Unknown type for sigmffiles argument!")

self.path = str(path)
if path:
self.path = str(path)
else:
self.path = None
self.fileobj = fileobj
self.collection = collection

self._check_input()

archive_name = self._get_archive_name()
mode = "a" if fileobj is not None else "w"

Review comment: the eventual PR should call out this change in behavior and note that it could be changed to preserve the original behavior of overwriting the archive.
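
For context on that comment (an illustrative aside, not part of the diff): the behavior difference comes from Python's standard `tarfile` modes, where `"w"` truncates an existing archive while `"a"` appends new members and keeps what is already there. A minimal sketch with hypothetical file names:

```python
import tarfile

# Appending preserves members already in the tarball; opening with
# mode="w" instead would discard them and start a fresh archive.
with tarfile.open("existing_archive.sigmf", mode="a") as tar:
    tar.add("recording2.sigmf-meta", arcname="recording2.sigmf-meta")
    print(tar.getnames())  # existing members plus the newly added one
```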

sigmf_fileobj = self._get_output_fileobj()
try:
@@ -90,6 +100,15 @@ def chmod(tarinfo):
tarinfo.mode = 0o644 # -rw-r--r--
return tarinfo

if collection:
with tempfile.NamedTemporaryFile(mode="w") as tmpfile:
collection.dump(tmpfile, pretty=True)
tmpfile.flush()
collection_filename = archive_name + SIGMF_COLLECTION_EXT
sigmf_archive.add(tmpfile.name,
arcname=collection_filename,
filter=chmod)

for sigmffile in self.sigmffiles:
with tempfile.TemporaryDirectory() as tmpdir:
sigmf_md_filename = sigmffile.name + SIGMF_METADATA_EXT
@@ -117,6 +136,9 @@ def _check_input(self):
self._ensure_sigmffile_name_set(sigmffile)
self._ensure_data_file_set(sigmffile)
self._validate_sigmffile_metadata(sigmffile)
if self.collection:
self._validate_sigmffile_collection(self.collection,
self.sigmffiles)

def _ensure_path_has_correct_extension(self):
path = self.path
@@ -148,6 +170,27 @@ def _ensure_data_file_set(sigmffile):
def _validate_sigmffile_metadata(sigmffile):
sigmffile.validate()

@staticmethod
def _validate_sigmffile_collection(collectionfile, sigmffiles):
if len(collectionfile) != len(sigmffiles):
raise SigMFValidationError("Mismatched number of recordings "
"between sigmffiles and collection "
"file!")
streams_key = collectionfile.STREAMS_KEY
streams = collectionfile.get_collection_field(streams_key)
sigmf_meta_hashes = [s["hash"] for s in streams]
if not streams:
raise SigMFValidationError("No recordings in collection file!")
for sigmffile in sigmffiles:
with tempfile.NamedTemporaryFile(mode="w") as tmpfile:
sigmffile.dump(tmpfile, pretty=True)
tmpfile.flush()
meta_path = tmpfile.name
sigmf_meta_hash = sigmf.sigmf_hash.calculate_sha512(meta_path)
if sigmf_meta_hash not in sigmf_meta_hashes:
raise SigMFValidationError("SigMFFile given that "
"is not in collection file!")

def _get_archive_name(self):
if self.fileobj and not self.path:
pathname = self.fileobj.name
23 changes: 19 additions & 4 deletions sigmf/archivereader.py
@@ -9,8 +9,11 @@
import os
import tarfile

from .sigmffile import SigMFFile
from .archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SIGMF_ARCHIVE_EXT
from .sigmffile import SigMFCollection, SigMFFile
from .archive import (SIGMF_COLLECTION_EXT,
SIGMF_DATASET_EXT,
SIGMF_METADATA_EXT,
SIGMF_ARCHIVE_EXT)
from .error import SigMFFileError


@@ -45,6 +48,7 @@ def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_bu
sigmffile_name = None
self.sigmffiles = []
data_found = False
collection_metadata = {}

for memb in tar_obj.getmembers():
if memb.isdir(): # memb.type == tarfile.DIRTYPE:
@@ -64,11 +68,12 @@

_, sigmffile_name = os.path.split(memb.name)
sigmffile_name, _ = os.path.splitext(sigmffile_name)

elif memb.name.endswith(SIGMF_DATASET_EXT):
data_offset_size = memb.offset_data, memb.size
data_found = True

elif memb.name.endswith(SIGMF_COLLECTION_EXT):
with tar_obj.extractfile(memb) as collection_f:
collection_metadata = collection_f.read()
else:
print('A regular file', memb.name, 'was found but ignored in the archive')
else:
@@ -92,6 +97,16 @@
data_offset_size = None
json_contents = None
sigmffile_name = None
if collection_metadata:
# Currently the SigMFCollection class does not support getting
# SigMFFiles (SigMFCollection.get_SigMFFile()) when created
# here in SigMFArchiveReader. This is because the SigMF
# metadata files are not extracted from the tarfile to the
# file system.
self.collection = SigMFCollection(metadata=collection_metadata,
skip_checksums=True)
else:
self.collection = None

if not data_found:
raise SigMFFileError('No .sigmf-data file found in archive!')
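A usage note on the reader change above (an illustrative sketch, not part of the diff): because nothing is extracted to disk, `reader.collection.get_SigMFFile()` cannot load recordings, so the already-parsed objects in `reader.sigmffiles` are the way to access them. Assuming the `multi_recording_archive2.sigmf` from the README example:

```python
from sigmf.archivereader import SigMFArchiveReader

reader = SigMFArchiveReader("multi_recording_archive2.sigmf")
print(reader.collection)  # collection metadata parsed from inside the tar
for recording in reader.sigmffiles:
    # each entry is a SigMFFile whose metadata and data were read from the archive
    print(recording.name, recording.get_global_field("core:datatype"))
```
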
66 changes: 53 additions & 13 deletions sigmf/sigmffile.py
@@ -78,6 +78,7 @@ def dump(self, filep, pretty=True):
indent=4 if pretty else None,
separators=(',', ': ') if pretty else None,
)
filep.write("\n")

def dumps(self, pretty=True):
'''
@@ -549,7 +550,7 @@ def archive(self, file_path=None, fileobj=None):
if file_path is None:
file_path = self.name

archive = SigMFArchive(self, file_path, fileobj)
archive = SigMFArchive(self, path=file_path, fileobj=fileobj)
return archive.path

def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False):
@@ -574,7 +575,6 @@ def tofile(self, file_path, pretty=True, toarchive=False, skip_validate=False):
else:
with open(fns['meta_fn'], 'w') as fp:
self.dump(fp, pretty=pretty)
fp.write('\n') # text files should end in carriage return

def read_samples_in_capture(self, index=0, autoscale=True):
'''
@@ -634,7 +634,10 @@ def read_samples(self, start_index=0, count=-1, autoscale=True, raw_components=F

if not self._is_conforming_dataset():
warnings.warn(f'Recording dataset appears non-compliant, resulting data may be erroneous')
return self._read_datafile(first_byte, count * self.get_num_channels(), autoscale, False)
return self._read_datafile(first_byte,
count * self.get_num_channels(),
autoscale,
raw_components)

def _read_datafile(self, first_byte, nitems, autoscale, raw_components):
'''
@@ -709,8 +712,10 @@ def __init__(self, metafiles=None, metadata=None, skip_checksums=False):
self._metadata = {self.COLLECTION_KEY:{}}
self._metadata[self.COLLECTION_KEY][self.VERSION_KEY] = __version__
self._metadata[self.COLLECTION_KEY][self.STREAMS_KEY] = []
else:
elif isinstance(metadata, dict):
self._metadata = metadata
else:
self._metadata = json.loads(metadata)

if metafiles is None:
self.metafiles = []
@@ -726,6 +731,15 @@ def __len__(self):
'''
return len(self.get_stream_names())

def __eq__(self, other):
"""Define equality between two `SigMFCollections's by comparing
metadata.
"""
if isinstance(other, SigMFCollection):
return self._metadata == other._metadata

return False

def verify_stream_hashes(self):
'''
compares the stream hashes in the collection metadata to the metadata files
@@ -789,21 +803,41 @@ def get_collection_field(self, key, default=None):
"""
return self._metadata[self.COLLECTION_KEY].get(key, default)

def tofile(self, file_path, pretty=True):
def archive(self, file_path=None, fileobj=None):
"""Dump contents to SigMF archive format.

`file_path` is passed to SigMFArchive `path` and `fileobj` is passed to
SigMFArchive `fileobj`.

"""

sigmffiles = []
for name in self.get_stream_names():
sigmffile = self.get_SigMFFile(name)
sigmffiles.append(sigmffile)
archive = SigMFArchive(sigmffiles, self, file_path, fileobj)
return archive.path

def tofile(self, file_path, pretty=True, toarchive=False):
'''
Write metadata file
Write metadata file or create archive.

Parameters
----------
file_path : string
Location to save.
pretty : bool, default True
When True will write more human-readable output, otherwise will be flat JSON.
toarchive : bool, default False
If True, create an archive from the collection file and recordings
instead of creating collection metadata file.
'''
fns = get_sigmf_filenames(file_path)
with open(fns['collection_fn'], 'w') as fp:
self.dump(fp, pretty=pretty)
fp.write('\n') # text files should end in carriage return
if toarchive:
self.archive(fns['archive_fn'])
else:
with open(fns['collection_fn'], 'w') as fp:
self.dump(fp, pretty=pretty)

def get_SigMFFile(self, stream_name=None, stream_index=None):
'''
@@ -923,17 +957,23 @@ def get_dataset_filename_from_metadata(meta_fn, metadata=None):


def fromarchive(archive_path, dir=None):
"""Extract an archive and return containing SigMFFiles.
"""Extract an archive and return containing SigMFFiles and SigMFCollection.

The `dir` parameter is no longer used as this function has been changed to
access SigMF archives without extracting them.
"""
from .archivereader import SigMFArchiveReader
sigmffiles = SigMFArchiveReader(archive_path).sigmffiles
reader = SigMFArchiveReader(archive_path)
sigmffiles = reader.sigmffiles
sigmffile_ret = None
if len(sigmffiles) == 1:
return sigmffiles[0]
sigmffile_ret = sigmffiles[0]
else:
sigmffile_ret = sigmffiles
if reader.collection:
return sigmffile_ret, reader.collection
else:
return sigmffiles
return sigmffile_ret


def fromfile(filename, skip_checksum=False):
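
Finally, a note on the `fromarchive` change above: its return shape now depends on the archive contents (a single `SigMFFile`, a list of them, or a `(sigmffiles, collection)` tuple when a collection file is present). A hedged sketch of how calling code might normalize the three cases; the helper name is illustrative and not part of this PR:

```python
from sigmf.sigmffile import SigMFFile, fromarchive

def load_archive(archive_path):
    """Return (list_of_sigmffiles, collection_or_None) for any archive layout."""
    result = fromarchive(archive_path)
    collection = None
    if isinstance(result, tuple):  # archive contained a .sigmf-collection file
        recordings, collection = result
    else:
        recordings = result
    if isinstance(recordings, SigMFFile):  # single-recording archive
        recordings = [recordings]
    return recordings, collection

recordings, collection = load_archive("multi_recording_archive2.sigmf")
print(len(recordings), collection is not None)  # 2 True
```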