Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multi recording archive #1

Draft
wants to merge 29 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
208d74b
add support for multiple recordings in archives
jhazentia Apr 27, 2023
44462f1
fix SigMFArchiveReader error
jhazentia May 1, 2023
832b731
support single or multiple sigmffiles in archive __init__()
jhazentia May 3, 2023
8d25adf
renamed archive "name" to "path", allow os.PathLike
jhazentia May 3, 2023
4f58453
Fixed bug in checking sigmffiles type
jhazentia May 3, 2023
89242c8
add test for missing name
jhazentia May 3, 2023
0c503ab
require name in SigMFFile constructor
jhazentia May 5, 2023
d234ddf
return single or list of SigMFFiles in fromarchive
jhazentia May 5, 2023
348bed8
fix some formatting, unused imports, docstrings, rename archivereader…
jhazentia May 8, 2023
b6df262
add support for collections in archives, check for path and fileobj i…
jhazentia May 11, 2023
4cfc8c2
rename collectionfile to collection
jhazentia May 12, 2023
ea4e633
make json end of file new line consistent, add support for collection…
jhazentia May 12, 2023
68c6825
add README examples for archives with multiple recordings
jhazentia May 12, 2023
454dd34
fix archive docstring, remove unneeded variables from archivereader
jhazentia May 15, 2023
af9002d
simplify SigMFCollection archive tests
jhazentia May 15, 2023
f1d108b
organize SigMFFile constructor doc string
jhazentia May 15, 2023
a631eb3
clarify different ways to do the same thing in README
jhazentia May 26, 2023
74a7b86
fix typo
jhazentia May 26, 2023
ae4c424
Merge branch 'main' of https://github.com/NTIA/sigmf-python into mult…
jhazentia May 26, 2023
93ab02b
add support for passing SigMFFile objects to SigMFCollection to impro…
jhazentia May 30, 2023
5376ece
fix SigMFCollection docstring
jhazentia Jun 1, 2023
46e7d8f
SigMFCollection set_streams() will check type for each element of met…
jhazentia Jun 1, 2023
660ba82
break up and simplify archive examples in README
jhazentia Jun 1, 2023
e2919d8
fix docstring, add ability to control pretty print JSON for archive
jhazentia Jun 1, 2023
e4e1775
update docstrings, formatting
jhazentia Jun 2, 2023
3131683
improve docstrings, remove duplicative test, add test for fromarchive…
jhazentia Jun 2, 2023
29827af
fix error message
jhazentia Jun 5, 2023
b81289b
make archives work when using folders
jhazentia Jun 6, 2023
15ca451
folders in archives are no longer created by default to maintain cons…
jhazentia Jun 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -276,6 +276,7 @@ print(len(sigmffiles)) # equal to 2 for 2 sigmffiles
reader = SigMFArchiveReader("multi_recording_archive2.sigmf")
print(len(reader)) # equal to 2 for 2 sigmffiles
print(reader.collection)
print(len(reader.collection.sigmffiles)) # get SigMFFiles from collection

# read multirecording archives using fromarchive with collection
sigmffiles, collection = fromarchive("multi_recording_archive2.sigmf")
Expand Down
9 changes: 2 additions & 7 deletions sigmf/archivereader.py
Expand Up @@ -96,13 +96,8 @@ def __init__(self, path=None, skip_checksum=False, map_readonly=True, archive_bu
json_contents = None
sigmffile_name = None
if collection_metadata:
# Currently the SigMFCollection class does not support getting
# SigMFFiles (SigMFCollection.get_SigMFFile()) when created
# here in SigMFArchiveReader. This is because the SigMF
# metadata files are not extracted from the tarfile to the
# file system.
self.collection = SigMFCollection(metadata=collection_metadata,
skip_checksums=True)
self.collection = SigMFCollection(metafiles=self.sigmffiles,
metadata=collection_metadata)
else:
self.collection = None

Expand Down
78 changes: 57 additions & 21 deletions sigmf/sigmffile.py
Expand Up @@ -8,6 +8,7 @@

from collections import OrderedDict
import codecs
from io import BytesIO
import json
import tarfile
import tempfile
Expand Down Expand Up @@ -98,7 +99,7 @@ def dumps(self, pretty=True):
self.ordered_metadata(),
indent=4 if pretty else None,
separators=(',', ': ') if pretty else None,
)
) + "\n"

class SigMFFile(SigMFMetafile):
START_INDEX_KEY = "core:sample_start"
Expand Down Expand Up @@ -697,9 +698,10 @@ def __init__(self, metafiles=None, metadata=None, skip_checksums=False):

Parameters:

metafiles -- A list of SigMF metadata filenames objects comprising the Collection,
there must be at least one file. If the files do not exist, this will
raise a SigMFFileError.
metafiles -- A list of SigMF metadata filenames or SigMFFile
jhazentia marked this conversation as resolved.
Show resolved Hide resolved
objects comprising the Collection, there must be at least
one file. If the files do not exist, this will raise a
SigMFFileError.

metadata -- collection metadata to use, if not provided this will populate a
minimal set of default metadata. The core:streams field will be
Expand Down Expand Up @@ -751,23 +753,58 @@ def verify_stream_hashes(self):
if path.isfile(metafile_name):
new_hash = sigmf_hash.calculate_sha512(filename=metafile_name)
if old_hash != new_hash:
raise SigMFFileError(f'Calculated file hash for {metafile_name} does not match collection metadata.')
raise SigMFFileError('Calculated file hash for metadata '
f'file {metafile_name} does not '
'match collection metadata.')
sigmffile = [x for x in self.sigmffiles
if x.name == stream.get('name')][0]
jhazentia marked this conversation as resolved.
Show resolved Hide resolved
sigmffile_meta = sigmffile.dumps()
sigmffile_bytes = sigmffile_meta.encode('utf-8')
size_of_meta = len(sigmffile_bytes)
sigmffile_hash = sigmf_hash.calculate_sha512(
fileobj=BytesIO(sigmffile_bytes),
offset_and_size=(0, size_of_meta)
)
if old_hash != sigmffile_hash:
raise SigMFFileError('Calculated file hash for SigMFFile '
f'{sigmffile.name} does not match '
'collection metadata.')

def set_streams(self, metafiles):
'''
configures the collection `core:streams` field from the specified list of metafiles
'''
self.metafiles = metafiles
streams = []
for metafile in self.metafiles:
if metafile.endswith('.sigmf-meta') and path.isfile(metafile):
stream = {
"name": get_sigmf_filenames(metafile)['base_fn'],
"hash": sigmf_hash.calculate_sha512(filename=metafile)
}
streams.append(stream)
else:
raise SigMFFileError(f'Specifed stream file {metafile} is not a valid SigMF Metadata file')
sigmffile_names = []
self.sigmffiles = []
if isinstance(metafiles[0], SigMFFile):
jhazentia marked this conversation as resolved.
Show resolved Hide resolved
for sigmffile in metafiles:
sigmffile_names.append(sigmffile.name + SIGMF_METADATA_EXT)
sigmffile_meta = sigmffile.dumps()
sigmffile_bytes = sigmffile_meta.encode('utf-8')
size_of_meta = len(sigmffile_bytes)
streams.append({
"name": sigmffile.name,
"hash": sigmf_hash.calculate_sha512(
fileobj=BytesIO(sigmffile_bytes),
offset_and_size=(0, size_of_meta))
})
self.sigmffiles.append(sigmffile)
self.metafiles = sigmffile_names
else:
self.metafiles = metafiles
for metafile in self.metafiles:
if (metafile.endswith(SIGMF_METADATA_EXT) and
path.isfile(metafile)):
stream = {
"name": get_sigmf_filenames(metafile)['base_fn'],
"hash": sigmf_hash.calculate_sha512(filename=metafile)
}
streams.append(stream)
else:
raise SigMFFileError(f'Specifed stream file {metafile} is'
' not a valid SigMF Metadata file')
self.sigmffiles.append(fromfile(metafile, skip_checksum=self.skip_checksums))
self.set_collection_field(self.STREAMS_KEY, streams)

def get_stream_names(self):
Expand Down Expand Up @@ -843,15 +880,14 @@ def get_SigMFFile(self, stream_name=None, stream_index=None):
'''
Returns the SigMFFile instance of the specified stream if it exists
'''
metafile = None
sigmffile = None
if stream_name is not None:
if stream_name in self.get_stream_names():
metafile = stream_name + '.sigmf_meta'
sigmffile = [x for x in self.sigmffiles
if x.name == stream_name][0]
if stream_index is not None and stream_index < self.__len__():
metafile = self.get_stream_names()[stream_index] + '.sigmf_meta'
sigmffile = self.sigmffiles[stream_index]
return sigmffile

if metafile is not None:
return fromfile(metafile, skip_checksum=self.skip_checksums)

def dtype_info(datatype):
"""
Expand Down
25 changes: 17 additions & 8 deletions tests/test_archivereader.py
Expand Up @@ -73,39 +73,48 @@ def test_extract_multi_recording(test_sigmffile, test_alternate_sigmffile):


def test_extract_single_recording_with_collection(test_sigmffile):
with tempfile.TemporaryDirectory() as tmpdir:
meta_filepath = os.path.join(tmpdir,
test_sigmffile.name + SIGMF_METADATA_EXT)
try:
meta_filepath = test_sigmffile.name + SIGMF_METADATA_EXT
with open(meta_filepath, "w") as meta_fd:
test_sigmffile.dump(meta_fd)
collection = SigMFCollection(metafiles=[meta_filepath])
archive_path = os.path.join(tmpdir, "test_archive.sigmf")
archive_path = "test_archive.sigmf"
arch = SigMFArchive(test_sigmffile, collection, path=archive_path)
reader = SigMFArchiveReader(arch.path)
assert len(reader) == 1
actual_sigmffile = reader[0]
assert test_sigmffile == actual_sigmffile
assert collection == reader.collection
finally:
if os.path.exists(meta_filepath):
os.remove(meta_filepath)
if os.path.exists(archive_path):
os.remove(archive_path)


def test_extract_multi_recording_with_collection(test_sigmffile,
test_alternate_sigmffile):
with tempfile.TemporaryDirectory() as tmpdir:
try:
meta1_filepath = test_sigmffile.name + SIGMF_METADATA_EXT
meta1_filepath = os.path.join(tmpdir, meta1_filepath)
with open(meta1_filepath, "w") as meta_fd:
test_sigmffile.dump(meta_fd)
meta2_filepath = test_alternate_sigmffile.name + SIGMF_METADATA_EXT
meta2_filepath = os.path.join(tmpdir, meta2_filepath)
with open(meta2_filepath, "w") as meta_fd:
test_alternate_sigmffile.dump(meta_fd)
collection = SigMFCollection(metafiles=[meta1_filepath,
meta2_filepath])
archive_path = os.path.join(tmpdir, "test_archive.sigmf")
archive_path = "test_archive.sigmf"
input_sigmffiles = [test_sigmffile, test_alternate_sigmffile]
arch = SigMFArchive(input_sigmffiles, collection, path=archive_path)
reader = SigMFArchiveReader(arch.path)
assert len(reader) == 2 # number of SigMFFiles
for actual_sigmffile in reader:
assert actual_sigmffile in input_sigmffiles
assert collection == reader.collection
finally:
if os.path.exists(meta1_filepath):
os.remove(meta1_filepath)
if os.path.exists(meta2_filepath):
os.remove(meta2_filepath)
if os.path.exists(archive_path):
os.remove(archive_path)
49 changes: 48 additions & 1 deletion tests/test_sigmffile.py
Expand Up @@ -27,7 +27,7 @@

from sigmf import sigmffile, utils
from sigmf.archivereader import SigMFArchiveReader
from sigmf.sigmffile import SigMFFile, fromarchive
from sigmf.sigmffile import SigMFCollection, SigMFFile, fromarchive
from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT, SigMFArchive

from .testdata import *
Expand Down Expand Up @@ -334,6 +334,8 @@ def test_archive_collection(test_sigmffile,
for input_sigmf_file in input_sigmf_files:
assert input_sigmf_file in archive_reader.sigmffiles
assert test_collection == archive_reader.collection
for input_sigmf_file in input_sigmf_files:
assert input_sigmf_file in test_collection.sigmffiles
finally:
for sigmf_meta_file in sigmf_meta_files:
if os.path.exists(sigmf_meta_file):
Expand All @@ -342,3 +344,48 @@ def test_archive_collection(test_sigmffile,
filename = sigmf_file.name + SIGMF_DATASET_EXT
if os.path.exists(filename):
os.remove(filename)


def test_create_collection_with_sigmffiles(test_sigmffile,
                                           test_alternate_sigmffile,
                                           test_alternate_sigmffile_2):
    """
    Create a SigMFCollection by passing the three fixture objects straight
    to `metafiles`, then check that every one of them is reported as a
    stream and can be fetched back both by stream name and by stream index.
    """
    expected_files = [
        test_sigmffile,
        test_alternate_sigmffile,
        test_alternate_sigmffile_2,
    ]
    collection = SigMFCollection(metafiles=expected_files)

    stream_names = collection.get_stream_names()
    # Look each recording up twice: once per stream name, once per position.
    files_by_name = [
        collection.get_SigMFFile(stream_name=name) for name in stream_names
    ]
    files_by_index = [
        collection.get_SigMFFile(stream_index=position)
        for position in range(len(collection))
    ]

    for expected in expected_files:
        assert expected.name in stream_names
        assert expected in files_by_name
        assert expected in files_by_index


def test_collection_set_sigmffiles(test_sigmffile,
                                   test_alternate_sigmffile,
                                   test_alternate_sigmffile_2):
    """
    Start from a collection holding only the first fixture, replace its
    streams with all three fixtures via `set_streams()`, then check that
    every one of them is reported as a stream and can be fetched back both
    by stream name and by stream index.
    """
    expected_files = [
        test_sigmffile,
        test_alternate_sigmffile,
        test_alternate_sigmffile_2,
    ]
    collection = SigMFCollection(metafiles=[test_sigmffile])
    # Overwrite the initial single-stream configuration with the full list.
    collection.set_streams(expected_files)

    stream_names = collection.get_stream_names()
    files_by_name = [
        collection.get_SigMFFile(stream_name=name) for name in stream_names
    ]
    files_by_index = [
        collection.get_SigMFFile(stream_index=position)
        for position in range(len(collection))
    ]

    for expected in expected_files:
        assert expected.name in stream_names
        assert expected in files_by_name
        assert expected in files_by_index