Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multi recording archive #1

Draft
wants to merge 29 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
208d74b
add support for multiple recordings in archives
jhazentia Apr 27, 2023
44462f1
fix SigMFArchiveReader error
jhazentia May 1, 2023
832b731
support single or multiple sigmffiles in archive __init__()
jhazentia May 3, 2023
8d25adf
renamed archive "name" to "path", allow os.PathLike
jhazentia May 3, 2023
4f58453
Fixed bug in checking sigmffiles type
jhazentia May 3, 2023
89242c8
add test for missing name
jhazentia May 3, 2023
0c503ab
require name in SigMFFile constructor
jhazentia May 5, 2023
d234ddf
return single or list of SigMFFiles in fromarchive
jhazentia May 5, 2023
348bed8
fix some formatting, unused imports, docstrings, rename archivereader…
jhazentia May 8, 2023
b6df262
add support for collections in archives, check for path and fileobj i…
jhazentia May 11, 2023
4cfc8c2
rename collectionfile to collection
jhazentia May 12, 2023
ea4e633
make json end of file new line consistent, add support for collection…
jhazentia May 12, 2023
68c6825
add README examples for archives with multiple recordings
jhazentia May 12, 2023
454dd34
fix archive docstring, remove unneeded variables from archivereader
jhazentia May 15, 2023
af9002d
simplify SigMFCollection archive tests
jhazentia May 15, 2023
f1d108b
organize SigMFFile constructor doc string
jhazentia May 15, 2023
a631eb3
clarify different ways to do the same thing in README
jhazentia May 26, 2023
74a7b86
fix typo
jhazentia May 26, 2023
ae4c424
Merge branch 'main' of https://github.com/NTIA/sigmf-python into mult…
jhazentia May 26, 2023
93ab02b
add support for passing SigMFFile objects to SigMFCollection to impro…
jhazentia May 30, 2023
5376ece
fix SigMFCollection docstring
jhazentia Jun 1, 2023
46e7d8f
SigMFCollection set_streams() will check type for each element of met…
jhazentia Jun 1, 2023
660ba82
break up and simplify archive examples in README
jhazentia Jun 1, 2023
e2919d8
fix docstring, add ability to control pretty print JSON for archive
jhazentia Jun 1, 2023
e4e1775
update docstrings, formatting
jhazentia Jun 2, 2023
3131683
improve docstrings, remove duplicative test, add test for fromarchive…
jhazentia Jun 2, 2023
29827af
fix error message
jhazentia Jun 5, 2023
b81289b
make archives work when using folders
jhazentia Jun 6, 2023
15ca451
folders in archives are no longer created by default to maintain cons…
jhazentia Jun 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
109 changes: 109 additions & 0 deletions README.md
Expand Up @@ -180,6 +180,115 @@ ci16_sigmffile = collection.get_SigMFFile(stream_name='example_ci16')
cf32_sigmffile = collection.get_SigMFFile(stream_name='example_cf32')
```

### Create and Read SigMF Archives with Multiple Recordings

The example below shows different ways to create and read an archive. The
`SigMFArchive` class, the `SigMFFile.archive()` method, and the
`SigMFFile.tofile()` method can all be used to create an archive. Archives
with collections can be created using the `SigMFArchive` class, the
`SigMFCollection.archive()` method, or the `SigMFCollection.tofile()` method.

There are also different ways to read an archive: the `SigMFArchiveReader`
class, the `sigmffile.fromarchive()` function, or the `sigmffile.fromfile()`
function.

```python
import numpy as np
from sigmf.archivereader import SigMFArchiveReader

from sigmf.sigmffile import (SigMFFile,
SigMFArchive,
SigMFCollection,
fromarchive,
fromfile)


# create data file
# np.random.rand() returns float64; cast to little-endian float32 so the
# bytes written to disk match the "rf32_le" datatype declared in the metadata
random_data1 = np.random.rand(128).astype("<f4")
data1_path = "recording1.sigmf-data"
random_data1.tofile(data1_path)

# create metadata
sigmf_file_1 = SigMFFile(name='recording1')
sigmf_file_1.set_global_field("core:datatype", "rf32_le")
sigmf_file_1.add_annotation(start_index=0, length=len(random_data1))
sigmf_file_1.add_capture(start_index=0)
sigmf_file_1.set_data_file(data1_path)

# create archive using SigMFArchive
archive1 = SigMFArchive(sigmffiles=sigmf_file_1,
path="single_recording_archive1.sigmf")

# create archive using SigMFFile archive()
archive1_path = sigmf_file_1.archive(file_path="single_recording_archive2.sigmf")

# create archive using tofile
sigmf_file_1.tofile(file_path="single_recording_archive3.sigmf",
toarchive=True)
jhazentia marked this conversation as resolved.
Show resolved Hide resolved

# multiple recordings
# np.random.rand() returns float64; cast to little-endian float32 so the
# bytes written to disk match the "rf32_le" datatype declared in the metadata
random_data2 = np.random.rand(128).astype("<f4")
data2_path = "recording2.sigmf-data"
random_data2.tofile(data2_path)

# create metadata
sigmf_file_2 = SigMFFile(name='recording2')
sigmf_file_2.set_global_field("core:datatype", "rf32_le")
sigmf_file_2.add_annotation(start_index=0, length=len(random_data2))
sigmf_file_2.add_capture(start_index=0)
sigmf_file_2.set_data_file(data2_path)

# create archive using SigMFArchive
sigmffiles = [sigmf_file_1, sigmf_file_2]
archive2 = SigMFArchive(sigmffiles=sigmffiles,
path="multi_recording_archive1.sigmf")

# create archive with collection
sigmf_file_1.tofile("recording1.sigmf-meta")
sigmf_file_2.tofile("recording2.sigmf-meta")
metafiles = ["recording1.sigmf-meta", "recording2.sigmf-meta"]
collection = SigMFCollection(metafiles=metafiles)

# create archive using SigMFArchive
archive3 = SigMFArchive(sigmffiles=sigmffiles,
collection=collection,
path="multi_recording_archive2.sigmf")

# create archive using collection archive
archive3_path = collection.archive(file_path="multi_recording_archive3.sigmf")

# create archive using collection tofile
collection.tofile(file_path="multi_recording_archive4.sigmf", toarchive=True)

# read multirecording archives using archive reader
reader = SigMFArchiveReader("multi_recording_archive1.sigmf")
print(len(reader)) # equal to 2 for 2 sigmffiles

# read multirecording archives using fromarchive
sigmffiles = fromarchive("multi_recording_archive1.sigmf")
print(len(sigmffiles)) # equal to 2 for 2 sigmffiles

# read multirecording archives using fromfile
sigmffiles = fromfile("multi_recording_archive1.sigmf")
print(len(sigmffiles)) # equal to 2 for 2 sigmffiles

# read multirecording archives using archive reader with collection
reader = SigMFArchiveReader("multi_recording_archive2.sigmf")
print(len(reader)) # equal to 2 for 2 sigmffiles
print(reader.collection)
print(len(reader.collection.sigmffiles)) # get SigMFFiles from collection

# read multirecording archives using fromarchive with collection
sigmffiles, collection = fromarchive("multi_recording_archive2.sigmf")
print(len(sigmffiles)) # equal to 2 for 2 sigmffiles
print(collection)

# read multirecording archives using fromfile with collection
sigmffiles, collection = fromfile("multi_recording_archive2.sigmf")
print(len(sigmffiles)) # equal to 2 for 2 sigmffiles
print(collection)
```

### Load a SigMF Archive and slice its data without untarring it

Since an *archive* is merely a tarball (uncompressed), and since there are many
Expand Down
200 changes: 138 additions & 62 deletions sigmf/archive.py
Expand Up @@ -6,12 +6,17 @@

"""Create and extract SigMF archives."""

import collections
import os
import shutil
import tarfile
import tempfile
from typing import BinaryIO, Iterable, Union

from .error import SigMFFileError
import sigmf


from .error import SigMFFileError, SigMFValidationError


SIGMF_ARCHIVE_EXT = ".sigmf"
Expand All @@ -21,59 +26,75 @@


class SigMFArchive():
"""Archive a SigMFFile.
"""Archive one or more `SigMFFile`s. A collection file can
optionally be included.

A `.sigmf` file must include both valid metadata and data.
If `self.data_file` is not set or the requested output file
is not writable, raise `SigMFFileError`.

Parameters:

sigmffile -- A SigMFFile object with valid metadata and data_file

name -- path to archive file to create. If file exists, overwrite.
If `name` doesn't end in .sigmf, it will be appended.
For example: if `name` == "/tmp/archive1", then the
following archive will be created:
/tmp/archive1.sigmf
- archive1/
- archive1.sigmf-meta
- archive1.sigmf-data

fileobj -- If `fileobj` is specified, it is used as an alternative to
a file object opened in binary mode for `name`. It is
supposed to be at position 0. `name` is not required, but
if specified will be used to determine the directory and
file names within the archive. `fileobj` won't be closed.
For example: if `name` == "archive1" and fileobj is given,
a tar archive will be written to fileobj with the
following structure:
- archive1/
- archive1.sigmf-meta
- archive1.sigmf-data
sigmffiles -- A single SigMFFile or an iterable of SigMFFile objects with
valid metadata and data_files

collection -- An optional SigMFCollection.

path -- Path to archive file to create. If file exists, overwrite.
If `path` doesn't end in .sigmf, it will be appended. The
`self.path` instance variable will be updated upon
successful writing of the archive to point to the final
archive path.


fileobj -- If `fileobj` is specified, it is used as an alternative to
a file object opened in binary mode for `path`. If
`fileobj` is an open tarfile, it will be appended to. It is
supposed to be at position 0. `fileobj` won't be closed. If
`fileobj` is given, `path` has no effect.
"""
def __init__(self, sigmffile, name=None, fileobj=None):
self.sigmffile = sigmffile
self.name = name
def __init__(self,
sigmffiles: Union["sigmf.sigmffile.SigMFFile",
Iterable["sigmf.sigmffile.SigMFFile"]],
collection: "sigmf.sigmffile.SigMFCollection" = None,
path: Union[str, os.PathLike] = None,
fileobj: BinaryIO = None):

if (not path) and (not fileobj):
raise SigMFFileError("'path' or 'fileobj' required for creating "
"SigMF archive!")

if isinstance(sigmffiles, sigmf.sigmffile.SigMFFile):
self.sigmffiles = [sigmffiles]
elif (hasattr(collections, "Iterable") and
isinstance(sigmffiles, collections.Iterable)):
self.sigmffiles = sigmffiles
elif isinstance(sigmffiles, collections.abc.Iterable): # python 3.10
self.sigmffiles = sigmffiles
else:
raise SigMFFileError("Unknown type for sigmffiles argument!")

if path:
self.path = str(path)
else:
self.path = None
self.fileobj = fileobj
self.collection = collection

self._check_input()

archive_name = self._get_archive_name()
mode = "a" if fileobj is not None else "w"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventual PR should call out this change in behavior and note that it could be changed to preserve the original behavior of writing over the archive.

sigmf_fileobj = self._get_output_fileobj()
sigmf_archive = tarfile.TarFile(mode="w",
fileobj=sigmf_fileobj,
format=tarfile.PAX_FORMAT)
tmpdir = tempfile.mkdtemp()
sigmf_md_filename = archive_name + SIGMF_METADATA_EXT
sigmf_md_path = os.path.join(tmpdir, sigmf_md_filename)
sigmf_data_filename = archive_name + SIGMF_DATASET_EXT
sigmf_data_path = os.path.join(tmpdir, sigmf_data_filename)

with open(sigmf_md_path, "w") as mdfile:
self.sigmffile.dump(mdfile, pretty=True)

shutil.copy(self.sigmffile.data_file, sigmf_data_path)
try:
sigmf_archive = tarfile.TarFile(mode=mode,
fileobj=sigmf_fileobj,
format=tarfile.PAX_FORMAT)
except tarfile.ReadError:
# fileobj doesn't contain any archives yet, so reopen in 'w' mode
sigmf_archive = tarfile.TarFile(mode='w',
fileobj=sigmf_fileobj,
format=tarfile.PAX_FORMAT)

def chmod(tarinfo):
if tarinfo.isdir():
Expand All @@ -82,47 +103,102 @@ def chmod(tarinfo):
tarinfo.mode = 0o644 # -wr-r--r--
return tarinfo

sigmf_archive.add(tmpdir, arcname=archive_name, filter=chmod)
if collection:
with tempfile.NamedTemporaryFile(mode="w") as tmpfile:
collection.dump(tmpfile, pretty=True)
tmpfile.flush()
collection_filename = archive_name + SIGMF_COLLECTION_EXT
sigmf_archive.add(tmpfile.name,
arcname=collection_filename,
filter=chmod)

for sigmffile in self.sigmffiles:
with tempfile.TemporaryDirectory() as tmpdir:
sigmf_md_filename = sigmffile.name + SIGMF_METADATA_EXT
sigmf_md_path = os.path.join(tmpdir, sigmf_md_filename)
sigmf_data_filename = sigmffile.name + SIGMF_DATASET_EXT
sigmf_data_path = os.path.join(tmpdir, sigmf_data_filename)

with open(sigmf_md_path, "w") as mdfile:
sigmffile.dump(mdfile, pretty=True)

shutil.copy(sigmffile.data_file, sigmf_data_path)
sigmf_archive.add(tmpdir, arcname=sigmffile.name, filter=chmod)

sigmf_archive.close()
if not fileobj:
sigmf_fileobj.close()

shutil.rmtree(tmpdir)
else:
sigmf_fileobj.seek(0) # ensure next open can read this as a tar

self.path = sigmf_archive.name

def _check_input(self):
self._ensure_name_has_correct_extension()
self._ensure_data_file_set()
self._validate_sigmffile_metadata()

def _ensure_name_has_correct_extension(self):
name = self.name
if name is None:
self._ensure_path_has_correct_extension()
for sigmffile in self.sigmffiles:
self._ensure_sigmffile_name_set(sigmffile)
self._ensure_data_file_set(sigmffile)
self._validate_sigmffile_metadata(sigmffile)
if self.collection:
self._validate_sigmffile_collection(self.collection,
self.sigmffiles)

def _ensure_path_has_correct_extension(self):
path = self.path
if path is None:
return

has_extension = "." in name
has_correct_extension = name.endswith(SIGMF_ARCHIVE_EXT)
has_extension = "." in path
has_correct_extension = path.endswith(SIGMF_ARCHIVE_EXT)
if has_extension and not has_correct_extension:
apparent_ext = os.path.splitext(name)[-1]
apparent_ext = os.path.splitext(path)[-1]
err = "extension {} != {}".format(apparent_ext, SIGMF_ARCHIVE_EXT)
raise SigMFFileError(err)

self.name = name if has_correct_extension else name + SIGMF_ARCHIVE_EXT
self.path = path if has_correct_extension else path + SIGMF_ARCHIVE_EXT

@staticmethod
def _ensure_sigmffile_name_set(sigmffile):
    """Raise `SigMFFileError` when `sigmffile.name` is unset or empty.

    The recording name is used to build the member file names inside the
    archive, so a falsy name is rejected up front.
    """
    if sigmffile.name:
        return

    raise SigMFFileError(
        "the `name` attribute must be set to pass to `SigMFArchive`"
    )

def _ensure_data_file_set(self):
if not self.sigmffile.data_file:
@staticmethod
def _ensure_data_file_set(sigmffile):
if not sigmffile.data_file:
err = "no data file - use `set_data_file`"
raise SigMFFileError(err)

def _validate_sigmffile_metadata(self):
self.sigmffile.validate()
@staticmethod
def _validate_sigmffile_metadata(sigmffile):
sigmffile.validate()

@staticmethod
def _validate_sigmffile_collection(collectionfile, sigmffiles):
    """Check that `collectionfile` and `sigmffiles` describe the same recordings.

    Parameters:

    collectionfile -- collection object providing `STREAMS_KEY`, `__len__`
                      and `get_collection_field()`

    sigmffiles     -- sized iterable of objects providing
                      `dump(fileobj, pretty=True)`

    Raises `SigMFValidationError` if the number of recordings differs, if
    the collection has no streams, or if the SHA512 hash of any dumped
    metadata is not among the hashes listed in the collection's streams.
    """
    if len(collectionfile) != len(sigmffiles):
        raise SigMFValidationError("Mismatched number of recordings "
                                   "between sigmffiles and collection "
                                   "file!")

    streams = collectionfile.get_collection_field(collectionfile.STREAMS_KEY)
    # Guard before iterating: a missing streams field (e.g. None) must be
    # reported as a validation error, not as a TypeError from the
    # comprehension below.
    if not streams:
        raise SigMFValidationError("No recordings in collection file!")
    sigmf_meta_hashes = [stream["hash"] for stream in streams]

    for sigmffile in sigmffiles:
        # Dump the metadata to a temporary file because the hash helper
        # works on a path; the hash must be computed while the file exists.
        with tempfile.NamedTemporaryFile(mode="w") as tmpfile:
            sigmffile.dump(tmpfile, pretty=True)
            tmpfile.flush()  # ensure contents are on disk before hashing
            sigmf_meta_hash = sigmf.sigmf_hash.calculate_sha512(tmpfile.name)
        if sigmf_meta_hash not in sigmf_meta_hashes:
            raise SigMFValidationError("SigMFFile given that "
                                       "is not in collection file!")

def _get_archive_name(self):
if self.fileobj and not self.name:
if self.fileobj and not self.path:
pathname = self.fileobj.name
else:
pathname = self.name
pathname = self.path

filename = os.path.split(pathname)[-1]
archive_name, archive_ext = os.path.splitext(filename)
Expand All @@ -135,7 +211,7 @@ def _get_output_fileobj(self):
if self.fileobj:
err = "fileobj {!r} is not byte-writable".format(self.fileobj)
else:
err = "can't open {!r} for writing".format(self.name)
err = "can't open {!r} for writing".format(self.path)

raise SigMFFileError(err)

Expand All @@ -146,6 +222,6 @@ def _get_open_fileobj(self):
fileobj = self.fileobj
fileobj.write(bytes()) # force exception if not byte-writable
else:
fileobj = open(self.name, "wb")
fileobj = open(self.path, "wb")

return fileobj