Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for Multiple Recordings in Archives #2

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
101 changes: 101 additions & 0 deletions README.md
Expand Up @@ -47,6 +47,33 @@ handle.get_captures() # returns list of 'captures' dictionaries
handle.get_annotations() # returns list of all annotations
```

### Load a SigMF archive with multiple recordings
There are different ways to read an archive using `SigMFArchiveReader`
class, the `sigmffile.fromarchive()` method, and the `sigmffile.fromfile()`
method.

```python
import numpy as np
from sigmf.archivereader import SigMFArchiveReader

from sigmf.sigmffile import (fromarchive,
fromfile)

# read multirecording archive using fromarchive
sigmffiles = fromarchive("multi_recording_archive.sigmf")
print(len(sigmffiles))

# read multirecording archive using fromfile
sigmffiles = fromfile("multi_recording_archive.sigmf")
print(len(sigmffiles))

# # read multirecording archive using SigMFArchiveReader
jhazentia marked this conversation as resolved.
Show resolved Hide resolved
reader = SigMFArchiveReader("multi_recording_archive.sigmf")
# length of reader and reader.sigmffiles should be the same
jhazentia marked this conversation as resolved.
Show resolved Hide resolved
print(len(reader))
print(len(reader.sigmffiles))
```

### Verify SigMF dataset integrity & compliance

```bash
Expand Down Expand Up @@ -180,6 +207,80 @@ ci16_sigmffile = collection.get_SigMFFile(stream_name='example_ci16')
cf32_sigmffile = collection.get_SigMFFile(stream_name='example_cf32')
```

### Create a SigMF Archive
The `SigMFArchive` class, the `SigMFFile.archive()` method, and the
`SigMFFile.tofile()` method can all be used to create an archive.

```python
import numpy as np

from sigmf.sigmffile import (SigMFFile,
SigMFArchive)


# create data file
random_data = np.random.rand(128)
data_path = "recording.sigmf-data"
random_data.tofile(data_path)

# create metadata
sigmf_file = SigMFFile(name='recording')
sigmf_file.set_global_field("core:datatype", "rf32_le")
sigmf_file.add_annotation(start_index=0, length=len(random_data))
sigmf_file.add_capture(start_index=0)
sigmf_file.set_data_file(data_path)

# create archive using SigMFArchive
archive = SigMFArchive(sigmffiles=sigmf_file,
path="single_recording_archive1.sigmf")

# create archive using SigMFFile archive()
archive_path = sigmf_file.archive(file_path="single_recording_archive2.sigmf")

# create archive using tofile
sigmf_file.tofile(file_path="single_recording_archive3.sigmf",
toarchive=True)
```

### Create SigMF Archives with Multiple Recordings
Archives with multiple recordings can be created using `SigMFArchive` class.
jhazentia marked this conversation as resolved.
Show resolved Hide resolved

```python
import numpy as np

from sigmf.sigmffile import (SigMFFile,
SigMFArchive)


# create data files
random_data1 = np.random.rand(128)
data1_path = "recording1.sigmf-data"
random_data1.tofile(data1_path)

random_data2 = np.random.rand(128)
data2_path = "recording2.sigmf-data"
random_data2.tofile(data2_path)

# create metadata
sigmf_file_1 = SigMFFile(name='recording1')
sigmf_file_1.set_global_field("core:datatype", "rf32_le")
sigmf_file_1.add_annotation(start_index=0, length=len(random_data1))
sigmf_file_1.add_capture(start_index=0)
sigmf_file_1.set_data_file(data1_path)

sigmf_file_2 = SigMFFile(name='recording2')
sigmf_file_2.set_global_field("core:datatype", "rf32_le")
sigmf_file_2.add_annotation(start_index=0, length=len(random_data2))
sigmf_file_2.add_capture(start_index=0)
sigmf_file_2.set_data_file(data2_path)


# create archive using SigMFArchive
sigmffiles = [sigmf_file_1, sigmf_file_2]
archive = SigMFArchive(sigmffiles=sigmffiles,
path="multi_recording_archive.sigmf")
```

### Load a SigMF Archive and slice its data without untaring it

Since an *archive* is merely a tarball (uncompressed), and since there any many
Expand Down
197 changes: 134 additions & 63 deletions sigmf/archive.py
Expand Up @@ -6,10 +6,16 @@

"""Create and extract SigMF archives."""

import collections
from io import BytesIO
import os
import shutil
import tarfile
import tempfile
import time
from typing import BinaryIO, Iterable, Union

import sigmf


from .error import SigMFFileError

Expand All @@ -21,59 +27,73 @@


class SigMFArchive():
"""Archive a SigMFFile.
"""Archive one or more `SigMFFile`s.

A `.sigmf` file must include both valid metadata and data.
If `self.data_file` is not set or the requested output file
is not writable, raise `SigMFFileError`.

Parameters:

sigmffile -- A SigMFFile object with valid metadata and data_file

name -- path to archive file to create. If file exists, overwrite.
If `name` doesn't end in .sigmf, it will be appended.
For example: if `name` == "/tmp/archive1", then the
following archive will be created:
/tmp/archive1.sigmf
- archive1/
- archive1.sigmf-meta
- archive1.sigmf-data

fileobj -- If `fileobj` is specified, it is used as an alternative to
a file object opened in binary mode for `name`. It is
supposed to be at position 0. `name` is not required, but
if specified will be used to determine the directory and
file names within the archive. `fileobj` won't be closed.
For example: if `name` == "archive1" and fileobj is given,
a tar archive will be written to fileobj with the
following structure:
- archive1/
- archive1.sigmf-meta
- archive1.sigmf-data
sigmffiles -- A single SigMFFile or an iterable of SigMFFile objects with
valid metadata and data_files

path -- Path to archive file to create. If file exists, overwrite.
If `path` doesn't end in .sigmf, it will be appended. The
`self.path` instance variable will be updated upon
successful writing of the archive to point to the final
archive path.


fileobj -- If `fileobj` is specified, it is used as an alternative to
a file object opened in binary mode for `path`. If
`fileobj` is an open tarfile, it will be appended to. It is
supposed to be at position 0. `fileobj` won't be closed. If
jhazentia marked this conversation as resolved.
Show resolved Hide resolved
`fileobj` is given, `path` has no effect.

pretty -- If True, pretty print JSON when creating the metadata
files in the archive. Defaults to True.
"""
def __init__(self, sigmffile, name=None, fileobj=None):
self.sigmffile = sigmffile
self.name = name
def __init__(self,
sigmffiles: Union["sigmf.sigmffile.SigMFFile",
Iterable["sigmf.sigmffile.SigMFFile"]],
path: Union[str, os.PathLike] = None,
fileobj: BinaryIO = None,
pretty: bool = True):

if (not path) and (not fileobj):
raise SigMFFileError("'path' or 'fileobj' required for creating "
"SigMF archive!")

if isinstance(sigmffiles, sigmf.sigmffile.SigMFFile):
self.sigmffiles = [sigmffiles]
elif (hasattr(collections, "Iterable") and
isinstance(sigmffiles, collections.Iterable)):
self.sigmffiles = sigmffiles
elif isinstance(sigmffiles, collections.abc.Iterable): # python 3.10
self.sigmffiles = sigmffiles
else:
raise SigMFFileError("Unknown type for sigmffiles argument!")

if path:
self.path = str(path)
else:
self.path = None
self.fileobj = fileobj

self._check_input()

archive_name = self._get_archive_name()
mode = "a" if fileobj is not None else "w"
sigmf_fileobj = self._get_output_fileobj()
sigmf_archive = tarfile.TarFile(mode="w",
fileobj=sigmf_fileobj,
format=tarfile.PAX_FORMAT)
tmpdir = tempfile.mkdtemp()
sigmf_md_filename = archive_name + SIGMF_METADATA_EXT
sigmf_md_path = os.path.join(tmpdir, sigmf_md_filename)
sigmf_data_filename = archive_name + SIGMF_DATASET_EXT
sigmf_data_path = os.path.join(tmpdir, sigmf_data_filename)

with open(sigmf_md_path, "w") as mdfile:
self.sigmffile.dump(mdfile, pretty=True)

shutil.copy(self.sigmffile.data_file, sigmf_data_path)
try:
sigmf_archive = tarfile.TarFile(mode=mode,
fileobj=sigmf_fileobj,
format=tarfile.PAX_FORMAT)
except tarfile.ReadError:
# fileobj doesn't contain any archives yet, so reopen in 'w' mode
sigmf_archive = tarfile.TarFile(mode='w',
jhazentia marked this conversation as resolved.
Show resolved Hide resolved
fileobj=sigmf_fileobj,
format=tarfile.PAX_FORMAT)

def chmod(tarinfo):
if tarinfo.isdir():
Expand All @@ -82,47 +102,98 @@ def chmod(tarinfo):
tarinfo.mode = 0o644 # -wr-r--r--
return tarinfo

sigmf_archive.add(tmpdir, arcname=archive_name, filter=chmod)
for sigmffile in self.sigmffiles:
sigmffile_name = os.path.normpath(sigmffile.name)
if os.path.isabs(sigmffile_name):
# remove root path component to make relative path for tarfile
sigmffile_name_split = sigmffile_name.split(os.path.sep)
sigmffile_name = os.path.sep.join(sigmffile_name_split[1:])
if os.path.isabs(sigmffile_name):
raise SigMFFileError("Invalid SigMFFile name")
self._create_parent_dirs(sigmf_archive, sigmffile_name, chmod)
file_path = os.path.join(sigmffile_name,
os.path.basename(sigmffile_name))
sf_md_filename = file_path + SIGMF_METADATA_EXT
sf_data_filename = file_path + SIGMF_DATASET_EXT
metadata = sigmffile.dumps(pretty=pretty)
metadata_tarinfo = tarfile.TarInfo(sf_md_filename)
metadata_tarinfo.size = len(metadata)
metadata_tarinfo.mtime = time.time()
metadata_tarinfo = chmod(metadata_tarinfo)
metadata_buffer = BytesIO(metadata.encode("utf-8"))
sigmf_archive.addfile(metadata_tarinfo, fileobj=metadata_buffer)
data_tarinfo = sigmf_archive.gettarinfo(name=sigmffile.data_file,
arcname=sf_data_filename)
data_tarinfo = chmod(data_tarinfo)
with open(sigmffile.data_file, "rb") as data_file:
sigmf_archive.addfile(data_tarinfo, fileobj=data_file)

sigmf_archive.close()
if not fileobj:
sigmf_fileobj.close()

shutil.rmtree(tmpdir)
else:
sigmf_fileobj.seek(0) # ensure next open can read this as a tar

self.path = sigmf_archive.name

def _check_input(self):
self._ensure_name_has_correct_extension()
self._ensure_data_file_set()
self._validate_sigmffile_metadata()
def _create_parent_dirs(self, _tarfile, sigmffile_name, set_permission):
jhazentia marked this conversation as resolved.
Show resolved Hide resolved
path_components = sigmffile_name.split(os.path.sep)
current_path = ""
for path in path_components:
current_path = os.path.join(current_path, path)
path_found = False
for member in _tarfile.getmembers():
if member.name == current_path:
path_found = True
break
if not path_found:
tarinfo = tarfile.TarInfo(current_path)
tarinfo.type = tarfile.DIRTYPE
tarinfo = set_permission(tarinfo)
_tarfile.addfile(tarinfo)

def _ensure_name_has_correct_extension(self):
name = self.name
if name is None:
def _check_input(self):
self._ensure_path_has_correct_extension()
for sigmffile in self.sigmffiles:
self._ensure_sigmffile_name_set(sigmffile)
self._ensure_data_file_set(sigmffile)
self._validate_sigmffile_metadata(sigmffile)

def _ensure_path_has_correct_extension(self):
path = self.path
if path is None:
return

has_extension = "." in name
has_correct_extension = name.endswith(SIGMF_ARCHIVE_EXT)
has_extension = "." in path
has_correct_extension = path.endswith(SIGMF_ARCHIVE_EXT)
if has_extension and not has_correct_extension:
apparent_ext = os.path.splitext(name)[-1]
apparent_ext = os.path.splitext(path)[-1]
err = "extension {} != {}".format(apparent_ext, SIGMF_ARCHIVE_EXT)
raise SigMFFileError(err)

self.name = name if has_correct_extension else name + SIGMF_ARCHIVE_EXT
self.path = path if has_correct_extension else path + SIGMF_ARCHIVE_EXT

@staticmethod
def _ensure_sigmffile_name_set(sigmffile):
if not sigmffile.name:
err = "the `name` attribute must be set to pass to `SigMFArchive`"
raise SigMFFileError(err)

def _ensure_data_file_set(self):
if not self.sigmffile.data_file:
@staticmethod
def _ensure_data_file_set(sigmffile):
if not sigmffile.data_file:
err = "no data file - use `set_data_file`"
raise SigMFFileError(err)

def _validate_sigmffile_metadata(self):
self.sigmffile.validate()
@staticmethod
def _validate_sigmffile_metadata(sigmffile):
sigmffile.validate()

def _get_archive_name(self):
if self.fileobj and not self.name:
if self.fileobj and not self.path:
pathname = self.fileobj.name
else:
pathname = self.name
pathname = self.path

filename = os.path.split(pathname)[-1]
archive_name, archive_ext = os.path.splitext(filename)
Expand All @@ -135,7 +206,7 @@ def _get_output_fileobj(self):
if self.fileobj:
err = "fileobj {!r} is not byte-writable".format(self.fileobj)
else:
err = "can't open {!r} for writing".format(self.name)
err = "can't open {!r} for writing".format(self.path)

raise SigMFFileError(err)

Expand All @@ -146,6 +217,6 @@ def _get_open_fileobj(self):
fileobj = self.fileobj
fileobj.write(bytes()) # force exception if not byte-writable
else:
fileobj = open(self.name, "wb")
fileobj = open(self.path, "wb")

return fileobj