Skip to content

Commit

Permalink
Fix count samples from annotations (sigmf#47)
Browse files Browse the repository at this point in the history
Fixed two scenarios where SigMFFile._count_samples() failed:

* No data_file registered: sample_count should be calculated from the annotation with the highest end index, not from the annotation with the highest start index
* If an annotation provides no core:sample_count, its core:sample_start should be used instead (the sample count must be at least this value)

---------

Co-authored-by: messybear <messybear@thatsmessy.net>
  • Loading branch information
vejretvejret and messybear committed Jan 19, 2024
1 parent 7c979a4 commit 1386965
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 13 deletions.
55 changes: 42 additions & 13 deletions sigmf/sigmffile.py
Expand Up @@ -402,11 +402,23 @@ def get_annotations(self, index=None):
list of dict
Each dictionary contains one annotation for the sample at `index`.
'''
return [
x for x in self._metadata.get(self.ANNOTATION_KEY, [])
if index is None or (x[self.START_INDEX_KEY] <= index
and x[self.START_INDEX_KEY] + x[self.LENGTH_INDEX_KEY] > index)
]
annotations = self._metadata.get(self.ANNOTATION_KEY, [])
if index is None:
return annotations

annotations_including_index = []
for annotation in annotations:
if index < annotation[self.START_INDEX_KEY]:
# index is before annotation starts -> skip
continue
if self.LENGTH_INDEX_KEY in annotation:
# Annotation includes sample_count -> check end index
if index >= annotation[self.START_INDEX_KEY] + annotation[self.LENGTH_INDEX_KEY]:
# index is after annotation end -> skip
continue

annotations_including_index.append(annotation)
return annotations_including_index

def get_sample_size(self):
"""
Expand All @@ -418,16 +430,13 @@ def get_sample_size(self):
def _count_samples(self):
"""
Count, set, and return the total number of samples in the data file.
If there is no data file but there are annotations, use the end index
of the final annotation instead. If there are no annotations, use 0.
If there is no data file but there are annotations, use the highest end
index among the annotations (sample_start + sample_count, or sample_start
alone when an annotation has no sample_count). If there are no annotations,
use 0.
For complex data, a 'sample' includes both the real and imaginary part.
"""
annotations = self.get_annotations()
if self.data_file is None:
if len(annotations) > 0:
sample_count = annotations[-1][self.START_INDEX_KEY] + annotations[-1][self.LENGTH_INDEX_KEY]
else:
sample_count = 0
sample_count = self._get_sample_count_from_annotations()
else:
header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()])
file_size = path.getsize(self.data_file) if self.offset_and_size is None else self.offset_and_size[1]
Expand All @@ -438,12 +447,32 @@ def _count_samples(self):
if file_data_size % (sample_size * num_channels) != 0:
warnings.warn(f'File `{self.data_file}` does not contain an integer '
'number of samples across channels. It may be invalid data.')
if len(annotations) > 0 and annotations[-1][self.START_INDEX_KEY] + annotations[-1][self.LENGTH_INDEX_KEY] > sample_count:
if self._get_sample_count_from_annotations() > sample_count:
warnings.warn(f'File `{self.data_file}` ends before the final annotation '
'in the corresponding SigMF metadata.')
self.sample_count = sample_count
return sample_count

def _get_sample_count_from_annotations(self):
    """
    Return the number of samples implied by the annotations.

    Each annotation contributes its end index (START_INDEX_KEY plus
    LENGTH_INDEX_KEY). An annotation without LENGTH_INDEX_KEY contributes
    only its START_INDEX_KEY, because the sample count must be at least
    that large.

    NOTE: Annotations are ordered by START_INDEX_KEY and not by end index,
    so every annotation has to be inspected rather than just the last one.

    Returns
    -------
    int
        The highest end index over all annotations, or 0 if there are no
        annotations.
    """
    return max(
        (
            # A missing length counts as 0, leaving just the start index.
            annotation[self.START_INDEX_KEY] + annotation.get(self.LENGTH_INDEX_KEY, 0)
            for annotation in self.get_annotations()
        ),
        default=0,
    )

def calculate_hash(self):
"""
Calculates the hash of the data file and adds it to the global section.
Expand Down
55 changes: 55 additions & 0 deletions tests/test_sigmffile.py
Expand Up @@ -25,6 +25,7 @@
from pathlib import Path
import numpy as np
import unittest
import copy

from sigmf import sigmffile, utils
from sigmf.sigmffile import SigMFFile
Expand Down Expand Up @@ -61,6 +62,60 @@ def test_iterator_basic(self):
count += 1
self.assertEqual(count, len(self.sigmf_object))

class TestAnnotationHandling(unittest.TestCase):
    """Tests for annotation lookup and for sample counts derived from annotations."""

    def test_get_annotations_with_index(self):
        """Test that only annotations containing index are returned from get_annotations()"""
        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
        smf.add_annotation(start_index=1)
        smf.add_annotation(start_index=4, length=4)
        annotations_idx10 = smf.get_annotations(index=10)
        # The annotation spanning samples 4..7 ends before index 10 and must
        # be excluded; the one without a length has no end and is kept.
        self.assertListEqual(
            annotations_idx10,
            [
                {SigMFFile.START_INDEX_KEY: 0, SigMFFile.LENGTH_INDEX_KEY: 16},
                {SigMFFile.START_INDEX_KEY: 1},
            ]
        )

    def test__count_samples_from_annotation(self):
        """Make sure sample count from annotations use correct end index"""
        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
        smf.add_annotation(start_index=0, length=32)
        # This annotation starts later but ends earlier than the one above.
        smf.add_annotation(start_index=4, length=4)
        sample_count = smf._count_samples()
        self.assertEqual(sample_count, 32)

    def test_set_data_file_without_annotations(self):
        """
        Make sure setting data_file with no annotations registered does not
        raise any errors
        """
        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
        smf._metadata[SigMFFile.ANNOTATION_KEY].clear()
        with tempfile.TemporaryDirectory() as tmpdir:
            temp_path_data = os.path.join(tmpdir, "datafile")
            TEST_FLOAT32_DATA.tofile(temp_path_data)
            smf.set_data_file(temp_path_data)
            samples = smf.read_samples()
            # assertEqual reports the actual length on failure.
            self.assertEqual(len(samples), 16)

    def test_set_data_file_with_annotations(self):
        """
        Make sure setting data_file with annotations registered use sample
        count from data_file and issue a warning if annotations have end
        indices bigger than file end index
        """
        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
        smf.add_annotation(start_index=0, length=32)
        with tempfile.TemporaryDirectory() as tmpdir:
            temp_path_data = os.path.join(tmpdir, "datafile")
            TEST_FLOAT32_DATA.tofile(temp_path_data)
            with self.assertWarns(Warning):
                # Issues warning since file ends before the final annotation
                smf.set_data_file(temp_path_data)
            samples = smf.read_samples()
            self.assertEqual(len(samples), 16)

def simulate_capture(sigmf_md, n, capture_len):
start_index = capture_len * n
Expand Down

0 comments on commit 1386965

Please sign in to comment.