Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix count samples from annotations #47

Merged
merged 2 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 42 additions & 13 deletions sigmf/sigmffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,11 +402,23 @@ def get_annotations(self, index=None):
list of dict
Each dictionary contains one annotation for the sample at `index`.
'''
return [
x for x in self._metadata.get(self.ANNOTATION_KEY, [])
if index is None or (x[self.START_INDEX_KEY] <= index
and x[self.START_INDEX_KEY] + x[self.LENGTH_INDEX_KEY] > index)
]
annotations = self._metadata.get(self.ANNOTATION_KEY, [])
if index is None:
return annotations

annotations_including_index = []
for annotation in annotations:
if index < annotation[self.START_INDEX_KEY]:
# index is before annotation starts -> skip
continue
if self.LENGTH_INDEX_KEY in annotation:
# Annotation includes sample_count -> check end index
if index >= annotation[self.START_INDEX_KEY] + annotation[self.LENGTH_INDEX_KEY]:
# index is after annotation end -> skip
continue

annotations_including_index.append(annotation)
return annotations_including_index

def get_sample_size(self):
"""
Expand All @@ -418,16 +430,13 @@ def get_sample_size(self):
def _count_samples(self):
"""
Count, set, and return the total number of samples in the data file.
If there is no data file but there are annotations, use the end index
of the final annotation instead. If there are no annotations, use 0.
If there is no data file but there are annotations, use the sample_count
from the annotation with the highest end index. If there are no annotations,
use 0.
For complex data, a 'sample' includes both the real and imaginary part.
"""
annotations = self.get_annotations()
if self.data_file is None:
if len(annotations) > 0:
sample_count = annotations[-1][self.START_INDEX_KEY] + annotations[-1][self.LENGTH_INDEX_KEY]
else:
sample_count = 0
sample_count = self._get_sample_count_from_annotations()
else:
header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()])
file_size = path.getsize(self.data_file) if self.offset_and_size is None else self.offset_and_size[1]
Expand All @@ -438,12 +447,32 @@ def _count_samples(self):
if file_data_size % (sample_size * num_channels) != 0:
warnings.warn(f'File `{self.data_file}` does not contain an integer '
'number of samples across channels. It may be invalid data.')
if len(annotations) > 0 and annotations[-1][self.START_INDEX_KEY] + annotations[-1][self.LENGTH_INDEX_KEY] > sample_count:
if self._get_sample_count_from_annotations() > sample_count:
warnings.warn(f'File `{self.data_file}` ends before the final annotation '
'in the corresponding SigMF metadata.')
self.sample_count = sample_count
return sample_count

def _get_sample_count_from_annotations(self):
    """
    Return the sample count implied by the annotation with the highest end index.

    Annotations are ordered by START_INDEX_KEY and not by end index, so every
    annotation has to be inspected rather than only the last one.

    Returns
    -------
    int
        The largest end index (`start + length`) over all annotations. An
        annotation without LENGTH_INDEX_KEY contributes only its start index,
        because the sample count must be at least that large. Returns 0 when
        there are no annotations.
    """
    return max(
        (
            # A missing length counts as 0, so the annotation contributes its start index.
            annotation[self.START_INDEX_KEY] + annotation.get(self.LENGTH_INDEX_KEY, 0)
            for annotation in self.get_annotations()
        ),
        default=0,  # no annotations at all -> no samples can be inferred
    )

def calculate_hash(self):
"""
Calculates the hash of the data file and adds it to the global section.
Expand Down
55 changes: 55 additions & 0 deletions tests/test_sigmffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from pathlib import Path
import numpy as np
import unittest
import copy

from sigmf import sigmffile, utils
from sigmf.sigmffile import SigMFFile
Expand Down Expand Up @@ -61,6 +62,60 @@ def test_iterator_basic(self):
count += 1
self.assertEqual(count, len(self.sigmf_object))

class TestAnnotationHandling(unittest.TestCase):
    """Tests for annotation lookup by index and annotation-derived sample counts."""

    def test_get_annotations_with_index(self):
        """Test that only annotations containing index are returned from get_annotations()"""
        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
        # One open-ended annotation (no length) and one that ends before index 10.
        smf.add_annotation(start_index=1)
        smf.add_annotation(start_index=4, length=4)
        annotations_idx10 = smf.get_annotations(index=10)
        # NOTE(review): the first expected entry presumably comes from
        # TEST_METADATA itself (start 0, length 16) - confirm against the fixture.
        self.assertListEqual(
            annotations_idx10,
            [
                {SigMFFile.START_INDEX_KEY: 0, SigMFFile.LENGTH_INDEX_KEY: 16},
                {SigMFFile.START_INDEX_KEY: 1},
            ]
        )

    def test__count_samples_from_annotation(self):
        """Make sure sample count from annotations use correct end index"""
        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
        # The annotation added last ends at 8; the one ending at 32 must win.
        smf.add_annotation(start_index=0, length=32)
        smf.add_annotation(start_index=4, length=4)
        sample_count = smf._count_samples()
        self.assertEqual(sample_count, 32)

    def test_set_data_file_without_annotations(self):
        """
        Make sure setting data_file with no annotations registered does not
        raise any errors
        """
        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
        smf._metadata[SigMFFile.ANNOTATION_KEY].clear()
        with tempfile.TemporaryDirectory() as tmpdir:
            temp_path_data = os.path.join(tmpdir, "datafile")
            TEST_FLOAT32_DATA.tofile(temp_path_data)
            smf.set_data_file(temp_path_data)
            samples = smf.read_samples()
            # assertEqual reports the actual length on failure, unlike assertTrue.
            self.assertEqual(len(samples), 16)

    def test_set_data_file_with_annotations(self):
        """
        Make sure setting data_file with annotations registered use sample
        count from data_file and issue a warning if annotations have end
        indices bigger than file end index
        """
        smf = SigMFFile(copy.deepcopy(TEST_METADATA))
        smf.add_annotation(start_index=0, length=32)
        with tempfile.TemporaryDirectory() as tmpdir:
            temp_path_data = os.path.join(tmpdir, "datafile")
            TEST_FLOAT32_DATA.tofile(temp_path_data)
            with self.assertWarns(Warning):
                # Issues warning since file ends before the final annotation
                smf.set_data_file(temp_path_data)
            samples = smf.read_samples()
            # assertEqual reports the actual length on failure, unlike assertTrue.
            self.assertEqual(len(samples), 16)

def simulate_capture(sigmf_md, n, capture_len):
start_index = capture_len * n
Expand Down