Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix count samples from annotations #47

Merged
merged 2 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 26 additions & 9 deletions sigmf/sigmffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def get_annotations(self, index=None):
return [
x for x in self._metadata.get(self.ANNOTATION_KEY, [])
if index is None or (x[self.START_INDEX_KEY] <= index
and x[self.START_INDEX_KEY] + x[self.LENGTH_INDEX_KEY] > index)
and (self.LENGTH_INDEX_KEY not in x or x[self.START_INDEX_KEY] + x[self.LENGTH_INDEX_KEY] > index))
]
Teque5 marked this conversation as resolved.
Show resolved Hide resolved

def get_sample_size(self):
Expand All @@ -418,16 +418,13 @@ def get_sample_size(self):
def _count_samples(self):
"""
Count, set, and return the total number of samples in the data file.
If there is no data file but there are annotations, use the end index
of the final annotation instead. If there are no annotations, use 0.
If there is no data file but there are annotations, use the sample_count
from the annotation with the highest end index. If there are no annotations,
use 0.
For complex data, a 'sample' includes both the real and imaginary part.
"""
annotations = self.get_annotations()
if self.data_file is None:
if len(annotations) > 0:
sample_count = annotations[-1][self.START_INDEX_KEY] + annotations[-1][self.LENGTH_INDEX_KEY]
else:
sample_count = 0
sample_count = self._get_sample_count_from_annotations()
else:
header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()])
file_size = path.getsize(self.data_file) if self.offset_and_size is None else self.offset_and_size[1]
Expand All @@ -438,12 +435,32 @@ def _count_samples(self):
if file_data_size % (sample_size * num_channels) != 0:
warnings.warn(f'File `{self.data_file}` does not contain an integer '
'number of samples across channels. It may be invalid data.')
if len(annotations) > 0 and annotations[-1][self.START_INDEX_KEY] + annotations[-1][self.LENGTH_INDEX_KEY] > sample_count:
if self._get_sample_count_from_annotations() > sample_count:
warnings.warn(f'File `{self.data_file}` ends before the final annotation '
'in the corresponding SigMF metadata.')
self.sample_count = sample_count
return sample_count

def _get_sample_count_from_annotations(self):
    """
    Returns the number of samples implied by the annotation with the highest
    end index, or 0 if there are no annotations.
    NOTE: Annotations are ordered by START_INDEX_KEY and not end index, so we
    need to go through all annotations
    """
    start_key = self.START_INDEX_KEY
    length_key = self.LENGTH_INDEX_KEY
    # An annotation without a length still implies the recording reaches at
    # least its start index, so a missing length contributes 0.
    return max(
        (annot[start_key] + annot.get(length_key, 0) for annot in self.get_annotations()),
        default=0,
    )

def calculate_hash(self):
"""
Calculates the hash of the data file and adds it to the global section.
Expand Down
39 changes: 39 additions & 0 deletions tests/test_sigmffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,45 @@ def test_iterator_basic(self):
count += 1
self.assertEqual(count, len(self.sigmf_object))

class TestAnnotationHandling(unittest.TestCase):
    """Tests for annotation lookup and for sample counts derived from annotations."""

    def test_get_annotations_with_index(self):
        """Only annotations that cover the requested sample index are returned."""
        smf = SigMFFile(TEST_METADATA_MULTI_ANNON)
        annotations_idx10 = smf.get_annotations(index=10)
        # The annotation starting at 4 with length 4 ends before index 10
        self.assertListEqual(
            annotations_idx10,
            [
                {SigMFFile.START_INDEX_KEY: 0},
                {SigMFFile.START_INDEX_KEY: 0, SigMFFile.LENGTH_INDEX_KEY: 32},
            ],
        )

    def test__count_samples_from_annotation(self):
        """The sample count is taken from the annotation with the highest end index."""
        smf = SigMFFile(TEST_METADATA_MULTI_ANNON)
        sample_count = smf._count_samples()
        self.assertEqual(sample_count, 32)

    def test_set_data_file_without_annotations(self):
        """Setting a data file on metadata that has no annotations."""
        smf = SigMFFile(
            global_info={
                SigMFFile.DATATYPE_KEY: utils.get_data_type_str(TEST_FLOAT32_DATA),  # in this case, 'cf32_le'
            }
        )
        with tempfile.TemporaryDirectory() as tmpdir:
            temp_path_data = os.path.join(tmpdir, "datafile")
            TEST_FLOAT32_DATA.tofile(temp_path_data)
            smf.set_data_file(temp_path_data)
            samples = smf.read_samples()
            # assertEqual reports the actual length if the check fails
            self.assertEqual(len(samples), 16)

    def test_set_data_file_with_annotations(self):
        """Setting a data file that is shorter than the annotations warns."""
        smf = SigMFFile(TEST_METADATA_MULTI_ANNON)
        with tempfile.TemporaryDirectory() as tmpdir:
            temp_path_data = os.path.join(tmpdir, "datafile")
            TEST_FLOAT32_DATA.tofile(temp_path_data)
            with self.assertWarns(Warning):
                # Issues warning since file ends before the final annotation
                smf.set_data_file(temp_path_data)
            samples = smf.read_samples()
            self.assertEqual(len(samples), 16)

def simulate_capture(sigmf_md, n, capture_len):
start_index = capture_len * n
Expand Down
14 changes: 14 additions & 0 deletions tests/testdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,20 @@
}
}

# Metadata with three annotations: one without a length, one with start 0
# and length 32, and one with start 4 and length 4
TEST_METADATA_MULTI_ANNON = {
    SigMFFile.ANNOTATION_KEY: [
        {SigMFFile.START_INDEX_KEY: 0},
        {SigMFFile.LENGTH_INDEX_KEY: 32, SigMFFile.START_INDEX_KEY: 0},
        {SigMFFile.LENGTH_INDEX_KEY: 4, SigMFFile.START_INDEX_KEY: 4},
    ],
    SigMFFile.CAPTURE_KEY: [
        {SigMFFile.START_INDEX_KEY: 0},
    ],
    SigMFFile.GLOBAL_KEY: {
        SigMFFile.DATATYPE_KEY: 'rf32_le',
        SigMFFile.HASH_KEY: 'f4984219b318894fa7144519185d1ae81ea721c6113243a52b51e444512a39d74cf41a4cec3c5d000bd7277cc71232c04d7a946717497e18619bdbe94bfeadd6',
        SigMFFile.NUM_CHANNELS_KEY: 1,
        SigMFFile.VERSION_KEY: '1.0.0',
    },
}

# Data0 is a test of a compliant two capture recording
TEST_U8_DATA0 = list(range(256))
TEST_U8_META0 = {
Expand Down