[Draft] Make dynamic tables expandable #1137

Draft: wants to merge 3 commits into main
@@ -102,7 +102,9 @@ class DatasetIOConfiguration(BaseModel, ABC):
)
dataset_name: Literal["data", "timestamps"] = Field(description="The reference name of the dataset.", frozen=True)
dtype: InstanceOf[np.dtype] = Field(description="The data type of elements of this dataset.", frozen=True)
full_shape: tuple[int, ...] = Field(description="The maximum shape of the entire dataset.", frozen=True)
full_shape: tuple[Union[int, None], ...] = Field(
description="The maximum shape of the entire dataset.", frozen=False
)

# User specifiable fields
chunk_shape: Union[tuple[PositiveInt, ...], None] = Field(
@@ -277,7 +279,12 @@ def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Litera
compression_method = "gzip"
elif dtype == np.dtype("object"): # Unclear what default chunking/compression should be for compound objects
# pandas reads in strings as objects by default: https://pandas.pydata.org/docs/user_guide/text.html
all_elements_are_strings = all([isinstance(element, str) for element in candidate_dataset[:].flat])
if isinstance(candidate_dataset, np.ndarray):
all_elements_are_strings = all([isinstance(element, str) for element in candidate_dataset[:].flat])
elif isinstance(candidate_dataset, list):
all_elements_are_strings = all([isinstance(element, str) for element in candidate_dataset])
else:
all_elements_are_strings = False
if all_elements_are_strings:
dtype = np.array([element for element in candidate_dataset[:].flat]).dtype
chunk_shape = SliceableDataChunkIterator.estimate_default_chunk_shape(
@@ -288,18 +295,17 @@
)
compression_method = "gzip"
else:
raise NotImplementedError(
f"Unable to create a `DatasetIOConfiguration` for the dataset at '{location_in_file}'"
f"for neurodata object '{neurodata_object}' of type '{type(neurodata_object)}'!"

# chunk_shape = (1,) * len(full_shape) # Worse possible chunking
chunk_shape = full_shape
buffer_shape = full_shape
compression_method = None
import warnings

warnings.warn(
f"Default chunking and compression options for compound objects are not optimized. "
f"Consider manually specifying DatasetIOConfiguration for dataset at '{location_in_file}'."
)
# TODO: Add support for compound objects with non-string elements
# chunk_shape = full_shape # validate_all_shapes fails if chunk_shape or buffer_shape is None
# buffer_shape = full_shape
# compression_method = None
# warnings.warn(
# f"Default chunking and compression options for compound objects are not optimized. "
# f"Consider manually specifying DatasetIOConfiguration for dataset at '{location_in_file}'."
# )

return cls(
object_id=neurodata_object.object_id,
2 changes: 1 addition & 1 deletion src/neuroconv/tools/nwb_helpers/_configure_backend.py
@@ -27,7 +27,7 @@ def configure_backend(
is_ndx_events_installed = is_package_installed(package_name="ndx_events")
ndx_events = importlib.import_module("ndx_events") if is_ndx_events_installed else None

# A remapping of the object IDs in the backend configuration might necessary
# A remapping of the object IDs in the backend configuration might be necessary
locations_to_remap = backend_configuration.find_locations_requiring_remapping(nwbfile=nwbfile)
if any(locations_to_remap):
backend_configuration = backend_configuration.build_remapped_backend(locations_to_remap=locations_to_remap)
14 changes: 6 additions & 8 deletions src/neuroconv/tools/nwb_helpers/_dataset_configuration.py
@@ -5,7 +5,6 @@
import h5py
import numpy as np
import zarr
from hdmf import Container
from hdmf.data_utils import DataIO
from hdmf.utils import get_data_shape
from hdmf_zarr import NWBZarrIO
@@ -104,10 +103,11 @@ def get_default_dataset_io_configurations(
known_dataset_fields = ("data", "timestamps")
for neurodata_object in nwbfile.objects.values():
if isinstance(neurodata_object, DynamicTable):
dynamic_table = neurodata_object # For readability

for column in dynamic_table.columns:
candidate_dataset = column.data # VectorData object
dynamic_table = neurodata_object # For readability
for column in (*dynamic_table.columns, dynamic_table.id):
Collaborator Author (review comment):
Here, we add the `id` column of each table to the set of datasets that we catch for setting the DataIO.
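A small self-contained sketch of what the new loop covers (the table below is illustrative, not taken from the PR): a table's `id` column is an `ElementIdentifiers` object, so it exposes a `data` attribute just like the `VectorData` columns and can now receive a `DatasetIOConfiguration` as well.

```python
from hdmf.common import DynamicTable

# Build a tiny table purely for illustration.
table = DynamicTable(name="example_table", description="illustrative table")
table.add_column(name="value", description="example column")
table.add_row(value=1.0)
table.add_row(value=2.0)

dynamic_table = table  # mirrors the variable name used in the diff
for column in (*dynamic_table.columns, dynamic_table.id):
    # Both VectorData columns and the ElementIdentifiers id column expose `.data`.
    candidate_dataset = getattr(column, "data")
    print(type(column).__name__, list(candidate_dataset))
# VectorData [1.0, 2.0]
# ElementIdentifiers [0, 1]
```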

dataset_name = "data"
candidate_dataset = getattr(column, dataset_name) # Safer way to access data attribute
# noinspection PyTypeChecker
if _is_dataset_written_to_file(
candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file
@@ -119,16 +119,14 @@
continue # Skip

# Skip over columns whose values are links, such as the 'group' of an ElectrodesTable
if any(isinstance(value, Container) for value in candidate_dataset):
continue # Skip
# if any(isinstance(value, Container) for value in candidate_dataset):
Collaborator Author (review comment):
I commented this out because it was catching the ElectrodeGroups column. There is probably a better way.
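If the intent is still to skip genuine link columns without scanning every element, one option is sketched below; the helper name and the exact checks are hypothetical and not part of this PR. The idea is to decide from the column type, or from a single element, so that object-dtype string columns keep getting configured while columns holding containers (such as the electrodes table's `group` column of `ElectrodeGroup` objects) are skipped.

```python
from hdmf import Container
from hdmf.common import DynamicTableRegion


def column_holds_links(column, candidate_dataset) -> bool:
    """Hypothetical helper: detect link-like columns without scanning every row."""
    if isinstance(column, DynamicTableRegion):
        # Region columns reference rows of another table.
        return True
    # Otherwise inspect only the first element (cheap compared to the full scan).
    first_element = candidate_dataset[0] if len(candidate_dataset) else None
    return isinstance(first_element, Container)
```

Whether a single-element check is sufficient for ragged or mixed columns would still need to be verified.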

# continue # Skip

# Skip columns whose values are a reference type
if isinstance(column, TimeSeriesReferenceVectorData):
continue

# Skip datasets with any zero-length axes
dataset_name = "data"
candidate_dataset = getattr(column, dataset_name)
full_shape = get_data_shape(data=candidate_dataset)
if any(axis_length == 0 for axis_length in full_shape):
continue
33 changes: 33 additions & 0 deletions tests/test_ecephys/test_tools_spikeinterface.py
@@ -753,6 +753,39 @@ def test_manual_row_adition_before_add_electrodes_function_to_nwbfile(self):
self.assertListEqual(list(self.nwbfile.electrodes.id.data), expected_ids)
self.assertListEqual(list(self.nwbfile.electrodes["channel_name"].data), expected_names)

def test_add_electrodes_with_previously_written_nwbfile(self):
"""Add electrodes to a file that was already written"""
values_dic = self.defaults

values_dic.update(id=123)
self.nwbfile.add_electrode(**values_dic)

values_dic.update(id=124)
self.nwbfile.add_electrode(**values_dic)

from pynwb import NWBHDF5IO

from neuroconv.tools.nwb_helpers import (
configure_backend,
get_default_backend_configuration,
)

backend_configuration = get_default_backend_configuration(nwbfile=self.nwbfile, backend="hdf5")

configure_backend(nwbfile=self.nwbfile, backend_configuration=backend_configuration)
with NWBHDF5IO("test.nwb", "w") as io:
io.write(self.nwbfile)

with NWBHDF5IO("test.nwb", "a") as io:
nwbfile_read = io.read()

add_electrodes_to_nwbfile(recording=self.recording_1, nwbfile=nwbfile_read)

expected_ids = [123, 124, 2, 3, 4, 5]
expected_names = ["123", "124", "a", "b", "c", "d"]
self.assertListEqual(list(nwbfile_read.electrodes.id.data), expected_ids)
self.assertListEqual(list(nwbfile_read.electrodes["channel_name"].data), expected_names)

def test_manual_row_adition_after_add_electrodes_function_to_nwbfile(self):
"""Add some rows to the electrode table after using the add_electrodes_to_nwbfile function"""
add_electrodes_to_nwbfile(recording=self.recording_1, nwbfile=self.nwbfile)