[Draft] Make dynamic tables expandable #1137

Draft: wants to merge 3 commits into main
@@ -102,7 +102,9 @@ class DatasetIOConfiguration(BaseModel, ABC):
)
dataset_name: Literal["data", "timestamps"] = Field(description="The reference name of the dataset.", frozen=True)
dtype: InstanceOf[np.dtype] = Field(description="The data type of elements of this dataset.", frozen=True)
full_shape: tuple[int, ...] = Field(description="The maximum shape of the entire dataset.", frozen=True)
full_shape: tuple[Union[int, None], ...] = Field(
description="The maximum shape of the entire dataset.", frozen=False
)

# User specifiable fields
chunk_shape: Union[tuple[PositiveInt, ...], None] = Field(
@@ -277,7 +279,12 @@ def from_neurodata_object(cls, neurodata_object: Container, dataset_name: Litera
compression_method = "gzip"
elif dtype == np.dtype("object"): # Unclear what default chunking/compression should be for compound objects
# pandas reads in strings as objects by default: https://pandas.pydata.org/docs/user_guide/text.html
all_elements_are_strings = all([isinstance(element, str) for element in candidate_dataset[:].flat])
if isinstance(candidate_dataset, np.ndarray):
all_elements_are_strings = all([isinstance(element, str) for element in candidate_dataset[:].flat])
elif isinstance(candidate_dataset, list):
all_elements_are_strings = all([isinstance(element, str) for element in candidate_dataset])
else:
all_elements_are_strings = False
if all_elements_are_strings:
dtype = np.array([element for element in candidate_dataset[:].flat]).dtype
chunk_shape = SliceableDataChunkIterator.estimate_default_chunk_shape(
@@ -288,18 +295,17 @@
)
compression_method = "gzip"
else:
raise NotImplementedError(
f"Unable to create a `DatasetIOConfiguration` for the dataset at '{location_in_file}'"
f"for neurodata object '{neurodata_object}' of type '{type(neurodata_object)}'!"

# chunk_shape = (1,) * len(full_shape) # Worse possible chunking
chunk_shape = full_shape
buffer_shape = full_shape
compression_method = None
import warnings

warnings.warn(
f"Default chunking and compression options for compound objects are not optimized. "
f"Consider manually specifying DatasetIOConfiguration for dataset at '{location_in_file}'."
)
# TODO: Add support for compound objects with non-string elements
# chunk_shape = full_shape # validate_all_shapes fails if chunk_shape or buffer_shape is None
# buffer_shape = full_shape
# compression_method = None
# warnings.warn(
# f"Default chunking and compression options for compound objects are not optimized. "
# f"Consider manually specifying DatasetIOConfiguration for dataset at '{location_in_file}'."
# )

return cls(
object_id=neurodata_object.object_id,
2 changes: 1 addition & 1 deletion src/neuroconv/tools/nwb_helpers/_configure_backend.py
@@ -27,7 +27,7 @@ def configure_backend(
is_ndx_events_installed = is_package_installed(package_name="ndx_events")
ndx_events = importlib.import_module("ndx_events") if is_ndx_events_installed else None

# A remapping of the object IDs in the backend configuration might necessary
# A remapping of the object IDs in the backend configuration might be necessary
locations_to_remap = backend_configuration.find_locations_requiring_remapping(nwbfile=nwbfile)
if any(locations_to_remap):
backend_configuration = backend_configuration.build_remapped_backend(locations_to_remap=locations_to_remap)
14 changes: 6 additions & 8 deletions src/neuroconv/tools/nwb_helpers/_dataset_configuration.py
@@ -5,7 +5,6 @@
import h5py
import numpy as np
import zarr
from hdmf import Container
from hdmf.data_utils import DataIO
from hdmf.utils import get_data_shape
from hdmf_zarr import NWBZarrIO
@@ -104,10 +103,11 @@ def get_default_dataset_io_configurations(
known_dataset_fields = ("data", "timestamps")
for neurodata_object in nwbfile.objects.values():
if isinstance(neurodata_object, DynamicTable):
dynamic_table = neurodata_object # For readability

for column in dynamic_table.columns:
candidate_dataset = column.data # VectorData object
dynamic_table = neurodata_object # For readability
for column in (*dynamic_table.columns, dynamic_table.id):
Collaborator Author (review comment):
Here, we add the `id` column of each table to the set of datasets that we catch for setting the DataIO.
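A small self-contained sketch of what the new loop covers (the table below is illustrative, not taken from the PR): a table's `id` column is an `ElementIdentifiers` object, so it exposes a `data` attribute just like the `VectorData` columns and can now receive a `DatasetIOConfiguration` as well.

```python
from hdmf.common import DynamicTable

# Build a tiny table purely for illustration.
table = DynamicTable(name="example_table", description="illustrative table")
table.add_column(name="value", description="example column")
table.add_row(value=1.0)
table.add_row(value=2.0)

dynamic_table = table  # mirrors the variable name used in the diff
for column in (*dynamic_table.columns, dynamic_table.id):
    # Both VectorData columns and the ElementIdentifiers id column expose `.data`.
    candidate_dataset = getattr(column, "data")
    print(type(column).__name__, list(candidate_dataset))
# VectorData [1.0, 2.0]
# ElementIdentifiers [0, 1]
```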

dataset_name = "data"
candidate_dataset = getattr(column, dataset_name) # Safer way to access data attribute
# noinspection PyTypeChecker
if _is_dataset_written_to_file(
candidate_dataset=candidate_dataset, backend=backend, existing_file=existing_file
@@ -119,16 +119,14 @@
continue # Skip

# Skip over columns whose values are links, such as the 'group' of an ElectrodesTable
if any(isinstance(value, Container) for value in candidate_dataset):
continue # Skip
# if any(isinstance(value, Container) for value in candidate_dataset):
Collaborator Author (review comment):
I commented this out because it was catching the ElectrodeGroups column. There is probably a better way.
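If the intent is still to skip genuine link columns without scanning every element, one option is sketched below; the helper name and the exact checks are hypothetical and not part of this PR. The idea is to decide from the column type, or from a single element, so that object-dtype string columns keep getting configured while columns holding containers (such as the electrodes table's `group` column of `ElectrodeGroup` objects) are skipped.

```python
from hdmf import Container
from hdmf.common import DynamicTableRegion


def column_holds_links(column, candidate_dataset) -> bool:
    """Hypothetical helper: detect link-like columns without scanning every row."""
    if isinstance(column, DynamicTableRegion):
        # Region columns reference rows of another table.
        return True
    # Otherwise inspect only the first element (cheap compared to the full scan).
    first_element = candidate_dataset[0] if len(candidate_dataset) else None
    return isinstance(first_element, Container)
```

Whether a single-element check is sufficient for ragged or mixed columns would still need to be verified.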

# continue # Skip

# Skip columns whose values are a reference type
if isinstance(column, TimeSeriesReferenceVectorData):
continue

# Skip datasets with any zero-length axes
dataset_name = "data"
candidate_dataset = getattr(column, dataset_name)
full_shape = get_data_shape(data=candidate_dataset)
if any(axis_length == 0 for axis_length in full_shape):
continue
33 changes: 33 additions & 0 deletions tests/test_ecephys/test_tools_spikeinterface.py
@@ -753,6 +753,39 @@ def test_manual_row_adition_before_add_electrodes_function_to_nwbfile(self):
self.assertListEqual(list(self.nwbfile.electrodes.id.data), expected_ids)
self.assertListEqual(list(self.nwbfile.electrodes["channel_name"].data), expected_names)

def test_add_electrodes_with_previously_written_nwbfile(self):
"""Add electrodes to a file that was already written"""
values_dic = self.defaults

values_dic.update(id=123)
self.nwbfile.add_electrode(**values_dic)

values_dic.update(id=124)
self.nwbfile.add_electrode(**values_dic)

from pynwb import NWBHDF5IO

from neuroconv.tools.nwb_helpers import (
configure_backend,
get_default_backend_configuration,
)

backend_configuration = get_default_backend_configuration(nwbfile=self.nwbfile, backend="hdf5")

configure_backend(nwbfile=self.nwbfile, backend_configuration=backend_configuration)
with NWBHDF5IO("test.nwb", "w") as io:
io.write(self.nwbfile)

with NWBHDF5IO("test.nwb", "a") as io:
nwbfile_read = io.read()

add_electrodes_to_nwbfile(recording=self.recording_1, nwbfile=nwbfile_read)

expected_ids = [123, 124, 2, 3, 4, 5]
expected_names = ["123", "124", "a", "b", "c", "d"]
self.assertListEqual(list(nwbfile_read.electrodes.id.data), expected_ids)
self.assertListEqual(list(nwbfile_read.electrodes["channel_name"].data), expected_names)

def test_manual_row_adition_after_add_electrodes_function_to_nwbfile(self):
"""Add some rows to the electrode table after using the add_electrodes_to_nwbfile function"""
add_electrodes_to_nwbfile(recording=self.recording_1, nwbfile=self.nwbfile)