From 5652a445a09c74ea28e25ee2d55a0cfdd6b68c71 Mon Sep 17 00:00:00 2001
From: Chris Ochoa <91976672+chrisochoatri@users.noreply.github.com>
Date: Thu, 5 May 2022 10:14:40 -0700
Subject: [PATCH] fix: slight change to autolabels and support for pd (#92)

* fix: slight change to autolabels and exposed to pd
---
 dgp/datasets/base_dataset.py    | 36 ++++++++++++++++++++++++---------
 dgp/datasets/pd_dataset.py      | 30 +++++++++++++++++++++++++--
 tests/test_autolabel_dataset.py | 14 ++++++++++---
 3 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/dgp/datasets/base_dataset.py b/dgp/datasets/base_dataset.py
index 0a851702..00bc38c9 100644
--- a/dgp/datasets/base_dataset.py
+++ b/dgp/datasets/base_dataset.py
@@ -1007,7 +1007,11 @@ def _get_scene_container(
     if requested_autolabels is not None:
         logging.debug(f"Loading autolabeled annotations from {scene_dir}.")
         autolabeled_scenes = _parse_autolabeled_scenes(
-            scene_dir, requested_autolabels, autolabel_root=autolabel_root, skip_missing_data=skip_missing_data
+            scene_dir,
+            requested_autolabels,
+            autolabel_root=autolabel_root,
+            skip_missing_data=skip_missing_data,
+            use_diskcache=use_diskcache,
         )
     else:
         autolabeled_scenes = None
@@ -1381,9 +1385,10 @@ def load_annotations(self, scene_idx, sample_idx_in_scene, datum_name):
         autolabel_annotations = self.get_autolabels_for_datum(scene_idx, sample_idx_in_scene, datum_name)
         for autolabel_key in self.requested_autolabels:
             # Some datums in a sample may not have associated annotations. Return "None" for those datums
-            _, annotation_key = autolabel_key.split('/')
-            # NOTE: model_name should already be stored in the scene json
-            # which is why we do not have to add it here to the annotation_file
+            model_name, annotation_key = autolabel_key.split('/')
+            # NOTE: model_name should typically not be included in the annotation_path stored inside the scene.json.
+            # If for some reason it is, it needs to be removed.
+
             annotation_path = autolabel_annotations.get(autolabel_key, None)

             if annotation_path is None:
@@ -1391,11 +1396,13 @@
                 continue
             if self.autolabel_root is not None:
                 annotation_file = os.path.join(
-                    self.autolabel_root, os.path.basename(self.scenes[scene_idx].directory), 'autolabels',
-                    annotation_path
+                    self.autolabel_root, os.path.basename(self.scenes[scene_idx].directory), AUTOLABEL_FOLDER,
+                    model_name, annotation_path
                 )
             else:
-                annotation_file = os.path.join(self.scenes[scene_idx].directory, 'autolabels', annotation_path)
+                annotation_file = os.path.join(
+                    self.scenes[scene_idx].directory, AUTOLABEL_FOLDER, model_name, annotation_path
+                )

             if not os.path.exists(annotation_file):
                 logging.warning(f'missing {annotation_file}')
@@ -1835,7 +1842,13 @@ def get_file_meta_from_datum(self, scene_idx, sample_idx_in_scene, datum_name):
         return data, annotations


-def _parse_autolabeled_scenes(scene_dir, requested_autolabels, autolabel_root=None, skip_missing_data=False):
+def _parse_autolabeled_scenes(
+    scene_dir,
+    requested_autolabels,
+    autolabel_root=None,
+    skip_missing_data=False,
+    use_diskcache=False,
+):
     """Parse autolabeled scene JSONs

     Parameters
@@ -1852,6 +1865,9 @@ def _parse_autolabeled_scenes(scene_dir, requested_autolabels, autolabel_root=No
     skip_missing_data: bool, default: False
         If true, skip over missing autolabel scenes

+    use_diskcache: bool, default: False
+        If true, use diskcache for autolabel scenes
+
     Returns
     -------
     autolabeled_scenes: dict
@@ -1883,5 +1899,7 @@
         assert os.path.exists(autolabel_dir), 'Path to autolabels {} does not exist'.format(autolabel_dir)
         assert os.path.exists(autolabel_scene), 'Scene JSON expected but not found at {}'.format(autolabel_scene)
-        autolabeled_scenes[autolabel] = SceneContainer(autolabel_scene, directory=autolabel_dir)
+        autolabeled_scenes[autolabel] = SceneContainer(
+            autolabel_scene, directory=autolabel_dir, use_diskcache=use_diskcache
+        )
     return autolabeled_scenes
diff --git a/dgp/datasets/pd_dataset.py b/dgp/datasets/pd_dataset.py
index edbd8cfd..60f89176 100644
--- a/dgp/datasets/pd_dataset.py
+++ b/dgp/datasets/pd_dataset.py
@@ -83,6 +83,9 @@ class _ParallelDomainDataset(_SynchronizedDataset):
     transform_accumulated_box_points: bool, default: False
         Flag to use cuboid pose and instance id to warp points when using lidar accumulation.
+
+    autolabel_root: str, default: None
+        Path to the autolabel root directory.
""" def __init__( self, @@ -98,6 +101,7 @@ def __init__( use_virtual_camera_datums=True, accumulation_context=None, transform_accumulated_box_points=False, + autolabel_root=None, ): self.coalesce_point_cloud = datum_names is not None and \ COALESCED_LIDAR_DATUM_NAME in datum_names @@ -136,6 +140,7 @@ def __init__( only_annotated_datums=only_annotated_datums, accumulation_context=accumulation_context, transform_accumulated_box_points=transform_accumulated_box_points, + autolabel_root=autolabel_root, ) def coalesce_pc_data(self, items): @@ -155,6 +160,12 @@ def coalesce_pc_data(self, items): assert self.coalesce_point_cloud assert len(pc_items) == len(LIDAR_DATUM_NAMES) + # TODO: fix this + if len(self.requested_autolabels) > 0: + logging.warning( + 'autolabels were requested, however point cloud coalesce does not support coalescing autolabels' + ) + # Only coalesce if there's more than 1 point cloud coalesced_pc = OrderedDict() X_V_merged, bbox_3d_V_merged, instance_ids_merged = [], [], [] @@ -248,6 +259,7 @@ def __init__( dataset_root=None, transform_accumulated_box_points=False, use_diskcache=True, + autolabel_root=None, ): if not use_diskcache: logging.warning('Instantiating a dataset with use_diskcache=False may exhaust memory with a large dataset.') @@ -261,10 +273,16 @@ def __init__( skip_missing_data=skip_missing_data, dataset_root=dataset_root, use_diskcache=use_diskcache, + autolabel_root=autolabel_root, ) # Return SynchronizedDataset with scenes built from dataset.json - dataset_metadata = DatasetMetadata.from_scene_containers(scenes, requested_annotations, requested_autolabels) + dataset_metadata = DatasetMetadata.from_scene_containers( + scenes, + requested_annotations, + requested_autolabels, + autolabel_root=autolabel_root, + ) super().__init__( dataset_metadata, scenes=scenes, @@ -278,6 +296,7 @@ def __init__( use_virtual_camera_datums=use_virtual_camera_datums, accumulation_context=accumulation_context, transform_accumulated_box_points=transform_accumulated_box_points, + autolabel_root=autolabel_root, ) @@ -300,6 +319,7 @@ def __init__( accumulation_context=None, transform_accumulated_box_points=False, use_diskcache=True, + autolabel_root=None, ): if not use_diskcache: logging.warning('Instantiating a dataset with use_diskcache=False may exhaust memory with a large dataset.') @@ -311,10 +331,16 @@ def __init__( is_datums_synchronized=True, skip_missing_data=skip_missing_data, use_diskcache=use_diskcache, + autolabel_root=autolabel_root, ) # Return SynchronizedDataset with scenes built from dataset.json - dataset_metadata = DatasetMetadata.from_scene_containers([scene], requested_annotations, requested_autolabels) + dataset_metadata = DatasetMetadata.from_scene_containers( + [scene], + requested_annotations, + requested_autolabels, + autolabel_root=autolabel_root, + ) super().__init__( dataset_metadata, scenes=[scene], diff --git a/tests/test_autolabel_dataset.py b/tests/test_autolabel_dataset.py index e5a8cb50..a22a0101 100644 --- a/tests/test_autolabel_dataset.py +++ b/tests/test_autolabel_dataset.py @@ -62,12 +62,16 @@ def clone_scene_as_autolabel(dataset_root, autolabel_root, autolabel_model, auto if 'scene' in scene_json and scene_json.endswith('json'): base_scene = open_pbobject(os.path.join(full_scene_dir, scene_json), Scene) for i in range(len(base_scene.data)): + name = base_scene.data[i].id.name datum = base_scene.data[i].datum datum_type = datum.WhichOneof('datum_oneof') datum_value = getattr(datum, datum_type) # This is datum.image or datum.point_cloud etc 
                annotation_type_id = ANNOTATION_KEY_TO_TYPE_ID[autolabel_type]
                 current_annotation = datum_value.annotations[annotation_type_id]
-                datum_value.annotations[annotation_type_id] = os.path.join(autolabel_scene_dir, current_annotation)
+                # NOTE: this should not actually change the path but is included for clarity
+                datum_value.annotations[annotation_type_id] = os.path.join(
+                    ANNOTATION_TYPE_ID_TO_FOLDER[autolabel_type], name, os.path.basename(current_annotation)
+                )

     save_pbobject_as_json(base_scene, os.path.join(autolabel_scene_dir, AUTOLABEL_SCENE_JSON_NAME))
     # Only modify one scene.json, test scene should not contain multiple scene.jsons
@@ -109,7 +113,8 @@ def test_autolabels_default_root(self):
             backward_context=1,
             requested_annotations=('bounding_box_3d', ),
             requested_autolabels=requested_autolabels,
-            autolabel_root=autolabel_root
+            autolabel_root=autolabel_root,
+            use_diskcache=False,
         )

         assert len(dataset) == 2
@@ -139,7 +144,8 @@ def test_autolabels_custom_root(self):
             backward_context=1,
             requested_annotations=('bounding_box_3d', ),
             requested_autolabels=requested_autolabels,
-            autolabel_root=autolabel_root
+            autolabel_root=autolabel_root,
+            use_diskcache=False,
         )

         assert len(dataset) == 2
@@ -174,6 +180,7 @@ def test_autolabels_missing_files(self):
             requested_autolabels=requested_autolabels,
             autolabel_root=autolabel_root,
             skip_missing_data=True,
+            use_diskcache=False,
         )

         assert len(dataset) == 2
@@ -210,6 +217,7 @@ def test_only_annotated_datums(self):
             autolabel_root=autolabel_root,
             only_annotated_datums=True,
             skip_missing_data=True,
+            use_diskcache=False,
         )

         assert len(dataset) == 1
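---

Usage sketch: with this patch applied, a Parallel Domain scene can be loaded with autolabels from a separate autolabel_root and with diskcache disabled. The snippet below is a minimal sketch, not part of the patch: the paths, datum name, and model name "my_model" are placeholders, and the scene_json argument name is assumed to mirror the analogous SynchronizedScene loader.

    from dgp.datasets.pd_dataset import ParallelDomainScene

    # Autolabel keys take the form "<model_name>/<annotation_key>". With autolabel_root
    # set, annotation files resolve to
    # <autolabel_root>/<scene_dir>/autolabels/<model_name>/<annotation_path>,
    # mirroring the os.path.join logic added to load_annotations above.
    dataset = ParallelDomainScene(
        scene_json='/data/pd/scene_000000/scene.json',        # placeholder path
        datum_names=('lidar', ),                              # placeholder datum name
        requested_annotations=('bounding_box_3d', ),
        requested_autolabels=('my_model/bounding_box_3d', ),  # placeholder model name
        autolabel_root='/data/pd_autolabels',                 # placeholder path
        skip_missing_data=True,
        use_diskcache=False,
    )

Note that requesting autolabels together with the coalesced point cloud datum is not supported yet: per the TODO above, coalesce_pc_data only logs a warning and does not coalesce the autolabels.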