feat: add dgp2wicker conversion (#132)

TRI-ML · Oct 17, 2022 · fa3aaa2 · fa3aaa2
1 parent 0386a84
commit fa3aaa2
Show file tree

Hide file tree

Showing 16 changed files with 1,800 additions and 3 deletions.
diff --git a/dgp/annotations/camera_transforms.py b/dgp/annotations/camera_transforms.py
@@ -633,15 +633,15 @@ def transform_datum(self, cam_datum: Dict[str, Any]) -> Dict[str, Any]:  # pylin
             rgb_mask = self.transform_mask_2d(rgb_mask)
             new_datum['rgb_mask'] = rgb_mask
 
-        if 'bounding_box_3d' in new_datum:
+        if 'bounding_box_3d' in new_datum and new_datum['bounding_box_3d'] is not None:
             # Note: DGP camera class does not model the full camera matrix just focal length and center
             # if using DGP camera class, do not use transformations that add a skew!
             boxes = new_datum['bounding_box_3d']
             pose_correction = new_datum['extrinsics'].inverse() * cam_datum['extrinsics']
             boxes = self.transform_detections_3d(boxes, pose_correction)
             new_datum['bounding_box_3d'] = boxes
 
-        if 'bounding_box_2d' in new_datum:
+        if 'bounding_box_2d' in new_datum and new_datum['bounding_box_2d'] is not None:
             boxes = new_datum['bounding_box_2d']
             boxes = self.transform_detections_2d(boxes, )
             new_datum['bounding_box_2d'] = boxes

diff --git a/dgp/annotations/transforms.py b/dgp/annotations/transforms.py
@@ -1,5 +1,8 @@
-# Copyright 2021 Toyota Research Institute.  All rights reserved.
+# Copyright 2021-2022 Woven Planet. All rights reserved.
 from collections import OrderedDict
+from typing import Any, Dict
+
+import numpy as np
 
 from dgp.annotations import ONTOLOGY_REGISTRY
 from dgp.annotations.transform_utils import (
@@ -8,6 +11,7 @@
     remap_instance_segmentation_2d_annotation,
     remap_semantic_segmentation_2d_annotation,
 )
+from dgp.utils.accumulate import points_in_cuboid
 
 
 class Compose:
@@ -198,3 +202,53 @@ def transform_datum(self, datum):
                 )
 
         return datum
+
+
+class AddLidarCuboidPoints(BaseTransform):
+    """Populate the num_points field for bounding_box_3d"""
+    def __init__(self, subsample: int = 1) -> None:
+        """Populate the num_points field for bounding_box_3d. Optionally downsamples the point cloud for speed.
+
+        Parameters
+        ----------
+        subsample: int, default: 1
+            Decimation factor applied to the point cloud before counting, i.e., subsample=10 indicates that
+            1/10th of the points are used and the resulting count is scaled back up by 10. Values of 1
+            (or less) disable subsampling.
+        """
+        super().__init__()
+        self.subsample = subsample
+
+    def transform_datum(self, datum: Dict[str, Any]) -> Dict[str, Any]:
+        """Populate the num_points field for bounding_box_3d
+
+        Parameters
+        ----------
+        datum: Dict[str,Any]
+            A dgp lidar or point cloud datum. Must contain keys bounding_box_3d and point_cloud
+
+        Returns
+        -------
+        datum: Dict[str,Any]
+            The same datum object; cuboids whose num_points was 0 are updated in place.
+        """
+        if 'bounding_box_3d' not in datum:
+            return datum
+
+        boxes = datum['bounding_box_3d']
+        if boxes is None or len(boxes) == 0:
+            return datum
+
+        assert 'point_cloud' in datum, 'datum should contain point_cloud key'
+        point_cloud = datum['point_cloud']
+        if self.subsample > 1:
+            N = point_cloud.shape[0]
+            # Sample WITHOUT replacement: the default (replace=True) may pick the same point several
+            # times, which would count it more than once inside a cuboid.
+            sample_idx = np.random.choice(N, N // self.subsample, replace=False)
+            # Indexing with an integer array already yields a new array, no explicit copy needed.
+            point_cloud = point_cloud[sample_idx]
+
+        for box in boxes:
+            # If a box is missing this num_points value we expect it will have a value of 0
+            # so only run this for boxes that might be missing the value
+            if box.num_points == 0:
+                in_cuboid = points_in_cuboid(point_cloud, box)
+                # Scale the subsampled count back up to estimate the count on the full cloud.
+                # Cast to a plain Python int so the stored value is not a numpy scalar.
+                box._num_points = int(np.sum(in_cuboid)) * self.subsample
+
+        return datum
diff --git a/dgp/contribs/dgp2wicker/.dockerignore b/dgp/contribs/dgp2wicker/.dockerignore
@@ -0,0 +1,38 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# macOS files
+.DS_Store
+
+# dotenv
+.env
+
+# virtualenv
+.venv
+venv/
+ENV/
+
+# IDE
+.idea/
+\.vscode/
diff --git a/dgp/contribs/dgp2wicker/Dockerfile b/dgp/contribs/dgp2wicker/Dockerfile
@@ -0,0 +1,12 @@
+# Copyright 2022 Woven Planet. All rights reserved.
+FROM dgp:latest
+
+# Install a headless Java runtime (presumably required by the Spark-based ingest; confirm against
+# the package requirements). The apt package lists are removed in the same layer so they do not
+# end up in the final image.
+RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
+  openjdk-11-jre-headless \
+  && rm -rf /var/lib/apt/lists/*
+
+# WORKSPACE can be overridden at build time via --build-arg WORKSPACE=...
+ARG WORKSPACE=/home/dgp2wicker
+WORKDIR ${WORKSPACE}
+COPY . ${WORKSPACE}
+# Place the sample wicker config in the root user's home directory.
+COPY sample_wickerconfig.json /root/wickerconfig.json
+RUN pip install --editable .
+ENV PYTHONPATH="${WORKSPACE}:$PYTHONPATH"
diff --git a/dgp/contribs/dgp2wicker/Makefile b/dgp/contribs/dgp2wicker/Makefile
@@ -0,0 +1,44 @@
+# Copyright 2022 Woven Planet. All rights reserved.
+PYTHON ?= python3
+PACKAGE_NAME ?= dgp2wicker
+WORKSPACE ?= /home/$(PACKAGE_NAME)
+DOCKER_IMAGE_NAME ?= $(PACKAGE_NAME)
+DOCKER_IMAGE ?= $(DOCKER_IMAGE_NAME):latest
+# Options shared by every `docker run` target: forwards AWS/Vault/Wicker environment
+# variables from the host and mounts the current directory at $(WORKSPACE).
+DOCKER_OPTS ?= \
+	-it \
+	--rm \
+	--shm-size=62G \
+	-e AWS_DEFAULT_REGION \
+	-e AWS_ACCESS_KEY_ID \
+	-e AWS_SECRET_ACCESS_KEY \
+	-e AWS_SESSION_TOKEN \
+	-e AWS_PROFILE \
+	-e VAULT_ASSUMED_ROLE \
+	-e WICKER_CONFIG_PATH \
+	-e DISPLAY=${DISPLAY} \
+	-v $(PWD):$(WORKSPACE)
+
+# None of the targets produce a file with their own name; declare them phony so a stray
+# file or directory called e.g. `clean` cannot mask the target.
+.PHONY: develop clean docker-build docker-run docker-start-interactive
+
+# Install the package in editable mode into the current environment.
+develop:
+	pip install --editable .
+
+# Remove build artifacts, byte-code caches and egg-info directories.
+clean:
+	$(PYTHON) setup.py clean && \
+	rm -rf build dist && \
+	find . -name "*.pyc" | xargs rm -f && \
+	find . -name "__pycache__" | xargs rm -rf && \
+	find . -name "*egg-info" | xargs rm -rf
+
+# Build the docker image, passing the workspace location through to the Dockerfile.
+docker-build:
+	docker build \
+	--build-arg WORKSPACE=$(WORKSPACE) \
+	-t $(DOCKER_IMAGE) .
+
+# Run $(COMMAND) inside a named container.
+docker-run:
+	docker run \
+	--name $(PACKAGE_NAME) \
+	$(DOCKER_OPTS) $(DOCKER_IMAGE) $(COMMAND)
+
+# Open an interactive bash shell inside a fresh container.
+docker-start-interactive:
+	docker run \
+	$(DOCKER_OPTS) \
+	$(DOCKER_IMAGE) bash
diff --git a/dgp/contribs/dgp2wicker/README.md b/dgp/contribs/dgp2wicker/README.md
@@ -0,0 +1,79 @@
+# DGP SynchronizedScene to Wicker Conversion
+
+This adds support for using DGP data in
+[wicker](https://github.com/woven-planet/wicker).
+
+Specifically, this saves the output of SynchronizedScene to wicker.
+
+---
+
+### Install
+
+```bash
+cd dgp/contribs/dgp2wicker
+pip install --editable .
+```
+
+Alternatively, use the included Docker image. Note: the S3 location of the wicker
+datasets is specified in a required wicker config file; please see the Wicker
+documentation for more details. An example `sample_wickerconfig.json` is included
+in the Docker image; it can be modified with your S3 bucket path and will then
+work with the Docker image.
+
+```bash
+cd dgp/contribs/dgp2wicker
+make docker-build
+```
+
+### Example
+
+#### Save dataset to wicker
+
+```bash
+dgp2wicker ingest \
+--scene-dataset-json <path to scene dataset json in s3 or local> \
+--wicker-dataset-name test_dataset \
+--wicker-dataset-version 0.0.1 \
+--datum-names camera_01,camera_02,lidar \
+--requested-annotations bounding_box_3d,semantic_segmentation_2d \
+--only-annotated-datums
+```
+
+#### Read dataset from wicker
+
+```python
+from dgp2wicker.dataset import DGPS3Dataset, compute_columns
+
+columns = compute_columns(datum_names = ['camera_01','camera_02','lidar',],\
+                          datum_types = ['image','image','point_cloud',], \
+                          requested_annotations=['bounding_box_3d','semantic_segmentation_2d','depth',], \
+                          cuboid_datum = 'lidar',)
+
+dataset = DGPS3Dataset(dataset_name = 'test_dataset',\
+                       dataset_version = '0.0.1', \
+                       dataset_partition_name='train', \
+                       columns_to_load = columns,)
+
+context = dataset[0]
+```
+
+---
+
+### Supported datums/annotations
+
+datums:
+
+- [x] image
+- [x] point_cloud
+- [ ] radar_point_cloud
+- [ ] file_datum
+- [ ] agent
+
+annotations:
+
+- [x] bounding_box_2d
+- [x] bounding_box_3d
+- [x] depth
+- [x] semantic_segmentation_2d
+- [x] instance_segmentation_2d
+- [ ] key_point_2d
+- [ ] key_line_2d
diff --git a/dgp/contribs/dgp2wicker/dgp2wicker/__init__.py b/dgp/contribs/dgp2wicker/dgp2wicker/__init__.py
@@ -0,0 +1,3 @@
+# Copyright 2022 Woven Planet.  All rights reserved.
+
+__version__ = '1.0.0'
diff --git a/dgp/contribs/dgp2wicker/dgp2wicker/cli.py b/dgp/contribs/dgp2wicker/dgp2wicker/cli.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+# Copyright 2022 Woven Planet NA. All rights reserved.
+"""dgp2wicker command line interface
+"""
+import logging
+import os
+import sys
+from functools import partial
+from typing import Any, Dict, List
+
+import click
+from dgp2wicker.ingest import ingest_dgp_to_wicker
+
+from dgp.annotations.camera_transforms import ScaleAffineTransform
+from dgp.annotations.transforms import AddLidarCuboidPoints
+
+
+class AddLidarCuboidPointsContext(AddLidarCuboidPoints):
+    """Sample-level variant of AddLidarCuboidPoints.
+
+    Walks over every datum of a sample and delegates to the parent class's ``__call__`` for
+    point cloud datums that carry a non-None ``bounding_box_3d`` entry. All other datums are
+    passed through untouched.
+    """
+    def __call__(self, sample: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        apply_to_datum = super().__call__
+        processed = []
+        for item in sample:
+            is_point_cloud = item['datum_type'] == 'point_cloud'
+            # .get() folds the "key present" and "value is not None" checks into one lookup
+            if is_point_cloud and item.get('bounding_box_3d') is not None:
+                item = apply_to_datum(item)
+            processed.append(item)
+        return processed
+
+
+class ScaleImages(ScaleAffineTransform):
+    """Sample-level variant of ScaleAffineTransform.
+
+    Delegates to the parent class's ``__call__`` for every image datum that has an ``rgb``
+    entry; every other datum is returned unchanged and in its original position.
+    """
+    def __call__(self, sample: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        scale_datum = super().__call__
+        scaled = []
+        for item in sample:
+            # Guard clause: anything that is not an image with pixel data passes through as-is
+            if item['datum_type'] != 'image' or 'rgb' not in item:
+                scaled.append(item)
+                continue
+            scaled.append(scale_datum(item))
+        return scaled
+
+
+@click.group()
+@click.version_option()
+def cli():
+    """dgp2wicker command line interface."""
+    # Show INFO messages from this package (the logger name must match the package name,
+    # 'dgp2wicker', for the level to take effect) and silence chatty third-party loggers.
+    logging.getLogger('dgp2wicker').setLevel(level=logging.INFO)
+    logging.getLogger('py4j').setLevel(level=logging.CRITICAL)
+    logging.getLogger('botocore').setLevel(logging.CRITICAL)
+    logging.getLogger('boto3').setLevel(logging.CRITICAL)
+    logging.getLogger('PIL').setLevel(logging.CRITICAL)
+
+
+@cli.command(name='ingest')
+@click.option("--scene-dataset-json", required=True, help="Path to DGP Dataset JSON")
+@click.option("--wicker-dataset-name", required=True, default=None, help="Name of dataset in Wicker")
+@click.option("--wicker-dataset-version", required=True, help="Version of dataset in Wicker")
+@click.option("--datum-names", required=True, help="List of datum names")
+@click.option("--requested-annotations", help="List of annotation types")
+@click.option("--only-annotated-datums", is_flag=True, help="Apply only annotated datums")
+@click.option(
+    "--max-num-scenes", required=False, default=None, type=int, help="The maximum number of scenes to process"
+)
+@click.option("--max-len", required=False, default=1000, type=int, help="The maximum number of samples per scene")
+@click.option("--chunk-size", required=False, default=1000, type=int, help="The number of samples per chunk")
+@click.option("--skip-camera-cuboids", is_flag=True, help="If True, skip cuboids for non lidar datums")
+@click.option("--num-partitions", required=False, default=None, type=int, help="Number of scene partitions")
+@click.option("--num-repartitions", required=False, default=None, type=int, help="Number of sample partitions")
+@click.option("--is-pd", is_flag=True, help="If true, process the dataset with ParallelDomainScene")
+@click.option("--data-uri", required=False, default=None, help="Alternate location for scene data")
+@click.option("--add-lidar-points", is_flag=True, help="Add lidar point count to lidar cuboids")
+@click.option("--half-size-images", is_flag=True, help="Resize image datums to half size")
+def ingest(
+    scene_dataset_json,
+    wicker_dataset_name,
+    wicker_dataset_version,
+    datum_names,
+    requested_annotations,
+    only_annotated_datums,
+    max_num_scenes,
+    max_len,
+    chunk_size,
+    skip_camera_cuboids,
+    num_partitions,
+    num_repartitions,
+    is_pd,
+    data_uri,
+    add_lidar_points,
+    half_size_images,
+):
+    """Ingest a DGP scene dataset into Wicker.
+    \f
+
+    Everything after the form feed above is hidden from the ``--help`` output by click.
+
+    Parameters
+    ----------
+    scene_dataset_json: str
+        Path to the DGP dataset JSON.
+
+    wicker_dataset_name: str
+        Name of the dataset in Wicker.
+
+    wicker_dataset_version: str
+        Version of the dataset in Wicker.
+
+    datum_names: str
+        Comma separated datum names, e.g. "camera_01,lidar".
+
+    requested_annotations: str, optional
+        Comma separated annotation types. If empty or omitted, None is forwarded.
+
+    only_annotated_datums: bool
+        Forwarded to the dataset keyword arguments.
+
+    max_num_scenes, num_partitions, num_repartitions: int, optional
+        Parsed as integers by click; None when not given.
+
+    max_len, chunk_size: int
+        Parsed as integers by click; default to 1000.
+
+    skip_camera_cuboids, is_pd: bool
+        Flags forwarded unchanged to ingest_dgp_to_wicker.
+
+    data_uri: str, optional
+        Alternate location for scene data.
+
+    add_lidar_points: bool
+        If set, AddLidarCuboidPointsContext is added to the sample pipeline.
+
+    half_size_images: bool
+        If set, ScaleImages(s=.5) is added to the sample pipeline.
+    """
+    # Split the comma separated CLI values, dropping empty entries (e.g. from a trailing comma)
+    datum_names = [x.strip() for x in datum_names.split(',') if x.strip()]
+    if requested_annotations:
+        requested_annotations = [x.strip() for x in requested_annotations.split(',') if x.strip()]
+    else:
+        requested_annotations = None
+    dataset_kwargs = {
+        'datum_names': datum_names,
+        'requested_annotations': requested_annotations,
+        'only_annotated_datums': only_annotated_datums,
+    }
+
+    # Optional sample-level transforms, applied in this order
+    pipeline = []
+    if add_lidar_points:
+        pipeline.append(AddLidarCuboidPointsContext())
+    if half_size_images:
+        pipeline.append(ScaleImages(s=.5))
+
+    results = ingest_dgp_to_wicker(
+        scene_dataset_json=scene_dataset_json,
+        wicker_dataset_name=wicker_dataset_name,
+        wicker_dataset_version=wicker_dataset_version,
+        dataset_kwargs=dataset_kwargs,
+        spark_context=None,
+        pipeline=pipeline,
+        max_num_scenes=max_num_scenes,
+        max_len=max_len,
+        chunk_size=chunk_size,
+        skip_camera_cuboids=skip_camera_cuboids,
+        num_partitions=num_partitions,
+        num_repartitions=num_repartitions,
+        is_pd=is_pd,
+        data_uri=data_uri,
+    )
+
+    print('Finished ingest!')
+    print(results)
+
+
+if __name__ == '__main__':
+    cli()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# Copyright 2022 Woven Planet. All rights reserved.

		__version__ = '1.0.0'