Skip to content

Commit

Permalink
feat: add dgp2wicker conversion (#132)
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisochoatri authored Oct 17, 2022
1 parent 0386a84 commit fa3aaa2
Show file tree
Hide file tree
Showing 16 changed files with 1,800 additions and 3 deletions.
4 changes: 2 additions & 2 deletions dgp/annotations/camera_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,15 +633,15 @@ def transform_datum(self, cam_datum: Dict[str, Any]) -> Dict[str, Any]: # pylin
rgb_mask = self.transform_mask_2d(rgb_mask)
new_datum['rgb_mask'] = rgb_mask

if 'bounding_box_3d' in new_datum:
if 'bounding_box_3d' in new_datum and new_datum['bounding_box_3d'] is not None:
# Note: DGP camera class does not model the full camera matrix just focal length and center
# if using DGP camera class, do not use transformations that add a skew!
boxes = new_datum['bounding_box_3d']
pose_correction = new_datum['extrinsics'].inverse() * cam_datum['extrinsics']
boxes = self.transform_detections_3d(boxes, pose_correction)
new_datum['bounding_box_3d'] = boxes

if 'bounding_box_2d' in new_datum:
if 'bounding_box_2d' in new_datum and new_datum['bounding_box_2d'] is not None:
boxes = new_datum['bounding_box_2d']
boxes = self.transform_detections_2d(boxes, )
new_datum['bounding_box_2d'] = boxes
Expand Down
56 changes: 55 additions & 1 deletion dgp/annotations/transforms.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Copyright 2021 Toyota Research Institute. All rights reserved.
# Copyright 2021-2022 Woven Planet. All rights reserved.
from collections import OrderedDict
from typing import Any, Dict

import numpy as np

from dgp.annotations import ONTOLOGY_REGISTRY
from dgp.annotations.transform_utils import (
Expand All @@ -8,6 +11,7 @@
remap_instance_segmentation_2d_annotation,
remap_semantic_segmentation_2d_annotation,
)
from dgp.utils.accumulate import points_in_cuboid


class Compose:
Expand Down Expand Up @@ -198,3 +202,53 @@ def transform_datum(self, datum):
)

return datum


class AddLidarCuboidPoints(BaseTransform):
    """Populate the num_points field for bounding_box_3d annotations on lidar datums."""
    def __init__(self, subsample: int = 1) -> None:
        """Populate the num_points field for bounding_box_3d. Optionally downsamples the point cloud for speed.

        Parameters
        ----------
        subsample: int, default: 1
            Fraction of point cloud to use for computing the number of points. i.e., subsample=10 indicates that
            1/10th of the points should be used.
        """
        super().__init__()
        self.subsample = subsample

    def transform_datum(self, datum: Dict[str, Any]) -> Dict[str, Any]:
        """Populate the num_points field for bounding_box_3d.

        Parameters
        ----------
        datum: Dict[str, Any]
            A dgp lidar or point cloud datum. Must contain keys bounding_box_3d and point_cloud.

        Returns
        -------
        datum: Dict[str, Any]
            The datum with num_points added to the cuboids.
        """
        if 'bounding_box_3d' not in datum:
            return datum

        boxes = datum['bounding_box_3d']
        if boxes is None or len(boxes) == 0:
            return datum

        assert 'point_cloud' in datum, 'datum should contain point_cloud key'
        point_cloud = datum['point_cloud']
        if self.subsample > 1:
            N = point_cloud.shape[0]
            # Sample WITHOUT replacement: np.random.choice defaults to replace=True,
            # which can draw the same point several times and count it more than once,
            # distorting the scaled estimate below.
            sample_idx = np.random.choice(N, N // self.subsample, replace=False)
            # Fancy indexing already returns a copy; no explicit .copy() needed.
            point_cloud = point_cloud[sample_idx]

        for box in boxes:
            # If a box is missing this num_points value we expect it will have a value of 0
            # so only run this for boxes that might be missing the value.
            if box.num_points == 0:
                in_cuboid = points_in_cuboid(point_cloud, box)
                # Scale the subsampled count back up to approximate the full count.
                box._num_points = np.sum(in_cuboid) * self.subsample

        return datum
38 changes: 38 additions & 0 deletions dgp/contribs/dgp2wicker/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# macOS files
.DS_Store

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# IDE
.idea/
.vscode/
12 changes: 12 additions & 0 deletions dgp/contribs/dgp2wicker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright 2022 Woven Planet. All rights reserved.
FROM dgp:latest

RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
openjdk-11-jre-headless

ARG WORKSPACE=/home/dgp2wicker
WORKDIR ${WORKSPACE}
COPY . ${WORKSPACE}
COPY sample_wickerconfig.json /root/wickerconfig.json
RUN pip install --editable .
ENV PYTHONPATH="${WORKSPACE}:$PYTHONPATH"
44 changes: 44 additions & 0 deletions dgp/contribs/dgp2wicker/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright 2022 Woven Planet. All rights reserved.
PYTHON ?= python3
PACKAGE_NAME ?= dgp2wicker
WORKSPACE ?= /home/$(PACKAGE_NAME)
DOCKER_IMAGE_NAME ?= $(PACKAGE_NAME)
DOCKER_IMAGE ?= $(DOCKER_IMAGE_NAME):latest
DOCKER_OPTS ?= \
-it \
--rm \
--shm-size=62G \
-e AWS_DEFAULT_REGION \
-e AWS_ACCESS_KEY_ID \
-e AWS_SECRET_ACCESS_KEY \
-e AWS_SESSION_TOKEN\
-e AWS_PROFILE \
-e VAULT_ASSUMED_ROLE \
-e WICKER_CONFIG_PATH \
-e DISPLAY=${DISPLAY} \
-v $(PWD):$(WORKSPACE)

develop:
pip install --editable .

clean:
$(PYTHON) setup.py clean && \
rm -rf build dist && \
find . -name "*.pyc" | xargs rm -f && \
find . -name "__pycache__" | xargs rm -rf
find . -name "*egg-info" | xargs rm -rf

docker-build:
docker build \
--build-arg WORKSPACE=$(WORKSPACE) \
-t $(DOCKER_IMAGE) .

docker-run:
docker run \
--name $(PACKAGE_NAME) \
$(DOCKER_OPTS) $(DOCKER_IMAGE) $(COMMAND)

docker-start-interactive:
docker run \
$(DOCKER_OPTS) \
$(DOCKER_IMAGE) bash
79 changes: 79 additions & 0 deletions dgp/contribs/dgp2wicker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# DGP SynchronizedScene to Wicker Conversion

This adds support for using DGP data in
[wicker](https://github.com/woven-planet/wicker)

Specifically this saves the output of SynchronizedScene to wicker

---

### Install

```bash
cd dgp/contribs/dgp2wicker
pip install --editable .
```

or, use the included docker. Note: the s3 location of the wicker datasets is
specified in a required wicker config file; please see the Wicker documentation for
more details. An example sample_wickerconfig.json is included in the docker image;
it can be modified with the s3 bucket path and will work with the docker.

```bash
cd dgp/contribs/dgp2wicker
make docker-build
```

### Example

#### Save dataset to wicker

```bash
$dgp2wicker ingest \
--scene-dataset-json <path to scene dataset json in s3 or local> \
--wicker-dataset-name test_dataset \
--wicker-dataset-version 0.0.1 \
--datum-names camera_01,camera_02,lidar \
--requested-annotations bounding_box_3d,semantic_segmentation_2d \
--only-annotated-datums
```

#### Read dataset from wicker

```python
from dgp2wicker.dataset import DGPS3Dataset, compute_columns

columns = compute_columns(datum_names = ['camera_01','camera_02','lidar',],\
datum_types = ['image','image','point_cloud',], \
requested_annotations=['bounding_box_3d','semantic_segmentation_2d','depth',], \
cuboid_datum = 'lidar',)

dataset = DGPS3Dataset(dataset_name = 'test_dataset',\
dataset_version = '0.0.1', \
dataset_partition_name='train', \
columns_to_load = columns,)

context = dataset[0]
```

---

### Supported datums/annotations

datums:

- [x] image
- [x] point_cloud
- [ ] radar_point_cloud
- [ ] file_datum
- [ ] agent

annotations:

- [x] bounding_box_2d
- [x] bounding_box_3d
- [x] depth
- [x] semantic_segmentation_2d
- [x] instance_segmentation_2d
- [ ] key_point_2d
- [ ] key_line_2d
3 changes: 3 additions & 0 deletions dgp/contribs/dgp2wicker/dgp2wicker/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Copyright 2022 Woven Planet. All rights reserved.

# dgp2wicker package version (PEP 396 style module attribute).
__version__ = '1.0.0'
122 changes: 122 additions & 0 deletions dgp/contribs/dgp2wicker/dgp2wicker/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#!/usr/bin/env python
# Copyright 2022 Woven Planet NA. All rights reserved.
"""dgp2wicker command line interface
"""
import logging
import os
import sys
from functools import partial
from typing import Any, Dict, List

import click
from dgp2wicker.ingest import ingest_dgp_to_wicker

from dgp.annotations.camera_transforms import ScaleAffineTransform
from dgp.annotations.transforms import AddLidarCuboidPoints


class AddLidarCuboidPointsContext(AddLidarCuboidPoints):
    """Add Lidar Points but applied to samples not datums"""
    def __call__(self, sample: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Apply the parent transform to each point-cloud datum in a sample.

        Only datums of type 'point_cloud' that carry a non-None bounding_box_3d
        annotation are transformed; every other datum passes through untouched.
        """
        def _maybe_add_points(datum: Dict[str, Any]) -> Dict[str, Any]:
            is_point_cloud = datum['datum_type'] == 'point_cloud'
            # .get(...) is None covers both the missing-key and explicit-None cases.
            has_cuboids = datum.get('bounding_box_3d') is not None
            if is_point_cloud and has_cuboids:
                # Zero-arg super() is unavailable inside a nested function.
                return super(AddLidarCuboidPointsContext, self).__call__(datum)
            return datum

        return [_maybe_add_points(datum) for datum in sample]


class ScaleImages(ScaleAffineTransform):
    """Scale Transform but applied to samples not datums"""
    def __call__(self, sample: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Apply the parent scale transform to each image datum in a sample.

        Only datums of type 'image' that contain an 'rgb' entry are scaled;
        all other datums are returned unchanged, order preserved.
        """
        transformed = []
        for item in sample:
            should_scale = item['datum_type'] == 'image' and 'rgb' in item
            transformed.append(super().__call__(item) if should_scale else item)
        return transformed


@click.group()
@click.version_option()
def cli():
    """dgp2wicker command line interface entry point.

    Sets this package's logger to INFO and silences noisy third-party loggers.
    """
    # Fix: the logger name was misspelled 'dgp2widker', so the INFO level was
    # applied to an unused logger and this package's logger kept the default level.
    logging.getLogger('dgp2wicker').setLevel(level=logging.INFO)
    logging.getLogger('py4j').setLevel(level=logging.CRITICAL)
    logging.getLogger('botocore').setLevel(logging.CRITICAL)
    logging.getLogger('boto3').setLevel(logging.CRITICAL)
    logging.getLogger('PIL').setLevel(logging.CRITICAL)


@cli.command(name='ingest')
@click.option("--scene-dataset-json", required=True, help="Path to DGP Dataset JSON")
@click.option("--wicker-dataset-name", required=True, default=None, help="Name of dataset in Wicker")
@click.option("--wicker-dataset-version", required=True, help="Version of dataset in Wicker")
@click.option("--datum-names", required=True, help="List of datum names")
@click.option("--requested-annotations", help="List of annotation types")
@click.option("--only-annotated-datums", is_flag=True, help="Apply only annotated datums")
# Fix: type=int is required on options whose default is None — click cannot infer
# the type from the default, so without it the values arrive as strings.
@click.option("--max-num-scenes", required=False, default=None, type=int, help="The maximum number of scenes to process")
@click.option("--max-len", required=False, default=1000, help="The maximum number of samples per scene")
@click.option("--chunk-size", required=False, default=1000, help="The number of samples per chunk")
@click.option("--skip-camera-cuboids", is_flag=True, help="If True, skip cuboids for non lidar datums")
@click.option("--num-partitions", required=False, default=None, type=int, help="Number of scene partitions")
@click.option("--num-repartitions", required=False, default=None, type=int, help="Number of sample partitions")
@click.option("--is-pd", is_flag=True, help="If true, process the dataset with ParallelDomainScene")
@click.option("--data-uri", required=False, default=None, help="Alternate location for scene data")
@click.option("--add-lidar-points", is_flag=True, help="Add lidar point count to lidar cuboids")
@click.option("--half-size-images", is_flag=True, help="Resize image datums to half size")
def ingest(
    scene_dataset_json,
    wicker_dataset_name,
    wicker_dataset_version,
    datum_names,
    requested_annotations,
    only_annotated_datums,
    max_num_scenes,
    max_len,
    chunk_size,
    skip_camera_cuboids,
    num_partitions,
    num_repartitions,
    is_pd,
    data_uri,
    add_lidar_points,
    half_size_images,
):
    """Ingest a DGP scene dataset into Wicker.

    Parses the comma-separated datum/annotation lists, builds the optional
    sample transform pipeline, and delegates to ingest_dgp_to_wicker.
    """
    # Comma-separated CLI strings -> lists; requested_annotations is optional.
    datum_names = [x.strip() for x in datum_names.split(',')]
    requested_annotations = [x.strip() for x in requested_annotations.split(',')] if requested_annotations else None
    dataset_kwargs = {
        'datum_names': datum_names,
        'requested_annotations': requested_annotations,
        'only_annotated_datums': only_annotated_datums,
    }

    # Optional sample-level transforms applied during ingestion.
    pipeline = []
    if add_lidar_points:
        pipeline.append(AddLidarCuboidPointsContext())
    if half_size_images:
        pipeline.append(ScaleImages(s=.5))

    results = ingest_dgp_to_wicker(
        scene_dataset_json=scene_dataset_json,
        wicker_dataset_name=wicker_dataset_name,
        wicker_dataset_version=wicker_dataset_version,
        dataset_kwargs=dataset_kwargs,
        spark_context=None,
        pipeline=pipeline,
        max_num_scenes=max_num_scenes,
        max_len=max_len,
        chunk_size=chunk_size,
        skip_camera_cuboids=skip_camera_cuboids,
        num_partitions=num_partitions,
        num_repartitions=num_repartitions,
        is_pd=is_pd,
        data_uri=data_uri,
    )

    print('Finished ingest!')
    print(results)


# Script entry point: dispatch to the click command group.
if __name__ == '__main__':
    cli()
Loading

0 comments on commit fa3aaa2

Please sign in to comment.