# Copyright 2023 Toyota Motor Corporation. All rights reserved.
"""Associate 2D and 3D bounding-box annotations of DGP camera datums.

The centre of every 3D box is projected into its camera image; a 2D box and a 3D box are
considered the same object when the projected centre falls inside the 2D box.
"""
import itertools
import logging
import os
import time
from collections import defaultdict
from typing import Any, Iterator, Optional, Tuple

import numpy as np
from matplotlib import pyplot as plt

from dgp.datasets.synchronized_dataset import (
    SynchronizedScene,
    SynchronizedSceneDataset,
)
from dgp.utils.camera import Camera
from dgp.utils.structures.bounding_box_3d import BoundingBox3D

ANNOTATIONS_3D = "bounding_box_3d"
ANNOTATIONS_2D = "bounding_box_2d"


def _iter_samples(
    dataset: SynchronizedSceneDataset,
    max_num_items: Optional[int],
    tag: str,
    logging_span: int = 200,
) -> Iterator[Tuple[int, dict]]:
    """Yield ``(index, {datum_name: datum})`` for at most `max_num_items` samples, logging progress."""
    # A negative limit never matched the former ``idx == max_num_items`` stop test, so it means "no limit".
    stop = max_num_items if max_num_items is not None and max_num_items >= 0 else None
    start = time.time()
    # islice stops *before* fetching sample `stop`, so no sample is loaded only to be thrown away.
    for idx, sample in enumerate(itertools.islice(dataset, stop)):
        if idx % logging_span == 0:
            logging.info(f"{tag}:Frame {idx + 1} of {len(dataset)} in {time.time() - start:.2f}s.")
        # No temporal context is used: only the first element of each sample is read.
        yield idx, {datum["datum_name"]: datum for datum in sample[0]}


def render_bounding_box_3d_to_2d(bbox_3d: BoundingBox3D, camera: Camera) -> Optional[np.ndarray]:
    """Project the centre of a 3D bounding box into the image plane.

    Parameters
    ----------
    bbox_3d: BoundingBox3D
        3D bounding box (cuboid) that is centered at `pose` with extent `sizes`.

    camera: dgp.utils.camera.Camera
        Camera used to render the bounding box.

    Returns
    -------
    centroid: Optional[np.ndarray]
        int32 image coordinates of the box centre, one identical row per stacked copy of the centre
        (callers read row 0 as ``(x, y)``). None if any corner of the box has z <= 0, i.e. the box is
        not entirely in front of the camera.

    Raises
    ------
    TypeError
        Raised if camera is not an instance of Camera.
    """
    if not isinstance(camera, Camera):
        raise TypeError("`camera` should be of type Camera")
    if (bbox_3d.corners[:, 2] <= 0).any():
        return None
    center = bbox_3d.pose.tvec
    # NOTE(review): the centre is stacked three times and callers only use row 0; presumably this
    # satisfies a shape requirement of Camera.project - confirm before simplifying.
    return camera.project(np.vstack([center, center, center])).astype(np.int32)


def render_bounding_boxes_3d_of_lidars(
    dataset: SynchronizedSceneDataset,
    camera_datum_names: Optional[list[str]] = None,
    lidar_datum_names: Optional[list[str]] = None,
    max_num_items: Optional[int] = None,
) -> defaultdict[defaultdict[list]]:
    """Project the 3D bounding boxes attached to each camera datum into that camera's image.

    Boxes of all visited samples are pooled per camera and class; the sample index is not kept.

    Parameters
    ----------
    dataset: SynchronizedSceneDataset
        A DGP dataset.

    camera_datum_names: Optional[list[str]]
        List of camera names.
        If None, use all the cameras available in the DGP dataset.

    lidar_datum_names: Optional[list[str]]
        List of lidar names. Accepted for interface compatibility; the boxes are read from the
        camera datums, so this argument is not used here.

    max_num_items: Optional[int]
        If not None, then visit only up to this number of items. This is useful for debugging a large dataset.
        Default: None.

    Returns
    -------
    bbox_2d_from_3d: defaultdict[defaultdict[list]]
        Dictionary keyed by camera name whose values are dictionaries keyed by the class name of
        bounding_box_3d, each holding a list of (bbox_3d, centroid_2d). centroid_2d is None for boxes
        that are not entirely in front of the camera.

    Raises
    ------
    KeyError
        If a requested camera is missing from a sample, or a box's class id is not in the 3D ontology.
    """
    ontology = dataset.dataset_metadata.ontology_table.get(ANNOTATIONS_3D, None)
    id_to_name = ontology.contiguous_id_to_name if ontology else {}

    if camera_datum_names is None:
        camera_datum_names = sorted(dataset.list_datum_names_available_in_all_scenes(datum_type="image"))

    bbox_2d_from_3d = defaultdict(lambda: defaultdict(list))
    for _, datums in _iter_samples(dataset, max_num_items, tag="3D"):
        for camera_name in camera_datum_names:
            camera_datum = datums[camera_name]
            if ANNOTATIONS_3D not in camera_datum:
                continue
            # The intrinsics are per datum, so one Camera serves every box of this datum.
            camera = Camera(K=camera_datum["intrinsics"])
            for bbox_3d in camera_datum[ANNOTATIONS_3D]:
                class_name = id_to_name[bbox_3d.class_id]
                center_2d = render_bounding_box_3d_to_2d(bbox_3d, camera)
                bbox_2d_from_3d[camera_name][class_name].append((bbox_3d, center_2d))
    return bbox_2d_from_3d


def render_bounding_boxes_2d_of_cameras(
    dataset: SynchronizedSceneDataset,
    camera_datum_names: Optional[list[str]] = None,
    max_num_items: Optional[int] = None,
) -> defaultdict[defaultdict[list]]:
    """Collect the 2D bounding boxes of each camera, grouped by class name.

    Boxes of all visited samples are pooled per camera and class; the sample index is not kept.

    Parameters
    ----------
    dataset: SynchronizedSceneDataset
        A DGP dataset.

    camera_datum_names: Optional[list[str]]
        List of camera names.
        If None, use all the cameras available in the DGP dataset.

    max_num_items: Optional[int]
        If not None, then visit only up to this number of items. This is useful for debugging a large dataset.
        Default: None.

    Returns
    -------
    bboxes_2d: defaultdict[defaultdict[list]]
        Dictionary keyed by camera name whose values are dictionaries keyed by the class name of
        bounding_box_2d, each holding a list of boxes in ``ltwh`` format ([left, top, width, height]).
        Empty when the dataset is empty, 2D annotations were not requested, or no 2D ontology exists.
    """
    bboxes_2d = defaultdict(lambda: defaultdict(list))
    if not len(dataset):
        return bboxes_2d
    # Loop-invariant: without requested 2D annotations nothing is collected, so do not load any sample.
    if ANNOTATIONS_2D not in dataset.requested_annotations:
        return bboxes_2d

    ontology = dataset.dataset_metadata.ontology_table.get(ANNOTATIONS_2D, None)
    if ontology is None:
        # Class ids cannot be turned into names without the ontology (this used to fail later on an unbound name).
        logging.warning("No `{}` ontology in the dataset; no 2D boxes are collected.".format(ANNOTATIONS_2D))
        return bboxes_2d
    id_to_name = ontology.contiguous_id_to_name

    if max_num_items is not None and max_num_items > len(dataset):
        logging.info(
            "`max_num_items` is reduced to the dataset size, from {:d} to {:d}".format(max_num_items, len(dataset))
        )
        max_num_items = len(dataset)

    if camera_datum_names is None:
        camera_datum_names = sorted(dataset.list_datum_names_available_in_all_scenes(datum_type="image"))

    for _, datums in _iter_samples(dataset, max_num_items, tag="2D"):
        for camera_name in camera_datum_names:
            for bbox in datums[camera_name][ANNOTATIONS_2D]:
                bboxes_2d[camera_name][id_to_name[bbox.class_id]].append(bbox.ltwh)
    return bboxes_2d


def associate_lidar_and_camera_2d_bboxes(
    bboxes_from_camera: Optional[list[np.ndarray]],
    bboxes_from_lidar: Optional[list[Tuple[Any, Optional[np.ndarray]]]],
) -> list[Tuple[Any, Any, Any]]:
    """Associate 3d bounding boxes and 2d bounding boxes to the same object by checking
    whether the projected centroid of 3d bounding box is inside the 2d bounding box or not.

    Each 2d box is paired with the first 3d box (in list order) whose centroid it contains; a 3d box
    may be paired with several 2d boxes.

    Limitation:
        1. Several 3d objects could project to the same place in an image.
        2. The 2d convex hull of a 3d projection will typically not be contained inside the tight axis aligned 2d box.
        3. One single large 2d box will lead to everything being associated. If ego is near a truck or a bus which
           fills the view, this won't work anymore.

    Future work:
        To be more robust, ideas such as measure of 2d-3d similarity and doing a bipartite matching is suggested.

    Parameters
    ----------
    bboxes_from_camera: Optional[list[np.ndarray]]
        A list of 2d bounding boxes as (left, top, width, height). None or empty yields no association.

    bboxes_from_lidar: Optional[list[Tuple[bounding_box_3d, centroid_2d]]]
        A list of tuples (3d bounding box, projected centroid). Only row 0 of the centroid is read, as
        ``(x, y)``; entries whose centroid is None are skipped. None or empty yields no association.

    Returns
    -------
    associated: list[Tuple[bounding_box_2d, bounding_box_3d, centroid_2d]]
        One tuple per 2d box that contains a projected centroid; centroid_2d is row 0 of the centroid.
    """
    associated = []
    if not bboxes_from_camera or not bboxes_from_lidar:
        return associated
    for bbox_camera in bboxes_from_camera:
        left, top, width, height = bbox_camera
        for bbox_lidar, centroid_2d in bboxes_from_lidar:
            if centroid_2d is None:
                continue
            x, y = centroid_2d[0]
            # Half-open box: the left/top edges are inside, the right/bottom edges are outside.
            if left <= x < left + width and top <= y < top + height:
                associated.append((bbox_camera, bbox_lidar, centroid_2d[0]))
                break
    return associated


def associate_3d_and_2d_annotations(
    dataset: SynchronizedSceneDataset,
    ontology_name_mapper: dict,
    camera_datum_names: Optional[list[str]] = None,
    lidar_datum_names: Optional[list[str]] = None,
    max_num_items: Optional[int] = None,
) -> defaultdict[defaultdict[list[Tuple]]]:
    """Associate 3d bounding boxes and 2d bounding boxes to the same object with given dataset.

    NOTE(review): the boxes of all visited samples are pooled per camera and class before matching,
    so a 2d box can be paired with a 3d box from a different sample - confirm this is intended when
    more than one sample is processed.

    Parameters
    ----------
    dataset: SynchronizedSceneDataset
        A DGP dataset.

    ontology_name_mapper: dict
        Map the class names from bounding_box_2d to bounding_box_3d if the class names are different.
        eg: {'Pedestrian': 'Person','Car': 'Car'}.

    camera_datum_names: Optional[list[str]]
        List of camera names.
        If None, use all the cameras available in the DGP dataset.

    lidar_datum_names: Optional[list[str]]
        List of lidar names.
        If None, use all the lidars available in the DGP dataset.

    max_num_items: Optional[int]
        If not None, then visit only up to this number of items. This is useful for debugging a large dataset.
        Default: None.

    Returns
    -------
    associated_bboxes: defaultdict[defaultdict[list[Tuple]]]
        Dictionary keyed by camera name whose values are dictionaries keyed by the class name of
        bounding_box_2d, each holding a list of Tuple [bounding_box_2d, bounding_box_3d, centroid_2d].
    """
    # Resolve the default here as well: the camera names are iterated below, and None is a documented value.
    if camera_datum_names is None:
        camera_datum_names = sorted(dataset.list_datum_names_available_in_all_scenes(datum_type="image"))

    bboxes_2d_from_lidars = render_bounding_boxes_3d_of_lidars(
        dataset, camera_datum_names, lidar_datum_names, max_num_items
    )
    bboxes_2d_from_cameras = render_bounding_boxes_2d_of_cameras(dataset, camera_datum_names, max_num_items)

    associated_bboxes = defaultdict(lambda: defaultdict(list))
    for camera_name in camera_datum_names:
        per_class_3d = bboxes_2d_from_lidars[camera_name]
        per_class_2d = bboxes_2d_from_cameras[camera_name]
        for name_2d, name_3d in ontology_name_mapper.items():
            logging.info("{}: Associate {} to {}".format(camera_name, name_2d, name_3d))
            # .get() yields None for a class without boxes, which the matcher treats as "nothing to associate".
            associated_bboxes[camera_name][name_2d] = associate_lidar_and_camera_2d_bboxes(
                per_class_2d.get(name_2d), per_class_3d.get(name_3d)
            )
    return associated_bboxes


def associate_3d_and_2d_annotations_scene(
    scene_json: str,
    ontology_name_mapper: dict,
    camera_datum_names: Optional[list[str]] = None,
    lidar_datum_names: Optional[list[str]] = None,
    max_num_items: Optional[int] = None,
) -> defaultdict[defaultdict[list[Tuple]]]:
    """Associate 3d bounding boxes and 2d bounding boxes to the same object with given scene.

    Parameters
    ----------
    scene_json: str
        Full path to the scene json.

    ontology_name_mapper: dict
        Map the class names from bounding_box_2d to bounding_box_3d if the class names are different.
        eg: {'Pedestrian': 'Person','Car': 'Car'}.

    camera_datum_names: Optional[list[str]]
        List of camera names.
        If None, use all the cameras available in the scene.

    lidar_datum_names: Optional[list[str]]
        List of lidar names.
        If None, use all the lidars available in the scene.

    max_num_items: Optional[int]
        If not None, then visit only up to this number of items. This is useful for debugging a large dataset.
        Default: None.

    Returns
    -------
    associated_bboxes: defaultdict[defaultdict[list[Tuple]]]
        Dictionary keyed by camera name whose values are dictionaries keyed by the class name of
        bounding_box_2d, each holding a list of Tuple [bounding_box_2d, bounding_box_3d, centroid_2d].
    """
    # Concatenating with None used to raise TypeError; without both lists, load every datum instead.
    # NOTE(review): assumes SynchronizedScene accepts datum_names=None like SynchronizedSceneDataset - confirm.
    if camera_datum_names is None or lidar_datum_names is None:
        datum_names = None
    else:
        datum_names = camera_datum_names + lidar_datum_names
    dataset = SynchronizedScene(
        scene_json,
        datum_names=datum_names,
        requested_annotations=[ANNOTATIONS_2D, ANNOTATIONS_3D],
        only_annotated_datums=True,
    )
    return associate_3d_and_2d_annotations(
        dataset, ontology_name_mapper, camera_datum_names, lidar_datum_names, max_num_items
    )


def associate_3d_and_2d_annotations_dataset(
    scenes_dataset_json: str,
    ontology_name_mapper: dict,
    camera_datum_names: Optional[list[str]] = None,
    lidar_datum_names: Optional[list[str]] = None,
    max_num_items: Optional[int] = None,
) -> defaultdict[defaultdict[list[Tuple]]]:
    """Associate 3d bounding boxes and 2d bounding boxes to the same object with given DGP dataset.

    Parameters
    ----------
    scenes_dataset_json: str
        Full path to the dataset scene json.

    ontology_name_mapper: dict
        Map the class names from bounding_box_2d to bounding_box_3d if the class names are different.
        eg: {'Pedestrian': 'Person','Car': 'Car'}.

    camera_datum_names: Optional[list[str]]
        List of camera names.
        If None, use all the cameras available in the DGP dataset.

    lidar_datum_names: Optional[list[str]]
        List of lidar names.
        If None, use all the lidars available in the DGP dataset.

    max_num_items: Optional[int]
        If not None, then visit only up to this number of items. This is useful for debugging a large dataset.
        Default: None.

    Returns
    -------
    associated_bboxes: defaultdict[defaultdict[list[Tuple]]]
        Dictionary keyed by camera name whose values are dictionaries keyed by the class name of
        bounding_box_2d, each holding a list of Tuple [bounding_box_2d, bounding_box_3d, centroid_2d].
    """
    # Merge Lidar and Camera datum names; unless both are given, load every datum.
    if camera_datum_names and lidar_datum_names:
        datum_names = camera_datum_names + lidar_datum_names
    else:
        datum_names = None
    dataset = SynchronizedSceneDataset(
        scenes_dataset_json,
        datum_names=datum_names,
        requested_annotations=[ANNOTATIONS_2D, ANNOTATIONS_3D],
        only_annotated_datums=True,
    )
    return associate_3d_and_2d_annotations(
        dataset, ontology_name_mapper, camera_datum_names, lidar_datum_names, max_num_items
    )


def draw_bounding_box_2d_distance_distribution(
    scenes_dataset_json: str,
    ontology_name_mapper: dict,
    output_dir: str,
    camera_datum_names: Optional[list[str]] = None,
    lidar_datum_names: Optional[list[str]] = None,
    max_num_items: Optional[int] = None,
):
    """Draw the distance distribution histogram of 2d bounding boxes by associating bounding_box_3d of the same object.

    One picture is written to `output_dir` per camera and per 2d class name of `ontology_name_mapper`.

    Parameters
    ----------
    scenes_dataset_json: str
        Full path to the dataset scene json.

    ontology_name_mapper: dict
        Map the class names from bounding_box_2d to bounding_box_3d if the class names are different.
        eg: {'Pedestrian': 'Person','Car': 'Car'}.

    output_dir: str
        Path to save the histogram picture. Created if it does not exist.

    camera_datum_names: Optional[list[str]]
        List of camera names.
        If None, use all the cameras available in the DGP dataset.

    lidar_datum_names: Optional[list[str]]
        List of lidar names.
        If None, use all the lidars available in the DGP dataset.

    max_num_items: Optional[int]
        If not None, then visit only up to this number of items. This is useful for debugging a large dataset.
        Default: None.
    """
    associated_bboxes = associate_3d_and_2d_annotations_dataset(
        scenes_dataset_json, ontology_name_mapper, camera_datum_names, lidar_datum_names, max_num_items
    )
    os.makedirs(output_dir, exist_ok=True)
    # Summarize statistics per camera per class over all scenes. The result is keyed by the cameras
    # that were actually processed, which also covers `camera_datum_names=None`.
    for camera_name, per_class in associated_bboxes.items():
        for name_2d in ontology_name_mapper:
            logging.info("Summarizing class {}".format(name_2d))
            summarize_3d_statistics_per_class(per_class[name_2d], output_dir, camera_name, name_2d)


def summarize_3d_statistics_per_class(
    associated_bboxes: list[Tuple], output_dir: str, camera_name: str, class_name: str
):
    """Accumulate distances of the associated bounding boxes and draw the histogram.

    Parameters
    ----------
    associated_bboxes: list[Tuple]
        A list of Tuple [bounding_box_2d, bounding_box_3d, centroid_2d].

    output_dir: str
        Path to save the histogram picture.

    camera_name: str
        Camera name.

    class_name: str
        Class name.
    """
    # The boxes are expressed in the camera frame, where z is the depth (the projection code rejects
    # boxes with z <= 0 as behind the camera). The first two components alone would ignore the depth,
    # so the full norm of the translation is used as the distance to the camera.
    dist = [int(np.linalg.norm(bbox_3d.pose.tvec)) for _, bbox_3d, _ in associated_bboxes]
    draw_hist(dist, output_dir, xlable="Dist", title=f"dist_{camera_name}_{class_name}")


def draw_hist(data: list, output_dir: str, xlable: str, title: str):
    """Draw the histogram of given data and save it as ``histogram_<title>.png`` in `output_dir`.

    Parameters
    ----------
    data: list
        A list of int.

    output_dir: str
        Path to save the histogram picture.

    xlable: str
        The label name of x.

    title: str
        The title of the picture; also used in the file name.
    """
    data = np.array(data)
    min_dist = -20
    max_dist = 100
    dist_bin = 10
    # np.arange excludes its stop value, so extend it by one bin to make `max_dist` the last edge;
    # otherwise values in [max_dist - dist_bin, max_dist) are silently dropped.
    bins = np.arange(min_dist, max_dist + dist_bin, dist_bin)  # fixed bin size
    # A dedicated figure keeps successive histograms from being drawn on top of each other.
    fig, ax = plt.subplots()
    try:
        ax.hist(data, bins=bins)
        ax.set_title(f"Distribution {title}(fixed bin size)")
        ax.set_xlabel(f"variable {xlable} (bin size = {dist_bin})")
        ax.set_ylabel("count")
        fig.savefig(os.path.join(output_dir, f"histogram_{title}.png"))
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)
1.602, + "length": 0.923, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.5460047245395266, + "qx": -0.5463773062342265, + "qy": 0.44794882501102434, + "qz": -0.4502138716032509 + }, + "translation": { + "x": -0.021767233204855074, + "y": -0.33786151698734557, + "z": 23.655119181323926 + } + }, + "truncation": 0.0, + "width": 0.702 + }, + "class_id": 0, + "instance_id": 4081736164, + "num_points": 488 + }, + { + "attributes": {}, + "box": { + "height": 1.458, + "length": 1.003, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.515906332033461, + "qx": -0.5164238763720558, + "qy": 0.4821749584013948, + "qz": -0.4844113396394155 + }, + "translation": { + "x": -3.0012585309692668, + "y": 1.3998720942093428, + "z": 5.8925998920151414 + } + }, + "truncation": 0.0, + "width": 0.687 + }, + "class_id": 0, + "instance_id": 3157625369, + "num_points": 2506 + }, + { + "attributes": {}, + "box": { + "height": 1.7, + "length": 0.7, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.6262508644615135, + "qx": -0.6261543455500395, + "qy": 0.32728740602920997, + "qz": -0.3295808613446478 + }, + "translation": { + "x": 21.405784649079266, + "y": -1.3684778965882174, + "z": 67.40673156544199 + } + }, + "truncation": 0.0, + "width": 0.7 + }, + "class_id": 0, + "instance_id": 4002157367, + "num_points": 7 + }, + { + "attributes": {}, + "box": { + "height": 1.7, + "length": 0.7, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.39744937852144224, + "qx": -0.39515948875230333, + "qy": -0.5856921452824387, + "qz": 0.5855319640432897 + }, + "translation": { + "x": 5.950197640790634, + "y": -1.4554172491242525, + "z": 41.890144866234095 + } + }, + "truncation": 0.0, + "width": 0.7 + }, + "class_id": 0, + "instance_id": 3015682071, + "num_points": 75 + }, + { + "attributes": {}, + "box": { + "height": 1.7, + "length": 
0.7, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.5168602686509107, + "qx": -0.5173734033178335, + "qy": 0.4811559748520739, + "qz": -0.48339337200005283 + }, + "translation": { + "x": 1.1096138012673293, + "y": 6.686360433724587, + "z": -53.1512378533771 + } + }, + "truncation": 0.0, + "width": 0.7 + }, + "class_id": 0, + "instance_id": 3306246368, + "num_points": 71 + }, + { + "attributes": {}, + "box": { + "height": 1.7, + "length": 0.7, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.5168602686509107, + "qx": -0.5173734033178335, + "qy": 0.4811559748520739, + "qz": -0.48339337200005283 + }, + "translation": { + "x": 0.37169697588876716, + "y": 6.63103626392018, + "z": -53.30554597065748 + } + }, + "truncation": 0.0, + "width": 0.7 + }, + "class_id": 0, + "instance_id": 279291877, + "num_points": 83 + }, + { + "attributes": {}, + "box": { + "height": 1.7, + "length": 0.7, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.5021280729858156, + "qx": -0.5027080886060614, + "qy": 0.49645834492590357, + "qz": -0.49867934358967014 + }, + "translation": { + "x": 2.600034834051968, + "y": 9.379402753234501, + "z": -82.74383588414094 + } + }, + "truncation": 0.0, + "width": 0.7 + }, + "class_id": 0, + "instance_id": 129671060, + "num_points": 29 + } + ] +} \ No newline at end of file diff --git a/tests/data/dgp/associate_2d_to_3d_scene/scene_01/bounding_box_3d/LIDAR/1345048_001.json b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/bounding_box_3d/LIDAR/1345048_001.json new file mode 100644 index 00000000..2eaf4d18 --- /dev/null +++ b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/bounding_box_3d/LIDAR/1345048_001.json @@ -0,0 +1,193 @@ +{ + "annotations": [ + { + "attributes": {}, + "box": { + "height": 1.602, + "length": 0.923, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": 
-0.9945361181899448, + "qx": 0.005844301846375331, + "qy": -0.04575480638969989, + "qz": 0.09364962062841337 + }, + "translation": { + "x": 26.5559912338922, + "y": 0.19018018179900764, + "z": 0.996752776279834 + } + }, + "truncation": 0.0, + "width": 0.702 + }, + "class_id": 0, + "instance_id": 4081736164, + "num_points": 488 + }, + { + "attributes": {}, + "box": { + "height": 1.458, + "length": 1.003, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.9985009114869703, + "qx": 0.0028882970852891765, + "qy": -0.04603602841012219, + "qz": 0.029466448511131504 + }, + "translation": { + "x": 8.688338234722778, + "y": 3.041835891958385, + "z": 0.8581381595079947 + } + }, + "truncation": 0.0, + "width": 0.687 + }, + "class_id": 0, + "instance_id": 3157625369, + "num_points": 2506 + }, + { + "attributes": {}, + "box": { + "height": 1.7, + "length": 0.7, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.9546205851977209, + "qx": 0.015040096910548492, + "qy": -0.04360566083379287, + "qz": 0.29423099793347873 + }, + "translation": { + "x": 70.372899118791, + "y": -20.94096488390045, + "z": -1.8244220886784461 + } + }, + "truncation": 0.0, + "width": 0.7 + }, + "class_id": 0, + "instance_id": 4002157367, + "num_points": 7 + }, + { + "attributes": {}, + "box": { + "height": 1.7, + "length": 0.7, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": 0.1857940747174927, + "qx": 0.045012389826135105, + "qy": 0.010076851391705489, + "qz": 0.9815054272025399 + }, + "translation": { + "x": 44.8594045019388, + "y": -5.654335267526108, + "z": 0.49063134648028495 + } + }, + "truncation": 0.0, + "width": 0.7 + }, + "class_id": 0, + "instance_id": 3015682071, + "num_points": 75 + }, + { + "attributes": {}, + "box": { + "height": 1.7, + "length": 0.7, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.998440882943295, + 
"qx": 0.00297904410121062, + "qy": -0.046030245144616705, + "qz": 0.03143477525898902 + }, + "translation": { + "x": -50.56086723143903, + "y": -1.4905566498621283, + "z": 0.9585148271079902 + } + }, + "truncation": 0.0, + "width": 0.7 + }, + "class_id": 0, + "instance_id": 3306246368, + "num_points": 71 + }, + { + "attributes": {}, + "box": { + "height": 1.7, + "length": 0.7, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.998440882943295, + "qx": 0.00297904410121062, + "qy": -0.046030245144616705, + "qz": 0.03143477525898902 + }, + "translation": { + "x": -50.71478800739544, + "y": -0.753365988206383, + "z": 1.0237169980551286 + } + }, + "truncation": 0.0, + "width": 0.7 + }, + "class_id": 0, + "instance_id": 279291877, + "num_points": 83 + }, + { + "attributes": {}, + "box": { + "height": 1.7, + "length": 0.7, + "occlusion": 0, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.9989345200499858, + "qx": 0.0015970034043485057, + "qy": -0.04609889100571691, + "qz": 0.0014718969645083901 + }, + "translation": { + "x": -80.2643855030285, + "y": -3.192519044929213, + "z": 0.9625515506138242 + } + }, + "truncation": 0.0, + "width": 0.7 + }, + "class_id": 0, + "instance_id": 129671060, + "num_points": 29 + } + ] +} \ No newline at end of file diff --git a/tests/data/dgp/associate_2d_to_3d_scene/scene_01/calibration/e10c7424d73936c7c8306cd02e927b7d2639c1a6.json b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/calibration/e10c7424d73936c7c8306cd02e927b7d2639c1a6.json new file mode 100644 index 00000000..dca87874 --- /dev/null +++ b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/calibration/e10c7424d73936c7c8306cd02e927b7d2639c1a6.json @@ -0,0 +1,94 @@ +{ + "extrinsics": [ + { + "reference_coordinate_system": "", + "rotation": { + "qw": 1.0, + "qx": 0.0, + "qy": 0.0, + "qz": 0.0 + }, + "translation": { + "x": 0.0, + "y": 0.0, + "z": 0.0 + } + }, + { + "reference_coordinate_system": "", + 
"rotation": { + "qw": 0.4771700483160217, + "qx": -0.5252321849265068, + "qy": 0.519020460103825, + "qz": -0.4765056448233028 + }, + "translation": { + "x": 2.968057416879674, + "y": 0.009892013289572787, + "z": 2.801236669326528 + } + } + ], + "intrinsics": [ + { + "alpha": 0.0, + "beta": 0.0, + "cx": 0.0, + "cy": 0.0, + "equirectangular": 0, + "fisheye": 0, + "fov": 0.0, + "fx": 0.0, + "fy": 0.0, + "k1": 0.0, + "k2": 0.0, + "k3": 0.0, + "k4": 0.0, + "k5": 0.0, + "k6": 0.0, + "p1": 0.0, + "p2": 0.0, + "s1": 0.0, + "s2": 0.0, + "s3": 0.0, + "s4": 0.0, + "skew": 0.0, + "taux": 0.0, + "tauy": 0.0, + "w": 0.0, + "xi": 0.0 + }, + { + "alpha": 0.0, + "beta": 0.0, + "cx": 1344.739990234375, + "cy": 1073.77001953125, + "equirectangular": 0, + "fisheye": 0, + "fov": 0.0, + "fx": 1976.449951171875, + "fy": 1977.06005859375, + "k1": 0.0, + "k2": 0.0, + "k3": 0.0, + "k4": 0.0, + "k5": 0.0, + "k6": 0.0, + "p1": 0.0, + "p2": 0.0, + "s1": 0.0, + "s2": 0.0, + "s3": 0.0, + "s4": 0.0, + "skew": 0.0, + "taux": 0.0, + "tauy": 0.0, + "w": 0.0, + "xi": 0.0 + } + ], + "names": [ + "LIDAR", + "CAMERA_21" + ] +} \ No newline at end of file diff --git a/tests/data/dgp/associate_2d_to_3d_scene/scene_01/ontology/67f011a9c86852dfe1b2275cbbb6c0c7aa8d1848.json b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/ontology/67f011a9c86852dfe1b2275cbbb6c0c7aa8d1848.json new file mode 100644 index 00000000..f09afdf0 --- /dev/null +++ b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/ontology/67f011a9c86852dfe1b2275cbbb6c0c7aa8d1848.json @@ -0,0 +1,26 @@ +{ + "items": [ + { + "color": { + "b": 20, + "g": 60, + "r": 220 + }, + "id": 0, + "isthing": true, + "name": "Person", + "supercategory": "" + }, + { + "color": { + "b": 0, + "g": 142, + "r": 0 + }, + "id": 1, + "isthing": true, + "name": "Car", + "supercategory": "" + } + ] +} \ No newline at end of file diff --git a/tests/data/dgp/associate_2d_to_3d_scene/scene_01/ontology/e77320974afa353aa67d28a8c3c59c595dd9037f.json 
b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/ontology/e77320974afa353aa67d28a8c3c59c595dd9037f.json new file mode 100644 index 00000000..7dc3d7e8 --- /dev/null +++ b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/ontology/e77320974afa353aa67d28a8c3c59c595dd9037f.json @@ -0,0 +1,26 @@ +{ + "items": [ + { + "color": { + "b": 60, + "g": 20, + "r": 220 + }, + "id": 0, + "isthing": true, + "name": "Pedestrian", + "supercategory": "" + }, + { + "color": { + "b": 142, + "g": 0, + "r": 0 + }, + "id": 1, + "isthing": true, + "name": "Car", + "supercategory": "" + } + ] +} \ No newline at end of file diff --git a/tests/data/dgp/associate_2d_to_3d_scene/scene_01/point_cloud/LIDAR/1345048_001.npz b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/point_cloud/LIDAR/1345048_001.npz new file mode 100644 index 00000000..0039dbb0 Binary files /dev/null and b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/point_cloud/LIDAR/1345048_001.npz differ diff --git a/tests/data/dgp/associate_2d_to_3d_scene/scene_01/rgb/CAMERA_21/1345048_001.jpg b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/rgb/CAMERA_21/1345048_001.jpg new file mode 100644 index 00000000..a28a851f Binary files /dev/null and b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/rgb/CAMERA_21/1345048_001.jpg differ diff --git a/tests/data/dgp/associate_2d_to_3d_scene/scene_01/scene_6245881cb04e9f71ae6de99064e771dfa370329d.json b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/scene_6245881cb04e9f71ae6de99064e771dfa370329d.json new file mode 100644 index 00000000..f2e7696a --- /dev/null +++ b/tests/data/dgp/associate_2d_to_3d_scene/scene_01/scene_6245881cb04e9f71ae6de99064e771dfa370329d.json @@ -0,0 +1,123 @@ +{ + "creation_date": "2023-09-20T01:57:01.750472Z", + "data": [ + { + "datum": { + "image": { + "annotations": { + "0": "bounding_box_2d/CAMERA_21/6d3e25d9abf1e015564f56ac176e8a7526cdd5ca.json", + "1": "bounding_box_3d/CAMERA_21/1345048_001.json" + }, + "channels": 3, + "filename": 
"rgb/CAMERA_21/1345048_001.jpg", + "height": 1836, + "metadata": {}, + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": -0.11017830669879913, + "qx": 0.10803831368684769, + "qy": 0.6982220411300659, + "qz": -0.6990525126457214 + }, + "translation": { + "x": 433.2738037109375, + "y": 807.4990844726562, + "z": -29.08929443359375 + } + }, + "width": 2692 + } + }, + "id": { + "index": "0", + "log": "dummy", + "name": "CAMERA_21", + "timestamp": "2023-04-26T22:35:22.999530Z" + }, + "key": "957a17d7c7c4e17a3693543306fb97af98c75b34", + "next_key": "7d1bb4ffb684d380f998dd09c61a182909e271c2", + "prev_key": "" + }, + { + "datum": { + "point_cloud": { + "annotations": { + "1": "bounding_box_3d/LIDAR/1345048_001.json" + }, + "filename": "point_cloud/LIDAR/1345048_001.npz", + "metadata": {}, + "point_fields": [], + "point_format": [ + "X", + "Y", + "Z", + "INTENSITY" + ], + "pose": { + "reference_coordinate_system": "", + "rotation": { + "qw": 0.5861750245094299, + "qx": -0.036432359367609024, + "qy": -0.02829030528664589, + "qz": -0.8088703155517578 + }, + "translation": { + "x": 434.11273193359375, + "y": 810.062744140625, + "z": -32.15201187133789 + } + } + } + }, + "id": { + "index": "0", + "log": "dummy", + "name": "LIDAR", + "timestamp": "2023-04-26T22:35:23.070990Z" + }, + "key": "02eabaf3ca2fb9d13ece23c0f9ae74be098b2617", + "next_key": "b9442c27d742b2ac0fc232bea2db933518a8b933", + "prev_key": "" + } + ], + "description": "", + "log": "", + "metadata": {}, + "name": "dummy", + "ontologies": { + "0": "e77320974afa353aa67d28a8c3c59c595dd9037f", + "1": "67f011a9c86852dfe1b2275cbbb6c0c7aa8d1848" + }, + "samples": [ + { + "calibration_key": "e10c7424d73936c7c8306cd02e927b7d2639c1a6", + "datum_keys": [ + "957a17d7c7c4e17a3693543306fb97af98c75b34", + "02eabaf3ca2fb9d13ece23c0f9ae74be098b2617" + ], + "id": { + "index": "0", + "log": "dummy", + "name": "1345048_001", + "timestamp": "2023-04-26T22:35:23.070990Z" + }, + "metadata": {} + } + ], + "statistics": 
# Copyright 2023 Woven by Toyota. All rights reserved.
"""Unit test for associating the 2D and 3D bounding boxes of a DGP scene."""
import logging
import os
import tempfile
import unittest
import unittest.mock as mock

import dgp.utils.render_3d_to_2d as render_engine
from tests import TEST_DATA_DIR

SCENE_JSON = 'scene_6245881cb04e9f71ae6de99064e771dfa370329d.json'
# The list of cameras' name
FRONT_CAMERA = ["CAMERA_21"]
# The list of Lidars' name
LIDAR = ["LIDAR"]
# Define the class names from bounding_box_2d to bounding_box_3d if the class names are different.
ONTOLOGY_NAME_MAPPER_2D_to_3D = {
    'Pedestrian': 'Person',
    'Car': 'Car',
}
# Number of associated boxes expected per 2D class name in the test scene.
EXPECTED_ASSOCIATIONS = {
    'Pedestrian': 4,
    'Car': 0,
}


class TestAssociateDGP3dto2d(unittest.TestCase):
    """Checks the 2D/3D association on the single-sample scene under ``associate_2d_to_3d_scene``."""
    logging.getLogger().setLevel(logging.INFO)

    def test_associate_scene(self):
        """Verifies the target bounding box can be associated successfully."""
        answer = render_engine.associate_3d_and_2d_annotations_scene(
            scene_json=os.path.join(TEST_DATA_DIR, 'dgp/associate_2d_to_3d_scene/scene_01', SCENE_JSON),
            ontology_name_mapper=ONTOLOGY_NAME_MAPPER_2D_to_3D,
            camera_datum_names=FRONT_CAMERA,
            lidar_datum_names=LIDAR,
            max_num_items=1
        )
        camera_name = FRONT_CAMERA[0]
        assert camera_name in answer
        # Every mapped class must have an expectation; a new mapper entry without one is a test bug.
        for class_name in ONTOLOGY_NAME_MAPPER_2D_to_3D:
            if class_name not in EXPECTED_ASSOCIATIONS:
                raise RuntimeError('Unexpected class_name {}'.format(class_name))
            assert len(answer[camera_name][class_name]) == EXPECTED_ASSOCIATIONS[class_name], class_name


if __name__ == "__main__":
    unittest.main()