Skip to content

Commit 8480cdf

Browse files
committed
Add mp4 compression and reading for pinhole and fisheye cameras.
1 parent 3081310 commit 8480cdf

21 files changed

+355
-285
lines changed

src/py123d/conversion/dataset_converter_config.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,11 @@ class DatasetConverterConfig:
4141
include_route: bool = False
4242

4343
def __post_init__(self):
44-
assert (
45-
self.pinhole_camera_store_option != "mp4"
46-
), "MP4 format is not yet supported, but planned for future releases."
44+
4745
assert self.pinhole_camera_store_option in [
4846
"path",
4947
"binary",
48+
"mp4",
5049
], f"Invalid camera store option, got {self.pinhole_camera_store_option}."
5150

5251
assert self.lidar_store_option in [

src/py123d/conversion/datasets/av2/av2_sensor_converter.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
find_closest_target_fpath,
1515
get_slice_with_timestamp_ns,
1616
)
17-
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData
17+
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData
1818
from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
1919
from py123d.conversion.registry.box_detection_label_registry import AV2SensorBoxDetectionLabel
2020
from py123d.conversion.registry.lidar_index_registry import AVSensorLiDARIndex
@@ -322,9 +322,9 @@ def _extract_av2_sensor_pinhole_cameras(
322322
synchronization_df: pd.DataFrame,
323323
source_log_path: Path,
324324
dataset_converter_config: DatasetConverterConfig,
325-
) -> Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]]:
325+
) -> List[CameraData]:
326326

327-
camera_dict: Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]] = {}
327+
camera_data_list: List[CameraData] = []
328328
split = source_log_path.parent.name
329329
log_id = source_log_path.name
330330

@@ -351,17 +351,15 @@ def _extract_av2_sensor_pinhole_cameras(
351351
absolute_image_path = av2_sensor_data_root / relative_image_path
352352
assert absolute_image_path.exists()
353353

354-
# TODO: Adjust for finer IMU timestamps to correct the camera extrinsic.
355-
camera_extrinsic = _row_dict_to_state_se3(row)
356-
camera_data = None
357-
if dataset_converter_config.pinhole_camera_store_option == "path":
358-
camera_data = str(relative_image_path)
359-
elif dataset_converter_config.pinhole_camera_store_option == "binary":
360-
with open(absolute_image_path, "rb") as f:
361-
camera_data = f.read()
362-
camera_dict[pinhole_camera_type] = camera_data, camera_extrinsic
363-
364-
return camera_dict
354+
camera_data = CameraData(
355+
camera_type=pinhole_camera_type,
356+
extrinsic=_row_dict_to_state_se3(row),
357+
dataset_root=av2_sensor_data_root,
358+
relative_path=relative_image_path,
359+
)
360+
camera_data_list.append(camera_data)
361+
362+
return camera_data_list
365363

366364

367365
def _extract_av2_sensor_lidars(

src/py123d/conversion/datasets/kitti360/kitti360_converter.py

Lines changed: 29 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
kittiId2label,
2525
)
2626
from py123d.conversion.datasets.kitti360.utils.preprocess_detection import process_detection
27-
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData
27+
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData
2828
from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
2929
from py123d.conversion.registry.box_detection_label_registry import KITTI360BoxDetectionLabel
3030
from py123d.conversion.registry.lidar_index_registry import Kitti360LiDARIndex
@@ -304,12 +304,9 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None:
304304
timestamp=ts_list[valid_idx],
305305
ego_state=ego_state_all[idx],
306306
box_detections=box_detection_wrapper_all[valid_idx],
307-
traffic_lights=None,
308307
pinhole_cameras=pinhole_cameras,
309308
fisheye_mei_cameras=fisheye_cameras,
310309
lidars=lidars,
311-
scenario_tags=None,
312-
route_lane_group_ids=None,
313310
)
314311

315312
log_writer.close()
@@ -724,26 +721,24 @@ def _extract_kitti360_pinhole_cameras(
724721
camera_calibration: Dict[str, StateSE3],
725722
kitti360_folders: Dict[str, Path],
726723
data_converter_config: DatasetConverterConfig,
727-
) -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]]:
724+
) -> List[CameraData]:
728725

729-
pinhole_camera_dict: Dict[PinholeCameraType, Optional[Tuple[Union[str, bytes], StateSE3]]] = {}
726+
pinhole_camera_data_list: List[CameraData] = []
730727
if data_converter_config.include_pinhole_cameras:
731-
732728
for camera_type, cam_dir_name in KITTI360_PINHOLE_CAMERA_TYPES.items():
733729
img_path_png = kitti360_folders[DIR_2D_RAW] / log_name / cam_dir_name / "data_rect" / f"{idx:010d}.png"
734730
camera_extrinsic = camera_calibration[cam_dir_name]
735-
736731
if img_path_png.exists():
737-
if data_converter_config.pinhole_camera_store_option == "path":
738-
camera_data = str(img_path_png)
739-
elif data_converter_config.pinhole_camera_store_option == "binary":
740-
with open(img_path_png, "rb") as f:
741-
camera_data = f.read()
742-
else:
743-
camera_data = None
732+
pinhole_camera_data_list.append(
733+
CameraData(
734+
camera_type=camera_type,
735+
extrinsic=camera_extrinsic,
736+
dataset_root=kitti360_folders[DIR_ROOT],
737+
relative_path=img_path_png.relative_to(kitti360_folders[DIR_ROOT]),
738+
)
739+
)
744740

745-
pinhole_camera_dict[camera_type] = camera_data, camera_extrinsic
746-
return pinhole_camera_dict
741+
return pinhole_camera_data_list
747742

748743

749744
def _extract_kitti360_fisheye_mei_cameras(
@@ -752,22 +747,23 @@ def _extract_kitti360_fisheye_mei_cameras(
752747
camera_calibration: Dict[str, StateSE3],
753748
kitti360_folders: Dict[str, Path],
754749
data_converter_config: DatasetConverterConfig,
755-
) -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]]:
756-
757-
fisheye_camera_dict: Dict[FisheyeMEICameraType, Optional[Tuple[Union[str, bytes], StateSE3]]] = {}
758-
for camera_type, cam_dir_name in KITTI360_FISHEYE_MEI_CAMERA_TYPES.items():
759-
img_path_png = kitti360_folders[DIR_2D_RAW] / log_name / cam_dir_name / "data_rgb" / f"{idx:010d}.png"
760-
camera_extrinsic = camera_calibration[cam_dir_name]
761-
if img_path_png.exists():
762-
if data_converter_config.pinhole_camera_store_option == "path":
763-
camera_data = str(img_path_png)
764-
elif data_converter_config.pinhole_camera_store_option == "binary":
765-
with open(img_path_png, "rb") as f:
766-
camera_data = f.read()
767-
else:
768-
camera_data = None
769-
fisheye_camera_dict[camera_type] = camera_data, camera_extrinsic
770-
return fisheye_camera_dict
750+
) -> List[CameraData]:
751+
752+
fisheye_camera_data_list: List[CameraData] = []
753+
if data_converter_config.include_fisheye_mei_cameras:
754+
for camera_type, cam_dir_name in KITTI360_FISHEYE_MEI_CAMERA_TYPES.items():
755+
img_path_png = kitti360_folders[DIR_2D_RAW] / log_name / cam_dir_name / "data_rgb" / f"{idx:010d}.png"
756+
camera_extrinsic = camera_calibration[cam_dir_name]
757+
if img_path_png.exists():
758+
fisheye_camera_data_list.append(
759+
CameraData(
760+
camera_type=camera_type,
761+
extrinsic=camera_extrinsic,
762+
dataset_root=kitti360_folders[DIR_ROOT],
763+
relative_path=img_path_png.relative_to(kitti360_folders[DIR_ROOT]),
764+
)
765+
)
766+
return fisheye_camera_data_list
771767

772768

773769
def _load_kitti_360_calibration(kitti_360_data_root: Path) -> Dict[str, StateSE3]:

src/py123d/conversion/datasets/nuplan/nuplan_converter.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
get_box_detections_for_lidarpc_token_from_db,
2323
get_nearest_ego_pose_for_timestamp_from_db,
2424
)
25-
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData
25+
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData
2626
from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
2727
from py123d.conversion.registry.box_detection_label_registry import NuPlanBoxDetectionLabel
2828
from py123d.conversion.registry.lidar_index_registry import NuPlanLiDARIndex
@@ -356,9 +356,9 @@ def _extract_nuplan_cameras(
356356
source_log_path: Path,
357357
nuplan_sensor_root: Path,
358358
dataset_converter_config: DatasetConverterConfig,
359-
) -> Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]]:
359+
) -> List[CameraData]:
360360

361-
camera_dict: Dict[str, Union[str, bytes]] = {}
361+
camera_data_list: List[CameraData] = []
362362

363363
if dataset_converter_config.include_pinhole_cameras:
364364
log_cam_infos = {camera.token: camera for camera in nuplan_log_db.log.cameras}
@@ -395,18 +395,17 @@ def _extract_nuplan_cameras(
395395
c2e = img_e2e @ c2img_e
396396
extrinsic = StateSE3.from_transformation_matrix(c2e)
397397

398-
# Store camera data, either as path or binary
399-
camera_data: Optional[Union[str, bytes]] = None
400-
if dataset_converter_config.pinhole_camera_store_option == "path":
401-
camera_data = str(filename_jpg)
402-
elif dataset_converter_config.pinhole_camera_store_option == "binary":
403-
with open(filename_jpg, "rb") as f:
404-
camera_data = f.read()
405-
406398
# Append to the camera data list
407-
camera_dict[camera_type] = camera_data, extrinsic
399+
camera_data_list.append(
400+
CameraData(
401+
camera_type=camera_type,
402+
extrinsic=extrinsic,
403+
dataset_root=nuplan_sensor_root,
404+
relative_path=filename_jpg.relative_to(nuplan_sensor_root),
405+
)
406+
)
408407

409-
return camera_dict
408+
return camera_data_list
410409

411410

412411
def _extract_nuplan_lidars(

src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import gc
22
from pathlib import Path
3-
from typing import Any, Dict, List, Tuple, Union
3+
from typing import Any, Dict, List, Union
44

55
import numpy as np
66
from pyquaternion import Quaternion
@@ -15,7 +15,7 @@
1515
NUSCENES_DETECTION_NAME_DICT,
1616
NUSCENES_DT,
1717
)
18-
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData
18+
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData
1919
from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
2020
from py123d.conversion.registry.box_detection_label_registry import NuScenesBoxDetectionLabel
2121
from py123d.conversion.registry.lidar_index_registry import NuScenesLiDARIndex
@@ -388,8 +388,8 @@ def _extract_nuscenes_cameras(
388388
sample: Dict[str, Any],
389389
nuscenes_data_root: Path,
390390
dataset_converter_config: DatasetConverterConfig,
391-
) -> Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]]:
392-
camera_dict: Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]] = {}
391+
) -> List[CameraData]:
392+
camera_data_list: List[CameraData] = []
393393

394394
if dataset_converter_config.include_pinhole_cameras:
395395
for camera_type, camera_channel in NUSCENES_CAMERA_TYPES.items():
@@ -409,20 +409,20 @@ def _extract_nuscenes_cameras(
409409
extrinsic_matrix[:3, 3] = translation
410410
extrinsic = StateSE3.from_transformation_matrix(extrinsic_matrix)
411411

412-
cam_path = nuscenes_data_root / cam_data["filename"]
412+
cam_path = nuscenes_data_root / str(cam_data["filename"])
413413

414414
if cam_path.exists() and cam_path.is_file():
415-
if dataset_converter_config.pinhole_camera_store_option == "path":
416-
camera_data = str(cam_path)
417-
elif dataset_converter_config.pinhole_camera_store_option == "binary":
418-
with open(cam_path, "rb") as f:
419-
camera_data = f.read()
420-
else:
421-
continue
422-
423-
camera_dict[camera_type] = (camera_data, extrinsic)
415+
# camera_dict[camera_type] = (camera_data, extrinsic)
416+
camera_data_list.append(
417+
CameraData(
418+
camera_type=camera_type,
419+
extrinsic=extrinsic,
420+
relative_path=cam_path.relative_to(nuscenes_data_root),
421+
dataset_root=nuscenes_data_root,
422+
)
423+
)
424424

425-
return camera_dict
425+
return camera_data_list
426426

427427

428428
def _extract_nuscenes_lidars(

src/py123d/conversion/datasets/pandaset/pandaset_converter.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
read_pkl_gz,
2323
rotate_pandaset_pose_to_iso_coordinates,
2424
)
25-
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData
25+
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData
2626
from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
2727
from py123d.conversion.registry.box_detection_label_registry import PandasetBoxDetectionLabel
2828
from py123d.conversion.registry.lidar_index_registry import PandasetLiDARIndex
@@ -332,9 +332,8 @@ def _extract_pandaset_sensor_camera(
332332
ego_state_se3: EgoStateSE3,
333333
camera_poses: Dict[str, List[Dict[str, Dict[str, float]]]],
334334
dataset_converter_config: DatasetConverterConfig,
335-
) -> Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]]:
336-
337-
camera_dict: Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]] = {}
335+
) -> List[CameraData]:
336+
camera_data_list: List[CameraData] = []
338337
iteration_str = f"{iteration:02d}"
339338

340339
if dataset_converter_config.include_pinhole_cameras:
@@ -346,22 +345,20 @@ def _extract_pandaset_sensor_camera(
346345

347346
camera_pose_dict = camera_poses[camera_name][iteration]
348347
camera_extrinsic = pandaset_pose_dict_to_state_se3(camera_pose_dict)
349-
# camera_extrinsic = rotate_pandaset_pose_to_iso_coordinates(camera_extrinsic)
350348

351349
camera_extrinsic = StateSE3.from_array(
352350
convert_absolute_to_relative_se3_array(ego_state_se3.rear_axle_se3, camera_extrinsic.array), copy=True
353351
)
352+
camera_data_list.append(
353+
CameraData(
354+
camera_type=camera_type,
355+
extrinsic=camera_extrinsic,
356+
dataset_root=source_log_path.parent,
357+
relative_path=image_abs_path.relative_to(source_log_path.parent),
358+
)
359+
)
354360

355-
camera_data = None
356-
if dataset_converter_config.pinhole_camera_store_option == "path":
357-
pandaset_data_root = source_log_path.parent
358-
camera_data = str(image_abs_path.relative_to(pandaset_data_root))
359-
elif dataset_converter_config.pinhole_camera_store_option == "binary":
360-
with open(image_abs_path, "rb") as f:
361-
camera_data = f.read()
362-
camera_dict[camera_type] = camera_data, camera_extrinsic
363-
364-
return camera_dict
361+
return camera_data_list
365362

366363

367364
def _extract_pandaset_lidar(

src/py123d/conversion/datasets/wopd/waymo_sensor_io.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ def _get_frame_at_iteration(filepath: Path, iteration: int) -> Optional[dataset_
2828
return frame
2929

3030

31-
def load_jpeg_binary_from_file(tf_record_path: Path, iteration: int, pinhole_camera_type: PinholeCameraType) -> bytes:
31+
def load_jpeg_binary_from_tf_record_file(
32+
tf_record_path: Path,
33+
iteration: int,
34+
pinhole_camera_type: PinholeCameraType,
35+
) -> bytes:
3236
frame = _get_frame_at_iteration(tf_record_path, iteration)
3337
assert frame is not None, f"Frame at iteration {iteration} not found in Waymo file: {tf_record_path}"
3438

src/py123d/conversion/datasets/wopd/wopd_converter.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
WOPD_LIDAR_TYPES,
1818
)
1919
from py123d.conversion.datasets.wopd.waymo_map_utils.wopd_map_utils import convert_wopd_map
20-
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData
20+
from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData
2121
from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
2222
from py123d.conversion.registry.box_detection_label_registry import WOPDBoxDetectionLabel
2323
from py123d.conversion.registry.lidar_index_registry import DefaultLiDARIndex, WOPDLiDARIndex
@@ -379,9 +379,9 @@ def _extract_wopd_box_detections(
379379

380380
def _extract_wopd_cameras(
381381
frame: dataset_pb2.Frame, dataset_converter_config: DatasetConverterConfig
382-
) -> Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]]:
382+
) -> List[CameraData]:
383383

384-
camera_dict: Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]] = {}
384+
camera_data_list: List[CameraData] = []
385385

386386
if dataset_converter_config.include_pinhole_cameras:
387387

@@ -404,10 +404,15 @@ def _extract_wopd_cameras(
404404

405405
for image_proto in frame.images:
406406
camera_type = WOPD_CAMERA_TYPES[image_proto.name]
407-
camera_bytes: bytes = image_proto.image
408-
camera_dict[camera_type] = camera_bytes, camera_extrinsic[camera_type]
407+
camera_data_list.append(
408+
CameraData(
409+
camera_type=camera_type,
410+
extrinsic=camera_extrinsic[camera_type],
411+
jpeg_binary=image_proto.image,
412+
)
413+
)
409414

410-
return camera_dict
415+
return camera_data_list
411416

412417

413418
def _extract_wopd_lidars(

0 commit comments

Comments
 (0)