Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion docs/source/using_dataset_tools.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ LeRobot provides several utilities for manipulating datasets:
3. **Merge Datasets** - Combine multiple datasets into one. The datasets must have identical features, and episodes are concatenated in the order specified in `repo_ids`
4. **Add Features** - Add new features to a dataset
5. **Remove Features** - Remove features from a dataset
6. **Convert to Video** - Convert image-based datasets to video format for efficient storage

The core implementation is in `lerobot.datasets.dataset_tools`.
An example script detailing how to use the tools API is available in `examples/dataset/use_dataset_tools.py`.

## Command-Line Tool: lerobot-edit-dataset

`lerobot-edit-dataset` is a command-line script for editing datasets. It can be used to delete episodes, split datasets, merge datasets, add features, and remove features.
`lerobot-edit-dataset` is a command-line script for editing datasets. It can be used to delete episodes, split datasets, merge datasets, add features, remove features, and convert image datasets to video format.

Run `lerobot-edit-dataset --help` for more information on the configuration of each operation.

Expand Down Expand Up @@ -86,6 +87,54 @@ lerobot-edit-dataset \
--operation.feature_names "['observation.images.top']"
```

#### Convert to Video

Convert an image-based dataset to video format. This is useful for reducing storage requirements and improving data loading performance. Videos are encoded with configurable quality settings.

```bash
# Convert all episodes to video format with default settings
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_to_video \
--operation.output_dir outputs/converted_videos

# Convert with custom video codec and quality settings
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_to_video \
--operation.output_dir outputs/converted_videos \
--operation.vcodec libsvtav1 \
--operation.pix_fmt yuv420p \
--operation.g 2 \
--operation.crf 30

# Convert only specific episodes
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_to_video \
--operation.output_dir outputs/converted_videos \
--operation.episode_indices "[0, 1, 2, 5, 10]"

# Convert with multiple workers for parallel processing
lerobot-edit-dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_to_video \
--operation.output_dir outputs/converted_videos \
--operation.num_workers 8
```

**Parameters:**

- `output_dir`: Directory where videos will be saved (default: `outputs/converted_videos`)
- `vcodec`: Video codec to use - options: `h264`, `hevc`, `libsvtav1` (default: `libsvtav1`)
- `pix_fmt`: Pixel format - options: `yuv420p`, `yuv444p` (default: `yuv420p`)
- `g`: Group of pictures (GOP) size - lower values give better quality but larger files (default: 2)
- `crf`: Constant rate factor - lower values give better quality but larger files, 0 is lossless (default: 30)
- `fast_decode`: Fast decode tuning option (default: 0)
- `episode_indices`: List of specific episodes to convert (default: all episodes)
- `num_workers`: Number of parallel workers for processing (default: 4)
- `overwrite`: Overwrite existing video files if they exist

### Push to Hub

Add the `--push_to_hub` flag to any command to automatically upload the resulting dataset to the Hugging Face Hub:
Expand Down
266 changes: 262 additions & 4 deletions src/lerobot/scripts/lerobot_edit_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
Edit LeRobot datasets using various transformation tools.

This script allows you to delete episodes, split datasets, merge datasets,
and remove features. When new_repo_id is specified, creates a new dataset.
remove features, and convert image datasets to video format.
When new_repo_id is specified, creates a new dataset.

Usage Examples:

Expand Down Expand Up @@ -65,16 +66,27 @@
--operation.type remove_feature \
--operation.feature_names "['observation.images.top']"

Convert image dataset to video format:
python -m lerobot.scripts.lerobot_edit_dataset \
--repo_id lerobot/pusht_image \
--operation.type convert_to_video \
--operation.output_dir outputs/converted_videos \
--operation.vcodec libsvtav1 \
--operation.crf 30

Using JSON config file:
python -m lerobot.scripts.lerobot_edit_dataset \
--config_path path/to/edit_config.json
"""

import logging
import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from pathlib import Path

from tqdm import tqdm

from lerobot.configs import parser
from lerobot.datasets.dataset_tools import (
delete_episodes,
Expand All @@ -83,7 +95,8 @@
split_dataset,
)
from lerobot.datasets.lerobot_dataset import LeRobotDataset
from lerobot.utils.constants import HF_LEROBOT_HOME
from lerobot.datasets.video_utils import encode_video_frames
from lerobot.utils.constants import HF_LEROBOT_HOME, OBS_IMAGE
from lerobot.utils.utils import init_logging


Expand Down Expand Up @@ -111,10 +124,24 @@ class RemoveFeatureConfig:
feature_names: list[str] | None = None


@dataclass
class ConvertToVideoConfig:
type: str = "convert_to_video"
output_dir: str = "outputs/converted_videos"
vcodec: str = "libsvtav1"
pix_fmt: str = "yuv420p"
g: int = 2
crf: int = 30
fast_decode: int = 0
episode_indices: list[int] | None = None
num_workers: int = 4
overwrite: bool = False


@dataclass
class EditDatasetConfig:
repo_id: str
operation: DeleteEpisodesConfig | SplitConfig | MergeConfig | RemoveFeatureConfig
operation: DeleteEpisodesConfig | SplitConfig | MergeConfig | RemoveFeatureConfig | ConvertToVideoConfig
root: str | None = None
new_repo_id: str | None = None
push_to_hub: bool = False
Expand Down Expand Up @@ -258,6 +285,235 @@ def handle_remove_feature(cfg: EditDatasetConfig) -> None:
LeRobotDataset(output_repo_id, root=output_dir).push_to_hub()


def save_episode_images(
dataset: LeRobotDataset,
imgs_dir: Path,
episode_index: int = 0,
overwrite: bool = False,
num_workers: int = 4,
) -> None:
"""Save images from a specific episode to disk.

Args:
dataset: The LeRobot dataset to extract images from
imgs_dir: Directory to save images to
episode_index: Index of the episode to save (default: 0)
overwrite: Whether to overwrite existing images
num_workers: Number of threads for parallel image saving (default: 4)
"""
ep_num_images = dataset.meta.episodes["length"][episode_index]

# Check if images already exist
if not overwrite and imgs_dir.exists() and len(list(imgs_dir.glob("frame-*.png"))) == ep_num_images:
logging.info(f"Images for episode {episode_index} already exist in {imgs_dir}. Skipping.")
return

# Create directory
imgs_dir.mkdir(parents=True, exist_ok=True)

# Get dataset without torch format for PIL image access
hf_dataset = dataset.hf_dataset.with_format(None)

# Get all image keys (for all cameras)
img_keys = [key for key in hf_dataset.features if key.startswith(OBS_IMAGE)]

if len(img_keys) == 0:
raise ValueError(f"No image keys found in dataset {dataset.repo_id}")

# Use first camera only
img_key = img_keys[0]
imgs_dataset = hf_dataset.select_columns(img_key)

# Get episode start and end indices
from_idx = dataset.meta.episodes["dataset_from_index"][episode_index]
to_idx = dataset.meta.episodes["dataset_to_index"][episode_index]

# Get all items for this episode
episode_dataset = imgs_dataset.select(range(from_idx, to_idx))

# Define function to save a single image
def save_single_image(i_item_tuple):
i, item = i_item_tuple
img = item[img_key]
# Use frame-XXXXXX.png format to match encode_video_frames expectations
img.save(str(imgs_dir / f"frame-{i:06d}.png"), quality=100)
return i

# Save images with proper naming convention for encode_video_frames (frame-XXXXXX.png)
# Use ThreadPoolExecutor for parallel processing
items = list(enumerate(episode_dataset))

with ThreadPoolExecutor(max_workers=num_workers) as executor:
futures = [executor.submit(save_single_image, item) for item in items]
for future in tqdm(
as_completed(futures),
total=len(items),
desc=f"Saving {dataset.repo_id} episode {episode_index} images",
leave=False,
):
future.result() # This will raise any exceptions that occurred


def process_single_episode(
dataset: LeRobotDataset,
episode_index: int,
output_dir: Path,
vcodec: str,
pix_fmt: str,
g: int | None,
crf: int | None,
fast_decode: int,
fps: int,
num_image_workers: int,
overwrite: bool,
) -> str:
"""Process a single episode: save images and encode to video.

Args:
dataset: The LeRobot dataset
episode_index: Index of the episode to process
output_dir: Base directory for outputs
vcodec: Video codec
pix_fmt: Pixel format
g: Group of pictures size
crf: Constant rate factor
fast_decode: Fast decode tuning
fps: Frames per second
num_image_workers: Number of threads for parallel image saving
overwrite: Whether to overwrite existing files

Returns:
Status message for this episode
"""
# Create paths
imgs_dir = output_dir / "images" / dataset.repo_id.replace("/", "_") / f"episode_{episode_index:06d}"

# Create video filename with encoding parameters
video_filename = (
f"{dataset.repo_id.replace('/', '_')}_ep{episode_index:06d}_{vcodec}_{pix_fmt}_g{g}_crf{crf}.mp4"
)
video_path = output_dir / "videos" / dataset.repo_id.replace("/", "_") / video_filename

# Save episode images
save_episode_images(dataset, imgs_dir, episode_index, overwrite, num_image_workers)

# Encode to video
if overwrite or not video_path.is_file():
video_path.parent.mkdir(parents=True, exist_ok=True)

encode_video_frames(
imgs_dir=imgs_dir,
video_path=video_path,
fps=fps,
vcodec=vcodec,
pix_fmt=pix_fmt,
g=g,
crf=crf,
fast_decode=fast_decode,
overwrite=True,
)

return f"✓ Video saved to {video_path}"
else:
return f"Video already exists: {video_path}. Skipping."


def convert_dataset_to_videos(
dataset: LeRobotDataset,
output_dir: Path,
vcodec: str = "libsvtav1",
pix_fmt: str = "yuv420p",
g: int = 2,
crf: int = 30,
fast_decode: int = 0,
episode_indices: list[int] | None = None,
num_workers: int = 4,
overwrite: bool = False,
) -> None:
"""Convert dataset images to video files.

Args:
dataset: The LeRobot dataset
output_dir: Base directory for outputs
vcodec: Video codec (default: libsvtav1)
pix_fmt: Pixel format (default: yuv420p)
g: Group of pictures size (default: 2)
crf: Constant rate factor (default: 30)
fast_decode: Fast decode tuning (default: 0)
episode_indices: List of episode indices to convert (None = all episodes)
num_workers: Number of threads for parallel episode processing (default: 4)
overwrite: Whether to overwrite existing files
"""
# Check that it's an image dataset
if len(dataset.meta.video_keys) > 0:
raise ValueError(
f"This operation is for image datasets only. Video dataset provided: {dataset.repo_id}"
)

fps = dataset.fps

# Determine which episodes to process
num_episodes = len(dataset.meta.episodes)
if episode_indices is None:
episode_indices = list(range(num_episodes))

logging.info(
f"Processing {len(episode_indices)} episodes from {dataset.repo_id} with {num_workers} workers"
)

# Process episodes in parallel
with ThreadPoolExecutor(max_workers=num_workers) as executor:
futures = [
executor.submit(
process_single_episode,
dataset=dataset,
episode_index=episode_index,
output_dir=output_dir,
vcodec=vcodec,
pix_fmt=pix_fmt,
g=g,
crf=crf,
fast_decode=fast_decode,
fps=fps,
num_image_workers=4, # Use fixed workers for image saving within each episode
overwrite=overwrite,
)
for episode_index in episode_indices
]

for future in tqdm(
as_completed(futures),
total=len(episode_indices),
desc="Episodes",
):
result = future.result() # This will raise any exceptions that occurred
logging.info(result)

logging.info(f"\n✓ Completed processing {dataset.repo_id}")


def handle_convert_to_video(cfg: EditDatasetConfig) -> None:
if not isinstance(cfg.operation, ConvertToVideoConfig):
raise ValueError("Operation config must be ConvertToVideoConfig")

dataset = LeRobotDataset(cfg.repo_id, root=cfg.root)
output_dir = Path(cfg.operation.output_dir)

logging.info(f"Converting dataset {cfg.repo_id} to video format")
convert_dataset_to_videos(
dataset=dataset,
output_dir=output_dir,
vcodec=cfg.operation.vcodec,
pix_fmt=cfg.operation.pix_fmt,
g=cfg.operation.g,
crf=cfg.operation.crf,
fast_decode=cfg.operation.fast_decode,
episode_indices=cfg.operation.episode_indices,
num_workers=cfg.operation.num_workers,
overwrite=cfg.operation.overwrite,
)


@parser.wrap()
def edit_dataset(cfg: EditDatasetConfig) -> None:
operation_type = cfg.operation.type
Expand All @@ -270,10 +526,12 @@ def edit_dataset(cfg: EditDatasetConfig) -> None:
handle_merge(cfg)
elif operation_type == "remove_feature":
handle_remove_feature(cfg)
elif operation_type == "convert_to_video":
handle_convert_to_video(cfg)
else:
raise ValueError(
f"Unknown operation type: {operation_type}\n"
f"Available operations: delete_episodes, split, merge, remove_feature"
f"Available operations: delete_episodes, split, merge, remove_feature, convert_to_video"
)


Expand Down