
Commit 43b0f17

jadechoghari, 2toinf, michel-aractingi, and imstevenpmwork authored
feat(policies): Add X-VLA (#2405)
* first commit
* more fixes
* add franka action
* update testing script
* add changes
* update files
* logits matching
* add imagenet as a norm type
* logits matching atol1e-2
* more eval fixes
* more changes
* xvla works on libero
* remove seed
* more refactoring
* more fixes
* more changes
* more changes
* more fixes
* migrate policy revert
* major pre-commit cleanup
* renaming
* revert to self.transformer
* refactor
* new changes
* clean
* update libero
* more changes
* make it work
* more changes:
* remove imagenet dependency
* style
* more
* more refactor
* remove proprio
* add loss
* more
* more
* add freeze/unfreeze options
* add testing
* upgrade transformers version
* update testing
* add installation
* remove .sh file
* fix testing
* silent linter in xvlatest
* fix failing test
* upgrade test, fix failing
* fix testing
* more fixes to testing
* require cuda in tests
* temp check
* add xvla docs
* fix styling
* update libero doc
* remove timm dep
* add different dtype support
* remove timm skip
* remove white lines
* Enhance X-VLA finetuning documentation with optimizer details (#2537) Added detailed instructions for implementing a custom optimizer and modifying parameter retrieval for X-VLA finetuning. Signed-off-by: Jinliang Zheng <[email protected]>
* fix style
* iterate on review
* iterate on cpilot
* revert xvla dep
* free up ci
* test(xvla): remove main test (#2565)
* Add xvla custom optim and dtype (#2567)
* add custom optim
* add custom optim
* add auto mode
* more changes
* add identity to all
* add auto
* release
* add docs
* make image smaller docs
* smaller image in doc
* evan smaller image doc
* finalize doc

---------

Signed-off-by: Jinliang Zheng <[email protected]>
Signed-off-by: Steven Palma <[email protected]>
Co-authored-by: Jinliang Zheng <[email protected]>
Co-authored-by: Michel Aractingi <[email protected]>
Co-authored-by: Steven Palma <[email protected]>
1 parent b0b7554 commit 43b0f17

22 files changed, +6620 −10 lines changed

docs/source/_toctree.yml

Lines changed: 2 additions & 0 deletions
@@ -39,6 +39,8 @@
       title: π₀.₅ (Pi05)
     - local: groot
       title: NVIDIA GR00T N1.5
+    - local: xvla
+      title: X-VLA
   title: "Policies"
 - sections:
   - local: async

docs/source/libero.mdx

Lines changed: 5 additions & 0 deletions
@@ -62,6 +62,11 @@ lerobot-eval \

 - Pass a comma-separated list to `--env.task` for multi-suite evaluation.

+### Control Mode
+
+LIBERO now supports two control modes: relative and absolute. This matters because different VLA checkpoints are trained to output actions under different control parameterizations.
+You can switch between them with `env.control_mode = "relative"` or `env.control_mode = "absolute"`.
+
 ### Policy inputs and outputs

 When using LIBERO through LeRobot, policies interact with the environment via **observations** and **actions**:
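For orientation, a minimal Python sketch of selecting the control mode through the env config factory (a hedged example: the env type string "libero" and the forwarding of control_mode through make_env_config's **kwargs are assumptions based on the factory signature shown further below):

from lerobot.envs.factory import make_env_config

# control_mode defaults to "relative"; "absolute" switches the robot controllers
# to non-delta commands on reset (see src/lerobot/envs/libero.py below)
env_cfg = make_env_config("libero", task="libero_10", control_mode="absolute")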

docs/source/xvla.mdx

Lines changed: 570 additions & 0 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -133,6 +133,7 @@ groot = [
     "ninja>=1.11.1,<2.0.0",
     "flash-attn>=2.5.9,<3.0.0 ; sys_platform != 'darwin'"
 ]
+xvla = ["lerobot[transformers-dep]"]
 hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.13,<0.2.0", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]

 # Features
@@ -161,6 +162,7 @@ all = [
     "lerobot[pi]",
     "lerobot[smolvla]",
     # "lerobot[groot]", TODO(Steven): Gr00t requires specific installation instructions for flash-attn
+    "lerobot[xvla]",
     "lerobot[hilserl]",
     "lerobot[async]",
     "lerobot[dev]",

src/lerobot/envs/configs.py

Lines changed: 2 additions & 1 deletion
@@ -245,7 +245,7 @@ def gym_kwargs(self) -> dict:
 class LiberoEnv(EnvConfig):
     task: str = "libero_10"  # can also choose libero_spatial, libero_object, etc.
     fps: int = 30
-    episode_length: int = 520
+    episode_length: int | None = None
     obs_type: str = "pixels_agent_pos"
     render_mode: str = "rgb_array"
     camera_name: str = "agentview_image,robot0_eye_in_hand_image"
@@ -272,6 +272,7 @@ class LiberoEnv(EnvConfig):
             LIBERO_KEY_PIXELS_EYE_IN_HAND: f"{OBS_IMAGES}.image2",
         }
     )
+    control_mode: str = "relative"  # or "absolute"

     def __post_init__(self):
         if self.obs_type == "pixels":
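A small sketch of the updated config (both fields come from the diff above; leaving episode_length=None keeps the per-suite step limits from TASK_SUITE_MAX_STEPS in src/lerobot/envs/libero.py, while an integer overrides them):

from lerobot.envs.configs import LiberoEnv

env_cfg = LiberoEnv(task="libero_10", control_mode="absolute", episode_length=600)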

src/lerobot/envs/factory.py

Lines changed: 9 additions & 0 deletions
@@ -19,8 +19,10 @@
 import gymnasium as gym
 from gymnasium.envs.registration import registry as gym_registry

+from lerobot.configs.policies import PreTrainedConfig
 from lerobot.envs.configs import AlohaEnv, EnvConfig, LiberoEnv, PushtEnv
 from lerobot.envs.utils import _call_make_env, _download_hub_file, _import_hub_module, _normalize_hub_result
+from lerobot.policies.xvla.configuration_xvla import XVLAConfig
 from lerobot.processor import ProcessorStep
 from lerobot.processor.env_processor import LiberoProcessorStep
 from lerobot.processor.pipeline import PolicyProcessorPipeline
@@ -39,6 +41,7 @@ def make_env_config(env_type: str, **kwargs) -> EnvConfig:

 def make_env_pre_post_processors(
     env_cfg: EnvConfig,
+    policy_cfg: PreTrainedConfig,
 ) -> tuple[
     PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
     PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
@@ -61,6 +64,10 @@ def make_env_pre_post_processors(
     # Preprocessor and Postprocessor steps are Identity for most environments
     preprocessor_steps: list[ProcessorStep] = []
     postprocessor_steps: list[ProcessorStep] = []
+    if isinstance(policy_cfg, XVLAConfig):
+        from lerobot.policies.xvla.processor_xvla import make_xvla_libero_pre_post_processors
+
+        return make_xvla_libero_pre_post_processors()

     # For LIBERO environments, add the LiberoProcessorStep to preprocessor
     if isinstance(env_cfg, LiberoEnv) or "libero" in env_cfg.type:
@@ -136,6 +143,8 @@ def make_env(
             init_states=cfg.init_states,
             gym_kwargs=cfg.gym_kwargs,
             env_cls=env_cls,
+            control_mode=cfg.control_mode,
+            episode_length=cfg.episode_length,
         )
     elif "metaworld" in cfg.type:
         from lerobot.envs.metaworld import create_metaworld_envs
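A hedged sketch of the new routing in make_env_pre_post_processors (both imported names appear in this diff; instantiating XVLAConfig with all defaults is an assumption):

from lerobot.envs.configs import LiberoEnv
from lerobot.envs.factory import make_env_pre_post_processors
from lerobot.policies.xvla.configuration_xvla import XVLAConfig

# With an X-VLA policy config, the generic LIBERO processor steps are bypassed and
# the XVLA-specific LIBERO pre/post processor pipelines are returned instead.
env_pre, env_post = make_env_pre_post_processors(env_cfg=LiberoEnv(), policy_cfg=XVLAConfig())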

src/lerobot/envs/libero.py

Lines changed: 26 additions & 5 deletions
@@ -80,10 +80,7 @@ def get_libero_dummy_action():
     return [0, 0, 0, 0, 0, 0, -1]


-OBS_STATE_DIM = 8
 ACTION_DIM = 7
-AGENT_POS_LOW = -1000.0
-AGENT_POS_HIGH = 1000.0
 ACTION_LOW = -1.0
 ACTION_HIGH = 1.0
 TASK_SUITE_MAX_STEPS: dict[str, int] = {
@@ -103,6 +100,7 @@ def __init__(
         task_suite: Any,
         task_id: int,
         task_suite_name: str,
+        episode_length: int | None = None,
         camera_name: str | Sequence[str] = "agentview_image,robot0_eye_in_hand_image",
         obs_type: str = "pixels",
         render_mode: str = "rgb_array",
@@ -114,6 +112,7 @@ def __init__(
         episode_index: int = 0,
         camera_name_mapping: dict[str, str] | None = None,
         num_steps_wait: int = 10,
+        control_mode: str = "relative",
     ):
         super().__init__()
         self.task_id = task_id
@@ -141,14 +140,19 @@ def __init__(
         self.camera_name_mapping = camera_name_mapping
         self.num_steps_wait = num_steps_wait
         self.episode_index = episode_index
+        self.episode_length = episode_length
         # Load once and keep
         self._init_states = get_task_init_states(task_suite, self.task_id) if self.init_states else None
         self._init_state_id = self.episode_index  # tie each sub-env to a fixed init state

         self._env = self._make_envs_task(task_suite, self.task_id)
         default_steps = 500
-        self._max_episode_steps = TASK_SUITE_MAX_STEPS.get(task_suite_name, default_steps)
-
+        self._max_episode_steps = (
+            TASK_SUITE_MAX_STEPS.get(task_suite_name, default_steps)
+            if self.episode_length is None
+            else self.episode_length
+        )
+        self.control_mode = control_mode
         images = {}
         for cam in self.camera_name:
             images[self.camera_name_mapping[cam]] = spaces.Box(
@@ -296,6 +300,15 @@ def reset(self, seed=None, **kwargs):
         # Increasing this value can improve determinism and reproducibility across resets.
         for _ in range(self.num_steps_wait):
             raw_obs, _, _, _ = self._env.step(get_libero_dummy_action())
+
+        if self.control_mode == "absolute":
+            for robot in self._env.robots:
+                robot.controller.use_delta = False
+        elif self.control_mode == "relative":
+            for robot in self._env.robots:
+                robot.controller.use_delta = True
+        else:
+            raise ValueError(f"Invalid control mode: {self.control_mode}")
         observation = self._format_raw_obs(raw_obs)
         info = {"is_success": False}
         return observation, info
@@ -341,8 +354,10 @@ def _make_env_fns(
     task_id: int,
     n_envs: int,
     camera_names: list[str],
+    episode_length: int | None,
     init_states: bool,
     gym_kwargs: Mapping[str, Any],
+    control_mode: str,
 ) -> list[Callable[[], LiberoEnv]]:
     """Build n_envs factory callables for a single (suite, task_id)."""

@@ -354,7 +369,9 @@ def _make_env(episode_index: int, **kwargs) -> LiberoEnv:
             task_suite_name=suite_name,
             camera_name=camera_names,
             init_states=init_states,
+            episode_length=episode_length,
             episode_index=episode_index,
+            control_mode=control_mode,
             **local_kwargs,
         )

@@ -374,6 +391,8 @@ def create_libero_envs(
     camera_name: str | Sequence[str] = "agentview_image,robot0_eye_in_hand_image",
     init_states: bool = True,
     env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
+    control_mode: str = "relative",
+    episode_length: int | None = None,
 ) -> dict[str, dict[int, Any]]:
     """
     Create vectorized LIBERO environments with a consistent return shape.
@@ -415,12 +434,14 @@
     for tid in selected:
         fns = _make_env_fns(
             suite=suite,
+            episode_length=episode_length,
             suite_name=suite_name,
             task_id=tid,
             n_envs=n_envs,
             camera_names=camera_names,
             init_states=init_states,
             gym_kwargs=gym_kwargs,
+            control_mode=control_mode,
         )
         out[suite_name][tid] = env_cls(fns)
         print(f"Built vec env | suite={suite_name} | task_id={tid} | n_envs={n_envs}")

src/lerobot/optim/optimizers.py

Lines changed: 101 additions & 0 deletions
@@ -104,6 +104,107 @@ def build(self, params: dict) -> torch.optim.Optimizer:
         return torch.optim.SGD(params, **kwargs)


+@OptimizerConfig.register_subclass("xvla-adamw")
+@dataclass
+class XVLAAdamWConfig(OptimizerConfig):
+    """Custom AdamW optimizer for XVLA with differential learning rates.
+
+    The Vision-Language Model (VLM) is trained with 1/10 of the base learning rate
+    for stable optimization, while all other components use the full LR.
+
+    This LR ratio is crucial for achieving strong and stable finetuning performance.
+
+    Soft-prompts can optionally use a separate learning rate with warm-up support.
+    Set `soft_prompt_lr_scale` to a value < 1.0 (e.g., 0.1) to start soft-prompts
+    at a lower LR. Combine with a warmup scheduler for optimal results.
+
+    Note:
+        Completely matching official reported performance may require an additional
+        warm-up LR schedule for soft-prompts, which can bring minor improvements.
+        When `soft_prompt_warmup_lr_scale` is set, soft-prompts start at
+        `lr * soft_prompt_warmup_lr_scale` and should be warmed up via the scheduler.
+
+    Parameter Groups:
+        - Group 0 (vlm): VLM parameters at lr * 0.1, weight_decay * 0.1
+        - Group 1 (soft_prompts): Soft-prompt parameters at lr * soft_prompt_lr_scale
+        - Group 2 (other): All other parameters at full lr
+    """
+
+    lr: float = 1e-4
+    betas: tuple[float, float] = (0.9, 0.99)
+    eps: float = 1e-8
+    weight_decay: float = 0.0
+    grad_clip_norm: float = 10.0
+    # Soft-prompt specific settings
+    soft_prompt_lr_scale: float = 1.0  # Scale factor for soft-prompt LR (1.0 = same as base LR)
+    soft_prompt_warmup_lr_scale: float | None = None  # If set, start soft-prompts at this scale (e.g., 0.01)
+
+    def build(self, params: dict) -> torch.optim.Optimizer:
+        """
+        Build AdamW optimizer with differential learning rates.
+
+        Expects `named_parameters()` as input (dict of name -> param).
+        Applies:
+        - lr * 0.1 for all VLM-related parameters
+        - lr * soft_prompt_lr_scale for soft-prompt parameters (with optional warmup)
+        - full lr for all other parameters
+
+        Args:
+            params: Dictionary of parameter names to parameters (from named_parameters())
+
+        Returns:
+            AdamW optimizer with parameter groups for VLM, soft-prompts, and other components
+        """
+        assert isinstance(params, dict), "Custom LR optimizer requires `named_parameters()` as inputs."
+
+        vlm_group, soft_prompt_group, other_group = [], [], []
+        for name, p in params.items():
+            if not p.requires_grad:
+                continue
+            if "vlm" in name.lower():
+                vlm_group.append(p)
+            elif "soft_prompt" in name.lower():
+                soft_prompt_group.append(p)
+            else:
+                other_group.append(p)
+
+        # Determine soft-prompt LR
+        soft_prompt_lr = self.lr * self.soft_prompt_lr_scale
+        if self.soft_prompt_warmup_lr_scale is not None:
+            # Start at warmup scale, scheduler will warm up to soft_prompt_lr
+            soft_prompt_lr = self.lr * self.soft_prompt_warmup_lr_scale
+
+        param_groups = [
+            {
+                "params": vlm_group,
+                "lr": self.lr * 0.1,
+                "weight_decay": self.weight_decay * 0.1,
+                "name": "vlm",
+            },
+            {
+                "params": soft_prompt_group,
+                "lr": soft_prompt_lr,
+                "weight_decay": self.weight_decay,
+                "name": "soft_prompts",
+            },
+            {
+                "params": other_group,
+                "lr": self.lr,
+                "weight_decay": self.weight_decay,
+                "name": "other",
+            },
+        ]
+
+        # Filter out empty groups
+        param_groups = [g for g in param_groups if len(g["params"]) > 0]
+
+        return torch.optim.AdamW(
+            param_groups,
+            betas=self.betas,
+            eps=self.eps,
+        )
+
+
 @OptimizerConfig.register_subclass("multi_adam")
 @dataclass
 class MultiAdamConfig(OptimizerConfig):
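Illustrative usage of the new optimizer config (a sketch only: the nn.Linear stands in for a real XVLA policy, whose parameter names would contain "vlm" and "soft_prompt"; registration under "xvla-adamw" also makes the config selectable by that name):

import torch.nn as nn

from lerobot.optim.optimizers import XVLAAdamWConfig

policy = nn.Linear(4, 4)  # stand-in module for a policy
optim_cfg = XVLAAdamWConfig(lr=1e-4, weight_decay=0.01, soft_prompt_lr_scale=0.1)
# build() expects named_parameters() as a dict, not a flat parameter list
optimizer = optim_cfg.build(dict(policy.named_parameters()))
# Resulting groups: "vlm" at lr * 0.1, "soft_prompts" at lr * soft_prompt_lr_scale, "other" at lr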

src/lerobot/policies/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -21,6 +21,7 @@
 from .smolvla.processor_smolvla import SmolVLANewLineProcessor
 from .tdmpc.configuration_tdmpc import TDMPCConfig as TDMPCConfig
 from .vqbet.configuration_vqbet import VQBeTConfig as VQBeTConfig
+from .xvla.configuration_xvla import XVLAConfig as XVLAConfig

 __all__ = [
     "ACTConfig",
@@ -31,4 +32,5 @@
     "TDMPCConfig",
     "VQBeTConfig",
     "GrootConfig",
+    "XVLAConfig",
 ]
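The re-export means the new config is importable from the package root, i.e. from lerobot.policies import XVLAConfig.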

src/lerobot/policies/factory.py

Lines changed: 17 additions & 2 deletions
@@ -41,6 +41,7 @@
 from lerobot.policies.tdmpc.configuration_tdmpc import TDMPCConfig
 from lerobot.policies.utils import validate_visual_features_consistency
 from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig
+from lerobot.policies.xvla.configuration_xvla import XVLAConfig
 from lerobot.processor import PolicyAction, PolicyProcessorPipeline
 from lerobot.processor.converters import (
     batch_to_transition,
@@ -108,6 +109,10 @@ def get_policy_class(name: str) -> type[PreTrainedPolicy]:
         from lerobot.policies.groot.modeling_groot import GrootPolicy

         return GrootPolicy
+    elif name == "xvla":
+        from lerobot.policies.xvla.modeling_xvla import XVLAPolicy
+
+        return XVLAPolicy
     else:
         try:
             return _get_policy_cls_from_policy_name(name=name)
@@ -154,6 +159,8 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:
         return RewardClassifierConfig(**kwargs)
     elif policy_type == "groot":
         return GrootConfig(**kwargs)
+    elif policy_type == "xvla":
+        return XVLAConfig(**kwargs)
     else:
         try:
             config_cls = PreTrainedConfig.get_choice_class(policy_type)
@@ -337,6 +344,15 @@ def make_pre_post_processors(
             config=policy_cfg,
             dataset_stats=kwargs.get("dataset_stats"),
         )
+    elif isinstance(policy_cfg, XVLAConfig):
+        from lerobot.policies.xvla.processor_xvla import (
+            make_xvla_pre_post_processors,
+        )
+
+        processors = make_xvla_pre_post_processors(
+            config=policy_cfg,
+            dataset_stats=kwargs.get("dataset_stats"),
+        )

     else:
         try:
@@ -414,8 +430,7 @@ def make_policy(
         raise ValueError("env_cfg cannot be None when ds_meta is not provided")
     features = env_to_policy_features(env_cfg)

-    if not cfg.output_features:
-        cfg.output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION}
+    cfg.output_features = {key: ft for key, ft in features.items() if ft.type is FeatureType.ACTION}
     if not cfg.input_features:
         cfg.input_features = {key: ft for key, ft in features.items() if key not in cfg.output_features}
     kwargs["config"] = cfg

0 commit comments
