inclusionAI
diff --git a/‎.github/workflows/deploy-docs.yml‎
Lines changed: 1 addition & 3 deletions b/‎.github/workflows/deploy-docs.yml‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎.github/workflows/format-check.yml‎
Lines changed: 6 additions & 14 deletions b/‎.github/workflows/format-check.yml‎
Lines changed: 6 additions & 14 deletions
diff --git a/‎AGENTS.md‎
Lines changed: 222 additions & 202 deletions b/‎AGENTS.md‎
Lines changed: 222 additions & 202 deletions
diff --git a/‎README.md‎
Lines changed: 3 additions & 4 deletions b/‎README.md‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎areal/api/cli_args.py‎
Lines changed: 140 additions & 16 deletions b/‎areal/api/cli_args.py‎
Lines changed: 140 additions & 16 deletions
@@ -32,16 +32,14 @@ jobs:
         uses: actions/cache@v3
         with:
           path: ~/.cache/pip
-          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
+          key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }}
           restore-keys: |
             ${{ runner.os }}-pip-
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install --upgrade jupyter-book==1.0.4.post1
-          # Install additional dependencies if you have a requirements.txt
-          # pip install -r requirements.txt
 
       - name: Build the book
         run: |
 
@@ -18,22 +18,14 @@ jobs:
       - name: Install Python dependencies
         run: |
           python3 -m pip install --upgrade pip
-          pip install ruff==0.14.1 black==25.1.0 clang-format==19.1.7 autoflake==2.3.1
+          pip install ruff==0.14.1 clang-format==19.1.7
 
-      - name: Check autoflake formatting
+      - name: Check Python formatting and linting with ruff
         run: |
-          autoflake --check -r areal/
-          autoflake --check -r examples/
-          autoflake --check -r docs/
-
-      - name: Check Python formatting with ruff
-        run: |
-          ruff check --select I areal/
-          ruff check --select I examples/
-          ruff check --select I docs/
-
-      - name: Check Python formatting with black
-        run: black --check .
+          ruff check areal/
+          ruff check examples/
+          ruff format --check areal/
+          ruff format --check examples/
 
       - name: Check C++ formatting
         run: |
 
@@ -35,9 +35,8 @@ our project just as you enjoy real-world milk tea (cheers).
 ## 📰 News
 
 **\[2025/08/30\]** Introducing ASearcher, a state-of-the-art search agent built with
-AReaL's end-to-end asynchronous RL training. Check out the
-[paper](https://arxiv.org/pdf/2508.07976) and the
-[open-source repository](https://github.com/inclusionAI/ASearcher)!
+AReaL's end-to-end asynchronous RL training. Check out the
+[paper](https://arxiv.org/pdf/2508.07976) and the
+[open-source repository](https://github.com/inclusionAI/ASearcher)!
 
 **\[2025/07/31\] (AReaL-lite)** We introduce AReaL-lite, a **lightweight** version of
 AReaL designed specifically for AI researchers and rapid prototyping. AReaL-lite
@@ -56,7 +55,7 @@ asynchronous RL training, which achieves **2.77× speedup while delivering compa
 superior training performance** compared to synchronous systems. Furthermore,
 asynchronous RL significantly simplifies multi-turn agentic RL training setup! Check out
 [our v0.3 overview blog](/blog/AReaL_v0_3.md) and the
-[research paper](https://arxiv.org/pdf/2505.24298).
+[research paper](assets/paper.pdf).
 
 **\[2025/03/31\] (v0.2, boba)** Introducing our milestone release—boba! Please call it
 A-ReaL-boba! This release features significantly faster training with SGLang support and
 
@@ -4,23 +4,30 @@
 from dataclasses import MISSING as dataclass_missing
 from dataclasses import asdict, dataclass, field, fields
 from pathlib import Path
-from typing import Any
+from typing import Any, TypeVar
 
 import uvloop
 import yaml
 from hydra import compose as hydra_compose
 from hydra import initialize as hydra_init
 from hydra.core.global_hydra import GlobalHydra
 from omegaconf import MISSING, DictConfig, OmegaConf
+from transformers import PreTrainedTokenizerFast
 
 from areal.platforms import current_platform
 from areal.utils import logging, name_resolve, pkg_version
+from areal.utils.constants import (
+    PROX_LOGP_METHOD_RECOMPUTE,
+    PROX_LOGP_METHODS_ALL,
+)
 from areal.utils.pkg_version import is_version_less
 
 uvloop.install()
 
 logger = logging.getLogger("CLI args")
 
+ConfigT = TypeVar("ConfigT")
+
 
 @dataclass
 class NormConfig:
@@ -157,12 +164,25 @@ class GenerationHyperparameters:
             )
         },
     )
+    lora_name: str = field(
+        default="",
+        metadata={"help": "Lora name to be used for this generation."},
+    )
 
     def new(self, **kwargs):
         args = asdict(self)
         args.update(kwargs)
         return GenerationHyperparameters(**args)
 
+    def new_with_stop_and_pad_token_ids(self, tokenizer: PreTrainedTokenizerFast):
+        """Return a copy whose stop tokens include the tokenizer's pad and EOS ids.
+
+        The current ``stop_token_ids`` list is copied first, so ``self`` is not
+        mutated. A tokenizer may leave ``pad_token_id`` or ``eos_token_id``
+        unset (``None``); such ids are skipped instead of being appended,
+        because ``None`` is not a usable stop token id.
+
+        Args:
+            tokenizer: Object exposing ``pad_token_id`` and ``eos_token_id``.
+
+        Returns:
+            The result of ``self.new(stop_token_ids=...)`` with the extended list.
+        """
+        new_stop_token_ids = self.stop_token_ids.copy()
+        for token_id in (tokenizer.pad_token_id, tokenizer.eos_token_id):
+            # Skip unset ids and ids already present (pad may equal EOS).
+            if token_id is not None and token_id not in new_stop_token_ids:
+                new_stop_token_ids.append(token_id)
+        return self.new(stop_token_ids=new_stop_token_ids)
+
     def to_openai_args_dict(
         self, exclude_args: list[str] | None = None
     ) -> dict[str, Any]:
@@ -402,7 +422,6 @@ class SchedulingSpec:
         default_factory=dict,
         metadata={"help": "Environment variables for the container"},
     )
-    # cmd
     cmd: str | None = field(
         default=None,
         metadata={
@@ -488,13 +507,32 @@ class TrainEngineConfig:
         default="lora",
         metadata={"help": "peft method type. Only LoRA is supported for now."},
     )
-    scheduling_spec: SchedulingSpec = field(
-        default_factory=lambda: SchedulingSpec(
-            cmd="python -m areal.scheduler.rpc.rpc_server"
+    scheduling_spec: tuple[SchedulingSpec, ...] = field(
+        default_factory=lambda: (
+            SchedulingSpec(cmd="python -m areal.scheduler.rpc.rpc_server"),
         ),
-        metadata={"help": "train engine schedule specs"},
+        metadata={
+            "help": "Train engine schedule specs. Can accept 1 or 2 SchedulingSpec: "
+            "if 1 spec provided, it's used for both worker and engine, engine is embedded in the worker; "
+            "if 2 specs provided, first one is for worker, second one is for engine. "
+            "Currently only used by the TrainController."
+        },
+    )
+    scheduling_strategy: SchedulingStrategy = field(
+        default_factory=SchedulingStrategy,
+        metadata={
+            "help": "The scheduling strategy of this TrainEngine, either separation or colocation. "
+            "Currently only used by the TrainController."
+        },
     )
-    scheduling_strategy: SchedulingStrategy = field(default_factory=SchedulingStrategy)
+
+    def __post_init__(self):
+        """Reject a ``scheduling_spec`` that does not hold one or two entries."""
+        spec_count = len(self.scheduling_spec)
+        if spec_count in (1, 2):
+            return
+        raise ValueError(
+            f"scheduling_spec must contain 1 or 2 SchedulingSpec, got {spec_count}"
+        )
 
 
 @dataclass
@@ -605,6 +643,18 @@ class PPOActorConfig(TrainEngineConfig):
             "choices": ["token", "sequence"],
         },
     )
+    # Proximal Log-Probability Computation Method
+    prox_logp_method: str = field(
+        default=PROX_LOGP_METHOD_RECOMPUTE,
+        metadata={
+            "help": "Method for computing proximal policy log-probabilities in decoupled PPO. "
+            "Only effective when use_decoupled_loss=True. Options: "
+            "'recompute' (default): Standard decoupled PPO, recompute proximal policy via forward pass. "
+            "'loglinear': Use log-linear interpolation to approximate proximal policy (skip forward pass). "
+            "'metrics': Like 'recompute', but also compute approximation metrics for evaluation.",
+            "choices": PROX_LOGP_METHODS_ALL,
+        },
+    )
     # Advanced Options
     dynamic_sampling: bool = field(
         default=False,
@@ -702,6 +752,8 @@ class vLLMConfig:
     )
     enable_sleep_mode: bool = False
     uvicorn_log_level: str = "warning"
+    enable_lora: bool = False
+    lora_modules: str = ""
 
     @staticmethod
     def build_args(
@@ -726,6 +778,18 @@ def build_args(
             args["port"] = port
         if host is not None:
             args["host"] = host
+        # handle lora modules separately
+        lm = args.get("lora_modules")
+        if lm:
+            if isinstance(lm, str):
+                lm = [lm]
+            if isinstance(lm, (list, tuple)):
+                try:
+                    args["lora_modules"] = [
+                        json.dumps(json.loads(s), separators=(",", ":")) for s in lm
+                    ]
+                except json.JSONDecodeError as e:
+                    raise ValueError(f"Invalid JSON string in lora_modules: {e}") from e
         return args
 
     @staticmethod
@@ -977,13 +1041,36 @@ class InferenceEngineConfig:
             "help": "The grace period after calling /pause_generation. Wait until all requests have been dropped."
         },
     )
-    scheduling_spec: SchedulingSpec = field(
-        default_factory=lambda: SchedulingSpec(
-            cmd="python -m areal.scheduler.rpc.rpc_server"
+    scheduling_spec: tuple[SchedulingSpec, ...] = field(
+        default_factory=lambda: (
+            SchedulingSpec(cmd="python -m areal.scheduler.rpc.rpc_server"),
         ),
-        metadata={"help": "inference engine schedule specs"},
+        metadata={
+            "help": "inference engine schedule specs. Can accept 1 or 2 SchedulingSpec: "
+            "if 1 spec provided, it's used for both worker and engine, engine is embedded in the worker; "
+            "if 2 specs provided, first one is for worker, second one is for engine. "
+            "Currently only used by the RolloutController."
+        },
+    )
+    # How this inference engine is scheduled; see the help text for details.
+    scheduling_strategy: SchedulingStrategy = field(
+        default_factory=SchedulingStrategy,
+        metadata={
+            "help": "The scheduling strategy of this inference engine, either separation or colocation. "
+            "Currently only used by the RolloutController."
+        },
+    )
+    use_lora: bool = field(
+        default=False,
+        metadata={"help": "Whether to use LoRA. Should be same as actors LORA option."},
     )
-    scheduling_strategy: SchedulingStrategy = field(default_factory=SchedulingStrategy)
+
+    def __post_init__(self):
+        """Reject a ``scheduling_spec`` that does not hold one or two entries."""
+        spec_count = len(self.scheduling_spec)
+        if spec_count in (1, 2):
+            return
+        raise ValueError(
+            f"scheduling_spec must contain 1 or 2 SchedulingSpec, got {spec_count}"
+        )
 
 
 @dataclass
@@ -1148,6 +1235,15 @@ class PerfTracerConfig:
             )
         },
     )
+    profile_steps: list[int] | None = field(
+        default=None,
+        metadata={
+            "help": (
+                "List of step numbers at which to capture detailed profiling traces. "
+                "If None, no detailed profiling traces are captured."
+            )
+        },
+    )
     session_tracer: SessionTracerConfig | None = field(
         default=None,
         metadata={"help": "Session tracing configuration."},
@@ -1223,7 +1319,7 @@ class SchedulerConfig:
 
 
 @dataclass
-class DatasetConfig:
+class _DatasetConfig:
     """Configuration for dataset loading and preprocessing."""
 
     path: str = field(
@@ -1262,6 +1358,27 @@ class DatasetConfig:
     )
 
 
+@dataclass
+class TrainDatasetConfig(_DatasetConfig):
+    """Configuration for training dataset loading and preprocessing.
+
+    Adds no fields of its own; everything is inherited from `_DatasetConfig`.
+    """
+
+
+@dataclass
+class ValidDatasetConfig(_DatasetConfig):
+    """Configuration for validation dataset loading and preprocessing.
+
+    Its default values differ from those of `TrainDatasetConfig`:
+    `shuffle` and `drop_last` both default to False here.
+    """
+
+    shuffle: bool = field(
+        default=False,
+        metadata={"help": "Whether to shuffle the dataset"},
+    )
+    drop_last: bool = field(
+        default=False,
+        metadata={"help": "Drop the last incomplete batch"},
+    )
+
+
 @dataclass
 class SlurmLauncherConfig:
     """Configuration for launching the training jobs with Slurm."""
@@ -1359,6 +1476,13 @@ class BaseExperimentConfig:
         metadata={"help": "Pattern-based GPU parallel strategy allocation mode. "},
     )
     seed: int = field(default=1, metadata={"help": "Random seed for reproducibility."})
+    enable_offload: bool = field(
+        default=False,
+        metadata={
+            "help": "Whether to enable training offload using torch_memory_saver. "
+            "This requires setting up the environment for TMS (e.g., via LD_PRELOAD)."
+        },
+    )
     total_train_epochs: int = field(
         default=1, metadata={"help": "Total number of epochs to train the model."}
     )
@@ -1381,8 +1505,8 @@ class BaseExperimentConfig:
         metadata={"help": "Path to the tokenizer."},
     )
 
-    train_dataset: DatasetConfig = field(default_factory=DatasetConfig)
-    valid_dataset: DatasetConfig | None = field(default=None)
+    train_dataset: TrainDatasetConfig = field(default_factory=TrainDatasetConfig)
+    valid_dataset: ValidDatasetConfig | None = field(default=None)
 
     saver: SaverConfig = field(default_factory=SaverConfig)
     evaluator: EvaluatorConfig = field(default_factory=EvaluatorConfig)
@@ -1466,7 +1590,7 @@ def to_structured_cfg(cfg, config_cls):
     return cfg
 
 
-def load_expr_config(argv: list[str], config_cls):
+def load_expr_config(argv: list[str], config_cls: type[ConfigT]) -> tuple[ConfigT, str]:
     cfg, config_file = parse_cli_args(argv)
     cfg = to_structured_cfg(cfg, config_cls=config_cls)
     cfg = OmegaConf.to_object(cfg)