7 changes: 7 additions & 0 deletions examples/grpo_frozen_lake/README.md
@@ -0,0 +1,7 @@
# Frozen Lake

## Prepare the environment and data

```
pip install "gymnasium[toy_text]"
```
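
To prepare the data, run the `get_frozen_lake_data.py` script shipped with this example, then point Trinity at the generated parquet files. A sketch of the remaining steps, assuming the script's default output location (a `data/frozenlake` directory next to the script) and the `trinity run` entry point used by the other examples:

```
python examples/grpo_frozen_lake/get_frozen_lake_data.py
export TRINITY_TASKSET_PATH=examples/grpo_frozen_lake/data/frozenlake
trinity run --config examples/grpo_frozen_lake/frozen_lake.yaml
```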
85 changes: 85 additions & 0 deletions examples/grpo_frozen_lake/frozen_lake.yaml
@@ -0,0 +1,85 @@
# TODO: double-check this config once the example is finalized
project: "FrozenLake"
name: "test-trinity-0.6B"
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
algorithm:
algorithm_type: grpo
repeat_times: 8
optimizer:
lr: 1e-6
policy_loss_fn_args:
loss_agg_mode: "seq-mean-token-sum"
clip_range_low: 0.2
clip_range_high: 0.28
kl_loss_fn_args:
kl_coef: 0.0
model:
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-0.6B}
max_prompt_tokens: 20480
max_response_tokens: 4096
temperature: 0.7
cluster:
node_num: 1
gpu_per_node: 8
buffer:
total_epochs: 1
batch_size: 32
explorer_input:
taskset:
name: frozenlake
storage_type: file
path: ${oc.env:TRINITY_TASKSET_PATH}
split: train
workflow_args:
max_steps: 10
is_slippery: false
eval_tasksets:
- name: frozenlake
storage_type: file
path: ${oc.env:TRINITY_TASKSET_PATH}
split: test
workflow_args:
max_steps: 10
is_slippery: false
rollout_args:
n: 4
top_p: 0.8
top_k: 20
default_workflow_type: 'frozen_lake_workflow'
explorer:
eval_on_startup: false
eval_interval: 10
runner_per_model: 8
rollout_model:
engine_num: 2
tensor_parallel_size: 2
enable_thinking: true
enable_chunked_prefill: true
enforce_eager: false
dtype: bfloat16
seed: 42
gpu_memory_utilization: 0.85
trainer:
trainer_type: 'verl'
save_interval: 40
use_dynamic_bsz: true
max_token_len_per_gpu: 16384
ulysses_sequence_parallel_size: 2
trainer_config:
actor_rollout_ref:
hybrid_engine: true
model:
use_remove_padding: true
enable_gradient_checkpointing: true
actor:
clip_ratio_high: 0.28
fsdp_config:
param_offload: true
optimizer_offload: true
ref:
fsdp_config:
param_offload: true
synchronizer:
sync_method: nccl
sync_interval: 2
sync_timeout: 1200
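
A quick sanity check on the GPU budget, assuming the usual Trinity layout where rollout engines and the trainer split the node: the explorer's vLLM engines take `engine_num × tensor_parallel_size = 2 × 2 = 4` GPUs, leaving the other 4 of `gpu_per_node: 8` for the FSDP trainer, whose weights are pushed to the rollout engines over NCCL every `sync_interval: 2` training steps.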
91 changes: 91 additions & 0 deletions examples/grpo_frozen_lake/get_frozen_lake_data.py
@@ -0,0 +1,91 @@
"""
Modified from https://github.com/rllm-org/rllm/blob/main/examples/frozenlake/prepare_frozenlake_data.py
"""
import os

import numpy as np
import pandas as pd

from trinity.common.constants import TASKSET_PATH_ENV_VAR

if os.environ.get(TASKSET_PATH_ENV_VAR) is not None:
    DATA_ROOT_DIR = os.path.dirname(os.environ.get(TASKSET_PATH_ENV_VAR))
else:
    DATA_ROOT_DIR = os.path.join(os.path.dirname(__file__), "data")


def save_dataset_to_local(name: str, data: list[dict], split: str = "default") -> str:
    """Save a dataset split as a parquet file under DATA_ROOT_DIR.

    Args:
        name: Name of the dataset
        data: List of dictionaries containing the dataset examples
        split: Split name (e.g., 'train', 'test', 'default')

    Returns:
        str: Path to the saved parquet file
    """
    dataset_dir = os.path.join(DATA_ROOT_DIR, name)
    os.makedirs(dataset_dir, exist_ok=True)

    # Convert to DataFrame and save
    data_df = pd.DataFrame(data)
    dataset_path = os.path.join(dataset_dir, f"{split}.parquet")
    data_df.to_parquet(dataset_path)

    print(
        f"Saved dataset '{name}' split '{split}' with {len(data)} examples at {dataset_path}. "
        f"Make sure to set the environment variable {TASKSET_PATH_ENV_VAR} to {DATA_ROOT_DIR}/{name}."
    )

    return dataset_path


def prepare_frozenlake_data(train_size=10000, test_size=100):
    """Prepare and save FrozenLake datasets for training and testing.

    Args:
        train_size (int): Number of training examples to generate
        test_size (int): Number of test examples to generate

    Returns:
        tuple: (train_data, test_data) - Lists of data dictionaries
    """
    # Set random seed for reproducibility
    np.random.seed(42)

    # Generate random parameters for train and test sets
    train_seeds = np.random.randint(0, 100000, size=train_size)
    test_seeds = np.random.randint(0, 100000, size=test_size)
    train_sizes = np.random.randint(2, 10, size=train_size)
    test_sizes = np.random.randint(2, 10, size=test_size)
    train_ps = np.random.uniform(0.6, 0.85, size=train_size)
    test_ps = np.random.uniform(0.6, 0.85, size=test_size)

    def frozenlake_process_fn(seed, size, p, idx):
        """Process function to create FrozenLake task instances."""
        return {"seed": seed, "size": size, "p": p, "index": idx, "uid": f"{seed}_{size}_{p}"}

    # Create train and test data
    train_data = [
        frozenlake_process_fn(seed, train_sizes[idx], train_ps[idx], idx)
        for idx, seed in enumerate(train_seeds)
    ]
    test_data = [
        frozenlake_process_fn(seed, test_sizes[idx], test_ps[idx], idx)
        for idx, seed in enumerate(test_seeds)
    ]

    # Save datasets under DATA_ROOT_DIR
    save_dataset_to_local("frozenlake", train_data, "train")
    save_dataset_to_local("frozenlake", test_data, "test")

    return train_data, test_data


if __name__ == "__main__":
    train_data, test_data = prepare_frozenlake_data()
    print(f"Train dataset: {len(train_data)} examples")
    print(f"Test dataset: {len(test_data)} examples")
    print("Sample train example:", train_data[0])
    print("Sample test example:", test_data[0])
13 changes: 13 additions & 0 deletions trinity/common/models/vllm_model.py
@@ -361,6 +361,7 @@ async def convert_messages_to_experience(
"""Convert a list of messages into an experience."""
if self.tokenizer is None:
await self._initialize_tokenizer()
is_truncated = False
if self.chat_template is None:
self.chat_template = self.tokenizer.get_chat_template()
token_ids, action_mask, prompt_length = self.action_mask_method(
Expand All @@ -370,12 +371,24 @@ async def convert_messages_to_experience(
            chat_template=self.chat_template,
            enable_thinking=self.enable_thinking,
        )  # (seq_length, ), (seq_length, )

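        # Cap sequences at max_model_len - 1 rather than max_model_len,
        # presumably so the logprobs() call below, which replays the full
        # sequence through vLLM, still has room for one further token.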
        if len(token_ids) > self.config.max_model_len - 1:
            is_truncated = True
            self.logger.warning(
                f"{len(token_ids) = } exceeds the length limit {self.config.max_model_len - 1 = }"
            )
            token_ids = token_ids[: self.config.max_model_len - 1]
            action_mask = action_mask[: self.config.max_model_len - 1]

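        # logprobs has length seq_length - 1, with logprobs[i] scoring
        # token_ids[i + 1]; slicing from prompt_length - 1 in the return
        # statement therefore yields exactly the response tokens' log-probs.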
        logprobs = await self.logprobs(token_ids=token_ids.tolist())  # (seq_length - 1,)
        return Experience(
            tokens=token_ids,
            logprobs=logprobs[prompt_length - 1 :],
            prompt_length=prompt_length,
            action_mask=action_mask[prompt_length:],  # Exclude the prompt tokens
            info={"is_truncated": is_truncated},
            prompt_text=self.tokenizer.decode(token_ids[:prompt_length]),
            response_text=self.tokenizer.decode(token_ids[prompt_length:]),
        )

    async def shutdown(self):
2 changes: 2 additions & 0 deletions trinity/common/workflows/__init__.py
@@ -29,6 +29,7 @@
    RAFTReflectAlfworldWorkflow,
)
from trinity.common.workflows.envs.email_searcher.workflow import EmailSearchWorkflow
from trinity.common.workflows.envs.frozen_lake.workflow import FrozenLakeWorkflow
from trinity.common.workflows.envs.sciworld.sciworld_workflow import SciWorldWorkflow
from trinity.common.workflows.envs.webshop.webshop_workflow import WebShopWorkflow
from trinity.common.workflows.eval_workflow import (
@@ -94,4 +95,5 @@
"SimpleMMWorkflow",
"RubricJudgeWorkflow",
"AgentScopeWorkflowAdapter",
"FrozenLakeWorkflow",
]
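
The config's `default_workflow_type: 'frozen_lake_workflow'` presumably resolves to this class through Trinity's workflow registry; the registration itself would live in `trinity/common/workflows/envs/frozen_lake/workflow.py`, which this PR adds but which is not included in the excerpt above.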