"pytorch_pretrained_bert/tokenization_bert.py" did not exist on "751beb9e73c39065ea8b76fbfead9007a70a054f"
Commit 7f6cc211 authored by jerrrrry's avatar jerrrrry
Browse files

Initial commit

parents
Pipeline #2874 failed with stages
in 0 seconds
# requirements.txt records the full set of dependencies for development
accelerate
codetiming
datasets
dill
hydra-core
numpy<2.0.0
pandas
peft
pyarrow>=15.0.0
pybind11
pylatexenc
tensordict>=0.8.0,<=0.9.1,!=0.9.0
transformers==4.52.4
ray==2.46.0
wandb
mathruler
torchdata
einops
qwen_vl_utils
torchvision==0.20.1
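# A typical way to consume this file (a general sketch; the file name "requirements.txt" is assumed):
#   pip install -r requirements.txt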
# requirements.txt records the full set of dependencies for development
accelerate
codetiming
datasets
dill
flash-attn
hydra-core
liger-kernel
numpy<2.0.0
pandas
peft
pyarrow>=19.0.0
pybind11
pylatexenc
pre-commit
ray[default]
tensordict>=0.8.0,<=0.9.1,!=0.9.0
torchdata
transformers
# vllm==0.8.4
wandb
packaging>=20.0
uvicorn
fastapi
latex2sympy2_extended
math_verify
# requirements.txt records the full set of dependencies for development
accelerate
codetiming
datasets
dill
flash-attn
hydra-core
numpy<2.0.0
pandas
peft
pyarrow>=19.0.0
pybind11
pylatexenc
ray[default]>=2.10
tensordict>=0.8.0,<=0.9.1,!=0.9.0
torchdata
torchvision
transformers
wandb
sglang[all]==0.4.6.post5
torch-memory-saver>=0.0.5
huggingface_hub
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
#export ROCR_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES
#export CUDA_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES
export RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=1
#unset ROCR_VISIBLE_DEVICES
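# The exports above make all 8 AMD GPUs visible through HIP; as far as we understand the flag,
# RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=1 keeps Ray from overriding HIP_VISIBLE_DEVICES in its workers.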
# PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
# data.train_files=/data/gsm8k/train.parquet \
# data.val_files=/data/gsm8k/test.parquet \
# data.train_batch_size=256 \
# data.max_prompt_length=512 \
# data.max_response_length=256 \
# actor_rollout_ref.model.path=/model/Qwen2.5-0.5B-Instruct \
# actor_rollout_ref.actor.optim.lr=1e-6 \
# actor_rollout_ref.actor.ppo_mini_batch_size=64 \
# actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
# actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
# actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
# actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
# actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
# critic.optim.lr=1e-5 \
# critic.model.path=/model/Qwen2.5-0.5B-Instruct \
# critic.ppo_micro_batch_size_per_gpu=4 \
# algorithm.kl_ctrl.kl_coef=0.001 \
# trainer.logger=console \
# trainer.val_before_train=False \
# trainer.n_gpus_per_node=1 \
# trainer.nnodes=1 \
# trainer.save_freq=10 \
# trainer.test_freq=10 \
# trainer.total_epochs=15 2>&1 | tee verl_demo.log
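# The live command below runs GRPO (algorithm.adv_estimator=grpo) with a vLLM rollout on a single node
# with 8 GPUs, sampling n=5 responses per prompt; all values come from the arguments that follow.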
PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
algorithm.adv_estimator=grpo \
data.train_files=/data/gsm8k/train.parquet \
data.val_files=/data/gsm8k/test.parquet \
data.train_batch_size=1024 \
data.max_prompt_length=512 \
data.max_response_length=1024 \
data.filter_overlong_prompts=True \
data.truncation='error' \
actor_rollout_ref.model.path=/model/Qwen2.5-0.5B-Instruct \
actor_rollout_ref.actor.optim.lr=1e-6 \
actor_rollout_ref.model.use_remove_padding=True \
actor_rollout_ref.actor.ppo_mini_batch_size=256 \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=32 \
actor_rollout_ref.actor.use_kl_loss=True \
actor_rollout_ref.actor.kl_loss_coef=0.001 \
actor_rollout_ref.actor.kl_loss_type=low_var_kl \
actor_rollout_ref.actor.entropy_coeff=0 \
actor_rollout_ref.model.enable_gradient_checkpointing=True \
actor_rollout_ref.actor.fsdp_config.param_offload=False \
actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32 \
actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
actor_rollout_ref.rollout.name=vllm \
actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
actor_rollout_ref.rollout.n=5 \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32 \
actor_rollout_ref.ref.fsdp_config.param_offload=True \
trainer.critic_warmup=0 \
trainer.logger=['console'] \
trainer.project_name='test' \
trainer.experiment_name='qwen2_5_0_5b_function_rm' \
trainer.n_gpus_per_node=8 \
trainer.default_local_dir=/verl/qwen2_5_14b_verl_grpo_8 \
trainer.nnodes=1 \
trainer.save_freq=5 \
trainer.test_freq=5 \
trainer.total_epochs=1
# export CUDA_VISIBLE_DEVICES=0
# export RAY_DISABLE_GPU_AUTODETECTION=1
# PYTHONUNBUFFERED=1 python3 -m verl.trainer.main_ppo \
# data.train_files=/data/gsm8k/train.parquet \
# data.val_files=/data/gsm8k/test.parquet \
# data.train_batch_size=256 \
# data.max_prompt_length=512 \
# data.max_response_length=256 \
# actor_rollout_ref.model.path=/model/Qwen2.5-0.5B-Instruct \
# actor_rollout_ref.actor.optim.lr=1e-6 \
# actor_rollout_ref.actor.ppo_mini_batch_size=64 \
# actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=4 \
# actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
# actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
# actor_rollout_ref.rollout.gpu_memory_utilization=0.4 \
# actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
# critic.optim.lr=1e-5 \
# critic.model.path=/model/Qwen2.5-0.5B-Instruct \
# critic.ppo_micro_batch_size_per_gpu=4 \
# algorithm.kl_ctrl.kl_coef=0.001 \
# trainer.logger=console \
# trainer.val_before_train=False \
# trainer.n_gpus_per_node=1 \
# trainer.nnodes=1 \
# trainer.save_freq=10 \
# trainer.test_freq=10 \
# trainer.total_epochs=15 2>&1 | tee verl_demo.log
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2025 Bytedance Ltd. and/or its affiliates
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import warnings
from contextlib import contextmanager
from importlib.metadata import version
from typing import Any, Callable, ContextManager, Optional
import numpy as np
import torch
import torch.distributed as dist
from accelerate import init_empty_weights
from megatron.core import dist_checkpointing
from megatron.core import parallel_state as mpu
from megatron.core.dist_checkpointing.mapping import ShardedTensor
from megatron.core.dist_checkpointing.serialization import StrictHandling
from megatron.core.models.gpt.gpt_model import ModelType
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed
from packaging.version import Version
from transformers import AutoConfig
from verl.model_merger.megatron_model_merger import get_dynamic_pipeline_shards
from verl.models.mcore import hf_to_mcore_config
from verl.utils.device import get_device_name, get_torch_device
from verl.utils.megatron_utils import get_model
def _init_args():
"""
Examples:
1. single rank conversion for any model:
> python converter_hf_to_mcore.py --hf_model_path ${hf_model} --output_path ${output_path}
2. distributed conversion for DeepseekV3 671B:
> torchrun --nproc_per_node 1 --nnodes 4 --node_rank ${RANK} converter_hf_to_mcore.py \
--hf_model_path ${hf_model} --output_path ${output_path}
"""
parser = argparse.ArgumentParser()
parser.add_argument("--hf_model_path", type=str, required=True, help="The path for the huggingface model")
parser.add_argument("--output_path", type=str, required=True, help="The path for the output mcore model")
parser.add_argument("--use_cpu_initialization", action="store_true", help="Whether to use cpu initialization")
parser.add_argument("--test", action="store_true", help="Whether to test the conversion")
parser.add_argument("--trust_remote_code", action="store_true", help="Whether to trust remote code")
args = parser.parse_args()
return args
def test_conversion(megatron_model_provider, tfconfig, output_path, model):
########### test ###########
# load model
model_test = get_model(
model_provider_func=megatron_model_provider,
model_type=ModelType.encoder_or_decoder,
wrap_with_ddp=True,
transformer_config=tfconfig,
)
ref_state_dict = model_test[0].module.sharded_state_dict()
dist_checkpointing.load(ref_state_dict, output_path, strict=StrictHandling.ASSUME_OK_UNEXPECTED)
dut_state_dict = model[0].module.state_dict()
for name in dut_state_dict.keys():
if dut_state_dict[name] is None:
print(f"[Warning] {name} is none in dut_state_dict")
continue
dut_data = dut_state_dict[name].data
if name in ref_state_dict:
ref_data = ref_state_dict[name]
if isinstance(ref_data, ShardedTensor):
ref_data = ref_data.data.view(ref_data.local_shape)
else:
ref_data = ref_data.data
assert dut_data.shape == ref_data.shape, f"{name=} {dut_data.shape=} {ref_data.shape=}"
assert (dut_data == ref_data).all(), f"{name} is not equal"
print(f"{name} is equal")
else:
print(f"[Warning] {name} is not in ref_state_dict")
for name in ref_state_dict.keys():
if ref_state_dict[name] is None:
print(f"[Warning] {name} is none in ref_state_dict")
continue
ref_data = ref_state_dict[name]
if isinstance(ref_data, ShardedTensor):
ref_data = ref_data.data.view(ref_data.local_shape)
else:
ref_data = ref_data.data
if name in dut_state_dict:
dut_data = dut_state_dict[name].data
assert dut_data.shape == ref_data.shape, f"{name=} {dut_data.shape=} {ref_data.shape=}"
assert (dut_data == ref_data).all(), f"{name} is not equal"
print(f"{name} is equal")
else:
print(f"[Warning] {name} is not in dut_state_dict")
print("Conversion test passed!")
@torch.inference_mode()
def convert_checkpoint_from_transformers_to_megatron(
hf_model, model, hf_config, layer_start_end: Optional[tuple[int, int]] = None
):
if layer_start_end is None:
layer_start_end = (0, len(model.decoder.layers))
layer_start, layer_end = layer_start_end
pp_rank = mpu.get_pipeline_model_parallel_rank()
pp_size = mpu.get_pipeline_model_parallel_world_size()
numel = 0
num_attention_heads = hf_config.num_attention_heads
num_key_value_heads = hf_config.num_key_value_heads
hidden_dim = hf_config.hidden_size
head_dim = getattr(hf_config, "head_dim", hidden_dim // num_attention_heads)
if num_attention_heads != num_key_value_heads:
print("[WARNING] Converting GQA model")
has_qkv_bias = getattr(hf_config, "qkv_bias", False) or getattr(hf_config, "attention_bias", False)
has_share_expert = getattr(hf_config, "shared_expert_intermediate_size", None)
if pp_rank == 0:
numel += safe_copy(hf_model.model.embed_tokens.weight, model.embedding.word_embeddings.weight)
assert len(model.decoder.layers) == (layer_end - layer_start), (
f"Expected {len(model.decoder.layers)} layers, but got {layer_end - layer_start}"
)
for layer_idx, (layer, hf_layer) in enumerate(
zip(model.decoder.layers, hf_model.model.layers[layer_start:layer_end], strict=True)
):
global_layer_idx = layer_idx + layer_start
numel_cur = numel
numel += safe_copy(hf_layer.input_layernorm.weight, layer.self_attention.linear_qkv.layer_norm_weight)
q = hf_layer.self_attn.q_proj.weight.view(
[num_key_value_heads, head_dim * num_attention_heads // num_key_value_heads, -1]
)
k = hf_layer.self_attn.k_proj.weight.view([num_key_value_heads, head_dim, -1])
v = hf_layer.self_attn.v_proj.weight.view([num_key_value_heads, head_dim, -1])
qkv = torch.cat([q, k, v], dim=1).view(-1, hidden_dim).contiguous()
numel += safe_copy(qkv, layer.self_attention.linear_qkv.weight)
if has_qkv_bias:
q_bias = hf_layer.self_attn.q_proj.bias.view([num_key_value_heads, -1])
k_bias = hf_layer.self_attn.k_proj.bias.view([num_key_value_heads, -1])
v_bias = hf_layer.self_attn.v_proj.bias.view([num_key_value_heads, -1])
qkv_bias = torch.cat([q_bias, k_bias, v_bias], dim=1).view(-1).contiguous()
numel += safe_copy(qkv_bias, layer.self_attention.linear_qkv.bias)
if hasattr(hf_layer.self_attn, "q_norm"):
numel += safe_copy(hf_layer.self_attn.q_norm.weight.data, layer.self_attention.q_layernorm.weight)
numel += safe_copy(hf_layer.self_attn.k_norm.weight.data, layer.self_attention.k_layernorm.weight)
numel += safe_copy(hf_layer.self_attn.o_proj.weight, layer.self_attention.linear_proj.weight)
numel += safe_copy(hf_layer.post_attention_layernorm.weight, layer.pre_mlp_layernorm.weight)
numel += safe_copy(hf_layer.mlp.gate.weight, layer.mlp.router.weight)
for idx, hf_expert in enumerate(hf_layer.mlp.experts):
fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight])
numel += safe_copy(fc1_weight, layer.mlp.experts.linear_fc1._parameters[f"weight{idx}"])
numel += safe_copy(hf_expert.down_proj.weight, layer.mlp.experts.linear_fc2._parameters[f"weight{idx}"])
if has_share_expert:
numel += safe_copy(hf_layer.mlp.shared_expert_gate.weight, layer.mlp.shared_experts.gate_weight)
shared_fc1_weight = torch.cat(
[hf_layer.mlp.shared_expert.gate_proj.weight, hf_layer.mlp.shared_expert.up_proj.weight]
)
numel += safe_copy(shared_fc1_weight, layer.mlp.shared_experts.linear_fc1.weight)
numel += safe_copy(hf_layer.mlp.shared_expert.down_proj.weight, layer.mlp.shared_experts.linear_fc2.weight)
print(f"{pp_rank=} {global_layer_idx=} {layer_idx=} {numel=} numel this layer={numel - numel_cur}")
if pp_rank == pp_size - 1:
numel += safe_copy(hf_model.model.norm.weight, model.decoder.final_layernorm.weight)
numel += safe_copy(hf_model.lm_head.weight, model.output_layer.weight)
return numel
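# safe_copy checks that the shapes (and, unless skip_dtype_assert, the dtypes) match, copies src into dst
# in place, and returns the number of copied elements so callers can tally totals against the HF model.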
def safe_copy(
src_tensor: torch.Tensor,
dst_tensor: torch.Tensor,
skip_dtype_assert: bool = False,
):
if not skip_dtype_assert:
if src_tensor.dtype != dst_tensor.dtype:
raise ValueError(f"Get source dtype {src_tensor.dtype}, but target dtype {dst_tensor.dtype}")
assert src_tensor.shape == dst_tensor.shape
dst_tensor.data.copy_(src_tensor.data)
return src_tensor.numel()
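# Qwen2.5-VL variant: copies the vision tower (patch embed, fused QKV per block, projector) and then the
# language model, casting both models to bfloat16 and asserting that the copied element count matches the
# number of HF parameters in each part.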
@torch.inference_mode()
def convert_checkpoint_from_transformers_to_megatron_qwen2_5_vl(hfmodel, mgmodel, hf_config):
mgmodel = mgmodel.bfloat16()
hfmodel = hfmodel.bfloat16()
num_attention_heads = hf_config.num_attention_heads
num_query_groups = hf_config.num_key_value_heads
hidden_size = hf_config.hidden_size
head_dim = hidden_size // num_attention_heads
# 1. vision model
if Version(version("transformers")) < Version("4.52.0"):
print("Using transformers < 4.52 API to load vision model")
hfvision = hfmodel.visual
else:
hfvision = hfmodel.model.visual
mgvision = mgmodel.vision_model
vision_hidden_size = mgvision.config.hidden_size
vision_num_query_groups = mgvision.config.num_query_groups
vision_head_dim = vision_hidden_size // mgvision.config.num_attention_heads
copied_numel = 0
safe_copy(hfvision.rotary_pos_emb.inv_freq, mgvision.rotary_pos_emb.inv_freq)
copied_numel += safe_copy(hfvision.patch_embed.proj.weight, mgvision.patch_embed.proj.weight)
for hfblock, mgblock in zip(hfvision.blocks, mgvision.decoder.layers, strict=True):
# norm1 --> linear_qkv.norm
copied_numel += safe_copy(hfblock.norm1.weight, mgblock.self_attention.linear_qkv.layer_norm_weight)
# norm2 --> mlp.linear_fc1.norm
copied_numel += safe_copy(hfblock.norm2.weight, mgblock.mlp.linear_fc1.layer_norm_weight)
# qkv --> self_attention.linear_qkv
converted_weight = (
hfblock.attn.qkv.weight.view(3, vision_num_query_groups, -1, vision_head_dim, vision_hidden_size)
.transpose(0, 1)
.flatten(1, 2)
.reshape(-1, vision_hidden_size)
.contiguous()
)
copied_numel += safe_copy(converted_weight, mgblock.self_attention.linear_qkv.weight)
converted_bias = (
hfblock.attn.qkv.bias.view(3, vision_num_query_groups, -1)
.transpose(0, 1)
.flatten(1, 2)
.view(-1)
.contiguous()
)
copied_numel += safe_copy(converted_bias, mgblock.self_attention.linear_qkv.bias)
# proj --> self_attention.linear_proj
copied_numel += safe_copy(hfblock.attn.proj.weight, mgblock.self_attention.linear_proj.weight)
copied_numel += safe_copy(hfblock.attn.proj.bias, mgblock.self_attention.linear_proj.bias)
# mlp --> mlp: gate
fc1_weight = torch.cat([hfblock.mlp.gate_proj.weight, hfblock.mlp.up_proj.weight])
fc1_bias = torch.cat([hfblock.mlp.gate_proj.bias, hfblock.mlp.up_proj.bias])
copied_numel += safe_copy(fc1_weight, mgblock.mlp.linear_fc1.weight)
copied_numel += safe_copy(fc1_bias, mgblock.mlp.linear_fc1.bias)
copied_numel += safe_copy(hfblock.mlp.down_proj.weight, mgblock.mlp.linear_fc2.weight)
copied_numel += safe_copy(hfblock.mlp.down_proj.bias, mgblock.mlp.linear_fc2.bias)
# 2. vision projector
hfprojector = hfvision.merger
mgprojector = mgvision.projection
copied_numel += safe_copy(hfprojector.ln_q.weight, mgvision.decoder.final_layernorm.weight)
copied_numel += safe_copy(hfprojector.mlp[0].weight, mgprojector.encoder.linear_fc1.weight)
copied_numel += safe_copy(hfprojector.mlp[0].bias, mgprojector.encoder.linear_fc1.bias)
copied_numel += safe_copy(hfprojector.mlp[2].weight, mgprojector.encoder.linear_fc2.weight)
copied_numel += safe_copy(hfprojector.mlp[2].bias, mgprojector.encoder.linear_fc2.bias)
n_params = sum([t.numel() for t in hfvision.state_dict().values()])
assert n_params == copied_numel, f"n_params={n_params} != copied_numel={copied_numel}"
# 3. llm [just Qwen2]
if Version(version("transformers")) < Version("4.52.0"):
print("Using transformers < 4.52 API to load llm")
hfllm = hfmodel.model
else:
hfllm = hfmodel.model.language_model
mgllm = mgmodel.language_model
copied_numel = 0
copied_numel += safe_copy(hfllm.embed_tokens.weight, mgllm.embedding.word_embeddings.weight)
layermaps = zip(mgllm.decoder.layers, hfllm.layers, strict=True)
for mglayer, hflayer in layermaps:
copied_numel += safe_copy(hflayer.input_layernorm.weight, mglayer.self_attention.linear_qkv.layer_norm_weight)
q_proj_weight = hflayer.self_attn.q_proj.weight.view(num_query_groups, -1, head_dim, hidden_size)
k_proj_weight = hflayer.self_attn.k_proj.weight.view(num_query_groups, -1, head_dim, hidden_size)
v_proj_weight = hflayer.self_attn.v_proj.weight.view(num_query_groups, -1, head_dim, hidden_size)
qkv_proj = torch.cat([q_proj_weight, k_proj_weight, v_proj_weight], dim=1).view(-1, hidden_size).contiguous()
copied_numel += safe_copy(qkv_proj, mglayer.self_attention.linear_qkv.weight)
q_proj_bias = hflayer.self_attn.q_proj.bias.view(num_query_groups, -1)
k_proj_bias = hflayer.self_attn.k_proj.bias.view(num_query_groups, -1)
v_proj_bias = hflayer.self_attn.v_proj.bias.view(num_query_groups, -1)
qkv_bias = torch.cat([q_proj_bias, k_proj_bias, v_proj_bias], dim=1).view(-1).contiguous()
copied_numel += safe_copy(qkv_bias, mglayer.self_attention.linear_qkv.bias)
copied_numel += safe_copy(hflayer.self_attn.o_proj.weight, mglayer.self_attention.linear_proj.weight)
fc1_weight = torch.cat([hflayer.mlp.gate_proj.weight, hflayer.mlp.up_proj.weight])
copied_numel += safe_copy(fc1_weight, mglayer.mlp.linear_fc1.weight)
copied_numel += safe_copy(hflayer.mlp.down_proj.weight, mglayer.mlp.linear_fc2.weight)
copied_numel += safe_copy(hflayer.post_attention_layernorm.weight, mglayer.mlp.linear_fc1.layer_norm_weight)
copied_numel += safe_copy(hfllm.norm.weight, mgllm.decoder.final_layernorm.weight)
if not hf_config.tie_word_embeddings:
safe_copy(hfmodel.lm_head.weight, mgllm.output_layer.weight)
n_params = sum([t.numel() for t in hfllm.state_dict().values()])
assert n_params == copied_numel, f"n_params={n_params} != copied_numel={copied_numel}"
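# DeepseekV3 variant: handles the low-rank q/kv down- and up-projections with their layernorms, and the MoE
# router with its expert bias, supporting both grouped-GEMM and per-expert layouts; MTP weights are not
# supported yet (see the warning below).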
@torch.inference_mode()
def convert_checkpoint_from_transformers_to_megatron_dpskv3(
hf_model,
model,
hf_config,
tfconfig,
layer_start_end: Optional[tuple[int, int]] = None,
):
warnings.warn("MTP model is not supported yet", stacklevel=2)
if layer_start_end is None:
layer_start_end = (0, len(model.decoder.layers))
layer_start, layer_end = layer_start_end
numel: int = 0
pp_rank = mpu.get_pipeline_model_parallel_rank()
pp_size = mpu.get_pipeline_model_parallel_world_size()
if pp_rank == 0:
numel += safe_copy(hf_model.model.embed_tokens.weight, model.embedding.word_embeddings.weight)
assert len(model.decoder.layers) == (layer_end - layer_start), (
f"Expected {len(model.decoder.layers)} layers, but got {layer_end - layer_start}"
)
for layer_idx, (layer, hf_layer) in enumerate(
zip(model.decoder.layers, hf_model.model.layers[layer_start:layer_end], strict=True)
):
global_layer_idx = layer_idx + layer_start
numel_cur: int = numel
numel += safe_copy(hf_layer.input_layernorm.weight, layer.input_layernorm.weight)
if hf_config.q_lora_rank is None:
numel += safe_copy(hf_layer.self_attn.q_proj.weight, layer.self_attention.linear_q_proj.weight)
else:
numel += safe_copy(hf_layer.self_attn.q_a_proj.weight, layer.self_attention.linear_q_down_proj.weight)
numel += safe_copy(hf_layer.self_attn.q_b_proj.weight, layer.self_attention.linear_q_up_proj.weight)
numel += safe_copy(
hf_layer.self_attn.q_a_layernorm.weight, layer.self_attention.linear_q_up_proj.layer_norm_weight
)
numel += safe_copy(
hf_layer.self_attn.kv_a_proj_with_mqa.weight, layer.self_attention.linear_kv_down_proj.weight
)
numel += safe_copy(hf_layer.self_attn.kv_b_proj.weight, layer.self_attention.linear_kv_up_proj.weight)
numel += safe_copy(
hf_layer.self_attn.kv_a_layernorm.weight, layer.self_attention.linear_kv_up_proj.layer_norm_weight
)
numel += safe_copy(hf_layer.self_attn.o_proj.weight, layer.self_attention.linear_proj.weight)
if not hasattr(layer.mlp, "router"):
numel += safe_copy(hf_layer.post_attention_layernorm.weight, layer.mlp.linear_fc1.layer_norm_weight)
numel += safe_copy(
torch.cat([hf_layer.mlp.gate_proj.weight, hf_layer.mlp.up_proj.weight]), layer.mlp.linear_fc1.weight
)
numel += safe_copy(hf_layer.mlp.down_proj.weight, layer.mlp.linear_fc2.weight)
else:
numel += safe_copy(hf_layer.mlp.gate.weight, layer.mlp.router.weight)
# NOTE: the e_score_correction_bias in the mcore model is initialized in bfloat16 and recovered to fp32
# in the first forward pass, so there is always a small diff (~0.3%) in this bias between the two models
numel += safe_copy(
hf_layer.mlp.gate.e_score_correction_bias, layer.mlp.router.expert_bias, skip_dtype_assert=True
)
if tfconfig.moe_grouped_gemm:
for i, hf_expert in enumerate(hf_layer.mlp.experts):
fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight])
linear_fc1_weighti = getattr(layer.mlp.experts.linear_fc1, "weight" + str(i))
numel += safe_copy(fc1_weight, linear_fc1_weighti)
linear_fc2_weighti = getattr(layer.mlp.experts.linear_fc2, "weight" + str(i))
numel += safe_copy(hf_expert.down_proj.weight, linear_fc2_weighti)
else:
for i, hf_expert in enumerate(hf_layer.mlp.experts):
expert = layer.mlp.experts.local_experts[i]
fc1_weight = torch.cat([hf_expert.gate_proj.weight, hf_expert.up_proj.weight])
numel += safe_copy(fc1_weight, expert.linear_fc1.weight)
numel += safe_copy(hf_expert.down_proj.weight, expert.linear_fc2.weight)
numel += safe_copy(hf_layer.post_attention_layernorm.weight, layer.pre_mlp_layernorm.weight)
shared_fc1_weight = torch.cat(
[hf_layer.mlp.shared_experts.gate_proj.weight, hf_layer.mlp.shared_experts.up_proj.weight]
)
numel += safe_copy(shared_fc1_weight, layer.mlp.shared_experts.linear_fc1.weight)
numel += safe_copy(hf_layer.mlp.shared_experts.down_proj.weight, layer.mlp.shared_experts.linear_fc2.weight)
print(f"{pp_rank=} {global_layer_idx=} {layer_idx=} {numel=} numel this layer={numel - numel_cur}")
assert numel - numel_cur == sum([i.numel() for i in hf_layer.state_dict().values()]), "numel mismatch"
if pp_rank == pp_size - 1:
numel += safe_copy(hf_model.model.norm.weight, model.decoder.final_layernorm.weight)
if not hf_config.tie_word_embeddings:
numel += safe_copy(hf_model.lm_head.weight, model.output_layer.weight)
print(f"{pp_rank=} {numel=}")
return numel
@contextmanager
def noop_context() -> Any:
yield
def support_distributed_convert(hf_config: AutoConfig) -> bool:
for arch in ["DeepseekV3ForCausalLM", "Qwen3MoeForCausalLM", "Qwen2MoeForCausalLM"]:
if arch in hf_config.architectures:
return True
return False
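# Main entry point: initializes torch.distributed and Megatron parallel state (pipeline parallelism across the
# world size), builds the mcore model from the HF config, loads the HF weights, dispatches to the matching
# converter, and saves the result with Megatron dist_checkpointing (optionally verifying it when --test is passed).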
def convert_hf_to_mcore(hf_model_path, output_path, use_cpu_initialization=False, test=False, trust_remote_code=False):
os.makedirs(output_path, exist_ok=True)
if len(os.listdir(output_path)) > 0 and not test:
print(f"Output path {output_path} is not empty, skipping conversion")
return
# init torch distributed and mpu
if "WORLD_SIZE" not in os.environ:
os.environ["RANK"] = "0"
os.environ["WORLD_SIZE"] = "1"
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = "12355"
torch.distributed.init_process_group("nccl")
rank = dist.get_rank()
local_rank = os.getenv("LOCAL_RANK", 0)
world_size = dist.get_world_size()
get_torch_device().set_device(f"{get_device_name()}:{local_rank}")
mpu.initialize_model_parallel(
tensor_model_parallel_size=1,
pipeline_model_parallel_size=world_size,
virtual_pipeline_model_parallel_size=None,
context_parallel_size=1,
expert_model_parallel_size=1,
)
model_parallel_cuda_manual_seed(0)
# init hf config
hf_config = AutoConfig.from_pretrained(hf_model_path)
print(hf_config, flush=True)
if world_size > 1 and not support_distributed_convert(hf_config):
raise NotImplementedError(f"distributed conversion is not supported for {hf_config.architectures} yet.")
pipeline_shards = get_dynamic_pipeline_shards(hf_config.num_hidden_layers, world_size)
print(f"Pipeline shards: {pipeline_shards}", flush=True)
tfconfig = hf_to_mcore_config(
hf_config,
torch.bfloat16,
num_layers_in_first_pipeline_stage=pipeline_shards[0] if len(pipeline_shards) > 1 else None,
num_layers_in_last_pipeline_stage=pipeline_shards[-1] if len(pipeline_shards) > 2 else None,
)
tfconfig.use_cpu_initialization = use_cpu_initialization
tie_word_embeddings = getattr(hf_config, "tie_word_embeddings", False)
# init megatron model
def megatron_model_provider(pre_process, post_process):
from verl.models.mcore import init_mcore_model
parallel_model = init_mcore_model(
tfconfig,
hf_config,
pre_process,
post_process,
share_embeddings_and_output_weights=tie_word_embeddings,
value=False,
)
return parallel_model
context: Callable[..., ContextManager] = init_empty_weights if use_cpu_initialization else noop_context
with context():
model = get_model(
model_provider_func=megatron_model_provider,
model_type=ModelType.encoder_or_decoder,
wrap_with_ddp=False,
transformer_config=tfconfig,
)
if use_cpu_initialization:
# convert meta device to empty tensor so it can use `copy_` function
model[0].module = model[0].module.to_empty(device="cpu")
with warnings.catch_warnings():
warnings.simplefilter("ignore")
from transformers import AutoModelForCausalLM, AutoModelForImageTextToText
# init hf model
if "Qwen2_5_VLForConditionalGeneration" in hf_config.architectures:
hf_model = AutoModelForImageTextToText.from_pretrained(
hf_model_path, torch_dtype=torch.bfloat16, trust_remote_code=trust_remote_code
)
else:
hf_model = AutoModelForCausalLM.from_pretrained(
hf_model_path, torch_dtype=torch.bfloat16, trust_remote_code=trust_remote_code
)
hf_state_dict = hf_model.state_dict()
# distributed convert
if world_size > 1 and support_distributed_convert(hf_config):
pipeline_cumsum = np.cumsum(pipeline_shards)
layer_start = 0 if rank == 0 else pipeline_cumsum[rank - 1]
layer_end = pipeline_cumsum[rank]
if "DeepseekV3ForCausalLM" in hf_config.architectures:
numel_partial: int = convert_checkpoint_from_transformers_to_megatron_dpskv3(
hf_model, model[0].module, hf_config, tfconfig=tfconfig, layer_start_end=(layer_start, layer_end)
)
elif "Qwen3MoeForCausalLM" in hf_config.architectures or "Qwen2MoeForCausalLM" in hf_config.architectures:
numel_partial: int = convert_checkpoint_from_transformers_to_megatron(
hf_model, model[0].module, hf_config, layer_start_end=(layer_start, layer_end)
)
else:
raise NotImplementedError(f"Distributed conversion is not supported for {hf_config.architectures} yet.")
numel_tensor = torch.tensor([numel_partial]).to(get_device_name())
dist.all_reduce(numel_tensor, op=dist.ReduceOp.SUM)
numel = int(numel_tensor.cpu().item())
print(f"total numel={numel} vs {hf_model.num_parameters()=}")
if numel != hf_model.num_parameters():
warnings.warn(f"numel mismatch: {numel=} != {hf_model.num_parameters()=}", stacklevel=1)
# load hf state dict to megatron model
elif "Qwen2MoeForCausalLM" in hf_config.architectures:
convert_checkpoint_from_transformers_to_megatron(hf_model, model[0].module, hf_config)
elif "Qwen2_5_VLForConditionalGeneration" in hf_config.architectures:
convert_checkpoint_from_transformers_to_megatron_qwen2_5_vl(hf_model, model[0].module, hf_config)
elif "DeepseekV3ForCausalLM" in hf_config.architectures:
convert_checkpoint_from_transformers_to_megatron_dpskv3(hf_model, model[0].module, hf_config, tfconfig=tfconfig)
elif "Qwen3MoeForCausalLM" in hf_config.architectures:
convert_checkpoint_from_transformers_to_megatron(hf_model, model[0].module, hf_config)
else:
assert not use_cpu_initialization, "use_cpu_initialization is only supported for MoE models"
from verl.models.mcore.loader import load_state_dict_to_megatron_gptmodel
load_state_dict_to_megatron_gptmodel(
state_dict=hf_state_dict,
wrapped_models=model,
config=hf_config,
params_dtype=torch.bfloat16,
is_value_model=False,
)
megatron_state_dict = model[0].module.sharded_state_dict()
del hf_state_dict, hf_model
# save megatron model
if len(os.listdir(output_path)) == 0:
dist_checkpointing.save(megatron_state_dict, output_path, sharded_strategy=None, async_sharded_save=False)
if test:
test_conversion(megatron_model_provider, tfconfig, output_path, model)
if __name__ == "__main__":
args = _init_args()
convert_hf_to_mcore(
args.hf_model_path, args.output_path, args.use_cpu_initialization, args.test, args.trust_remote_code
)
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Diagnose script for checking OS/hardware/python/pip/verl/network.
The output of this script can be a very good hint to issue/problem.
"""
import os
import platform
import socket
import subprocess
import sys
import time
import psutil
try:
from urllib.parse import urlparse
from urllib.request import urlopen
except ImportError:
from urllib2 import urlopen
from urlparse import urlparse
import argparse
import importlib.metadata
import torch
URLS = {
"PYPI": "https://pypi.python.org/pypi/pip",
}
REGIONAL_URLS = {
"cn": {
"PYPI(douban)": "https://pypi.douban.com/",
"Conda(tsinghua)": "https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/",
}
}
def test_connection(name, url, timeout=10):
"""Simple connection test"""
urlinfo = urlparse(url)
start = time.time()
try:
socket.gethostbyname(urlinfo.netloc)
except Exception as e:
print("Error resolving DNS for {}: {}, {}".format(name, url, e))
return
dns_elapsed = time.time() - start
start = time.time()
try:
_ = urlopen(url, timeout=timeout)
except Exception as e:
print("Error open {}: {}, {}, DNS finished in {} sec.".format(name, url, e, dns_elapsed))
return
load_elapsed = time.time() - start
print("Timing for {}: {}, DNS: {:.4f} sec, LOAD: {:.4f} sec.".format(name, url, dns_elapsed, load_elapsed))
def check_python():
print("----------Python Info----------")
print("Version :", platform.python_version())
print("Compiler :", platform.python_compiler())
print("Build :", platform.python_build())
print("Arch :", platform.architecture())
def check_pip():
print("------------Pip Info-----------")
try:
import pip
print("Version :", pip.__version__)
print("Directory :", os.path.dirname(pip.__file__))
except ImportError:
print("No corresponding pip install for current python.")
def _get_current_git_commit():
try:
result = subprocess.run(["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True)
return result.stdout.strip()
except subprocess.CalledProcessError as e:
print(f"Error running git command: {e.stderr.strip()}")
return None
except FileNotFoundError:
print("Did not find command: git")
return None
def check_verl():
print("----------verl Info-----------")
try:
sys.path.insert(0, os.getcwd())
import verl
print("Version :", verl.__version__)
verl_dir = os.path.dirname(verl.__file__)
print("Directory :", verl_dir)
try:
commit_hash = _get_current_git_commit()
print("Commit Hash :", commit_hash)
except AttributeError:
print("Commit hash not found. ")
except ImportError as e:
print(f"No verl installed: {e}")
except Exception as e:
import traceback
if not isinstance(e, IOError):
print("An error occurred trying to import verl.")
print("This is very likely due to missing or incompatible library files.")
print(traceback.format_exc())
def check_os():
print("----------Platform Info----------")
print("Platform :", platform.platform())
print("system :", platform.system())
print("node :", platform.node())
print("release :", platform.release())
print("version :", platform.version())
def check_hardware():
print("----------Hardware Info----------")
print("machine :", platform.machine())
print("processor :", platform.processor())
if sys.platform.startswith("darwin"):
pipe = subprocess.Popen(("sysctl", "-a"), stdout=subprocess.PIPE)
output = pipe.communicate()[0]
for line in output.split(b"\n"):
if b"brand_string" in line or b"features" in line:
print(line.strip())
elif sys.platform.startswith("linux"):
subprocess.call(["lscpu"])
elif sys.platform.startswith("win32"):
subprocess.call(["wmic", "cpu", "get", "name"])
def check_network(args):
print("----------Network Test----------")
if args.timeout > 0:
print("Setting timeout: {}".format(args.timeout))
socket.setdefaulttimeout(args.timeout)
for region in args.region.strip().split(","):
r = region.strip().lower()
if not r:
continue
if r in REGIONAL_URLS:
URLS.update(REGIONAL_URLS[r])
else:
import warnings
warnings.warn("Region {} do not need specific test, please refer to global sites.".format(r), stacklevel=2)
for name, url in URLS.items():
test_connection(name, url, args.timeout)
def check_environment():
print("----------Environment----------")
for k, v in os.environ.items():
if k.startswith("VERL_") or k.startswith("OMP_") or k.startswith("KMP_") or k == "CC" or k == "CXX":
print('{}="{}"'.format(k, v))
def check_pip_package_versions():
packages = ["vllm", "sglang", "ray", "torch"]
for package in packages:
try:
version = importlib.metadata.version(package)
print(f"{package}\t : {version}")
except importlib.metadata.PackageNotFoundError:
print(f"{package}\t : not found.")
def check_cuda_versions():
if torch.cuda.is_available():
try:
cuda_runtime_version = torch.version.cuda
print(f"CUDA Runtime : {cuda_runtime_version}")
import subprocess
nvcc_output = subprocess.check_output(["nvcc", "--version"]).decode("utf-8")
cuda_compiler_version = next((line for line in nvcc_output.splitlines() if "release" in line), None)
if cuda_compiler_version:
print(f"CUDA Compiler : {cuda_compiler_version.strip()}")
else:
print("Could not determine CUDA compiler version.")
except FileNotFoundError as e:
print(f"CUDA compiler : Not found: {e}")
except Exception as e:
print(f"An error occurred while checking CUDA versions: {e}")
else:
print("CUDA is not available.")
def _get_cpu_memory():
"""
Get the total CPU memory capacity in GB.
"""
memory = psutil.virtual_memory()
return memory.total / (1024**3)
def _get_gpu_info():
"""
Get GPU type, GPU memory, and GPU count using nvidia-smi command.
"""
try:
result = subprocess.run(
["nvidia-smi", "--query-gpu=gpu_name,memory.total", "--format=csv,noheader,nounits"],
capture_output=True,
text=True,
check=True,
)
gpu_lines = result.stdout.strip().split("\n")
gpu_count = len(gpu_lines)
gpu_info = []
for line in gpu_lines:
gpu_name, gpu_memory = line.split(", ")
gpu_info.append(
{
"type": gpu_name,
"memory": float(gpu_memory) / 1024, # Convert to GB
}
)
return gpu_count, gpu_info
except subprocess.CalledProcessError:
print("Failed to execute nvidia-smi command.")
return 0, []
def _get_system_info():
"""
Get CPU memory capacity, GPU type, GPU memory, and GPU count.
"""
cpu_memory = _get_cpu_memory()
gpu_count, gpu_info = _get_gpu_info()
return {"cpu_memory": cpu_memory, "gpu_count": gpu_count, "gpu_info": gpu_info}
def check_system_info():
print("----------System Info----------")
system_info = _get_system_info()
print(f"CPU Memory\t: {system_info['cpu_memory']:.2f} GB")
print(f"GPU Count\t: {system_info['gpu_count']}")
for i, gpu in enumerate(system_info["gpu_info"]):
print(f"GPU {i + 1}\tType : {gpu['type']}")
print(f"GPU {i + 1}\tMemory : {gpu['memory']:.2f} GB")
def parse_args():
"""Parse arguments."""
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="Diagnose script for checking the current system.",
)
choices = ["python", "pip", "verl", "system", "os", "environment"]
for choice in choices:
parser.add_argument("--" + choice, default=1, type=int, help="Diagnose {}.".format(choice))
parser.add_argument("--network", default=0, type=int, help="Diagnose network.")
parser.add_argument("--hardware", default=0, type=int, help="Diagnose hardware.")
parser.add_argument(
"--region",
default="",
type=str,
help="Additional sites in which region(s) to test. \
Specify 'cn' for example to test mirror sites in China.",
)
parser.add_argument("--timeout", default=10, type=int, help="Connection test timeout threshold, 0 to disable.")
args = parser.parse_args()
return args
if __name__ == "__main__":
args = parse_args()
if args.python:
check_python()
if args.pip:
check_pip()
check_pip_package_versions()
if args.verl:
check_verl()
if args.os:
check_os()
if args.hardware:
check_hardware()
if args.network:
check_network(args)
if args.environment:
check_environment()
check_cuda_versions()
if args.system:
check_system_info()
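# Example invocation (a sketch; the script's file name and path are assumptions):
#   python3 scripts/diagnose.py --network 1 --region cn --timeout 5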
#!/usr/bin/env bash
set -euox pipefail
# Define config specifications: "config_name:output_file:config_arg"
CONFIG_SPECS=(
"ppo_trainer:_generated_ppo_trainer.yaml:"
"ppo_megatron_trainer:_generated_ppo_megatron_trainer.yaml:--config-name=ppo_megatron_trainer.yaml"
)
generate_config() {
local config_name="$1"
local output_file="$2"
local config_arg="$3"
local target_cfg="verl/trainer/config/${output_file}"
local tmp_header=$(mktemp)
local tmp_cfg=$(mktemp)
echo "# This reference configration yaml is automatically generated via 'scripts/generate_trainer_config.sh'" > "$tmp_header"
echo "# in which it invokes 'python3 scripts/print_cfg.py --cfg job ${config_arg}' to flatten the 'verl/trainer/config/${config_name}.yaml' config fields into a single file." >> "$tmp_header"
echo "# Do not modify this file directly." >> "$tmp_header"
echo "# The file is usually only for reference and never used." >> "$tmp_header"
echo "" >> "$tmp_header"
python3 scripts/print_cfg.py --cfg job ${config_arg} > "$tmp_cfg"
cat "$tmp_header" > "$target_cfg"
sed -n '/^actor_rollout_ref/,$p' "$tmp_cfg" >> "$target_cfg"
rm "$tmp_cfg" "$tmp_header"
echo "Generated: $target_cfg"
}
for spec in "${CONFIG_SPECS[@]}"; do
IFS=':' read -r config_name output_file config_arg <<< "$spec"
generate_config "$config_name" "$output_file" "$config_arg"
done
for spec in "${CONFIG_SPECS[@]}"; do
IFS=':' read -r config_name output_file config_arg <<< "$spec"
target_cfg="verl/trainer/config/${output_file}"
if ! git diff --exit-code -- "$target_cfg" >/dev/null; then
echo "✖ $target_cfg is out of date. Please regenerate via 'scripts/generate_trainer_config.sh' and commit the changes."
exit 1
fi
done
echo "All good"
exit 0
# Copyright 2025 Bytedance Ltd. and/or its affiliates
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script overrides a model with a custom config and random weights, mainly for creating small models for
debugging purposes.
Usage:
python scripts/init_random_model.py \
--hf_model_path <path_to_hf_model> \
--new_config_path <path_to_new_config.json> \
--output_path <path_to_output_model>
"""
import argparse
import json
import os
import warnings
from typing import Any
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, PretrainedConfig
def _init_args():
parser = argparse.ArgumentParser()
parser.add_argument("--hf_model_path", type=str, required=True, help="The path for the huggingface model")
parser.add_argument("--new_config_path", type=str, required=True, help="The path for the new config file")
parser.add_argument("--output_path", type=str, required=True, help="The path for the output random model")
args = parser.parse_args()
return args
def check_output_path(output_path: str):
if os.path.exists(output_path):
warnings.warn(f"Output path '{output_path}' already exists. Will do nothing.", stacklevel=2)
exit()
else:
os.makedirs(output_path, exist_ok=True)
print(f"Output path '{output_path}' created.")
def check_configs(original_config: dict[str, Any], new_config: dict[str, Any]) -> None:
"""
Check if the original config and new config are compatible.
This is a placeholder function; actual implementation may vary based on requirements.
"""
# Example check: ensure 'model_type' is the same
if new_config.get("model_type", None) is not None and original_config.get("model_type") != new_config.get(
"model_type"
):
raise RuntimeError("Model types do not match.")
for key in new_config:
if key not in original_config:
warnings.warn(
f"Key '{key}' in new config does not exist in original config, may not take effect.", stacklevel=2
)
def init_random_model(hf_model_path, new_config_path, output_path):
config = AutoConfig.from_pretrained(hf_model_path)
tokenizer = AutoTokenizer.from_pretrained(hf_model_path)
config_dict = PretrainedConfig.get_config_dict(hf_model_path)[0]
print(config_dict)
with open(new_config_path) as f:
new_config_dict = json.load(f)
check_configs(config_dict, new_config_dict)
config_dict.update(new_config_dict)
new_config = config.from_dict(config_dict)
print(f"new_config: {new_config}")
model = AutoModelForCausalLM.from_config(new_config)
model.save_pretrained(output_path)
tokenizer.save_pretrained(output_path)
new_config.save_pretrained(output_path)
print(f"Random model initialized and saved to {output_path}")
if __name__ == "__main__":
args = _init_args()
check_output_path(args.output_path)
init_random_model(
hf_model_path=args.hf_model_path, new_config_path=args.new_config_path, output_path=args.output_path
)
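# Illustrative new_config.json for shrinking a model (values are examples only, not from this repo):
#   {"num_hidden_layers": 2, "hidden_size": 128, "intermediate_size": 256, "num_attention_heads": 2}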
#!/bin/bash
USE_MEGATRON=${USE_MEGATRON:-1}
USE_SGLANG=${USE_SGLANG:-1}
export MAX_JOBS=32
echo "1. install inference frameworks and pytorch they need"
if [ $USE_SGLANG -eq 1 ]; then
pip install "sglang[all]==0.4.6.post1" --no-cache-dir --find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python && pip install torch-memory-saver --no-cache-dir
fi
pip install --no-cache-dir "vllm==0.8.5.post1" "torch==2.6.0" "torchvision==0.21.0" "torchaudio==2.6.0" "tensordict==0.6.2" torchdata
echo "2. install basic packages"
pip install "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \
"numpy<2.0.0" "pyarrow>=15.0.0" pandas \
ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler \
pytest py-spy pyext pre-commit ruff
pip install "nvidia-ml-py>=12.560.30" "fastapi[standard]>=0.115.0" "optree>=0.13.0" "pydantic>=2.9" "grpcio>=1.62.1"
echo "3. install FlashAttention and FlashInfer"
# Install flash-attn-2.7.4.post1 (cxx11abi=False)
wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
# Install flashinfer-0.2.2.post1+cu124 (cxx11abi=False)
# vllm-0.8.3 does not support flashinfer>=0.2.3
# see https://github.com/vllm-project/vllm/pull/15777
wget -nv https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl && \
pip install --no-cache-dir flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl
if [ $USE_MEGATRON -eq 1 ]; then
echo "4. install TransformerEngine and Megatron"
echo "Notice that TransformerEngine installation can take very long time, please be patient"
NVTE_FRAMEWORK=pytorch pip3 install --no-deps git+https://github.com/NVIDIA/TransformerEngine.git@v2.2.1
pip3 install --no-deps git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.12.2
fi
echo "5. May need to fix opencv"
pip install opencv-python
pip install opencv-fixer && \
python -c "from opencv_fixer import AutoFix; AutoFix()"
if [ $USE_MEGATRON -eq 1 ]; then
echo "6. Install cudnn python package (avoid being overridden)"
pip install nvidia-cudnn-cu12==9.8.0.87
fi
echo "Successfully installed all packages"
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script merges verl checkpoints from the FSDP and Megatron backends into HuggingFace-format models and can test the merged results.
To merge FSDP checkpoints:
```sh
python scripts/legacy_model_merger.py merge \
--backend fsdp \
--local_dir checkpoints/verl_fsdp_gsm8k_examples/qwen2_5_0b5_fsdp_saveload/global_step_1/actor \
--target_dir /path/to/merged_hf_model
```
To merge Megatron checkpoints:
```sh
python scripts/legacy_model_merger.py merge \
--backend megatron \
--tie-word-embedding \
--local_dir checkpoints/verl_megatron_gsm8k_examples/qwen2_5_0b5_megatron_saveload/global_step_1/actor \
--target_dir /path/to/merged_hf_model
```
For more details, please refer to documentation:
https://verl.readthedocs.io/en/latest/advance/checkpoint.html#convert-fsdp-and-megatron-checkpoints-to-huggingface-format-model
"""
import argparse
import os
import re
import warnings
from abc import ABC, abstractmethod
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional, Union
import numpy as np
import torch
from accelerate import init_empty_weights
from safetensors.torch import load_file
from torch.distributed._tensor import Placement, Shard
from transformers import (
AutoConfig,
AutoModelForCausalLM,
AutoModelForTokenClassification,
AutoModelForVision2Seq,
GenerationConfig,
PretrainedConfig,
)
try:
# for torch 2.5+
from torch.distributed.tensor import DTensor
except ImportError:
from torch.distributed._tensor import DTensor
from tqdm import tqdm
from verl.utils import hf_processor, hf_tokenizer
@dataclass
class ModelMergerConfig:
operation: str # 'merge' or 'test'
backend: str
local_dir: str
hf_model_config_path: str
target_dir: Optional[str] = "tmp"
hf_upload_path: Optional[str] = None
private: bool = False
test_hf_dir: Optional[str] = None
tie_word_embedding: bool = False
is_value_model: bool = False
hf_model_path: Optional[str] = None
hf_upload: bool = field(init=False)
def __post_init__(self):
self.hf_upload = self.operation == "merge" and bool(self.hf_upload_path)
if self.operation == "test":
self.target_dir = None
self.hf_upload_path = None
self.private = False
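# Illustrative construction (paths mirror the FSDP example in the module docstring; hf_model_config_path is
# normally resolved from the checkpoint directory, so the value shown here is an assumption):
#   config = ModelMergerConfig(
#       operation="merge",
#       backend="fsdp",
#       local_dir="checkpoints/verl_fsdp_gsm8k_examples/qwen2_5_0b5_fsdp_saveload/global_step_1/actor",
#       hf_model_config_path="checkpoints/verl_fsdp_gsm8k_examples/qwen2_5_0b5_fsdp_saveload/global_step_1/actor",
#       target_dir="/path/to/merged_hf_model",
#   )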
class BaseModelMerger(ABC):
def __init__(self, config: ModelMergerConfig):
self.config = config
self.hf_model_config_path = config.hf_model_config_path
if config.hf_model_path:
print(
"Warning: --hf_model_path is deprecated and will be removed in a future version. Currently verl will save huggingface model configuration files into checkpoint directories. Therefore, there is no need to provide --hf_model_path. "
)
self.hf_model_config_path = config.hf_model_path
self.model_config = AutoConfig.from_pretrained(self.hf_model_config_path)
def get_transformers_auto_model_class(self):
if "ForTokenClassification" in self.model_config.architectures[0]:
return AutoModelForTokenClassification
elif "ForCausalLM" in self.model_config.architectures[0]:
return AutoModelForCausalLM
elif "ForConditionalGeneration" in self.model_config.architectures[0]:
return AutoModelForVision2Seq
raise NotImplementedError(f"Unknown architecture {self.model_config.architectures}")
def patch_model_generation_config(self, model):
"""
The generation_config created from the model config may differ from the pretrained model's,
which can lead to errors when generating: https://github.com/volcengine/verl/issues/1246
This function replaces the generation_config created from the model config with the one from the pretrained model.
"""
if model.can_generate():
try:
model.generation_config = GenerationConfig.from_pretrained(self.hf_model_config_path)
except OSError:
print(
f"Warning: Generation config file not found in {self.hf_model_config_path}, using a generation config created from the model config."
)
return model
def save_lora_adapter(self, state_dict: dict[str, torch.Tensor]):
"""
Save lora adapter to safetensors.
Returns:
lora_path: str, the path to the lora adapter. None if no lora adapter found.
Note:
This function changes the 'state_dict' in place.
"""
lora_params_names = [name for name in state_dict.keys() if "lora_" in name]
if len(lora_params_names) == 0:
return None
import json
from typing import OrderedDict
import peft
from safetensors.torch import save_file
lora_params = OrderedDict()
target_modules = set()
lora_key = None
for name in lora_params_names:
lora_key = name.replace(".default.weight", ".weight")
target_modules.add(lora_key.split(".")[-3])
lora_params[lora_key] = state_dict.pop(name)
lora_rank = min(lora_params[lora_key].shape[0], lora_params[lora_key].shape[1])
peft_dict = {
"r": lora_rank,
"lora_alpha": 0, # lora_alpha is not set. An error should be raised to inform the user to set it manually.
"target_modules": list(target_modules),
}
peft_config = peft.LoraConfig(**peft_dict).to_dict()
peft_config["task_type"] = peft_config["task_type"].value if peft_config["task_type"] else None
peft_config["peft_type"] = peft_config["peft_type"].value if peft_config["peft_type"] else None
peft_config["target_modules"] = list(peft_config["target_modules"])
lora_path = os.path.join(self.config.target_dir, "lora_adapter")
os.makedirs(lora_path, exist_ok=True)
with open(os.path.join(lora_path, "adapter_config.json"), "w", encoding="utf-8") as f:
json.dump(peft_config, f, ensure_ascii=False, indent=4)
save_file(lora_params, os.path.join(lora_path, "adapter_model.safetensors"))
for name in list(state_dict.keys()):
key = (
name.replace("base_model.model.", "")
.replace(".base_layer.weight", ".weight")
.replace(".base_layer.bias", ".bias")
)
state_dict[key] = state_dict.pop(name)
return lora_path
def save_hf_model_and_tokenizer(self, state_dict: dict[str, torch.Tensor]):
auto_model_class = self.get_transformers_auto_model_class()
with init_empty_weights():
model = auto_model_class.from_config(self.model_config, torch_dtype=torch.bfloat16)
model.to_empty(device="cpu")
model = self.patch_model_generation_config(model)
lora_path = self.save_lora_adapter(state_dict)
if lora_path:
print(f"Saving lora adapter to {lora_path}")
print(f"Saving model to {self.config.target_dir}")
model.save_pretrained(self.config.target_dir, state_dict=state_dict)
del state_dict
del model
processor = hf_processor(self.hf_model_config_path)
tokenizer = hf_tokenizer(self.hf_model_config_path)
if processor is not None:
print(f"Saving processor to {self.config.target_dir}")
processor.save_pretrained(self.config.target_dir)
if tokenizer is not None:
print(f"Saving tokenizer to {self.config.target_dir}")
tokenizer.save_pretrained(self.config.target_dir)
def upload_to_huggingface(self):
from huggingface_hub import HfApi
api = HfApi()
api.create_repo(repo_id=self.config.hf_upload_path, private=self.config.private, exist_ok=True)
api.upload_folder(folder_path=self.config.target_dir, repo_id=self.config.hf_upload_path, repo_type="model")
@abstractmethod
def merge_and_save(self):
raise NotImplementedError("Subclasses should implement this method")
class FSDPModelMerger(BaseModelMerger):
def _get_world_size(self) -> int:
"""Extracts the FSDP world_size from checkpoint filenames (e.g., 'model_world_size_8_rank_0.pt')."""
for filename in os.listdir(self.config.local_dir):
match = re.match(r"model_world_size_(\d+)_rank_0\.pt", filename)
if match:
return int(match.group(1))
raise FileNotFoundError(
f"Could not determine world size. No file matching 'model_world_size_(\d+)_rank_0.pt' found in {self.config.local_dir}"
)
def _load_rank_zero_state_dict(self, world_size: int) -> dict:
return torch.load(
Path(self.config.local_dir) / f"model_world_size_{world_size}_rank_0.pt",
map_location="cpu",
weights_only=False,
)
def _extract_device_mesh_info(self, state_dict: dict, world_size: int) -> tuple[np.ndarray, tuple[str, ...]]:
"""
Retrieves sharding information (device_mesh, mesh_dim_names) from a DTensor in the state_dict.
If no DTensor is found, infers a simple FSDP mesh based on world_size.
"""
pivot_key = sorted(list(state_dict.keys()))[0]
weight = state_dict[pivot_key]
if isinstance(weight, DTensor):
# get sharding info
device_mesh = weight.device_mesh
mesh = device_mesh.mesh
mesh_dim_names = device_mesh.mesh_dim_names
else:
# for non-DTensor
mesh = np.array([world_size], dtype=np.int64)
mesh_dim_names = ("fsdp",)
return mesh, mesh_dim_names
def _calculate_shard_configuration(
self, mesh: np.ndarray, mesh_dim_names: tuple[str, ...]
) -> tuple[int, tuple[int, ...]]:
"""Calculates the total number of shards and the shape of the device mesh."""
assert mesh_dim_names in (("fsdp",), ("ddp", "fsdp")), f"Unsupported mesh_dim_names {mesh_dim_names}"
if "tp" in mesh_dim_names:
# TODO: "tp" is not supported yet due to the above assert
total_shards = mesh.shape[-1] * mesh.shape[-2]
mesh_shape = (mesh.shape[-2], mesh.shape[-1])
else:
total_shards = mesh.shape[-1]
mesh_shape = (mesh.shape[-1],)
return total_shards, mesh_shape
def _merge_by_placement(self, tensors: list[torch.Tensor], placement: Placement) -> torch.Tensor:
"""Merges a list of tensors based on their DTensor placement"""
if placement.is_replicate():
return tensors[0]
elif placement.is_partial():
raise NotImplementedError("Partial placement is not supported yet")
elif placement.is_shard():
return torch.cat(tensors, dim=placement.dim).contiguous()
raise NotImplementedError(f"Unsupported placement: {placement}")
def _load_and_merge_state_dicts(
self, world_size: int, total_shards: int, mesh_shape: tuple[int, ...], mesh_dim_names: tuple[str, ...]
) -> dict[str, torch.Tensor]:
model_state_dict_lst = [None] * total_shards
def process_one_shard(rank: int, model_state_dict_lst: list):
model_path = Path(self.config.local_dir) / f"model_world_size_{world_size}_rank_{rank}.pt"
state_dict = torch.load(model_path, map_location="cpu", weights_only=False)
model_state_dict_lst[rank] = state_dict
return state_dict
with ThreadPoolExecutor(max_workers=min(32, os.cpu_count())) as executor:
futures = [executor.submit(process_one_shard, rank, model_state_dict_lst) for rank in range(total_shards)]
for future in tqdm(futures, desc=f"Loading {total_shards} FSDP shards", total=total_shards):
future.result()
# Merge state dicts from all shards
state_dict = {}
param_placements: dict[str, list] = {}
for key in set(model_state_dict_lst[0].keys()):
state_dict[key] = []
for model_state_shard in model_state_dict_lst:
# add tensor shard in order of rank to state_dict[key]
tensor = model_state_shard.pop(key)
if isinstance(tensor, DTensor):
state_dict[key].append(tensor._local_tensor.bfloat16())
placements = tuple(tensor.placements)
# replicated placement at dp dimension can be discarded
if mesh_dim_names[0] in ("dp", "ddp"):
placements = placements[1:]
if key not in param_placements:
param_placements[key] = placements
else:
assert param_placements[key] == placements
else:
state_dict[key].append(tensor.bfloat16())
del model_state_dict_lst
# Merge tensors
for key in sorted(state_dict):
if not isinstance(state_dict[key], list):
print(f"No need to merge key {key}")
continue
if key in param_placements:
# merge shards
placements: tuple[Shard] = param_placements[key]
if len(mesh_shape) == 1:
# 1-D list, FSDP without TP
assert len(placements) == 1
shards = state_dict[key]
state_dict[key] = self._merge_by_placement(shards, placements[0])
else:
# 2-D list, FSDP + TP
raise NotImplementedError("FSDP + TP is not supported yet")
else:
state_dict[key] = torch.cat(state_dict[key], dim=0)
return state_dict
def merge_and_save(self):
world_size = self._get_world_size()
rank_zero_state_dict = self._load_rank_zero_state_dict(world_size)
mesh, mesh_dim_names = self._extract_device_mesh_info(rank_zero_state_dict, world_size)
print(f"Got device mesh {mesh}, mesh_dim_names {mesh_dim_names}")
total_shards, mesh_shape = self._calculate_shard_configuration(mesh, mesh_dim_names)
print(f"Processing model shards with {total_shards} {mesh_shape} in total")
merged_state_dict = self._load_and_merge_state_dicts(world_size, total_shards, mesh_shape, mesh_dim_names)
if self.config.operation == "test":
if not self.config.test_hf_dir:
raise ValueError("test_hf_dir must be provided for test operation")
self._test_state_dict(merged_state_dict)
elif self.config.operation == "merge":
self.save_hf_model_and_tokenizer(merged_state_dict)
if self.config.hf_upload:
self.upload_to_huggingface()
else:
raise ValueError(f"Unknown operation: {self.config.operation}")
def _test_state_dict(self, state_dict: dict[str, torch.Tensor]):
auto_model_class = self.get_transformers_auto_model_class()
hf_model = auto_model_class.from_pretrained(self.config.test_hf_dir, torch_dtype=torch.bfloat16)
hf_state_dict = hf_model.state_dict()
del hf_model
hf_model_keys = set(hf_state_dict.keys())
collected_keys = set(state_dict.keys())
missing_keys = hf_model_keys - collected_keys
assert len(missing_keys) == 0, f"Missing keys in collected state dict: {list(sorted(missing_keys))}"
extra_keys = collected_keys - hf_model_keys
assert len(extra_keys) == 0, f"Extra keys in collected state dict: {list(sorted(extra_keys))}"
for key in hf_model_keys:
hf_shape = hf_state_dict[key].shape
collected_shape = state_dict[key].shape
assert hf_shape == collected_shape, (
f"Shape mismatch for key '{key}': original {hf_shape} vs collected {collected_shape}"
)
hf_dtype = hf_state_dict[key].dtype
collected_dtype = state_dict[key].dtype
assert hf_dtype == collected_dtype, (
f"Dtype mismatch for key '{key}': original {hf_dtype} vs collected {collected_dtype}"
)
torch.testing.assert_close(hf_state_dict[key], state_dict[key], atol=1e-6, rtol=1e-6)
print("FSDP checks passed: The merged state_dict matches the hf model saved by FSDPCheckpointManager.")
class MegatronModelMerger(BaseModelMerger):
def __init__(self, config: ModelMergerConfig):
from verl.utils.megatron_utils import get_hf_config_and_tokenizer_checkpoint_path
config.hf_model_config_path = get_hf_config_and_tokenizer_checkpoint_path(config.local_dir)
super().__init__(config)
self.params_mapping = {
# megatron core gpt model name, huggingface model name
# NOTE: matching is substring-based, so when two keys share a prefix (e.g. "self_attention.linear_qkv.layer_norm_weight" and "self_attention.linear_qkv"), the longer, more specific key must come first so it is matched before the shorter one.
"embedding.word_embeddings": "model.embed_tokens",
# attn
"self_attention.linear_qkv.layer_norm_weight": "input_layernorm.weight",
"self_attention.linear_qkv.layer_norm_bias": "input_layernorm.bias",
"self_attention.linear_qkv": "self_attn.qkv_proj",
"self_attention.q_layernorm": "self_attn.q_norm",
"self_attention.k_layernorm": "self_attn.k_norm",
"self_attention.linear_proj": "self_attn.o_proj",
# mla
"self_attention.linear_q_proj": "self_attn.q_proj",
"self_attention.linear_q_down_proj": "self_attn.q_a_proj",
"self_attention.linear_q_up_proj.layer_norm_weight": "self_attn.q_a_layernorm.weight",
"self_attention.linear_q_up_proj": "self_attn.q_b_proj",
"self_attention.linear_kv_down_proj": "self_attn.kv_a_proj_with_mqa",
"self_attention.linear_kv_up_proj.layer_norm_weight": "self_attn.kv_a_layernorm.weight",
"self_attention.linear_kv_up_proj": "self_attn.kv_b_proj",
# mlp
"pre_mlp_layernorm": "post_attention_layernorm",
"mlp.linear_fc1.layer_norm_weight": "post_attention_layernorm.weight",
"mlp.linear_fc1.layer_norm_bias": "post_attention_layernorm.bias",
"mlp.linear_fc1": "mlp.gate_up_proj",
"mlp.linear_fc2": "mlp.down_proj",
# moe
"mlp.router.expert_bias": "mlp.gate.e_score_correction_bias",
"mlp.router": "mlp.gate",
"mlp.shared_experts.linear_fc1": "mlp.shared_experts.gate_up_proj",
"mlp.shared_experts.linear_fc2": "mlp.shared_experts.down_proj",
"linear_fc1": "gate_up_proj",
"linear_fc2": "down_proj",
# output
"final_layernorm": "norm",
"output_layer": "lm_head",
}
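# Illustrative example (derived from _replace_name below): with this table, a Megatron key such as
#   "decoder.layers.0.self_attention.linear_proj.weight"
# becomes the Hugging Face key
#   "model.layers.0.self_attn.o_proj.weight"
# ("decoder" is rewritten to "model" and the first matching table entry is applied,
# which is why longer keys are listed before their shorter prefixes).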
def _get_tp_pp_rank_from_sharded_dir(self, sharded_dir: str) -> tuple[int, int]:
tp_rank = pp_rank = None
rank_list = sharded_dir.split("_")[2:]
if re.match(r"mp_rank_(\d\d)_(\d\d\d)", sharded_dir):
tp_rank = int(rank_list[0])
pp_rank = int(rank_list[1])
elif re.match(r"mp_rank_(\d\d)", sharded_dir):
tp_rank = int(rank_list[0])
pp_rank = 0
assert tp_rank is not None and pp_rank is not None, f"Invalid sharded dir {sharded_dir}"
return tp_rank, pp_rank
def _check_megatron_checkpoint_path(self, model_path: str) -> tuple[list[str], int, int]:
"""
Validates the Megatron checkpoint structure (presence of 'model.pt' in sharded directories).
Determines TP and PP sizes from directory names.
"""
tp_size = 0
pp_size = 0
sharded_dirs = sorted(os.listdir(model_path))
for sharded_dir in sharded_dirs:
assert "model.pt" in os.listdir(Path(model_path) / sharded_dir), f"model.pt not found in {sharded_dir}"
tp_rank, pp_rank = self._get_tp_pp_rank_from_sharded_dir(sharded_dir)
tp_size = max(tp_size, tp_rank + 1)
pp_size = max(pp_size, pp_rank + 1)
return sharded_dirs, tp_size, pp_size
def _merge_across_tp(
self,
key: str,
tp_data: list[torch.Tensor],
config: PretrainedConfig,
tp_size: int,
is_value_model: bool = False,
) -> Union[torch.Tensor, list[torch.Tensor]]:
if "linear_fc1.weight" in key:
# fused gate/up projection: split each TP shard into gate and up parts, then concat each separately
gate_lst = []
up_lst = []
for infer_param in tp_data:
gate, up = infer_param.chunk(2)
gate_lst.append(gate)
up_lst.append(up)
gate = torch.cat(gate_lst, dim=0)
up = torch.cat(up_lst, dim=0)
return [gate, up]
elif "self_attention.linear_qkv." in key and "layer_norm" not in key:
# if the tensor is qkv, for each param on tp, split into q, k, v
# concat q, k, v separately.
q_lst = []
k_lst = []
v_lst = []
assert config.num_attention_heads % config.num_key_value_heads == 0
num_q_per_kv = config.num_attention_heads // config.num_key_value_heads
assert tp_data[0].shape[0] % (num_q_per_kv + 2) == 0
kv_size_per_tp = tp_data[0].shape[0] // (num_q_per_kv + 2)
split_size = [kv_size_per_tp * num_q_per_kv, kv_size_per_tp, kv_size_per_tp]
for infer_param in tp_data:
num_query_groups_per_partition = config.num_key_value_heads // tp_size
for chunk in infer_param.chunk(num_query_groups_per_partition):
split_size = [
kv_size_per_tp * num_q_per_kv // num_query_groups_per_partition,
kv_size_per_tp // num_query_groups_per_partition,
kv_size_per_tp // num_query_groups_per_partition,
]
q, k, v = chunk.split(split_size)
q_lst.append(q)
k_lst.append(k)
v_lst.append(v)
q = torch.cat(q_lst, dim=0)
k = torch.cat(k_lst, dim=0)
v = torch.cat(v_lst, dim=0)
return [q, k, v]
elif "layer_norm" in key or "layernorm" in key or "router" in key or ("output_layer" in key and is_value_model):
return tp_data[0]
else:
dim = 0
if "linear_fc2.weight" in key or "self_attention.linear_proj" in key:
dim = 1
return torch.cat(tp_data, dim=dim)
def _load_state_dicts(
self, model_ckpt_path: str, sharded_dirs: list[str], tp_size: int, pp_size: int
) -> list[list[dict]]:
model_state_dict_lst = [[None for _ in range(tp_size)] for _ in range(pp_size)]
def _process_one_megatron_shard(sharded_dir: str):
model_file_path = Path(model_ckpt_path) / sharded_dir / "model.pt"
state_dict = torch.load(model_file_path, map_location="cpu", weights_only=False)
tp_rank, pp_rank = self._get_tp_pp_rank_from_sharded_dir(sharded_dir)
model_state_dict_lst[pp_rank][tp_rank] = state_dict
with ThreadPoolExecutor(max_workers=min(32, os.cpu_count())) as executor:
futures = [executor.submit(_process_one_megatron_shard, sharded_dir) for sharded_dir in sharded_dirs]
for future in tqdm(futures, desc=f"Loading {len(sharded_dirs)} Megatron shards", total=len(sharded_dirs)):
future.result()
return model_state_dict_lst
def _check_megatron_state_key(self, key: str) -> None:
"""
Checks that the key is a valid Megatron state key.
The model merger currently only supports keys that start with "decoder"/"embedding"/"output_layer" in TransformerLayer.
Keys must not start with "model.".
"""
if key.startswith("model."):
raise ValueError(
f"Invalid key {key} in Megatron state_dict. Expected keys to start with 'decoder/embedding/output_layer' in TransformerLayer."
)
skip_checking_keys = ["embedding.word_embeddings", "output_layer"]
for skip_key in skip_checking_keys:
if skip_key in key:
print(f"skip checking key {key}")
return
# Exclude extra state keys
if not key.startswith("decoder"):
raise ValueError(
f"Invalid key {key} in Megatron state_dict. Expected keys to start with 'decoder' in TransformerLayer."
)
def _merge_state_dicts(
self, model_state_dict_lst: list[list[dict]], tp_size: int, pp_size: int
) -> dict[str, torch.Tensor]:
state_dict = {}
vpp_size = len(model_state_dict_lst[0][0])
layers_cum = 0
for vpp_rank in range(vpp_size):
for pp_rank in range(pp_size):
layers_handled = 0
keys = model_state_dict_lst[pp_rank][0][vpp_rank].keys()
for key in keys:
if "extra_state" in key:
continue
if self.config.tie_word_embedding and ("output_layer" in key):
print("skip lm_head and reward_head loading because of tie_word_embeddings")
continue
self._check_megatron_state_key(key)
hf_name = self._replace_name(key, self.params_mapping)
assert hf_name is not None, f"Failed to convert layer name [{key}] from megatron to huggingface."
if "model.layers." in hf_name:
local_layer_no = int(hf_name.split(".")[2])
layers_handled = max(local_layer_no, layers_handled)
global_layer_no = local_layer_no + layers_cum
new_key_list = hf_name.split(".")
new_key_list[2] = str(global_layer_no)
hf_name = ".".join(new_key_list)
else:
warnings.warn(f"hf_name {hf_name} will not be fixed with layer number", stacklevel=2)
tp_data = [model_state_dict_lst[pp_rank][tp_rank][vpp_rank][key] for tp_rank in range(tp_size)]
merged = self._merge_across_tp(key, tp_data, self.model_config, tp_size, self.config.is_value_model)
if not isinstance(merged, list):
state_dict[hf_name] = merged
elif len(merged) == 3:
# split qkv
for n, d in zip(["q", "k", "v"], merged):
state_dict[hf_name.replace("qkv", n)] = d
elif len(merged) == 2:
# split gate up
state_dict[hf_name.replace("gate_up", "gate")] = merged[0]
state_dict[hf_name.replace("gate_up", "up")] = merged[1]
print(
f"converted {key} to {hf_name} with shape {merged.shape if isinstance(merged, torch.Tensor) else [t.shape for t in merged]}"
)
layers_cum += layers_handled + 1 # zero based
return state_dict
def merge_and_save(self):
from verl.utils.megatron_utils import get_model_checkpoint_path
model_ckpt_path = get_model_checkpoint_path(self.config.local_dir)
sharded_dirs, tp_size, pp_size = self._check_megatron_checkpoint_path(model_ckpt_path)
print(f"sharded_dirs: {sharded_dirs}, tp_size: {tp_size}, pp_size: {pp_size}, mp_size: {len(sharded_dirs)}")
model_state_dict_lst = self._load_state_dicts(model_ckpt_path, sharded_dirs, tp_size, pp_size)
merged_state_dict = self._merge_state_dicts(model_state_dict_lst, tp_size, pp_size)
del model_state_dict_lst
if self.config.operation == "test":
if not self.config.test_hf_dir:
raise ValueError("test_hf_dir must be provided for test operation")
self._test_state_dict(merged_state_dict)
elif self.config.operation == "merge":
self.save_hf_model_and_tokenizer(merged_state_dict)
if self.config.hf_upload:
self.upload_to_huggingface()
else:
raise ValueError(f"Unknown operation: {self.config.operation}")
def _test_state_dict(self, state_dict: dict[str, torch.Tensor]):
"""
Compares the merged Megatron state_dict against a reference safetensors model.
Applies necessary name mappings from Megatron to Hugging Face conventions using _replace_name.
"""
ref_state_dict = load_file(Path(self.config.test_hf_dir) / "model.safetensors")
for name, loaded_weight in state_dict.items():
# name = self._replace_name(original_name, self.params_mapping)
if not name or (name.endswith(".bias") and name not in ref_state_dict):
continue
if "rotary_emb.inv_freq" in name:
continue
if self.config.tie_word_embedding and "lm_head.weight" in name:
continue
if name not in ref_state_dict:
raise RuntimeError(f"key: {name} not exist in state_dict")
param = ref_state_dict[name]
assert loaded_weight.dtype == param.dtype
torch.testing.assert_close(loaded_weight, param, atol=1e-2, rtol=5e-2)
def _replace_name(self, megatron_name: str, name_mapping: dict[str, str]) -> str:
for m_name, v_name in name_mapping.items():
if m_name not in megatron_name:
continue
megatron_name = megatron_name.replace("decoder", "model")
param_name = megatron_name.replace(m_name, v_name)
return param_name
return None # Return None if no mapping found
def main():
parser = argparse.ArgumentParser(description="verl model merger")
subparsers = parser.add_subparsers(dest="operation", required=True, help="Specify 'merge' or 'test' operation.")
base_op_parser = argparse.ArgumentParser(add_help=False)
base_op_parser.add_argument(
"--backend", type=str, required=True, choices=["fsdp", "megatron"], help="The backend of the model"
)
base_op_parser.add_argument("--local_dir", type=str, required=True, help="Path to the saved model checkpoints")
base_op_parser.add_argument(
"--hf_model_path",
type=str,
default=None,
help="(Deprecated) Path to the original Hugging Face model for config.",
)
base_op_parser.add_argument(
"--tie-word-embedding",
action="store_true",
help="Whether to tie word embedding weights (currently only Megatron supported)",
)
base_op_parser.add_argument(
"--is-value-model",
action="store_true",
help="Whether the model is a value model (currently only Megatron supported)",
)
merge_parser = subparsers.add_parser("merge", parents=[base_op_parser], help="Merge model checkpoints and save.")
merge_parser.add_argument(
"--target_dir", default="tmp", type=str, help="Directory to save the merged huggingface model"
)
merge_parser.add_argument(
"--hf_upload_path", default=None, type=str, help="Hugging Face repository ID to upload the model"
)
merge_parser.add_argument(
"--private", action="store_true", help="Whether to upload the model to a private Hugging Face repository"
)
test_parser = subparsers.add_parser(
"test", parents=[base_op_parser], help="Test merged model against a reference Hugging Face model"
)
test_parser.add_argument(
"--test_hf_dir", type=str, required=True, help="Path to the reference Hugging Face model directory for testing"
)
args = parser.parse_args()
common_config_args = {
"operation": args.operation,
"backend": args.backend,
"tie_word_embedding": args.tie_word_embedding,
"is_value_model": args.is_value_model,
"local_dir": args.local_dir,
"hf_model_path": args.hf_model_path,
"hf_model_config_path": args.local_dir,
}
if args.operation == "merge":
config = ModelMergerConfig(
**common_config_args,
target_dir=args.target_dir,
hf_upload_path=args.hf_upload_path,
private=args.private,
test_hf_dir=None,
)
os.makedirs(config.target_dir, exist_ok=True)
elif args.operation == "test":
config = ModelMergerConfig(
**common_config_args,
test_hf_dir=args.test_hf_dir,
# the following args are not used by test operation
target_dir=None,
hf_upload_path=None,
private=False,
)
else:
raise NotImplementedError(f"Unknown operation: {args.operation}")
if config.backend == "fsdp":
merger = FSDPModelMerger(config)
elif config.backend == "megatron":
merger = MegatronModelMerger(config)
else:
raise NotImplementedError(f"Unknown backend: {config.backend}")
merger.merge_and_save()
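# Example invocations (script and checkpoint paths are illustrative placeholders):
#   python <this_script>.py merge --backend fsdp --local_dir checkpoints/global_step_100/actor --target_dir /tmp/merged_hf_model
#   python <this_script>.py test --backend megatron --local_dir checkpoints/global_step_100/actor --test_hf_dir /path/to/reference_hf_model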
if __name__ == "__main__":
main()
# Copyright 2025 Bytedance Ltd. and/or its affiliates
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
import hydra
except ImportError as e:
raise ImportError("Please install hydra-core via 'pip install hydra-core' and retry.") from e
@hydra.main(config_path="../verl/trainer/config", config_name="ppo_trainer", version_base=None)
def main(config):
"""Main entry point for PPO training with Hydra configuration management.
Args:
config: Hydra configuration object containing the training parameters.
"""
print(config)
from verl.utils.config import omega_conf_to_dataclass
profiler_config = omega_conf_to_dataclass(config.critic.profiler)
print(profiler_config)
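# Hydra resolves the "ppo_trainer" config and applies any command-line overrides, e.g.
# (script path and override field are illustrative placeholders):
#   python <this_script>.py critic.profiler.<field>=<value>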
if __name__ == "__main__":
main()
# Copyright 2025 Bytedance Ltd. and/or its affiliates
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import re
import traceback
from pathlib import Path
from typing import Annotated, Optional
import aiofiles
try:
import ujson as json
except ImportError:
import json
import typer
from rich.highlighter import ReprHighlighter
from rich.markdown import Markdown
from rich.table import Table
from rich.text import Text
from textual import on
from textual.app import App, ComposeResult
from textual.containers import Horizontal, Vertical, VerticalScroll
from textual.widgets import Input, ProgressBar, Select, SelectionList, Static
INDEX_KEY = "__IDX"
FILE_SUFFIX = ".jsonl"
def check_textual_version():
# check if textual version is equal to 0.52.1
import textual
from packaging.version import Version
if Version(textual.__version__) != Version("0.52.1"):
raise ImportError(f"Textual version {textual.__version__} is not supported, please pip install textual==0.52.1")
check_textual_version()
async def load_path(p: Path, data: dict, mask_strs: str, idx: int, pbar):
samples = []
async with aiofiles.open(p, encoding="utf-8") as f:
async for line in f:
d = json.loads(line)
for k in d:
if isinstance(d[k], str):
if mask_strs:
d[k] = re.sub(rf"{mask_strs}", "*", d[k])
else:
d[k] = json.dumps(d[k], ensure_ascii=False, indent=4)
d[INDEX_KEY] = len(samples)
samples.append(d)
data[idx] = {"samples": samples}
print(f"path {p} loaded")
pbar.advance(1)
async def load_dir(path: Path, data: dict[int, dict], pbar, mask_strs: str = ""):
paths = list(path.glob(f"*{FILE_SUFFIX}"))
paths = sorted(paths, key=lambda x: int(x.stem))
tasks = [load_path(p, data, mask_strs, i, pbar) for i, p in enumerate(paths)]
await asyncio.gather(*tasks)
class Highlighter(ReprHighlighter):
highlights = ReprHighlighter.highlights + [
r"(?P<tag_name>[][\<\>{}()\|()【】\[\]=`])",
r"\<\|(?P<tag_name>[\w\W]*?)\|\>",
]
def center_word_with_equals_exactly(word: str, total_length: int, char: str = "=") -> str:
if len(word) > total_length:
return word
padding = total_length - len(word)
left_pad = (padding) // 2
right_pad = (padding + 1) // 2
return char * left_pad + " " + word + " " + char * right_pad
def highlight_keyword(content: str, keyword: Optional[str]):
if not keyword:
return Text(content)
text = Text()
parts = content.split(keyword)
for i, part in enumerate(parts):
text.append(part, style=None)
if i < len(parts) - 1:
# text.append(keyword, style=Style(color="#d154d1", bgcolor="yellow", bold=True))
text.append(keyword, style="on #8f51b5")
return text
help_doc = """
⌨️ keybinds:
- `f/esc`: find/cancel
- `tab/←/→`: change focus
- `j/k`: page down/up
- `g/G`: scroll home/end
- `n/N`: next sample/step
- `p/P`: previous sample/step
- `s`: switch display mode
- plain text
- rich table
"""
class JsonLineViewer(App):
BINDINGS = [
("left", "focus_previous", "Focus Previous"),
("right", "focus_next", "Focus Next"),
("s", "swith_render", "switch render"),
# control
("n", "next_sample", "Next Sample"),
("N", "next_step", "Next Step"),
("p", "previous_sample", "Previous Sample"),
("P", "previous_step", "Previous Step"),
# search
("f", "toggle_search", "find"),
("enter", "next_search", "find next"),
("escape", "cancel_search", "cancel find"),
# scroll
("j", "page_down", "page down"),
("k", "page_up", "page up"),
("g", "page_home", "page home"),
("G", "page_end", "page end"),
]
CSS = """
Select:focus > SelectCurrent {
border: tall #8f51b5;
}
Select.-expanded > SelectCurrent {
border: tall #8f51b5;
}
#select-container {
width: 15%;
height: 100%;
align: center top;
}
#search-container {
height: 10%;
align: center top;
}
#search-box {
width: 50%;
}
#reqid-box {
width: 50%;
}
"""
def __init__(self, step_num: int, data: dict[int, dict], pbar):
super().__init__()
self.step_num = step_num
self.data = data
self.render_table = False
self.selected_step_index = 0
self.selected_sample_index = 0
self.pbar = pbar
self.matches = []
self.current_match_index = 0
self.highlighter = Highlighter()
first_samples = data[list(data.keys())[0]]["samples"]
# Prepare the initial field filter list (all keys from the first sample)
self.filter_fields = [(f, f, True) for f in first_samples[0].keys()]
# Internal set used for fast membership checks when we add new fields on the fly.
# We keep it here so that when new columns appear in later steps (e.g. `request_id`),
# they can be added to the UI automatically without restarting the viewer.
self._field_set: set[str] = set(first_samples[0].keys())
self.sample_num = len(first_samples)
def compose(self) -> ComposeResult:
with Horizontal(id="search-container"):
yield Input(placeholder="find something...", id="search-box")
yield Input(placeholder="request id...", id="reqid-box")
with Vertical(id="search-container2"):
yield self.pbar
yield Static("", id="search-status")
with Horizontal():
with Vertical(id="select-container"):
yield Static("\n")
yield Static(
renderable=Markdown(
help_doc,
),
markup=False,
)
yield Static("\n")
yield Select(
id="step-select",
value=0,
prompt="select step",
options=[("step: 1", 0)],
allow_blank=False,
)
yield Select(
id="sample-select",
value=0,
prompt="select sample",
options=[("sample: 1", 0)],
allow_blank=False,
)
yield Select(
id="sample-sort",
value=0,
prompt="排序",
options=[
("sort", 0),
("score asc", 1),
("score desc", 2),
],
allow_blank=False,
)
yield SelectionList[int](("Select ALL", 1, True), id="fields-select-all")
with VerticalScroll(id="scroll-view2"):
yield SelectionList[str](*self.filter_fields, id="fields-select")
with VerticalScroll(id="scroll-view"):
yield Static(id="content", markup=False)
async def on_mount(self) -> None:
self.step_select = self.query_one("#step-select", Select)
self.sample_select = self.query_one("#sample-select", Select)
self.sample_sort = self.query_one("#sample-sort", Select)
self.content_display = self.query_one("#content", Static)
self.search_box = self.query_one("#search-box", Input)
self.reqid_box = self.query_one("#reqid-box", Input)
self.scroll_view = self.query_one("#scroll-view", VerticalScroll)
self.search_status = self.query_one("#search-status", Static)
self.fields_select = self.query_one("#fields-select", SelectionList)
self.fields_select.border_title = "field filter"
if self.data:
self.step_select.set_options([(f"step: {i + 1}", i) for i in range(self.step_num)])
self.sample_select.set_options([(f"sample: {i + 1}", i) for i in range(self.sample_num)])
self.step_select.focus()
await self.update_content()
def update_result_options(self, offset: int = 0, sort_desc: Optional[bool] = None):
options = []
if isinstance(self.selected_step_index, int) and self.selected_step_index < len(self.data):
if self.sample_num is None or sort_desc is not None:
samples = self.data[self.selected_step_index].get("samples", [])
if not samples:
self.selected_sample_index = offset
return
if sort_desc is not None:
samples = sorted(
samples,
key=lambda x: x.get("score", x.get("score_1", 0)),
reverse=sort_desc,
)
options = [(f"sample: {r[INDEX_KEY] + 1}", r[INDEX_KEY]) for r in samples]
self.sample_select.set_options(options)
self.sample_num = len(samples)
if sort_desc is not None and options:
self.selected_sample_index = options[0][1]
else:
self.selected_sample_index = offset
async def update_content(self, search_keyword: Optional[str] = None):
content = ""
try:
samples = self.data[self.selected_step_index].get("samples", [])
content_dict_full = samples[self.selected_sample_index]
# Dynamically track any NEW keys that appear and add them to the field filter.
self._update_fields_select(content_dict_full.keys())
# Apply field selection filter (only show selected fields)
content_dict = {k: v for k, v in content_dict_full.items() if k in self.fields_select.selected}
if self.render_table:
content = Table("key", "value", show_lines=True)
for k in content_dict:
v = content_dict[k]
v = f"{v}"
content.add_row(
k,
self.highlighter(highlight_keyword(v, search_keyword)),
)
else:
text = Text()
for k in content_dict:
v = content_dict[k]
s = center_word_with_equals_exactly(k, 64) + f"\n{v}\n"
text.append(highlight_keyword(s, search_keyword))
content = self.highlighter(text)
except KeyError:
content = f"Loading data asynchronously, progress: {len(self.data)}/{self.step_num} step"
except Exception:
content = self.highlighter(traceback.format_exc())
self.content_display.update(content)
# ---------------------------------------------------------------------
# Request-ID jump logic
# ---------------------------------------------------------------------
@on(Input.Submitted, "#reqid-box")
async def on_reqid_submitted(self, event: Input.Submitted) -> None:
"""Jump to the sample that has a matching `request_id`."""
req_id_raw = event.value.strip()
# Remove hyphens so search is tolerant to different id formats
req_id = req_id_raw.replace("-", "")
if not req_id:
return
found = False
for step_idx, step_data in self.data.items():
for sample in step_data.get("samples", []):
sample_id = str(sample.get("request_id", ""))
if sample_id.replace("-", "") == req_id:
# Update selected indices
self.selected_step_index = step_idx
self.step_select.value = step_idx
# Ensure sample list is updated and select sample
self.update_result_options(offset=sample[INDEX_KEY])
self.selected_sample_index = sample[INDEX_KEY]
self.sample_select.value = sample[INDEX_KEY]
await self._clear_search()
await self.update_content()
found = True
break
if found:
break
if not found:
self.search_status.update(Text(f"request_id '{req_id_raw}' not found", style="bold red"))
else:
# Keep the typed id in the input box so users see what was searched.
pass
# ---------------------------------------------------------------------
# Helper: add new fields to SelectionList on-the-fly
# ---------------------------------------------------------------------
def _update_fields_select(self, keys):
"""Add any unseen *keys* to the field-selection widget so they can be toggled.
The viewer is often launched with only the first step loaded. Later steps may
introduce new columns (e.g. `request_id`). This helper ensures those fields
become visible without requiring a restart.
"""
# Ensure we have the widget (only after on_mount)
if not hasattr(self, "fields_select"):
return
for k in keys:
if k not in self._field_set:
self._field_set.add(k)
try:
# By default, new fields are selected so they appear immediately.
self.fields_select.add_option(k, k, selected=True)
except Exception:
# Fallback for older textual versions where signature is different.
self.fields_select.add_option((k, k, True))
@on(Select.Changed, "#step-select")
async def step_changed(self, event):
self.selected_step_index = event.value
self.update_result_options()
await self.update_content()
@on(Select.Changed, "#sample-select")
async def sample_changed(self, event):
self.selected_sample_index = event.value
await self._clear_search()
await self.update_content()
@on(Select.Changed, "#sample-sort")
async def sort_changed(self, event):
v = event.value
self.update_result_options(sort_desc=None if v == 0 else False if v == 1 else True)
await self.update_content()
@on(SelectionList.SelectedChanged, "#fields-select")
async def fields_changed(self, event):
await self.update_content()
@on(SelectionList.SelectedChanged, "#fields-select-all")
async def fields_all_changed(self, event):
s = self.query_one("#fields-select-all", SelectionList)
if s.selected:
self.fields_select.select_all()
else:
self.fields_select.deselect_all()
def action_focus_previous(self):
self.screen.focus_previous()
def action_focus_next(self):
self.screen.focus_next()
async def action_next_step(self) -> None:
self.selected_step_index += 1
if self.selected_step_index >= self.step_num:
self.selected_step_index = 0
self.step_select.value = self.selected_step_index
self.update_result_options()
await self.update_content()
async def action_next_sample(self) -> None:
self.selected_sample_index += 1
if not self.sample_num or self.selected_sample_index >= self.sample_num:
self.selected_sample_index = 0
self.sample_select.value = self.selected_sample_index
await self._clear_search()
await self.update_content()
async def action_previous_step(self) -> None:
self.selected_step_index -= 1
if self.selected_step_index < 0:
self.selected_step_index = self.step_num - 1
self.step_select.value = self.selected_step_index
self.update_result_options()
await self.update_content()
async def action_previous_sample(self) -> None:
self.selected_sample_index -= 1
if self.selected_sample_index < 0:
self.selected_sample_index = self.sample_num - 1
self.sample_select.value = self.selected_sample_index
await self._clear_search()
await self.update_content()
async def action_switch_render(self):
self.render_table = not self.render_table
await self.update_content()
def action_toggle_search(self) -> None:
self.search_box.focus()
async def action_cancel_search(self) -> None:
self.search_box.value = ""
await self._clear_search()
await self.update_content()
async def _clear_search(self):
self.matches = []
self.search_status.update("")
self.current_match_index = 0
@on(Input.Submitted, "#search-box")
async def on_search_submitted(self, event: Input.Submitted) -> None:
self.matches = []
self.current_match_index = 0
if event.value:
await self.update_content(event.value)
renderable = self.content_display.render()
if isinstance(renderable, Table):
return
assert isinstance(renderable, Text)
console = self.content_display._console
lines = renderable.wrap(console, self.scroll_view.container_size.width)
line_idx_recorded = set()
for line_idx, line in enumerate(lines):
if line_idx in line_idx_recorded:
continue
if event.value in line:
self.matches.append(
{
"line": line_idx,
"word": event.value,
}
)
line_idx_recorded.add(line_idx)
self.scroll_view.focus()
await self.action_next_search()
async def action_next_search(self) -> None:
if not self.matches or self.current_match_index >= len(self.matches):
return
target_line = self.matches[self.current_match_index]["line"]
self.scroll_view.scroll_to(x=0, y=target_line * 1, animate=False)
self.current_match_index = (self.current_match_index + 1) % len(self.matches)
self.search_status.update(
Text(
f"Find :{self.current_match_index + 1}/{len(self.matches)}",
style="bold on #8f51b5",
)
)
def action_page_up(self):
self.scroll_view.scroll_page_up(animate=False)
def action_page_down(self):
self.scroll_view.scroll_page_down(animate=False)
def action_page_home(self):
self.scroll_view.scroll_home(animate=False)
def action_page_end(self):
self.scroll_view.scroll_end(animate=False)
async def _run(path: Path, mask_str: str):
assert path.exists(), f"{path} not exist"
paths = list(path.glob(f"*{FILE_SUFFIX}"))
paths = sorted(paths, key=lambda x: int(x.stem))
if not paths:
raise ValueError(f"no available reward dump files under f{path}")
print(f"get jsonl file nums: {len(paths)}")
pbar = ProgressBar(total=len(paths), name="data load progress")
data = {}
await load_path(paths[0], data, mask_str, 0, pbar)
app = JsonLineViewer(step_num=len(paths), data=data, pbar=pbar)
await asyncio.gather(load_dir(path, data, pbar, mask_str), app.run_async())
app = typer.Typer()
@app.command(help="launch TUI APP")
def run(
rollout_data_dir: Path,
mask_str: Annotated[str, typer.Option(help="regex pattern; matches will be masked to *")] = r"<\|image_pad\|>|<\|imgpad\|>",
):
loop = asyncio.get_event_loop()
loop.run_until_complete(_run(rollout_data_dir, mask_str))
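# Example launch (directory path is an illustrative placeholder):
#   python <this_script>.py /path/to/rollout_dump_dir
# The directory is expected to contain step-numbered jsonl files (e.g. 1.jsonl, 2.jsonl, ...),
# since _run above sorts files by the integer value of their stem.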
if __name__ == "__main__":
app()
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# setup.py is the fallback installation script when pyproject.toml does not work
import os
from pathlib import Path
from setuptools import find_packages, setup
version_folder = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(version_folder, "verl/version/version")) as f:
__version__ = f.read().strip()
install_requires = [
"accelerate",
"codetiming",
"datasets",
"dill",
"hydra-core",
"numpy<2.0.0",
"pandas",
"peft",
"pyarrow>=19.0.0",
"pybind11",
"pylatexenc",
"ray[default]>=2.41.0",
"torchdata",
"tensordict>=0.8.0,<=0.9.1,!=0.9.0",
"transformers",
"wandb",
"packaging>=20.0",
]
TEST_REQUIRES = ["pytest", "pre-commit", "py-spy", "pytest-asyncio"]
PRIME_REQUIRES = ["pyext"]
GEO_REQUIRES = ["mathruler", "torchvision", "qwen_vl_utils"]
GPU_REQUIRES = ["liger-kernel", "flash-attn"]
MATH_REQUIRES = ["math-verify"] # Add math-verify as an optional dependency
VLLM_REQUIRES = ["tensordict>=0.8.0,<=0.9.1,!=0.9.0", "vllm>=0.7.3,<=0.9.1"]
SGLANG_REQUIRES = [
"tensordict>=0.8.0,<=0.9.1,!=0.9.0",
"sglang[srt,openai]==0.4.8",
"torch-memory-saver>=0.0.8",
"torch==2.7.1",
]
TRL_REQUIRES = ["trl<=0.9.6"]
MCORE_REQUIRES = ["mbridge"]
extras_require = {
"test": TEST_REQUIRES,
"prime": PRIME_REQUIRES,
"geo": GEO_REQUIRES,
"gpu": GPU_REQUIRES,
"math": MATH_REQUIRES,
"vllm": VLLM_REQUIRES,
"sglang": SGLANG_REQUIRES,
"trl": TRL_REQUIRES,
"mcore": MCORE_REQUIRES,
}
this_directory = Path(__file__).parent
long_description = (this_directory / "README.md").read_text()
setup(
name="verl",
version=__version__,
package_dir={"": "."},
packages=find_packages(where="."),
url="https://github.com/volcengine/verl",
license="Apache 2.0",
author="Bytedance - Seed - MLSys",
author_email="zhangchi.usc1992@bytedance.com, gmsheng@connect.hku.hk",
description="verl: Volcano Engine Reinforcement Learning for LLM",
install_requires=install_requires,
extras_require=extras_require,
package_data={
"": ["version/*"],
"verl": ["trainer/config/*.yaml"],
},
include_package_data=True,
long_description=long_description,
long_description_content_type="text/markdown",
)
import torch
print(torch.__version__)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.version.hip)
print(torch.cuda.get_device_name(0))
print(torch.cuda.device_count())
import vllm
print(vllm.__version__)
import multiprocessing
cpu_count = multiprocessing.cpu_count()
print(f"当前系统可用 CPU 核数: {cpu_count}")
# Tests layout
Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
- `tests/trainer` for testing functionality related to `verl/trainer`
- `tests/models` for testing functionality related to `verl/models`
- ...
There are a few folders with `special_` prefix, created for special purposes:
- `special_distributed`: unit tests that must run with multiple GPUs
- `special_e2e`: end-to-end tests with training/generation scripts
- `special_npu`: tests for NPUs
- `special_sanity`: a suite of quick sanity tests
- `special_standalone`: a set of tests designed to run in dedicated environments
# Accelerators for tests
- By default, tests are run with GPUs available, except for those under `special_npu` and any test script whose name ends with `on_cpu.py`.
- Test scripts with the `on_cpu.py` name suffix are run on CPU resources in a Linux environment.
# Workflow layout
All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
1. A list of always-triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
3. End-to-end tests: `e2e_*.yml`
4. Unit tests
- `cpu_unit_tests.yml` runs pytest on all scripts matching the file name pattern `tests/**/test_*_on_cpu.py` (see the sketch after this list)
- `gpu_unit_tests.yml` runs pytest on all test scripts whose file names do not end with the `on_cpu.py` suffix
- Since the CPU/GPU unit test workflows run all tests under `tests` by default, please make sure tests are manually excluded from them when:
- a new workflow yaml is added to `.github/workflows`
- new tests are added to one of the workflows mentioned in 2.
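As a rough local approximation of the two unit-test workflows (the authoritative commands live in the yaml files under `.github/workflows/` and may differ), the selection rules above correspond to something like:
```bash
# CPU unit tests: only files named test_*_on_cpu.py
find tests -name "test_*_on_cpu.py" -print0 | xargs -0 pytest -s
# GPU unit tests: all test files except the on_cpu.py ones
find tests -name "test_*.py" ! -name "*_on_cpu.py" -print0 | xargs -0 pytest -s
```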
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ray
from omegaconf import DictConfig
from verl.experimental.agent_loop import AgentLoopManager
from verl.single_controller.ray import RayClassWithInitArgs, RayWorkerGroup
from verl.single_controller.ray.base import create_colocated_worker_cls
from verl.trainer.ppo.ray_trainer import ResourcePoolManager, Role
from verl.workers.fsdp_workers import ActorRolloutRefWorker, AsyncActorRolloutRefWorker
def init_agent_loop_manager(config: DictConfig) -> AgentLoopManager | RayWorkerGroup:
# =========================== 1. Create hybrid ActorRollout workers ===========================
actor_rollout_cls = (
AsyncActorRolloutRefWorker if config.actor_rollout_ref.rollout.mode == "async" else ActorRolloutRefWorker
)
role_worker_mapping = {
Role.ActorRollout: ray.remote(actor_rollout_cls),
}
global_pool_id = "global_pool"
resource_pool_spec = {
global_pool_id: [config.trainer.n_gpus_per_node] * config.trainer.nnodes,
}
mapping = {
Role.ActorRollout: global_pool_id,
}
resource_pool_manager = ResourcePoolManager(resource_pool_spec=resource_pool_spec, mapping=mapping)
resource_pool_manager.create_resource_pool()
resource_pool_to_cls = {pool: {} for pool in resource_pool_manager.resource_pool_dict.values()}
# create actor and rollout
resource_pool = resource_pool_manager.get_resource_pool(Role.ActorRollout)
actor_rollout_cls = RayClassWithInitArgs(
cls=role_worker_mapping[Role.ActorRollout], config=config.actor_rollout_ref, role="actor_rollout"
)
resource_pool_to_cls[resource_pool]["actor_rollout"] = actor_rollout_cls
all_wg = {}
for resource_pool, class_dict in resource_pool_to_cls.items():
worker_dict_cls = create_colocated_worker_cls(class_dict=class_dict)
wg_dict = RayWorkerGroup(resource_pool=resource_pool, ray_cls_with_init=worker_dict_cls)
spawn_wg = wg_dict.spawn(prefix_set=class_dict.keys())
all_wg.update(spawn_wg)
actor_rollout_wg = all_wg["actor_rollout"]
actor_rollout_wg.init_model()
if config.actor_rollout_ref.rollout.mode == "sync":
return actor_rollout_wg
# =========================== 2. Create AgentLoopManager ===========================
agent_loop_manager = AgentLoopManager(
config=config,
worker_group=actor_rollout_wg,
)
return agent_loop_manager
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from typing import Any
import numpy as np
import pytest
import ray
from omegaconf import DictConfig
from transformers.utils import get_json_schema
from tests.experimental.agent_loop.agent_utils import init_agent_loop_manager
from verl.experimental.agent_loop.agent_loop import get_trajectory_info
from verl.protocol import DataProto
from verl.tools.base_tool import BaseTool, OpenAIFunctionToolSchema
from verl.utils import hf_tokenizer
@pytest.fixture
def init_config() -> DictConfig:
from hydra import compose, initialize_config_dir
with initialize_config_dir(config_dir=os.path.abspath("verl/trainer/config")):
config = compose(
config_name="ppo_trainer",
overrides=[
"actor_rollout_ref.actor.use_dynamic_bsz=true",
# test sleep/wake_up with fsdp offload
"actor_rollout_ref.actor.fsdp_config.param_offload=True",
"actor_rollout_ref.actor.fsdp_config.optimizer_offload=True",
],
)
model_path = "Qwen/Qwen2.5-1.5B-Instruct"
config.actor_rollout_ref.model.path = model_path
config.actor_rollout_ref.rollout.name = os.getenv("ROLLOUT_NAME", "vllm")
config.actor_rollout_ref.rollout.mode = "async"
config.actor_rollout_ref.rollout.prompt_length = 4096
config.actor_rollout_ref.rollout.response_length = 4096
config.actor_rollout_ref.rollout.n = 4
config.actor_rollout_ref.rollout.agent.num_workers = 2
return config
def test_single_turn(init_config):
ray.init(
runtime_env={
"env_vars": {
"TOKENIZERS_PARALLELISM": "true",
"NCCL_DEBUG": "WARN",
"VLLM_LOGGING_LEVEL": "INFO",
"VLLM_USE_V1": "1",
}
}
)
agent_loop_manager = init_agent_loop_manager(init_config)
raw_prompts = [
[
{
"role": "user",
"content": "Let's play a role playing game. Your name is Alice, your favorite color is blue.",
}
],
[{"role": "user", "content": "Let's play a role playing game. Your name is Bob, your favorite color is red."}],
]
batch = DataProto(
non_tensor_batch={
"raw_prompt": np.array(raw_prompts),
"agent_name": np.array(["single_turn_agent"] * len(raw_prompts)),
},
)
n = init_config.actor_rollout_ref.rollout.n
batch = batch.repeat(n)
result = agent_loop_manager.generate_sequences(prompts=batch)
assert len(result) == len(raw_prompts) * n
# check result
seq_len = result.batch["prompts"].size(1) + result.batch["responses"].size(1)
assert result.batch["input_ids"].size(1) == seq_len
assert result.batch["attention_mask"].size(1) == seq_len
assert result.batch["position_ids"].size(1) == seq_len
# check turns
num_turns = result.non_tensor_batch["__num_turns__"]
assert np.all(num_turns == 2)
print("Test passed!")
ray.shutdown()
class WeatherTool(BaseTool):
def get_current_temperature(self, location: str, unit: str = "celsius"):
"""Get current temperature at a location.
Args:
location: The location to get the temperature for, in the format "City, State, Country".
unit: The unit to return the temperature in. Defaults to "celsius". (choices: ["celsius", "fahrenheit"])
Returns:
the temperature, the location, and the unit in a dict
"""
print(f"[DEBUG] get_current_temperature: {location}, {unit}")
return {
"temperature": 26.1,
"location": location,
"unit": unit,
}
def get_openai_tool_schema(self) -> OpenAIFunctionToolSchema:
schema = get_json_schema(self.get_current_temperature)
return OpenAIFunctionToolSchema(**schema)
async def execute(self, instance_id: str, parameters: dict[str, Any], **kwargs) -> tuple[str, float, dict]:
try:
result = self.get_current_temperature(**parameters)
return json.dumps(result), 0, {}
except Exception as e:
return str(e), 0, {}
class WeatherToolWithData(BaseTool):
def get_openai_tool_schema(self) -> OpenAIFunctionToolSchema:
schema = get_json_schema(self.get_temperature_date)
return OpenAIFunctionToolSchema(**schema)
def get_temperature_date(self, location: str, date: str, unit: str = "celsius"):
"""Get temperature at a location and date.
Args:
location: The location to get the temperature for, in the format "City, State, Country".
date: The date to get the temperature for, in the format "Year-Month-Day".
unit: The unit to return the temperature in. Defaults to "celsius". (choices: ["celsius", "fahrenheit"])
Returns:
the temperature, the location, the date and the unit in a dict
"""
print(f"[DEBUG] get_temperature_date: {location}, {date}, {unit}")
return {
"temperature": 25.9,
"location": location,
"date": date,
"unit": unit,
}
async def execute(self, instance_id: str, parameters: dict[str, Any], **kwargs) -> tuple[str, float, dict]:
try:
result = self.get_temperature_date(**parameters)
return json.dumps(result), 0, {}
except Exception as e:
return str(e), 0, {}
def test_tool_agent(init_config):
ray.init(
runtime_env={
"env_vars": {
"TOKENIZERS_PARALLELISM": "true",
"NCCL_DEBUG": "WARN",
"VLLM_LOGGING_LEVEL": "INFO",
"VLLM_USE_V1": "1",
}
}
)
# =========================== 1. Init rollout manager ===========================
tool_config = {
"tools": [
{
"class_name": "tests.experimental.agent_loop.test_basic_agent_loop.WeatherTool",
"config": {"type": "native"},
},
{
"class_name": "tests.experimental.agent_loop.test_basic_agent_loop.WeatherToolWithData",
"config": {"type": "native"},
},
]
}
tool_config_path = "/tmp/tool_config.json"
with open(tool_config_path, "w") as f:
json.dump(tool_config, f)
n = 2
init_config.actor_rollout_ref.rollout.n = n
init_config.actor_rollout_ref.rollout.multi_turn.tool_config_path = tool_config_path
init_config.actor_rollout_ref.rollout.multi_turn.max_parallel_calls = 2
agent_loop_manager = init_agent_loop_manager(init_config)
# =========================== 2. Generate sequences ===========================
raw_prompts = [
[
{"role": "user", "content": "How are you?"},
],
[
{"role": "user", "content": "What's the temperature in Los Angeles now?"},
],
[
{"role": "user", "content": "What's the temperature in New York now?"},
],
[
{
"role": "system",
"content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\n"
"Current Date: 2024-09-30",
},
{"role": "user", "content": "What's the temperature in San Francisco now? How about tomorrow?"},
],
]
batch = DataProto(
non_tensor_batch={
"raw_prompt": np.array([np.array(prompt) for prompt in raw_prompts], dtype=object),
"agent_name": np.array(["tool_agent"] * len(raw_prompts)),
},
)
batch = batch.repeat(n)
result = agent_loop_manager.generate_sequences(prompts=batch)
assert len(result) == len(raw_prompts) * n
# Check turns
num_turns = result.non_tensor_batch["__num_turns__"]
print(f"num_turns: {num_turns}")
for i in range(len(num_turns)):
if i // n == 0:
# [user, assistant]
assert num_turns[i] == 2
else:
# [user, assistant, tool, assistant]
assert num_turns[i] == 4
# Check response_mask
tokenizer = hf_tokenizer(init_config.actor_rollout_ref.model.path)
responses = result.batch["responses"]
response_mask = result.batch["response_mask"]
attention_mask = result.batch["attention_mask"]
assert responses.size() == response_mask.size(), f"{responses.size()} != {response_mask.size()}"
response_length = response_mask.size(1)
for i in range(len(responses)):
# response with tool response
valid_tokens = responses[i][attention_mask[i][-response_length:].bool()]
response_with_obs = tokenizer.decode(valid_tokens)
# response without tool response
valid_tokens = responses[i][response_mask[i].bool()]
response_without_obs = tokenizer.decode(valid_tokens)
assert "<tool_response>" not in response_without_obs, (
f"found <tool_response> in response: {response_without_obs}"
)
assert "</tool_response>" not in response_without_obs, (
f"found </tool_response> in response: {response_without_obs}"
)
print("=========================")
print(response_with_obs)
print("---")
print(response_without_obs)
print("Test passed!")
ray.shutdown()
@pytest.mark.asyncio
async def test_get_trajectory_info():
"""Tests the get_trajectory_info method."""
# Initialize the class to set up class-level attributes
step = 10
index = [1, 1, 3, 3]
expected_info = [
{"step": step, "sample_index": 1, "rollout_n": 0, "validate": False},
{"step": step, "sample_index": 1, "rollout_n": 1, "validate": False},
{"step": step, "sample_index": 3, "rollout_n": 0, "validate": False},
{"step": step, "sample_index": 3, "rollout_n": 1, "validate": False},
]
trajectory_info = await get_trajectory_info(step, index, validate=False)
assert trajectory_info == expected_info
# Copyright 2024 Bytedance Ltd. and/or its affiliates
# Copyright 2023-2024 SGLang Team
# Copyright 2025 ModelBest Inc. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.