Commit b84161d1 authored by jerrrrry

Initial commit
# .github/dependabot.yml
# Enable Dependabot to check the project's dependencies.
# Dependabot will open pull requests to update dependencies automatically.
version: 2
updates:
  - package-ecosystem: pip
    directory: "/"
    schedule:
      interval: weekly
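
# A possible extension (a sketch only, not part of this commit): Dependabot
# can also keep the pinned GitHub Actions used in the workflows below up to
# date by adding a second entry to the same updates list:
#
#   - package-ecosystem: "github-actions"
#     directory: "/"
#     schedule:
#       interval: weekly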

# .github/workflows/dataset.yml
name: dataset

on:
  # Trigger the workflow on pull request,
  # but only for the main branch
  pull_request:
    branches:
      - main
    paths:
      - "verl/utils/**/*.py"
      - .github/workflows/dataset.yml
      - "!verl/workers/fsdp_workers.py"
      - "!verl/workers/megatron_workers.py"
      - "!recipe/**"

# Cancel jobs on the same ref if a new one is triggered
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
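# Worked example (illustrative PR number): on a pull request, github.ref is
# "refs/pull/<number>/merge", so the group resolves to something like
# "dataset-refs/pull/123/merge" and a newer push to the same PR cancels the
# in-flight run; on main the condition is false, so main runs never cancel.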

# Declare permissions as read-only.
permissions:
  contents: read

jobs:
  ray:
    runs-on: [L20x8]
    timeout-minutes: 10 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip install -e .[test]
          pip install --upgrade "ray>=2.40.0"
          pip install cupy-cuda12x
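      # cupy-cuda12x is the CUDA 12.x build of CuPy, matching the cu124
      # container image above; the "ray test using cupy" step below is what
      # exercises it.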
      - name: Running dataset tests
        run: |
          if [ ! -d "$HOME/verl-data" ]; then
            git clone --depth 1 https://github.com/eric-haibin-lin/verl-data ~/verl-data
          fi
          python3 examples/data_preprocess/geo3k.py
          pytest -s -x tests/verl/utils/dataset/test_rl_dataset.py
          pytest -s -x tests/verl/utils/dataset/test_sft_dataset.py
          pytest -s -x tests/verl/utils/test_import_utils.py
          # pytest -s -x tests/verl/utils/dataset/test_rm_dataset.py
      - name: Running ray test using cupy (move to L20 when the dockerfile is ready)
        run: |
          cd tests/ray
          pytest -s -x test_rvdz.py

# .github/workflows/e2e_ascend.yml
name: e2e_ascend

on:
  # Trigger the workflow on pull request,
  # but only for the main branch
  pull_request:
    branches:
      - main
    paths:
      - "**/*.py"
      - .github/workflows/e2e_ascend.yml

permissions:
  contents: read

jobs:
  test:
    name: verl Ascend test (self-hosted)
    runs-on: [self-hosted, npu-0]
    timeout-minutes: 5 # Increase this timeout value as needed
    container:
      image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10
      volumes:
        - /usr/local/dcmi:/usr/local/dcmi
        - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
        - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/
        # Use the self-hosted cache to speed up pip and model downloads
        # - /home/action/actions-runner/_work/cache:/github/home/.cache/
      options: >-
        --device /dev/davinci0
        --device /dev/davinci_manager
        --device /dev/devmm_svm
        --device /dev/hisi_hdc
        --privileged
        --network "host"
    steps:
      - name: Check npu and CANN info
        run: |
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
          npu-smi info
      - name: Checkout volcengine/verl repo
        uses: actions/checkout@v4
      - name: Run test
        run: |
          lscpu

# .github/workflows/e2e_dapo.yml
name: e2e_dapo

on:
  # Trigger the workflow on pull request,
  # but only for the main branch
  pull_request:
    branches:
      - main
      - v0.2.x
    paths:
      - "**/*.py"
      # Home
      - "recipe/dapo/src"
      # Entrypoints
      - ".github/workflows/e2e_dapo.yml"
      - "examples/data_preprocess/gsm8k.py"
      - "tests/e2e/run_dapo.sh"
      - "!examples"
      - "!verl/trainer/main_*.py"
      - "!verl/trainer/fsdp_sft_trainer.py"
      # Megatron
      - "!verl/workers/**/megatron_*.py"

# Declare permissions as read-only.
permissions:
  contents: read

jobs:
  e2e_dapo:
    runs-on: [L20x8]
    timeout-minutes: 40 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install -e .[test,gpu]
      - name: Prepare GSM8K dataset
        run: |
          python3 examples/data_preprocess/gsm8k.py
      - name: Running the E2E test with the DAPO algorithm
        run: |
          ray stop --force
          bash tests/e2e/run_dapo.sh

# .github/workflows/e2e_eval_aime24.yml
name: e2e_eval_aime24

on:
  # Trigger the workflow on pull request,
  # but only for the main branch
  pull_request:
    branches:
      - main
    paths:
      - "**/*.py"
      # Home
      - "recipe/r1"
      - "!recipe/r1/README.md"
      # Entrypoints
      - ".github/workflows/e2e_eval_aime24.yml"
      - "tests/e2e/run_r1_distill_qwen_aime24_eval.sh"
      - "verl/trainer/main_generation.py"
      - "verl/trainer/config/generation.yaml"
      - "!examples"
      - "!verl/trainer/main_*.py"
      - "!verl/trainer/fsdp_sft_trainer.py"
      # Other recipes
      - "!recipe"

# Declare permissions as read-only.
permissions:
  contents: read

jobs:
  e2e_eval_aime24:
    runs-on: [L20x8]
    timeout-minutes: 40 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install -e .[test,gpu,math]
      - name: Prepare AIME 2024 dataset
        run: |
          ray stop --force
          python3 recipe/r1/data_process.py --task aime2024
      - name: Running generation and evaluation on AIME 2024
        run: |
          ray stop --force
          bash tests/e2e/run_r1_distill_qwen_aime24_eval.sh

# .github/workflows/e2e_ppo_trainer.yml
name: e2e_ppo_trainer

on:
  # Trigger the workflow on pull request,
  # but only for the main branch
  pull_request:
    branches:
      - main
      - v0.2.x
    paths:
      - "**/*.py"
      # Entrypoints
      - ".github/workflows/e2e_ppo_trainer.yml"
      - "examples/data_preprocess/gsm8k.py"
      - "examples/data_preprocess/geo3k.py"
      - "tests/e2e/ppo_trainer"
      - "verl/trainer/main_ppo.py"
      - "verl/trainer/config/ppo_trainer.yaml"
      - "!examples"
      - "!verl/trainer/main_*.py"
      - "!verl/trainer/fsdp_sft_trainer.py"
      # Recipes
      - "!recipe"
      # Megatron
      - "!verl/workers/**/megatron_*.py"

# Cancel jobs on the same ref if a new one is triggered
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare permissions as read-only.
permissions:
  contents: read
jobs:
  e2e_ppo_trainer_vllm:
    runs-on: [L20x8]
    timeout-minutes: 40 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install -e .[test,vllm]
      - name: Prepare GSM8K dataset
        run: |
          ray stop --force
          python3 examples/data_preprocess/gsm8k.py
      # Function RM
      - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving
        run: |
          ray stop --force
          VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 bash tests/e2e/ppo_trainer/run_function_reward.sh
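      # Note: every variation below reuses this same entrypoint and is
      # parameterized purely through environment variables (RESUME_MODE,
      # RM_PAD, ADV_ESTIMATOR, CUSTOM_REWARD_FN, USE_KL, MODEL_ID, ...);
      # unset variables presumably fall back to defaults inside
      # run_function_reward.sh.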
      - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm after resuming
        run: |
          ray stop --force
          RESUME_MODE=auto bash tests/e2e/ppo_trainer/run_function_reward.sh
      - name: Running GSM8K E2E without rmpad using function rm
        run: |
          ray stop --force
          RM_PAD=False bash tests/e2e/ppo_trainer/run_function_reward.sh
      - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (GRPO)
        run: |
          ray stop --force
          ADV_ESTIMATOR=grpo USE_KL=True bash tests/e2e/ppo_trainer/run_function_reward.sh
      - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (ReMax)
        run: |
          ray stop --force
          ADV_ESTIMATOR=remax USE_KL=True bash tests/e2e/ppo_trainer/run_function_reward.sh
      - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using customized reward function
        run: |
          ray stop --force
          CUSTOM_REWARD_FN=True bash tests/e2e/ppo_trainer/run_function_reward.sh
      - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with in-reward kl and kl loss
        run: |
          ray stop --force
          USE_KL=True bash tests/e2e/ppo_trainer/run_function_reward.sh
      - name: Running GSM8K E2E training tests with FSDP on 8 L20 GPUs (DeepSeek)
        run: |
          ray stop --force
          MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/e2e/ppo_trainer/run_function_reward.sh
      # Model RM
      - name: Running GSM8K E2E with rmpad using model rm
        run: |
          ray stop --force
          bash tests/e2e/ppo_trainer/run_model_reward.sh
      - name: Running GSM8K E2E without rmpad using model rm
        run: |
          ray stop --force
          RM_PAD=False bash tests/e2e/ppo_trainer/run_model_reward.sh
      - name: Running GSM8K E2E with rmpad using model rm and ulysses sp=2
        run: |
          ray stop --force
          SP_SIZE=2 bash tests/e2e/ppo_trainer/run_model_reward.sh
      - name: Running GSM8K E2E with rmpad using model rm and dynamic batch size
        run: |
          ray stop --force
          SEQ_BALANCE=True bash tests/e2e/ppo_trainer/run_model_reward.sh
      - name: Running GSM8K E2E with rmpad using model rm with Liger Kernel enabled
        run: |
          ray stop --force
          LIGER=True bash tests/e2e/ppo_trainer/run_model_reward.sh
  e2e_ppo_trainer_vllm_vlm:
    runs-on: [L20x8]
    timeout-minutes: 40 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: hiyouga/verl:ngc-th2.6.0-cu126-vllm0.8.3-flashinfer0.2.2-cxx11abi0
      options: --gpus all --shm-size=50g # The visual dataloader requires large shared memory
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install -e .[test,geo,vllm]
      # Geo3k
      - name: Prepare Geo3k dataset
        run: |
          ray stop --force
          python3 examples/data_preprocess/geo3k.py
      - name: Running Geo3k VLM E2E training tests on 8 L20 GPUs with rmpad using function rm
        run: |
          ray stop --force
          TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
            MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
            MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
            ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
            bash tests/e2e/ppo_trainer/run_function_reward.sh
  e2e_ppo_trainer_sglang:
    runs-on: [L20x8]
    timeout-minutes: 40 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: ocss884/verl-sglang:ngc-th2.5.1-cu126-sglang0.4.4.post4
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install -e .[test,gpu,sglang] --no-deps
      - name: Prepare GSM8K dataset
        run: |
          ray stop --force
          python3 examples/data_preprocess/gsm8k.py
      - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm and save ckpt
        run: |
          ray stop --force
          ENGINE=sglang bash tests/e2e/ppo_trainer/run_function_reward.sh
  e2e_ppo_trainer_sglang_vlm:
    runs-on: [L20x8]
    timeout-minutes: 40 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: ocss884/verl-sglang:ngc-th2.5.1-cu126-sglang0.4.4.post4
      options: --gpus all --shm-size=50g # The visual dataloader requires large shared memory
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install -e .[test,geo,gpu,sglang]
      # Geo3k
      - name: Prepare Geo3k dataset
        run: |
          ray stop --force
          python3 examples/data_preprocess/geo3k.py
      - name: Running Geo3k VLM E2E training tests on 8 L20 GPUs with rmpad using function rm
        run: |
          ray stop --force
          TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
            MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
            MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
            ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
            ENGINE=sglang bash tests/e2e/ppo_trainer/run_function_reward.sh

# .github/workflows/e2e_ppo_trainer_megatron.yml
name: e2e_ppo_trainer_megatron
# latest version: Megatron-LM core_r0.11.0 https://github.com/NVIDIA/Megatron-LM/tree/core_r0.11.0

on:
  # Trigger the workflow on pull request,
  # but only for the main branch
  pull_request:
    branches:
      - main
      - v0.3.x
    paths:
      - "**/*.py"
      # Entrypoints
      - ".github/workflows/e2e_ppo_trainer_megatron.yml"
      - "examples/data_preprocess/gsm8k.py"
      - "tests/e2e/run_ppo_trainer_megatron.sh"
      - "verl/trainer/main_ppo.py"
      - "verl/trainer/config/ppo_megatron_trainer.yaml"
      - "!examples"
      - "!verl/trainer/main_*.py"
      - "!verl/trainer/fsdp_sft_trainer.py"
      # Recipes
      - "!recipe"
      # FSDP
      - "!verl/workers/**/*dp_*.py"

# Cancel jobs on the same ref if a new one is triggered
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare permissions as read-only.
permissions:
  contents: read
jobs:
  e2e_ppo_trainer_megatron:
    runs-on: [L20x8]
    timeout-minutes: 40 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
      VLLM_ATTENTION_BACKEND: XFORMERS # TODO: remove once vLLM is updated
    container:
      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install -e .[test]
      - name: Prepare GSM8K dataset
        run: |
          python3 examples/data_preprocess/gsm8k.py
      - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen) with validation and saving
        run: |
          ray stop --force
          VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 bash tests/e2e/run_ppo_trainer_megatron.sh
      - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen) after resuming
        run: |
          ray stop --force
          RESUME_MODE=auto bash tests/e2e/run_ppo_trainer_megatron.sh
      - name: Test Megatron checkpoints merging function (Qwen Actor and Critic)
        run: |
          exp_name="qwen2.5-0.5b-megatron-gsm8k-minimal"
          python scripts/model_merger.py --backend megatron --tie-word-embedding --hf_model_path Qwen/Qwen2.5-0.5B --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
          python scripts/model_merger.py --backend megatron --is-value-model --hf_model_path Qwen/Qwen2.5-0.5B --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
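      # Reading of the flags above (inferred from their names, not verified
      # against scripts/model_merger.py): --tie-word-embedding is presumably
      # needed because Qwen2.5-0.5B ties its input and output embeddings,
      # --is-value-model marks the critic's scalar value head, and
      # --test/--test_hf_dir compare the merged Megatron shards against the
      # saved HuggingFace-format reference.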
      - name: Running GRPO GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen)
        run: |
          ray stop --force
          ADV_ESTIMATOR=grpo bash tests/e2e/run_ppo_trainer_megatron.sh
      - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
        run: |
          ray stop --force
          SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/e2e/run_ppo_trainer_megatron.sh
      - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek) after resuming
        run: |
          ray stop --force
          RESUME_MODE=auto MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/e2e/run_ppo_trainer_megatron.sh
      - name: Running GRPO GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
        run: |
          ray stop --force
          ADV_ESTIMATOR=grpo MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/e2e/run_ppo_trainer_megatron.sh
      - name: Test Megatron checkpoints merging function (DeepSeek Actor and Critic)
        run: |
          exp_name="deepseek-coder-1.3b-instruct-megatron-gsm8k-minimal"
          python scripts/model_merger.py --backend megatron --hf_model_path deepseek-ai/deepseek-coder-1.3b-instruct --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
          python scripts/model_merger.py --backend megatron --is-value-model --hf_model_path deepseek-ai/deepseek-coder-1.3b-instruct --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface

# .github/workflows/e2e_prime.yml
name: e2e_prime

on:
  # Trigger the workflow on pull request,
  # but only for the main branch
  pull_request:
    branches:
      - main
      - v0.2.x
    paths:
      - "**/*.py"
      # Home
      - "recipe/prime"
      # Entrypoints
      - ".github/workflows/e2e_prime.yml"
      - "examples/data_preprocess/gsm8k.py"
      - "tests/e2e/run_prime.sh"
      - "!examples"
      - "!verl/trainer/main_*.py"
      - "!verl/trainer/fsdp_sft_trainer.py"
      # Megatron
      - "!verl/workers/**/megatron_*.py"

# Declare permissions as read-only.
permissions:
  contents: read

jobs:
  e2e_prime:
    runs-on: [L20x8]
    timeout-minutes: 40 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install -e .[test,gpu]
      - name: Prepare GSM8K dataset
        run: |
          ray stop --force
          python3 examples/data_preprocess/gsm8k.py
      - name: Running GSM8K E2E with the PRIME algorithm
        run: |
          ray stop --force
          bash tests/e2e/run_prime.sh

# .github/workflows/e2e_sft.yml
name: e2e_sft

on:
  # Trigger the workflow on pull request,
  # but only for the main branch
  pull_request:
    branches:
      - main
      - v0.2.x
    paths:
      - "**/*.py"
      # Entrypoints
      - ".github/workflows/e2e_sft.yml"
      - "examples/data_preprocess/gsm8k.py"
      - "tests/e2e/sft"
      - "verl/trainer/fsdp_sft_trainer.py"
      - "verl/trainer/config/sft_trainer.yaml"
      - "!examples"
      - "!verl/trainer/main_*.py"
      # Recipes
      - "!recipe"
      # Megatron
      - "!verl/workers/**/megatron_*.py"

# Cancel jobs on the same ref if a new one is triggered
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare permissions as read-only.
permissions:
  contents: read

jobs:
  e2e_sft:
    runs-on: [L20x8]
    timeout-minutes: 20 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install peft
          pip3 install -e .[test,gpu]
      - name: Prepare GSM8K dataset
        run: |
          ray stop --force
          python3 examples/data_preprocess/gsm8k.py
      - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm
        run: |
          ray stop --force
          bash tests/e2e/sft/run_sft.sh
      - name: Running GSM8K E2E training tests on 8 L20 GPUs w/o rmpad using function rm
        run: |
          ray stop --force
          RM_PAD=False bash tests/e2e/sft/run_sft.sh
      - name: Running GSM8K E2E training tests on 8 L20 GPUs with sequence parallelism
        run: |
          ray stop --force
          SP_SIZE=2 bash tests/e2e/sft/run_sft.sh
      - name: Check the loss difference between the sequence-parallel and default implementations
        run: |
          ray stop --force
          ENTRYPOINT="tests/e2e/sft/test_sp_loss_match.py" SP_SIZE=2 bash tests/e2e/sft/run_sft.sh
      - name: Running GSM8K E2E training tests on 8 L20 GPUs with sequence parallelism and liger
        run: |
          ray stop --force
          SP_SIZE=2 LIGER=True bash tests/e2e/sft/run_sft.sh
      - name: Running GSM8K E2E training tests with LoRA
        run: |
          ray stop --force
          LORA_RANK=32 bash tests/e2e/sft/run_sft.sh
      # TODO: multiturn

# .github/workflows/model.yml
name: model_rmpad

on:
  # Trigger the workflow on pull request,
  # but only for the main branch
  pull_request:
    branches:
      - main
      - v0.3.x
    paths:
      - "verl/**/*.py"
      - "tests/**/*.sh"
      - "tests/model/*"
      - .github/workflows/model.yml
      - "!recipe/**"

# Declare permissions as read-only.
permissions:
  contents: read
jobs:
  model_rmpad:
    runs-on: [L20x8]
    timeout-minutes: 20 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository and upgrade to the latest transformers/flash_attn
        run: |
          pip3 install -e .[test]
          pip3 install --upgrade transformers
      - name: Running rmpad model tests on 8 L20 GPUs + flash_attn 2.5.8
        run: |
          pytest -s tests/model/test_transformer.py
      - name: Running rmpad model tests on 8 L20 GPUs + latest flash_attn
        run: |
          pip3 install --upgrade flash_attn --no-build-isolation
          pytest -s tests/model/test_transformer.py
      - name: Running FSDP rmpad model tests on 8 L20 GPUs + latest flash_attn
        run: |
          torchrun --nproc_per_node=8 tests/checkpoint/test_fsdp_ckpt.py
      - name: Running transformers ulysses tests on 8 L20 GPUs + latest transformers
        run: |
          torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
      - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.49.0
        run: |
          pip3 install transformers==4.49.0
          torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
      - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.48.0
        run: |
          pip3 install transformers==4.48.0
          torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
      - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.47.0
        run: |
          pip3 install transformers==4.47.0
          torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
      - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.46.0
        run: |
          pip3 install transformers==4.46.0
          torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
      - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.45.0
        run: |
          pip3 install transformers==4.45.0
          torchrun --nproc_per_node=8 -m pytest tests/model/test_transformers_ulysses.py
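      # The repeated pin-and-test steps above could equivalently be expressed
      # as a job-level strategy matrix; a minimal sketch (not part of this
      # commit):
      #
      #   strategy:
      #     matrix:
      #       transformers: ["4.45.0", "4.46.0", "4.47.0", "4.48.0", "4.49.0"]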
      - name: Run distributed test
        run: |
          bash tests/distributed/run_all.sh

# Pylint workflow
name: Pylint Check

on:
  push:
    paths:
      - '**.py'
      - 'requirements.txt'
      - 'pyproject.toml'
  pull_request:
    paths:
      - '**.py'
      - 'requirements.txt'
      - 'pyproject.toml'

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'
      - name: Install pylint (version from requirements.txt)
        run: |
          PYLINT_VERSION=$(grep '^pylint' requirements.txt)
          if [ -z "$PYLINT_VERSION" ]; then
            echo "No pylint version found in requirements.txt"
            exit 1
          fi
          # Install only pylint to avoid dependency problems on the CPU runner
          pip install "$PYLINT_VERSION"
      - name: Run pylint
        run: |
          pylint --recursive=y --rcfile=pyproject.toml ./

# .github/workflows/ray_test.yml
name: ray

on:
  # Trigger the workflow on push or pull request,
  # but only for the main branch
  push:
    branches:
      - main
      - v0.2.x
    paths:
      - "verl/single_controller/*.py"
      - .github/workflows/ray_test.yml
  pull_request:
    branches:
      - main
      - v0.2.x
    paths:
      - "verl/single_controller/*.py"
      - .github/workflows/ray_test.yml
      - "!recipe/**"

# Cancel jobs on the same ref if a new one is triggered
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare permissions as read-only.
permissions:
  contents: read

jobs:
  ray:
    runs-on: [L20x8]
    timeout-minutes: 10 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip install -e .[test]
          pip install --upgrade "ray>=2.40.0"
      - name: Running ray tests that need 8 GPUs
        run: |
          cd tests/ray
          pytest -s -x --ignore=test_check_worker_alive.py --ignore=test_rvdz.py .

# .github/workflows/sandbox.yml
name: sandbox

on:
  # Trigger the workflow on pull request,
  # but only for the main branch
  pull_request:
    branches:
      - main
      - v0.3.x
    paths:
      - "**/*.py"
      - .github/workflows/sandbox.yml

# Cancel jobs on the same ref if a new one is triggered
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare permissions as read-only.
permissions:
  contents: read

jobs:
  sandbox:
    runs-on: [L20x8]
    timeout-minutes: 10 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
    container:
      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install -e .[test,prime]
          pip3 install vllm==0.5.4
      - name: Running sandbox tests on 8 L20 GPUs
        run: |
          cd tests/sandbox
          pytest -s -x .

# .github/workflows/sanity.yml
name: sanity

on:
  # Trigger the workflow on push or pull request,
  # but only for the main branch
  push:
    branches:
      - main
      - v0.2.x
    paths:
      - "**/*.py"
      - .github/workflows/sanity.yml
  pull_request:
    branches:
      - main
      - v0.2.x
    paths:
      - "**/*.py"
      - .github/workflows/sanity.yml

# Cancel jobs on the same ref if a new one is triggered
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare permissions as read-only.
permissions:
  contents: read

jobs:
  sanity:
    runs-on: ubuntu-latest
    timeout-minutes: 5 # Increase this timeout value as needed
    strategy:
      matrix:
        python-version: ["3.10"]
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install the current repository
        run: |
          pip install -e .[test]
      - name: Run sanity test
        run: |
          pytest -s -x tests/sanity
      - name: Run utility test
        run: |
          pytest -s -x tests/utility
      - name: Run license test
        run: |
          python3 tests/sanity/check_license.py --directory .

# OSSF Scorecard workflow
# This workflow uses actions that are not certified by GitHub. They are provided
# by a third-party and are governed by separate terms of service, privacy
# policy, and support documentation.
name: Scorecard supply-chain security

on:
  # For Branch-Protection check. Only the default branch is supported. See
  # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection
  branch_protection_rule:
  # To guarantee Maintained check is occasionally updated. See
  # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained
  schedule:
    - cron: '27 7 * * 1'
  push:
    branches: [ "main" ]

# Declare default permissions as read only.
permissions: read-all

jobs:
  analysis:
    name: Scorecard analysis
    runs-on: ubuntu-latest
    permissions:
      # Needed to upload the results to code-scanning dashboard.
      security-events: write
      # Needed to publish results and get a badge (see publish_results below).
      id-token: write
      # Uncomment the permissions below if installing in a private repository.
      # contents: read
      # actions: read
    steps:
      - name: "Checkout code"
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          persist-credentials: false
      - name: "Run analysis"
        uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1
        with:
          results_file: results.sarif
          results_format: sarif
          # (Optional) "write" PAT token. Uncomment the `repo_token` line below if:
          # - you want to enable the Branch-Protection check on a *public* repository, or
          # - you are installing Scorecard on a *private* repository
          # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional.
          # repo_token: ${{ secrets.SCORECARD_TOKEN }}
          # Public repositories:
          # - Publish results to OpenSSF REST API for easy access by consumers
          # - Allows the repository to include the Scorecard badge.
          # - See https://github.com/ossf/scorecard-action#publishing-results.
          # For private repositories:
          # - `publish_results` will always be set to `false`, regardless
          #   of the value entered here.
          publish_results: true
      # Upload the results to GitHub's code scanning dashboard (optional).
      # Commenting out will disable upload of results to your repo's Code Scanning dashboard
      - name: "Upload to code-scanning"
        uses: github/codeql-action/upload-sarif@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 # v3.28.9
        with:
          sarif_file: results.sarif

# Secret scanning workflow (TruffleHog)
on:
  push:
    branches:
      - main
  pull_request:

permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          fetch-depth: 0
      - name: Secret Scanning
        uses: trufflesecurity/trufflehog@7dc056a193116ba8d82154bf0549381c8fb8545c # v3.88.14
        with:
          extra_args: --results=verified,unknown
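# Note on extra_args above: --results=verified,unknown limits findings to
# secrets TruffleHog could actively verify plus those whose verification was
# inconclusive, which cuts noise from clearly-unverified matches.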

# .github/workflows/vllm.yml
name: vllm

on:
  # Trigger the workflow on pull request,
  # but only for the main branch
  pull_request:
    branches:
      - main
      - v0.3.x
    paths:
      - "**/*.py"
      # Entrypoints
      - ".github/workflows/vllm.yml"
      - "tests/generation"
      - "verl/trainer/main_generation.py"
      - "verl/trainer/config/generation.yaml"
      - "!examples"
      - "!verl/trainer/main_*.py"
      - "!verl/trainer/fsdp_sft_trainer.py"
      # Recipes
      - "!recipe"
      # FSDP
      - "!verl/workers/**/*dp_*.py"
      # Megatron
      - "!verl/workers/**/megatron_*.py"
      # SGLang
      - "!**/*sglang*"

# Cancel jobs on the same ref if a new one is triggered
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare permissions as read-only.
permissions:
  contents: read
jobs:
  vllm:
    runs-on: [L20x8]
    timeout-minutes: 60 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
      HF_ENDPOINT: "https://hf-mirror.com"
    container:
      image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install -e .[test]
          pip3 install vllm==0.5.4
      - name: Download models to use
        run: |
          huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct
          huggingface-cli download 'Qwen/Qwen2-7B-Instruct'
          huggingface-cli download 'deepseek-ai/deepseek-llm-7b-chat'
          # Disable further Hub requests to avoid network errors. Written to
          # GITHUB_ENV so it persists into later steps; a plain "export" at
          # the end of this step would have no effect on them.
          echo "HF_HUB_OFFLINE=1" >> "$GITHUB_ENV"
      - name: Running vllm tests on 8 L20 GPUs
        run: |
          cd tests/rollout
          torchrun --standalone --nnodes=1 --nproc_per_node=8 $(which pytest) -s test_vllm_hf_loader.py
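      # torchrun expects a script path rather than a console command, so
      # "$(which pytest)" resolves the pytest entrypoint to a file; the 8
      # ranks then run the same test module inside one torch.distributed job.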
      - name: Test a newer vLLM (0.7.3)
        run: |
          pip3 install --upgrade vllm==0.7.3
          cd tests/rollout
          torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s test_vllm_spmd.py
      - name: Run Qwen 0.5B generation test
        run: |
          cd tests/generation
          export OUTPUT_PATH="${HOME}/data/gen/qwen_05_gen_test.parquet"
          MODEL_ID=Qwen/Qwen2.5-0.5B-Instruct NGPUS_PER_NODE=4 GEN_TP=2 bash ./run_gen_qwen05.sh
          rm -rf "${OUTPUT_PATH}"
      - name: Run Qwen 0.5B generation test when world_size == 1
        run: |
          cd tests/generation
          export OUTPUT_PATH="${HOME}/data/gen/qwen_05_gen_test.parquet"
          MODEL_ID=Qwen/Qwen2.5-0.5B-Instruct NGPUS_PER_NODE=1 GEN_TP=1 bash ./run_gen_qwen05.sh
          rm -rf "${OUTPUT_PATH}"

# .github/workflows/yapf_format.yml
name: yapf

on:
  # Trigger the workflow on push or pull request,
  # but only for the main branch
  push:
    branches:
      - main
      - v0.2.x
    paths:
      - "**/*.py"
      - .github/workflows/yapf_format.yml
  pull_request:
    branches:
      - main
      - v0.2.x
    paths:
      - "**/*.py"
      - .github/workflows/yapf_format.yml

# Cancel jobs on the same ref if a new one is triggered
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare permissions as read-only.
permissions:
  contents: read

jobs:
  yapf:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.12"]
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      # - name: checkout
      #   run: |
      #     commits=${{ github.event.pull_request.commits }}
      #     if [[ -n "$commits" ]]; then
      #       # Prepare enough depth for diffs with main
      #       git fetch --depth="$(( commits + 1 ))"
      #     fi
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install --upgrade yapf
          pip install toml==0.10.2
      - name: Running yapf
        run: |
          yapf -r -vv -d --style=./.style.yapf verl tests examples recipe
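      # To apply the same formatting locally, swap the diff flag (-d) for
      # in-place rewriting (-i):
      #   yapf -r -i --style=./.style.yapf verl tests examples recipe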

# .gitignore
**/*.pt
**/checkpoints
**/wget-log
**/_build/
**/*.ckpt
**/outputs
**/*.tar.gz
**/playground
**/wandb
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
dataset/*
tensorflow/my_graph/*
.idea/
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
tmp/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
.venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
# vscode
.vscode
# Mac
.DS_Store
# output logs
tests/e2e/toy_examples/deepspeed/synchronous/output.txt
# vim
*.swp
# ckpt
*.lock
# data
*.parquet
# local logs
logs
log
outputs

# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
version: 2

build:
  os: ubuntu-22.04
  tools:
    python: "3.11"
    rust: "1.70"

sphinx:
  configuration: docs/conf.py

python:
  install:
    - requirements: docs/requirements-docs.txt
    - method: pip
      path: .
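# Note: the "- method: pip / path: ." entry installs the repository itself
# into the docs build environment, presumably so Sphinx can import the verl
# package (e.g. for autodoc) when building the documentation.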