run_r1_distill_qwen_aime24_eval.sh 1.08 KB
Newer Older
jerrrrry's avatar
jerrrrry committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/usr/bin/env bash
set -xeuo pipefail

huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
    --local-dir $HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B

python3 -m verl.trainer.main_generation \
    trainer.nnodes=1 \
    trainer.n_gpus_per_node=8 \
    data.path=$HOME/data/r1/test.parquet \
    data.prompt_key=prompt \
    data.batch_size=1024 \
    data.n_samples=1 \
    data.output_path=$HOME/data/r1/test-output-k1.parquet \
    model.path=$HOME/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
    rollout.temperature=0.6 \
    rollout.top_p=0.95 \
    rollout.prompt_length=1024 \
    rollout.response_length=32768 \
    rollout.tensor_model_parallel_size=1 \
    rollout.gpu_memory_utilization=0.95 \
    rollout.max_num_batched_tokens=65536 \
    rollout.enforce_eager=False \
    rollout.free_cache_engine=False

python3 -m recipe.r1.main_eval \
    data.path=$HOME/data/r1/test-output-k1.parquet \
    data.prompt_key=prompt \
    data.response_key=responses \
    custom_reward_function.path=recipe/r1/reward_score.py \
    custom_reward_function.name=reward_func