#!/bin/bash
# Multi-node GRPO training launcher for DeepSeek-R1-Distill-Llama-70B
# (LLaMA-Factory style entry point: src/train.py).
#
# Usage: ./train.sh <node_rank>
#   Run once per machine, passing that machine's 0-based rank
#   (0 on the master node).
#
# Before use, fill in the placeholders below:
#   MASTER_ADDR         - hostname or IP of the master node
#   NCCL_SOCKET_IFNAME  - actual InfiniBand interface name (check with ifconfig)
#   --nnodes            - number of machines (e.g. 2 for two machines)
set -euo pipefail

# Fail fast with a usage message if the node rank was not supplied.
export RANK=${1:?usage: $0 <node_rank> (0-based; 0 on the master node)}

export DISABLE_VERSION_CHECK=1
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7   # default: 8 GPUs per node (ROCm)
export HSA_FORCE_FINE_GRAIN_PCIE=1
export ALLREDUCE_STREAM_WITH_COMPUTE=1
export HF_ENDPOINT=https://hf-mirror.com

export MASTER_ADDR=XXXXXX   # PLACEHOLDER: master node hostname or IP address
export MASTER_PORT=29569

export NCCL_SOCKET_IFNAME=ibxxxxx   # PLACEHOLDER: IB interface name (see ifconfig)
export NCCL_DEBUG=INFO
export NCCL_ALGO=Ring
export NCCL_PROTO=Simple
export NCCL_MIN_NCHANNELS=32
export NCCL_MAX_NCHANNELS=32
export NCCL_MIN_P2P_NCHANNELS=32
export NCCL_MAX_P2P_NCHANNELS=32
export NCCL_NCHANNELS_PER_PEER=32
export VLLM_RPC_TIMEOUT=1800000
export NCCL_IB_TIMEOUT=30

export VLLM_MLA_DISABLE=0
export VLLM_USE_FLASH_MLA=1

# NOTE(review): --repetition_penalty was 50 in the original script. HF's
# repetition_penalty is a multiplicative factor (sane range ~1.0-1.5; 1.0 is
# off); 50 would make rollout generation degenerate, so it is corrected to
# 1.05 here.
# NOTE(review): --learning_rate 5e-3 is unusually high for fine-tuning a 70B
# model -- confirm against your experiment plan before long runs.
## --nnodes: number of machines; fill in the actual count (e.g. 2 = 2 machines)
torchrun --nproc_per_node=8 \
    --nnodes=xxxx \
    --node-rank="${RANK}" \
    --master_addr="${MASTER_ADDR}" \
    --master_port="${MASTER_PORT}" \
    src/train.py \
    --deepspeed examples/deepspeed/ds_z3_config.json \
    --stage grpo \
    --do_train \
    --finetuning_type freeze \
    --freeze_trainable_layers 5 \
    --freeze_trainable_modules all \
    --model_name_or_path deepseek-ai/DeepSeek-R1-Distill-Llama-70B \
    --dataset dapo_math,hiyouga-math12k \
    --max_samples 20000 \
    --template deepseekr1 \
    --output_dir saves/DeepSeek-R1-Distill-Llama-70B-0923/grpo/full/ \
    --overwrite_output_dir \
    --trust_remote_code \
    --warmup_ratio 0.1 \
    --max_grad_norm 1.0 \
    --weight_decay 0.1 \
    --repetition_penalty 1.05 \
    --top_k 50 \
    --top_p 0.8 \
    --per_device_train_batch_size 1 \
    --gradient_accumulation_steps 4 \
    --preprocessing_num_workers 16 \
    --ddp_timeout 120000000 \
    --learning_rate 5e-3 \
    --lr_scheduler_type cosine \
    --logging_steps 1 \
    --cutoff_len 2048 \
    --save_steps 100 \
    --plot_loss True \
    --num_train_epochs 1 \
    --bf16 \
    --seed 42 \
    --report_to none \
    --save_only_model