#!/usr/bin/env bash
# Fine-tune a model with train_math.py (FSDP, launched through
# torch.distributed.launch), then evaluate the saved checkpoint with
# eval_gsm8k.py and eval_math.py.
#
# Before running, edit MODEL_PATH, SAVE_PATH and the --data_path value below.

# Abort on the first failing command so that evaluation never runs against a
# missing or partially written checkpoint.
set -euo pipefail

export MODEL_PATH='your model path'
export SAVE_PATH='path/to/save'
export MASTER_ADDR="localhost"
export MASTER_PORT="1231"
# Bind the Gloo and NCCL sockets to the loopback interface.
export GLOO_SOCKET_IFNAME="lo"
export NCCL_SOCKET_IFNAME="lo"
export WANDB_DISABLED=true

# Best effort: WANDB_DISABLED is already exported above, so a missing or
# failing wandb CLI must not stop the run.
wandb offline || printf 'warning: "wandb offline" failed; continuing\n' >&2

# GPUs used for training. The number of worker processes is derived from this
# list so that --nproc_per_node can never exceed the number of visible devices
# (a worker whose local rank has no matching device cannot start).
train_gpus="0,1,2,3"
IFS=',' read -r -a train_gpu_ids <<< "${train_gpus}"
nproc_per_node=${#train_gpu_ids[@]}

# Effective global batch size is
#   per_device_train_batch_size * gradient_accumulation_steps * nproc_per_node
# so adjust --gradient_accumulation_steps when changing train_gpus if a
# specific global batch size is required.
CUDA_VISIBLE_DEVICES="${train_gpus}" python3 -m torch.distributed.launch \
  --master_addr "${MASTER_ADDR}" \
  --master_port "${MASTER_PORT}" \
  --nproc_per_node="${nproc_per_node}" \
  --use_env train_math.py \
  --model_name_or_path "${MODEL_PATH}" \
  --data_path "your data path" \
  --data_length 10000000 \
  --bf16 True \
  --output_dir "${SAVE_PATH}" \
  --num_train_epochs 3 \
  --per_device_train_batch_size 4 \
  --per_device_eval_batch_size 4 \
  --gradient_accumulation_steps 4 \
  --evaluation_strategy "no" \
  --save_strategy "steps" \
  --save_steps 1000 \
  --save_total_limit 2 \
  --learning_rate 2e-5 \
  --weight_decay 0. \
  --warmup_ratio 0.03 \
  --lr_scheduler_type "cosine" \
  --logging_steps 1 \
  --fsdp "full_shard auto_wrap" \
  --fsdp_transformer_layer_cls_to_wrap 'LlamaDecoderLayer' \
  --tf32 True

# Accuracy evaluation (eight GPUs recommended). To use a different number of
# GPUs, the corresponding .py file has to be modified as well.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python eval_gsm8k.py --model "${SAVE_PATH}" --data_path ./data/test/GSM8K_test.jsonl
python eval_math.py --model "${SAVE_PATH}" --data_path ./data/test/MATH_test.jsonl