#!/bin/bash
# Also launch this on the slave machine using slave_config.yaml.

NPROC_PER_NODE=2
NNODES=1
RANK=0
MASTER_ADDR=127.0.0.1
MASTER_PORT=17170

HIP_VISIBLE_DEVICES=6,7 torchrun \
    --nproc_per_node $NPROC_PER_NODE \
    --nnodes $NNODES \
    --node_rank $RANK \
    --master_addr $MASTER_ADDR \
    --master_port $MASTER_PORT \
    src/train.py \
    --stage sft \
    --do_train \
    --model_name_or_path /home/practice/deepseek-math-7b-base \
    --dataset alpaca_en_demo \
    --template deepseek \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir saves/deepseek/lora/sft \
    --overwrite_output_dir \
    --overwrite_cache \
    --per_device_train_batch_size 2 \
    --gradient_accumulation_steps 32 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --save_steps 1000 \
    --learning_rate 1e-4 \
    --num_train_epochs 3.0 \
    --plot_loss \
    --fp16
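
# The comment above refers to a second launch on the slave machine. A minimal
# sketch of that companion command follows, kept as comments so this script
# still performs only the single launch. It assumes a two-machine setup
# (so both launches would use NNODES=2), the worker taking node rank 1, and
# the master reachable at 192.168.1.10 (hypothetical IP); the actual values
# live in slave_config.yaml, which is not shown here.
#
# On the slave machine:
#
#   NPROC_PER_NODE=2
#   NNODES=2
#   RANK=1
#   MASTER_ADDR=192.168.1.10   # hypothetical: IP of the master node
#   MASTER_PORT=17170
#
#   HIP_VISIBLE_DEVICES=6,7 torchrun \
#       --nproc_per_node $NPROC_PER_NODE \
#       --nnodes $NNODES \
#       --node_rank $RANK \
#       --master_addr $MASTER_ADDR \
#       --master_port $MASTER_PORT \
#       src/train.py \
#       <same training arguments as above>
#
# torchrun requires --nnodes to be the total node count and --node_rank to be
# unique per node, while --master_addr/--master_port must point to the same
# rendezvous endpoint on every machine.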