run-7b-sft-lora-single.sh 1.91 KB
Newer Older
qianyj's avatar
qianyj committed
1
#!/bin/bash
qianyj's avatar
update  
qianyj committed
2
3
4
5
6
7
8
9

# Number of GPUs to expose, taken from the first positional argument.
GPUS=$1

# Gather the device indices 0 .. GPUS-1 and join them with commas
# (GPUS=4 gives "0,1,2,3"). When no index is produced the result is empty.
ids=()
i=0
while ((i < $GPUS)); do
  ids+=("$i")
  i=$((i + 1))
done
# Emit every index followed by a comma, then drop the one trailing comma.
printf -v string '%s,' "${ids[@]}"
string=${string%,}
zhaoying1's avatar
zhaoying1 committed
10
11
12
13
14
15
16
17
# Rendezvous endpoint: the address is the second positional argument,
# the port is fixed.
MASTER_ADDR=${2}
MASTER_PORT=12365

# Rank information is copied from the OMPI_COMM_WORLD_* variables
# (presumably provided by the Open MPI launcher - confirm for this cluster).
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
RANK=$OMPI_COMM_WORLD_RANK
# local_rank is a plain shell copy of the local rank; LOCAL_RANK is the
# exported variant carrying the same value.
local_rank=$OMPI_COMM_WORLD_LOCAL_RANK
LOCAL_RANK=$local_rank

# Fixed runtime settings handed to the child process.
HSA_FORCE_FINE_GRAIN_PCIE=1
OMP_NUM_THREADS=1

export MASTER_ADDR MASTER_PORT WORLD_SIZE RANK LOCAL_RANK \
  HSA_FORCE_FINE_GRAIN_PCIE OMP_NUM_THREADS
qianyj's avatar
update  
qianyj committed
18

qianyj's avatar
qianyj committed
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47


# Training command line: runs ../src/train_bash.py in the "sft" stage on
# ../../baichuan-7b-base with LoRA fine-tuning (rank 16), the
# alpaca_gpt4_en dataset, fp16 and the DeepSpeed config deepspeed.json.
# One entry per option keeps the list easy to scan and edit.
train_cmd=(
  'python3 ../src/train_bash.py --stage sft'
  '--model_name_or_path ../../baichuan-7b-base'
  '--do_train'
  '--template default'
  '--dataset alpaca_gpt4_en'
  '--finetuning_type lora'
  '--lora_rank 16'
  '--lora_target W_pack,o_proj,gate_proj,down_proj,up_proj'
  '--output_dir out/baichuan-7b-lora-test7'
  '--per_device_train_batch_size 1'
  '--per_device_eval_batch_size 1'
  '--gradient_accumulation_steps 1'
  '--preprocessing_num_workers 8'
  '--lr_scheduler_type cosine'
  '--logging_steps 10'
  '--save_steps 2'
  '--eval_steps 2'
  '--learning_rate 1e-4'
  '--max_grad_norm 0.5'
  '--num_train_epochs 1.0'
  '--val_size 0.001'
  '--evaluation_strategy steps'
  '--load_best_model_at_end'
  '--plot_loss'
  '--fp16'
  '--deepspeed deepspeed.json'
)

# APP stays a single string because the launcher expands it unquoted and
# relies on word splitting. The five-blank gap and the trailing newline
# keep the value exactly what this script has always produced.
gap='     '
printf -v APP "%s${gap}" "${train_cmd[@]}"
APP=${APP%"$gap"}$'\n'
zhaoying1's avatar
zhaoying1 committed
48

qianyj's avatar
update  
qianyj committed
49
# Launch the training command for this process, bound to one NUMA node.
#
# Reads:
#   local_rank - local rank of this process; also used as the NUMA node
#                index for both CPU and memory binding
#   string     - comma-separated device list exported as HIP_VISIBLE_DEVICES
#   APP        - the full training command line, held in a single string
#
# Every supported rank (0-7) exports the same device list and differs only
# in the node number given to numactl, so a single branch covers them all.
case "${local_rank}" in
[0-7])
  export HIP_VISIBLE_DEVICES=$string
  # APP is intentionally unquoted: it must be word-split into the program
  # name and its individual arguments.
  # shellcheck disable=SC2086
  numactl --cpunodebind="${local_rank}" --membind="${local_rank}" ${APP}
  ;;
*)
  # Without this branch an unset or out-of-range rank matches nothing, so
  # the script would start no training process and still exit 0.
  printf '%s: unsupported local rank "%s" (expected 0-7)\n' \
    "${0##*/}" "${local_rank}" >&2
  exit 1
  ;;
esac
qianyj's avatar
update  
qianyj committed
83

zhaoying1's avatar
zhaoying1 committed
84