run-7b-sft-lora-single.sh 2.17 KB
Newer Older
qianyj's avatar
qianyj committed
1
#!/bin/bash
qianyj's avatar
update  
qianyj committed
2
3
4
5
6
7
8
9
10
11
12
13

# Pull in settings from env.sh (its contents are not part of this script).
source env.sh

# Number of devices to expose, taken from the first positional argument.
GPUS=${1:-}

#######################################
# Print a comma-separated list of device indices 0..N-1.
# Arguments: $1 - device count (non-negative decimal integer)
# Outputs:   the list on stdout, no trailing newline (empty when N is 0)
#######################################
build_device_list() {
  local count=$((10#$1)) # force base 10 so "08" is not parsed as octal
  local list="" i
  for ((i = 0; i < count; i++)); do
    list+="${i},"
  done
  printf '%s' "${list%,}" # drop the trailing comma
}

# `string` is reused further down when HIP_VISIBLE_DEVICES is re-exported.
string=""
if [[ "$GPUS" =~ ^[0-9]+$ ]]; then
  string=$(build_device_list "$GPUS")
else
  # Validate before doing arithmetic: an empty or non-numeric value would
  # otherwise be evaluated as an arithmetic expression by the loop.
  printf 'usage: %s <num_gpus>  (got "%s"; device list left empty)\n' \
    "${0##*/}" "$GPUS" >&2
fi
export HIP_VISIBLE_DEVICES=$string
# echo "$HIP_VISIBLE_DEVICES"

qianyj's avatar
qianyj committed
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42


# Training command line, written one option per array element and then
# flattened into the space-separated string APP. The launch code expands
# ${APP} unquoted, so it is the resulting word list that matters.
train_cmd=(
  python3 ../src/train_bash.py --stage sft
  # model, prompt template and dataset
  --model_name_or_path ../../baichuan-7b-base
  --do_train
  --template default
  --dataset alpaca_gpt4_en
  # LoRA settings
  --finetuning_type lora
  --lora_rank 16
  --lora_target W_pack,o_proj,gate_proj,down_proj,up_proj
  --output_dir out/baichuan-7b-lora-test7
  # batching and preprocessing
  --per_device_train_batch_size 1
  --per_device_eval_batch_size 1
  --gradient_accumulation_steps 1
  --preprocessing_num_workers 8
  # schedule, logging, checkpoint and evaluation cadence
  --lr_scheduler_type cosine
  --logging_steps 10
  --save_steps 2
  --eval_steps 2
  --learning_rate 1e-4
  --max_grad_norm 0.5
  --num_train_epochs 1.0
  --val_size 0.001
  --evaluation_strategy steps
  --load_best_model_at_end
  --plot_loss
  --fp16
  --deepspeed deepspeed.json
)
# Join with single spaces without depending on the current IFS, then trim
# the separator left after the last element.
printf -v APP '%s ' "${train_cmd[@]}"
APP=${APP% }
qianyj's avatar
update  
qianyj committed
43
44
45
#######################################
# Map an Open MPI local rank to the NUMA node used for its numactl binding.
# Arguments: $1 - local rank (a single digit 0-7)
# Outputs:   NUMA node id on stdout: 0 for ranks 0-3, 3 for ranks 4-7
# Returns:   0 if the rank has a mapping, 1 otherwise
#######################################
numa_node_for_rank() {
  case "$1" in
    [0-3]) echo 0 ;;
    [4-7]) echo 3 ;;
    *) return 1 ;;
  esac
}

# Local rank of this process as provided in the environment by the launcher.
local_rank=$OMPI_COMM_WORLD_LOCAL_RANK
echo "$local_rank"

if numa_node=$(numa_node_for_rank "$local_rank"); then
  export HIP_VISIBLE_DEVICES=$string
  # ${APP} is intentionally unquoted: it holds the whole command line as one
  # string and must be split into separate words here.
  # shellcheck disable=SC2086
  echo numactl --cpunodebind="$numa_node" --membind="$numa_node" ${APP}
  # shellcheck disable=SC2086
  numactl --cpunodebind="$numa_node" --membind="$numa_node" ${APP}
else
  # An unset or out-of-range rank used to fall through silently; say so.
  printf '%s: no NUMA binding for local rank "%s" (expected 0-7); nothing launched\n' \
    "${0##*/}" "$local_rank" >&2
fi
qianyj's avatar
update  
qianyj committed
87