run_train_single.sh 1.82 KB
Newer Older
zhaoying1's avatar
zhaoying1 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/bin/bash

export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_FIND_MODE=3
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_PLUGIN_P2P=ucx
export RCCL_NCHANNELS=2
export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5
export NCCL_IB_HCA=mlx5_0
export NCCL_DEBUG=INFO
export NCCL_NET_GDR_LEVEL=SYS
export NCCL_NET_PLUGIN=none
unset RCCL_NCHANNELS
unset NCCL_NET_GDR_LEVEL
lrank=$OMPI_COMM_WORLD_LOCAL_RANK
echo "LRANK===============================$lrank"
RANK=$OMPI_COMM_WORLD_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
export HIP_VISIBLE_DEVICES=0,1,2,3

LR=1e-5
APP="python3 ../main.py \
    --deepspeed ../deepspeed.json \
    --do_train \
    --train_file AdvertiseGen/train.json \
zhaoying1's avatar
zhaoying1 committed
27
28
    --prompt_column content  \
    --response_column summary \
zhaoying1's avatar
zhaoying1 committed
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
    --model_name_or_path THUDM/chatglm-6b \
    --output_dir ./output_ft/pretrain \
    --overwrite_output_dir \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --predict_with_generate \
    --max_steps 2000 \
    --logging_steps 5 \
    --save_steps 1000 \
    --learning_rate $LR \
    --fp16 \
    --local_rank $lrank "

case ${lrank} in
[0])
  export HIP_VISIBLE_DEVICES=0,1,2,3
  export UCX_NET_DEVICES=mlx5_0:1
  export UCX_IB_PCI_BW=mlx5_0:50Gbs
  numactl --cpunodebind=0 --membind=0 ${APP}
  ;;
[1])
  export HIP_VISIBLE_DEVICES=0,1,2,3
  export UCX_NET_DEVICES=mlx5_1:1
  export UCX_IB_PCI_BW=mlx5_1:50Gbs
  numactl --cpunodebind=1 --membind=1 ${APP}
  ;;
[2])
  export HIP_VISIBLE_DEVICES=0,1,2,3
  export UCX_NET_DEVICES=mlx5_2:1
  export UCX_IB_PCI_BW=mlx5_2:50Gbs
  numactl --cpunodebind=2 --membind=2 ${APP}
  ;;
[3])
  export HIP_VISIBLE_DEVICES=0,1,2,3
  export UCX_NET_DEVICES=mlx5_3:1
  export UCX_IB_PCI_BW=mlx5_3:50Gbs
  numactl --cpunodebind=3 --membind=3 ${APP}
  ;;
esac