#!/bin/bash
# run-13b-pretrain-single.sh
#
# Per-rank launcher: exports GPU-runtime / communication settings, reads the
# rank information from the OMPI_COMM_WORLD_* variables, then (further down)
# builds the pretrain.py command line and starts it under numactl.

# HSA / MIOpen / NCCL / RCCL settings; exported so child processes inherit them.
export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_FIND_MODE=3
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5
export RCCL_NCHANNELS=2
export NCCL_IB_HCA=mlx5_0

# Local rank selects the NUMA node / mlx5 device in the dispatch at the end of
# the script; it is echoed to stdout for the job log.
lrank="${OMPI_COMM_WORLD_LOCAL_RANK}"
echo "LRANK===============================$lrank"

# NOTE(review): RANK and WORLD_SIZE are assigned without `export`, so unless the
# caller's environment already exports them they are not visible to child
# processes. WORLD_SIZE is not referenced anywhere else in this script.
# Confirm whether pretrain.py expects them in its environment.
RANK="${OMPI_COMM_WORLD_RANK}"
WORLD_SIZE="${OMPI_COMM_WORLD_SIZE}"


# Input locations, all relative to the directory the script is started from.
# Base-model weights and the matching SentencePiece tokenizer:
MODEL_PATH="model_scope/Linly-llama-13b-base/model"
SPM_MODEL_PATH="model_scope/Linly-llama-13b-base/tokenizer.model"
# Training dataset file:
DATASET_PATH="../data/dataset.pt"



# Command line for the pretraining run.
#
# APP is kept as ONE string (not an array) because the rank dispatch at the end
# of the script expands it unquoted (${APP}) and relies on word splitting to
# turn it into the command plus its arguments. None of the embedded values may
# therefore contain whitespace or glob characters.
#
# ${1} is the script's first positional argument and is forwarded as
# --world_size. NOTE(review): nothing checks that it was supplied; if it is
# empty, --world_size is left without a value.
APP="python3 ../pretrain.py --deepspeed --deepspeed_config ../models/deepspeed_zero3_config.json \
                      --pretrained_model_path  $MODEL_PATH \
                      --dataset_path $DATASET_PATH --spm_model_path $SPM_MODEL_PATH \
                      --config_path ../models/llama/13b_config.json \
                      --output_model_path output/13b/ --deepspeed_checkpoint_activations \
                      --world_size ${1} --data_processor lm \
                      --total_steps 10000 --save_checkpoint_steps 1000 --batch_size 2  --enable_zero3 \
"

# Start the training command for this local rank.
#
# Local ranks 0-3 each bind to the NUMA node and use the mlx5 device carrying
# the same index as the rank; every rank is given the same four GPUs
# (HIP_VISIBLE_DEVICES=0,1,2,3). Any other value (including an unset local
# rank) is reported on stderr and the script exits non-zero instead of
# finishing silently without having launched anything.
case "${lrank}" in
  0 | 1 | 2 | 3)
    export HIP_VISIBLE_DEVICES=0,1,2,3
    export UCX_NET_DEVICES="mlx5_${lrank}:1"
    export UCX_IB_PCI_BW="mlx5_${lrank}:50Gbs"
    # ${APP} is intentionally unquoted: it is a single string that must be
    # word-split into the command and its arguments.
    numactl --cpunodebind="${lrank}" --membind="${lrank}" ${APP}
    ;;
  *)
    echo "unsupported local rank '${lrank}' (expected 0-3); nothing launched" >&2
    exit 1
    ;;
esac