Commit 296c1ee2 authored by zhaoying1
Browse files

Update run-13b-pretrain-single.sh

parent 8edbbd3e
#!/bin/bash #!/bin/bash
# export NCCL_IB_HCA=mlx5
export HSA_FORCE_FINE_GRAIN_PCIE=1 export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_FIND_MODE=3 export MIOPEN_FIND_MODE=3
export MIOPEN_COMPILE_PARALLEL_LEVEL=1 export MIOPEN_COMPILE_PARALLEL_LEVEL=1
# export NCCL_PLUGIN_P2P=ucx
export NCCL_SOCKET_IFNAME=ib0 export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5 export NCCL_P2P_LEVEL=5
export RCCL_NCHANNELS=2 export RCCL_NCHANNELS=2
...@@ -26,7 +24,7 @@ APP="python3 ../pretrain.py --deepspeed --deepspeed_config ../models/deepspeed_z ...@@ -26,7 +24,7 @@ APP="python3 ../pretrain.py --deepspeed --deepspeed_config ../models/deepspeed_z
--dataset_path $DATASET_PATH --spm_model_path $SPM_MODEL_PATH \ --dataset_path $DATASET_PATH --spm_model_path $SPM_MODEL_PATH \
--config_path ../models/llama/13b_config.json \ --config_path ../models/llama/13b_config.json \
--output_model_path output/13b/ --deepspeed_checkpoint_activations \ --output_model_path output/13b/ --deepspeed_checkpoint_activations \
--world_size ${2} --data_processor lm\ --world_size ${1} --data_processor lm\
--total_steps 10000 --save_checkpoint_steps 1000 --batch_size 2 --enable_zero3 \ --total_steps 10000 --save_checkpoint_steps 1000 --batch_size 2 --enable_zero3 \
" "
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment