Commit 296c1ee2 authored by zhaoying1
Browse files

Update run-13b-pretrain-single.sh

parent 8edbbd3e
#!/bin/bash
# Environment setup for the 13B pretraining launch. Each active line below
# exports one variable, so any child process started afterwards inherits it.
# NOTE(review): the per-variable notes are based on the variable names and on
# public ROCm / MIOpen / NCCL documentation, not on anything visible in this
# script -- confirm them against the versions installed on the cluster.

# Disabled: would export NCCL_IB_HCA=mlx5 (presumably InfiniBand adapter
# selection -- TODO confirm why it was turned off).
# export NCCL_IB_HCA=mlx5
# Presumably enables fine-grained PCIe memory in the ROCm runtime -- TODO confirm.
export HSA_FORCE_FINE_GRAIN_PCIE=1
# Presumably selects MIOpen's kernel "find" mode; the meaning of 3 should be
# checked against the MIOpen find-mode documentation.
export MIOPEN_FIND_MODE=3
# Presumably limits MIOpen kernel compilation to one parallel job -- TODO confirm.
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
# Disabled: would export NCCL_PLUGIN_P2P=ucx.
# export NCCL_PLUGIN_P2P=ucx
# Presumably restricts NCCL/RCCL socket traffic to the interface named ib0;
# assumes that interface exists on every node -- verify.
export NCCL_SOCKET_IFNAME=ib0
# Presumably a numeric peer-to-peer distance threshold for NCCL/RCCL; what 5
# maps to depends on the library version -- TODO confirm.
export NCCL_P2P_LEVEL=5
# NOTE(review): looks like a cap on the number of RCCL channels; confirm that
# the installed RCCL actually reads a variable with this name.
export RCCL_NCHANNELS=2
......@@ -26,7 +24,7 @@ APP="python3 ../pretrain.py --deepspeed --deepspeed_config ../models/deepspeed_z
--dataset_path $DATASET_PATH --spm_model_path $SPM_MODEL_PATH \
--config_path ../models/llama/13b_config.json \
--output_model_path output/13b/ --deepspeed_checkpoint_activations \
--world_size ${2} --data_processor lm\
--world_size ${1} --data_processor lm\
--total_steps 10000 --save_checkpoint_steps 1000 --batch_size 2 --enable_zero3 \
"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment