Commit 57944e55 authored by silencealiang

update model parameters format

parent 90ae7f5c
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
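# The loop above extracts the value of an optional --profiling=<value> argument into
# $profiling so it can be forwarded to the training script. A hypothetical invocation
# (launcher name assumed) would look like:
#   bash run_deepseekv3_671B.sh --profiling=true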
# These variables need to be modified
GPUS="" # total number of GPUs to use
DTK_ENV="" # path to the dtk env.sh
NCCL_ENV="" # path to the nccl env.sh (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
HOST="" # hostname
PORT="" # port number
DATA_PATH="" # path to mmap_deepseekv3_datasets_text_document
TOKENIZER_MODEL_PATH="" # path to deepseekv3_dataset
CHECKPOINT_PATH="" # path to ckpt
# Runs DeepseekV3 671B model
mpirun -np ${GPUS} --hostfile hostfile_deepseekv3_671B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
bash -c "
source ${DTK_ENV} && \
source ${NCCL_ENV} && \
./train_deepseekv3_671B_$((${GPUS} / 8))nodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling" > log-$((${GPUS} / 8))nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# Runs DeepseekV3 671B model
source /opt/dtk/env.sh
HOST=localhost
PORT=25900
DATA_PATH="path to mmap_deepseekv3_datasets_text_document"
TOKENIZER_MODEL_PATH="path to deepseekv3_dataset"
CHECKPOINT_PATH="path to output"
mpirun -np 8 --allow-run-as-root \
train_deepseekv3_671B_1nodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling > log-1nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# Runs DeepseekV3 671B model
source /opt/dtk/env.sh
HOST="" # modify this variable
PORT=25900
DATA_PATH="path to mmap_deepseekv3_datasets_text_document"
TOKENIZER_MODEL_PATH="path to deepseekv3_dataset"
CHECKPOINT_PATH="path to output"
mpirun -np 32 --hostfile hostfile_deepseekv3_671B_4nodes \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
train_deepseekv3_671B_4nodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling > log-4nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# Runs DeepseekV3 671B model
source /opt/dtk/env.sh
HOST="" # modify this variable
PORT=25900
DATA_PATH="path to mmap_deepseekv3_datasets_text_document"
TOKENIZER_MODEL_PATH="path to deepseekv3_dataset"
CHECKPOINT_PATH="path to output"
mpirun -np 1024 --hostfile hostfile_deepseekv3_671B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
train_deepseekv3_671B_multinodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling > log-128nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file
@@ -27,16 +27,6 @@ export HSA_FORCE_FINE_GRAIN_PCIE=1
export OMP_NUM_THREADS=1
export GPU_MAX_HW_QUEUES=10
# nccl env
export NCCL_ALGO=Ring
export NCCL_MIN_NCHANNELS=32
export NCCL_MAX_NCHANNELS=32
export NCCL_NET_GDR_LEVEL=7
export NCCL_NET_GDR_READ=1
export RCCL_SDMA_COPY_ENABLE=0
export NCCL_IB_HCA=mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1,mlx5_8:1,mlx5_9:1
export NCCL_TOPO_FILE="${MEGATRON_PATH}/topo-input.xml"
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
export MP_PP0_LAYERS=5 # whether to enable depends on the actual setup
@@ -27,16 +27,6 @@ export HSA_FORCE_FINE_GRAIN_PCIE=1
export OMP_NUM_THREADS=1
export GPU_MAX_HW_QUEUES=10
# nccl env
export NCCL_ALGO=Ring
export NCCL_MIN_NCHANNELS=32
export NCCL_MAX_NCHANNELS=32
export NCCL_NET_GDR_LEVEL=7
export NCCL_NET_GDR_READ=1
export RCCL_SDMA_COPY_ENABLE=0
export NCCL_IB_HCA=mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1,mlx5_8:1,mlx5_9:1
export NCCL_TOPO_FILE="${MEGATRON_PATH}/topo-input.xml"
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
#export MP_PP0_LAYERS=2 # whether to enable depends on the actual setup
@@ -27,16 +27,6 @@ export HSA_FORCE_FINE_GRAIN_PCIE=1
export OMP_NUM_THREADS=1
export GPU_MAX_HW_QUEUES=10
# nccl env
export NCCL_ALGO=Ring
export NCCL_MIN_NCHANNELS=32
export NCCL_MAX_NCHANNELS=32
export NCCL_NET_GDR_LEVEL=7
export NCCL_NET_GDR_READ=1
export RCCL_SDMA_COPY_ENABLE=0
export NCCL_IB_HCA=mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1,mlx5_8:1,mlx5_9:1
export NCCL_TOPO_FILE="${MEGATRON_PATH}/topo-input.xml"
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
export MP_PP0_LAYERS=2 # whether to enable depends on the actual setup
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# These variables need to be modified
GPUS="" # total number of GPUs to use
DTK_ENV="" # path to the dtk env.sh
NCCL_ENV="" # path to the nccl env.sh (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
HOST="" # hostname
PORT="" # port number
DATA_PATH="" # path to redpajama_text_document
TOKENIZER_MODEL_PATH="" # path to tokenizer.model
CHECKPOINT_PATH="" # path to ckpt
# Runs GPT 567B model
mpirun -np ${GPUS} --hostfile hostfile_gpt_567B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
bash -c "
source ${DTK_ENV} && \
source ${NCCL_ENV} && \
./train_gpt_567B_$((${GPUS} / 8))nodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling" > log-$((${GPUS} / 8))nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# Runs GPT 567B model
source /opt/dtk/env.sh
HOST=localhost
PORT=25900
DATA_PATH="path to redpajama_text_document"
TOKENIZER_MODEL_PATH="path to tokenizer.model"
CHECKPOINT_PATH="path to ckpt"
mpirun -np 8 --allow-run-as-root \
train_gpt_567B_1nodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling > log-1nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# Runs GPT 567B model
source /opt/dtk/env.sh
HOST="" # modify this variable
PORT=25900
DATA_PATH="path to redpajama_text_document"
TOKENIZER_MODEL_PATH="path to tokenizer.model"
CHECKPOINT_PATH="path to ckpt"
mpirun -np 1024 --hostfile hostfile_gpt_567B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
train_gpt_567B_multinodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling > log-128nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file
@@ -32,16 +32,6 @@ export HSA_FORCE_FINE_GRAIN_PCIE=1
export OMP_NUM_THREADS=1
export GPU_MAX_HW_QUEUES=10
# nccl env
export NCCL_ALGO=Ring
export NCCL_MIN_NCHANNELS=32
export NCCL_MAX_NCHANNELS=32
export NCCL_NET_GDR_LEVEL=7
export NCCL_NET_GDR_READ=1
export RCCL_SDMA_COPY_ENABLE=0
export NCCL_IB_HCA=mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1,mlx5_8:1,mlx5_9:1
export NCCL_TOPO_FILE="${MEGATRON_PATH}/topo-input.xml"
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
@@ -32,16 +32,6 @@ export HSA_FORCE_FINE_GRAIN_PCIE=1
export OMP_NUM_THREADS=1
export GPU_MAX_HW_QUEUES=10
# nccl env
export NCCL_ALGO=Ring
export NCCL_MIN_NCHANNELS=32
export NCCL_MAX_NCHANNELS=32
export NCCL_NET_GDR_LEVEL=7
export NCCL_NET_GDR_READ=1
export RCCL_SDMA_COPY_ENABLE=0
export NCCL_IB_HCA=mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1,mlx5_8:1,mlx5_9:1
export NCCL_TOPO_FILE="${MEGATRON_PATH}/topo-input.xml"
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# These variables need to be modified
GPUS="" # total number of GPUs to use
DTK_ENV="" # path to the dtk env.sh
NCCL_ENV="" # path to the nccl env.sh (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
HOST="" # hostname
PORT="" # port number
DATA_PATH="" # path to oscar-1GB_head-llama2_text_document
TOKENIZER_MODEL_PATH="" # path to tokenizer.model
CHECKPOINT_PATH="" # path to ckpt
# Runs Llama2 7B model
mpirun -np ${GPUS} --hostfile hostfile_llama2_7B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
bash -c "
source ${DTK_ENV} && \
source ${NCCL_ENV} && \
./train_llama2_7b_$((${GPUS} / 8))nodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling" > log-$((${GPUS} / 8))nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# Runs Llama2 7B model
source /opt/dtk/env.sh
HOST=localhost
PORT=34577
DATA_PATH="path to oscar-1GB_head-llama2_text_document"
TOKENIZER_MODEL_PATH="path to tokenizer.model"
CHECKPOINT_PATH="path to ckpt"
mpirun -np 8 --allow-run-as-root \
train_llama2_7b_1nodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling > log-1nodes-`date +%F-%H%M`.log 2>&1
wait
@@ -32,16 +32,6 @@ export HSA_FORCE_FINE_GRAIN_PCIE=1
export OMP_NUM_THREADS=1
export GPU_MAX_HW_QUEUES=10
# nccl env
export NCCL_ALGO=Ring
export NCCL_MIN_NCHANNELS=32
export NCCL_MAX_NCHANNELS=32
export NCCL_NET_GDR_LEVEL=7
export NCCL_NET_GDR_READ=1
export RCCL_SDMA_COPY_ENABLE=0
export NCCL_IB_HCA=mlx5_2:1,mlx5_3:1,mlx5_4:1,mlx5_5:1,mlx5_6:1,mlx5_7:1,mlx5_8:1,mlx5_9:1
export NCCL_TOPO_FILE="${MEGATRON_PATH}/topo-input.xml"
# torch: switch from multiple streams to a single stream
export ALLREDUCE_STREAM_WITH_COMPUTE=1
export SENDRECV_STREAM_WITH_COMPUTE=1
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# These variables need to be modified
GPUS="" # total number of GPUs to use
DTK_ENV="" # path to the dtk env.sh
NCCL_ENV="" # path to the nccl env.sh (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
HOST="" # hostname
PORT="" # port number
DATA_PATH="" # path to my-mixtral_text_document
TOKENIZER_MODEL_PATH="" # path to tokenizer.model
CHECKPOINT_PATH="" # path to ckpt
# Runs Mixtral 8x22B model
mpirun -np ${GPUS} --hostfile hostfile_mixtral_8x22B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
bash -c "
source ${DTK_ENV} && \
source ${NCCL_ENV} && \
./train_mixtral_8x22B_$((${GPUS} / 8))nodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling" > log-$((${GPUS} / 8))nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# Runs Mixtral 8x22B model
source /opt/dtk/env.sh
HOST=localhost
PORT=25900
DATA_PATH="path to my-mixtral_text_document"
TOKENIZER_MODEL_PATH="path to tokenizer.model"
CHECKPOINT_PATH="path to ckpt"
mpirun -np 8 --allow-run-as-root \
train_mixtral_8x22B_1nodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling > log-1nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# Runs Mixtral 8x22B model
source /opt/dtk/env.sh
HOST="" # modify this variable
PORT=25900
DATA_PATH="path to my-mixtral_text_document"
TOKENIZER_MODEL_PATH="path to tokenizer.model"
CHECKPOINT_PATH="path to ckpt"
mpirun -np 32 --hostfile hostfile_mixtral_8x22B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
train_mixtral_8x22B_multinodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling > log-4nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file
for para in $*
do
if [[ $para == --profiling* ]];then
profiling=${para#*=}
fi
done
# These variables need to be modified
GPUS="" # total number of GPUs to use
DTK_ENV="" # path to the dtk env.sh
NCCL_ENV="" # path to the nccl env.sh (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
HOST="" # hostname
PORT="" # port number
DATA_PATH="" # path to my-mixtral_text_document
TOKENIZER_MODEL_PATH="" # path to tokenizer.model
CHECKPOINT_PATH="" # path to ckpt
# Runs Mixtral 8x7B model
mpirun -np ${GPUS} --hostfile hostfile_mixtral_8x7B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
bash -c "
source ${DTK_ENV} && \
source ${NCCL_ENV} && \
./train_mixtral_8x7B_$((${GPUS} / 8))nodes.sh \
${HOST} \
${PORT} \
--data_path=$DATA_PATH \
--tokenizer_path=$TOKENIZER_MODEL_PATH \
--checkpoint_path=$CHECKPOINT_PATH \
--profiling=$profiling" > log-$((${GPUS} / 8))nodes-`date +%F-%H%M`.log 2>&1
wait
\ No newline at end of file