Commit 250c7fb0 authored by wxj

update model parameters format

parent c788823b
@@ -7,8 +7,7 @@ done
# Those variables need to modify
GPUS="" # how many gpus to use
DTK_ENV="" # where env.sh of dtk
NCCL_ENV="" # where env.sh of nccl (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
MPI_PORT="" # mpi port to use
HOST="" # hostname
PORT="" # port id
DATA_PATH="" # path to mmap_deepseekv3_datasets_text_document
@@ -20,9 +19,8 @@ mpirun -np ${GPUS} --hostfile hostfile_deepseekv3_671B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
--mca plm_rsh_args "-p ${MPI_PORT}" \
bash -c "
source ${DTK_ENV} && \
source ${NCCL_ENV} && \
./train_deepseekv3_671B_$((${GPUS} / 8))nodes.sh \
${HOST} \
${PORT} \
......
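For reference, a minimal sketch of how the trimmed-down launcher variables might be filled in after this change; every value below is a hypothetical placeholder, not part of the commit:

GPUS="16"                                # total GPUs across nodes (two 8-GPU nodes here)
DTK_ENV="/opt/dtk/env.sh"                # hypothetical path to the dtk env.sh
HOST="node01"                            # hypothetical rendezvous hostname
PORT="29500"                             # hypothetical rendezvous port
DATA_PATH="/data/mmap_deepseekv3_datasets_text_document"   # hypothetical dataset path
# With GPUS=16 the launcher resolves $((${GPUS} / 8)) to 2 and calls
# train_deepseekv3_671B_2nodes.sh, so GPUS should stay a multiple of 8.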
@@ -29,7 +29,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
#export GROUPED_GEMM_BatchLinear=1
export MP_PP0_LAYERS=5 # whether to enable depends on the actual setup
### BASE CONFIG ###
......
@@ -29,7 +29,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
#export GROUPED_GEMM_BatchLinear=1
#export MP_PP0_LAYERS=2 # whether to enable depends on the actual setup
### BASE CONFIG ###
......
@@ -29,7 +29,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
#export GROUPED_GEMM_BatchLinear=1
export MP_PP0_LAYERS=2 # whether to enable depends on the actual setup
### BASE CONFIG ###
......
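If the BatchLinear path has to be switched back on for a one-off run of these per-node scripts, a minimal sketch is to pass the variable through the environment instead of uncommenting the export; this assumes the training code reads GROUPED_GEMM_BatchLinear from the environment, and the node count and arguments below are placeholders:

# Hypothetical one-off override; the real script may take more arguments than shown here.
GROUPED_GEMM_BatchLinear=1 ./train_deepseekv3_671B_2nodes.sh "${HOST}" "${PORT}"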
@@ -7,8 +7,7 @@ done
# Those variables need to modify
GPUS="" # how many gpus to use
DTK_ENV="" # where env.sh of dtk
NCCL_ENV="" # where env.sh of nccl (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
MPI_PORT="" # mpi port to use
HOST="" # hostname
PORT="" # port id
DATA_PATH="" # path to redpajama_text_document
@@ -20,9 +19,8 @@ mpirun -np ${GPUS} --hostfile hostfile_gpt_567B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
--mca plm_rsh_args "-p ${MPI_PORT}" \
bash -c "
source ${DTK_ENV} && \
source ${NCCL_ENV} && \
./train_gpt_567B_$((${GPUS} / 8))nodes.sh \
${HOST} \
${PORT} \
......
@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
#export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
--rank ${RANK}
......
@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
#export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
--rank ${RANK}
......
@@ -7,8 +7,7 @@ done
# Those variables need to modify
GPUS="" # how many gpus to use
DTK_ENV="" # where env.sh of dtk
NCCL_ENV="" # where env.sh of nccl (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
MPI_PORT="" # mpi port to use
HOST="" # hostname
PORT="" # port id
DATA_PATH="" # path to my-mixtral_text_document
@@ -20,9 +19,8 @@ mpirun -np ${GPUS} --hostfile hostfile_mixtral_8x22B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
--mca plm_rsh_args "-p ${MPI_PORT}" \
bash -c "
source ${DTK_ENV} && \
source ${NCCL_ENV} && \
./train_mixtral_8x22B_$((${GPUS} / 8))nodes.sh \
${HOST} \
${PORT} \
......
@@ -7,8 +7,7 @@ done
# Those variables need to modify
GPUS="" # how many gpus to use
DTK_ENV="" # where env.sh of dtk
NCCL_ENV="" # where env.sh of nccl (requirements/nccl_wz/env.sh or requirements/nccl_zz/env.sh)
MPI_PORT="" # mpi port to use
HOST="" # hostname
PORT="" # port id
DATA_PATH="" # path to my-mixtral_text_document
@@ -20,9 +19,8 @@ mpirun -np ${GPUS} --hostfile hostfile_mixtral_8x7B \
--allow-run-as-root \
--bind-to none \
--mca plm_rsh_no_tree_spawn 1 \
--mca plm_rsh_args "-p ${MPI_PORT}" \
bash -c "
source ${DTK_ENV} && \
source ${NCCL_ENV} && \
./train_mixtral_8x7B_$((${GPUS} / 8))nodes.sh \
${HOST} \
${PORT} \
......
@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
#export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
--rank ${RANK}
......
@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
#export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
--rank ${RANK}
......
@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
#export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
--rank ${RANK}
......
@@ -34,7 +34,7 @@ export GPU_MAX_HW_QUEUES=10
export PYTHONPATH=${MEGATRON_PATH}/Megatron-LM:$PYTHONPATH
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
#export GROUPED_GEMM_BatchLinear=1
DISTRIBUTED_ARGS=(
--rank ${RANK}
......
#!/bin/bash
export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
"${@:2}"
# LOCAL_RANK=$1
# shift
LOCAL_RANK=$1
shift
numa_map=(0 1 2 3 4 5 6 7)
NUMA_ID=${numa_map[$LOCAL_RANK]}
numactl --cpunodebind=${NUMA_ID} --membind=${NUMA_ID} "$@"
# numa_map=(0 1 2 3 4 5 6 7)
# NUMA_ID=${numa_map[$LOCAL_RANK]}
# numactl --cpunodebind=${NUMA_ID} --membind=${NUMA_ID} "$@"