Commit 9dabea91 authored by silencealiang

update

parent 66d982b8
Pipeline #2563 passed
@@ -11,6 +11,11 @@ done
source /opt/dtk/env.sh
# default env
DIST_URL=${1}
DIST_PORT=25900
RANK=$OMPI_COMM_WORLD_RANK
LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )"
MEGATRON_PATH=$( dirname $( dirname ${CURRENT_DIR}))
export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH
@@ -33,13 +38,8 @@ export NCCL_TOPO_FILE="./topo-input.xml"
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
RANK=$OMPI_COMM_WORLD_RANK
LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
DIST_URL=${1}
DIST_PORT=25900
CHECKPOINT_PATH=./CKPT
# data path
CHECKPOINT_PATH=./CKPT
TOKENIZER_MODEL="path to tokenizer.model"
DATA_PATH="path to my-mixtral_text_document"
......
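For context on the relocated block above: under an mpirun launch, OMPI_COMM_WORLD_RANK, OMPI_COMM_WORLD_LOCAL_RANK and OMPI_COMM_WORLD_SIZE are the per-process variables that Open MPI exports, while DIST_URL (the script's first argument) and DIST_PORT hold the rendezvous endpoint. The minimal sketch below shows one common way such values are forwarded to torch.distributed's env:// initialization; the exact wiring inside these training scripts may differ, and the MASTER_ADDR/MASTER_PORT names are the standard PyTorch conventions, not something taken from this diff.

#!/bin/bash
# Hedged sketch: forward Open MPI rank info to torch.distributed (env:// init).
# DIST_URL is assumed to be the master-node address passed as the first argument,
# as in the scripts above; MASTER_ADDR/MASTER_PORT/RANK/LOCAL_RANK/WORLD_SIZE are
# the standard variable names PyTorch reads, not names defined in this commit.
DIST_URL=${1:?usage: $0 <master_node_addr>}
DIST_PORT=25900

export MASTER_ADDR=${DIST_URL}
export MASTER_PORT=${DIST_PORT}
export RANK=${OMPI_COMM_WORLD_RANK:-0}
export LOCAL_RANK=${OMPI_COMM_WORLD_LOCAL_RANK:-0}
export WORLD_SIZE=${OMPI_COMM_WORLD_SIZE:-1}

echo "rank ${RANK}/${WORLD_SIZE} (local ${LOCAL_RANK}) -> ${MASTER_ADDR}:${MASTER_PORT}"

Run once per rank, e.g. mpirun -np 8 bash sketch.sh 10.0.0.1, every process then sees the same rendezvous endpoint before Python starts.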
@@ -11,6 +11,11 @@ done
source /opt/dtk/env.sh
# default env
DIST_URL=${1}
DIST_PORT=25900
RANK=$OMPI_COMM_WORLD_RANK
LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )"
MEGATRON_PATH=$( dirname $( dirname ${CURRENT_DIR}))
export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH
@@ -33,13 +38,8 @@ export NCCL_TOPO_FILE="./topo-input.xml"
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
RANK=$OMPI_COMM_WORLD_RANK
LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
DIST_URL=${1}
DIST_PORT=25900
CHECKPOINT_PATH=./CKPT
# data path
CHECKPOINT_PATH=./CKPT
TOKENIZER_MODEL="path to tokenizer.model"
DATA_PATH="path to my-mixtral_text_document"
......
@@ -11,6 +11,11 @@ done
source /opt/dtk/env.sh
# default env
DIST_URL=${1}
DIST_PORT=25900
RANK=$OMPI_COMM_WORLD_RANK
LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )"
MEGATRON_PATH=$( dirname $( dirname ${CURRENT_DIR}))
export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH
@@ -33,12 +38,7 @@ export NCCL_TOPO_FILE="./topo-input.xml"
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
RANK=$OMPI_COMM_WORLD_RANK
LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
DIST_URL=${1}
DIST_PORT=25900
# data path
CHECKPOINT_PATH=./CKPT
TOKENIZER_MODEL="path to tokenizer.model"
DATA_PATH="path to my-mixtral_text_document"
@@ -112,7 +112,7 @@ TORCH_PROFIE_ARGS=(
--profile-ranks 0 1 2 3 4 5 6 7
--profile-step-start 3
--profile-step-end 4
--profile-dir torch_prof_mixtral_1nodes_tp2-pp1-ep8-ep_tp1
--profile-dir torch_prof_mixtral_1nodes_tp2-pp1-ep8-ep_tp1-cp1
--use-pytorch-profiler
)
......
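The renamed --profile-dir in this file simply encodes the parallel layout in the directory name (adding the -cp1 suffix). A small, hedged sketch of deriving that name from the parallel sizes, so it cannot drift from the actual configuration, is shown below; TP/PP/EP/ETP/CP/NNODES are illustrative variable names, not ones defined in these scripts.

# Hedged sketch: build the profiler output directory from the parallel sizes.
NNODES=1
TP=2; PP=1; EP=8; ETP=1; CP=1
PROFILE_DIR="torch_prof_mixtral_${NNODES}nodes_tp${TP}-pp${PP}-ep${EP}-ep_tp${ETP}-cp${CP}"
echo "${PROFILE_DIR}"   # -> torch_prof_mixtral_1nodes_tp2-pp1-ep8-ep_tp1-cp1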
@@ -11,6 +11,11 @@ done
source /opt/dtk/env.sh
# default env
DIST_URL=${1}
DIST_PORT=25900
RANK=$OMPI_COMM_WORLD_RANK
LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
CURRENT_DIR="$( cd "$( dirname "$0" )" && pwd )"
MEGATRON_PATH=$( dirname $( dirname ${CURRENT_DIR}))
export PYTHONPATH=${MEGATRON_PATH}:$PYTHONPATH
@@ -33,12 +38,7 @@ export NCCL_TOPO_FILE="./topo-input.xml"
# enable BatchLinear
export GROUPED_GEMM_BatchLinear=1
RANK=$OMPI_COMM_WORLD_RANK
LOCAL_RANK=$OMPI_COMM_WORLD_LOCAL_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
DIST_URL=${1}
DIST_PORT=25900
# data path
CHECKPOINT_PATH=./CKPT
TOKENIZER_MODEL="path to tokenizer.model"
DATA_PATH="path to my-mixtral_text_document"
@@ -81,7 +81,7 @@ MOE_ARGS=(
--moe-token-dispatcher-type alltoall
--moe-expert-capacity-factor 0.5
--moe-pad-expert-input-to-capacity
--moe-grouped-gemm
#--moe-grouped-gemm
)
DATA_ARGS=(
@@ -112,14 +112,14 @@ TORCH_PROFIE_ARGS=(
--profile-ranks 0 1 2 3 8 9 10 11
--profile-step-start 3
--profile-step-end 4
--profile-dir torch_prof_mixtral_4nodes_tp2-pp8-ep2-ep_tp1
--profile-dir torch_prof_mixtral_4nodes_tp2-pp4-ep8-ep_tp1-cp1
--use-pytorch-profiler
)
MODEL_PARALLEL_ARGS=(
--tensor-model-parallel-size 2
--pipeline-model-parallel-size 8
--expert-model-parallel-size 2
--pipeline-model-parallel-size 4
--expert-model-parallel-size 8
--expert-tensor-parallel-size 1
--use-distributed-optimizer
--sequence-parallel
......
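The fourth script's layout moves from tp2/pp8/ep2 to tp2/pp4/ep8, and its profile directory is renamed to match. Below is a quick, hedged sanity check of the resulting data-parallel size; it assumes 4 nodes with 8 GPUs each (consistent with the profiled ranks 0-3 and 8-11), which the diff itself does not state, and it leaves the expert-parallel constraints to Megatron's own argument validation.

# Hedged sanity check for the new 4-node layout (assumed 8 GPUs per node).
NNODES=4
GPUS_PER_NODE=8
TP=2; PP=4
WORLD_SIZE=$(( NNODES * GPUS_PER_NODE ))          # 32 ranks in total
if (( WORLD_SIZE % (TP * PP) != 0 )); then
    echo "world size ${WORLD_SIZE} not divisible by TP*PP=$(( TP * PP ))" >&2
    exit 1
fi
DP=$(( WORLD_SIZE / (TP * PP) ))                  # 32 / (2*4) = 4
echo "data-parallel size: ${DP}"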