Unverified commit d8b13cb0, authored by Tim Moon and committed by GitHub
Browse files

Disable FP8 in Mcore integration test on older GPUs (#1357)



Debug Mcore integration test

Avoid FP8 on Ampere and older. Generate synthetic data instead of depending on external data.
Signed-off-by: Tim Moon <tmoon@nvidia.com>
parent d978e800
Megatron-LM
vocab.json
\ No newline at end of file
...@@ -8,6 +8,12 @@ set -e ...@@ -8,6 +8,12 @@ set -e
: ${TE_PATH:=/opt/transformerengine} : ${TE_PATH:=/opt/transformerengine}
: ${MCORE_PATH:=${TE_PATH}/qa/L1_pytorch_mcore_integration/Megatron-LM} : ${MCORE_PATH:=${TE_PATH}/qa/L1_pytorch_mcore_integration/Megatron-LM}
# Decide whether FP8 can be enabled on this machine.
# Query the compute capability of the first GPU reported by nvidia-smi and
# drop every non-digit character, so a value such as "8.9" becomes 89.
DEVICE_ARCH=$(
  nvidia-smi --query-gpu=compute_cap --format=csv,noheader \
    | head -n 1 \
    | tr -cd '0-9'
)
# WITH_FP8 is assigned only when the threshold is met; otherwise it is left
# untouched. An empty DEVICE_ARCH evaluates to 0 in this arithmetic test.
if [[ "${DEVICE_ARCH}" -ge 89 ]]; then
  WITH_FP8=1
fi
# Download Megatron-LM if needed # Download Megatron-LM if needed
if [ ! -d "${MCORE_PATH}" ]; then if [ ! -d "${MCORE_PATH}" ]; then
pushd $(dirname ${MCORE_PATH}) pushd $(dirname ${MCORE_PATH})
...@@ -15,6 +21,14 @@ if [ ! -d "${MCORE_PATH}" ]; then ...@@ -15,6 +21,14 @@ if [ ! -d "${MCORE_PATH}" ]; then
popd popd
fi fi
# Create mock vocab
# Writes a single-line JSON object mapping "<|endoftext|>" to 0 and each of
# the strings "1".."4095" to its own integer value.
VOCAB_FILE=${TE_PATH}/qa/L1_pytorch_mcore_integration/vocab.json
# One quoted redirection for the whole group: the file is truncated and
# opened once, and a TE_PATH containing whitespace no longer causes an
# "ambiguous redirect" error.
{
  printf '{'
  printf '"<|endoftext|>": 0'
  seq 1 4095 | awk '{ printf(", \"%d\": %d", $1, $1) }'
  printf '}'
} > "${VOCAB_FILE}"
# Megatron-LM invocation # Megatron-LM invocation
COMMAND=" COMMAND="
NVTE_TORCH_COMPILE=0 NVTE_TORCH_COMPILE=0
...@@ -40,17 +54,17 @@ ${MCORE_PATH}/pretrain_gpt.py ...@@ -40,17 +54,17 @@ ${MCORE_PATH}/pretrain_gpt.py
--hidden-size 128 --hidden-size 128
--num-attention-heads 8 --num-attention-heads 8
--seq-length 128 --seq-length 128
--max-position-embeddings 2048 --max-position-embeddings 128
--micro-batch-size 1 --micro-batch-size 1
--global-batch-size 8 --global-batch-size 8
--train-iters 10 --train-iters 10
--eval-iters 10 --eval-iters 10
--lr 1e-4 --lr 1e-4
--mock-data --mock-data
--vocab-file /data/gpt3/pile-cc1-cc2-shuf/bpe/gpt2-vocab.json --vocab-file ${VOCAB_FILE}
--merge-file /data/gpt3/pile-cc1-cc2-shuf/bpe/gpt2-merges.txt --merge-file ${TE_PATH}/qa/L1_pytorch_mcore_integration/merges.txt
--transformer-impl transformer_engine --transformer-impl transformer_engine
--fp8-format hybrid ${WITH_FP8:+--fp8-format hybrid}
" "
COMMAND=$(echo "${COMMAND}" | tr '\n' ' ') COMMAND=$(echo "${COMMAND}" | tr '\n' ' ')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment