Commit 3aca1415 authored by liangjing

Merge branch 'megatron-lm_dtk24.04' into 'main'

Megatron lm dtk24.04

See merge request !1
parents 0024a5c6 1005e9d3
Pipeline #1806 passed with stage
{"lm loss": {"start_step": 0, "end_step": 41, "step_interval": 5, "values": [10.79471, 10.86601, 10.89073, 10.78482, 10.6587, 10.58125, 10.0813, 10.19422, 10.13437]}, "num-zeros": {"start_step": 0, "end_step": 41, "step_interval": 5, "values": [1609.0, 1850.0, 1921.0, 1942.0, 1853.0, 1674.0, 1544.0, 1884.0, 2438.0]}, "iteration_timing_avg": 0.12650857142857144}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 40, "step_interval": 5, "values": [10.73442, 10.82095, 10.84047, 10.75831, 10.70386, 10.63718, 10.20959, 10.36611]}, "num-zeros": {"start_step": 0, "end_step": 40, "step_interval": 5, "values": [2625.0, 2815.0, 2837.0, 2870.0, 2755.0, 2617.0, 2345.0, 2529.0]}, "iteration_timing_avg": 0.1255659259259259}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 40, "step_interval": 5, "values": [10.89427, 10.9106, 10.917, 10.84465, 10.70825, 10.63519, 10.15543, 10.26206]}, "num-zeros": {"start_step": 0, "end_step": 40, "step_interval": 5, "values": [22727188.0, 23020756.0, 22501138.0, 22830610.0, 22739638.0, 22547160.0, 22955250.0, 22589434.0]}, "iteration_timing_avg": 0.12411037037037034}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 48, "step_interval": 5, "values": [10.85716, 10.88973, 10.879, 10.87014, 10.87978, 10.84463, 10.67266, 10.62932, 10.52767, 10.25362]}, "num-zeros": {"start_step": 0, "end_step": 31, "step_interval": 5, "values": [2450.0, 2396.0, 2523.0, 2242.0, 2225.0, 2478.0, 2536.0]}, "iteration_timing_avg": 0.11416968750000002} {"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.85543, 10.89355, 10.87608, 10.87365, 10.88042, 10.84182, 10.67177, 10.62854, 10.52511, 10.25229]}, "num-zeros": {"start_step": 0, "end_step": 33, "step_interval": 5, "values": [2470.0, 2444.0, 2570.0, 2192.0, 2241.0, 2574.0, 2476.0]}, "iteration_timing_avg": 0.14008088235294117}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.92215, 10.93714, 10.89742, 10.87588, 10.75165, 10.65713, 10.1606, 10.24967, 10.15339, 9.84198]}, "num-zeros": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1655.0, 1837.0, 1968.0, 1854.0, 1811.0, 1810.0, 1593.0, 1997.0, 2315.0, 2343.0]}, "iteration_timing_avg": 0.13743323529411763}
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.8559, 10.89255, 10.8665, 10.81693, 10.69856, 10.60955, 10.10845, 10.21443, 10.12855, 9.80126]}, "num-zeros": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1693.0, 1878.0, 1977.0, 1871.0, 2022.0, 1716.0, 1646.0, 2006.0, 2280.0, 2365.0]}, "iteration_timing_avg": 0.12973323529411762}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.86276, 10.88058, 10.87527, 10.88402, 10.89173, 10.84724, 10.6886, 10.62864, 10.53925, 10.26646]}, "num-zeros": {"start_step": 0, "end_step": 33, "step_interval": 5, "values": [2199.0, 2306.0, 2412.0, 2032.0, 2077.0, 2475.0, 2347.0]}, "iteration_timing_avg": 0.15481029411764707} {"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.85921, 10.8797, 10.87381, 10.88658, 10.88912, 10.84826, 10.68571, 10.62946, 10.54289, 10.26918]}, "num-zeros": {"start_step": 0, "end_step": 33, "step_interval": 5, "values": [2289.0, 2368.0, 2427.0, 2023.0, 2234.0, 2501.0, 2316.0]}, "iteration_timing_avg": 0.20419529411764706}
\ No newline at end of file
{"lm loss": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [10.86168, 10.88879, 10.87894, 10.8312, 10.71384, 10.61221, 10.13333, 10.23204, 10.16051, 9.83654]}, "num-zeros": {"start_step": 0, "end_step": 50, "step_interval": 5, "values": [1854.0, 2137.0, 2162.0, 2176.0, 2072.0, 1947.0, 1702.0, 2222.0, 2457.0, 2535.0]}, "iteration_timing_avg": 0.20128235294117644}
@@ -17,10 +17,10 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1
# Runs the "345M" parameter model
-DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
+DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES"
# Run for 100 iterations
-python -m torch.distributed.launch $DISTRIBUTED_ARGS \
+torchrun $DISTRIBUTED_ARGS \
pretrain_bert.py \
--use-checkpoint-args \
--use-checkpoint-opt_param-scheduler \
@@ -61,7 +61,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS \
echo 50 > $CHECKPOINT_PATH/latest_checkpointed_iteration.txt
# Resume from 50th iteration ckpt and continue to 100 iterations
-python -m torch.distributed.launch $DISTRIBUTED_ARGS \
+torchrun $DISTRIBUTED_ARGS \
pretrain_bert.py \
--use-checkpoint-args \
--use-checkpoint-opt_param-scheduler \
...
#! /bin/bash
-set -o xtrace
+set -x
DATA_PATH=$1
CHECKPOINT_PATH=$2
@@ -19,9 +19,9 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1
# Runs the "345M" parameter model
-DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
+DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES"
-python -m torch.distributed.launch $DISTRIBUTED_ARGS \
+torchrun $DISTRIBUTED_ARGS \
pretrain_bert.py \
--num-layers 24 \
--hidden-size 1024 \
...
#!/bin/bash
# Parameters
-#SBATCH --account=adlr
-#SBATCH --job-name=adlr-ci:megatron-job
+#SBATCH --account=adlr_nlp_llmnext
+#SBATCH --job-name=adlr_nlp_llmnext-ci:megatron-job
#SBATCH --nodes=1
#SBATCH --partition=luna
@@ -10,7 +10,9 @@ DATA_PATH=/workspace/data/bert_data/my-bert_00_text_sentence
CHECKPOINT_PATH=/workspace/checkpoints
TENSORBOARD_DIR=/workspace/logs
-srun --output $BASE_DIR/results/slurm-%j.out --error $BASE_DIR/results/slurm-%j.out --container-image gitlab-master.nvidia.com/dl/dgx/pytorch:21.12-py3-devel --container-mounts $BASE_DIR/logs:/workspace/logs,$BASE_DIR/checkpoints:/workspace/checkpoints,$BUILD_DIR:/workspace/megatron-lm,$DATA_DIR:/workspace/data --no-container-mount-home bash -c "
+echo 'Running tests using $PYTORCH_IMAGE image'
+srun --output $BASE_DIR/results/slurm-%j.out --error $BASE_DIR/results/slurm-%j.out --container-image $PYTORCH_IMAGE --container-mounts $BASE_DIR/logs:/workspace/logs,$BASE_DIR/checkpoints:/workspace/checkpoints,$BUILD_DIR:/workspace/megatron-lm,$DATA_DIR:/workspace/data --no-container-mount-home bash -c "
ls
cd /workspace/megatron-lm
./tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_resume_checkpoint_test.sh $DATA_PATH $CHECKPOINT_PATH $TENSORBOARD_DIR $TP_SIZE $PP_SIZE $NUM_NODES"
\ No newline at end of file
#!/bin/bash
# Parameters
-#SBATCH --account=adlr
-#SBATCH --job-name=adlr-ci:megatron-job
+#SBATCH --account=adlr_nlp_llmnext
+#SBATCH --job-name=adlr_nlp_llmnext-ci:megatron-job
#SBATCH --nodes=1
#SBATCH --partition=luna
@@ -10,7 +10,9 @@ DATA_PATH=/workspace/data/bert_data/my-bert_00_text_sentence
CHECKPOINT_PATH=/workspace/checkpoints
TENSORBOARD_DIR=/workspace/logs
-srun --output $BASE_DIR/results/slurm-%j.out --error $BASE_DIR/results/slurm-%j.out --container-image gitlab-master.nvidia.com/dl/dgx/pytorch:21.12-py3-devel --container-mounts $BASE_DIR/logs:/workspace/logs,$BASE_DIR/checkpoints:/workspace/checkpoints,$BUILD_DIR:/workspace/megatron-lm,$DATA_DIR:/workspace/data --no-container-mount-home bash -c "
+echo 'Running tests using $PYTORCH_IMAGE image'
+srun --output $BASE_DIR/results/slurm-%j.out --error $BASE_DIR/results/slurm-%j.out --container-image $PYTORCH_IMAGE --container-mounts $BASE_DIR/logs:/workspace/logs,$BASE_DIR/checkpoints:/workspace/checkpoints,$BUILD_DIR:/workspace/megatron-lm,$DATA_DIR:/workspace/data --no-container-mount-home bash -c "
ls
cd /workspace/megatron-lm
./tests/functional_tests/test_scripts/bert/pretrain_bert_distributed_test.sh $DATA_PATH $CHECKPOINT_PATH $TENSORBOARD_DIR $TP_SIZE $PP_SIZE $NUM_NODES $MAX_STEPS $VP_SIZE"
\ No newline at end of file
@@ -17,10 +17,10 @@ export CUDA_DEVICE_MAX_CONNECTIONS=1
# Runs the "345M" parameter model
-DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
+DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES"
# Run for 100 iterations and save checkpoint at 50
-python -m torch.distributed.launch $DISTRIBUTED_ARGS \
+torchrun $DISTRIBUTED_ARGS \
pretrain_gpt.py \
--use-checkpoint-args \
--use-checkpoint-opt_param-scheduler \
@@ -65,7 +65,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS \
echo 50 > $CHECKPOINT_PATH/latest_checkpointed_iteration.txt
# Resume from 50th iteration ckpt and continue to 100 iterations
-python -m torch.distributed.launch $DISTRIBUTED_ARGS \
+torchrun $DISTRIBUTED_ARGS \
pretrain_gpt.py \
--use-checkpoint-args \
--use-checkpoint-opt_param-scheduler \
@@ -105,4 +105,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS \
--tensor-model-parallel-size $TP_SIZE \
--pipeline-model-parallel-size $PP_SIZE \
--no-gradient-accumulation-fusion \
--fp16
\ No newline at end of file
#! /bin/bash
+set -x
DATA_PATH=$1
CHECKPOINT_PATH=$2
TENSORBOARD_DIR=$3
-TP_SIZE=$4
-PP_SIZE=$5
-NNODES=$6
-MAX_STEPS=$7
-VP_SIZE=$8
-MBS=$9
-GBS=${10}
+USE_TE=$4
+TP_SIZE=$5
+PP_SIZE=$6
+NNODES=$7
+MAX_STEPS=$8
+USE_CORE=$9
+VP_SIZE=${10}
+MBS=${11}
+GBS=${12}
+ADDITIONAL_PARAMS=${13}
GPUS_PER_NODE=8
# Change for multinode config
MASTER_ADDR=localhost
@@ -18,12 +22,31 @@ NODE_RANK=0
WORLD_SIZE=$(($GPUS_PER_NODE*$NNODES))
export CUDA_DEVICE_MAX_CONNECTIONS=1
+TRANSFORMER_IMPL=local
+TRAINING_DTYPE=fp16
+CALLING_SCRIPT=pretrain_gpt.py
+if [[ $USE_CORE -eq 1 ]]; then
+    echo "Running using megatron core"
+    TRANSFORMER_IMPL=local
+    TRAINING_DTYPE=bf16
+    CALLING_SCRIPT=pretrain_gpt_core.py
+    export NVTE_ALLOW_NONDETERMINISTIC_ALGO=0
+fi
+if [[ $USE_TE -eq 1 ]]; then
+    echo "Running with TransformerEngine ..."
+    TRANSFORMER_IMPL=transformer_engine
+    TRAINING_DTYPE=bf16
+else
+    echo "Running with local transformer implementation ..."
+fi
# Runs the "345M" parameter model
-DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"
+DISTRIBUTED_ARGS="--nproc_per_node $GPUS_PER_NODE --nnodes $NNODES"
-python -m torch.distributed.launch $DISTRIBUTED_ARGS \
-pretrain_gpt.py \
+torchrun $DISTRIBUTED_ARGS \
+$CALLING_SCRIPT \
--num-layers 12 \
--hidden-size 512 \
--num-attention-heads 8 \
@@ -57,8 +80,10 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS \
--save-interval 10000 \
--eval-interval 1000 \
--eval-iters 10 \
+--transformer-impl $TRANSFORMER_IMPL \
--tensor-model-parallel-size $TP_SIZE \
--pipeline-model-parallel-size $PP_SIZE \
${VP_SIZE:+--num-layers-per-virtual-pipeline-stage "$VP_SIZE"} \
+${ADDITIONAL_PARAMS:+$ADDITIONAL_PARAMS} \
--no-gradient-accumulation-fusion \
---fp16
+--${TRAINING_DTYPE}
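For reference, the rewritten test script above now takes USE_TE and USE_CORE as additional positional arguments, so a direct invocation would look roughly like the sketch below. The argument order is taken from the new header; every concrete value here is only an example, and the sbatch wrapper further down is what supplies the real CI values.

# Hypothetical direct call of the updated script; positional order per the new header:
# DATA_PATH CHECKPOINT_PATH TENSORBOARD_DIR USE_TE TP_SIZE PP_SIZE NNODES MAX_STEPS USE_CORE VP_SIZE MBS GBS ADDITIONAL_PARAMS
./tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh \
  /workspace/data/gpt3_data/my-gpt3_00_text_document \
  /workspace/checkpoints \
  /workspace/logs \
  0 1 1 1 50 1 "" "4" "32" ""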
#!/bin/bash
# Parameters
-#SBATCH --account=adlr
-#SBATCH --job-name=adlr-ci:megatron-job
+#SBATCH --account=adlr_nlp_llmnext
+#SBATCH --job-name=adlr_nlp_llmnext-ci:megatron-job
#SBATCH --nodes=1
#SBATCH --partition=luna
@@ -10,7 +10,9 @@ DATA_PATH=/workspace/data/gpt3_data/my-gpt3_00_text_document
CHECKPOINT_PATH=/workspace/checkpoints
TENSORBOARD_DIR=/workspace/logs
-srun --output $BASE_DIR/results/slurm-%j.out --error $BASE_DIR/results/slurm-%j.out --container-image gitlab-master.nvidia.com/dl/dgx/pytorch:21.12-py3-devel --container-mounts $BASE_DIR/logs:/workspace/logs,$BASE_DIR/checkpoints:/workspace/checkpoints,$BUILD_DIR:/workspace/megatron-lm,$DATA_DIR:/workspace/data --no-container-mount-home bash -c "
+echo 'Running tests using $PYTORCH_IMAGE image'
+srun --output $BASE_DIR/results/slurm-%j.out --error $BASE_DIR/results/slurm-%j.out --container-image $PYTORCH_IMAGE --container-mounts $BASE_DIR/logs:/workspace/logs,$BASE_DIR/checkpoints:/workspace/checkpoints,$BUILD_DIR:/workspace/megatron-lm,$DATA_DIR:/workspace/data --no-container-mount-home bash -c "
ls
cd /workspace/megatron-lm
./tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_resume_checkpoint_test.sh $DATA_PATH $CHECKPOINT_PATH $TENSORBOARD_DIR $TP_SIZE $PP_SIZE $NUM_NODES"
\ No newline at end of file
#!/bin/bash
# Parameters
-#SBATCH --account=adlr
-#SBATCH --job-name=adlr-ci:megatron-job
+#SBATCH --account=adlr_nlp_llmnext
+#SBATCH --job-name=adlr_nlp_llmnext-ci:megatron-job
#SBATCH --nodes=1
#SBATCH --partition=luna
@@ -10,7 +10,14 @@ DATA_PATH=/workspace/data/gpt3_data/my-gpt3_00_text_document
CHECKPOINT_PATH=/workspace/checkpoints
TENSORBOARD_DIR=/workspace/logs
-srun --output $BASE_DIR/results/slurm-%j.out --error $BASE_DIR/results/slurm-%j.out --container-image gitlab-master.nvidia.com/dl/dgx/pytorch:21.12-py3-devel --container-mounts $BASE_DIR/logs:/workspace/logs,$BASE_DIR/checkpoints:/workspace/checkpoints,$BUILD_DIR:/workspace/megatron-lm,$DATA_DIR:/workspace/data --no-container-mount-home bash -c "
+if [[ -n $MBS ]]; then MBS=4; fi
+if [[ -n $GBS ]]; then GBS=32; fi
+if [[ -n $VP_SIZE ]]; then VP_SIZE="" ; fi
+echo 'Running tests using $PYTORCH_IMAGE image'
+srun --output $BASE_DIR/results/slurm-%j.out --error $BASE_DIR/results/slurm-%j.out --container-image $PYTORCH_IMAGE --container-mounts $BASE_DIR/logs:/workspace/logs,$BASE_DIR/checkpoints:/workspace/checkpoints,$BUILD_DIR:/workspace/megatron-lm,$DATA_DIR:/workspace/data --no-container-mount-home bash -c "
ls
cd /workspace/megatron-lm
-./tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh $DATA_PATH $CHECKPOINT_PATH $TENSORBOARD_DIR $TP_SIZE $PP_SIZE $NUM_NODES $MAX_STEPS $VP_SIZE $MBS $GBS"
+./tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh $DATA_PATH $CHECKPOINT_PATH $TENSORBOARD_DIR $USE_TE $TP_SIZE $PP_SIZE $NUM_NODES $MAX_STEPS $USE_CORE \"$VP_SIZE\" \"$MBS\" \"$GBS\" \"$ADDITIONAL_PARAMS\""
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import pytest
import torch

from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.models.gpt.gpt_embedding import GPTEmbedding
from tests.unit_tests.test_utilities import Utils


class TestGPTEmbedding:

    def setup_method(self, method):
        Utils.initialize_model_parallel(1, 1)
        transformer_config = TransformerConfig(num_layers=2, hidden_size=12, num_attention_heads=4, use_cpu_initialization=True)
        self.gpt_embedding = GPTEmbedding(config=transformer_config, vocab_size=100, max_sequence_length=4, add_position_embedding=True)

    def teardown_method(self, method):
        Utils.destroy_model_parallel()

    def test_constructor(self):
        assert isinstance(self.gpt_embedding, GPTEmbedding)
        num_weights = sum([p.numel() for p in self.gpt_embedding.parameters()])
        assert num_weights == 1248

    def test_zero_parameters(self):
        sum_weights = sum([p.sum() for p in self.gpt_embedding.parameters()])
        assert sum_weights != 0
        self.gpt_embedding.zero_parameters()
        sum_weights = sum([p.sum() for p in self.gpt_embedding.parameters()])
        assert sum_weights == 0

    def test_cpu_forward(self):
        input_ids = torch.tensor([0, 1, 2, 3], dtype=torch.int64).repeat((2, 1))
        position_ids = torch.tensor([0, 1, 2, 3], dtype=torch.int64).repeat((2, 1))
        embeddings = self.gpt_embedding(input_ids, position_ids)
        assert embeddings.device.type == 'cpu'
        assert embeddings.shape[0] == self.gpt_embedding.max_sequence_length
        assert embeddings.shape[1] == input_ids.shape[0]
        assert embeddings.shape[2] == self.gpt_embedding.config.hidden_size

    def test_gpu_forward(self):
        self.gpt_embedding.cuda()
        input_ids = torch.tensor([0, 1, 2, 3], dtype=torch.int64).repeat((2, 1)).cuda()
        position_ids = torch.tensor([0, 1, 2, 3], dtype=torch.int64).repeat((2, 1)).cuda()
        embeddings = self.gpt_embedding(input_ids, position_ids)
        assert embeddings.device.type == 'cuda'
        assert embeddings.shape[0] == self.gpt_embedding.max_sequence_length
        assert embeddings.shape[1] == input_ids.shape[0]
        assert embeddings.shape[2] == self.gpt_embedding.config.hidden_size
\ No newline at end of file
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

import pytest
import torch

from megatron.core.transformer.transformer_config import TransformerConfig
from megatron.core.models.gpt.gpt_model import GPTModel
from tests.unit_tests.test_utilities import Utils
from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed


class TestGPTModel:

    def setup_method(self, method):
        Utils.initialize_model_parallel(1, 1)
        model_parallel_cuda_manual_seed(123)
        transformer_config = TransformerConfig(num_layers=2, hidden_size=12, num_attention_heads=4, use_cpu_initialization=True)
        self.gpt_model = GPTModel(config=transformer_config, vocab_size=100, max_sequence_length=4)

    def teardown_method(self, method):
        Utils.destroy_model_parallel()

    def test_constructor(self):
        assert isinstance(self.gpt_model, GPTModel)
        assert self.gpt_model.max_sequence_length == 4
        num_weights = sum([p.numel() for p in self.gpt_model.parameters()])
        assert num_weights == 6240

    def test_set_input_tensor(self):
        config: TransformerConfig = self.gpt_model.config
        sequence_length = self.gpt_model.max_sequence_length
        micro_batch_size = 2

        # [sequence length, batch size, hidden size]
        input_tensor = torch.ones((sequence_length, micro_batch_size, config.hidden_size))
        self.gpt_model.set_input_tensor(input_tensor)

        assert self.gpt_model.decoder.input_tensor.shape[0] == sequence_length
        assert self.gpt_model.decoder.input_tensor.shape[1] == micro_batch_size
        assert self.gpt_model.decoder.input_tensor.shape[2] == config.hidden_size

    def test_post_process_forward(self):
        config: TransformerConfig = self.gpt_model.config
        sequence_length = self.gpt_model.max_sequence_length
        micro_batch_size = 2

        self.gpt_model.cuda()

        data = list(range(sequence_length))
        input_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda()
        position_ids = torch.tensor(data, dtype=torch.int64).repeat((micro_batch_size, 1)).cuda()
        attention_mask = torch.ones((1, 1, sequence_length, sequence_length), dtype=bool).cuda()

        logits = self.gpt_model.forward(input_ids=input_ids, position_ids=position_ids, attention_mask=attention_mask)

        assert logits.shape[0] == micro_batch_size
        assert logits.shape[1] == sequence_length
        assert logits.shape[2] == self.gpt_model.vocab_size

    def test_no_post_process_forward(self):
        pass

    def test_no_preprocess_forward(self):
        pass

    def test_state_dict_for_save_checkpoint(self):
        pass

    def test_load_state_dict(self):
        pass
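The two test modules above exercise the new megatron.core GPTEmbedding and GPTModel classes. One hedged way to run them locally is sketched below; the file paths and the single-process torchrun launch are assumptions about how the repository lays out and bootstraps its unit tests, not something this commit spells out.

# Hypothetical invocation (paths assumed); torchrun -m runs pytest as a module and sets the
# rendezvous environment variables that the Utils helper presumably uses to initialize torch.distributed.
torchrun --nproc_per_node=1 -m pytest -q \
  tests/unit_tests/models/test_gpt_embedding.py \
  tests/unit_tests/models/test_gpt_model.py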