Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
8e6fa622
Commit
8e6fa622
authored
Mar 08, 2023
by
Shanmugam Ramasamy
Committed by
Jared Casper
Mar 08, 2023
Browse files
Testing infrastructure for Megatron core
parent
95f872f5
Changes
32
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
38 additions
and
6 deletions
+38
-6
tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_resume_checkpoint_test.sh
...ts/gpt3/sbatch_gpt3_distributed_resume_checkpoint_test.sh
+16
-0
tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_test.sh
...l_tests/test_scripts/gpt3/sbatch_gpt3_distributed_test.sh
+16
-0
tests/unit_tests/__init__.py
tests/unit_tests/__init__.py
+0
-0
tests/unit_tests/tensor_parallel/test_cross_entropy.py
tests/unit_tests/tensor_parallel/test_cross_entropy.py
+1
-1
tests/unit_tests/tensor_parallel/test_data.py
tests/unit_tests/tensor_parallel/test_data.py
+1
-1
tests/unit_tests/tensor_parallel/test_mappings.py
tests/unit_tests/tensor_parallel/test_mappings.py
+1
-1
tests/unit_tests/tensor_parallel/test_random.py
tests/unit_tests/tensor_parallel/test_random.py
+1
-1
tests/unit_tests/tensor_parallel/test_tensor_parallel_utils.py
.../unit_tests/tensor_parallel/test_tensor_parallel_utils.py
+1
-1
tests/unit_tests/test_basic.py
tests/unit_tests/test_basic.py
+0
-0
tests/unit_tests/test_parallel_state.py
tests/unit_tests/test_parallel_state.py
+1
-1
tests/unit_tests/test_utilities.py
tests/unit_tests/test_utilities.py
+0
-0
tests/unit_tests/test_utils.py
tests/unit_tests/test_utils.py
+0
-0
No files found.
tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_resume_checkpoint_test.sh
0 → 100644
View file @
8e6fa622
#!/bin/bash
# Parameters
#SBATCH --account=adlr
#SBATCH --job-name=adlr-ci:megatron-job
#SBATCH --nodes=1
#SBATCH --partition=luna
DATA_PATH
=
/workspace/data/gpt3_data/my-gpt3_00_text_document
CHECKPOINT_PATH
=
/workspace/checkpoints
TENSORBOARD_DIR
=
/workspace/logs
srun
--output
$BASE_DIR
/results/slurm-%j.out
--error
$BASE_DIR
/results/slurm-%j.out
--container-image
gitlab-master.nvidia.com/dl/dgx/pytorch:21.12-py3-devel
--container-mounts
$BASE_DIR
/logs:/workspace/logs,
$BASE_DIR
/checkpoints:/workspace/checkpoints,
$BUILD_DIR
:/workspace/megatron-lm,
$DATA_DIR
:/workspace/data
--no-container-mount-home
bash
-c
"
ls
cd /workspace/megatron-lm
./tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_resume_checkpoint_test.sh
$DATA_PATH
$CHECKPOINT_PATH
$TENSORBOARD_DIR
$TP_SIZE
$PP_SIZE
$NUM_NODES
"
\ No newline at end of file
tests/functional_tests/test_scripts/gpt3/sbatch_gpt3_distributed_test.sh
0 → 100755
View file @
8e6fa622
#!/bin/bash
# Parameters
#SBATCH --account=adlr
#SBATCH --job-name=adlr-ci:megatron-job
#SBATCH --nodes=1
#SBATCH --partition=luna
DATA_PATH
=
/workspace/data/gpt3_data/my-gpt3_00_text_document
CHECKPOINT_PATH
=
/workspace/checkpoints
TENSORBOARD_DIR
=
/workspace/logs
srun
--output
$BASE_DIR
/results/slurm-%j.out
--error
$BASE_DIR
/results/slurm-%j.out
--container-image
gitlab-master.nvidia.com/dl/dgx/pytorch:21.12-py3-devel
--container-mounts
$BASE_DIR
/logs:/workspace/logs,
$BASE_DIR
/checkpoints:/workspace/checkpoints,
$BUILD_DIR
:/workspace/megatron-lm,
$DATA_DIR
:/workspace/data
--no-container-mount-home
bash
-c
"
ls
cd /workspace/megatron-lm
./tests/functional_tests/test_scripts/gpt3/pretrain_gpt3_distributed_test.sh
$DATA_PATH
$CHECKPOINT_PATH
$TENSORBOARD_DIR
$TP_SIZE
$PP_SIZE
$NUM_NODES
$MAX_STEPS
"
\ No newline at end of file
tests/unit_tests/__init__.py
0 → 100644
View file @
8e6fa622
tests/tensor_parallel/test_cross_entropy.py
→
tests/unit_
tests/tensor_parallel/test_cross_entropy.py
View file @
8e6fa622
from
megatron.core.tensor_parallel.cross_entropy
import
vocab_parallel_cross_entropy
from
megatron.core.tensor_parallel.cross_entropy
import
vocab_parallel_cross_entropy
import
torch
import
torch
from
tests.test_utilities
import
Utils
from
tests.unit_
tests.test_utilities
import
Utils
import
numpy
as
np
import
numpy
as
np
def
test_vocab_parallel_cross_entropy
():
def
test_vocab_parallel_cross_entropy
():
...
...
tests/tensor_parallel/test_data.py
→
tests/unit_
tests/tensor_parallel/test_data.py
View file @
8e6fa622
from
megatron.core.tensor_parallel.data
import
broadcast_data
from
megatron.core.tensor_parallel.data
import
broadcast_data
import
torch
import
torch
from
tests.test_utilities
import
Utils
from
tests.unit_
tests.test_utilities
import
Utils
def
test_broadcast_data
():
def
test_broadcast_data
():
Utils
.
initialize_model_parallel
(
2
,
4
)
Utils
.
initialize_model_parallel
(
2
,
4
)
...
...
tests/tensor_parallel/test_mappings.py
→
tests/unit_
tests/tensor_parallel/test_mappings.py
View file @
8e6fa622
from
megatron.core.tensor_parallel
import
mappings
from
megatron.core.tensor_parallel
import
mappings
from
tests.test_utilities
import
Utils
from
tests.unit_
tests.test_utilities
import
Utils
import
torch
import
torch
def
test_CopyToModelParallelRegion
():
def
test_CopyToModelParallelRegion
():
...
...
tests/tensor_parallel/test_random.py
→
tests/unit_
tests/tensor_parallel/test_random.py
View file @
8e6fa622
...
@@ -2,7 +2,7 @@ from megatron.core.tensor_parallel.random import CudaRNGStatesTracker
...
@@ -2,7 +2,7 @@ from megatron.core.tensor_parallel.random import CudaRNGStatesTracker
from
megatron.core.tensor_parallel.random
import
model_parallel_cuda_manual_seed
from
megatron.core.tensor_parallel.random
import
model_parallel_cuda_manual_seed
from
megatron.core.tensor_parallel.random
import
_CUDA_RNG_STATE_TRACKER
from
megatron.core.tensor_parallel.random
import
_CUDA_RNG_STATE_TRACKER
from
megatron.core.tensor_parallel.random
import
checkpoint
from
megatron.core.tensor_parallel.random
import
checkpoint
from
tests.test_utilities
import
Utils
from
tests.unit_
tests.test_utilities
import
Utils
import
pytest
import
pytest
import
torch
import
torch
...
...
tests/tensor_parallel/test_tensor_parallel_utils.py
→
tests/unit_
tests/tensor_parallel/test_tensor_parallel_utils.py
View file @
8e6fa622
import
torch
import
torch
import
megatron.core.tensor_parallel.utils
as
util
import
megatron.core.tensor_parallel.utils
as
util
import
megatron.core.parallel_state
as
ps
import
megatron.core.parallel_state
as
ps
from
tests.test_utilities
import
Utils
from
tests.unit_
tests.test_utilities
import
Utils
rank
=
Utils
.
rank
rank
=
Utils
.
rank
...
...
tests/test_basic.py
→
tests/unit_
tests/test_basic.py
View file @
8e6fa622
File moved
tests/test_parallel_state.py
→
tests/unit_
tests/test_parallel_state.py
View file @
8e6fa622
import
torch
import
torch
import
megatron.core.parallel_state
as
ps
import
megatron.core.parallel_state
as
ps
import
pytest
import
pytest
from
tests.test_utilities
import
Utils
from
tests.unit_
tests.test_utilities
import
Utils
import
os
import
os
rank
=
Utils
.
rank
rank
=
Utils
.
rank
...
...
tests/test_utilities.py
→
tests/unit_
tests/test_utilities.py
View file @
8e6fa622
File moved
tests/test_utils.py
→
tests/unit_
tests/test_utils.py
View file @
8e6fa622
File moved
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment