Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
deepspeed
Commits
53ac7947
Unverified
Commit
53ac7947
authored
May 20, 2020
by
Jeff Rasley
Committed by
GitHub
May 20, 2020
Browse files
reduce size of megatron tests (#223)
parent
8a18e73e
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
78 additions
and
57 deletions
+78
-57
tests/model/Megatron_GPT2/ds_config_func_bs4_zero2.json
tests/model/Megatron_GPT2/ds_config_func_bs4_zero2.json
+4
-1
tests/model/Megatron_GPT2/ds_config_func_bs8_zero2.json
tests/model/Megatron_GPT2/ds_config_func_bs8_zero2.json
+4
-1
tests/model/Megatron_GPT2/ds_gpt2_test.sh
tests/model/Megatron_GPT2/ds_gpt2_test.sh
+2
-2
tests/model/Megatron_GPT2/run_checkpoint_test.py
tests/model/Megatron_GPT2/run_checkpoint_test.py
+19
-9
tests/model/Megatron_GPT2/run_func_test.py
tests/model/Megatron_GPT2/run_func_test.py
+41
-36
tests/model/run_sanity_check.py
tests/model/run_sanity_check.py
+8
-8
No files found.
tests/model/Megatron_GPT2/ds_config_func_bs4_zero2.json
View file @
53ac7947
...
...
@@ -3,7 +3,10 @@
"gradient_accumulation_steps"
:
1
,
"steps_per_print"
:
1
,
"zero_optimization"
:
{
"stage"
:
2
"stage"
:
2
,
"reduce_bucket_size"
:
7000000
,
"allgather_bucket_size"
:
7000000
,
"reduce_scatter"
:
true
},
"optimizer"
:
{
"type"
:
"Adam"
,
...
...
tests/model/Megatron_GPT2/ds_config_func_bs8_zero2.json
View file @
53ac7947
...
...
@@ -3,7 +3,10 @@
"gradient_accumulation_steps"
:
1
,
"steps_per_print"
:
1
,
"zero_optimization"
:
{
"stage"
:
2
"stage"
:
2
,
"reduce_bucket_size"
:
7000000
,
"allgather_bucket_size"
:
7000000
,
"reduce_scatter"
:
true
},
"optimizer"
:
{
"type"
:
"Adam"
,
...
...
tests/model/Megatron_GPT2/ds_gpt2_test.sh
View file @
53ac7947
...
...
@@ -20,8 +20,8 @@ helpFunction()
exit
1
}
layers
=
2
4
hidden_size
=
1
024
layers
=
2
hidden_size
=
1
28
seq_length
=
1024
ckpt_num_layers
=
1
other_args
=
""
...
...
tests/model/Megatron_GPT2/run_checkpoint_test.py
View file @
53ac7947
...
...
@@ -10,6 +10,16 @@ import time
import
re
from
.test_common
import
BaseTestCase
LAYERS
=
2
HIDDEN_SIZE
=
128
ATTN_HEADS
=
8
def remove_file(test_id, filename):
    """Delete ``filename`` through bash if it exists, echoing the command first.

    Args:
        test_id: Label printed in front of the command line.
        filename: Path of the file to remove. A missing file is not an
            error: the shell guard skips ``rm`` when the file is absent.
    """
    # Function-scope import so the block does not depend on a file-level import.
    import shlex

    # Quote the path so whitespace or shell metacharacters in it cannot split
    # the `[ -f ... ]` test or be interpreted by bash.
    quoted = shlex.quote(str(filename))
    cmd = f"if [ -f {quoted} ] ; then rm -v {quoted}; fi"
    print(f"{test_id} cmd: {cmd}")
    # check=False: a non-zero exit status from the shell does not raise here.
    subprocess.run(cmd, shell=True, check=False, executable='/bin/bash')
def
grep_loss_from_file
(
file_name
):
loss
=
0.0
...
...
@@ -50,10 +60,10 @@ class GPT2CheckpointTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
8
,
"steps"
:
1100
,
"layers"
:
2
,
"hidden_size"
:
256
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
256
,
"heads"
:
16
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
True
,
"tag"
:
"ds_zero1"
,
"zero"
:
True
,
...
...
@@ -72,10 +82,10 @@ class GPT2CheckpointTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
8
,
"steps"
:
1100
,
"layers"
:
2
,
"hidden_size"
:
256
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
256
,
"heads"
:
16
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
True
,
"tag"
:
"ds_zero2"
,
"zero"
:
True
,
...
...
@@ -94,10 +104,10 @@ class GPT2CheckpointTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
8
,
"steps"
:
1100
,
"layers"
:
2
,
"hidden_size"
:
256
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
256
,
"heads"
:
16
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
True
,
"zero"
:
False
,
"other_args"
:
""
,
...
...
tests/model/Megatron_GPT2/run_func_test.py
View file @
53ac7947
...
...
@@ -10,6 +10,11 @@ import time
import
re
from
.test_common
import
BaseTestCase
LAYERS
=
2
HIDDEN_SIZE
=
128
ATTN_HEADS
=
8
SEQ_LEN
=
64
def
grep_loss_from_file
(
file_name
):
loss
=
0.0
...
...
@@ -50,10 +55,10 @@ class GPT2FuncTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
4
,
"steps"
:
1000
,
"layers"
:
12
,
"hidden_size"
:
768
,
"seq_length"
:
256
,
"heads"
:
12
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
SEQ_LEN
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
False
,
"json"
:
"ds_config_func_bs4_zero1.json"
,
}
...
...
@@ -68,10 +73,10 @@ class GPT2FuncTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
8
,
"steps"
:
1000
,
"layers"
:
12
,
"hidden_size"
:
768
,
"seq_length"
:
256
,
"heads"
:
12
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
SEQ_LEN
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
False
,
"json"
:
"ds_config_func_bs8_zero1.json"
,
}
...
...
@@ -86,10 +91,10 @@ class GPT2FuncTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
8
,
"steps"
:
1000
,
"layers"
:
12
,
"hidden_size"
:
768
,
"seq_length"
:
256
,
"heads"
:
12
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
SEQ_LEN
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
False
,
"json"
:
"ds_config_func_bs8_zero1.json"
,
}
...
...
@@ -104,10 +109,10 @@ class GPT2FuncTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
8
,
"steps"
:
1000
,
"layers"
:
12
,
"hidden_size"
:
768
,
"seq_length"
:
256
,
"heads"
:
12
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
SEQ_LEN
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
False
,
"json"
:
"ds_config_func_bs8_zero1.json"
,
}
...
...
@@ -122,10 +127,10 @@ class GPT2FuncTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
4
,
"steps"
:
1000
,
"layers"
:
12
,
"hidden_size"
:
768
,
"seq_length"
:
256
,
"heads"
:
12
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
SEQ_LEN
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
False
,
"json"
:
"ds_config_func_bs4_zero2.json"
,
}
...
...
@@ -140,10 +145,10 @@ class GPT2FuncTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
8
,
"steps"
:
1000
,
"layers"
:
12
,
"hidden_size"
:
768
,
"seq_length"
:
256
,
"heads"
:
12
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
SEQ_LEN
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
False
,
"json"
:
"ds_config_func_bs8_zero2.json"
,
}
...
...
@@ -158,10 +163,10 @@ class GPT2FuncTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
8
,
"steps"
:
1000
,
"layers"
:
12
,
"hidden_size"
:
768
,
"seq_length"
:
256
,
"heads"
:
12
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
SEQ_LEN
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
False
,
"json"
:
"ds_config_func_bs8_zero2.json"
,
}
...
...
@@ -179,10 +184,10 @@ class GPT2FuncTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
8
,
"steps"
:
1000
,
"layers"
:
12
,
"hidden_size"
:
768
,
"seq_length"
:
256
,
"heads"
:
12
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
SEQ_LEN
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
False
,
"json"
:
"ds_config_func_bs8_zero2.json"
,
}
...
...
@@ -200,10 +205,10 @@ class GPT2FuncTestCase(BaseTestCase):
"nodes"
:
1
,
"bs"
:
4
,
"steps"
:
20
,
"layers"
:
12
,
"hidden_size"
:
768
,
"seq_length"
:
256
,
"heads"
:
12
,
"layers"
:
LAYERS
,
"hidden_size"
:
HIDDEN_SIZE
,
"seq_length"
:
SEQ_LEN
,
"heads"
:
ATTN_HEADS
,
"deepspeed"
:
False
,
"json"
:
"ds_config_func_scheduler.json"
,
}
...
...
tests/model/run_sanity_check.py
View file @
53ac7947
...
...
@@ -29,14 +29,14 @@ def pytest_hack(runner_result):
assert
runner_result
.
wasSuccessful
()
# fail the test
#
def test_megatron():
#
runner = unittest.TextTestRunner(failfast=True)
#
pytest_hack(runner.run(Megatron_GPT2.suite()))
#
#
#
def test_megatron_checkpoint():
#
runner = unittest.TextTestRunner(failfast=True)
#
pytest_hack(runner.run(Megatron_GPT2.checkpoint_suite()))
def test_megatron():
    """Run ``Megatron_GPT2.suite()`` with a fail-fast text runner.

    The runner result is passed to ``pytest_hack``.
    """
    text_runner = unittest.TextTestRunner(failfast=True)
    outcome = text_runner.run(Megatron_GPT2.suite())
    pytest_hack(outcome)
def test_megatron_checkpoint():
    """Run ``Megatron_GPT2.checkpoint_suite()`` with a fail-fast text runner.

    The runner result is passed to ``pytest_hack``.
    """
    text_runner = unittest.TextTestRunner(failfast=True)
    outcome = text_runner.run(Megatron_GPT2.checkpoint_suite())
    pytest_hack(outcome)
def
test_squad
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment