Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
6985e589
Commit
6985e589
authored
Aug 18, 2021
by
slym
Browse files
more updates on examples
parent
c1e0689d
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
20 additions
and
20 deletions
+20
-20
examples/sc21/run_figure_11.sh
examples/sc21/run_figure_11.sh
+1
-1
examples/sc21/run_figure_12.sh
examples/sc21/run_figure_12.sh
+2
-2
examples/sc21/run_figure_13.sh
examples/sc21/run_figure_13.sh
+1
-1
examples/sc21/run_figure_14.sh
examples/sc21/run_figure_14.sh
+1
-1
examples/sc21/run_figure_15.sh
examples/sc21/run_figure_15.sh
+1
-1
examples/sc21/run_figure_16.sh
examples/sc21/run_figure_16.sh
+1
-1
examples/sc21/run_figure_17.sh
examples/sc21/run_figure_17.sh
+1
-1
examples/sc21/run_figure_18.sh
examples/sc21/run_figure_18.sh
+2
-2
examples/sc21/run_table_1.sh
examples/sc21/run_table_1.sh
+10
-10
No files found.
examples/sc21/run_figure_11.sh
View file @
6985e589
...
...
@@ -25,7 +25,7 @@ MBS=1
HS
=
20480
NAH
=
128
DDP
=
local
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
# Name of the job.
...
...
examples/sc21/run_figure_12.sh
View file @
6985e589
...
...
@@ -16,9 +16,9 @@ GBS=12
# Set interleaved schedule options.
if
[
${
INTERLEAVED
}
==
"YES"
]
;
then
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations --num-layers-per-virtual-pipeline-stage 2 "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
--num-layers-per-virtual-pipeline-stage 2 "
elif
[
${
INTERLEAVED
}
==
"NO"
]
;
then
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
else
echo
"Invalid configuration"
exit
1
...
...
examples/sc21/run_figure_13.sh
View file @
6985e589
...
...
@@ -24,7 +24,7 @@ NLS=32
HS
=
20480
NAH
=
128
DDP
=
local
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
NNODES
=
8
...
...
examples/sc21/run_figure_14.sh
View file @
6985e589
...
...
@@ -25,7 +25,7 @@ NLS=32
HS
=
3840
NAH
=
32
DDP
=
local
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
NNODES
=
8
...
...
examples/sc21/run_figure_15.sh
View file @
6985e589
...
...
@@ -25,7 +25,7 @@ NLS=32
HS
=
3840
NAH
=
32
DDP
=
local
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
NNODES
=
8
...
...
examples/sc21/run_figure_16.sh
View file @
6985e589
...
...
@@ -21,7 +21,7 @@ NLS=32
HS
=
15360
NAH
=
128
DDP
=
local
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
NNODES
=
8
...
...
examples/sc21/run_figure_17.sh
View file @
6985e589
...
...
@@ -16,7 +16,7 @@ GBS=1
# Set activation recomputation.
if
[
${
ACTIVATION_RECOMPUTATION
}
==
"YES"
]
;
then
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
elif
[
${
ACTIVATION_RECOMPUTATION
}
==
"NO"
]
;
then
MEGATRON_EXTRA_PARAMS
=
""
else
...
...
examples/sc21/run_figure_18.sh
View file @
6985e589
...
...
@@ -16,9 +16,9 @@ GBS=12
# Set scatter-gather communication optimization options.
if
[
${
SCATTER_GATHER
}
==
"YES"
]
;
then
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations --num-layers-per-virtual-pipeline-stage 2 "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
--num-layers-per-virtual-pipeline-stage 2 "
elif
[
${
SCATTER_GATHER
}
==
"NO"
]
;
then
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations --num-layers-per-virtual-pipeline-stage 2 --no-scatter-gather-tensors-in-pipeline "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
--num-layers-per-virtual-pipeline-stage 2 --no-scatter-gather-tensors-in-pipeline "
else
echo
"Invalid configuration"
exit
1
...
...
examples/sc21/run_table_1.sh
View file @
6985e589
...
...
@@ -21,7 +21,7 @@ if [ ${MODEL_SIZE} == "1.7B" ]; then
NAH
=
24
DDP
=
torch
NNODES
=
4
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
elif
[
${
MODEL_SIZE
}
==
"3.6B"
]
;
then
TP
=
2
PP
=
1
...
...
@@ -32,7 +32,7 @@ elif [ ${MODEL_SIZE} == "3.6B" ]; then
NAH
=
32
DDP
=
torch
NNODES
=
8
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
elif
[
${
MODEL_SIZE
}
==
"7.5B"
]
;
then
TP
=
4
PP
=
1
...
...
@@ -43,7 +43,7 @@ elif [ ${MODEL_SIZE} == "7.5B" ]; then
NAH
=
32
DDP
=
torch
NNODES
=
16
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
elif
[
${
MODEL_SIZE
}
==
"18B"
]
;
then
TP
=
8
PP
=
1
...
...
@@ -54,7 +54,7 @@ elif [ ${MODEL_SIZE} == "18B" ]; then
NAH
=
48
DDP
=
torch
NNODES
=
32
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
elif
[
${
MODEL_SIZE
}
==
"39B"
]
;
then
TP
=
8
PP
=
2
...
...
@@ -65,7 +65,7 @@ elif [ ${MODEL_SIZE} == "39B" ]; then
NAH
=
64
DDP
=
local
NNODES
=
64
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
elif
[
${
MODEL_SIZE
}
==
"76B"
]
;
then
TP
=
8
PP
=
4
...
...
@@ -76,7 +76,7 @@ elif [ ${MODEL_SIZE} == "76B" ]; then
NAH
=
80
DDP
=
local
NNODES
=
128
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations --num-layers-per-virtual-pipeline-stage 5"
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
--num-layers-per-virtual-pipeline-stage 5"
elif
[
${
MODEL_SIZE
}
==
"145B"
]
;
then
TP
=
8
PP
=
8
...
...
@@ -87,7 +87,7 @@ elif [ ${MODEL_SIZE} == "145B" ]; then
NAH
=
96
DDP
=
local
NNODES
=
192
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations --num-layers-per-virtual-pipeline-stage 5 "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
--num-layers-per-virtual-pipeline-stage 5 "
elif
[
${
MODEL_SIZE
}
==
"310B"
]
;
then
TP
=
8
PP
=
16
...
...
@@ -98,7 +98,7 @@ elif [ ${MODEL_SIZE} == "310B" ]; then
NAH
=
128
DDP
=
local
NNODES
=
240
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations --num-layers-per-virtual-pipeline-stage 3 "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
--num-layers-per-virtual-pipeline-stage 3 "
elif
[
${
MODEL_SIZE
}
==
"530B"
]
;
then
TP
=
8
PP
=
35
...
...
@@ -109,7 +109,7 @@ elif [ ${MODEL_SIZE} == "530B" ]; then
NAH
=
128
DDP
=
local
NNODES
=
315
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations --num-layers-per-virtual-pipeline-stage 1 "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
--num-layers-per-virtual-pipeline-stage 1 "
elif
[
${
MODEL_SIZE
}
==
"1T"
]
;
then
TP
=
8
PP
=
64
...
...
@@ -120,7 +120,7 @@ elif [ ${MODEL_SIZE} == "1T" ]; then
NAH
=
160
DDP
=
local
NNODES
=
384
MEGATRON_EXTRA_PARAMS
=
"--
checkpoint-
activations "
MEGATRON_EXTRA_PARAMS
=
"--activations
-checkpoint-method uniform
"
else
echo
"Invalid configuration"
exit
1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment