Commit 6985e589 authored by slym
Browse files

more updates on examples

parent c1e0689d
...@@ -25,7 +25,7 @@ MBS=1 ...@@ -25,7 +25,7 @@ MBS=1
HS=20480 HS=20480
NAH=128 NAH=128
DDP=local DDP=local
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
# Name of the job. # Name of the job.
......
...@@ -16,9 +16,9 @@ GBS=12 ...@@ -16,9 +16,9 @@ GBS=12
# Set interleaved schedule options. # Set interleaved schedule options.
if [ ${INTERLEAVED} == "YES" ]; then if [ ${INTERLEAVED} == "YES" ]; then
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 2 " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 "
elif [ ${INTERLEAVED} == "NO" ]; then elif [ ${INTERLEAVED} == "NO" ]; then
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
else else
echo "Invalid configuration" echo "Invalid configuration"
exit 1 exit 1
......
...@@ -24,7 +24,7 @@ NLS=32 ...@@ -24,7 +24,7 @@ NLS=32
HS=20480 HS=20480
NAH=128 NAH=128
DDP=local DDP=local
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8 NNODES=8
......
...@@ -25,7 +25,7 @@ NLS=32 ...@@ -25,7 +25,7 @@ NLS=32
HS=3840 HS=3840
NAH=32 NAH=32
DDP=local DDP=local
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8 NNODES=8
......
...@@ -25,7 +25,7 @@ NLS=32 ...@@ -25,7 +25,7 @@ NLS=32
HS=3840 HS=3840
NAH=32 NAH=32
DDP=local DDP=local
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8 NNODES=8
......
...@@ -21,7 +21,7 @@ NLS=32 ...@@ -21,7 +21,7 @@ NLS=32
HS=15360 HS=15360
NAH=128 NAH=128
DDP=local DDP=local
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8 NNODES=8
......
...@@ -16,7 +16,7 @@ GBS=1 ...@@ -16,7 +16,7 @@ GBS=1
# Set activation recomputation. # Set activation recomputation.
if [ ${ACTIVATION_RECOMPUTATION} == "YES" ]; then if [ ${ACTIVATION_RECOMPUTATION} == "YES" ]; then
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${ACTIVATION_RECOMPUTATION} == "NO" ]; then elif [ ${ACTIVATION_RECOMPUTATION} == "NO" ]; then
MEGATRON_EXTRA_PARAMS="" MEGATRON_EXTRA_PARAMS=""
else else
......
...@@ -16,9 +16,9 @@ GBS=12 ...@@ -16,9 +16,9 @@ GBS=12
# Set scatter-gather communication optimization options. # Set scatter-gather communication optimization options.
if [ ${SCATTER_GATHER} == "YES" ]; then if [ ${SCATTER_GATHER} == "YES" ]; then
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 2 " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 "
elif [ ${SCATTER_GATHER} == "NO" ]; then elif [ ${SCATTER_GATHER} == "NO" ]; then
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 2 --no-scatter-gather-tensors-in-pipeline " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 --no-scatter-gather-tensors-in-pipeline "
else else
echo "Invalid configuration" echo "Invalid configuration"
exit 1 exit 1
......
...@@ -21,7 +21,7 @@ if [ ${MODEL_SIZE} == "1.7B" ]; then ...@@ -21,7 +21,7 @@ if [ ${MODEL_SIZE} == "1.7B" ]; then
NAH=24 NAH=24
DDP=torch DDP=torch
NNODES=4 NNODES=4
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "3.6B" ]; then elif [ ${MODEL_SIZE} == "3.6B" ]; then
TP=2 TP=2
PP=1 PP=1
...@@ -32,7 +32,7 @@ elif [ ${MODEL_SIZE} == "3.6B" ]; then ...@@ -32,7 +32,7 @@ elif [ ${MODEL_SIZE} == "3.6B" ]; then
NAH=32 NAH=32
DDP=torch DDP=torch
NNODES=8 NNODES=8
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "7.5B" ]; then elif [ ${MODEL_SIZE} == "7.5B" ]; then
TP=4 TP=4
PP=1 PP=1
...@@ -43,7 +43,7 @@ elif [ ${MODEL_SIZE} == "7.5B" ]; then ...@@ -43,7 +43,7 @@ elif [ ${MODEL_SIZE} == "7.5B" ]; then
NAH=32 NAH=32
DDP=torch DDP=torch
NNODES=16 NNODES=16
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "18B" ]; then elif [ ${MODEL_SIZE} == "18B" ]; then
TP=8 TP=8
PP=1 PP=1
...@@ -54,7 +54,7 @@ elif [ ${MODEL_SIZE} == "18B" ]; then ...@@ -54,7 +54,7 @@ elif [ ${MODEL_SIZE} == "18B" ]; then
NAH=48 NAH=48
DDP=torch DDP=torch
NNODES=32 NNODES=32
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "39B" ]; then elif [ ${MODEL_SIZE} == "39B" ]; then
TP=8 TP=8
PP=2 PP=2
...@@ -65,7 +65,7 @@ elif [ ${MODEL_SIZE} == "39B" ]; then ...@@ -65,7 +65,7 @@ elif [ ${MODEL_SIZE} == "39B" ]; then
NAH=64 NAH=64
DDP=local DDP=local
NNODES=64 NNODES=64
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "76B" ]; then elif [ ${MODEL_SIZE} == "76B" ]; then
TP=8 TP=8
PP=4 PP=4
...@@ -76,7 +76,7 @@ elif [ ${MODEL_SIZE} == "76B" ]; then ...@@ -76,7 +76,7 @@ elif [ ${MODEL_SIZE} == "76B" ]; then
NAH=80 NAH=80
DDP=local DDP=local
NNODES=128 NNODES=128
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 5" MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 5"
elif [ ${MODEL_SIZE} == "145B" ]; then elif [ ${MODEL_SIZE} == "145B" ]; then
TP=8 TP=8
PP=8 PP=8
...@@ -87,7 +87,7 @@ elif [ ${MODEL_SIZE} == "145B" ]; then ...@@ -87,7 +87,7 @@ elif [ ${MODEL_SIZE} == "145B" ]; then
NAH=96 NAH=96
DDP=local DDP=local
NNODES=192 NNODES=192
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 5 " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 5 "
elif [ ${MODEL_SIZE} == "310B" ]; then elif [ ${MODEL_SIZE} == "310B" ]; then
TP=8 TP=8
PP=16 PP=16
...@@ -98,7 +98,7 @@ elif [ ${MODEL_SIZE} == "310B" ]; then ...@@ -98,7 +98,7 @@ elif [ ${MODEL_SIZE} == "310B" ]; then
NAH=128 NAH=128
DDP=local DDP=local
NNODES=240 NNODES=240
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 3 " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 3 "
elif [ ${MODEL_SIZE} == "530B" ]; then elif [ ${MODEL_SIZE} == "530B" ]; then
TP=8 TP=8
PP=35 PP=35
...@@ -109,7 +109,7 @@ elif [ ${MODEL_SIZE} == "530B" ]; then ...@@ -109,7 +109,7 @@ elif [ ${MODEL_SIZE} == "530B" ]; then
NAH=128 NAH=128
DDP=local DDP=local
NNODES=315 NNODES=315
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 1 " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 1 "
elif [ ${MODEL_SIZE} == "1T" ]; then elif [ ${MODEL_SIZE} == "1T" ]; then
TP=8 TP=8
PP=64 PP=64
...@@ -120,7 +120,7 @@ elif [ ${MODEL_SIZE} == "1T" ]; then ...@@ -120,7 +120,7 @@ elif [ ${MODEL_SIZE} == "1T" ]; then
NAH=160 NAH=160
DDP=local DDP=local
NNODES=384 NNODES=384
MEGATRON_EXTRA_PARAMS="--checkpoint-activations " MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
else else
echo "Invalid configuration" echo "Invalid configuration"
exit 1 exit 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment