Commit 6985e589 authored by slym
Browse files

more updates on examples

parent c1e0689d
......@@ -25,7 +25,7 @@ MBS=1
HS=20480
NAH=128
DDP=local
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
# Name of the job.
......
......@@ -16,9 +16,9 @@ GBS=12
# Set interleaved schedule options.
if [ ${INTERLEAVED} == "YES" ]; then
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 2 "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 "
elif [ ${INTERLEAVED} == "NO" ]; then
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
else
echo "Invalid configuration"
exit 1
......
......@@ -24,7 +24,7 @@ NLS=32
HS=20480
NAH=128
DDP=local
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8
......
......@@ -25,7 +25,7 @@ NLS=32
HS=3840
NAH=32
DDP=local
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8
......
......@@ -25,7 +25,7 @@ NLS=32
HS=3840
NAH=32
DDP=local
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8
......
......@@ -21,7 +21,7 @@ NLS=32
HS=15360
NAH=128
DDP=local
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
NNODES=8
......
......@@ -16,7 +16,7 @@ GBS=1
# Set activation recomputation.
if [ ${ACTIVATION_RECOMPUTATION} == "YES" ]; then
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${ACTIVATION_RECOMPUTATION} == "NO" ]; then
MEGATRON_EXTRA_PARAMS=""
else
......
......@@ -16,9 +16,9 @@ GBS=12
# Set scatter-gather communication optimization options.
if [ ${SCATTER_GATHER} == "YES" ]; then
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 2 "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 "
elif [ ${SCATTER_GATHER} == "NO" ]; then
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 2 --no-scatter-gather-tensors-in-pipeline "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 2 --no-scatter-gather-tensors-in-pipeline "
else
echo "Invalid configuration"
exit 1
......
......@@ -21,7 +21,7 @@ if [ ${MODEL_SIZE} == "1.7B" ]; then
NAH=24
DDP=torch
NNODES=4
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "3.6B" ]; then
TP=2
PP=1
......@@ -32,7 +32,7 @@ elif [ ${MODEL_SIZE} == "3.6B" ]; then
NAH=32
DDP=torch
NNODES=8
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "7.5B" ]; then
TP=4
PP=1
......@@ -43,7 +43,7 @@ elif [ ${MODEL_SIZE} == "7.5B" ]; then
NAH=32
DDP=torch
NNODES=16
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "18B" ]; then
TP=8
PP=1
......@@ -54,7 +54,7 @@ elif [ ${MODEL_SIZE} == "18B" ]; then
NAH=48
DDP=torch
NNODES=32
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "39B" ]; then
TP=8
PP=2
......@@ -65,7 +65,7 @@ elif [ ${MODEL_SIZE} == "39B" ]; then
NAH=64
DDP=local
NNODES=64
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
elif [ ${MODEL_SIZE} == "76B" ]; then
TP=8
PP=4
......@@ -76,7 +76,7 @@ elif [ ${MODEL_SIZE} == "76B" ]; then
NAH=80
DDP=local
NNODES=128
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 5"
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 5"
elif [ ${MODEL_SIZE} == "145B" ]; then
TP=8
PP=8
......@@ -87,7 +87,7 @@ elif [ ${MODEL_SIZE} == "145B" ]; then
NAH=96
DDP=local
NNODES=192
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 5 "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 5 "
elif [ ${MODEL_SIZE} == "310B" ]; then
TP=8
PP=16
......@@ -98,7 +98,7 @@ elif [ ${MODEL_SIZE} == "310B" ]; then
NAH=128
DDP=local
NNODES=240
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 3 "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 3 "
elif [ ${MODEL_SIZE} == "530B" ]; then
TP=8
PP=35
......@@ -109,7 +109,7 @@ elif [ ${MODEL_SIZE} == "530B" ]; then
NAH=128
DDP=local
NNODES=315
MEGATRON_EXTRA_PARAMS="--checkpoint-activations --num-layers-per-virtual-pipeline-stage 1 "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform --num-layers-per-virtual-pipeline-stage 1 "
elif [ ${MODEL_SIZE} == "1T" ]; then
TP=8
PP=64
......@@ -120,7 +120,7 @@ elif [ ${MODEL_SIZE} == "1T" ]; then
NAH=160
DDP=local
NNODES=384
MEGATRON_EXTRA_PARAMS="--checkpoint-activations "
MEGATRON_EXTRA_PARAMS="--activations-checkpoint-method uniform "
else
echo "Invalid configuration"
exit 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment