Commit 2be1e510 authored by Jared Casper

Merge branch 't5_scripts' into 'main'

Update T5 scripts

See merge request ADLR/megatron-lm!279
parents 598d7ee2 3dadd16d
@@ -15,7 +15,7 @@ python pretrain_t5.py \
     --encoder-seq-length 512 \
     --decoder-seq-length 128 \
     --micro-batch-size 16 \
-    --global-batch-size 2048 \
+    --global-batch-size 16 \
     --max-position-embeddings 512 \
     --train-iters 1000000 \
     --lr-decay-iters 1000000 \
@@ -35,4 +35,5 @@ python pretrain_t5.py \
     --save-interval 10000 \
     --eval-interval 1000 \
     --eval-iters 10 \
-    --fp16
+    --fp16 \
+    --vocab-extra-ids 100
@@ -24,7 +24,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS \
     --encoder-seq-length 512 \
     --decoder-seq-length 128 \
     --micro-batch-size 16 \
-    --global-batch-size 2048 \
+    --global-batch-size 128 \
     --max-position-embeddings 512 \
     --train-iters 1000000 \
     --lr-decay-iters 1000000 \
@@ -44,4 +44,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS \
     --save-interval 10000 \
     --eval-interval 1000 \
     --eval-iters 10 \
-    --fp16
+    --fp16 \
+    --vocab-extra-ids 100
@@ -24,7 +24,7 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS \
     --encoder-seq-length 512 \
     --decoder-seq-length 128 \
     --micro-batch-size 16 \
-    --global-batch-size 2048 \
+    --global-batch-size 128 \
     --seq-length 512 \
     --max-position-embeddings 512 \
     --train-iters 1000000 \
@@ -45,4 +45,5 @@ python -m torch.distributed.launch $DISTRIBUTED_ARGS \
     --save-interval 10000 \
     --eval-interval 1000 \
     --eval-iters 10 \
-    --fp16
+    --fp16 \
+    --vocab-extra-ids 100
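A note on the batch-size changes above: Megatron-LM requires the global batch size to be a multiple of the micro batch size times the data-parallel size, and the quotient is the number of gradient-accumulation steps per iteration. The sketch below is illustrative only, assuming 8 data-parallel GPUs for the distributed scripts (the GPU count is not stated in this diff):

# Minimal sketch of the relation Megatron-LM enforces between these flags.
# DATA_PARALLEL_SIZE=8 is an assumed GPU count, not taken from this MR.
MICRO_BATCH_SIZE=16
DATA_PARALLEL_SIZE=8
GLOBAL_BATCH_SIZE=128
if (( GLOBAL_BATCH_SIZE % (MICRO_BATCH_SIZE * DATA_PARALLEL_SIZE) != 0 )); then
    echo "global batch size must be a multiple of micro batch * data-parallel size" >&2
    exit 1
fi
# Gradient-accumulation steps per iteration:
echo $(( GLOBAL_BATCH_SIZE / (MICRO_BATCH_SIZE * DATA_PARALLEL_SIZE) ))  # prints 1

Under these assumptions, the old value of 2048 would accumulate 16 micro-batches per iteration, while the new values of 16 (single process) and 128 (8-way data parallel) run exactly one micro-batch per GPU per step.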
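The newly added --vocab-extra-ids 100 appends 100 T5-style sentinel tokens to the tokenizer vocabulary; the T5 span-corruption objective uses them to mark masked spans in the encoder input and decoder target. As a hedged illustration, assuming the tokens follow Megatron-LM's <extra_id_N> naming convention, the flag accounts for exactly these entries:

# Print the 100 sentinel token names the flag adds (naming scheme assumed).
for i in $(seq 0 99); do
    echo "<extra_id_${i}>"
done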