update

77361dae · liangjing · c9289b90 · 77361dae · 77361dae · 77361dae
Commit 77361dae authored Nov 08, 2024 by liangjing
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 6 deletions

scripts/llama2_13b.sh scripts/llama2_13b.sh +3 -2

scripts/llama2_70b.sh scripts/llama2_70b.sh +3 -2

scripts/llama2_7b.sh scripts/llama2_7b.sh +2 -2

No files found.
--- a/scripts/llama2_13b.sh
+++ b/scripts/llama2_13b.sh
@@ -32,8 +32,9 @@ GPT_MODEL_ARGS=(
 )
 TRAINING_ARGS=(
-    --transformer-impl local
+    --log-throughput
-    --use-legacy-models 
+    --transformer-impl transformer_engine
+    --use-mcore-models
    --micro-batch-size 1 
    --global-batch-size 256
    --train-iters 100 

--- a/scripts/llama2_70b.sh
+++ b/scripts/llama2_70b.sh
@@ -36,8 +36,9 @@ GPT_MODEL_ARGS=(
 )
 TRAINING_ARGS=(
-    --transformer-impl local
+    --log-throughput
-    --use-legacy-models
+    --transformer-impl transformer_engine
+    --use-mcore-models
    --micro-batch-size 1
    --global-batch-size 512
    --train-iters 100

--- a/scripts/llama2_7b.sh
+++ b/scripts/llama2_7b.sh
@@ -32,8 +32,8 @@ GPT_MODEL_ARGS=(
 TRAINING_ARGS=(
    --log-throughput
-    --transformer-impl local
+    --transformer-impl transformer_engine
-    --use-legacy-models 
+    --use-mcore-models
    --micro-batch-size 1 
    --global-batch-size 240 
    --train-iters 100