更新llama预训练脚本

bc09f8ef · wxj · 5bd8b291 · bc09f8ef
Commit bc09f8ef authored Dec 09, 2024 by wxj
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 25 additions and 26 deletions

Llama_pretraining.sh Llama_pretraining.sh +25 -26

No files found.
--- a/Llama_pretraining.sh
+++ b/Llama_pretraining.sh
@@ -25,7 +25,7 @@ source /opt/dtk/env.sh
 CHECKPOINT_PATH=./tmp_7b #$1 #<Specify path>
 TENSORBOARD_LOGS_PATH=./tmp_7b  #$2 #<Specify path>
-DATA_PATH="/data/datasets/nemo_pretrain/oscar-1GB/oscar-1GB-llama_text_document" #<Specify path and file prefix>_text_document
+DATA_PATH="/datasets/oscar-1GB-llama_text_document" #<Specify path and file prefix>_text_document
 GPT_MODEL_ARGS=(
    --num-layers 6
@@ -40,11 +40,10 @@ GPT_MODEL_ARGS=(
 # export NVTE_FLASH_ATTN_TRITON=1 # 走triton_fa
 # --transformer-impl transformer_engine
    # --use-mcore-models
-    # --transformer-impl local
-    # --use-legacy-models 
 TRAINING_ARGS=(
-    --transformer-impl transformer_engine
+    --transformer-impl local
-    --use-mcore-models
+    --use-legacy-models 
    --micro-batch-size 1 
    --global-batch-size 60 #240 #512 #64
    --train-iters 100
@@ -86,7 +85,7 @@ DATA_ARGS=(
    --normalization RMSNorm 
    --no-position-embedding 
    --tokenizer-type Llama2Tokenizer
-    --tokenizer-model /data/model_weights/llama2_7b_hf/tokenizer.model
+    --tokenizer-model /path/to/llama2_7b_hf/tokenizer.model
 )
 EVAL_AND_LOGGING_ARGS=(
@@ -143,24 +142,24 @@ case ${LOCAL_RANK} in
 #   ${APP}
  numactl --cpunodebind=0 --membind=0 ${APP}
  ;;
-# [4])
+[4])
-#   export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+  export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-# #   ${APP}
+#   ${APP}
-#   numactl --cpunodebind=0 --membind=0 ${APP}
+  numactl --cpunodebind=0 --membind=0 ${APP}
-#   ;;
+  ;;
-# [5])
+[5])
-#   export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+  export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-# #   ${APP}
+#   ${APP}
-#   numactl --cpunodebind=0 --membind=0 ${APP}
+  numactl --cpunodebind=0 --membind=0 ${APP}
-#   ;;
+  ;;
-# [6])
+[6])
-#   export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+  export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-# #   ${APP}
+#   ${APP}
-#   numactl --cpunodebind=0 --membind=0 ${APP}
+  numactl --cpunodebind=0 --membind=0 ${APP}
-#   ;;
+  ;;
-# [7])
+[7])
-#   export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+  export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-# #   ${APP}
+#   ${APP}
-#   numactl --cpunodebind=0 --membind=0 ${APP}
+  numactl --cpunodebind=0 --membind=0 ${APP}
-#   ;;
+  ;;
 esac