Commit 56325238 authored by hepj987

v1.0 release

parent 5b30acdf
@@ -126,7 +126,7 @@ SAVE_INTERVAL save frequency
| Cards | Throughput (samples per second) | Convergence: lm loss value | Convergence: lm loss PPL |
| :-------: | :------------------------: | :-----------------: | :---------------: |
-| 16 x 4DCU | 2.540 | 6.601086E+00 | 7.358937E+02 |
+| 32 x 4DCU | 2.449 | 4.299443E+00 | 7.365877E+01 |
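A quick consistency check on the table: the PPL column is exp() of the lm loss value (natural-log cross-entropy), so either column can be reproduced from the other:

```bash
# Sanity check: lm loss PPL = exp(lm loss value), assuming natural-log
# cross-entropy (the reported figures bear this out).
awk 'BEGIN { printf "%.2f\n", exp(6.601086) }'   # 735.89 -> 7.358937E+02 row
awk 'BEGIN { printf "%.2f\n", exp(4.299443) }'   # 73.66  -> 7.365877E+01 row
```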
@@ -204,3 +204,11 @@ sh run-inf.sh (here a single-node small model is used as the example)
![image-20230524143710566](image-gpt-loss.png)
![image-20230524143830580](image-gpt-loss2.png)
## Source repository and issue feedback
https://developer.hpccube.com/codes/modelzoo/gpt2-pytorch/
## References
https://github.com/bigscience-workshop/Megatron-DeepSpeed
@@ -10,7 +10,7 @@ RANK=$OMPI_COMM_WORLD_RANK
WORLD_SIZE=$OMPI_COMM_WORLD_SIZE
-MODEL_NAME=gpt2-oscar_16B-4tp
+MODEL_NAME=gpt2-oscar_16B-8tp
DATA_OUTPUT_PATH=./
LOGS_PATH=$DATA_OUTPUT_PATH/logs
CHECKPOINT_PATH=output-module/$MODEL_NAME
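For orientation, with the new MODEL_NAME the output paths above expand as in this minimal sketch (the mkdir call is an illustrative addition, not something shown in this hunk):

```bash
# Expansion of the output layout for the new MODEL_NAME; the mkdir line is
# illustrative only (the repository script may create these paths elsewhere).
MODEL_NAME=gpt2-oscar_16B-8tp
DATA_OUTPUT_PATH=./
LOGS_PATH=$DATA_OUTPUT_PATH/logs              # .//logs
CHECKPOINT_PATH=output-module/$MODEL_NAME     # output-module/gpt2-oscar_16B-8tp
mkdir -p "$LOGS_PATH" "$CHECKPOINT_PATH"
```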
@@ -20,7 +20,7 @@ TENSORBOARD_PATH=output_dir/tensorboard/$MODEL_NAME
CODECARBON_PATH=output_dir/codecarbon/$MODEL_NAME
TP_SIZE=4 # always fixed to the size of a single node
-PP_SIZE=4 # NLAYERS must be a multiple of PP_SIZE here
+PP_SIZE=8 # NLAYERS must be a multiple of PP_SIZE here
MICRO_BATCH_SIZE=1
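The PP_SIZE change alters how ranks are partitioned across tensor, pipeline, and data parallelism. A sketch of the arithmetic (NLAYERS, GPUS_PER_NODE, and NNODES below are assumed placeholders, not values taken from this hunk):

```bash
# Rank layout after the change: the world size must be divisible by
# TP_SIZE * PP_SIZE, and NLAYERS by PP_SIZE. Only TP/PP come from the diff;
# every other concrete value here is an assumption for illustration.
TP_SIZE=4; PP_SIZE=8
GPUS_PER_NODE=4; NNODES=32; NLAYERS=64
WORLD_SIZE=$(( GPUS_PER_NODE * NNODES ))            # 128 ranks
DP_SIZE=$(( WORLD_SIZE / (TP_SIZE * PP_SIZE) ))     # 128 / 32 = 4 data-parallel groups
(( NLAYERS % PP_SIZE == 0 ))              || echo "NLAYERS must be a multiple of PP_SIZE" >&2
(( WORLD_SIZE % (TP_SIZE * PP_SIZE) == 0 )) || echo "world size not divisible by TP*PP" >&2
```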
@@ -53,12 +53,11 @@ GPT_ARGS=" \
--max-position-embeddings $SEQ_LEN \
--micro-batch-size $MICRO_BATCH_SIZE \
--global-batch-size $GLOBAL_BATCH_SIZE \
---train-samples 3782590 \
+--train_iters 7000 \
--loss-scale 12 \
--vocab-file gpt2-vocab.json \
--merge-file gpt2-merges.txt \
--clip-grad 1.0 \
--fp16 \
--checkpoint-activations \
--seed 42
$OPTIMIZER_ARGS \
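Switching from --train-samples to --train_iters changes how the run length is specified: the total samples consumed become train_iters × global batch size. A quick check (GLOBAL_BATCH_SIZE is set earlier in the script and not visible in this hunk; 512 below is an assumed placeholder, not the repository's actual value):

```bash
# Total samples implied by the new iteration budget. GLOBAL_BATCH_SIZE is
# defined earlier in the script; 512 is an assumed placeholder value.
GLOBAL_BATCH_SIZE=512
TRAIN_ITERS=7000
echo $(( TRAIN_ITERS * GLOBAL_BATCH_SIZE ))   # 3584000 samples vs. the old 3782590
```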
@@ -93,7 +92,7 @@ cat <<EOT > $config_json
"stage": $ZERO_STAGE
},
"fp16": {
"enabled": true,
"enabled": false,
"loss_scale": 0,
"loss_scale_window": 500,
"hysteresis": 2,