[hotfix/hybridengine] Fix init model with random parameters in benchmark (#5074)

* fix init model with random parameters
* fix example

[hotfix/hybridengine] Fix init model with random parameters in benchmark (#5074)
* fix init model with random parameters
* fix example
4e3959d3 · Bin Jia · GitHub · 8921a73c · 4e3959d3 · 4e3959d3
Unverified commit 4e3959d3, authored Nov 20, 2023 by Bin Jia; committed by GitHub on Nov 20, 2023.
3 changed files
--- a/colossalai/shardformer/modeling/chatglm2_6b/__init__.py
+++ b/colossalai/shardformer/modeling/chatglm2_6b/__init__.py
--- a/examples/inference/benchmark.py
+++ b/examples/inference/benchmark.py
@@ -95,11 +95,27 @@ def print_details_info(timestamps, model_config, args, whole_end2end):
 def benchmark_inference(args):
    if args.model == "toy":
-        model = transformers.LlamaForCausalLM(transformers.LlamaConfig(num_hidden_layers=8))
+        model = transformers.LlamaForCausalLM(transformers.LlamaConfig(num_hidden_layers=4))
    elif args.model == "7b":
-        model = transformers.LlamaForCausalLM(transformers.AutoConfig.from_pretrained("decapoda-research/llama-7b-hf"))
+        model = transformers.LlamaForCausalLM(
+            transformers.LlamaConfig(
+                hidden_size=4096,
+                intermediate_size=11008,
+                num_attention_heads=32,
+                num_hidden_layers=32,
+                num_key_value_heads=32,
+            )
+        )
    elif args.model == "13b":
-        model = transformers.LlamaForCausalLM(transformers.AutoConfig.from_pretrained("decapoda-research/llama-13b-hf"))
+        model = transformers.LlamaForCausalLM(
+            transformers.LlamaConfig(
+                hidden_size=5120,
+                intermediate_size=13824,
+                num_attention_heads=40,
+                num_hidden_layers=40,
+                num_key_value_heads=40,
+            )
+        )
    else:
        raise NotImplementedError

--- a/examples/inference/run_benchmark.sh
+++ b/examples/inference/run_benchmark.sh
 script_dir=$(cd "$(dirname "$0")" && pwd)
 cd "${script_dir}"
+# toy model, 2tp*2pp 1024, 128
+python ./benchmark.py \
+    --model="toy" \
+    --dtype="fp16" \
+    --batch_size=2 \
+    --seq_len=1024 \
+    --output_len=128 \
+    --mb_size=1 \
+    --pp_size=2 \
+    --tp_size=2
 # 7b, fp16, 2 gpu, 1024, 128
 for BATCH_SIZE in 2 4 8 16; do
@@ -9,7 +19,7 @@ for BATCH_SIZE in 2 4 8 16; do
        --dtype="fp16" \
        --batch_size=${BATCH_SIZE} \
        --seq_len=1024 \
-        --new_length=128 \
+        --output_len=128 \
        --mb_size=$((${BATCH_SIZE}/2)) \
        --pp_size=2 \
        --tp_size=2
@@ -22,7 +32,7 @@ for BATCH_SIZE in 2 4 8 16 32; do
        --dtype="fp16" \
        --batch_size=${BATCH_SIZE} \
        --seq_len=512 \
-        --new_length=512 \
+        --output_len=512 \
        --mb_size=$((${BATCH_SIZE}/2)) \
        --pp_size=2 \
        --tp_size=2
@@ -35,7 +45,7 @@ for BATCH_SIZE in 2 4 8; do
        --dtype="fp16" \
        --batch_size=${BATCH_SIZE} \
        --seq_len=1024 \
-        --new_length=128 \
+        --output_len=128 \
        --mb_size=$((${BATCH_SIZE}/2)) \
        --pp_size=2 \
        --tp_size=2
@@ -48,7 +58,7 @@ for BATCH_SIZE in 2 4 8 16; do
        --dtype="fp16" \
        --batch_size=${BATCH_SIZE} \
        --seq_len=512 \
-        --new_length=512 \
+        --output_len=512 \
        --mb_size=$((${BATCH_SIZE}/2)) \
        --pp_size=2 \
        --tp_size=2