Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
e76bb324
Unverified
Commit
e76bb324
authored
Feb 24, 2026
by
thatPepe
Committed by
GitHub
Feb 24, 2026
Browse files
Merge pull request #238 from InfiniTensor/issue/237
issue/237 - support hygon in bench and inf server
parents
39b594ff
8f71a5ec
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
30 additions
and
15 deletions
+30
-15
README.md
README.md
+4
-4
examples/bench.py
examples/bench.py
+7
-0
python/infinilm/server/inference_server.py
python/infinilm/server/inference_server.py
+4
-1
test/bench/test_benchmark.py
test/bench/test_benchmark.py
+15
-10
No files found.
README.md
View file @
e76bb324
...
@@ -15,7 +15,7 @@ xmake && xmake install
...
@@ -15,7 +15,7 @@ xmake && xmake install
-
运行模型推理测试
-
运行模型推理测试
```
bash
```
bash
python scripts/jiuge.py
[
--cpu
|
--nvidia
|
--qy
|
--cambricon
|
--ascend
|
--metax
|
--moore
|
--iluvatar
|
--kunlun
|
--hygon
]
path/to/model_dir
[
n_device]
python scripts/jiuge.py
[
--cpu
|
--nvidia
|
--qy
|
--cambricon
|
--ascend
|
--metax
|
--moore
|
--iluvatar
|
--kunlun
|
--hygon
|
--ali
]
path/to/model_dir
[
n_device]
```
```
-
部署模型推理服务
-
部署模型推理服务
...
@@ -77,11 +77,11 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA
...
@@ -77,11 +77,11 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA
-
单次推理测试
-
单次推理测试
-
llama示例
-
llama示例
```
bash
```
bash
python examples/
llama
.py
[
--cpu
|
--nvidia
|
--qy
|
--metax
|
--moore
|
--iluvatar
|
--ali
]
--model_path
=
<path/to/model_dir>
python examples/
jiuge
.py
[
--cpu
|
--nvidia
|
--qy
|
--metax
|
--moore
|
--iluvatar
|
--ali
|
--cambricon
|
--hygon
]
--model_path
=
<path/to/model_dir>
```
```
-
例如:
-
例如:
```
bash
```
bash
python examples/
llama
.py
--nvidia
--model_path
=
/models/TinyLlama-1.1B-Chat-v1.0
python examples/
jiuge
.py
--nvidia
--model_path
=
/models/TinyLlama-1.1B-Chat-v1.0
```
```
-
分布式推理测试
-
分布式推理测试
-
9g示例
-
9g示例
...
@@ -119,7 +119,7 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA
...
@@ -119,7 +119,7 @@ python scripts/test_ppl.py --model-path MODEL_PATH [--ndev NDEV] [--max-batch MA
- 运行推理基准测试(C-Eval/MMLU)
- 运行推理基准测试(C-Eval/MMLU)
```
bash
```
bash
python test/bench/test_benchmark.py [--cpu | --nvidia | --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon]
<path
/
to
/
model_dir
>
--bench {ceval|mmlu} [--backend cpp] [--ndev N] [--subject SUBJECT] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH]
python test/bench/test_benchmark.py [--cpu | --nvidia | --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon
| --ali
]
<path
/
to
/
model_dir
>
--bench {ceval|mmlu} [--backend cpp] [--ndev N] [--subject SUBJECT] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH]
```
```
- 参数说明:
- 参数说明:
...
...
examples/bench.py
View file @
e76bb324
...
@@ -167,6 +167,11 @@ def get_args():
...
@@ -167,6 +167,11 @@ def get_args():
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"Run Ali PPU test"
,
help
=
"Run Ali PPU test"
,
)
)
parser
.
add_argument
(
"--hygon"
,
action
=
"store_true"
,
help
=
"Run hygon test"
,
)
parser
.
add_argument
(
parser
.
add_argument
(
"--model"
,
"--model"
,
type
=
str
,
type
=
str
,
...
@@ -387,6 +392,8 @@ if __name__ == "__main__":
...
@@ -387,6 +392,8 @@ if __name__ == "__main__":
device_str
=
"mlu"
device_str
=
"mlu"
elif
args
.
ali
:
elif
args
.
ali
:
device_str
=
"cuda"
device_str
=
"cuda"
elif
args
.
hygon
:
device_str
=
"cuda"
else
:
else
:
print
(
print
(
"python examples/bench.py --nvidia --model=~/TinyLlama-1.1B-Chat-v1.0/ --batch-size=2 --tp=1 --input-len=50 --output-len=50"
"python examples/bench.py --nvidia --model=~/TinyLlama-1.1B-Chat-v1.0/ --batch-size=2 --tp=1 --input-len=50 --output-len=50"
...
...
python/infinilm/server/inference_server.py
View file @
e76bb324
...
@@ -594,6 +594,7 @@ def parse_args():
...
@@ -594,6 +594,7 @@ def parse_args():
parser
.
add_argument
(
"--iluvatar"
,
action
=
"store_true"
,
help
=
"Use Iluvatar device"
)
parser
.
add_argument
(
"--iluvatar"
,
action
=
"store_true"
,
help
=
"Use Iluvatar device"
)
parser
.
add_argument
(
"--cambricon"
,
action
=
"store_true"
,
help
=
"Use Cambricon device"
)
parser
.
add_argument
(
"--cambricon"
,
action
=
"store_true"
,
help
=
"Use Cambricon device"
)
parser
.
add_argument
(
"--ali"
,
action
=
"store_true"
,
help
=
"Use Ali PPU device"
)
parser
.
add_argument
(
"--ali"
,
action
=
"store_true"
,
help
=
"Use Ali PPU device"
)
parser
.
add_argument
(
"--hygon"
,
action
=
"store_true"
,
help
=
"Use Hygon DCU device"
)
parser
.
add_argument
(
parser
.
add_argument
(
"--enable-graph"
,
"--enable-graph"
,
action
=
"store_true"
,
action
=
"store_true"
,
...
@@ -631,9 +632,11 @@ def main():
...
@@ -631,9 +632,11 @@ def main():
device
=
"mlu"
device
=
"mlu"
elif
args
.
ali
:
elif
args
.
ali
:
device
=
"cuda"
device
=
"cuda"
elif
args
.
hygon
:
device
=
"cuda"
else
:
else
:
print
(
print
(
"Usage: python infinilm.server.inference_server [--cpu | --nvidia | --qy | --metax | --moore | --iluvatar | --cambricon | --ali] "
"Usage: python infinilm.server.inference_server [--cpu | --nvidia | --qy | --metax | --moore | --iluvatar | --cambricon | --ali
| --hygon
] "
"--model_path=<path/to/model_dir> --max_tokens=MAX_TOKENS --max_batch_size=MAX_BATCH_SIZE"
"--model_path=<path/to/model_dir> --max_tokens=MAX_TOKENS --max_batch_size=MAX_BATCH_SIZE"
"
\n
"
"
\n
"
"Example: python infinilm.server.inference_server --nvidia --model_path=/data/shared/models/9G7B_MHA/ "
"Example: python infinilm.server.inference_server --nvidia --model_path=/data/shared/models/9G7B_MHA/ "
...
...
test/bench/test_benchmark.py
View file @
e76bb324
...
@@ -73,6 +73,7 @@ class InfiniLMBenchmark(BaseBenchmark):
...
@@ -73,6 +73,7 @@ class InfiniLMBenchmark(BaseBenchmark):
"iluvatar"
:
"cuda"
,
"iluvatar"
:
"cuda"
,
"kunlun"
:
"cuda"
,
"kunlun"
:
"cuda"
,
"hygon"
:
"cuda"
,
"hygon"
:
"cuda"
,
"ali"
:
"cuda"
,
}
}
device_name
=
device_map
.
get
(
device_type_str
.
lower
(),
"cpu"
)
device_name
=
device_map
.
get
(
device_type_str
.
lower
(),
"cpu"
)
...
@@ -184,11 +185,17 @@ class InfiniLMBenchmark(BaseBenchmark):
...
@@ -184,11 +185,17 @@ class InfiniLMBenchmark(BaseBenchmark):
start_time
=
time
.
perf_counter
()
start_time
=
time
.
perf_counter
()
# For cpp backend, reset cache before generation if use_cache is enabled
# For cpp backend, reset cache before generation if use_cache is enabled
if
self
.
model
.
use_cache
and
hasattr
(
self
.
model
,
"_model"
)
and
hasattr
(
self
.
model
.
_model
,
"reset_cache"
):
if
(
self
.
model
.
use_cache
and
hasattr
(
self
.
model
,
"_model"
)
and
hasattr
(
self
.
model
.
_model
,
"reset_cache"
)
):
batch_size
=
input_ids
.
shape
[
0
]
batch_size
=
input_ids
.
shape
[
0
]
seq_len
=
input_ids
.
shape
[
1
]
seq_len
=
input_ids
.
shape
[
1
]
max_cache_len
=
max_steps
+
seq_len
max_cache_len
=
max_steps
+
seq_len
self
.
model
.
reset_cache
(
batch_size
=
batch_size
,
initial_capacity
=
max_cache_len
)
self
.
model
.
reset_cache
(
batch_size
=
batch_size
,
initial_capacity
=
max_cache_len
)
# Use model's built-in generate() method which properly handles KV cache
# Use model's built-in generate() method which properly handles KV cache
# Pass sampling parameters (temperature, topk, topp) via kwargs
# Pass sampling parameters (temperature, topk, topp) via kwargs
...
@@ -656,7 +663,7 @@ def test():
...
@@ -656,7 +663,7 @@ def test():
# Parse arguments manually to handle device flags properly
# Parse arguments manually to handle device flags properly
if
len
(
sys
.
argv
)
<
4
:
if
len
(
sys
.
argv
)
<
4
:
print
(
print
(
"Usage: python test_benchmark.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] <path/to/model_dir> --bench [ceval|mmlu] [--backend cpp|torch] [--ndev N] [--subject SUBJECT] [--split {test|val|all}] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH]"
"Usage: python test_benchmark.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon
| --ali
] <path/to/model_dir> --bench [ceval|mmlu] [--backend cpp|torch] [--ndev N] [--subject SUBJECT] [--split {test|val|all}] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH]"
)
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
...
@@ -739,9 +746,11 @@ def test():
...
@@ -739,9 +746,11 @@ def test():
device_type_str
=
"kunlun"
device_type_str
=
"kunlun"
elif
device_flag
==
"--hygon"
:
elif
device_flag
==
"--hygon"
:
device_type_str
=
"hygon"
device_type_str
=
"hygon"
elif
device_flag
==
"--ali"
:
device_type_str
=
"ali"
else
:
else
:
print
(
print
(
"Usage: python test_benchmark.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon] <path/to/model_dir> --bench [ceval|mmlu] [--backend cpp|torch] [--ndev N] [--subject SUBJECT] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH]"
"Usage: python test_benchmark.py [--cpu | --nvidia| --cambricon | --ascend | --metax | --moore | --iluvatar | --kunlun | --hygon
| --ali
] <path/to/model_dir> --bench [ceval|mmlu] [--backend cpp|torch] [--ndev N] [--subject SUBJECT] [--num_samples N] [--max_new_tokens N] [--output_csv PATH] [--cache_dir PATH]"
)
)
sys
.
exit
(
1
)
sys
.
exit
(
1
)
...
@@ -935,9 +944,7 @@ def test():
...
@@ -935,9 +944,7 @@ def test():
splits_to_load
=
(
splits_to_load
=
(
[
"test"
]
[
"test"
]
if
split
==
"test"
if
split
==
"test"
else
[
"validation"
]
else
[
"validation"
]
if
split
==
"val"
else
[
"validation"
,
"test"
]
if
split
==
"val"
else
[
"validation"
,
"test"
]
)
)
# Load each subject individually from hardcoded list, excluding "all"
# Load each subject individually from hardcoded list, excluding "all"
for
subject_name
in
mmlu_subjects
:
for
subject_name
in
mmlu_subjects
:
...
@@ -959,9 +966,7 @@ def test():
...
@@ -959,9 +966,7 @@ def test():
splits_to_load
=
(
splits_to_load
=
(
[
"test"
]
[
"test"
]
if
split
==
"test"
if
split
==
"test"
else
[
"validation"
]
else
[
"validation"
]
if
split
==
"val"
else
[
"validation"
,
"test"
]
if
split
==
"val"
else
[
"validation"
,
"test"
]
)
)
records
=
[]
records
=
[]
for
sp
in
splits_to_load
:
for
sp
in
splits_to_load
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment