Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
0879747e
Unverified
Commit
0879747e
authored
Feb 12, 2026
by
thatPepe
Committed by
GitHub
Feb 12, 2026
Browse files
Merge pull request #227 from InfiniTensor/issue/226
Issue/226: add warmup before InfiniLM bench.py generation
parents
a940a967
b59f768e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
59 additions
and
0 deletions
+59
-0
examples/bench.py
examples/bench.py
+59
-0
No files found.
examples/bench.py
View file @
0879747e
...
@@ -234,6 +234,11 @@ def get_args():
...
@@ -234,6 +234,11 @@ def get_args():
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"enable graph compiling"
,
help
=
"enable graph compiling"
,
)
)
parser
.
add_argument
(
"--warmup"
,
action
=
"store_true"
,
help
=
"Perform a warmup run before benchmarking/inference."
)
return
parser
.
parse_args
()
return
parser
.
parse_args
()
...
@@ -437,6 +442,60 @@ if __name__ == "__main__":
...
@@ -437,6 +442,60 @@ if __name__ == "__main__":
enable_graph
=
enable_graph
,
enable_graph
=
enable_graph
,
)
)
# ---------------------------------------------------------------------------- #
# Warmup
# ---------------------------------------------------------------------------- #
# Optional warmup pass before the benchmark loop: runs one short generate()
# call so that one-time costs (kernel compilation, cache allocation, etc. —
# presumably; TODO confirm what infinicore pays on first call) are not charged
# to the measured runs. Enabled via the --warmup CLI flag.
if args.warmup:
    # A single pass is used for warmup.
    warmup_steps = 1
    # warmup cache capacity
    warmup_cache_len = 128
    # Warm up with the same batch size as the real benchmark input.
    warmup_batch = len(test.input_ids_list)
    # Size the KV cache for the warmup run.
    # NOTE(review): assumes reset_cache() discards prior cache contents and
    # reallocates to the new config — confirm against the model API.
    test.model.reset_cache(
        StaticKVCacheConfig(
            max_batch_size=warmup_batch,
            max_cache_len=warmup_cache_len,
        )
    )
    # Cap warmup prompts at 64 tokens (or the longest prompt, if shorter).
    # NOTE(review): despite the name, this is min(64, longest prompt length),
    # not an average.
    avg_prompt_len = min(64, max(len(ids) for ids in test.input_ids_list))
    # Truncate each prompt to the cap; shorter prompts pass through unchanged.
    warmup_ids = [
        ids[:avg_prompt_len] if len(ids) >= avg_prompt_len else ids
        for ids in test.input_ids_list
    ]
    # Convert the nested token-id lists to the framework's tensor type —
    # presumably; verify infinicore.from_list semantics.
    input_ids_infini = infinicore.from_list(warmup_ids)
    print("=================== warmup start ===================")
    for _ in range(warmup_steps):
        # Discard the output; timing is suppressed so warmup does not pollute
        # the benchmark's logged measurements.
        _ = test.model.generate(
            input_ids_infini,
            GenerationConfig(
                max_new_tokens=5,  # decode kernel warmup
                # Mirror the user's sampling settings so the same sampling
                # kernels get exercised as in the measured run.
                temperature=args.temperature,
                top_k=args.top_k,
                top_p=args.top_p,
            ),
            _measure_and_log_time=False,
        )
    print("=================== warmup done ====================")
    # reset cache back to benchmark config
    if cache_config is not None:
        test.model.reset_cache(cache_config)
# ---------------------------------------------------------------------------- #
# Warmup done
# ---------------------------------------------------------------------------- #
for
idx
,
case
in
tqdm
(
cases_dict
.
items
(),
desc
=
"Processing cases"
):
for
idx
,
case
in
tqdm
(
cases_dict
.
items
(),
desc
=
"Processing cases"
):
tqdm
.
write
(
f
"
\033
[92mProcessing :
{
case
}
\033
[0m"
)
tqdm
.
write
(
f
"
\033
[92mProcessing :
{
case
}
\033
[0m"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment