SIYIXNI / vllm

Commit 4caf7044 (unverified)
Include tokens from prompt phase in `counter_generation_tokens` (#2802)
Authored Feb 23, 2024 by Ronen Schaffer; committed via GitHub Feb 22, 2024
Parent: 6f32cddf
Showing 3 changed files with 39 additions and 1 deletion:

  .buildkite/test-pipeline.yaml   +3  -0
  tests/metrics/test_metrics.py   +33 -1
  vllm/engine/llm_engine.py       +3  -0
.buildkite/test-pipeline.yaml

@@ -52,6 +52,9 @@ steps:
 - label: LoRA Test
   command: pytest -v -s lora
+- label: Metrics Test
+  command: pytest -v -s metrics
 - label: Benchmarks
   working_dir: "/vllm-workspace/.buildkite"
   commands:
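The new pipeline step simply runs `pytest -v -s metrics` from the Buildkite working directory. As a quick illustration (not part of the commit), the same selection can also be driven through pytest's own API; the tests/metrics path below is an assumption about where the suite sits relative to the repository root.

import sys

import pytest

if __name__ == "__main__":
    # Mirrors the CI command `pytest -v -s metrics`; assumes the script is
    # launched from the vLLM repository root with test dependencies installed.
    sys.exit(pytest.main(["-v", "-s", "tests/metrics"]))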
tests/metrics/test_metrics.py

@@ -9,13 +9,16 @@ MODELS = [
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["float"])
 @pytest.mark.parametrize("max_tokens", [128])
-def test_metrics(
+def test_metric_counter_prompt_tokens(
     vllm_runner,
     example_prompts,
     model: str,
     dtype: str,
     max_tokens: int,
 ) -> None:
+    # Reset metric
+    vllm.engine.metrics.counter_prompt_tokens.set_value({}, 0)
     vllm_model = vllm_runner(model, dtype=dtype, disable_log_stats=False)
     tokenizer = vllm_model.model.get_tokenizer()
     prompt_token_counts = [len(tokenizer.encode(p)) for p in example_prompts]

@@ -31,3 +34,32 @@ def test_metrics(
     assert vllm_prompt_token_count == metric_count, (
         f"prompt token count: {vllm_prompt_token_count!r}\n"
         f"metric: {metric_count!r}")
+
+
+@pytest.mark.parametrize("model", MODELS)
+@pytest.mark.parametrize("dtype", ["float"])
+@pytest.mark.parametrize("max_tokens", [128])
+def test_metric_counter_generation_tokens(
+    vllm_runner,
+    example_prompts,
+    model: str,
+    dtype: str,
+    max_tokens: int,
+) -> None:
+    # Reset metric
+    vllm.engine.metrics.counter_generation_tokens.set_value({}, 0)
+    vllm_model = vllm_runner(model, dtype=dtype, disable_log_stats=False)
+    vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
+    tokenizer = vllm_model.model.get_tokenizer()
+    metric_count = vllm.engine.metrics.counter_generation_tokens.get_value({})
+    vllm_generation_count = 0
+    for i in range(len(example_prompts)):
+        vllm_output_ids, vllm_output_str = vllm_outputs[i]
+        prompt_ids = tokenizer.encode(example_prompts[i])
+        # vllm_output_ids contains both prompt tokens and generation tokens.
+        # We're interested only in the count of the generation tokens.
+        vllm_generation_count += len(vllm_output_ids) - len(prompt_ids)
+    assert vllm_generation_count == metric_count, (
+        f"generation token count: {vllm_generation_count!r}\n"
+        f"metric: {metric_count!r}")
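These tests read the in-process counters directly through set_value/get_value. Against a running server, the same counters are exported over the Prometheus endpoint of the OpenAI-compatible API server. The sketch below is an illustration only, not part of the commit: the /metrics path, the default port 8000, and the metric names vllm:prompt_tokens_total / vllm:generation_tokens_total are assumptions; check vllm/engine/metrics.py for the identifiers your version actually registers.

import requests

def scrape_counter(name: str, url: str = "http://localhost:8000/metrics") -> float:
    """Sum every sample of one counter from a Prometheus text exposition page."""
    total = 0.0
    for line in requests.get(url, timeout=5).text.splitlines():
        # Samples look like `<name>{<labels>} <value>`; HELP/TYPE comment lines
        # start with '#' and are skipped by the prefix check.
        if line.startswith(name):
            total += float(line.rsplit(" ", 1)[-1])
    return total

# Assumed metric names; verify against vllm/engine/metrics.py.
print("prompt tokens:    ", scrape_counter("vllm:prompt_tokens_total"))
print("generation tokens:", scrape_counter("vllm:generation_tokens_total"))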
vllm/engine/llm_engine.py

@@ -872,6 +872,9 @@ class LLMEngine:
             num_prompt_tokens = sum(
                 len(seq_group.prompt_token_ids)
                 for seq_group in scheduler_outputs.scheduled_seq_groups)
+            num_generation_tokens = sum(
+                seq_group.num_seqs()
+                for seq_group in scheduler_outputs.scheduled_seq_groups)
         else:
             num_generation_tokens = scheduler_outputs.num_batched_tokens
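As the diff shows, num_generation_tokens was previously set only in the else (decode) branch, so tokens produced at the end of the prompt (prefill) phase never reached counter_generation_tokens. The added block counts one generation token per scheduled sequence during a prompt run, since each sequence emits its first output token when prefill finishes. A toy sketch with stand-in classes (not vLLM's real types) shows the arithmetic:

from dataclasses import dataclass, field
from typing import List

@dataclass
class FakeSeqGroup:
    prompt_token_ids: List[int]
    n: int = 1  # number of sequences in the group (e.g. parallel sampling)

    def num_seqs(self) -> int:
        return self.n

@dataclass
class FakeSchedulerOutputs:
    scheduled_seq_groups: List[FakeSeqGroup] = field(default_factory=list)

# Two prompts of different lengths scheduled in the same prefill batch.
scheduler_outputs = FakeSchedulerOutputs(scheduled_seq_groups=[
    FakeSeqGroup(prompt_token_ids=[1, 2, 3, 4, 5]),           # 5 prompt tokens
    FakeSeqGroup(prompt_token_ids=[6, 7, 8, 9, 10, 11, 12]),  # 7 prompt tokens
])

# Same expressions as the patched prompt-run branch above.
num_prompt_tokens = sum(
    len(seq_group.prompt_token_ids)
    for seq_group in scheduler_outputs.scheduled_seq_groups)
num_generation_tokens = sum(
    seq_group.num_seqs()
    for seq_group in scheduler_outputs.scheduled_seq_groups)

assert num_prompt_tokens == 12      # 5 + 7
assert num_generation_tokens == 2   # one first token per scheduled sequence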