Commit e433c115 (unverified)
Authored Feb 19, 2024 by Ronen Schaffer; committed by GitHub Feb 18, 2024
Fix `vllm:prompt_tokens_total` metric calculation (#2869)
Parent: 86fd8bb0
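Previously, during a prompt run the engine fed the metric from `scheduler_outputs.num_batched_tokens`, which includes padding: every prompt in the batch is counted at the length of the longest prompt (the new test's comment calls this out explicitly). The fix sums the actual `prompt_token_ids` of each scheduled sequence group, so `vllm:prompt_tokens_total` reflects the real number of prompt tokens. A rough sketch of the discrepancy, with made-up lengths:

```python
# Two prompts of different lengths, padded to the longer one in the batch.
prompt_lens = [7, 12]

padded = len(prompt_lens) * max(prompt_lens)  # 24 tokens counted with padding
actual = sum(prompt_lens)                     # 19 real prompt tokens
```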
Showing 3 changed files with 41 additions and 6 deletions:
- tests/conftest.py (+5, -5)
- tests/metrics/test_metrics.py (+33, -0)
- vllm/engine/llm_engine.py (+3, -1)
tests/conftest.py

```diff
@@ -13,12 +13,10 @@ _TEST_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "example.txt")]
 _LONG_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "summary.txt")]
 
 
-def _read_prompts(filename: str) -> str:
-    prompts = []
+def _read_prompts(filename: str) -> List[str]:
     with open(filename, "r") as f:
-        prompt = f.readline()
-        prompts.append(prompt)
+        prompts = f.readlines()
     return prompts
 
 
 @pytest.fixture
```
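The old helper returned only the first line of each prompt file; `readlines` returns every line, which the new metrics test depends on (it needs at least two prompts of different lengths in one batch). A standalone sketch of the difference, using a throwaway file:

```python
import tempfile

with tempfile.NamedTemporaryFile("w+", suffix=".txt") as f:
    f.write("short prompt\na considerably longer second prompt\n")
    f.seek(0)
    old_style = [f.readline()]  # old behavior: only the first prompt
    f.seek(0)
    new_style = f.readlines()   # new behavior: every prompt in the file

assert len(old_style) == 1 and len(new_style) == 2
```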
```diff
@@ -165,6 +163,7 @@ class VllmRunner:
         model_name: str,
         tokenizer_name: Optional[str] = None,
         dtype: str = "half",
+        disable_log_stats: bool = True,
         tensor_parallel_size: int = 1,
     ) -> None:
         self.model = LLM(
@@ -173,6 +172,7 @@ class VllmRunner:
             trust_remote_code=True,
             dtype=dtype,
             swap_space=0,
+            disable_log_stats=disable_log_stats,
             tensor_parallel_size=tensor_parallel_size,
         )
 
```
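`disable_log_stats` defaults to `True`, so existing tests keep running without the stats logger; a test has to opt in for metrics to be recorded. With the `vllm_runner` fixture above, that looks like the following (mirroring the new test below):

```python
# Stats logging must be enabled for the prompt-token counter to be updated.
vllm_model = vllm_runner("facebook/opt-125m",
                         dtype="float",
                         disable_log_stats=False)
```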
tests/metrics/test_metrics.py (new file, mode 100644)

```python
import pytest

import vllm.engine.metrics

MODELS = [
    "facebook/opt-125m",
]


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float"])
@pytest.mark.parametrize("max_tokens", [128])
def test_metrics(
    vllm_runner,
    example_prompts,
    model: str,
    dtype: str,
    max_tokens: int,
) -> None:
    vllm_model = vllm_runner(model, dtype=dtype, disable_log_stats=False)
    tokenizer = vllm_model.model.get_tokenizer()
    prompt_token_counts = [len(tokenizer.encode(p)) for p in example_prompts]
    # This test needs at least 2 prompts in a batch of different lengths to verify their token count is correct despite padding.
    assert len(example_prompts) > 1, "at least 2 prompts are required"
    assert prompt_token_counts[0] != prompt_token_counts[1], (
        "prompts of different lengths are required")
    vllm_prompt_token_count = sum(prompt_token_counts)

    _ = vllm_model.generate_greedy(example_prompts, max_tokens)
    metric_count = vllm.engine.metrics.counter_prompt_tokens.get_value({})

    assert vllm_prompt_token_count == metric_count, (
        f"prompt token count: {vllm_prompt_token_count!r}\nmetric: {metric_count!r}")
```
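The test asserts against the counter object directly rather than scraping a Prometheus endpoint. A minimal sketch of that counter API, assuming the aioprometheus-style `Counter` that `vllm.engine.metrics` appears to use here (the `get_value({})` call above takes a labels dict):

```python
from aioprometheus import Counter

# Hypothetical standalone counter; the real one lives in vllm.engine.metrics.
counter_prompt_tokens = Counter("vllm:prompt_tokens_total",
                                "Number of prefill tokens processed.")
counter_prompt_tokens.add({}, 19)  # labels dict first, then the increment
assert counter_prompt_tokens.get_value({}) == 19
```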
vllm/engine/llm_engine.py

```diff
@@ -867,7 +867,9 @@ class LLMEngine:
 
         # Number of Tokens.
         if prompt_run:
-            num_prompt_tokens = scheduler_outputs.num_batched_tokens
+            num_prompt_tokens = sum(
+                len(seq_group.prompt_token_ids)
+                for seq_group in scheduler_outputs.scheduled_seq_groups)
         else:
             num_generation_tokens = scheduler_outputs.num_batched_tokens
 
```
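To see the fixed expression on its own, here is the same generator expression run against hypothetical stand-in objects (vLLM's real sequence groups carry far more state):

```python
from dataclasses import dataclass
from typing import List

@dataclass
class FakeSeqGroup:  # hypothetical stand-in for a vLLM sequence group
    prompt_token_ids: List[int]

scheduled_seq_groups = [FakeSeqGroup([101] * 7), FakeSeqGroup([101] * 12)]

# Count real prompt tokens rather than the padded batch size.
num_prompt_tokens = sum(
    len(seq_group.prompt_token_ids) for seq_group in scheduled_seq_groups)
assert num_prompt_tokens == 19  # 7 + 12
```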