Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3d09e592
Unverified
Commit
3d09e592
authored
Feb 04, 2025
by
Nick Hill
Committed by
GitHub
Feb 04, 2025
Browse files
[V1][Misc] Shorten `FinishReason` enum and use constant strings (#12760)
parent
fcf2e3d7
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
25 additions
and
21 deletions
+25
-21
vllm/v1/engine/__init__.py
vllm/v1/engine/__init__.py
+9
-3
vllm/v1/engine/detokenizer.py
vllm/v1/engine/detokenizer.py
+3
-4
vllm/v1/metrics/loggers.py
vllm/v1/metrics/loggers.py
+3
-3
vllm/v1/metrics/stats.py
vllm/v1/metrics/stats.py
+3
-4
vllm/v1/request.py
vllm/v1/request.py
+7
-7
No files found.
vllm/v1/engine/__init__.py
View file @
3d09e592
...
...
@@ -14,11 +14,17 @@ if TYPE_CHECKING:
from
vllm.multimodal.inputs
import
PlaceholderRange
from
vllm.sampling_params
import
SamplingParams
# These are possible values of RequestOutput.finish_reason,
# so form part of the external API.
FINISH_REASON_STRINGS
=
(
"stop"
,
"length"
,
"abort"
)
class
RequestFinishedReason
(
enum
.
IntEnum
):
class
FinishReason
(
enum
.
IntEnum
):
"""
Reason a request finished - stop, length, or abort.
Int rather than Str for more compact serialization.
stop - a stop string was emitted
length - max_tokens was consumed, or max_model_len was reached
abort - aborted for another reason
...
...
@@ -29,7 +35,7 @@ class RequestFinishedReason(enum.IntEnum):
ABORT
=
2
def
__str__
(
self
):
return
self
.
name
.
lower
()
return
FINISH_REASON_STRINGS
[
self
.
value
]
@
dataclass
...
...
@@ -62,7 +68,7 @@ class EngineCoreOutput(
request_id
:
str
new_token_ids
:
List
[
int
]
finished
:
bool
finish_reason
:
Optional
[
RequestFinishedReason
]
=
None
finish_reason
:
Optional
[
FinishReason
]
=
None
stop_reason
:
Union
[
int
,
str
,
None
]
=
None
...
...
vllm/v1/engine/detokenizer.py
View file @
3d09e592
...
...
@@ -8,8 +8,7 @@ from vllm.logger import init_logger
from
vllm.sampling_params
import
RequestOutputKind
from
vllm.transformers_utils.detokenizer_utils
import
(
AnyTokenizer
,
convert_prompt_ids_to_tokens
,
detokenize_incrementally
)
from
vllm.v1.engine
import
(
EngineCoreOutput
,
EngineCoreRequest
,
RequestFinishedReason
)
from
vllm.v1.engine
import
EngineCoreOutput
,
EngineCoreRequest
,
FinishReason
logger
=
init_logger
(
__name__
)
...
...
@@ -19,7 +18,7 @@ class DetokenizerOutput:
output_text
:
str
token_ids
:
List
[
int
]
finished
:
bool
finish_reason
:
Optional
[
RequestFinishedReason
]
=
None
finish_reason
:
Optional
[
FinishReason
]
=
None
stop_reason
:
Union
[
int
,
str
,
None
]
=
None
...
...
@@ -148,7 +147,7 @@ class IncrementalDetokenizer:
stop_str
,
truncate_to
=
stop
if
truncate_to
!=
-
1
:
self
.
output_text
=
self
.
output_text
[:
truncate_to
]
finish_reason
=
RequestFinishedReason
.
STOP
finish_reason
=
FinishReason
.
STOP
stop_reason
=
stop_str
# TODO: handle stop_token_ids here too?
...
...
vllm/v1/metrics/loggers.py
View file @
3d09e592
...
...
@@ -9,7 +9,7 @@ import prometheus_client
from
vllm.config
import
ModelConfig
from
vllm.logger
import
init_logger
from
vllm.v1.engine
import
RequestFinishedReason
from
vllm.v1.engine
import
FinishReason
from
vllm.v1.metrics.stats
import
IterationStats
,
SchedulerStats
logger
=
init_logger
(
__name__
)
...
...
@@ -117,13 +117,13 @@ class PrometheusStatLogger(StatLoggerBase):
documentation
=
"Number of generation tokens processed."
,
labelnames
=
labelnames
).
labels
(
*
labelvalues
)
self
.
counter_request_success
:
Dict
[
RequestFinishedReason
,
self
.
counter_request_success
:
Dict
[
FinishReason
,
prometheus_client
.
Counter
]
=
{}
counter_request_success_base
=
prometheus_client
.
Counter
(
name
=
"vllm:request_success_total"
,
documentation
=
"Count of successfully processed requests."
,
labelnames
=
labelnames
+
[
"finished_reason"
])
for
reason
in
RequestFinishedReason
:
for
reason
in
FinishReason
:
self
.
counter_request_success
[
reason
]
=
counter_request_success_base
.
labels
(
*
(
labelvalues
+
[
str
(
reason
)]))
...
...
vllm/v1/metrics/stats.py
View file @
3d09e592
...
...
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, List
if
TYPE_CHECKING
:
from
vllm.outputs
import
RequestOutput
from
vllm.v1.engine
import
EngineCoreOutput
,
RequestFinishedReason
from
vllm.v1.engine
import
EngineCoreOutput
,
FinishReason
@
dataclass
...
...
@@ -32,7 +32,7 @@ class RequestStateStats:
class
FinishedRequestStats
:
"""Stats associated with a finished request."""
finish_reason
:
"
RequestFinishedReason"
finish_reason
:
"FinishReason"
num_prompt_tokens
:
int
=
0
num_generation_tokens
:
int
=
0
...
...
@@ -74,8 +74,7 @@ class IterationStats:
request_state_stats
.
num_generation_tokens
+=
num_new_generation_tokens
request_state_stats
.
last_token_time
=
now
def
update_from_finished_request
(
self
,
finish_reason
:
"RequestFinishedReason"
,
def
update_from_finished_request
(
self
,
finish_reason
:
"FinishReason"
,
request_output
:
"RequestOutput"
,
request_state_stats
:
RequestStateStats
):
self
.
finished_requests
.
append
(
...
...
vllm/v1/request.py
View file @
3d09e592
...
...
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, List, Optional, Union
from
vllm.lora.request
import
LoRARequest
from
vllm.sampling_params
import
SamplingParams
from
vllm.sequence
import
RequestMetrics
from
vllm.v1.engine
import
EngineCoreRequest
,
RequestFinishedReason
from
vllm.v1.engine
import
EngineCoreRequest
,
FinishReason
from
vllm.v1.utils
import
ConstantList
if
TYPE_CHECKING
:
...
...
@@ -109,7 +109,7 @@ class Request:
def
is_finished
(
self
)
->
bool
:
return
RequestStatus
.
is_finished
(
self
.
status
)
def
get_finished_reason
(
self
)
->
Union
[
RequestFinishedReason
,
None
]:
def
get_finished_reason
(
self
)
->
Union
[
FinishReason
,
None
]:
return
RequestStatus
.
get_finished_reason
(
self
.
status
)
def
has_encoder_inputs
(
self
)
->
bool
:
...
...
@@ -150,7 +150,7 @@ class RequestStatus(enum.IntEnum):
@
staticmethod
def
get_finished_reason
(
status
:
"RequestStatus"
)
->
Union
[
RequestFinishedReason
,
None
]:
status
:
"RequestStatus"
)
->
Union
[
FinishReason
,
None
]:
return
_FINISHED_REASON_MAP
.
get
(
status
)
...
...
@@ -159,8 +159,8 @@ class RequestStatus(enum.IntEnum):
# are longer than the model's length cap. Therefore, the stop
# reason should also be "length" as in OpenAI API.
_FINISHED_REASON_MAP
=
{
RequestStatus
.
FINISHED_STOPPED
:
RequestFinishedReason
.
STOP
,
RequestStatus
.
FINISHED_LENGTH_CAPPED
:
RequestFinishedReason
.
LENGTH
,
RequestStatus
.
FINISHED_ABORTED
:
RequestFinishedReason
.
ABORT
,
RequestStatus
.
FINISHED_IGNORED
:
RequestFinishedReason
.
LENGTH
,
RequestStatus
.
FINISHED_STOPPED
:
FinishReason
.
STOP
,
RequestStatus
.
FINISHED_LENGTH_CAPPED
:
FinishReason
.
LENGTH
,
RequestStatus
.
FINISHED_ABORTED
:
FinishReason
.
ABORT
,
RequestStatus
.
FINISHED_IGNORED
:
FinishReason
.
LENGTH
,
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment