Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3e802e87
Unverified
Commit
3e802e87
authored
Mar 31, 2026
by
Flora Feng
Committed by
GitHub
Mar 31, 2026
Browse files
[Mypy] Fix adjust_request typing (#38264)
Signed-off-by:
sfeng33
<
4florafeng@gmail.com
>
parent
350af48e
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
49 additions
and
17 deletions
+49
-17
docs/features/tool_calling.md
docs/features/tool_calling.md
+1
-1
vllm/entrypoints/serve/render/serving.py
vllm/entrypoints/serve/render/serving.py
+1
-1
vllm/parser/abstract_parser.py
vllm/parser/abstract_parser.py
+4
-4
vllm/tool_parsers/abstract_tool_parser.py
vllm/tool_parsers/abstract_tool_parser.py
+3
-1
vllm/tool_parsers/deepseekv32_tool_parser.py
vllm/tool_parsers/deepseekv32_tool_parser.py
+4
-1
vllm/tool_parsers/functiongemma_tool_parser.py
vllm/tool_parsers/functiongemma_tool_parser.py
+4
-1
vllm/tool_parsers/gigachat3_tool_parser.py
vllm/tool_parsers/gigachat3_tool_parser.py
+4
-1
vllm/tool_parsers/glm4_moe_tool_parser.py
vllm/tool_parsers/glm4_moe_tool_parser.py
+4
-1
vllm/tool_parsers/granite4_tool_parser.py
vllm/tool_parsers/granite4_tool_parser.py
+4
-1
vllm/tool_parsers/hermes_tool_parser.py
vllm/tool_parsers/hermes_tool_parser.py
+4
-1
vllm/tool_parsers/internlm2_tool_parser.py
vllm/tool_parsers/internlm2_tool_parser.py
+4
-1
vllm/tool_parsers/jamba_tool_parser.py
vllm/tool_parsers/jamba_tool_parser.py
+4
-1
vllm/tool_parsers/mistral_tool_parser.py
vllm/tool_parsers/mistral_tool_parser.py
+4
-1
vllm/tool_parsers/step3_tool_parser.py
vllm/tool_parsers/step3_tool_parser.py
+4
-1
No files found.
docs/features/tool_calling.md
View file @
3e802e87
...
...
@@ -505,7 +505,7 @@ Here is a summary of a plugin file:
# adjust request. e.g.: set skip special tokens
# to False for tool call output.
def adjust_request(self, request: ChatCompletionRequest
) -> ChatCompletion
Request:
def adjust_request(self, request: ChatCompletionRequest
| ResponsesRequest) -> ChatCompletionRequest | Responses
Request:
return request
# implement the tool call parse for stream call
...
...
vllm/entrypoints/serve/render/serving.py
View file @
3e802e87
...
...
@@ -546,7 +546,7 @@ class OpenAIServingRender:
raise
NotImplementedError
(
msg
)
tokenizer
=
renderer
.
get_tokenizer
()
request
=
tool_parser
(
tokenizer
,
request
.
tools
).
adjust_request
(
request
=
request
# type: ignore[arg-type]
request
=
request
)
return
conversation
,
[
engine_input
]
vllm/parser/abstract_parser.py
View file @
3e802e87
...
...
@@ -32,9 +32,7 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall
,
FunctionDefinition
,
)
from
vllm.entrypoints.openai.responses.protocol
import
(
ResponsesRequest
,
)
from
vllm.entrypoints.openai.responses.protocol
import
ResponsesRequest
from
vllm.logger
import
init_logger
from
vllm.reasoning.abs_reasoning_parsers
import
ReasoningParser
from
vllm.tokenizers
import
TokenizerLike
...
...
@@ -229,7 +227,9 @@ class Parser:
# ========== Tool Parser Methods ==========
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
)
->
ChatCompletionRequest
:
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
"""
Adjust the request parameters for tool calling.
...
...
vllm/tool_parsers/abstract_tool_parser.py
View file @
3e802e87
...
...
@@ -62,7 +62,9 @@ class ToolParser:
# whereas all tokenizers have .get_vocab()
return
self
.
model_tokenizer
.
get_vocab
()
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
)
->
ChatCompletionRequest
:
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
"""
Static method that used to adjust the request parameters.
"""
...
...
vllm/tool_parsers/deepseekv32_tool_parser.py
View file @
3e802e87
...
...
@@ -19,6 +19,7 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall
,
ToolCall
,
)
from
vllm.entrypoints.openai.responses.protocol
import
ResponsesRequest
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
(
...
...
@@ -78,7 +79,9 @@ class DeepSeekV32ToolParser(ToolParser):
"vLLM Successfully import tool parser %s !"
,
self
.
__class__
.
__name__
)
def
adjust_request
(
self
,
request
):
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
request
=
super
().
adjust_request
(
request
)
if
request
.
tools
and
request
.
tool_choice
!=
"none"
:
# Ensure tool call tokens
...
...
vllm/tool_parsers/functiongemma_tool_parser.py
View file @
3e802e87
...
...
@@ -18,6 +18,7 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall
,
ToolCall
,
)
from
vllm.entrypoints.openai.responses.protocol
import
ResponsesRequest
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
Tool
,
ToolParser
...
...
@@ -86,7 +87,9 @@ class FunctionGemmaToolParser(ToolParser):
return
arguments
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
)
->
ChatCompletionRequest
:
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
request
=
super
().
adjust_request
(
request
)
if
request
.
tools
and
request
.
tool_choice
!=
"none"
:
request
.
skip_special_tokens
=
False
...
...
vllm/tool_parsers/gigachat3_tool_parser.py
View file @
3e802e87
...
...
@@ -18,6 +18,7 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall
,
ToolCall
,
)
from
vllm.entrypoints.openai.responses.protocol
import
ResponsesRequest
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
Tool
,
ToolParser
...
...
@@ -55,7 +56,9 @@ class GigaChat3ToolParser(ToolParser):
self
.
end_content
:
bool
=
False
self
.
streamed_args_for_tool
:
list
[
str
]
=
[]
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
)
->
ChatCompletionRequest
:
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
request
=
super
().
adjust_request
(
request
)
if
request
.
tools
and
request
.
tool_choice
!=
"none"
:
request
.
skip_special_tokens
=
False
...
...
vllm/tool_parsers/glm4_moe_tool_parser.py
View file @
3e802e87
...
...
@@ -30,6 +30,7 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall
,
ToolCall
,
)
from
vllm.entrypoints.openai.responses.protocol
import
ResponsesRequest
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
(
...
...
@@ -151,7 +152,9 @@ class Glm4MoeModelToolParser(ToolParser):
logger
.
exception
(
"Failed to determine if tools are enabled."
)
return
False
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
)
->
ChatCompletionRequest
:
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
"""Adjust request parameters for tool call token handling."""
request
=
super
().
adjust_request
(
request
)
if
request
.
tools
and
request
.
tool_choice
!=
"none"
:
...
...
vllm/tool_parsers/granite4_tool_parser.py
View file @
3e802e87
...
...
@@ -19,6 +19,7 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall
,
ToolCall
,
)
from
vllm.entrypoints.openai.responses.protocol
import
ResponsesRequest
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
(
...
...
@@ -59,7 +60,9 @@ class Granite4ToolParser(ToolParser):
self
.
start_regex
=
re
.
compile
(
self
.
tc_start
)
self
.
end_regex
=
re
.
compile
(
self
.
tc_end
)
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
)
->
ChatCompletionRequest
:
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
request
=
super
().
adjust_request
(
request
)
if
request
.
tools
and
request
.
tool_choice
!=
"none"
:
# do not skip special tokens because the tool_call tokens are
...
...
vllm/tool_parsers/hermes_tool_parser.py
View file @
3e802e87
...
...
@@ -18,6 +18,7 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall
,
ToolCall
,
)
from
vllm.entrypoints.openai.responses.protocol
import
ResponsesRequest
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
(
...
...
@@ -77,7 +78,9 @@ class Hermes2ProToolParser(ToolParser):
# Streaming state: what has been sent to the client.
self
.
_sent_content_idx
:
int
=
0
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
)
->
ChatCompletionRequest
:
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
request
=
super
().
adjust_request
(
request
)
if
request
.
tools
and
request
.
tool_choice
!=
"none"
:
# do not skip special tokens because the tool_call tokens are
...
...
vllm/tool_parsers/internlm2_tool_parser.py
View file @
3e802e87
...
...
@@ -19,6 +19,7 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall
,
ToolCall
,
)
from
vllm.entrypoints.openai.responses.protocol
import
ResponsesRequest
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
(
...
...
@@ -35,7 +36,9 @@ class Internlm2ToolParser(ToolParser):
super
().
__init__
(
tokenizer
,
tools
)
self
.
position
=
0
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
)
->
ChatCompletionRequest
:
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
request
=
super
().
adjust_request
(
request
)
if
request
.
tools
and
request
.
tool_choice
!=
"none"
:
# do not skip special tokens because internlm use the special
...
...
vllm/tool_parsers/jamba_tool_parser.py
View file @
3e802e87
...
...
@@ -20,6 +20,7 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall
,
ToolCall
,
)
from
vllm.entrypoints.openai.responses.protocol
import
ResponsesRequest
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
Tool
,
ToolParser
...
...
@@ -68,7 +69,9 @@ class JambaToolParser(ToolParser):
"tokens in the tokenizer!"
)
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
)
->
ChatCompletionRequest
:
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
request
=
super
().
adjust_request
(
request
)
if
request
.
tools
and
request
.
tool_choice
!=
"none"
:
# do not skip special tokens because jamba use the special
...
...
vllm/tool_parsers/mistral_tool_parser.py
View file @
3e802e87
...
...
@@ -23,6 +23,7 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall
,
ToolCall
,
)
from
vllm.entrypoints.openai.responses.protocol
import
ResponsesRequest
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
(
...
...
@@ -111,7 +112,9 @@ class MistralToolParser(ToolParser):
"the tokenizer!"
)
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
)
->
ChatCompletionRequest
:
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
request
=
super
().
adjust_request
(
request
)
if
(
not
is_mistral_tokenizer
(
self
.
model_tokenizer
)
...
...
vllm/tool_parsers/step3_tool_parser.py
View file @
3e802e87
...
...
@@ -19,6 +19,7 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall
,
ToolCall
,
)
from
vllm.entrypoints.openai.responses.protocol
import
ResponsesRequest
from
vllm.logger
import
init_logger
from
vllm.tokenizers
import
TokenizerLike
from
vllm.tool_parsers.abstract_tool_parser
import
(
...
...
@@ -51,7 +52,9 @@ class Step3ToolParser(ToolParser):
self
.
tool_block_started
=
False
self
.
tool_block_finished
=
False
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
)
->
ChatCompletionRequest
:
def
adjust_request
(
self
,
request
:
ChatCompletionRequest
|
ResponsesRequest
)
->
ChatCompletionRequest
|
ResponsesRequest
:
request
=
super
().
adjust_request
(
request
)
if
request
.
tools
and
request
.
tool_choice
!=
"none"
:
request
.
skip_special_tokens
=
False
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment