Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7a5df8f7
Commit
7a5df8f7
authored
Aug 21, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.10.1.1' into v0.10.1.1-ori
parents
5876ee95
1da94e67
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
46 additions
and
10 deletions
+46
-10
vllm/entrypoints/constants.py
vllm/entrypoints/constants.py
+10
-0
vllm/entrypoints/launcher.py
vllm/entrypoints/launcher.py
+21
-0
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+2
-0
vllm/entrypoints/openai/cli_args.py
vllm/entrypoints/openai/cli_args.py
+8
-0
vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
...entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
+4
-9
vllm/v1/attention/backends/mla/cutlass_mla.py
vllm/v1/attention/backends/mla/cutlass_mla.py
+1
-1
No files found.
vllm/entrypoints/constants.py
0 → 100644
View file @
7a5df8f7
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Shared constants for vLLM entrypoints.
"""
# HTTP header limits for the h11 parser.
# These constants help mitigate header abuse attacks.

# Default cap, in bytes, on the size of an incomplete HTTP event.
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT = 4194304  # 4 MB

# Default cap on the number of HTTP headers.
H11_MAX_HEADER_COUNT_DEFAULT = 256
vllm/entrypoints/launcher.py
View file @
7a5df8f7
...
...
@@ -14,6 +14,8 @@ from vllm import envs
from
vllm.engine.async_llm_engine
import
AsyncEngineDeadError
from
vllm.engine.multiprocessing
import
MQEngineDeadError
from
vllm.engine.protocol
import
EngineClient
from
vllm.entrypoints.constants
import
(
H11_MAX_HEADER_COUNT_DEFAULT
,
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
)
from
vllm.entrypoints.ssl
import
SSLCertRefresher
from
vllm.logger
import
init_logger
from
vllm.utils
import
find_process_using_port
...
...
@@ -26,6 +28,11 @@ async def serve_http(app: FastAPI,
sock
:
Optional
[
socket
.
socket
],
enable_ssl_refresh
:
bool
=
False
,
**
uvicorn_kwargs
:
Any
):
"""
Start a FastAPI app using Uvicorn, with support for custom Uvicorn config
options. Supports http header limits via h11_max_incomplete_event_size and
h11_max_header_count.
"""
logger
.
info
(
"Available routes are:"
)
for
route
in
app
.
routes
:
methods
=
getattr
(
route
,
"methods"
,
None
)
...
...
@@ -36,7 +43,21 @@ async def serve_http(app: FastAPI,
logger
.
info
(
"Route: %s, Methods: %s"
,
path
,
', '
.
join
(
methods
))
# Extract header limit options if present
h11_max_incomplete_event_size
=
uvicorn_kwargs
.
pop
(
"h11_max_incomplete_event_size"
,
None
)
h11_max_header_count
=
uvicorn_kwargs
.
pop
(
"h11_max_header_count"
,
None
)
# Set safe defaults if not provided
if
h11_max_incomplete_event_size
is
None
:
h11_max_incomplete_event_size
=
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
if
h11_max_header_count
is
None
:
h11_max_header_count
=
H11_MAX_HEADER_COUNT_DEFAULT
config
=
uvicorn
.
Config
(
app
,
**
uvicorn_kwargs
)
# Set header limits
config
.
h11_max_incomplete_event_size
=
h11_max_incomplete_event_size
config
.
h11_max_header_count
=
h11_max_header_count
config
.
load
()
server
=
uvicorn
.
Server
(
config
)
_add_shutdown_handlers
(
app
,
server
)
...
...
vllm/entrypoints/openai/api_server.py
View file @
7a5df8f7
...
...
@@ -1894,6 +1894,8 @@ async def run_server_worker(listen_address,
ssl_certfile
=
args
.
ssl_certfile
,
ssl_ca_certs
=
args
.
ssl_ca_certs
,
ssl_cert_reqs
=
args
.
ssl_cert_reqs
,
h11_max_incomplete_event_size
=
args
.
h11_max_incomplete_event_size
,
h11_max_header_count
=
args
.
h11_max_header_count
,
**
uvicorn_kwargs
,
)
...
...
vllm/entrypoints/openai/cli_args.py
View file @
7a5df8f7
...
...
@@ -20,6 +20,8 @@ from vllm.config import config
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
optional_type
from
vllm.entrypoints.chat_utils
import
(
ChatTemplateContentFormatOption
,
validate_chat_template
)
from
vllm.entrypoints.constants
import
(
H11_MAX_HEADER_COUNT_DEFAULT
,
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
)
from
vllm.entrypoints.openai.serving_models
import
LoRAModulePath
from
vllm.entrypoints.openai.tool_parsers
import
ToolParserManager
from
vllm.logger
import
init_logger
...
...
@@ -172,6 +174,12 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
enable_log_outputs
:
bool
=
False
"""If set to True, enable logging of model outputs (generations)
in addition to the input logging that is enabled by default."""
h11_max_incomplete_event_size
:
int
=
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
"""Maximum size (bytes) of an incomplete HTTP event (header or body) for
h11 parser. Helps mitigate header abuse. Default: 4194304 (4 MB)."""
h11_max_header_count
:
int
=
H11_MAX_HEADER_COUNT_DEFAULT
"""Maximum number of HTTP headers allowed in a request for h11 parser.
Helps mitigate header abuse. Default: 256."""
@
staticmethod
def
add_cli_args
(
parser
:
FlexibleArgumentParser
)
->
FlexibleArgumentParser
:
...
...
vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
View file @
7a5df8f7
...
...
@@ -208,15 +208,10 @@ class Qwen3CoderToolParser(ToolParser):
"valid JSON object in tool '%s', will try other "
"methods to parse it."
,
param_value
,
param_name
,
func_name
)
try
:
converted_value
=
eval
(
param_value
)
return
converted_value
except
Exception
:
logger
.
warning
(
"Parsed value '%s' of parameter '%s' cannot be "
"converted via Python `eval()` in tool '%s', "
"degenerating to string."
,
param_value
,
param_name
,
func_name
)
logger
.
warning
(
"Parameter '%s' has unknown type '%s'. "
"The value will be treated as a string."
,
param_name
,
param_type
)
return
param_value
# Extract function name
...
...
vllm/v1/attention/backends/mla/cutlass_mla.py
View file @
7a5df8f7
...
...
@@ -21,7 +21,7 @@ logger = init_logger(__name__)
class
CutlassMLAMetadataBuilder
(
MLACommonMetadataBuilder
[
MLACommonMetadata
]):
# enable full CUDA Graph support for decode-only capture
attn_
cudagraph_support
:
ClassVar
[
cudagraph_support
:
ClassVar
[
AttentionCGSupport
]
=
AttentionCGSupport
.
UNIFORM_SINGLE_TOKEN_DECODE
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment