Unverified commit 79c7e092, authored by Seiji Eicher, committed by GitHub
Browse files

[KV Connector] Add temporary, off-by-default...


[KV Connector] Add temporary, off-by-default `VLLM_DISABLE_REQUEST_ID_RANDOMIZATION` workaround (#34415)
Signed-off-by: Seiji Eicher <seiji@anyscale.com>
parent 79f3fab0
...@@ -169,6 +169,7 @@ if TYPE_CHECKING: ...@@ -169,6 +169,7 @@ if TYPE_CHECKING:
VLLM_XGRAMMAR_CACHE_MB: int = 0 VLLM_XGRAMMAR_CACHE_MB: int = 0
VLLM_MSGPACK_ZERO_COPY_THRESHOLD: int = 256 VLLM_MSGPACK_ZERO_COPY_THRESHOLD: int = 256
VLLM_ALLOW_INSECURE_SERIALIZATION: bool = False VLLM_ALLOW_INSECURE_SERIALIZATION: bool = False
VLLM_DISABLE_REQUEST_ID_RANDOMIZATION: bool = False
VLLM_NIXL_SIDE_CHANNEL_HOST: str = "localhost" VLLM_NIXL_SIDE_CHANNEL_HOST: str = "localhost"
VLLM_NIXL_SIDE_CHANNEL_PORT: int = 5600 VLLM_NIXL_SIDE_CHANNEL_PORT: int = 5600
VLLM_MOONCAKE_BOOTSTRAP_PORT: int = 8998 VLLM_MOONCAKE_BOOTSTRAP_PORT: int = 8998
...@@ -1236,6 +1237,11 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1236,6 +1237,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_ALLOW_INSECURE_SERIALIZATION": lambda: bool( "VLLM_ALLOW_INSECURE_SERIALIZATION": lambda: bool(
int(os.getenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "0")) int(os.getenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "0"))
), ),
# Temporary: skip adding random suffix to internal request IDs. May be
# needed for KV connectors that match request IDs across instances.
"VLLM_DISABLE_REQUEST_ID_RANDOMIZATION": lambda: bool(
int(os.getenv("VLLM_DISABLE_REQUEST_ID_RANDOMIZATION", "0"))
),
# IP address used for NIXL handshake between remote agents. # IP address used for NIXL handshake between remote agents.
"VLLM_NIXL_SIDE_CHANNEL_HOST": lambda: os.getenv( "VLLM_NIXL_SIDE_CHANNEL_HOST": lambda: os.getenv(
"VLLM_NIXL_SIDE_CHANNEL_HOST", "localhost" "VLLM_NIXL_SIDE_CHANNEL_HOST", "localhost"
......
...@@ -5,6 +5,7 @@ import time ...@@ -5,6 +5,7 @@ import time
from collections.abc import Mapping from collections.abc import Mapping
from typing import Any, Literal, cast from typing import Any, Literal, cast
import vllm.envs as envs
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.inputs.data import ( from vllm.inputs.data import (
ProcessorInputs, ProcessorInputs,
...@@ -296,6 +297,13 @@ class InputProcessor: ...@@ -296,6 +297,13 @@ class InputProcessor:
" passed to vLLM; use the request_id field." " passed to vLLM; use the request_id field."
) )
request.external_req_id = request.request_id request.external_req_id = request.request_id
if envs.VLLM_DISABLE_REQUEST_ID_RANDOMIZATION:
logger.warning_once(
"VLLM_DISABLE_REQUEST_ID_RANDOMIZATION is set and will be "
"removed in a future release. Duplicate externally-provided "
"request IDs may cause failures and/or subtle correctness errors."
)
else:
request.request_id = f"{request.external_req_id}-{random_uuid():.8}" request.request_id = f"{request.external_req_id}-{random_uuid():.8}"
def process_inputs( def process_inputs(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment