Unverified commit 11cec296, authored by Nick Hill and committed by GitHub
Browse files

[BugFix] Add spec-decode-incompatible request param validation (#31982)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent 5825bbc1
...@@ -51,7 +51,6 @@ async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch): ...@@ -51,7 +51,6 @@ async def test_run_eagle_dp(monkeypatch: pytest.MonkeyPatch):
# https://github.com/vllm-project/vllm/issues/31913 # https://github.com/vllm-project/vllm/issues/31913
num_expected_tokens = 20 num_expected_tokens = 20
sampling_params = SamplingParams( sampling_params = SamplingParams(
min_tokens=num_expected_tokens,
max_tokens=num_expected_tokens, max_tokens=num_expected_tokens,
ignore_eos=True, ignore_eos=True,
output_kind=RequestOutputKind.FINAL_ONLY, output_kind=RequestOutputKind.FINAL_ONLY,
......
...@@ -19,7 +19,7 @@ from vllm.multimodal.parse import MultiModalDataParser ...@@ -19,7 +19,7 @@ from vllm.multimodal.parse import MultiModalDataParser
from vllm.multimodal.processing import EncDecMultiModalProcessor, set_request_id from vllm.multimodal.processing import EncDecMultiModalProcessor, set_request_id
from vllm.multimodal.utils import argsort_mm_positions from vllm.multimodal.utils import argsort_mm_positions
from vllm.pooling_params import PoolingParams from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams from vllm.sampling_params import _SAMPLING_EPS, SamplingParams
from vllm.tokenizers import TokenizerLike from vllm.tokenizers import TokenizerLike
from vllm.tokenizers.mistral import MistralTokenizer from vllm.tokenizers.mistral import MistralTokenizer
from vllm.utils import length_from_prompt_token_ids_or_embeds, random_uuid from vllm.utils import length_from_prompt_token_ids_or_embeds, random_uuid
...@@ -153,8 +153,18 @@ class InputProcessor: ...@@ -153,8 +153,18 @@ class InputProcessor:
# Logits processors not supported. # Logits processors not supported.
if params.logits_processors: if params.logits_processors:
raise ValueError( raise ValueError(
"vLLM V1 does not support per request user provided logits processors." "vLLM V1 does not support per request user-provided logits processors."
) )
# Some sampling parameters are not yet compatible with spec decoding.
if self.vllm_config.speculative_config is not None and (
params.min_tokens > 1 or params.min_p > _SAMPLING_EPS or params.logit_bias
):
raise ValueError(
"The min_tokens, min_p, and logit_bias sampling parameters "
"are not yet supported with speculative decoding."
)
# Async scheduling + spec decode currently incompatible with some # Async scheduling + spec decode currently incompatible with some
# sampling parameters. # sampling parameters.
if ( if (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment