Unverified commit 0a05ed57, authored by Harry Mellor and committed by GitHub
Browse files

Simplify `TokenizerGroup` (#16790)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 14288d13
...@@ -8,7 +8,7 @@ from typing import Optional, Union ...@@ -8,7 +8,7 @@ from typing import Optional, Union
from vllm.outputs import CompletionOutput, RequestOutput from vllm.outputs import CompletionOutput, RequestOutput
from vllm.sampling_params import RequestOutputKind from vllm.sampling_params import RequestOutputKind
from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.transformers_utils.tokenizer_group import BaseTokenizerGroup from vllm.transformers_utils.tokenizer_group import TokenizerGroup
from vllm.v1.engine import EngineCoreOutput, EngineCoreRequest, FinishReason from vllm.v1.engine import EngineCoreOutput, EngineCoreRequest, FinishReason
from vllm.v1.engine.detokenizer import IncrementalDetokenizer from vllm.v1.engine.detokenizer import IncrementalDetokenizer
from vllm.v1.engine.logprobs import LogprobsProcessor from vllm.v1.engine.logprobs import LogprobsProcessor
...@@ -225,7 +225,7 @@ class OutputProcessor: ...@@ -225,7 +225,7 @@ class OutputProcessor:
def __init__( def __init__(
self, self,
tokenizer: BaseTokenizerGroup, tokenizer: TokenizerGroup,
log_stats: bool, log_stats: bool,
): ):
self.log_stats = log_stats self.log_stats = log_stats
......
...@@ -17,7 +17,7 @@ from vllm.multimodal.utils import merge_and_sort_multimodal_metadata ...@@ -17,7 +17,7 @@ from vllm.multimodal.utils import merge_and_sort_multimodal_metadata
from vllm.pooling_params import PoolingParams from vllm.pooling_params import PoolingParams
from vllm.prompt_adapter.request import PromptAdapterRequest from vllm.prompt_adapter.request import PromptAdapterRequest
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.transformers_utils.tokenizer_group import BaseTokenizerGroup from vllm.transformers_utils.tokenizer_group import TokenizerGroup
from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.mm_input_cache import MirroredProcessingCache from vllm.v1.engine.mm_input_cache import MirroredProcessingCache
from vllm.v1.structured_output.backend_guidance import ( from vllm.v1.structured_output.backend_guidance import (
...@@ -31,7 +31,7 @@ class Processor: ...@@ -31,7 +31,7 @@ class Processor:
def __init__( def __init__(
self, self,
vllm_config: VllmConfig, vllm_config: VllmConfig,
tokenizer: BaseTokenizerGroup, tokenizer: TokenizerGroup,
mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY, mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
): ):
......
...@@ -61,9 +61,7 @@ class GuidanceBackend(StructuredOutputBackend): ...@@ -61,9 +61,7 @@ class GuidanceBackend(StructuredOutputBackend):
tokenizer_group = init_tokenizer_from_configs( tokenizer_group = init_tokenizer_from_configs(
model_config=vllm_config.model_config, model_config=vllm_config.model_config,
scheduler_config=vllm_config.scheduler_config, scheduler_config=vllm_config.scheduler_config,
parallel_config=vllm_config.parallel_config,
lora_config=vllm_config.lora_config) # type: ignore[arg-type] lora_config=vllm_config.lora_config) # type: ignore[arg-type]
tokenizer_group.ping()
self.vllm_config = vllm_config self.vllm_config = vllm_config
self.vocab_size = vllm_config.model_config.get_vocab_size() self.vocab_size = vllm_config.model_config.get_vocab_size()
......
...@@ -35,9 +35,7 @@ class XgrammarBackend(StructuredOutputBackend): ...@@ -35,9 +35,7 @@ class XgrammarBackend(StructuredOutputBackend):
tokenizer_group = init_tokenizer_from_configs( tokenizer_group = init_tokenizer_from_configs(
model_config=vllm_config.model_config, model_config=vllm_config.model_config,
scheduler_config=vllm_config.scheduler_config, scheduler_config=vllm_config.scheduler_config,
parallel_config=vllm_config.parallel_config,
lora_config=vllm_config.lora_config) # type: ignore[arg-type] lora_config=vllm_config.lora_config) # type: ignore[arg-type]
tokenizer_group.ping()
self.disable_any_whitespace = False self.disable_any_whitespace = False
backend_options = GuidedDecodingParams( backend_options = GuidedDecodingParams(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment