Unverified commit 4bdf7ac5, authored by Cyrus Leung and committed by GitHub
Browse files

[Bugfix] Fix SHM cache initialization (#26427)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent dc7976dd
...@@ -49,7 +49,6 @@ from openai.types.responses.response_reasoning_item import ( ...@@ -49,7 +49,6 @@ from openai.types.responses.response_reasoning_item import (
from openai_harmony import Message as OpenAIHarmonyMessage from openai_harmony import Message as OpenAIHarmonyMessage
from vllm import envs from vllm import envs
from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import ( from vllm.entrypoints.chat_utils import (
ChatCompletionMessageParam, ChatCompletionMessageParam,
...@@ -109,7 +108,6 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -109,7 +108,6 @@ class OpenAIServingResponses(OpenAIServing):
def __init__( def __init__(
self, self,
engine_client: EngineClient, engine_client: EngineClient,
model_config: ModelConfig,
models: OpenAIServingModels, models: OpenAIServingModels,
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
...@@ -127,7 +125,6 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -127,7 +125,6 @@ class OpenAIServingResponses(OpenAIServing):
) -> None: ) -> None:
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
...@@ -176,7 +173,7 @@ class OpenAIServingResponses(OpenAIServing): ...@@ -176,7 +173,7 @@ class OpenAIServingResponses(OpenAIServing):
"the store." "the store."
) )
self.use_harmony = model_config.hf_config.model_type == "gpt_oss" self.use_harmony = self.model_config.hf_config.model_type == "gpt_oss"
if self.use_harmony: if self.use_harmony:
logger.warning( logger.warning(
"For gpt-oss, we ignore --enable-auto-tool-choice " "For gpt-oss, we ignore --enable-auto-tool-choice "
......
...@@ -7,7 +7,6 @@ from typing import Any, Optional, Union ...@@ -7,7 +7,6 @@ from typing import Any, Optional, Union
from fastapi import Request from fastapi import Request
from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
from vllm.entrypoints.logger import RequestLogger from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.protocol import (
...@@ -47,7 +46,6 @@ class ServingScores(OpenAIServing): ...@@ -47,7 +46,6 @@ class ServingScores(OpenAIServing):
def __init__( def __init__(
self, self,
engine_client: EngineClient, engine_client: EngineClient,
model_config: ModelConfig,
models: OpenAIServingModels, models: OpenAIServingModels,
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
...@@ -55,7 +53,6 @@ class ServingScores(OpenAIServing): ...@@ -55,7 +53,6 @@ class ServingScores(OpenAIServing):
) -> None: ) -> None:
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=log_error_stack, log_error_stack=log_error_stack,
......
...@@ -6,7 +6,6 @@ from typing import Any, Final, Optional, Union ...@@ -6,7 +6,6 @@ from typing import Any, Final, Optional, Union
import jinja2 import jinja2
from fastapi import Request from fastapi import Request
from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
from vllm.entrypoints.logger import RequestLogger from vllm.entrypoints.logger import RequestLogger
...@@ -32,7 +31,6 @@ class OpenAIServingTokenization(OpenAIServing): ...@@ -32,7 +31,6 @@ class OpenAIServingTokenization(OpenAIServing):
def __init__( def __init__(
self, self,
engine_client: EngineClient, engine_client: EngineClient,
model_config: ModelConfig,
models: OpenAIServingModels, models: OpenAIServingModels,
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
...@@ -43,7 +41,6 @@ class OpenAIServingTokenization(OpenAIServing): ...@@ -43,7 +41,6 @@ class OpenAIServingTokenization(OpenAIServing):
) -> None: ) -> None:
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
log_error_stack=log_error_stack, log_error_stack=log_error_stack,
......
...@@ -5,7 +5,6 @@ from typing import Optional, Union ...@@ -5,7 +5,6 @@ from typing import Optional, Union
from fastapi import Request from fastapi import Request
from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
from vllm.entrypoints.logger import RequestLogger from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.protocol import (
...@@ -34,7 +33,6 @@ class OpenAIServingTranscription(OpenAISpeechToText): ...@@ -34,7 +33,6 @@ class OpenAIServingTranscription(OpenAISpeechToText):
def __init__( def __init__(
self, self,
engine_client: EngineClient, engine_client: EngineClient,
model_config: ModelConfig,
models: OpenAIServingModels, models: OpenAIServingModels,
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
...@@ -43,7 +41,6 @@ class OpenAIServingTranscription(OpenAISpeechToText): ...@@ -43,7 +41,6 @@ class OpenAIServingTranscription(OpenAISpeechToText):
): ):
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
...@@ -95,7 +92,6 @@ class OpenAIServingTranslation(OpenAISpeechToText): ...@@ -95,7 +92,6 @@ class OpenAIServingTranslation(OpenAISpeechToText):
def __init__( def __init__(
self, self,
engine_client: EngineClient, engine_client: EngineClient,
model_config: ModelConfig,
models: OpenAIServingModels, models: OpenAIServingModels,
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
...@@ -104,7 +100,6 @@ class OpenAIServingTranslation(OpenAISpeechToText): ...@@ -104,7 +100,6 @@ class OpenAIServingTranslation(OpenAISpeechToText):
): ):
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
......
...@@ -12,7 +12,6 @@ import numpy as np ...@@ -12,7 +12,6 @@ import numpy as np
from fastapi import Request from fastapi import Request
import vllm.envs as envs import vllm.envs as envs
from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
from vllm.entrypoints.logger import RequestLogger from vllm.entrypoints.logger import RequestLogger
from vllm.entrypoints.openai.protocol import ( from vllm.entrypoints.openai.protocol import (
...@@ -53,7 +52,6 @@ class OpenAISpeechToText(OpenAIServing): ...@@ -53,7 +52,6 @@ class OpenAISpeechToText(OpenAIServing):
def __init__( def __init__(
self, self,
engine_client: EngineClient, engine_client: EngineClient,
model_config: ModelConfig,
models: OpenAIServingModels, models: OpenAIServingModels,
*, *,
request_logger: Optional[RequestLogger], request_logger: Optional[RequestLogger],
...@@ -63,7 +61,6 @@ class OpenAISpeechToText(OpenAIServing): ...@@ -63,7 +61,6 @@ class OpenAISpeechToText(OpenAIServing):
): ):
super().__init__( super().__init__(
engine_client=engine_client, engine_client=engine_client,
model_config=model_config,
models=models, models=models,
request_logger=request_logger, request_logger=request_logger,
return_tokens_as_token_ids=return_tokens_as_token_ids, return_tokens_as_token_ids=return_tokens_as_token_ids,
...@@ -74,7 +71,7 @@ class OpenAISpeechToText(OpenAIServing): ...@@ -74,7 +71,7 @@ class OpenAISpeechToText(OpenAIServing):
self.task_type = task_type self.task_type = task_type
self.asr_config = self.model_cls.get_speech_to_text_config( self.asr_config = self.model_cls.get_speech_to_text_config(
model_config, task_type self.model_config, task_type
) )
self.max_audio_filesize_mb = envs.VLLM_MAX_AUDIO_CLIP_FILESIZE_MB self.max_audio_filesize_mb = envs.VLLM_MAX_AUDIO_CLIP_FILESIZE_MB
......
...@@ -20,13 +20,13 @@ class TextPrompt(TypedDict): ...@@ -20,13 +20,13 @@ class TextPrompt(TypedDict):
prompt: str prompt: str
"""The input text to be tokenized before passing to the model.""" """The input text to be tokenized before passing to the model."""
multi_modal_data: NotRequired["MultiModalDataDict"] multi_modal_data: NotRequired[Optional["MultiModalDataDict"]]
""" """
Optional multi-modal data to pass to the model, Optional multi-modal data to pass to the model,
if the model supports it. if the model supports it.
""" """
mm_processor_kwargs: NotRequired[dict[str, Any]] mm_processor_kwargs: NotRequired[Optional[dict[str, Any]]]
""" """
Optional multi-modal processor kwargs to be forwarded to the Optional multi-modal processor kwargs to be forwarded to the
multimodal input mapper & processor. Note that if multiple modalities multimodal input mapper & processor. Note that if multiple modalities
...@@ -61,13 +61,13 @@ class TokensPrompt(TypedDict): ...@@ -61,13 +61,13 @@ class TokensPrompt(TypedDict):
token_type_ids: NotRequired[list[int]] token_type_ids: NotRequired[list[int]]
"""A list of token type IDs to pass to the cross encoder model.""" """A list of token type IDs to pass to the cross encoder model."""
multi_modal_data: NotRequired["MultiModalDataDict"] multi_modal_data: NotRequired[Optional["MultiModalDataDict"]]
""" """
Optional multi-modal data to pass to the model, Optional multi-modal data to pass to the model,
if the model supports it. if the model supports it.
""" """
mm_processor_kwargs: NotRequired[dict[str, Any]] mm_processor_kwargs: NotRequired[Optional[dict[str, Any]]]
""" """
Optional multi-modal processor kwargs to be forwarded to the Optional multi-modal processor kwargs to be forwarded to the
multimodal input mapper & processor. Note that if multiple modalities multimodal input mapper & processor. Note that if multiple modalities
......
...@@ -17,7 +17,7 @@ from vllm.multimodal.inputs import ( ...@@ -17,7 +17,7 @@ from vllm.multimodal.inputs import (
MultiModalUUIDDict, MultiModalUUIDDict,
) )
from vllm.multimodal.processing import BaseMultiModalProcessor from vllm.multimodal.processing import BaseMultiModalProcessor
from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.utils.jsontree import json_iter_leaves from vllm.utils.jsontree import json_iter_leaves
from .data import ( from .data import (
...@@ -45,20 +45,17 @@ class InputPreprocessor: ...@@ -45,20 +45,17 @@ class InputPreprocessor:
def __init__( def __init__(
self, self,
model_config: ModelConfig, model_config: ModelConfig,
tokenizer: Optional[AnyTokenizer],
mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY, mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
mm_processor_cache: Optional[BaseMultiModalProcessorCache] = None, mm_processor_cache: Optional[BaseMultiModalProcessorCache] = None,
) -> None: ) -> None:
super().__init__() super().__init__()
self.model_config = model_config self.model_config = model_config
self.tokenizer = tokenizer
self.mm_registry = mm_registry self.mm_registry = mm_registry
self.mm_processor_cache = mm_processor_cache self.mm_processor_cache = mm_processor_cache
if model_config.skip_tokenizer_init:
self.tokenizer = None
else:
self.tokenizer = init_tokenizer_from_configs(model_config)
def get_tokenizer(self) -> AnyTokenizer: def get_tokenizer(self) -> AnyTokenizer:
if self.tokenizer is None: if self.tokenizer is None:
raise ValueError( raise ValueError(
...@@ -351,8 +348,8 @@ class InputPreprocessor: ...@@ -351,8 +348,8 @@ class InputPreprocessor:
if self.model_config.is_multimodal_model: if self.model_config.is_multimodal_model:
inputs = self._process_multimodal( inputs = self._process_multimodal(
prompt_token_ids, prompt_token_ids,
parsed_content.get("multi_modal_data", {}), parsed_content.get("multi_modal_data") or {},
parsed_content.get("mm_processor_kwargs"), parsed_content.get("mm_processor_kwargs") or {},
tokenization_kwargs=tokenization_kwargs, tokenization_kwargs=tokenization_kwargs,
mm_uuids=mm_uuids, mm_uuids=mm_uuids,
) )
...@@ -380,8 +377,8 @@ class InputPreprocessor: ...@@ -380,8 +377,8 @@ class InputPreprocessor:
if self.model_config.is_multimodal_model: if self.model_config.is_multimodal_model:
inputs = self._process_multimodal( inputs = self._process_multimodal(
prompt_text, prompt_text,
parsed_content.get("multi_modal_data", {}), parsed_content.get("multi_modal_data") or {},
parsed_content.get("mm_processor_kwargs"), parsed_content.get("mm_processor_kwargs") or {},
tokenization_kwargs=tokenization_kwargs, tokenization_kwargs=tokenization_kwargs,
mm_uuids=mm_uuids, mm_uuids=mm_uuids,
) )
......
...@@ -12,23 +12,23 @@ import numpy as np ...@@ -12,23 +12,23 @@ import numpy as np
import torch import torch
import vllm.envs as envs import vllm.envs as envs
from vllm.config import ModelConfig, VllmConfig from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.protocol import EngineClient from vllm.engine.protocol import EngineClient
from vllm.entrypoints.utils import _validate_truncation_size from vllm.entrypoints.utils import _validate_truncation_size
from vllm.envs import VLLM_V1_OUTPUT_PROC_CHUNK_SIZE from vllm.envs import VLLM_V1_OUTPUT_PROC_CHUNK_SIZE
from vllm.inputs import PromptType from vllm.inputs import PromptType
from vllm.inputs.preprocess import InputPreprocessor
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
from vllm.outputs import PoolingRequestOutput, RequestOutput from vllm.outputs import PoolingRequestOutput, RequestOutput
from vllm.plugins.io_processors import get_io_processor
from vllm.pooling_params import PoolingParams from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.tasks import SupportedTask from vllm.tasks import SupportedTask
from vllm.tracing import init_tracer from vllm.tracing import init_tracer
from vllm.transformers_utils.config import maybe_register_config_serialize_by_value from vllm.transformers_utils.config import maybe_register_config_serialize_by_value
from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device, as_list, cancel_task_threadsafe, cdiv, deprecate_kwargs from vllm.utils import Device, as_list, cancel_task_threadsafe, cdiv, deprecate_kwargs
from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine import EngineCoreRequest
...@@ -104,8 +104,16 @@ class AsyncLLM(EngineClient): ...@@ -104,8 +104,16 @@ class AsyncLLM(EngineClient):
"logger list; enabling logging without default stat loggers" "logger list; enabling logging without default stat loggers"
) )
# Processor (converts Inputs --> EngineCoreRequests). if self.model_config.skip_tokenizer_init:
self.processor = Processor(vllm_config, mm_registry=mm_registry) tokenizer = None
else:
tokenizer = init_tokenizer_from_configs(self.model_config)
self.processor = Processor(self.vllm_config, tokenizer)
self.io_processor = get_io_processor(
self.vllm_config,
self.model_config.io_processor_plugin,
)
# OutputProcessor (converts EngineCoreOutputs --> RequestOutput). # OutputProcessor (converts EngineCoreOutputs --> RequestOutput).
self.output_processor = OutputProcessor( self.output_processor = OutputProcessor(
...@@ -245,10 +253,6 @@ class AsyncLLM(EngineClient): ...@@ -245,10 +253,6 @@ class AsyncLLM(EngineClient):
cancel_task_threadsafe(getattr(self, "output_handler", None)) cancel_task_threadsafe(getattr(self, "output_handler", None))
@property
def tokenizer(self) -> Optional[AnyTokenizer]:
return self.processor.tokenizer
async def get_supported_tasks(self) -> tuple[SupportedTask, ...]: async def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
return await self.engine_core.get_supported_tasks_async() return await self.engine_core.get_supported_tasks_async()
...@@ -615,14 +619,13 @@ class AsyncLLM(EngineClient): ...@@ -615,14 +619,13 @@ class AsyncLLM(EngineClient):
logger.info("Request %s failed.", request_id) logger.info("Request %s failed.", request_id)
raise EngineGenerateError() from e raise EngineGenerateError() from e
async def get_vllm_config(self) -> VllmConfig: @property
return self.vllm_config def tokenizer(self) -> Optional[AnyTokenizer]:
return self.processor.tokenizer
async def get_model_config(self) -> ModelConfig:
return self.model_config
async def get_input_preprocessor(self) -> InputPreprocessor: @tokenizer.setter
return self.processor.input_preprocessor def tokenizer(self, tokenizer: Optional[AnyTokenizer]) -> None:
self.processor.tokenizer = tokenizer
async def get_tokenizer(self) -> AnyTokenizer: async def get_tokenizer(self) -> AnyTokenizer:
if self.tokenizer is None: if self.tokenizer is None:
......
...@@ -19,11 +19,12 @@ from vllm.logger import init_logger ...@@ -19,11 +19,12 @@ from vllm.logger import init_logger
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry
from vllm.outputs import PoolingRequestOutput, RequestOutput from vllm.outputs import PoolingRequestOutput, RequestOutput
from vllm.plugins.io_processors import get_io_processor
from vllm.pooling_params import PoolingParams from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.tasks import SupportedTask from vllm.tasks import SupportedTask
from vllm.tracing import init_tracer from vllm.tracing import init_tracer
from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import Device from vllm.utils import Device
from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine import EngineCoreRequest
...@@ -95,8 +96,16 @@ class LLMEngine: ...@@ -95,8 +96,16 @@ class LLMEngine:
self.dp_group = None self.dp_group = None
self.should_execute_dummy_batch = False self.should_execute_dummy_batch = False
# Processor (convert Inputs --> EngineCoreRequests) if self.model_config.skip_tokenizer_init:
self.processor = Processor(vllm_config, mm_registry=mm_registry) tokenizer = None
else:
tokenizer = init_tokenizer_from_configs(self.model_config)
self.processor = Processor(self.vllm_config, tokenizer)
self.io_processor = get_io_processor(
self.vllm_config,
self.model_config.io_processor_plugin,
)
# OutputProcessor (convert EngineCoreOutputs --> RequestOutput). # OutputProcessor (convert EngineCoreOutputs --> RequestOutput).
self.output_processor = OutputProcessor( self.output_processor = OutputProcessor(
...@@ -204,14 +213,6 @@ class LLMEngine: ...@@ -204,14 +213,6 @@ class LLMEngine:
def validate_outputs(cls, outputs, output_type): def validate_outputs(cls, outputs, output_type):
return outputs return outputs
@property
def tokenizer(self) -> Optional[AnyTokenizer]:
return self.processor.tokenizer
@tokenizer.setter
def tokenizer(self, tokenizer: Optional[AnyTokenizer]) -> None:
self.processor.tokenizer = tokenizer
def get_supported_tasks(self) -> tuple[SupportedTask, ...]: def get_supported_tasks(self) -> tuple[SupportedTask, ...]:
return self.engine_core.get_supported_tasks() return self.engine_core.get_supported_tasks()
...@@ -313,12 +314,6 @@ class LLMEngine: ...@@ -313,12 +314,6 @@ class LLMEngine:
return processed_outputs.request_outputs return processed_outputs.request_outputs
def get_vllm_config(self):
return self.vllm_config
def get_model_config(self):
return self.model_config
def start_profile(self): def start_profile(self):
self.engine_core.profile(True) self.engine_core.profile(True)
...@@ -345,6 +340,14 @@ class LLMEngine: ...@@ -345,6 +340,14 @@ class LLMEngine:
assert self.log_stats, "Stat logging disabled" assert self.log_stats, "Stat logging disabled"
return get_metrics_snapshot() return get_metrics_snapshot()
@property
def tokenizer(self) -> Optional[AnyTokenizer]:
return self.processor.tokenizer
@tokenizer.setter
def tokenizer(self, tokenizer: Optional[AnyTokenizer]) -> None:
self.processor.tokenizer = tokenizer
def get_tokenizer(self) -> AnyTokenizer: def get_tokenizer(self) -> AnyTokenizer:
if self.tokenizer is None: if self.tokenizer is None:
raise ValueError( raise ValueError(
......
...@@ -37,6 +37,7 @@ class Processor: ...@@ -37,6 +37,7 @@ class Processor:
def __init__( def __init__(
self, self,
vllm_config: VllmConfig, vllm_config: VllmConfig,
tokenizer: Optional[AnyTokenizer],
mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY, mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
) -> None: ) -> None:
self.vllm_config = vllm_config self.vllm_config = vllm_config
...@@ -52,6 +53,7 @@ class Processor: ...@@ -52,6 +53,7 @@ class Processor:
self.input_preprocessor = InputPreprocessor( self.input_preprocessor = InputPreprocessor(
self.model_config, self.model_config,
tokenizer,
mm_registry, mm_registry,
mm_processor_cache=self.mm_processor_cache, mm_processor_cache=self.mm_processor_cache,
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment