Unverified commit fcb73f30, authored by Chenguang Zheng and committed by GitHub
Browse files

[bugfix] add api process rank in default multimodal request (#36150)


Signed-off-by: fake0fan <645327136@qq.com>
Signed-off-by: Chenguang ZHENG <645327136@qq.com>
parent e2090bf3
...@@ -59,16 +59,22 @@ class MockModelConfig: ...@@ -59,16 +59,22 @@ class MockModelConfig:
return self.diff_sampling_param or {} return self.diff_sampling_param or {}
@dataclass
class MockParallelConfig:
_api_process_rank: int = 0
@dataclass @dataclass
class MockVllmConfig: class MockVllmConfig:
model_config: MockModelConfig model_config: MockModelConfig
parallel_config: MockParallelConfig
def _build_renderer(model_config: MockModelConfig): def _build_renderer(model_config: MockModelConfig):
_, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config) _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
return HfRenderer.from_config( return HfRenderer.from_config(
MockVllmConfig(model_config), MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name}, tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
) )
......
...@@ -58,9 +58,15 @@ class MockModelConfig: ...@@ -58,9 +58,15 @@ class MockModelConfig:
return self.diff_sampling_param or {} return self.diff_sampling_param or {}
@dataclass
class MockParallelConfig:
_api_process_rank: int = 0
@dataclass @dataclass
class MockVllmConfig: class MockVllmConfig:
model_config: MockModelConfig model_config: MockModelConfig
parallel_config: MockParallelConfig
def _build_serving_completion(engine: AsyncLLM) -> OpenAIServingCompletion: def _build_serving_completion(engine: AsyncLLM) -> OpenAIServingCompletion:
...@@ -79,7 +85,7 @@ def _build_renderer(model_config: MockModelConfig): ...@@ -79,7 +85,7 @@ def _build_renderer(model_config: MockModelConfig):
_, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config) _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
return HfRenderer.from_config( return HfRenderer.from_config(
MockVllmConfig(model_config), MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name}, tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
) )
......
...@@ -58,9 +58,15 @@ class MockModelConfig: ...@@ -58,9 +58,15 @@ class MockModelConfig:
return self.diff_sampling_param or {} return self.diff_sampling_param or {}
@dataclass
class MockParallelConfig:
_api_process_rank: int = 0
@dataclass @dataclass
class MockVllmConfig: class MockVllmConfig:
model_config: MockModelConfig model_config: MockModelConfig
parallel_config: MockParallelConfig
class MockLoRAResolver(LoRAResolver): class MockLoRAResolver(LoRAResolver):
...@@ -97,7 +103,7 @@ def _build_renderer(model_config: MockModelConfig): ...@@ -97,7 +103,7 @@ def _build_renderer(model_config: MockModelConfig):
_, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config) _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
return HfRenderer.from_config( return HfRenderer.from_config(
MockVllmConfig(model_config), MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name}, tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
) )
......
...@@ -537,16 +537,22 @@ class MockModelConfig: ...@@ -537,16 +537,22 @@ class MockModelConfig:
return self.diff_sampling_param or {} return self.diff_sampling_param or {}
@dataclass
class MockParallelConfig:
_api_process_rank: int = 0
@dataclass @dataclass
class MockVllmConfig: class MockVllmConfig:
model_config: MockModelConfig model_config: MockModelConfig
parallel_config: MockParallelConfig
def _build_renderer(model_config: MockModelConfig): def _build_renderer(model_config: MockModelConfig):
_, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config) _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
return HfRenderer.from_config( return HfRenderer.from_config(
MockVllmConfig(model_config), MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name}, tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
) )
...@@ -797,7 +803,7 @@ async def test_serving_chat_mistral_token_ids_prompt_is_validated(): ...@@ -797,7 +803,7 @@ async def test_serving_chat_mistral_token_ids_prompt_is_validated():
mock_tokenizer = MagicMock(spec=MistralTokenizer) mock_tokenizer = MagicMock(spec=MistralTokenizer)
mock_renderer = MistralRenderer( mock_renderer = MistralRenderer(
MockVllmConfig(mock_engine.model_config), MockVllmConfig(mock_engine.model_config, parallel_config=MockParallelConfig()),
tokenizer=mock_tokenizer, tokenizer=mock_tokenizer,
) )
# Force the Mistral chat template renderer to return token IDs. # Force the Mistral chat template renderer to return token IDs.
...@@ -837,7 +843,7 @@ async def test_serving_chat_mistral_token_ids_prompt_too_long_is_rejected(): ...@@ -837,7 +843,7 @@ async def test_serving_chat_mistral_token_ids_prompt_too_long_is_rejected():
mock_tokenizer = MagicMock(spec=MistralTokenizer) mock_tokenizer = MagicMock(spec=MistralTokenizer)
mock_renderer = MistralRenderer( mock_renderer = MistralRenderer(
MockVllmConfig(mock_engine.model_config), MockVllmConfig(mock_engine.model_config, parallel_config=MockParallelConfig()),
tokenizer=mock_tokenizer, tokenizer=mock_tokenizer,
) )
# prompt_token_ids length == max_model_len should be rejected for # prompt_token_ids length == max_model_len should be rejected for
......
...@@ -41,9 +41,15 @@ class MockModelConfig: ...@@ -41,9 +41,15 @@ class MockModelConfig:
is_multimodal_model: bool = False is_multimodal_model: bool = False
@dataclass
class MockParallelConfig:
_api_process_rank: int = 0
@dataclass @dataclass
class MockVllmConfig: class MockVllmConfig:
model_config: MockModelConfig model_config: MockModelConfig
parallel_config: MockParallelConfig
@dataclass @dataclass
...@@ -78,7 +84,7 @@ def _build_renderer( ...@@ -78,7 +84,7 @@ def _build_renderer(
_, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config) _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
renderer = HfRenderer( renderer = HfRenderer(
MockVllmConfig(model_config), MockVllmConfig(model_config, parallel_config=MockParallelConfig()),
tokenizer=( tokenizer=(
None None
if model_config.skip_tokenizer_init if model_config.skip_tokenizer_init
......
...@@ -39,9 +39,15 @@ class MockModelConfig: ...@@ -39,9 +39,15 @@ class MockModelConfig:
is_multimodal_model: bool = False is_multimodal_model: bool = False
@dataclass
class MockParallelConfig:
_api_process_rank: int = 0
@dataclass @dataclass
class MockVllmConfig: class MockVllmConfig:
model_config: MockModelConfig model_config: MockModelConfig
parallel_config: MockParallelConfig
@pytest.mark.asyncio @pytest.mark.asyncio
...@@ -57,7 +63,7 @@ async def test_async_mistral_tokenizer_does_not_block_event_loop(): ...@@ -57,7 +63,7 @@ async def test_async_mistral_tokenizer_does_not_block_event_loop():
mock_tokenizer = Mock(spec=MistralTokenizer) mock_tokenizer = Mock(spec=MistralTokenizer)
mock_tokenizer.apply_chat_template = mocked_apply_chat_template mock_tokenizer.apply_chat_template = mocked_apply_chat_template
mock_renderer = MistralRenderer( mock_renderer = MistralRenderer(
MockVllmConfig(mock_model_config), MockVllmConfig(mock_model_config, parallel_config=MockParallelConfig()),
tokenizer=mock_tokenizer, tokenizer=mock_tokenizer,
) )
......
...@@ -75,6 +75,7 @@ class BaseRenderer(ABC, Generic[_T]): ...@@ -75,6 +75,7 @@ class BaseRenderer(ABC, Generic[_T]):
self.config = config self.config = config
self.model_config = config.model_config self.model_config = config.model_config
self.api_process_rank = config.parallel_config._api_process_rank
self.tokenizer = tokenizer self.tokenizer = tokenizer
...@@ -539,7 +540,7 @@ class BaseRenderer(ABC, Generic[_T]): ...@@ -539,7 +540,7 @@ class BaseRenderer(ABC, Generic[_T]):
from vllm.multimodal.parse import parse_mm_uuids from vllm.multimodal.parse import parse_mm_uuids
from vllm.multimodal.processing import ProcessorInputs as MMProcessorInputs from vllm.multimodal.processing import ProcessorInputs as MMProcessorInputs
mm_req_id = f"renderer-mm-{self._mm_req_counter.inc(1)}" mm_req_id = f"renderer{self.api_process_rank}-mm-{self._mm_req_counter.inc(1)}"
mm_processor = self.get_mm_processor() mm_processor = self.get_mm_processor()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment