Unverified commit f01a5c71, authored by Daniel Socek, committed by GitHub
Browse files

fix: vision model loader fixes (#6952)


Signed-off-by: Daniel Socek <daniel.socek@intel.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
parent fca0a801
...@@ -65,7 +65,9 @@ class EncodeWorkerHandler: ...@@ -65,7 +65,9 @@ class EncodeWorkerHandler:
self.image_processor = AutoImageProcessor.from_pretrained( self.image_processor = AutoImageProcessor.from_pretrained(
self.model, trust_remote_code=True self.model, trust_remote_code=True
) )
self.vision_model = load_vision_model(self.model) self.vision_model = load_vision_model(
self.model, enforce_eager=self.engine_args.enforce_eager
)
hidden_size = getattr(self.vision_model, "out_hidden_size", None) hidden_size = getattr(self.vision_model, "out_hidden_size", None)
if hidden_size is None: if hidden_size is None:
hidden_size = getattr( hidden_size = getattr(
......
...@@ -150,7 +150,7 @@ def is_qwen_vl_model(model_name: str) -> bool: ...@@ -150,7 +150,7 @@ def is_qwen_vl_model(model_name: str) -> bool:
) )
def load_vision_model(model_id: str) -> torch.nn.Module: def load_vision_model(model_id: str, enforce_eager: bool = False) -> torch.nn.Module:
""" """
Load a vision model from a HuggingFace model ID. Load a vision model from a HuggingFace model ID.
""" """
...@@ -167,10 +167,10 @@ def load_vision_model(model_id: str) -> torch.nn.Module: ...@@ -167,10 +167,10 @@ def load_vision_model(model_id: str) -> torch.nn.Module:
# Load only the vision model via vLLM # Load only the vision model via vLLM
vllm_model = LLM( vllm_model = LLM(
model=model_id, model=model_id,
enforce_eager=False, enforce_eager=enforce_eager,
kv_cache_memory_bytes=1024 kv_cache_memory_bytes=1024
* 1024 * 1024
* 8, # 8MB KV cache for vLLM to complete the init lifecycle, encoder-only doesn't require KV cache. * 64, # 64MB KV cache for vLLM to complete the init lifecycle, encoder-only doesn't require KV cache.
max_model_len=1, max_model_len=1,
mm_encoder_only=True, mm_encoder_only=True,
enable_prefix_caching=False, enable_prefix_caching=False,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment