Fix DeepSeek-OCR tensor validation for all size variants (#34085)

Co-authored-by: Cursor <cursoragent@cursor.com>

Fix DeepSeek-OCR tensor validation for all size variants (#34085)
Co-authored-by: Cursor <cursoragent@cursor.com>
80f2ba6e · Yichuan Wang · GitHub · 136b0bfa · 80f2ba6e
Unverified commit 80f2ba6e, authored Feb 11, 2026 by Yichuan Wang; committed by GitHub on Feb 11, 2026
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 11 additions and 1 deletion

vllm/model_executor/models/deepseek_ocr.py vllm/model_executor/models/deepseek_ocr.py +11 -1

No files found.
--- a/vllm/model_executor/models/deepseek_ocr.py
+++ b/vllm/model_executor/models/deepseek_ocr.py
@@ -448,7 +448,16 @@ class DeepseekOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, Supports
        if pixel_values is None or torch.sum(pixel_values).item() == 0:
            return None

-        base_size = self.vision_config.image_size
+        # Use actual tensor spatial dim instead of hardcoded
+        # vision_config.image_size (1024). The vision encoders (SAM & CLIP)
+        # support arbitrary resolutions via pos-encoding interpolation,
+        # so Tiny/Small/Base/Large variants all work with the same weights.
+        base_size = pixel_values.shape[-1]
+        if images_crop is not None and images_crop.numel() > 0:
+            image_size = images_crop.shape[-1]
+        else:
+            image_size = base_size
+
        return DeepseekOCRImagePixelInputs(
            type="pixel_values",
            data=pixel_values,
@@ -456,6 +465,7 @@ class DeepseekOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, Supports
            images_spatial_crop=images_spatial_crop,
            resolve_bindings={
                "base_size": base_size,
+                "image_size": image_size,
            },
        )