[Fix] Remove unused packing_position_embedding from PaddleOCRVL for better checkpoint compatibility (#38232)

[Fix] Remove unused packing_position_embedding from PaddleOCRVL for better checkpoint compatibility (#38232)

Signed-off-by: zhangyue66 <zhangyue66@baidu.com>

[Fix] Remove unused packing_position_embedding from PaddleOCRVL for better checkpoint compatibility (#38232)
[Fix] Remove unused packing_position_embedding from PaddleOCRVL for better checkpoint compatibility (#38232) Signed-off-by: zhangyue66 <zhangyue66@baidu.com>
0f5b5260 · zhang-prog · GitHub · be1a85b7 · 0f5b5260
Unverified commit 0f5b5260, authored Mar 26, 2026 by zhang-prog; committed by GitHub on Mar 26, 2026.
Show whitespace changes
Inline Side-by-side

Showing with 18 additions and 19 deletions

vllm/model_executor/models/paddleocr_vl.py vllm/model_executor/models/paddleocr_vl.py +18 -19

No files found.
--- a/vllm/model_executor/models/paddleocr_vl.py
+++ b/vllm/model_executor/models/paddleocr_vl.py
@@ -409,7 +409,6 @@ class SiglipVisionEmbeddings(nn.Module):
        self.cache_position_embedding = dict()
        self.cache_position_count = dict()
        self.position_embedding = nn.Embedding(self.num_positions, self.embed_dim)
-        self.packing_position_embedding = nn.Embedding(32768, self.embed_dim)
        self.register_buffer(
            "position_ids",
@@ -501,7 +500,6 @@ class SiglipVisionEmbeddings(nn.Module):
            patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))
            embeddings = patch_embeds.flatten(-2).squeeze(-1)
-            if interpolate_pos_encoding and image_grid_thw is not None:
            start = 0
            tmp_embeddings = list()
            for image_grid in image_grid_thw:
@@ -517,8 +515,7 @@ class SiglipVisionEmbeddings(nn.Module):
                tmp_embeddings.append(image_embeddings)
                start = end
            embeddings = torch.concat(tmp_embeddings, dim=0).unsqueeze(0)
-            else:
-                embeddings = embeddings + self.packing_position_embedding(position_ids)
            return embeddings
        else:
            raise ValueError(
@@ -939,6 +936,8 @@ class SiglipVisionModel(nn.Module):
                continue
            if "head.mlp" in name or "head.probe" in name:
                continue
+            if "packing_position_embedding" in name:
+                continue
            if self.quant_config is not None and (
                scale_name := self.quant_config.get_cache_scale(name)
            ):