[Bugfix] Fix Qwen3-Reranker-8B load (#28117)

Signed-off-by: wang.yuqi <noooop@126.com>

[Bugfix] Fix Qwen3-Reranker-8B load (#28117)
Signed-off-by: wang.yuqi <noooop@126.com>
802748bd · wang.yuqi · GitHub · faedbb4d · 802748bd
Unverified Commit 802748bd authored Nov 06, 2025 by wang.yuqi Committed by GitHub Nov 05, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 19 additions and 9 deletions

vllm/model_executor/models/adapters.py vllm/model_executor/models/adapters.py +19 -9

No files found.
--- a/vllm/model_executor/models/adapters.py
+++ b/vllm/model_executor/models/adapters.py
@@ -186,15 +186,21 @@ def _create_pooling_model_cls(orig_cls: _T) -> _T:
        def _init_pooler(self, vllm_config: "VllmConfig", prefix: str = ""):
            raise NotImplementedError

-        def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
+        def load_weights(
+            self,
+            weights: Iterable[tuple[str, torch.Tensor]],
+            load_lm_head: bool = False,
+        ):
            # TODO: Support uninitialized params tracking

-            # We have deleted this attribute, so don't load it
-            weights = (
-                (name, data)
-                for name, data in weights
-                if not name.startswith("lm_head.")
-            )
+            # For most pooling models: We have deleted this attribute, so don't load it.
+            # For converting an LLM into a seq cls model, we need the lm_head.
+            if not load_lm_head:
+                weights = (
+                    (name, data)
+                    for name, data in weights
+                    if not name.startswith("lm_head.")
+                )

            # If `*ForCausalLM` defines `load_weights` on the inner model
            # and there are no other inner modules with parameters,
@@ -431,8 +437,12 @@ def load_weights_using_from_2_way_softmax(
        )
        model.lm_head = model.lm_head.tie_weights(embed_tokens)

-    # Skip ModelForSequenceClassification in MRO to avoid infinite recursion
-    loaded_weights = type(model).__mro__[1].load_weights(model, weights)
+    # ModelForPooling is dynamically defined inside the _create_pooling_model_cls
+    # function, so we need use this hacky method to obtain it.
+    pooling_model_cls = next(
+        x for x in type(model).__mro__ if x.__name__ == "ModelForPooling"
+    )
+    loaded_weights = pooling_model_cls.load_weights(model, weights, load_lm_head=True)

    from vllm.transformers_utils.tokenizer import get_tokenizer