[Bugfix][CI/Build] Fix failing Mteb CI (#26638)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>

[Bugfix][CI/Build] Fix failing Mteb CI (#26638)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
045b396d · Isotr0py · GitHub · 76852017 · 045b396d · 045b396d
Unverified Commit 045b396d authored Oct 12, 2025 by Isotr0py Committed by GitHub Oct 12, 2025
5 changed files
--- a/tests/models/language/pooling_mteb_test/mteb_utils.py
+++ b/tests/models/language/pooling_mteb_test/mteb_utils.py
@@ -191,7 +191,7 @@ def mteb_test_embed_models(
    with vllm_runner(
        model_info.name,
        runner="pooling",
-        max_model_len=None,
+        max_model_len=model_info.max_model_len,
        **vllm_extra_kwargs,
    ) as vllm_model:
        model_config = vllm_model.llm.llm_engine.model_config

--- a/tests/models/language/pooling_mteb_test/test_jina.py
+++ b/tests/models/language/pooling_mteb_test/test_jina.py
@@ -25,6 +25,11 @@ EMBEDDING_MODELS = [
        mteb_score=0.824413164,
        architecture="XLMRobertaModel",
        is_matryoshka=True,
+        # The default max length of the model is 8194, which will crash
+        # CUDAGraph due to odd length for Gemm. We set it to 8192 to
+        # avoid this issue.
+        max_model_len=8192,
+        dtype="float32",
    )
 ]


--- a/tests/models/language/pooling_mteb_test/test_st_projector.py
+++ b/tests/models/language/pooling_mteb_test/test_st_projector.py
@@ -23,6 +23,7 @@ ST_PROJECTOR_MODELS = [
        architecture="Gemma3TextModel",
        mteb_score=0.7473819294684156,
        enable_test=True,
+        dtype="float32",
    ),
 ]


--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -369,6 +369,7 @@ class ModelInfo:
    name: str
    architecture: str = ""
    dtype: str = "auto"
+    max_model_len: Optional[int] = None
    hf_dtype: str = "float32"
    hf_overrides: Optional[dict[str, Any]] = None
    default_pooling_type: str = ""

--- a/vllm/model_executor/layers/layernorm.py
+++ b/vllm/model_executor/layers/layernorm.py
@@ -318,7 +318,11 @@ class GemmaRMSNorm(CustomOp):
        """PyTorch-native implementation equivalent to forward()."""
        orig_dtype = x.dtype
        if residual is not None:
-            x = x + residual.float() if orig_dtype == torch.float16 else x + residual
+            x = (
+                x.float() + residual.float()
+                if orig_dtype == torch.float16
+                else x + residual
+            )
            residual = x

        x = x.float()