[Model] Add torch.compile support for InternVL vision encoder (#38049)

Signed-off-by: tianrengao <terrygao87@gmail.com>

[Model] Add torch.compile support for InternVL vision encoder (#38049)
Signed-off-by: tianrengao <terrygao87@gmail.com>
38de8223 · Terry Gao · GitHub · 2bfbdca2 · 38de8223 · 38de8223
Unverified commit 38de8223, authored Mar 25, 2026 by Terry Gao; committed by GitHub on Mar 25, 2026.
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 20 additions and 3 deletions

vllm/config/utils.py vllm/config/utils.py +9 -1

vllm/model_executor/models/intern_vit.py vllm/model_executor/models/intern_vit.py +11 -2

No files found.
--- a/vllm/config/utils.py
+++ b/vllm/config/utils.py
@@ -296,7 +296,15 @@ def normalize_value(x):
    # PretrainedConfig
    if hasattr(x, "to_json_string") and callable(x.to_json_string):
-        return x.to_json_string()
+        try:
+            return x.to_json_string()
+        except (TypeError, ValueError):
+            # to_json_string() may fail for trust-remote-code configs
+            # with non-JSON-serializable nested objects. Fall back to
+            # normalizing the dict representation recursively.
+            if hasattr(x, "to_dict") and callable(x.to_dict):
+                return normalize_value(x.to_dict())
+            raise
    # Unsupported type: e.g., modules, generators, open files, or objects
    # without a stable JSON/UUID representation. Hard-error to avoid

--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -15,6 +15,10 @@ import torch.nn as nn
 import torch.nn.functional as F
 from transformers import PretrainedConfig
+from vllm.compilation.decorators import (
+    should_torch_compile_mm_encoder,
+    support_torch_compile,
+)
 from vllm.distributed import (
    divide,
    get_tensor_model_parallel_rank,
@@ -280,6 +284,11 @@ class InternMLP(nn.Module):
        return hidden_states
+@support_torch_compile(
+    dynamic_arg_dims={"hidden_states": 0},
+    enable_if=should_torch_compile_mm_encoder,
+    is_encoder=True,
+)
 class InternVisionEncoderLayer(nn.Module):
    def __init__(
        self,
@@ -364,8 +373,8 @@ class InternVisionEncoder(nn.Module):
        self.layers = nn.ModuleList(
            [
                self.layer_cls(
-                    config,
+                    config=config,
-                    quant_config,
+                    quant_config=quant_config,
                    num_dummy_heads=num_dummy_heads,
                    prefix=f"{prefix}.layers.{layer_idx}",
                )