Fix vLLM x torch.compile config caching (#16491)

Signed-off-by: rzou <zou3519@gmail.com>

Fix vLLM x torch.compile config caching (#16491)
Signed-off-by: rzou <zou3519@gmail.com>
b590adfd · Richard Zou · GitHub · b4fe16c7 · b590adfd
Unverified commit b590adfd, authored Apr 15, 2025 by Richard Zou; committed by GitHub Apr 14, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 9 additions and 3 deletions

vllm/config.py vllm/config.py +9 -3

No files found.
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -298,12 +298,18 @@ class ModelConfig:
        factors.append(self.quantization)
        factors.append(self.revision)
        factors.append(self.code_revision)
+        factors.append(self.max_model_len)
+        factors.append(self.max_logprobs)
+        factors.append(self.disable_sliding_window)
        factors.append(self.trust_remote_code)
+        factors.append(self.mm_processor_kwargs)
+        factors.append(self.generation_config)
+        factors.append(self.model_impl)
+        factors.append(self.override_generation_config)
        factors.append(self.rope_scaling)
        factors.append(self.rope_theta)
-        # rope cos/sin cache depends on the max_position_embeddings
-        factors.append(
-            getattr(self.hf_config, "max_position_embeddings", "None"))
+        # hf_config can control how the model looks!
+        factors.append(self.hf_config.to_json_string())
        return hashlib.sha256(str(factors).encode()).hexdigest()
    def __init__(