[Bugfix][V1] Fix FlashInfer V1 backend using the wrong VllmConfig (#18086)

12e6c0b4 · Michael Goin · GitHub · 9a2a6357 · 12e6c0b4
Unverified commit 12e6c0b4, authored May 13, 2025 by Michael Goin; committed by GitHub on May 13, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 3 deletions

vllm/v1/attention/backends/flashinfer.py +2 -3

No files found.
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -14,8 +14,7 @@ import vllm.envs as envs
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                              AttentionType)
 from vllm.attention.layer import Attention
-from vllm.config import (VllmConfig, get_current_vllm_config,
+from vllm.config import VllmConfig, get_layers_from_vllm_config
-                         get_layers_from_vllm_config)
 from vllm.logger import init_logger
 from vllm.v1.attention.backends.flash_attn import use_cascade_attention
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata
@@ -215,7 +214,7 @@ class FlashInferMetadataBuilder:
         # Global hyperparameters shared by all attention layers
         self.global_hyperparameters: Optional[PerLayerParameters] = None
-        self.vllm_config = get_current_vllm_config()
+        self.vllm_config = runner.vllm_config
         self.kv_cache_spec = kv_cache_spec
         self.block_table = block_table