Unverified commit 12e6c0b4, authored by Michael Goin and committed by GitHub
Browse files

[Bugfix][V1] Fix FlashInfer V1 backend using the wrong VllmConfig (#18086)

parent 9a2a6357
...@@ -14,8 +14,7 @@ import vllm.envs as envs ...@@ -14,8 +14,7 @@ import vllm.envs as envs
from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
AttentionType) AttentionType)
from vllm.attention.layer import Attention from vllm.attention.layer import Attention
from vllm.config import (VllmConfig, get_current_vllm_config, from vllm.config import VllmConfig, get_layers_from_vllm_config
get_layers_from_vllm_config)
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.v1.attention.backends.flash_attn import use_cascade_attention from vllm.v1.attention.backends.flash_attn import use_cascade_attention
from vllm.v1.attention.backends.utils import CommonAttentionMetadata from vllm.v1.attention.backends.utils import CommonAttentionMetadata
...@@ -215,7 +214,7 @@ class FlashInferMetadataBuilder: ...@@ -215,7 +214,7 @@ class FlashInferMetadataBuilder:
# Global hyperparameters shared by all attention layers # Global hyperparameters shared by all attention layers
self.global_hyperparameters: Optional[PerLayerParameters] = None self.global_hyperparameters: Optional[PerLayerParameters] = None
self.vllm_config = get_current_vllm_config() self.vllm_config = runner.vllm_config
self.kv_cache_spec = kv_cache_spec self.kv_cache_spec = kv_cache_spec
self.block_table = block_table self.block_table = block_table
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment