Unverified commit f961d7f6, authored by Zhengyuan Su (苏政渊), committed by GitHub
Browse files

[BugFix] Pass in correct VLLM config in FlashInfer backend (#13207) (#16973)


Signed-off-by: 苏政渊 <suzhengyuan@moonshot.cn>
Co-authored-by: 苏政渊 <suzhengyuan@moonshot.cn>
parent d0591104
...@@ -37,7 +37,7 @@ from vllm.attention.backends.utils import (PAD_SLOT_ID, compute_slot_mapping, ...@@ -37,7 +37,7 @@ from vllm.attention.backends.utils import (PAD_SLOT_ID, compute_slot_mapping,
is_block_tables_empty) is_block_tables_empty)
from vllm.attention.layer import Attention from vllm.attention.layer import Attention
from vllm.attention.ops.paged_attn import PagedAttention from vllm.attention.ops.paged_attn import PagedAttention
from vllm.config import VllmConfig, get_current_vllm_config from vllm.config import VllmConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import (async_tensor_h2d, get_kv_cache_torch_dtype, from vllm.utils import (async_tensor_h2d, get_kv_cache_torch_dtype,
make_tensor_with_pad) make_tensor_with_pad)
...@@ -187,7 +187,7 @@ class FlashInferState(AttentionState): ...@@ -187,7 +187,7 @@ class FlashInferState(AttentionState):
# Global hyperparameters shared by all attention layers # Global hyperparameters shared by all attention layers
self.global_hyperparameters: Optional[PerLayerParameters] = None self.global_hyperparameters: Optional[PerLayerParameters] = None
self.vllm_config = get_current_vllm_config() self.vllm_config = self.runner.vllm_config
def _get_workspace_buffer(self): def _get_workspace_buffer(self):
if self._workspace_buffer is None: if self._workspace_buffer is None:
...@@ -613,7 +613,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]): ...@@ -613,7 +613,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
# Global hyperparameters shared by all attention layers # Global hyperparameters shared by all attention layers
self.global_hyperparameters: Optional[PerLayerParameters] = None self.global_hyperparameters: Optional[PerLayerParameters] = None
self.vllm_config = get_current_vllm_config() self.vllm_config = self.runner.vllm_config
def prepare(self): def prepare(self):
self.slot_mapping: List[int] = [] self.slot_mapping: List[int] = []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment