"vscode:/vscode.git/clone" did not exist on "a4b438d0c00c07adb8efce46a7d99c5be8e395d7"
Commit 1e799b7e (unverified), authored by Lucas Wilkinson, committed by GitHub
Browse files

[BugFix] Fix MLA + V1 + TP==1 causing reinitialization of cuda context (#14910)

parent 7f6c5ee0
@@ -152,7 +152,7 @@ class CudaPlatformBase(Platform):
 # here
 use_flashmla = (envs.VLLM_ATTENTION_BACKEND is None \
 or envs.VLLM_ATTENTION_BACKEND == "FLASHMLA")
-from vllm.attention.backends.flashmla import is_flashmla_supported
+from vllm.attention.ops.flashmla import is_flashmla_supported
 if use_flashmla and is_flashmla_supported()[0] \
 and cache_config.block_size != 64:
 cache_config.block_size = 64
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment