Unverified commit 3b5567a2, authored by Woosuk Kwon, committed by GitHub
Browse files

[V1][Minor] Do not print attn backend twice (#13985)


Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent fdcc4053
...@@ -178,7 +178,8 @@ class CudaPlatformBase(Platform): ...@@ -178,7 +178,8 @@ class CudaPlatformBase(Platform):
block_size) block_size)
else: else:
if use_v1: if use_v1:
logger.info("Using FlashMLA backend on V1 engine.") logger.info_once(
"Using FlashMLA backend on V1 engine.")
return ("vllm.v1.attention.backends.mla." return ("vllm.v1.attention.backends.mla."
"flashmla.FlashMLABackend") "flashmla.FlashMLABackend")
else: else:
...@@ -187,14 +188,14 @@ class CudaPlatformBase(Platform): ...@@ -187,14 +188,14 @@ class CudaPlatformBase(Platform):
"flashmla.FlashMLABackend") "flashmla.FlashMLABackend")
if use_v1: if use_v1:
logger.info("Using Triton MLA backend on V1 engine.") logger.info_once("Using Triton MLA backend on V1 engine.")
return ("vllm.v1.attention.backends.mla." return ("vllm.v1.attention.backends.mla."
"triton_mla.TritonMLABackend") "triton_mla.TritonMLABackend")
else: else:
logger.info("Using Triton MLA backend.") logger.info("Using Triton MLA backend.")
return "vllm.attention.backends.triton_mla.TritonMLABackend" return "vllm.attention.backends.triton_mla.TritonMLABackend"
if use_v1: if use_v1:
logger.info("Using Flash Attention backend on V1 engine.") logger.info_once("Using Flash Attention backend on V1 engine.")
return ("vllm.v1.attention.backends.flash_attn." return ("vllm.v1.attention.backends.flash_attn."
"FlashAttentionBackend") "FlashAttentionBackend")
if selected_backend == _Backend.FLASHINFER: if selected_backend == _Backend.FLASHINFER:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment