Commit 25a9d4b3 authored by 王敏's avatar 王敏
Browse files

[fix]添加VLLM_USE_LIGHTOP_FUSED_TOPP_TOPK控制lightop topp_topk融合算子开关

parent 806ca2be
......@@ -309,6 +309,7 @@ if TYPE_CHECKING:
VLLM_USE_LIGHTOP_FILL_MOE_ALIGN: bool = False
VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT: bool = False
VLLM_USE_CUDA_GRAPH_SIZES: bool = False
VLLM_USE_LIGHTOP_FUSED_TOPP_TOPK: bool = False
def get_default_cache_root():
......@@ -1934,6 +1935,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_USE_CUDA_GRAPH_SIZES":
lambda: (os.getenv("VLLM_USE_CUDA_GRAPH_SIZES", "False").lower() in
("true", "1")),
# If set to 1/True, enable the fused top-p/top-k kernel in lightop
"VLLM_USE_LIGHTOP_FUSED_TOPP_TOPK":
lambda: (os.environ.get("VLLM_USE_LIGHTOP_FUSED_TOPP_TOPK", "False").lower() in
("true", "1")),
}
# --8<-- [end:env-vars-definition]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment