Commit ce52b8a8 authored by wangmin6's avatar wangmin6
Browse files

Merge branch 'v0.15.1-dev-wm' into 'v0.15.1-dev'

[fix]添加VLLM_USE_LIGHTOP_FUSED_TOPP_TOPK控制lightop topp_topk融合算子开关

See merge request dcutoolkit/deeplearing/vllm!496
parents 3b38e285 25a9d4b3
...@@ -309,6 +309,7 @@ if TYPE_CHECKING: ...@@ -309,6 +309,7 @@ if TYPE_CHECKING:
VLLM_USE_LIGHTOP_FILL_MOE_ALIGN: bool = False VLLM_USE_LIGHTOP_FILL_MOE_ALIGN: bool = False
VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT: bool = False VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT: bool = False
VLLM_USE_CUDA_GRAPH_SIZES: bool = False VLLM_USE_CUDA_GRAPH_SIZES: bool = False
VLLM_USE_LIGHTOP_FUSED_TOPP_TOPK: bool = False
def get_default_cache_root(): def get_default_cache_root():
...@@ -1934,6 +1935,11 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1934,6 +1935,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_USE_CUDA_GRAPH_SIZES": "VLLM_USE_CUDA_GRAPH_SIZES":
lambda: (os.getenv("VLLM_USE_CUDA_GRAPH_SIZES", "False").lower() in lambda: (os.getenv("VLLM_USE_CUDA_GRAPH_SIZES", "False").lower() in
("true", "1")), ("true", "1")),
# If set to 1/True, enable the fused top-p/top-k (topp_topk) kernel in lightop
"VLLM_USE_LIGHTOP_FUSED_TOPP_TOPK":
lambda: (os.environ.get("VLLM_USE_LIGHTOP_FUSED_TOPP_TOPK", "False").lower() in
("true", "1")),
} }
# --8<-- [end:env-vars-definition] # --8<-- [end:env-vars-definition]
......
...@@ -94,7 +94,7 @@ class TopKTopPSampler(nn.Module): ...@@ -94,7 +94,7 @@ class TopKTopPSampler(nn.Module):
self.forward = self.forward_native self.forward = self.forward_native
else: else:
self.forward = self.forward_native self.forward = self.forward_native
if HAS_LIGHTOP_OPT_KERNEL: if HAS_LIGHTOP_OPT_KERNEL and envs.VLLM_USE_LIGHTOP_FUSED_TOPP_TOPK:
self.forward = self.forward_lightop_opt self.forward = self.forward_lightop_opt
self.apply_top_k_top_p = apply_top_k_top_p self.apply_top_k_top_p = apply_top_k_top_p
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment