[Refactor] Remove Unused Env `VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON` (#20334)

Signed-off-by: yewentao256 <zhyanwentao@126.com>

[Refactor] Remove Unused Env `VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON` (#20334)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
9dae7d46 · Wentao Ye · GitHub · 7058d7dd · 9dae7d46 · 9dae7d46
Unverified commit 9dae7d46, authored Jul 01, 2025 by Wentao Ye and committed by GitHub on Jul 01, 2025
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 0 additions and 8 deletions

vllm/envs.py vllm/envs.py +0 -7

vllm/model_executor/layers/fused_moe/moe_align_block_size.py vllm/model_executor/layers/fused_moe/moe_align_block_size.py +0 -1

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -104,7 +104,6 @@ if TYPE_CHECKING:
    VLLM_SERVER_DEV_MODE: bool = False
    VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128
    VLLM_MLA_DISABLE: bool = False
-    VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON: bool = False
    VLLM_RAY_PER_WORKER_GPUS: float = 1.0
    VLLM_RAY_BUNDLE_INDICES: str = ""
    VLLM_CUDART_SO_PATH: Optional[str] = None
@@ -769,12 +768,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_MLA_DISABLE":
    lambda: bool(int(os.getenv("VLLM_MLA_DISABLE", "0"))),

-    # If set, vLLM will use the Triton implementation of moe_align_block_size,
-    # i.e. moe_align_block_size_triton in fused_moe.py.
-    "VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON":
-    lambda: bool(int(os.getenv("VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON", "0"))
-                 ),
-
    # Number of GPUs per worker in Ray, if it is set to be a fraction,
    # it allows ray to schedule multiple actors on a single GPU,
    # so that users can colocate other actors on the same GPUs as vLLM.

--- a/vllm/model_executor/layers/fused_moe/moe_align_block_size.py
+++ b/vllm/model_executor/layers/fused_moe/moe_align_block_size.py
@@ -94,7 +94,6 @@ def moe_align_block_size_stage4(

 # Triton implementation based on:
 # https://github.com/sgl-project/sglang/commit/ba5112ff691d791a9e38c6c71f59324a5fcb49d0
-# TODO(wentao): Deprecated this function in the future.
 def moe_align_block_size_triton(
    topk_ids: torch.Tensor,
    num_experts: int,