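Fix MoE run error: add missing typing imports (Dict, Tuple) and the missing comma in the moe_align_block_size signature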

10184690 · zhuwenwen · 675d3e87 · 10184690 · 10184690
Commit 10184690 authored Jun 23, 2025 by zhuwenwen
2 changed files
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -5,7 +5,7 @@ import functools
 import json
 import os
 import math
-from typing import Any, Callable, Optional, List, Optional, Tuple
+from typing import Any, Callable, Dict, Optional, List, Optional, Tuple
 import torch

--- a/vllm/model_executor/layers/fused_moe/moe_align_block_size.py
+++ b/vllm/model_executor/layers/fused_moe/moe_align_block_size.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from typing import Optional
+from typing import Optional, Tuple
 import torch
@@ -153,9 +153,9 @@ def moe_align_block_size(
    block_size: int,
    num_experts: int,
    expert_map: Optional[torch.Tensor] = None,
-    pad_sorted_ids: bool = False
+    pad_sorted_ids: bool = False,
    num_token: Optional[int] = None
-) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Aligns the token distribution across experts to be compatible with block
    size for matrix multiplication.
@@ -253,4 +253,4 @@ def moe_align_block_size(
    if expert_map is not None:
        expert_ids = expert_map[expert_ids]
    return sorted_ids, expert_ids, num_tokens_post_pad
\ No newline at end of file