Update fp8_utils.py type hints: replace builtin `list[int]` with `typing.List[int]`

029da5e8 · zhuwenwen · 09396f62 · 029da5e8
Commit 029da5e8 authored Jul 21, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 8 deletions

vllm/model_executor/layers/quantization/utils/fp8_utils.py +8 -8

No files found.
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -5,7 +5,7 @@
 import functools
 import json
 import os
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable, Optional, Union, List
 import torch
@@ -34,7 +34,7 @@ def cutlass_scaled_mm(
    B: torch.Tensor,
    As: torch.Tensor,
    Bs: torch.Tensor,
-    block_size: list[int],
+    block_size: List[int],
    output_dtype: torch.dtype = torch.float16,
 ) -> torch.Tensor:
    return ops.cutlass_scaled_mm(A,
@@ -49,7 +49,7 @@ def rocm_aiter_gemm_w8a8_blockscale_impl(
    B: torch.Tensor,
    As: torch.Tensor,
    Bs: torch.Tensor,
-    block_size: list[int],
+    block_size: List[int],
    output_dtype: torch.dtype = torch.float16,
 ) -> torch.Tensor:
    import aiter as rocm_aiter
@@ -62,7 +62,7 @@ def rocm_aiter_gemm_w8a8_blockscale_fake(
    B: torch.Tensor,
    As: torch.Tensor,
    Bs: torch.Tensor,
-    block_size: list[int],
+    block_size: List[int],
    output_dtype: torch.dtype = torch.float16,
 ) -> torch.Tensor:
@@ -89,7 +89,7 @@ def dispatch_w8a8_blockscale_func(
        torch.Tensor,
        torch.Tensor,
        torch.Tensor,
-        list[int],
+        List[int],
        torch.dtype,
 ], torch.Tensor]:
    if use_cutlass:
@@ -117,7 +117,7 @@ def should_use_deepgemm(output_dtype: torch.dtype, weight: torch.Tensor):
 def apply_w8a8_block_fp8_linear(
    input: torch.Tensor,
    weight: torch.Tensor,
-    block_size: list[int],
+    block_size: List[int],
    weight_scale: torch.Tensor,
    input_scale: Optional[torch.Tensor] = None,
    bias: Optional[torch.Tensor] = None,
@@ -190,7 +190,7 @@ def apply_w8a8_block_fp8_linear(
 def apply_w8a8_block_fp8_linear_fake(
    input: torch.Tensor,
    weight: torch.Tensor,
-    block_size: list[int],
+    block_size: List[int],
    weight_scale: torch.Tensor,
    input_scale: Optional[torch.Tensor] = None,
    bias: Optional[torch.Tensor] = None,
@@ -571,7 +571,7 @@ def w8a8_block_fp8_matmul(
    B: torch.Tensor,
    As: torch.Tensor,
    Bs: torch.Tensor,
-    block_size: list[int],
+    block_size: List[int],
    output_dtype: torch.dtype = torch.float16,
 ) -> torch.Tensor:
    """This function performs matrix multiplication with block-wise