Fix Optional and List type annotations

ce888aa4 · zhuwenwen · 9ae6a059 · ce888aa4 · ce888aa4
Commit ce888aa4 authored Dec 13, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 3 additions and 3 deletions

vllm/config/model.py vllm/config/model.py +1 -1

vllm/model_executor/layers/quantization/utils/fp8_utils.py vllm/model_executor/layers/quantization/utils/fp8_utils.py +2 -2

No files found.
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -297,7 +297,7 @@ class ModelConfig:
    definitions"""
    io_processor_plugin: str | None = None
    """IOProcessor plugin name to load at model startup"""
-    enable_chunked_prefill: Optional[bool] = None
+    enable_chunked_prefill: bool | None = None
    """If True, prefill requests can be chunked based
    on the remaining max_num_batched_tokens."""


--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -54,7 +54,7 @@ def cutlass_scaled_mm(
    B: torch.Tensor,
    As: torch.Tensor,
    Bs: torch.Tensor,
-    block_size: List[int],
+    block_size: list[int],
    output_dtype: torch.dtype = torch.float16,
 ) -> torch.Tensor:
    return ops.cutlass_scaled_mm(
@@ -787,7 +787,7 @@ def w8a8_triton_block_scaled_mm(
    B: torch.Tensor,
    As: torch.Tensor,
    Bs: torch.Tensor,
-    block_size: List[int],
+    block_size: list[int],
    output_dtype: torch.dtype = torch.float16,
 ) -> torch.Tensor:
    """This function performs matrix multiplication with block-wise