"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "e1d97c38f8689da0b11da0fac54cc277c237d5c4"
Commit ce888aa4 authored by zhuwenwen's avatar zhuwenwen
Browse files

fix optional and list

parent 9ae6a059
...@@ -297,7 +297,7 @@ class ModelConfig: ...@@ -297,7 +297,7 @@ class ModelConfig:
definitions""" definitions"""
io_processor_plugin: str | None = None io_processor_plugin: str | None = None
"""IOProcessor plugin name to load at model startup""" """IOProcessor plugin name to load at model startup"""
enable_chunked_prefill: Optional[bool] = None enable_chunked_prefill: bool | None = None
"""If True, prefill requests can be chunked based """If True, prefill requests can be chunked based
on the remaining max_num_batched_tokens.""" on the remaining max_num_batched_tokens."""
......
...@@ -54,7 +54,7 @@ def cutlass_scaled_mm( ...@@ -54,7 +54,7 @@ def cutlass_scaled_mm(
B: torch.Tensor, B: torch.Tensor,
As: torch.Tensor, As: torch.Tensor,
Bs: torch.Tensor, Bs: torch.Tensor,
block_size: List[int], block_size: list[int],
output_dtype: torch.dtype = torch.float16, output_dtype: torch.dtype = torch.float16,
) -> torch.Tensor: ) -> torch.Tensor:
return ops.cutlass_scaled_mm( return ops.cutlass_scaled_mm(
...@@ -787,7 +787,7 @@ def w8a8_triton_block_scaled_mm( ...@@ -787,7 +787,7 @@ def w8a8_triton_block_scaled_mm(
B: torch.Tensor, B: torch.Tensor,
As: torch.Tensor, As: torch.Tensor,
Bs: torch.Tensor, Bs: torch.Tensor,
block_size: List[int], block_size: list[int],
output_dtype: torch.dtype = torch.float16, output_dtype: torch.dtype = torch.float16,
) -> torch.Tensor: ) -> torch.Tensor:
"""This function performs matrix multiplication with block-wise """This function performs matrix multiplication with block-wise
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment