Commit b8ef3436 authored by zhuwenwen
Browse files

fix optional error

parent cffe15ef
......@@ -237,7 +237,7 @@ if TYPE_CHECKING:
VLLM_COMPILE_CACHE_SAVE_FORMAT: Literal["binary", "unpacked"] = "binary"
VLLM_USE_V2_MODEL_RUNNER: bool = False
# add envs
VLLM_OPTEST_URLS_PORT: Optional[int] = None
VLLM_OPTEST_URLS_PORT: int | None = None
VLLM_OPTEST_MODELS_PATH: str = ""
VLLM_USE_TRITON_PREFIX_FLASH_ATTN: bool = False
VLLM_USE_FLASH_MLA: bool = False
......@@ -248,7 +248,7 @@ if TYPE_CHECKING:
VLLM_SPEC_DECODE_EAGER: bool = False
VLLM_PCIE_USE_CUSTOM_ALLREDUCE: bool = False
VLLM_CUSTOM_ALLREDUCE_SUPPORTED_WORLDSIZE_MAX: int = 16
VLLM_ENFORCE_EAGER_BS_THRESHOLD: Optional[int] = None
VLLM_ENFORCE_EAGER_BS_THRESHOLD: int | None = None
VLLM_HAS_CONTEXT_DEFAULT: bool = False
VLLM_USE_NN: bool = False
VLLM_ENABLE_TBO: bool = False
......
......@@ -1208,7 +1208,7 @@ def get_moe_configs(
dtype: str | None,
block_n: int | None = None,
block_k: int | None = None,
use_nn_moe: Optional[bool] = False,
use_nn_moe: bool | None = False,
) -> dict[int, Any] | None:
"""
Return optimized configurations for the fused MoE kernel.
......@@ -1365,7 +1365,7 @@ def get_default_config(
topk: int,
dtype: str | None,
block_shape: list[int] | None = None,
use_nn_moe: Optional[bool]=False,
use_nn_moe: bool | None = False,
) -> dict[str, int]:
if vllm_is_batch_invariant():
config = {
......@@ -1434,7 +1434,7 @@ def try_get_optimal_moe_config(
dtype: str | None,
M: int,
block_shape: list[int] | None = None,
use_nn_moe: Optional[bool] = False,
use_nn_moe: bool | None = False,
) -> dict[str, int]:
from vllm.model_executor.layers.fused_moe import get_config
......@@ -1791,7 +1791,7 @@ def inplace_fused_experts(
block_shape: list[int] | None = None,
w1_bias: torch.Tensor | None = None,
w2_bias: torch.Tensor | None = None,
use_nn_moe: Optional[bool] = False,
use_nn_moe: bool | None = False,
) -> None:
fused_experts_impl(
hidden_states,
......@@ -1850,7 +1850,7 @@ def inplace_fused_experts_fake(
block_shape: list[int] | None = None,
w1_bias: torch.Tensor | None = None,
w2_bias: torch.Tensor | None = None,
use_nn_moe: Optional[bool] = False,
use_nn_moe: bool | None = False,
) -> None:
pass
......@@ -1952,7 +1952,7 @@ def outplace_fused_experts_fake(
block_shape: list[int] | None = None,
w1_bias: torch.Tensor | None = None,
w2_bias: torch.Tensor | None = None,
use_nn_moe: Optional[bool] = False,
use_nn_moe: bool | None = False,
) -> torch.Tensor:
return torch.empty_like(hidden_states)
......@@ -2002,7 +2002,7 @@ def fused_experts(
allow_deep_gemm: bool = False,
allow_cutlass_block_scaled_grouped_gemm: bool = False,
use_int4_w4a8: bool = False,
use_nn_moe: Optional[bool] = False,
use_nn_moe: bool | None = False,
) -> torch.Tensor:
if quant_config is None:
quant_config = FUSED_MOE_UNQUANTIZED_CONFIG
......@@ -2145,7 +2145,7 @@ def fused_experts_impl(
block_shape: list[int] | None = None,
w1_bias: torch.Tensor | None = None,
w2_bias: torch.Tensor | None = None,
use_nn_moe: Optional[bool] = False,
use_nn_moe: bool | None = False,
) -> torch.Tensor:
# Check constraints.
num_tokens = hidden_states.size(0)
......
......@@ -249,8 +249,8 @@ class RMSNorm(CustomOp):
def forward_apex(
self,
x: torch.Tensor,
residual: Optional[torch.Tensor] = None,
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
residual: torch.Tensor | None = None,
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
if self.variance_size_override is not None:
return self.forward_native(x, residual)
......
......@@ -740,10 +740,10 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
def forward(
self, input_,
rms_weight: Optional[torch.Tensor] = None,
residual: Optional[torch.Tensor] = None,
update_hd: Optional[bool] = True
) -> Union[torch.Tensor, tuple[torch.Tensor, Optional[Parameter]]]:
rms_weight: torch.Tensor | None = None,
residual: torch.Tensor | None = None,
update_hd: bool | None = True
) -> torch.Tensor | tuple[torch.Tensor, Parameter] | None:
if envs.USE_FUSED_RMS_QUANT and rms_weight is not None:
input_quant_args = None
assert residual is not None and rms_weight is not None
......@@ -795,7 +795,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
skip_bias_add: bool = False,
params_dtype: torch.dtype | None = None,
quant_config: QuantizationConfig | None = None,
eps: Optional[float] = 1e-6,
eps: float | None = 1e-6,
prefix: str = "",
*,
return_bias: bool = True,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment