Commit a053add9 authored by zhuwenwen's avatar zhuwenwen
Browse files

remove unused code

parent 2b84890b
...@@ -221,8 +221,7 @@ def _support_torch_compile( ...@@ -221,8 +221,7 @@ def _support_torch_compile(
# torch.compiler.is_compiling() means we are inside the compilation # torch.compiler.is_compiling() means we are inside the compilation
# e.g. TPU has the compilation logic in model runner, so we don't # e.g. TPU has the compilation logic in model runner, so we don't
# need to compile the model inside. # need to compile the model inside.
skip_cuda_graphs = get_forward_context().skip_cuda_graphs if envs.VLLM_ENABLE_TBO and get_forward_context().skip_cuda_graphs:
if envs.VLLM_ENABLE_TBO and skip_cuda_graphs:
return self.forward(*args, **kwargs) return self.forward(*args, **kwargs)
if self.do_not_compile or torch.compiler.is_compiling() or get_profilling(): if self.do_not_compile or torch.compiler.is_compiling() or get_profilling():
......
...@@ -437,7 +437,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): ...@@ -437,7 +437,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
logical_to_physical_map: Optional[torch.Tensor] = None, logical_to_physical_map: Optional[torch.Tensor] = None,
logical_replica_count: Optional[torch.Tensor] = None, logical_replica_count: Optional[torch.Tensor] = None,
use_nn_moe: Optional[bool] = False, use_nn_moe: Optional[bool] = False,
routed_scaling_factor: Optional[float] = None,
use_fused_gate: Optional[bool] = False, use_fused_gate: Optional[bool] = False,
) -> torch.Tensor: ) -> torch.Tensor:
if enable_eplb: if enable_eplb:
...@@ -468,7 +467,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): ...@@ -468,7 +467,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
logical_to_physical_map=logical_to_physical_map, logical_to_physical_map=logical_to_physical_map,
logical_replica_count=logical_replica_count, logical_replica_count=logical_replica_count,
use_nn_moe=use_nn_moe, use_nn_moe=use_nn_moe,
routed_scaling_factor=routed_scaling_factor,
use_fused_gate=use_fused_gate use_fused_gate=use_fused_gate
) )
...@@ -495,7 +493,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): ...@@ -495,7 +493,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
logical_to_physical_map: Optional[torch.Tensor] = None, logical_to_physical_map: Optional[torch.Tensor] = None,
logical_replica_count: Optional[torch.Tensor] = None, logical_replica_count: Optional[torch.Tensor] = None,
use_nn_moe: Optional[bool] = False, use_nn_moe: Optional[bool] = False,
routed_scaling_factor: Optional[float] = None,
use_fused_gate: Optional[bool] = False, use_fused_gate: Optional[bool] = False,
) -> torch.Tensor: ) -> torch.Tensor:
...@@ -517,7 +514,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): ...@@ -517,7 +514,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
expert_load_view=expert_load_view, expert_load_view=expert_load_view,
logical_to_physical_map=logical_to_physical_map, logical_to_physical_map=logical_to_physical_map,
logical_replica_count=logical_replica_count, logical_replica_count=logical_replica_count,
routed_scaling_factor=routed_scaling_factor,
use_fused_gate=use_fused_gate) use_fused_gate=use_fused_gate)
if self.rocm_aiter_moe_enabled: if self.rocm_aiter_moe_enabled:
...@@ -588,7 +584,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): ...@@ -588,7 +584,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
logical_to_physical_map: Optional[torch.Tensor] = None, logical_to_physical_map: Optional[torch.Tensor] = None,
logical_replica_count: Optional[torch.Tensor] = None, logical_replica_count: Optional[torch.Tensor] = None,
use_nn_moe: Optional[bool] = False, use_nn_moe: Optional[bool] = False,
routed_scaling_factor: Optional[float] = None,
use_fused_gate: Optional[bool] = False, use_fused_gate: Optional[bool] = False,
): ):
if enable_eplb is not False or expert_load_view is not None or \ if enable_eplb is not False or expert_load_view is not None or \
...@@ -828,7 +823,6 @@ class FusedMoE(CustomOp): ...@@ -828,7 +823,6 @@ class FusedMoE(CustomOp):
enable_eplb: bool = False, enable_eplb: bool = False,
num_redundant_experts: int = 0, num_redundant_experts: int = 0,
has_bias: bool = False, has_bias: bool = False,
routed_scaling_factor: Optional[float] = None,
): ):
super().__init__() super().__init__()
if params_dtype is None: if params_dtype is None:
...@@ -909,7 +903,6 @@ class FusedMoE(CustomOp): ...@@ -909,7 +903,6 @@ class FusedMoE(CustomOp):
self.e_score_correction_bias = e_score_correction_bias self.e_score_correction_bias = e_score_correction_bias
self.apply_router_weight_on_input = apply_router_weight_on_input self.apply_router_weight_on_input = apply_router_weight_on_input
self.activation = activation self.activation = activation
self.routed_scaling_factor = routed_scaling_factor
if self.scoring_func != "softmax" and not self.use_grouped_topk: if self.scoring_func != "softmax" and not self.use_grouped_topk:
raise ValueError("Only softmax scoring function is supported for " raise ValueError("Only softmax scoring function is supported for "
...@@ -1519,7 +1512,6 @@ class FusedMoE(CustomOp): ...@@ -1519,7 +1512,6 @@ class FusedMoE(CustomOp):
expert_load_view: Optional[torch.Tensor] = None, expert_load_view: Optional[torch.Tensor] = None,
logical_to_physical_map: Optional[torch.Tensor] = None, logical_to_physical_map: Optional[torch.Tensor] = None,
logical_replica_count: Optional[torch.Tensor] = None, logical_replica_count: Optional[torch.Tensor] = None,
routed_scaling_factor: Optional[float] = None,
use_fused_gate: Optional[bool] = False use_fused_gate: Optional[bool] = False
) -> tuple[torch.Tensor, torch.Tensor]: ) -> tuple[torch.Tensor, torch.Tensor]:
""" """
...@@ -1827,7 +1819,6 @@ class FusedMoE(CustomOp): ...@@ -1827,7 +1819,6 @@ class FusedMoE(CustomOp):
logical_to_physical_map=self.logical_to_physical_map, logical_to_physical_map=self.logical_to_physical_map,
logical_replica_count=self.logical_replica_count, logical_replica_count=self.logical_replica_count,
use_nn_moe=self.use_nn_moe, use_nn_moe=self.use_nn_moe,
routed_scaling_factor=self.routed_scaling_factor,
use_fused_gate=self.use_fused_gate use_fused_gate=self.use_fused_gate
) )
......
...@@ -38,11 +38,9 @@ from vllm.model_executor.layers.layernorm import RMSNorm ...@@ -38,11 +38,9 @@ from vllm.model_executor.layers.layernorm import RMSNorm
from vllm.model_executor.layers.linear import (QKVParallelLinear, from vllm.model_executor.layers.linear import (QKVParallelLinear,
RowParallelLinear) RowParallelLinear)
from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.layers.pooler import Pooler, PoolingType
from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.rotary_embedding import get_rope
from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
from vllm.model_executor.pooling_metadata import PoolingMetadata
from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.sequence import IntermediateTensors, PoolerOutput from vllm.sequence import IntermediateTensors, PoolerOutput
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment