Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a053add9
Commit
a053add9
authored
Sep 11, 2025
by
zhuwenwen
Browse files
remove unused code
parent
2b84890b
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
1 addition
and
13 deletions
+1
-13
vllm/compilation/decorators.py
vllm/compilation/decorators.py
+1
-2
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+0
-9
vllm/model_executor/models/qwen3.py
vllm/model_executor/models/qwen3.py
+0
-2
No files found.
vllm/compilation/decorators.py
View file @
a053add9
...
@@ -221,8 +221,7 @@ def _support_torch_compile(
...
@@ -221,8 +221,7 @@ def _support_torch_compile(
# torch.compiler.is_compiling() means we are inside the compilation
# torch.compiler.is_compiling() means we are inside the compilation
# e.g. TPU has the compilation logic in model runner, so we don't
# e.g. TPU has the compilation logic in model runner, so we don't
# need to compile the model inside.
# need to compile the model inside.
skip_cuda_graphs
=
get_forward_context
().
skip_cuda_graphs
if
envs
.
VLLM_ENABLE_TBO
and
get_forward_context
().
skip_cuda_graphs
:
if
envs
.
VLLM_ENABLE_TBO
and
skip_cuda_graphs
:
return
self
.
forward
(
*
args
,
**
kwargs
)
return
self
.
forward
(
*
args
,
**
kwargs
)
if
self
.
do_not_compile
or
torch
.
compiler
.
is_compiling
()
or
get_profilling
():
if
self
.
do_not_compile
or
torch
.
compiler
.
is_compiling
()
or
get_profilling
():
...
...
vllm/model_executor/layers/fused_moe/layer.py
View file @
a053add9
...
@@ -437,7 +437,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
...
@@ -437,7 +437,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
logical_to_physical_map
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_to_physical_map
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_replica_count
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_replica_count
:
Optional
[
torch
.
Tensor
]
=
None
,
use_nn_moe
:
Optional
[
bool
]
=
False
,
use_nn_moe
:
Optional
[
bool
]
=
False
,
routed_scaling_factor
:
Optional
[
float
]
=
None
,
use_fused_gate
:
Optional
[
bool
]
=
False
,
use_fused_gate
:
Optional
[
bool
]
=
False
,
)
->
torch
.
Tensor
:
)
->
torch
.
Tensor
:
if
enable_eplb
:
if
enable_eplb
:
...
@@ -468,7 +467,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
...
@@ -468,7 +467,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
logical_to_physical_map
=
logical_to_physical_map
,
logical_to_physical_map
=
logical_to_physical_map
,
logical_replica_count
=
logical_replica_count
,
logical_replica_count
=
logical_replica_count
,
use_nn_moe
=
use_nn_moe
,
use_nn_moe
=
use_nn_moe
,
routed_scaling_factor
=
routed_scaling_factor
,
use_fused_gate
=
use_fused_gate
use_fused_gate
=
use_fused_gate
)
)
...
@@ -495,7 +493,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
...
@@ -495,7 +493,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
logical_to_physical_map
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_to_physical_map
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_replica_count
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_replica_count
:
Optional
[
torch
.
Tensor
]
=
None
,
use_nn_moe
:
Optional
[
bool
]
=
False
,
use_nn_moe
:
Optional
[
bool
]
=
False
,
routed_scaling_factor
:
Optional
[
float
]
=
None
,
use_fused_gate
:
Optional
[
bool
]
=
False
,
use_fused_gate
:
Optional
[
bool
]
=
False
,
)
->
torch
.
Tensor
:
)
->
torch
.
Tensor
:
...
@@ -517,7 +514,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
...
@@ -517,7 +514,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
expert_load_view
=
expert_load_view
,
expert_load_view
=
expert_load_view
,
logical_to_physical_map
=
logical_to_physical_map
,
logical_to_physical_map
=
logical_to_physical_map
,
logical_replica_count
=
logical_replica_count
,
logical_replica_count
=
logical_replica_count
,
routed_scaling_factor
=
routed_scaling_factor
,
use_fused_gate
=
use_fused_gate
)
use_fused_gate
=
use_fused_gate
)
if
self
.
rocm_aiter_moe_enabled
:
if
self
.
rocm_aiter_moe_enabled
:
...
@@ -588,7 +584,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
...
@@ -588,7 +584,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
logical_to_physical_map
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_to_physical_map
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_replica_count
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_replica_count
:
Optional
[
torch
.
Tensor
]
=
None
,
use_nn_moe
:
Optional
[
bool
]
=
False
,
use_nn_moe
:
Optional
[
bool
]
=
False
,
routed_scaling_factor
:
Optional
[
float
]
=
None
,
use_fused_gate
:
Optional
[
bool
]
=
False
,
use_fused_gate
:
Optional
[
bool
]
=
False
,
):
):
if
enable_eplb
is
not
False
or
expert_load_view
is
not
None
or
\
if
enable_eplb
is
not
False
or
expert_load_view
is
not
None
or
\
...
@@ -828,7 +823,6 @@ class FusedMoE(CustomOp):
...
@@ -828,7 +823,6 @@ class FusedMoE(CustomOp):
enable_eplb
:
bool
=
False
,
enable_eplb
:
bool
=
False
,
num_redundant_experts
:
int
=
0
,
num_redundant_experts
:
int
=
0
,
has_bias
:
bool
=
False
,
has_bias
:
bool
=
False
,
routed_scaling_factor
:
Optional
[
float
]
=
None
,
):
):
super
().
__init__
()
super
().
__init__
()
if
params_dtype
is
None
:
if
params_dtype
is
None
:
...
@@ -909,7 +903,6 @@ class FusedMoE(CustomOp):
...
@@ -909,7 +903,6 @@ class FusedMoE(CustomOp):
self
.
e_score_correction_bias
=
e_score_correction_bias
self
.
e_score_correction_bias
=
e_score_correction_bias
self
.
apply_router_weight_on_input
=
apply_router_weight_on_input
self
.
apply_router_weight_on_input
=
apply_router_weight_on_input
self
.
activation
=
activation
self
.
activation
=
activation
self
.
routed_scaling_factor
=
routed_scaling_factor
if
self
.
scoring_func
!=
"softmax"
and
not
self
.
use_grouped_topk
:
if
self
.
scoring_func
!=
"softmax"
and
not
self
.
use_grouped_topk
:
raise
ValueError
(
"Only softmax scoring function is supported for "
raise
ValueError
(
"Only softmax scoring function is supported for "
...
@@ -1519,7 +1512,6 @@ class FusedMoE(CustomOp):
...
@@ -1519,7 +1512,6 @@ class FusedMoE(CustomOp):
expert_load_view
:
Optional
[
torch
.
Tensor
]
=
None
,
expert_load_view
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_to_physical_map
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_to_physical_map
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_replica_count
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_replica_count
:
Optional
[
torch
.
Tensor
]
=
None
,
routed_scaling_factor
:
Optional
[
float
]
=
None
,
use_fused_gate
:
Optional
[
bool
]
=
False
use_fused_gate
:
Optional
[
bool
]
=
False
)
->
tuple
[
torch
.
Tensor
,
torch
.
Tensor
]:
)
->
tuple
[
torch
.
Tensor
,
torch
.
Tensor
]:
"""
"""
...
@@ -1827,7 +1819,6 @@ class FusedMoE(CustomOp):
...
@@ -1827,7 +1819,6 @@ class FusedMoE(CustomOp):
logical_to_physical_map
=
self
.
logical_to_physical_map
,
logical_to_physical_map
=
self
.
logical_to_physical_map
,
logical_replica_count
=
self
.
logical_replica_count
,
logical_replica_count
=
self
.
logical_replica_count
,
use_nn_moe
=
self
.
use_nn_moe
,
use_nn_moe
=
self
.
use_nn_moe
,
routed_scaling_factor
=
self
.
routed_scaling_factor
,
use_fused_gate
=
self
.
use_fused_gate
use_fused_gate
=
self
.
use_fused_gate
)
)
...
...
vllm/model_executor/models/qwen3.py
View file @
a053add9
...
@@ -38,11 +38,9 @@ from vllm.model_executor.layers.layernorm import RMSNorm
...
@@ -38,11 +38,9 @@ from vllm.model_executor.layers.layernorm import RMSNorm
from
vllm.model_executor.layers.linear
import
(
QKVParallelLinear
,
from
vllm.model_executor.layers.linear
import
(
QKVParallelLinear
,
RowParallelLinear
)
RowParallelLinear
)
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.pooler
import
Pooler
,
PoolingType
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.rotary_embedding
import
get_rope
from
vllm.model_executor.layers.vocab_parallel_embedding
import
ParallelLMHead
from
vllm.model_executor.layers.vocab_parallel_embedding
import
ParallelLMHead
from
vllm.model_executor.pooling_metadata
import
PoolingMetadata
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.sequence
import
IntermediateTensors
,
PoolerOutput
from
vllm.sequence
import
IntermediateTensors
,
PoolerOutput
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment