Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4ae218c1
Unverified
Commit
4ae218c1
authored
Apr 06, 2026
by
Wentao Ye
Committed by
GitHub
Apr 06, 2026
Browse files
[Refactor] Remove unused dead code (#38842)
Signed-off-by:
yewentao256
<
zhyanwentao@126.com
>
parent
f40d9879
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
0 additions
and
74 deletions
+0
-74
vllm/model_executor/models/mlp_speculator.py
vllm/model_executor/models/mlp_speculator.py
+0
-53
vllm/v1/attention/ops/flashmla.py
vllm/v1/attention/ops/flashmla.py
+0
-13
vllm/v1/executor/ray_distributed_executor.py
vllm/v1/executor/ray_distributed_executor.py
+0
-8
No files found.
vllm/model_executor/models/mlp_speculator.py
View file @
4ae218c1
...
...
@@ -17,8 +17,6 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
.utils
import
maybe_prefix
SQRT2
=
2
**
0.5
class
MLPSpeculatorLayerNorm
(
nn
.
Module
):
"""
...
...
@@ -171,57 +169,6 @@ class MLPSpeculator(nn.Module):
config
.
vocab_size
,
config
.
vocab_size
,
1.0
)
# NOTE(woosuk): This method is commented out because it is old code
# using V0. We should either port it to V1 or remove it.
# def generate_proposals(
# self,
# input_ids: torch.Tensor,
# previous_hidden_states: torch.Tensor,
# num_predict_tokens: int,
# sampling_metadata: SamplingMetadata,
# ) -> list[SamplerOutput]:
# if num_predict_tokens > self.max_speculative_tokens:
# raise ValueError(f"Max speculative tokens for model is "
# f"{self.max_speculative_tokens}, but "
# f"{num_predict_tokens} were requested")
# # b x 1 x d
# previous_hidden_states = previous_hidden_states.unsqueeze(1)
# if self.scale_input:
# previous_hidden_states = self.ln0(previous_hidden_states) / SQRT2
# # b x 1
# last_tokens = input_ids.unsqueeze(1)
# next_tokens = []
# for head_index in range(num_predict_tokens):
# # Project and predict
# z = self.emb[head_index](last_tokens) # b k d
# states = self.proj[head_index](previous_hidden_states)
# # Weighted add of state_weight*state and emb_weight*z
# # Let subsequent LN take care of denominator
# # state_weight is close to 1, so shouldn't be any precision issues
# states.add_(z, alpha=self.emb_weight / self.state_weight)
# states = self.activation(self.ln[head_index](states)) # b k d
# previous_hidden_states = states
# # TODO: not yet supporting top_k_tokens_per_head
# states = states.flatten(0, 1)
# logits = self.logits_processor(self.head[head_index], states,
# sampling_metadata)
# output = self.sampler(logits, sampling_metadata)
# last_tokens = output.sampled_token_ids
# next_tokens.append(output)
# return next_tokens
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
set
[
str
]:
params_dict
=
dict
(
self
.
named_parameters
())
loaded_params
:
set
[
str
]
=
set
()
...
...
vllm/v1/attention/ops/flashmla.py
View file @
4ae218c1
...
...
@@ -151,16 +151,3 @@ def flash_mla_with_kvcache_fp8(
descale_k
,
)
return
out
,
softmax_lse
#
# TODO: Add fake functions
#
# @register_fake("_flashmla_C::get_mla_metadata")
# def _get_mla_metadata_fake(....) -> Tuple[torch.Tensor, torch.Tensor]:
# return ....
#
# @register_fake("_flashmla_C::fwd_kvcache_mla")
# def _fwd_kvcache_mla_fake(....) -> Tuple[torch.Tensor, torch.Tensor]:
# return ....
#
vllm/v1/executor/ray_distributed_executor.py
deleted
100644 → 0
View file @
f40d9879
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
vllm.v1.executor.ray_executor
import
(
RayDistributedExecutor
as
_RayDistributedExecutor
,
)
# For backwards compatibility.
RayDistributedExecutor
=
_RayDistributedExecutor
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment