Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6395b73e
Commit
6395b73e
authored
Mar 17, 2026
by
liuchy5
Browse files
feat:接入VLLM_USE_FUSED_FILL_RMS_CAT优化
parent
af0e6d8f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
1 deletion
+19
-1
vllm/model_executor/models/deepseek_mtp.py
vllm/model_executor/models/deepseek_mtp.py
+19
-1
No files found.
vllm/model_executor/models/deepseek_mtp.py
View file @
6395b73e
...
...
@@ -41,7 +41,7 @@ from .interfaces import SupportsPP
from
vllm
import
_custom_ops
as
ops
from
vllm.model_executor.layers.quantization.blockwise_int8
import
BlockInt8Config
import
vllm.envs
as
envs
from
vllm.utils
import
direct_register_custom_op
logger
=
init_logger
(
__name__
)
...
...
@@ -102,6 +102,24 @@ class DeepSeekMultiTokenPredictorLayer(nn.Module):
config
=
self
.
config
,
topk_indices_buffer
=
topk_indices_buffer
,
)
def fuse_fill_rms_x2_concat(
    hidden_states_fuse: torch.Tensor,
    positions: torch.Tensor,
    inputs_embeds: torch.Tensor,
    previous_hidden_states: torch.Tensor,
    weight_inputs_embeds: torch.Tensor,
    weight_previous_hidden_states: torch.Tensor,
    epsilon: float,
) -> None:
    """Forward all arguments, unchanged and in order, to the lightop kernel.

    This is a thin wrapper: it performs no computation of its own and
    returns ``None``. Whatever the kernel produces is written through the
    tensors passed in.

    NOTE(review): the kernel's exact semantics are not visible here. The
    name suggests a fused fill + two RMS norms + concat -- confirm against
    the lightop documentation.
    """
    # Imported at call time so that merely importing this module does not
    # require lightop. The alias avoids rebinding this wrapper's own name
    # inside its body.
    from lightop import fuse_fill_rms_x2_concat as _lightop_kernel

    _lightop_kernel(
        hidden_states_fuse,
        positions,
        inputs_embeds,
        previous_hidden_states,
        weight_inputs_embeds,
        weight_previous_hidden_states,
        epsilon,
    )
def fuse_fill_rms_x2_concat_fake(
    hidden_states_fuse: torch.Tensor,
    positions: torch.Tensor,
    inputs_embeds: torch.Tensor,
    previous_hidden_states: torch.Tensor,
    weight_inputs_embeds: torch.Tensor,
    weight_previous_hidden_states: torch.Tensor,
    epsilon: float,
) -> None:
    """No-op stand-in with the same signature as ``fuse_fill_rms_x2_concat``.

    It touches none of its arguments and returns ``None``. It is the
    function handed to the op registration as ``fake_impl``.

    NOTE(review): presumably this is what runs during tracing/compilation
    when no real kernel should execute -- confirm against the
    ``direct_register_custom_op`` documentation.
    """
    return None
# Register the lightop wrapper as a custom op named
# "fuse_fill_rms_x2_concat", pairing the real implementation with its
# no-op fake counterpart.
direct_register_custom_op(
    op_name="fuse_fill_rms_x2_concat",
    op_func=fuse_fill_rms_x2_concat,
    fake_impl=fuse_fill_rms_x2_concat_fake,
    # Arguments declared to the registry as modified in place by the op.
    # NOTE(review): the kernel itself is not visible here -- confirm that
    # these are exactly the tensors lightop writes to.
    mutates_args=["hidden_states_fuse", "inputs_embeds"],
)
def
forward
(
self
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment