Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f9795c8c
Commit
f9795c8c
authored
Jul 03, 2025
by
zhuwenwen
Browse files
增加dpsk awq mtp推理的支持
parent
058b32ae
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
0 deletions
+4
-0
vllm/model_executor/models/deepseek_mtp.py
vllm/model_executor/models/deepseek_mtp.py
+4
-0
No files found.
vllm/model_executor/models/deepseek_mtp.py
View file @
f9795c8c
...
@@ -27,6 +27,7 @@ from .deepseek_v2 import (DeepseekV2DecoderLayer,
...
@@ -27,6 +27,7 @@ from .deepseek_v2 import (DeepseekV2DecoderLayer,
from
.interfaces
import
SupportsPP
from
.interfaces
import
SupportsPP
from
.utils
import
maybe_prefix
from
.utils
import
maybe_prefix
from
vllm
import
_custom_ops
as
ops
from
vllm
import
_custom_ops
as
ops
from
vllm.model_executor.layers.quantization.blockwise_int8
import
BlockInt8Config
class
SharedHead
(
nn
.
Module
):
class
SharedHead
(
nn
.
Module
):
...
@@ -164,6 +165,9 @@ class DeepSeekMTP(nn.Module, SupportsPP):
...
@@ -164,6 +165,9 @@ class DeepSeekMTP(nn.Module, SupportsPP):
self
.
quant_method
=
quant_config
.
get_name
()
self
.
quant_method
=
quant_config
.
get_name
()
os
.
environ
[
'LLAMA_NN'
]
=
'0'
os
.
environ
[
'LLAMA_NN'
]
=
'0'
os
.
environ
[
'LM_NN'
]
=
'0'
os
.
environ
[
'LM_NN'
]
=
'0'
# The AWQ layer of MTP uses BlockInt8W8A8.
if
self
.
quant_method
==
"moe_wna16"
:
vllm_config
.
quant_config
=
BlockInt8Config
(
is_checkpoint_int8_serialized
=
True
,
weight_block_size
=
[
128
,
128
])
self
.
model
=
DeepSeekMultiTokenPredictor
(
vllm_config
=
vllm_config
,
self
.
model
=
DeepSeekMultiTokenPredictor
(
vllm_config
=
vllm_config
,
prefix
=
maybe_prefix
(
prefix
=
maybe_prefix
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment