Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
96802ca0
Commit
96802ca0
authored
Feb 26, 2025
by
zhuwenwen
Browse files
update deepseek-v2
parent
4d4c6fe3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
10 deletions
+8
-10
benchmarks/kernels/benchmark_moe_int4.py
benchmarks/kernels/benchmark_moe_int4.py
+5
-5
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+3
-5
No files found.
benchmarks/kernels/benchmark_moe_int4.py
View file @
96802ca0
...
...
@@ -161,7 +161,7 @@ def benchmark_config(
nn_moe
=
False
block_shape
=
[
0
,
group_size
]
input_gating
=
torch
.
randn
(
num_tokens
,
num_experts
,
dtype
=
torch
.
float32
)
print
(
f
"shape:
{
x
.
shape
[
0
]
}
| config:
{
config
}
"
)
def
prepare
(
i
:
int
):
input_gating
.
copy_
(
gating_output
[
i
])
...
...
@@ -187,6 +187,7 @@ def benchmark_config(
a2_scale
=
a2_scale
,
use_nn_moe
=
nn_moe
,
block_shape
=
block_shape
,
moe_ep_size
=
1
,
)
# JIT compilation & warmup
...
...
@@ -221,8 +222,7 @@ def benchmark_config(
end_event
.
record
()
end_event
.
synchronize
()
latencies
.
append
(
start_event
.
elapsed_time
(
end_event
))
avg
=
sum
(
latencies
)
/
(
num_iters
*
10
)
*
1000
# us
print
(
f
"avg:
{
avg
}
"
)
avg
=
sum
(
latencies
)
/
(
num_iters
)
*
1000
# us
# graph.reset()
return
avg
...
...
@@ -694,7 +694,7 @@ if __name__ == "__main__":
parser
=
FlexibleArgumentParser
()
parser
.
add_argument
(
"--model"
,
type
=
str
,
default
=
"
/home/yang/llm-models/vllm-awq-models/DeepSeek-R1-AWQ/
"
)
default
=
""
)
parser
.
add_argument
(
"--tp-size"
,
"-tp"
,
"--tensor-parallel-size"
,
...
...
@@ -711,4 +711,4 @@ if __name__ == "__main__":
parser
.
add_argument
(
"--trust-remote-code"
,
action
=
"store_true"
,
default
=
True
)
args
=
parser
.
parse_args
()
main
(
args
)
main
(
args
)
\ No newline at end of file
vllm/model_executor/models/deepseek_v2.py
View file @
96802ca0
...
...
@@ -666,8 +666,6 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
# 暂时awq不支持cutlass
envs
.
VLLM_USE_TRITON_AWQ
=
True
config
=
vllm_config
.
model_config
.
hf_config
quant_config
=
vllm_config
.
quant_config
...
...
@@ -875,13 +873,13 @@ class DeepseekV2ForCausalLM(nn.Module, SupportsPP):
weight
.
data
.
copy_
(
_weight
)
weight
.
data
=
weight
.
data
.
reshape
(
ori_shape
[
1
],
-
1
)
# 暂时不支持TN
if
self
.
quant_method
==
"awq"
and
not
envs
.
VLLM_USE_TRITON_AWQ
:
if
self
.
config
.
quantization_config
[
"
quant_method
"
]
==
"awq"
and
not
envs
.
VLLM_USE_TRITON_AWQ
:
lay_key_words
=
[
"self_attn.q_a_proj.qweight"
,
"self_attn.q_b_proj.qweight"
,
"self_attn.kv_a_proj_with_mqa.qweight"
,
"self_attn.kv_b_proj.qweight"
,
"self_attn.kv_a_proj_with_mqa.qweight"
,
"self_attn.o_proj.qweight"
,
"mlp.gate_up_proj.qweight"
,
"mlp.down_proj.qweight"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment