Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f5a7f12c
Commit
f5a7f12c
authored
Jul 31, 2025
by
gaoqiong
Browse files
增加fused moe文件中w4a8的相关修改
parent
7e5fb6fe
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
2 deletions
+4
-2
vllm/model_executor/layers/fused_moe/fused_moe.py
vllm/model_executor/layers/fused_moe/fused_moe.py
+4
-2
No files found.
vllm/model_executor/layers/fused_moe/fused_moe.py
View file @
f5a7f12c
...
@@ -1206,7 +1206,8 @@ def get_config_dtype_str(
...
@@ -1206,7 +1206,8 @@ def get_config_dtype_str(
use_int4_w4a16
:
Optional
[
bool
]
=
False
,
use_int4_w4a16
:
Optional
[
bool
]
=
False
,
use_int8_w8a16
:
Optional
[
bool
]
=
False
,
use_int8_w8a16
:
Optional
[
bool
]
=
False
,
use_fp8_w8a8
:
Optional
[
bool
]
=
False
,
use_fp8_w8a8
:
Optional
[
bool
]
=
False
,
use_int8_w8a8
:
Optional
[
bool
]
=
False
)
->
Optional
[
str
]:
use_int8_w8a8
:
Optional
[
bool
]
=
False
,
use_int4_w4a8
:
Optional
[
bool
]
=
False
)
->
Optional
[
str
]:
if
use_fp8_w8a8
:
if
use_fp8_w8a8
:
return
"fp8_w8a8"
return
"fp8_w8a8"
elif
use_int8_w8a8
:
elif
use_int8_w8a8
:
...
@@ -1215,7 +1216,7 @@ def get_config_dtype_str(
...
@@ -1215,7 +1216,7 @@ def get_config_dtype_str(
return
"int8_w8a16"
return
"int8_w8a16"
elif
use_int4_w4a16
:
elif
use_int4_w4a16
:
return
"int4_w4a16"
return
"int4_w4a16"
elif
use_int4_w4a
16
:
elif
use_int4_w4a
8
:
return
"int4_w4a8"
return
"int4_w4a8"
elif
dtype
==
torch
.
float
:
elif
dtype
==
torch
.
float
:
# avoiding cases where kernel fails when float32 MoE
# avoiding cases where kernel fails when float32 MoE
...
@@ -1961,6 +1962,7 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
...
@@ -1961,6 +1962,7 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
config_dtype
=
get_config_dtype_str
(
use_fp8_w8a8
=
self
.
use_fp8_w8a8
,
config_dtype
=
get_config_dtype_str
(
use_fp8_w8a8
=
self
.
use_fp8_w8a8
,
use_int8_w8a16
=
self
.
use_int8_w8a16
,
use_int8_w8a16
=
self
.
use_int8_w8a16
,
use_int4_w4a16
=
self
.
use_int4_w4a16
,
use_int4_w4a16
=
self
.
use_int4_w4a16
,
use_int4_w4a8
=
self
.
use_int4_w4a8
,
dtype
=
hidden_states
.
dtype
)
dtype
=
hidden_states
.
dtype
)
config
=
try_get_optimal_moe_config
(
config
=
try_get_optimal_moe_config
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment