Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
22a4e07b
"tests/vscode:/vscode.git/clone" did not exist on "300892fe16d2b098d5cd9aa3fd67869707972d13"
Commit
22a4e07b
authored
Sep 08, 2025
by
王敏
Browse files
Merge remote-tracking branch 'origin/v0.9.2-dev-ep_wm' into v0.9.2-dev-ep_wm
parents
fe70dcb2
9c86df96
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
79 additions
and
62 deletions
+79
-62
vllm/model_executor/layers/quantization/slimquant_w4a8.py
vllm/model_executor/layers/quantization/slimquant_w4a8.py
+79
-62
No files found.
vllm/model_executor/layers/quantization/slimquant_w4a8.py
View file @
22a4e07b
...
@@ -21,7 +21,10 @@ from vllm.utils import W8a8GetCacheJSON
...
@@ -21,7 +21,10 @@ from vllm.utils import W8a8GetCacheJSON
import
os
import
os
from
vllm
import
_custom_ops
as
ops
from
vllm
import
_custom_ops
as
ops
try
:
from
lmslim.layers.fused_moe.fuse_moe_w4a8
import
fused_experts_impl_w4a8_ep
except
Exception
:
print
(
"INFO: Please install lmslim if you want to infer the quantitative model of moe.
\n
"
)
W8A8_TRITONJSON
=
W8a8GetCacheJSON
()
W8A8_TRITONJSON
=
W8a8GetCacheJSON
()
def
baseline_scaled_mm
(
a
:
torch
.
Tensor
,
def
baseline_scaled_mm
(
a
:
torch
.
Tensor
,
...
@@ -328,7 +331,21 @@ class SlimQuantW4A8Int8MoEMethod:
...
@@ -328,7 +331,21 @@ class SlimQuantW4A8Int8MoEMethod:
layer
.
w2_weight_scale
.
data
,
requires_grad
=
False
layer
.
w2_weight_scale
.
data
,
requires_grad
=
False
)
)
def
apply
(
def
apply_ep
(
#dp+ep
self
,
layer
:
torch
.
nn
.
Module
,
hidden_states
:
torch
.
Tensor
,
tokens_per_expert
:
torch
.
Tensor
,
)
->
torch
.
Tensor
:
return
fused_experts_impl_w4a8_ep
(
hidden_states
,
layer
.
w13_weight
,
layer
.
w2_weight
,
layer
.
w13_weight_scale
,
layer
.
w2_weight_scale
,
tokens_per_expert
)
def
apply
(
# tp
self
,
self
,
layer
:
torch
.
nn
.
Module
,
layer
:
torch
.
nn
.
Module
,
x
:
torch
.
Tensor
,
x
:
torch
.
Tensor
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment