Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0d4ff65d
Commit
0d4ff65d
authored
Aug 06, 2025
by
zhuwenwen
Browse files
update lmslim import
parent
3ae8665d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
12 additions
and
6 deletions
+12
-6
vllm/model_executor/layers/fused_moe/fused_moe.py
vllm/model_executor/layers/fused_moe/fused_moe.py
+7
-4
vllm/model_executor/layers/fused_moe/utils.py
vllm/model_executor/layers/fused_moe/utils.py
+5
-2
No files found.
vllm/model_executor/layers/fused_moe/fused_moe.py
View file @
0d4ff65d
...
@@ -25,10 +25,13 @@ from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
...
@@ -25,10 +25,13 @@ from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
from
vllm.model_executor.layers.fused_moe.moe_align_block_size
import
(
from
vllm.model_executor.layers.fused_moe.moe_align_block_size
import
(
moe_align_block_size
)
moe_align_block_size
)
from
lmslim.layers.gemm.int8_utils
import
(
try
:
per_token_group_quant_int8
,
per_token_quant_int8
)
from
lmslim.layers.gemm.int8_utils
import
(
from
lmslim.layers.fused_moe.fuse_moe_int8
import
(
fused_experts_impl_int8
,
get_w8a8moe_json
)
per_token_group_quant_int8
,
per_token_quant_int8
)
from
lmslim.layers.fused_moe.fuse_moe_w4a8
import
fused_experts_impl_w4a8
from
lmslim.layers.fused_moe.fuse_moe_int8
import
(
fused_experts_impl_int8
,
get_w8a8moe_json
)
from
lmslim.layers.fused_moe.fuse_moe_w4a8
import
fused_experts_impl_w4a8
except
Exception
:
print
(
"INFO: Please install lmslim if you want to infer the quantitative model of moe.
\n
"
)
from
vllm.model_executor.layers.fused_moe.prepare_finalize
import
(
from
vllm.model_executor.layers.fused_moe.prepare_finalize
import
(
MoEPrepareAndFinalizeNoEP
)
MoEPrepareAndFinalizeNoEP
)
...
...
vllm/model_executor/layers/fused_moe/utils.py
View file @
0d4ff65d
...
@@ -8,8 +8,11 @@ import torch
...
@@ -8,8 +8,11 @@ import torch
from
vllm
import
_custom_ops
as
ops
from
vllm
import
_custom_ops
as
ops
from
vllm.model_executor.layers.quantization.utils.fp8_utils
import
(
from
vllm.model_executor.layers.quantization.utils.fp8_utils
import
(
per_token_group_quant_fp8
)
per_token_group_quant_fp8
)
from
lmslim.layers.gemm.int8_utils
import
(
try
:
per_token_group_quant_int8
,
per_token_quant_int8
)
from
lmslim.layers.gemm.int8_utils
import
(
per_token_group_quant_int8
,
per_token_quant_int8
)
except
Exception
:
print
(
"INFO: Please install lmslim if you want to use int utils.
\n
"
)
from
vllm.utils
import
cdiv
from
vllm.utils
import
cdiv
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment