Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e445bd91
Commit
e445bd91
authored
Dec 11, 2025
by
wujl5
Committed by
zhuwenwen
Dec 11, 2025
Browse files
fix: 细化量化模型开启融合场景
parent
c0e0e7cd
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
14 deletions
+5
-14
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/linear.py
+2
-12
vllm/model_executor/model_loader/utils.py
vllm/model_executor/model_loader/utils.py
+3
-2
No files found.
vllm/model_executor/layers/linear.py
View file @
e445bd91
...
...
@@ -33,18 +33,8 @@ from vllm.platforms import current_platform
import os
from vllm.model_executor.utils import gemm_bank_conf
if envs.USE_FUSED_RMS_QUANT:
    try:
        from lmslim.quantize.quant_ops import lm_faster_rmsquant
    except Exception as e:
        print(f"Error: Import fused rmsquant error: {e}")
if envs.USE_FUSED_SILU_MUL_QUANT:
    try:
        # from lightop import fuse_silu_mul_quant
        from lmslim.quantize.quant_ops import lm_fuse_silu_mul_quant
    except Exception as e:
        print(f"Error: Import fused silu_mul_qunat error: {e}")
from lmslim.quantize.quant_ops import lm_faster_rmsquant
from lmslim.quantize.quant_ops import lm_fuse_silu_mul_quant
logger = init_logger(__name__)
...
...
vllm/model_executor/model_loader/utils.py
View file @
e445bd91
...
...
@@ -261,7 +261,7 @@ def get_model_architecture(
# os.environ['VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT'] = '1'
if
not
envs
.
is_set
(
"VLLM_SCHED_ENABLE_MINIMAL_INJECTION"
):
os
.
environ
[
'VLLM_SCHED_ENABLE_MINIMAL_INJECTION'
]
=
'1'
if
model_config
.
quantization
is
not
None
:
if
model_config
.
quantization
in
{
"slimquant_w4a8"
,
"slimquant_w4a8_marlin"
,
"slimquant_compressed_tensors_marlin"
,
"compressed-tensors"
}
:
if
not
envs
.
is_set
(
"USE_FUSED_RMS_QUANT"
):
os
.
environ
[
'USE_FUSED_RMS_QUANT'
]
=
'1'
if
not
envs
.
is_set
(
"USE_FUSED_SILU_MUL_QUANT"
):
...
...
@@ -306,7 +306,8 @@ def get_model_architecture(
# os.environ['VLLM_USE_LIGHTOP_RMS_ROPE_CONCAT'] = '1'
if
not
envs
.
is_set
(
"VLLM_SCHED_ENABLE_MINIMAL_INJECTION"
):
os
.
environ
[
'VLLM_SCHED_ENABLE_MINIMAL_INJECTION'
]
=
'1'
if
model_config
.
quantization
is
not
None
:
if
model_config
.
quantization
in
{
"slimquant_w4a8"
,
"slimquant_w4a8_marlin"
,
"slimquant_compressed_tensors_marlin"
,
"compressed-tensors"
}:
if
not
envs
.
is_set
(
"USE_FUSED_RMS_QUANT"
):
os
.
environ
[
'USE_FUSED_RMS_QUANT'
]
=
'1'
if
not
envs
.
is_set
(
"USE_FUSED_SILU_MUL_QUANT"
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment