Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
656dcc1a
Unverified
Commit
656dcc1a
authored
Jan 18, 2025
by
Ke Bao
Committed by
GitHub
Jan 18, 2025
Browse files
Remove fp8 monkey patch (#2960)
parent
8af7048d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
20 deletions
+0
-20
python/sglang/srt/layers/quantization/__init__.py
python/sglang/srt/layers/quantization/__init__.py
+0
-20
No files found.
python/sglang/srt/layers/quantization/__init__.py
View file @
656dcc1a
...
...
@@ -56,25 +56,6 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
return
QUANTIZATION_METHODS
[
quantization
]
def fp8_get_quant_method(self, layer, prefix):
    """Pick the quantization method object to use for ``layer`` under FP8.

    Written as a free function taking ``self`` so it can be attached to a
    config class as its ``get_quant_method``; it reads ``self.ignored_layers``
    and hands ``self`` to the FP8 method constructors.

    Args:
        self: The config instance; must expose ``ignored_layers``.
        layer: The layer to select a quantization method for.
        prefix: Name passed to ``is_layer_skipped`` together with
            ``self.ignored_layers`` to decide whether the layer is skipped.

    Returns:
        ``UnquantizedLinearMethod()`` for a ``LinearBase`` layer that
        ``is_layer_skipped`` reports as skipped, ``Fp8LinearMethod(self)``
        for any other ``LinearBase`` layer, ``Fp8MoEMethod(self)`` for a
        ``FusedMoE`` layer, and ``None`` for every other layer type.
    """
    # Imports are kept local to the function, as in the original.
    from vllm.model_executor.layers.quantization.utils.quant_utils import (
        is_layer_skipped,
    )

    from sglang.srt.layers.linear import LinearBase, UnquantizedLinearMethod
    from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
    from sglang.srt.layers.quantization.fp8 import Fp8LinearMethod, Fp8MoEMethod

    # The LinearBase check comes first so it wins if a layer matches both.
    if isinstance(layer, LinearBase):
        skipped = is_layer_skipped(prefix, self.ignored_layers)
        return UnquantizedLinearMethod() if skipped else Fp8LinearMethod(self)

    if isinstance(layer, FusedMoE):
        return Fp8MoEMethod(self)

    # Any other layer type gets no quantization method from this config.
    return None
def
gptq_get_quant_method
(
self
,
layer
,
prefix
):
from
vllm.model_executor.layers.quantization.gptq_marlin
import
(
GPTQMarlinLinearMethod
,
...
...
@@ -126,7 +107,6 @@ def patch_vllm_linear_base_isinstance():
def
apply_monkey_patches
():
"""Apply all monkey patches in one place."""
setattr
(
Fp8Config
,
"get_quant_method"
,
fp8_get_quant_method
)
setattr
(
GPTQMarlinConfig
,
"get_quant_method"
,
gptq_get_quant_method
)
setattr
(
AWQMarlinConfig
,
"get_quant_method"
,
awq_get_quant_method
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment