Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
656dcc1a
"docs/vscode:/vscode.git/clone" did not exist on "42180bd929639c137706be99c93626da93a36f2e"
Unverified
Commit
656dcc1a
authored
Jan 18, 2025
by
Ke Bao
Committed by
GitHub
Jan 18, 2025
Browse files
Remove fp8 monkey patch (#2960)
parent
8af7048d
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
20 deletions
+0
-20
python/sglang/srt/layers/quantization/__init__.py
python/sglang/srt/layers/quantization/__init__.py
+0
-20
No files found.
python/sglang/srt/layers/quantization/__init__.py
View file @
656dcc1a
...
...
@@ -56,25 +56,6 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
return
QUANTIZATION_METHODS
[
quantization
]
def
fp8_get_quant_method
(
self
,
layer
,
prefix
):
"""Enhanced get_quant_method for FP8 config."""
from
vllm.model_executor.layers.quantization.utils.quant_utils
import
(
is_layer_skipped
,
)
from
sglang.srt.layers.linear
import
LinearBase
,
UnquantizedLinearMethod
from
sglang.srt.layers.moe.fused_moe_triton.layer
import
FusedMoE
from
sglang.srt.layers.quantization.fp8
import
Fp8LinearMethod
,
Fp8MoEMethod
if
isinstance
(
layer
,
LinearBase
):
if
is_layer_skipped
(
prefix
,
self
.
ignored_layers
):
return
UnquantizedLinearMethod
()
return
Fp8LinearMethod
(
self
)
elif
isinstance
(
layer
,
FusedMoE
):
return
Fp8MoEMethod
(
self
)
return
None
def
gptq_get_quant_method
(
self
,
layer
,
prefix
):
from
vllm.model_executor.layers.quantization.gptq_marlin
import
(
GPTQMarlinLinearMethod
,
...
...
@@ -126,7 +107,6 @@ def patch_vllm_linear_base_isinstance():
def
apply_monkey_patches
():
"""Apply all monkey patches in one place."""
setattr
(
Fp8Config
,
"get_quant_method"
,
fp8_get_quant_method
)
setattr
(
GPTQMarlinConfig
,
"get_quant_method"
,
gptq_get_quant_method
)
setattr
(
AWQMarlinConfig
,
"get_quant_method"
,
awq_get_quant_method
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment