Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
4746aaea
Unverified
Commit
4746aaea
authored
Aug 21, 2025
by
Yineng Zhang
Committed by
GitHub
Aug 21, 2025
Browse files
fix: support fb fp8 (#9462)
parent
10d34f74
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
7 deletions
+11
-7
python/sglang/srt/layers/quantization/__init__.py
python/sglang/srt/layers/quantization/__init__.py
+5
-5
python/sglang/srt/layers/quantization/fpgemm_fp8.py
python/sglang/srt/layers/quantization/fpgemm_fp8.py
+6
-2
No files found.
python/sglang/srt/layers/quantization/__init__.py
View file @
4746aaea
...
...
@@ -16,7 +16,6 @@ try:
)
from
vllm.model_executor.layers.quantization.deepspeedfp
import
DeepSpeedFPConfig
from
vllm.model_executor.layers.quantization.experts_int8
import
ExpertsInt8Config
from
vllm.model_executor.layers.quantization.fbgemm_fp8
import
FBGEMMFp8Config
from
vllm.model_executor.layers.quantization.gguf
import
GGUFConfig
from
vllm.model_executor.layers.quantization.gptq_marlin_24
import
(
GPTQMarlin24Config
,
...
...
@@ -37,9 +36,9 @@ except ImportError as e:
AQLMConfig
=
BitsAndBytesConfig
=
CompressedTensorsConfig
=
DeepSpeedFPConfig
=
(
ExpertsInt8Config
)
=
FBGEMMFp8Config
=
GGUFConfig
=
GPTQMarlin24Config
=
MarlinConfig
=
QQQConfig
=
(
Int8Tpu
Config
)
=
DummyConfig
)
=
GGUFConfig
=
GPTQMarlin24Config
=
MarlinConfig
=
QQQConfig
=
Int8TpuConfig
=
(
Dummy
Config
)
from
sglang.srt.layers.quantization.awq
import
AWQConfig
,
AWQMarlinConfig
...
...
@@ -49,6 +48,7 @@ from sglang.srt.layers.quantization.compressed_tensors.compressed_tensors import
CompressedTensorsConfig
,
)
from
sglang.srt.layers.quantization.fp8
import
Fp8Config
from
sglang.srt.layers.quantization.fpgemm_fp8
import
FBGEMMFp8Config
from
sglang.srt.layers.quantization.gptq
import
GPTQConfig
,
GPTQMarlinConfig
from
sglang.srt.layers.quantization.modelopt_quant
import
(
ModelOptFp4Config
,
...
...
@@ -85,6 +85,7 @@ BASE_QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
"qoq"
:
QoQConfig
,
"w4afp8"
:
W4AFp8Config
,
"petit_nvfp4"
:
PetitNvFp4Config
,
"fbgemm_fp8"
:
FBGEMMFp8Config
,
}
...
...
@@ -109,7 +110,6 @@ VLLM_QUANTIZATION_METHODS = {
"aqlm"
:
AQLMConfig
,
"deepspeedfp"
:
DeepSpeedFPConfig
,
"tpu_int8"
:
Int8TpuConfig
,
"fbgemm_fp8"
:
FBGEMMFp8Config
,
"marlin"
:
MarlinConfig
,
"gguf"
:
GGUFConfig
,
"gptq_marlin_24"
:
GPTQMarlin24Config
,
...
...
python/sglang/srt/layers/quantization/fpgemm_fp8.py
View file @
4746aaea
...
...
@@ -8,7 +8,7 @@ import torch
from
torch.nn
import
Module
from
torch.nn.parameter
import
Parameter
from
sglang.srt.layers.linear
import
LinearBase
,
LinearMethodBase
from
sglang.srt.layers.linear
import
LinearBase
from
sglang.srt.layers.parameter
import
ChannelQuantScaleParameter
,
ModelWeightParameter
from
sglang.srt.layers.quantization.base_config
import
(
FusedMoEMethodBase
,
...
...
@@ -16,6 +16,7 @@ from sglang.srt.layers.quantization.base_config import (
QuantizationConfig
,
QuantizeMethodBase
,
)
from
sglang.srt.layers.quantization.fp8_kernel
import
is_fp8_fnuz
from
sglang.srt.layers.quantization.fp8_utils
import
(
apply_fp8_linear
,
can_auto_enable_marlin_fp8
,
...
...
@@ -28,7 +29,7 @@ from sglang.srt.layers.quantization.marlin_utils_fp8 import (
)
from
sglang.srt.layers.quantization.unquant
import
UnquantizedLinearMethod
from
sglang.srt.layers.quantization.utils
import
is_layer_skipped
,
replace_parameter
from
sglang.srt.utils
import
get_bool_env_var
,
is_cuda
,
is_fp8_fnuz
from
sglang.srt.utils
import
get_bool_env_var
,
is_cuda
_is_cuda
=
is_cuda
()
_is_fp8_fnuz
=
is_fp8_fnuz
()
...
...
@@ -88,6 +89,9 @@ class FBGEMMFp8Config(QuantizationConfig):
return
FBGEMMFp8LinearMethod
(
self
)
return
None
def
get_scaled_act_names
(
self
)
->
List
[
str
]:
return
[]
class
FBGEMMFp8LinearMethod
(
LinearMethodBase
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment