Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
4746aaea
"CMakeLists.txt" did not exist on "ad08b8ce131bacd6f61dfcd49e5f1af3cac76ca7"
Unverified
Commit
4746aaea
authored
Aug 21, 2025
by
Yineng Zhang
Committed by
GitHub
Aug 21, 2025
Browse files
fix: support fb fp8 (#9462)
parent
10d34f74
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
7 deletions
+11
-7
python/sglang/srt/layers/quantization/__init__.py
python/sglang/srt/layers/quantization/__init__.py
+5
-5
python/sglang/srt/layers/quantization/fpgemm_fp8.py
python/sglang/srt/layers/quantization/fpgemm_fp8.py
+6
-2
No files found.
python/sglang/srt/layers/quantization/__init__.py
View file @
4746aaea
...
...
@@ -16,7 +16,6 @@ try:
)
from
vllm.model_executor.layers.quantization.deepspeedfp
import
DeepSpeedFPConfig
from
vllm.model_executor.layers.quantization.experts_int8
import
ExpertsInt8Config
from
vllm.model_executor.layers.quantization.fbgemm_fp8
import
FBGEMMFp8Config
from
vllm.model_executor.layers.quantization.gguf
import
GGUFConfig
from
vllm.model_executor.layers.quantization.gptq_marlin_24
import
(
GPTQMarlin24Config
,
...
...
@@ -37,9 +36,9 @@ except ImportError as e:
AQLMConfig
=
BitsAndBytesConfig
=
CompressedTensorsConfig
=
DeepSpeedFPConfig
=
(
ExpertsInt8Config
)
=
FBGEMMFp8Config
=
GGUFConfig
=
GPTQMarlin24Config
=
MarlinConfig
=
QQQConfig
=
(
Int8Tpu
Config
)
=
DummyConfig
)
=
GGUFConfig
=
GPTQMarlin24Config
=
MarlinConfig
=
QQQConfig
=
Int8TpuConfig
=
(
Dummy
Config
)
from
sglang.srt.layers.quantization.awq
import
AWQConfig
,
AWQMarlinConfig
...
...
@@ -49,6 +48,7 @@ from sglang.srt.layers.quantization.compressed_tensors.compressed_tensors import
CompressedTensorsConfig
,
)
from
sglang.srt.layers.quantization.fp8
import
Fp8Config
from
sglang.srt.layers.quantization.fpgemm_fp8
import
FBGEMMFp8Config
from
sglang.srt.layers.quantization.gptq
import
GPTQConfig
,
GPTQMarlinConfig
from
sglang.srt.layers.quantization.modelopt_quant
import
(
ModelOptFp4Config
,
...
...
@@ -85,6 +85,7 @@ BASE_QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
"qoq"
:
QoQConfig
,
"w4afp8"
:
W4AFp8Config
,
"petit_nvfp4"
:
PetitNvFp4Config
,
"fbgemm_fp8"
:
FBGEMMFp8Config
,
}
...
...
@@ -109,7 +110,6 @@ VLLM_QUANTIZATION_METHODS = {
"aqlm"
:
AQLMConfig
,
"deepspeedfp"
:
DeepSpeedFPConfig
,
"tpu_int8"
:
Int8TpuConfig
,
"fbgemm_fp8"
:
FBGEMMFp8Config
,
"marlin"
:
MarlinConfig
,
"gguf"
:
GGUFConfig
,
"gptq_marlin_24"
:
GPTQMarlin24Config
,
...
...
python/sglang/srt/layers/quantization/fpgemm_fp8.py
View file @
4746aaea
...
...
@@ -8,7 +8,7 @@ import torch
from
torch.nn
import
Module
from
torch.nn.parameter
import
Parameter
from
sglang.srt.layers.linear
import
LinearBase
,
LinearMethodBase
from
sglang.srt.layers.linear
import
LinearBase
from
sglang.srt.layers.parameter
import
ChannelQuantScaleParameter
,
ModelWeightParameter
from
sglang.srt.layers.quantization.base_config
import
(
FusedMoEMethodBase
,
...
...
@@ -16,6 +16,7 @@ from sglang.srt.layers.quantization.base_config import (
QuantizationConfig
,
QuantizeMethodBase
,
)
from
sglang.srt.layers.quantization.fp8_kernel
import
is_fp8_fnuz
from
sglang.srt.layers.quantization.fp8_utils
import
(
apply_fp8_linear
,
can_auto_enable_marlin_fp8
,
...
...
@@ -28,7 +29,7 @@ from sglang.srt.layers.quantization.marlin_utils_fp8 import (
)
from
sglang.srt.layers.quantization.unquant
import
UnquantizedLinearMethod
from
sglang.srt.layers.quantization.utils
import
is_layer_skipped
,
replace_parameter
from
sglang.srt.utils
import
get_bool_env_var
,
is_cuda
,
is_fp8_fnuz
from
sglang.srt.utils
import
get_bool_env_var
,
is_cuda
_is_cuda
=
is_cuda
()
_is_fp8_fnuz
=
is_fp8_fnuz
()
...
...
@@ -88,6 +89,9 @@ class FBGEMMFp8Config(QuantizationConfig):
return
FBGEMMFp8LinearMethod
(
self
)
return
None
def
get_scaled_act_names
(
self
)
->
List
[
str
]:
return
[]
class
FBGEMMFp8LinearMethod
(
LinearMethodBase
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment