Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
b5fb4ef5
Unverified
Commit
b5fb4ef5
authored
Jan 08, 2025
by
Ke Bao
Committed by
GitHub
Jan 08, 2025
Browse files
Update modelopt config and fix running issue (#2792)
parent
2e6346fc
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
2 additions
and
1 deletion
+2
-1
python/sglang/srt/layers/quantization/__init__.py
python/sglang/srt/layers/quantization/__init__.py
+1
-1
python/sglang/srt/layers/quantization/modelopt_quant.py
python/sglang/srt/layers/quantization/modelopt_quant.py
+1
-0
No files found.
python/sglang/srt/layers/quantization/__init__.py
View file @
b5fb4ef5
...
@@ -17,12 +17,12 @@ from vllm.model_executor.layers.quantization.gptq import GPTQConfig
...
@@ -17,12 +17,12 @@ from vllm.model_executor.layers.quantization.gptq import GPTQConfig
 from vllm.model_executor.layers.quantization.gptq_marlin import GPTQMarlinConfig
 from vllm.model_executor.layers.quantization.gptq_marlin_24 import GPTQMarlin24Config
 from vllm.model_executor.layers.quantization.marlin import MarlinConfig
-from vllm.model_executor.layers.quantization.modelopt import ModelOptFp8Config
 from vllm.model_executor.layers.quantization.qqq import QQQConfig
 from vllm.model_executor.layers.quantization.tpu_int8 import Int8TpuConfig
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.quantization.fp8 import Fp8Config
+from sglang.srt.layers.quantization.modelopt_quant import ModelOptFp8Config

 QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
     "aqlm": AQLMConfig,
...
...
python/sglang/srt/layers/modelopt_quant.py → python/sglang/srt/layers/quantization/modelopt_quant.py
View file @
b5fb4ef5
...
@@ -142,6 +142,7 @@ class ModelOptFp8LinearMethod(LinearMethodBase):
...
@@ -142,6 +142,7 @@ class ModelOptFp8LinearMethod(LinearMethodBase):
             data=torch.full(
                 (len(output_partition_sizes),),
                 torch.finfo(torch.float32).min,
+                dtype=torch.float32,
             ),
             weight_loader=weight_loader,
         ),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment