Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
4c764007
"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "0b8e29289dd97805f778922c98a13cd0700d3ab3"
Unverified
Commit
4c764007
authored
Apr 21, 2025
by
Enrique Shockwave
Committed by
GitHub
Apr 20, 2025
Browse files
check marlin format before attempting conversion (#4675)
parent
9f3bd2ad
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
7 deletions
+13
-7
python/sglang/srt/layers/quantization/gptq.py
python/sglang/srt/layers/quantization/gptq.py
+13
-7
No files found.
python/sglang/srt/layers/quantization/gptq.py
View file @
4c764007
...
@@ -37,6 +37,14 @@ except ImportError:
...
@@ -37,6 +37,14 @@ except ImportError:
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
def
check_marlin_format
(
hf_quant_cfg
:
Dict
[
str
,
Any
])
->
bool
:
# compat: gptqmodel and autogptq (eol) main use checkpoint_format: str
# compat: autogptq <=0.7.1 is_marlin_format: bool
return
hf_quant_cfg
.
get
(
"checkpoint_format"
)
==
"marlin"
or
hf_quant_cfg
.
get
(
"is_marlin_format"
,
False
)
class
GPTQConfig
(
QuantizationConfig
):
class
GPTQConfig
(
QuantizationConfig
):
"""Config class for GPTQ.
"""Config class for GPTQ.
...
@@ -262,13 +270,15 @@ class GPTQMarlinConfig(QuantizationConfig):
...
@@ -262,13 +270,15 @@ class GPTQMarlinConfig(QuantizationConfig):
@
classmethod
@
classmethod
def
override_quantization_method
(
cls
,
hf_quant_cfg
,
user_quant
)
->
Optional
[
str
]:
def
override_quantization_method
(
cls
,
hf_quant_cfg
,
user_quant
)
->
Optional
[
str
]:
is_marlin_format
=
check_marlin_format
(
hf_quant_cfg
)
can_convert
=
cls
.
is_gptq_marlin_compatible
(
hf_quant_cfg
)
can_convert
=
cls
.
is_gptq_marlin_compatible
(
hf_quant_cfg
)
is_valid_user_quant
=
(
is_valid_user_quant
=
(
user_quant
is
None
or
user_quant
==
"marlin"
or
user_quant
==
"gptq_marlin"
user_quant
is
None
or
user_quant
==
"marlin"
or
user_quant
==
"gptq_marlin"
)
)
if
can_convert
and
is_valid_user_quant
:
if
not
is_marlin_format
and
can_convert
and
is_valid_user_quant
:
msg
=
(
msg
=
(
"The model is convertible to {} during runtime."
"The model is convertible to {} during runtime."
" Using {} kernel."
.
format
(
cls
.
get_name
(),
cls
.
get_name
())
" Using {} kernel."
.
format
(
cls
.
get_name
(),
cls
.
get_name
())
...
@@ -276,7 +286,7 @@ class GPTQMarlinConfig(QuantizationConfig):
...
@@ -276,7 +286,7 @@ class GPTQMarlinConfig(QuantizationConfig):
logger
.
info
(
msg
)
logger
.
info
(
msg
)
return
cls
.
get_name
()
return
cls
.
get_name
()
if
can_convert
and
user_quant
==
"gptq"
:
if
not
is_marlin_format
and
can_convert
and
user_quant
==
"gptq"
:
logger
.
info
(
logger
.
info
(
"Detected that the model can run with gptq_marlin"
"Detected that the model can run with gptq_marlin"
", however you specified quantization=gptq explicitly,"
", however you specified quantization=gptq explicitly,"
...
@@ -401,11 +411,7 @@ class MarlinConfig(QuantizationConfig):
...
@@ -401,11 +411,7 @@ class MarlinConfig(QuantizationConfig):
@
classmethod
@
classmethod
def
override_quantization_method
(
cls
,
hf_quant_cfg
,
user_quant
)
->
Optional
[
str
]:
def
override_quantization_method
(
cls
,
hf_quant_cfg
,
user_quant
)
->
Optional
[
str
]:
# compat: autogptq >=0.8.0 use checkpoint_format: str
is_marlin_format
=
check_marlin_format
(
hf_quant_cfg
)
# compat: autogptq <=0.7.1 is_marlin_format: bool
is_marlin_format
=
hf_quant_cfg
.
get
(
"checkpoint_format"
)
==
"marlin"
or
hf_quant_cfg
.
get
(
"is_marlin_format"
,
False
)
is_valid_user_quant
=
(
is_valid_user_quant
=
(
user_quant
is
None
or
user_quant
==
"gptq"
or
user_quant
==
"marlin"
user_quant
is
None
or
user_quant
==
"gptq"
or
user_quant
==
"marlin"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment