Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b167109b
Unverified
Commit
b167109b
authored
Mar 13, 2024
by
Bo-Wen Wang
Committed by
GitHub
Mar 12, 2024
Browse files
[Fix] Fix quantization="gptq" when using Marlin (#3319)
Co-authored-by:
Woosuk Kwon
<
woosuk.kwon@berkeley.edu
>
parent
602358f8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
1 deletion
+6
-1
vllm/config.py
vllm/config.py
+6
-1
No files found.
vllm/config.py
View file @
b167109b
...
@@ -168,13 +168,18 @@ class ModelConfig:
...
@@ -168,13 +168,18 @@ class ModelConfig:
# Parse quantization method from the HF model config, if available.
# Parse quantization method from the HF model config, if available.
hf_quant_config
=
getattr
(
self
.
hf_config
,
"quantization_config"
,
None
)
hf_quant_config
=
getattr
(
self
.
hf_config
,
"quantization_config"
,
None
)
if
hf_quant_config
is
not
None
:
if
hf_quant_config
is
not
None
:
hf_quant_method
=
str
(
hf_quant_config
[
"quant_method"
]).
lower
()
hf_quant_method
=
str
(
hf_quant_config
[
"quant_method"
]).
lower
()
# If the GPTQ model is serialized in marlin format, use marlin.
# If the GPTQ model is serialized in marlin format, use marlin.
if
(
hf_quant_method
==
"gptq"
if
(
hf_quant_method
==
"gptq"
and
"is_marlin_format"
in
hf_quant_config
and
"is_marlin_format"
in
hf_quant_config
and
hf_quant_config
[
"is_marlin_format"
]):
and
hf_quant_config
[
"is_marlin_format"
]):
logger
.
info
(
"The model is serialized in Marlin format. "
"Using Marlin kernel."
)
hf_quant_method
=
"marlin"
hf_quant_method
=
"marlin"
if
self
.
quantization
==
"gptq"
:
self
.
quantization
=
hf_quant_method
if
self
.
quantization
is
None
:
if
self
.
quantization
is
None
:
self
.
quantization
=
hf_quant_method
self
.
quantization
=
hf_quant_method
elif
self
.
quantization
!=
hf_quant_method
:
elif
self
.
quantization
!=
hf_quant_method
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment