Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
311f7438
Unverified
Commit
311f7438
authored
Aug 07, 2024
by
Lucas Wilkinson
Committed by
GitHub
Aug 07, 2024
Browse files
[Bugfix] Fix gptq failure on T4s (#7264)
parent
469b3bc5
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
14 additions
and
15 deletions
+14
-15
vllm/model_executor/layers/quantization/awq_marlin.py
vllm/model_executor/layers/quantization/awq_marlin.py
+1
-2
vllm/model_executor/layers/quantization/gptq_marlin.py
vllm/model_executor/layers/quantization/gptq_marlin.py
+1
-2
vllm/model_executor/layers/quantization/utils/marlin_utils.py
.../model_executor/layers/quantization/utils/marlin_utils.py
+12
-11
No files found.
vllm/model_executor/layers/quantization/awq_marlin.py
View file @
311f7438
...
...
@@ -126,8 +126,7 @@ class AWQMarlinConfig(QuantizationConfig):
return
check_marlin_supported
(
quant_type
=
cls
.
TYPE_MAP
[
num_bits
],
group_size
=
group_size
,
has_zp
=
has_zp
,
min_capability
=
cls
.
get_min_capability
())
has_zp
=
has_zp
)
class
AWQMarlinLinearMethod
(
LinearMethodBase
):
...
...
vllm/model_executor/layers/quantization/gptq_marlin.py
View file @
311f7438
...
...
@@ -136,8 +136,7 @@ class GPTQMarlinConfig(QuantizationConfig):
return
False
return
check_marlin_supported
(
quant_type
=
cls
.
TYPE_MAP
[(
num_bits
,
sym
)],
group_size
=
group_size
,
min_capability
=
cls
.
get_min_capability
())
group_size
=
group_size
)
class
GPTQMarlinLinearMethod
(
LinearMethodBase
):
...
...
vllm/model_executor/layers/quantization/utils/marlin_utils.py
View file @
311f7438
...
...
@@ -26,12 +26,13 @@ USE_FP32_REDUCE_DEFAULT = True
# without runtime zero-point. We support common cases, i.e. AWQ and GPTQ.
# TODO: we may want to move this into the C++ so its closer to the actual impl
def
query_marlin_supported_quant_types
(
has_zp
:
bool
,
min_capability
:
Optional
[
int
]
=
None
):
if
min_capability
is
None
:
device_capability
:
Optional
[
int
]
=
None
):
if
device_capability
is
None
:
major
,
minor
=
current_platform
.
get_device_capability
()
min
_capability
=
major
*
10
+
minor
device
_capability
=
major
*
10
+
minor
if
min
_capability
<
80
:
if
device
_capability
<
80
:
return
[]
if
has_zp
:
...
...
@@ -48,20 +49,20 @@ def _check_marlin_supported(
quant_type
:
ScalarType
,
group_size
:
Optional
[
int
],
has_zp
:
bool
,
min
_capability
:
Optional
[
int
]
=
None
)
->
Tuple
[
bool
,
Optional
[
str
]]:
device
_capability
:
Optional
[
int
]
=
None
)
->
Tuple
[
bool
,
Optional
[
str
]]:
if
min
_capability
is
None
:
if
device
_capability
is
None
:
major
,
minor
=
current_platform
.
get_device_capability
()
min
_capability
=
major
*
10
+
minor
device
_capability
=
major
*
10
+
minor
supported_types
=
query_marlin_supported_quant_types
(
has_zp
,
min
_capability
)
has_zp
,
device
_capability
)
if
quant_type
not
in
supported_types
:
return
(
False
,
f
"Marlin does not support weight_bits =
{
quant_type
}
. "
f
"Only types =
{
supported_types
}
"
f
"are supported (for group_size =
{
group_size
}
, "
f
"
min
_capability =
{
min
_capability
}
, zp =
{
has_zp
}
)."
)
f
"
device
_capability =
{
device
_capability
}
, zp =
{
has_zp
}
)."
)
if
(
group_size
is
None
or
group_size
not
in
MARLIN_SUPPORTED_GROUP_SIZES
):
return
(
False
,
f
"Marlin does not support group_size =
{
group_size
}
. "
f
"Only group_sizes =
{
MARLIN_SUPPORTED_GROUP_SIZES
}
"
...
...
@@ -73,9 +74,9 @@ def _check_marlin_supported(
def
check_marlin_supported
(
quant_type
:
ScalarType
,
group_size
:
int
,
has_zp
:
bool
=
False
,
min
_capability
:
Optional
[
int
]
=
None
)
->
bool
:
device
_capability
:
Optional
[
int
]
=
None
)
->
bool
:
cond
,
_
=
_check_marlin_supported
(
quant_type
,
group_size
,
has_zp
,
min
_capability
)
device
_capability
)
return
cond
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment