Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
text-generation-inference
Commits
9a59ebce
Commit
9a59ebce
authored
Jun 03, 2024
by
Nicolas Patry
Browse files
Hotfix GPTQ.
parent
9add5d0a
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
1 deletion
+13
-1
server/text_generation_server/layers/linear.py
server/text_generation_server/layers/linear.py
+2
-0
server/text_generation_server/utils/weights.py
server/text_generation_server/utils/weights.py
+11
-1
No files found.
server/text_generation_server/layers/linear.py
View file @
9a59ebce
...
...
@@ -196,6 +196,8 @@ def get_linear(weight, bias, quantize):
weight
.
groupsize
,
)
elif
quantize
==
"awq"
:
from
text_generation_server.layers.gptq
import
GPTQWeight
if
not
isinstance
(
weight
,
GPTQWeight
):
raise
NotImplementedError
(
f
"The passed weight is not `awq` compatible, loader needs to be updated."
...
...
server/text_generation_server/utils/weights.py
View file @
9a59ebce
...
...
@@ -154,6 +154,8 @@ class Weights:
already alternating Q,K,V within the main tensor
"""
if
quantize
in
[
"gptq"
,
"awq"
]:
from
text_generation_server.layers.gptq
import
GPTQWeight
try
:
qweight
=
self
.
_get_qweight
(
f
"
{
prefix
}
.qweight"
)
except
RuntimeError
:
...
...
@@ -331,6 +333,8 @@ class Weights:
def
get_multi_weights_row
(
self
,
prefix
:
str
,
quantize
:
str
):
if
quantize
==
"exl2"
:
from
text_generation_server.layers.exl2
import
Exl2Weight
try
:
q_weight
=
self
.
get_tensor
(
f
"
{
prefix
}
.q_weight"
)
except
RuntimeError
:
...
...
@@ -390,7 +394,11 @@ class Weights:
# it would require to reorder input activations that are split unto several GPUs
use_exllama
=
False
from
text_generation_server.layers.gptq
import
HAS_EXLLAMA
,
CAN_EXLLAMA
from
text_generation_server.layers.gptq
import
(
HAS_EXLLAMA
,
CAN_EXLLAMA
,
GPTQWeight
,
)
if
use_exllama
:
if
not
HAS_EXLLAMA
:
...
...
@@ -442,6 +450,8 @@ class Weights:
use_exllama
=
use_exllama
,
)
elif
quantize
==
"awq"
:
from
text_generation_server.layers.gptq
import
GPTQWeight
bits
,
groupsize
,
_
,
_
=
self
.
_get_gptq_params
()
try
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment