Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8376cc41
Commit
8376cc41
authored
Aug 15, 2024
by
gaoqiong
Browse files
修改awq workspace 申请
parent
3b2b3046
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
16 deletions
+20
-16
vllm/_custom_ops.py
vllm/_custom_ops.py
+6
-0
vllm/model_executor/layers/quantization/awq.py
vllm/model_executor/layers/quantization/awq.py
+14
-16
No files found.
vllm/_custom_ops.py
View file @
8376cc41
...
...
@@ -143,6 +143,12 @@ def fused_add_rms_norm(input: torch.Tensor, residual: torch.Tensor,
# quantization ops
# awq
def GetAWQShareWorkspaceSize() -> int:
    """Return the AWQ shared-workspace size reported by ``quant_ops``.

    Thin Python wrapper: forwards to
    ``quant_ops.GetAWQShareWorkspaceSize()`` and returns its result
    unchanged.

    NOTE(review): the unit of the returned size (bytes vs. elements) is
    defined by the native op and is not visible from here -- confirm
    against the op implementation.
    """
    workspace_size = quant_ops.GetAWQShareWorkspaceSize()
    return workspace_size
def GetAWQShareWorkspace() -> torch.Tensor:
    """Return the AWQ shared-workspace tensor provided by ``quant_ops``.

    Thin Python wrapper: forwards to ``quant_ops.GetAWQShareWorkspace()``
    and returns its result unchanged.

    NOTE(review): device, dtype and ownership of the returned tensor are
    decided by the native op and are not visible from here -- confirm
    against the op implementation.
    """
    workspace = quant_ops.GetAWQShareWorkspace()
    return workspace
def
awq_dequantize
(
qweight
:
torch
.
Tensor
,
scales
:
torch
.
Tensor
,
zeros
:
torch
.
Tensor
,
split_k_iters
:
int
,
thx
:
int
,
thy
:
int
)
->
torch
.
Tensor
:
...
...
vllm/model_executor/layers/quantization/awq.py
View file @
8376cc41
...
...
@@ -20,12 +20,8 @@ class AWQShareWorkSpace:
return
cls
.
_instance
def _initialize(self):
    """Bind the shared AWQ workspace and its size to this instance.

    Both values are obtained from the custom ops
    (``ops.GetAWQShareWorkspaceSize()`` and ``ops.GetAWQShareWorkspace()``)
    instead of being allocated here, so no temporary tensor is created
    only to be replaced right away.

    NOTE(review): the unit of the size and the device/dtype of the
    workspace tensor are defined by the ops -- confirm against their
    implementation.
    """
    # The attribute names keep their existing spelling ("workshapce")
    # because other code reads them through the singleton instance.
    self.awqworkshapcesize = ops.GetAWQShareWorkspaceSize()
    self.awqworkshapce = ops.GetAWQShareWorkspace()
class
AWQConfig
(
QuantizationConfig
):
"""Config class for AWQ.
...
...
@@ -202,6 +198,7 @@ class AWQLinearMethod(LinearMethodBase):
else
:
padding_group
=
0
if
m
<
4096
:
out
=
ops
.
awq_gemm
(
reshaped_x
,
qweight
,
zeros_and_scales
,
...
...
@@ -212,14 +209,15 @@ class AWQLinearMethod(LinearMethodBase):
padding_group
,
self
.
awqsingleton
.
awqworkshapce
,
self
.
awqsingleton
.
awqworkshapcesize
)
else
:
#下面是采用rocblas的做法
#
deqweight=ops.dequant_w4_gemm_colmajor( #shape[n,k/8]--->[n,k]
#
qweight,
#
zeros_and_scales,
#
k,
#
n,
#
self.quant_config.group_size)
#
output=F.linear(reshaped_x, deqweight)
deqweight
=
ops
.
dequant_w4_gemm_colmajor
(
#shape[n,k/8]--->[n,k]
qweight
,
zeros_and_scales
,
k
,
n
,
self
.
quant_config
.
group_size
)
output
=
F
.
linear
(
reshaped_x
,
deqweight
[:,
0
:
k
]
)
if
bias
is
not
None
:
out
.
add_
(
bias
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment