Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8cd246bd
Commit
8cd246bd
authored
Aug 15, 2024
by
zhuwenwen
Browse files
修改awq workspace 申请
parent
17928589
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
11 deletions
+20
-11
vllm/_custom_ops.py
vllm/_custom_ops.py
+6
-0
vllm/model_executor/layers/quantization/awq.py
vllm/model_executor/layers/quantization/awq.py
+14
-11
No files found.
vllm/_custom_ops.py
View file @
8cd246bd
...
...
@@ -183,6 +183,12 @@ def advance_step(num_seqs: int, num_queries: int, block_size: int,
# quantization ops
# awq
def GetAWQShareWorkspaceSize() -> int:
    """Return the AWQ shared-workspace size reported by ``quant_ops``.

    Thin forwarding wrapper: the value comes straight from
    ``quant_ops.GetAWQShareWorkspaceSize()`` with no post-processing.

    NOTE(review): the unit of the returned size (bytes vs. elements) is not
    visible from this wrapper -- confirm against the native op.
    """
    workspace_size = quant_ops.GetAWQShareWorkspaceSize()
    return workspace_size
def GetAWQShareWorkspace() -> torch.Tensor:
    """Return the AWQ shared-workspace tensor provided by ``quant_ops``.

    Thin forwarding wrapper: the tensor comes straight from
    ``quant_ops.GetAWQShareWorkspace()`` and is handed back unchanged.

    NOTE(review): dtype, device and lifetime of the returned tensor are
    decided by the native op and are not visible here -- confirm before
    relying on them.
    """
    workspace = quant_ops.GetAWQShareWorkspace()
    return workspace
def
awq_dequantize
(
qweight
:
torch
.
Tensor
,
scales
:
torch
.
Tensor
,
zeros
:
torch
.
Tensor
,
split_k_iters
:
int
,
thx
:
int
,
thy
:
int
)
->
torch
.
Tensor
:
...
...
vllm/model_executor/layers/quantization/awq.py
View file @
8cd246bd
...
...
@@ -20,8 +20,8 @@ class AWQShareWorkSpace:
return
cls
.
_instance
def
_initialize
(
self
):
self
.
awqworkshapcesize
=
2
<<
29
self
.
awqworkshapce
=
torch
.
zeros
(
self
.
awqworkshapcesize
//
2
+
1
,
dtype
=
torch
.
float16
).
cuda
()
self
.
awqworkshapcesize
=
ops
.
GetAWQShareWorkspaceSize
()
self
.
awqworkshapce
=
ops
.
GetAWQShareWorkspace
()
class
AWQConfig
(
QuantizationConfig
):
...
...
@@ -200,6 +200,7 @@ class AWQLinearMethod(LinearMethodBase):
else
:
padding_group
=
0
if
m
<
4096
:
out
=
ops
.
awq_gemm
(
reshaped_x
,
qweight
,
zeros_and_scales
,
...
...
@@ -210,15 +211,17 @@ class AWQLinearMethod(LinearMethodBase):
padding_group
,
self
.
awqsingleton
.
awqworkshapce
,
self
.
awqsingleton
.
awqworkshapcesize
)
else
:
# 下面是采用rocblas的做法 (the following is the rocBLAS-based approach)
# deqweight=ops.dequant_w4_gemm_colmajor( #shape[n,k/8]--->[n,k]
#     qweight,
#     zeros_and_scales,
#     k,
#     n,
#     self.quant_config.group_size)
# output=F.linear(reshaped_x, deqweight)
deqweight
=
ops
.
dequant_w4_gemm_colmajor
(
#shape[n,k/8]--->[n,k]
qweight
,
zeros_and_scales
,
k
,
n
,
self
.
quant_config
.
group_size
)
o
ut
=
F
.
linear
(
reshaped_x
,
deqweight
[:,
0
:
k
]
)
if
bias
is
not
None
:
out
.
add_
(
bias
)
return
out
.
reshape
(
out_shape
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment