Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
AutoAWQ
Commits
75710806
"vscode:/vscode.git/clone" did not exist on "dc1b6822e77f34e8e33fcde7dbb4f80974047342"
Commit
75710806
authored
Sep 08, 2023
by
Casper Hansen
Browse files
Fix MLP module
parent
e197a733
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
25 additions
and
13 deletions
+25
-13
awq/modules/fused/mlp.py
awq/modules/fused/mlp.py
+25
-13
No files found.
awq/modules/fused/mlp.py
View file @
75710806
...
...
@@ -21,7 +21,13 @@ class QuantMPTMLP(nn.Module):
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Fused MPT MLP: quantized up-projection, activation, quantized down-projection.

    Fix: the up-projection now uses the AWQ GEMV kernel with the quantization
    group size read from ``self.down_proj.group_size`` instead of the GEMM
    kernel with a hard-coded group size of 8.

    Args:
        x: input hidden states; the last dimension is the hidden size.
           # assumes leading dims are (batch, seq) or similar -- TODO confirm

    Returns:
        Tensor produced by the down-projection of the activated up-projection.
    """
    # Collapse all leading dimensions so the kernel sees a 2-D
    # (tokens, hidden) matrix.
    x = x.reshape(-1, x.shape[-1])
    x = awq_inference_engine.gemv_forward_cuda(
        x,
        self.up_proj_qweight,
        self.up_proj_scales,
        self.up_proj_qzeros,
        self.down_proj.group_size,  # shared quantization group size (was hard-coded 8)
    )
    # Activation, then the quantized down-projection back to hidden size.
    return self.down_proj(self.act(x))
...
...
@@ -48,18 +54,24 @@ class QuantLlamaMLP(nn.Module):
self
.
down_proj
=
down_proj
def forward(self, x):
    """Apply the fused SwiGLU MLP, then the quantized down-projection.

    Args:
        x: input hidden states.

    Returns:
        Output of ``down_proj`` applied to the gated intermediate activations.
    """
    hidden = self.our_llama_mlp(x)
    return self.down_proj(hidden)
def our_llama_mlp(self, x):
    """Compute the SwiGLU intermediate activations with AWQ GEMV kernels.

    Fix: both the gate and up projections now call ``gemv_forward_cuda`` with
    the quantization group size taken from ``self.down_proj.group_size``,
    replacing ``gemm_forward_cuda`` calls that hard-coded a group size of 8.

    Args:
        x: input hidden states; the last dimension is the hidden size.

    Returns:
        ``silu(gate_proj(x)) * up_proj(x)`` reshaped so the last dimension is
        ``self.intermediate_size`` (the caller applies ``down_proj``).
    """
    # Output keeps the input's leading dims, swapping the last for the
    # intermediate size produced by the gate/up projections.
    out_shape = x.shape[:-1] + (self.intermediate_size,)
    # Kernels operate on a 2-D (tokens, hidden) matrix.
    x = x.reshape(-1, x.shape[-1])
    gate_output = awq_inference_engine.gemv_forward_cuda(
        x,
        self.gate_proj_qweight,
        self.gate_proj_scales,
        self.gate_proj_qzeros,
        self.down_proj.group_size,  # was hard-coded 8
    )
    up_output = awq_inference_engine.gemv_forward_cuda(
        x,
        self.up_proj_qweight,
        self.up_proj_scales,
        self.up_proj_qzeros,
        self.down_proj.group_size,  # was hard-coded 8
    )
    # SwiGLU: silu(gate) gates the up-projection elementwise.
    x = F.silu(gate_output) * up_output
    return x.reshape(out_shape)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment