OpenDAS / AutoAWQ · Commits

Commit e80663bb
Authored Sep 11, 2023 by Casper Hansen

Initialize with device

Parent: ac3e86df

Showing 2 changed files with 7 additions and 6 deletions (+7, -6)
awq/models/mpt.py          +2, -1
awq/modules/fused/block.py +5, -5
awq/models/mpt.py
@@ -85,7 +85,8 @@ class MptFuser:
                 self.model.config.n_heads,
                 module.attn.Wqkv,
                 module.attn.out_proj,
-                module.ffn
+                module.ffn,
+                next(iter(module.state_dict().values())).device
             )
             set_module_name(self.model, name, block)
\ No newline at end of file
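For context, the added argument reads the device off the existing module before the fused block is built, so the replacement block lands on whatever device the original MPT layer was loaded to. A minimal sketch of that idiom, using a plain nn.Linear as a hypothetical stand-in for the decoder layer (not AutoAWQ code):

import torch
import torch.nn as nn

# Hypothetical stand-in for an MPT decoder layer already placed on a device.
layer = nn.Linear(16, 16).to("cuda:0" if torch.cuda.is_available() else "cpu")

# Same idiom as the diff: the first tensor in the state dict reveals the
# device of a uniformly placed module.
device = next(iter(layer.state_dict().values())).device
print(device)  # e.g. cuda:0, or cpu when no GPU is available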
awq/modules/fused/block.py
@@ -2,14 +2,14 @@ import torch.nn as nn
 from awq.modules.fused.attn import QuantAttentionFused
 
 class MptBlock(nn.Module):
-    def __init__(self, hidden_size, n_heads, qkv_layer, o_proj, mpt_mlp):
+    def __init__(self, hidden_size, n_heads, qkv_layer, o_proj, mpt_mlp, dev):
         super().__init__()
         self.n_heads = n_heads
         self.hidden_size = hidden_size
-        self.attn = QuantAttentionFused(hidden_size, self.n_heads, qkv_layer, o_proj, dev="cuda:0", max_seq_len=8096, use_alibi=True).to("cuda:0")
-        self.ffn = mpt_mlp.to("cuda:0")
-        self.norm_1 = nn.LayerNorm(hidden_size, eps=1e-6).half().to("cuda:0")
-        self.norm_2 = nn.LayerNorm(hidden_size, eps=1e-6).half().to("cuda:0")
+        self.attn = QuantAttentionFused(hidden_size, self.n_heads, qkv_layer, o_proj, dev="cuda:0", max_seq_len=8096, use_alibi=True)
+        self.ffn = mpt_mlp
+        self.norm_1 = nn.LayerNorm(hidden_size, eps=1e-6).half().to(dev)
+        self.norm_2 = nn.LayerNorm(hidden_size, eps=1e-6).half().to(dev)
 
     def forward(
         self, hidden_states, past_key_value, attn_bias, attention_mask, is_causal
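This hunk threads the new dev argument through MptBlock so the layer norms are placed on the caller-supplied device instead of a hardcoded "cuda:0" (the attention module still receives dev="cuda:0" in this revision). A minimal sketch of the same pass-the-device-in pattern; TinyBlock and its fields are illustrative assumptions, not AutoAWQ code:

import torch
import torch.nn as nn

class TinyBlock(nn.Module):
    # Illustrative stand-in for MptBlock: the target device comes from the
    # caller instead of being hardcoded inside the module.
    def __init__(self, hidden_size, dev):
        super().__init__()
        self.norm_1 = nn.LayerNorm(hidden_size, eps=1e-6).half().to(dev)
        self.norm_2 = nn.LayerNorm(hidden_size, eps=1e-6).half().to(dev)

dev = "cuda:0" if torch.cuda.is_available() else "cpu"
block = TinyBlock(hidden_size=32, dev=dev)
print(next(block.parameters()).device)  # matches the device passed in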