chenpangpang / transformers / Commits

Unverified commit d56d723f, authored Oct 27, 2022 by Younes Belkada, committed by GitHub on Oct 27, 2022
Add `accelerate` support for M2M100 (#19912)

* add `accelerate` support for M2M100
* fix device set nit
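With this commit, M2M100 can be loaded through `accelerate`'s big-model dispatch. A minimal usage sketch, assuming the public `facebook/m2m100_418M` checkpoint and `accelerate` installed alongside `transformers` (actual device placement depends on your hardware):

```python
# Minimal sketch: load M2M100 with accelerate's automatic device placement.
# Assumes `pip install transformers accelerate` and the facebook/m2m100_418M
# checkpoint; where layers land depends on available GPU/CPU memory.
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
model = M2M100ForConditionalGeneration.from_pretrained(
    "facebook/m2m100_418M",
    device_map="auto",  # dispatch enabled for M2M100 by this commit
)

tokenizer.src_lang = "fr"
inputs = tokenizer("La vie est belle.", return_tensors="pt")
generated = model.generate(**inputs, forced_bos_token_id=tokenizer.get_lang_id("en"))
print(tokenizer.batch_decode(generated, skip_special_tokens=True))
```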
Parent: c766a2d7
Changes: showing 1 changed file with 9 additions and 6 deletions.

src/transformers/models/m2m_100/modeling_m2m_100.py (+9, -6) @ d56d723f
```diff
@@ -532,6 +532,7 @@ class M2M100PreTrainedModel(PreTrainedModel):
     config_class = M2M100Config
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
+    _no_split_modules = ["M2M100Attention"]
 
     def _init_weights(self, module):
         std = self.config.init_std
```
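`_no_split_modules` is the hint `accelerate` reads when computing an automatic device map: listed module classes are kept whole on one device, so an `M2M100Attention` block and its residual stream are never sharded mid-layer. A hedged sketch of how that hint flows into `accelerate`'s public `infer_auto_device_map` (the memory limits below are made up for illustration):

```python
# Sketch: how the _no_split_modules hint is consumed when building a device
# map. The max_memory budget is illustrative only; adjust to your hardware.
from accelerate import infer_auto_device_map
from transformers import M2M100ForConditionalGeneration

model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")

device_map = infer_auto_device_map(
    model,
    max_memory={0: "1GiB", "cpu": "8GiB"},            # assumed single-GPU box
    no_split_module_classes=model._no_split_modules,  # ["M2M100Attention"]
)
print(device_map)  # each attention block is assigned to exactly one device
```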
```diff
@@ -693,10 +694,10 @@ class M2M100Encoder(M2M100PreTrainedModel):
         self.max_source_positions = config.max_position_embeddings
         self.embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0
 
+        self.embed_tokens = nn.Embedding(config.vocab_size, embed_dim, self.padding_idx)
+
         if embed_tokens is not None:
-            self.embed_tokens = embed_tokens
-        else:
-            self.embed_tokens = nn.Embedding(config.vocab_size, embed_dim, self.padding_idx)
+            self.embed_tokens.weight = embed_tokens.weight
 
         self.embed_positions = M2M100SinusoidalPositionalEmbedding(
             config.max_position_embeddings,
```
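The encoder (and, in the mirrored hunk further down, the decoder) now creates its own `nn.Embedding` unconditionally and ties it to the shared embedding by weight assignment, rather than swapping in the shared module itself. The submodule is therefore always registered under the same name, which presumably keeps `accelerate`'s weight loading and device mapping straightforward, while tying still shares a single `Parameter`. A small sketch of the pattern, with M2M100-like sizes assumed for illustration:

```python
# Sketch of the new tying pattern: instantiate the module unconditionally,
# then share parameters by assigning the weight. Sizes are assumed here.
import torch.nn as nn

vocab_size, embed_dim, padding_idx = 128112, 1024, 1  # M2M100-like (assumed)

shared = nn.Embedding(vocab_size, embed_dim, padding_idx)         # the shared table
encoder_embed = nn.Embedding(vocab_size, embed_dim, padding_idx)  # always created
encoder_embed.weight = shared.weight                              # tie the Parameter

assert encoder_embed.weight is shared.weight  # one tensor, two modules
```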
```diff
@@ -777,6 +778,7 @@ class M2M100Encoder(M2M100PreTrainedModel):
             inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
 
         embed_pos = self.embed_positions(input_ids, inputs_embeds)
+        embed_pos = embed_pos.to(inputs_embeds.device)
 
         hidden_states = inputs_embeds + embed_pos
         hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
```
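The added `.to(inputs_embeds.device)` cast is the "fix device set nit" from the commit message: once the model is split across devices, the positional embeddings can come back on a different device than the token embeddings, and the addition would raise. A toy reproduction of the pattern, assuming at least two CUDA devices:

```python
# Toy sketch of the mismatch the cast prevents; assumes >= 2 CUDA devices.
import torch

inputs_embeds = torch.randn(1, 4, 16, device="cuda:0")  # token embeddings
embed_pos = torch.randn(1, 4, 16, device="cuda:1")      # positions, elsewhere

embed_pos = embed_pos.to(inputs_embeds.device)  # the cast added in this commit
hidden_states = inputs_embeds + embed_pos       # same device, no RuntimeError
```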
```diff
@@ -868,10 +870,10 @@ class M2M100Decoder(M2M100PreTrainedModel):
         self.max_target_positions = config.max_position_embeddings
         self.embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
 
+        self.embed_tokens = nn.Embedding(config.vocab_size, config.d_model, self.padding_idx)
+
         if embed_tokens is not None:
-            self.embed_tokens = embed_tokens
-        else:
-            self.embed_tokens = nn.Embedding(config.vocab_size, config.d_model, self.padding_idx)
+            self.embed_tokens.weight = embed_tokens.weight
 
         self.embed_positions = M2M100SinusoidalPositionalEmbedding(
             config.max_position_embeddings,
```
```diff
@@ -1010,6 +1012,7 @@ class M2M100Decoder(M2M100PreTrainedModel):
         # embed positions
         positions = self.embed_positions(input_ids, inputs_embeds, past_key_values_length)
+        positions = positions.to(inputs_embeds.device)
 
         hidden_states = inputs_embeds + positions
```