chenpangpang / transformers · Commits

Unverified commit ff20f9cf, authored Mar 23, 2023 by Younes Belkada, committed by GitHub on Mar 23, 2023

[`MBart`] Add `accelerate` support for MBart (#22309)

add `accelerate` support for MBart

Parent: 61f79b29
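The practical effect of this change is that MBart checkpoints can now be loaded with an `accelerate` device map. A minimal usage sketch, assuming `accelerate` is installed; the checkpoint name is only an example:

    from transformers import MBartForConditionalGeneration

    # "auto" lets accelerate spread the weights over the available GPUs and CPU
    # according to free memory; requires `pip install accelerate`.
    model = MBartForConditionalGeneration.from_pretrained(
        "facebook/mbart-large-cc25",
        device_map="auto",
    )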
Changes: 1 changed file with 9 additions and 8 deletions

src/transformers/models/mbart/modeling_mbart.py (+9, -8)
@@ -502,6 +502,7 @@ class MBartPreTrainedModel(PreTrainedModel):
     config_class = MBartConfig
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
+    _no_split_modules = ["MBartDecoderLayer", "MBartAttention"]
 
     def _init_weights(self, module):
         std = self.config.init_std
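The new `_no_split_modules` attribute tells `accelerate`'s device-map machinery which module classes must be kept whole on a single device when the model is sharded. A rough sketch of the same idea done by hand, using `facebook/mbart-large-cc25` purely for illustration:

    from accelerate import infer_auto_device_map, init_empty_weights
    from transformers import AutoConfig, MBartForConditionalGeneration

    config = AutoConfig.from_pretrained("facebook/mbart-large-cc25")

    # Build the model skeleton without allocating real weights.
    with init_empty_weights():
        model = MBartForConditionalGeneration(config)

    # Never place part of a decoder layer or an attention block on one device
    # and the rest elsewhere.
    device_map = infer_auto_device_map(
        model,
        no_split_module_classes=["MBartDecoderLayer", "MBartAttention"],
    )

When `device_map="auto"` is passed to `from_pretrained`, transformers reads this attribute and performs the equivalent step internally.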
@@ -702,10 +703,10 @@ class MBartEncoder(MBartPreTrainedModel):
         self.max_source_positions = config.max_position_embeddings
         self.embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0
 
+        self.embed_tokens = nn.Embedding(config.vocab_size, embed_dim, self.padding_idx)
+
         if embed_tokens is not None:
-            self.embed_tokens = embed_tokens
-        else:
-            self.embed_tokens = nn.Embedding(config.vocab_size, embed_dim, self.padding_idx)
+            self.embed_tokens.weight = embed_tokens.weight
 
         self.embed_positions = MBartLearnedPositionalEmbedding(
             config.max_position_embeddings,
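Both the encoder here and the decoder further down now always create their own `nn.Embedding` and, when a shared `embed_tokens` module is passed in, tie the weights instead of swapping in the module itself, presumably so every submodule keeps its own entry in the module tree when weights are loaded and dispatched per module. A toy sketch of the tying pattern, with made-up sizes rather than MBart's real dimensions:

    import torch.nn as nn

    # Toy dimensions for illustration only.
    vocab_size, d_model, padding_idx = 1000, 16, 1

    shared = nn.Embedding(vocab_size, d_model, padding_idx)

    # Same pattern as in the hunk above: instantiate the submodule, then tie.
    embed_tokens = nn.Embedding(vocab_size, d_model, padding_idx)
    embed_tokens.weight = shared.weight

    assert embed_tokens.weight is shared.weight  # one shared Parameter, not a copy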
@@ -793,7 +794,7 @@ class MBartEncoder(MBartPreTrainedModel):
         embed_pos = self.embed_positions(input)
 
-        hidden_states = inputs_embeds + embed_pos
+        hidden_states = inputs_embeds + embed_pos.to(inputs_embeds.device)
         hidden_states = self.layernorm_embedding(hidden_states)
         hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
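When the model is split across devices, the positional embedding can live on a different device than the token embeddings, so the positional term is moved to the device of `inputs_embeds` before the addition; the decoder's forward pass below gets the same fix. A self-contained illustration of the pattern, using CPU tensors since the multi-GPU case cannot be shown portably:

    import torch

    # Under a sharded model these two could sit on, say, "cuda:0" and "cuda:1".
    inputs_embeds = torch.randn(2, 5, 8)
    embed_pos = torch.randn(5, 8)

    # .to(...) is a no-op when the devices already match and copies otherwise,
    # so the addition never mixes tensors from different devices.
    hidden_states = inputs_embeds + embed_pos.to(inputs_embeds.device)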
@@ -876,10 +877,10 @@ class MBartDecoder(MBartPreTrainedModel):
         self.max_target_positions = config.max_position_embeddings
         self.embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0
 
+        self.embed_tokens = nn.Embedding(config.vocab_size, config.d_model, self.padding_idx)
+
         if embed_tokens is not None:
-            self.embed_tokens = embed_tokens
-        else:
-            self.embed_tokens = nn.Embedding(config.vocab_size, config.d_model, self.padding_idx)
+            self.embed_tokens.weight = embed_tokens.weight
 
         self.embed_positions = MBartLearnedPositionalEmbedding(
             config.max_position_embeddings,
@@ -1038,7 +1039,7 @@ class MBartDecoder(MBartPreTrainedModel):
         # embed positions
         positions = self.embed_positions(input, past_key_values_length)
 
-        hidden_states = inputs_embeds + positions
+        hidden_states = inputs_embeds + positions.to(inputs_embeds.device)
         hidden_states = self.layernorm_embedding(hidden_states)
         hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)