chenpangpang / transformers · Commit 400c5a15 (unverified)

Authored Sep 26, 2021 by Stas Bekman; committed via GitHub on Sep 26, 2021

[megatron gpt checkpoint conversion] causal mask requires pos_embed dimension (#13735)
parent 91df4551

Showing 1 changed file with 5 additions and 6 deletions.

src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py (+5, -6), view file @ 400c5a15
@@ -121,12 +121,11 @@ def convert_megatron_checkpoint(args, input_state_dict, config):
     # The position embeddings.
     pos_embeddings = embeddings["position_embeddings"]["weight"]
-    # Read the hidden dimension.
-    n_embed = pos_embeddings.size(1)
-    # DEBUG.
+    # Read the causal mask dimension (seqlen). [max_sequence_length, hidden_size]
+    n_ctx = pos_embeddings.size(0)
     assert (
-        n_embed == heads * hidden_size_per_head
-    ), f"detected mismatch n_embed={n_embed} != heads={heads}*hidden_size_per_head={hidden_size_per_head}"
+        n_ctx == config.n_ctx
+    ), f"pos_embeddings.max_sequence_length={n_ctx} and config.n_ctx={config.n_ctx} don't match"
     # Store the position embeddings.
     output_state_dict["transformer.wpe.weight"] = pos_embeddings
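The fix, restated: Megatron stores the position-embedding table with shape [max_sequence_length, hidden_size], so the sequence length for the causal mask must come from size(0); the previous code sized the mask from the hidden dimension via size(1). A minimal sketch of the distinction (the sizes below are illustrative, not taken from any real checkpoint):

```python
import torch

# Hypothetical sizes for illustration only.
max_sequence_length, hidden_size = 1024, 1600

# Megatron's position-embedding table: [max_sequence_length, hidden_size].
pos_embeddings = torch.empty(max_sequence_length, hidden_size)

n_ctx = pos_embeddings.size(0)    # 1024: the sequence length the causal mask needs
n_embed = pos_embeddings.size(1)  # 1600: the hidden dimension, wrong for the mask

assert (n_ctx, n_embed) == (1024, 1600)
```

With the common 345M Megatron GPT-2 checkpoint, hidden size and max sequence length are both 1024, so the old n_embed-sized mask happened to be correct, which is presumably why the bug went unnoticed; it only surfaces on checkpoints where the two dimensions differ.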
@@ -175,7 +174,7 @@ def convert_megatron_checkpoint(args, input_state_dict, config):
         ) and weight_or_bias == "weight":
             # Insert a tensor of 1x1xDxD bias.
-            causal_mask = torch.tril(torch.ones((n_embed, n_embed), dtype=torch.float16)).view(1, 1, n_embed, n_embed)
+            causal_mask = torch.tril(torch.ones((n_ctx, n_ctx), dtype=torch.float16)).view(1, 1, n_ctx, n_ctx)
             output_state_dict[layer_name + ".attn.bias"] = causal_mask
             # Insert a "dummy" tensor for masked_bias.
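For reference, a standalone sketch of the corrected mask construction, with a toy n_ctx of 4 (an illustrative value, not from the patch). The lower-triangular buffer, shaped (1, 1, seqlen, seqlen), is what the GPT-2 implementation in transformers stores under attn.bias:

```python
import torch

n_ctx = 4  # toy sequence length for illustration

# Lower-triangular (causal) mask, shaped (1, 1, n_ctx, n_ctx) to match attn.bias.
causal_mask = torch.tril(torch.ones((n_ctx, n_ctx), dtype=torch.float16)).view(1, 1, n_ctx, n_ctx)

print(causal_mask[0, 0])
# tensor([[1., 0., 0., 0.],
#         [1., 1., 0., 0.],
#         [1., 1., 1., 0.],
#         [1., 1., 1., 1.]], dtype=torch.float16)
```

Row i of the mask marks which positions token i may attend to, so each token sees only itself and earlier positions, which is exactly what a causal language model requires.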