chenpangpang / transformers · Commits

Commit a156da9a (Unverified)
consistent nn. and nn.functional: p2 templates (#12153)

Authored Jun 14, 2021 by Stas Bekman; committed by GitHub on Jun 14, 2021
Parent: 007be9e4
Showing 3 changed files with 15 additions and 16 deletions (+15 −16)
- templates/adding_a_new_model/ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md (+1 −1)
- templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py (+13 −14)
- templates/adding_a_new_model/open_model_proposals/ADD_BIG_BIRD.md (+1 −1)
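The change itself is mechanical: the `import torch.nn.functional as F` alias is dropped and every `F.` call becomes `nn.functional.`, so a single `from torch import nn` serves both the module classes and the functional API. A minimal sketch of the convention, with made-up tensor sizes:

```python
import torch
from torch import nn  # the one canonical import used across the templates

scores = torch.randn(2, 4)

# previously spelled F.softmax / F.dropout via `import torch.nn.functional as F`;
# the functional API is now reached through the nn namespace instead
probs = nn.functional.softmax(scores, dim=-1)
probs = nn.functional.dropout(probs, p=0.1, training=True)
```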
templates/adding_a_new_model/ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md

@@ -711,7 +711,7 @@ defined by the name of the class attribute you give the layer. Let's
 define a dummy model in PyTorch, called `SimpleModel` as follows:

 ```python
-import torch.nn as nn
+from torch import nn

 class SimpleModel(nn.Module):
     def __init__(self):
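For reference, the `SimpleModel` the template goes on to define is just a handful of plain `nn.` layers; a sketch in the new import style (layer names and sizes here are illustrative, not quoted from the template):

```python
from torch import nn

class SimpleModel(nn.Module):
    def __init__(self):
        super().__init__()
        # illustrative layers; the point is that everything hangs off `nn.`
        self.dense = nn.Linear(10, 10)
        self.intermediate = nn.Linear(10, 10)
        self.layer_norm = nn.LayerNorm(10)

model = SimpleModel()
print(model)  # shows the submodules registered under their attribute names
```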
templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py

@@ -1542,7 +1542,6 @@ import random
 from typing import Optional, Tuple

 import torch
-import torch.nn.functional as F
 from torch import nn
 from torch.nn import CrossEntropyLoss
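Dropping the `F` alias loses nothing, because `nn.functional` is literally the same module object; a one-line check:

```python
import torch.nn.functional as F
from torch import nn

assert F is nn.functional  # same module; only the spelling in the templates changes
```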
@@ -1743,7 +1742,7 @@ class {{cookiecutter.camelcase_modelname}}Attention(nn.Module):
             attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask
             attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

-        attn_weights = F.softmax(attn_weights, dim=-1)
+        attn_weights = nn.functional.softmax(attn_weights, dim=-1)

         if layer_head_mask is not None:
             if layer_head_mask.size() != (self.num_heads,):
@@ -1763,7 +1762,7 @@ class {{cookiecutter.camelcase_modelname}}Attention(nn.Module):
         else:
             attn_weights_reshaped = None

-        attn_probs = F.dropout(attn_weights, p=self.dropout, training=self.training)
+        attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)

         attn_output = torch.bmm(attn_probs, value_states)
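The two hunks above sit in the standard mask-add, softmax, dropout, bmm attention sequence. A self-contained sketch of that sequence in the new spelling, under hypothetical sizes:

```python
import torch
from torch import nn

bsz, num_heads, tgt_len, src_len, head_dim = 2, 4, 5, 5, 8
attn_weights = torch.randn(bsz * num_heads, tgt_len, src_len)
attention_mask = torch.zeros(bsz, 1, tgt_len, src_len)  # additive mask
value_states = torch.randn(bsz * num_heads, src_len, head_dim)

# fold the per-batch mask in, then flatten the heads back out
attn_weights = attn_weights.view(bsz, num_heads, tgt_len, src_len) + attention_mask
attn_weights = attn_weights.view(bsz * num_heads, tgt_len, src_len)

attn_weights = nn.functional.softmax(attn_weights, dim=-1)
attn_probs = nn.functional.dropout(attn_weights, p=0.1, training=True)
attn_output = torch.bmm(attn_probs, value_states)  # (bsz * num_heads, tgt_len, head_dim)
```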
@@ -1823,15 +1822,15 @@ class {{cookiecutter.camelcase_modelname}}EncoderLayer(nn.Module):
             layer_head_mask=layer_head_mask,
             output_attentions=output_attentions,
         )
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
         hidden_states = self.self_attn_layer_norm(hidden_states)

         residual = hidden_states
         hidden_states = self.activation_fn(self.fc1(hidden_states))
-        hidden_states = F.dropout(hidden_states, p=self.activation_dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.activation_dropout, training=self.training)
         hidden_states = self.fc2(hidden_states)
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
         hidden_states = self.final_layer_norm(hidden_states)
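The second half of this hunk is the encoder's residual feed-forward block; the same lines recur in the decoder hunks below. As a sketch, the pattern in isolation, written as a minimal module under assumed sizes (this is not the template class itself, and the template reads its activation from the config):

```python
import torch
from torch import nn

class FeedForwardBlock(nn.Module):
    """Residual MLP sub-block in the style of the template's EncoderLayer."""

    def __init__(self, d_model: int = 16, d_ff: int = 64, dropout: float = 0.1):
        super().__init__()
        self.fc1 = nn.Linear(d_model, d_ff)
        self.fc2 = nn.Linear(d_ff, d_model)
        self.activation_fn = nn.functional.gelu  # assumption; config-driven in the template
        self.dropout = dropout
        self.activation_dropout = dropout
        self.final_layer_norm = nn.LayerNorm(d_model)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        residual = hidden_states
        hidden_states = self.activation_fn(self.fc1(hidden_states))
        hidden_states = nn.functional.dropout(hidden_states, p=self.activation_dropout, training=self.training)
        hidden_states = self.fc2(hidden_states)
        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
        hidden_states = residual + hidden_states
        return self.final_layer_norm(hidden_states)

block = FeedForwardBlock()
out = block(torch.randn(2, 5, 16))  # shape preserved: (2, 5, 16)
```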
@@ -1916,7 +1915,7 @@ class {{cookiecutter.camelcase_modelname}}DecoderLayer(nn.Module):
             layer_head_mask=layer_head_mask,
             output_attentions=output_attentions,
         )
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
         hidden_states = self.self_attn_layer_norm(hidden_states)
@@ -1936,7 +1935,7 @@ class {{cookiecutter.camelcase_modelname}}DecoderLayer(nn.Module):
                 past_key_value=cross_attn_past_key_value,
                 output_attentions=output_attentions,
             )
-            hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+            hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
             hidden_states = residual + hidden_states
             hidden_states = self.encoder_attn_layer_norm(hidden_states)
@@ -1946,9 +1945,9 @@ class {{cookiecutter.camelcase_modelname}}DecoderLayer(nn.Module):
         # Fully Connected
         residual = hidden_states
         hidden_states = self.activation_fn(self.fc1(hidden_states))
-        hidden_states = F.dropout(hidden_states, p=self.activation_dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.activation_dropout, training=self.training)
         hidden_states = self.fc2(hidden_states)
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
         hidden_states = self.final_layer_norm(hidden_states)
@@ -2171,7 +2170,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder({{cookiecutter.camelcase_model
     Args:
         config: {{cookiecutter.camelcase_modelname}}Config
-        embed_tokens (torch.nn.Embedding): output embedding
+        embed_tokens (nn.Embedding): output embedding
     """

     def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, embed_tokens: Optional[nn.Embedding] = None):
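The docstring fix brings the `Args` entry in line with the `Optional[nn.Embedding]` annotation on `__init__`. That parameter exists so a caller can hand the encoder a shared embedding table; a hedged sketch of the pattern (class and names hypothetical, not the cookiecutter class):

```python
from typing import Optional

from torch import nn

class TinyEncoder(nn.Module):
    # hypothetical stand-in for the cookiecutter Encoder's signature
    def __init__(self, vocab_size: int = 100, d_model: int = 16,
                 embed_tokens: Optional[nn.Embedding] = None):
        super().__init__()
        self.embed_tokens = embed_tokens if embed_tokens is not None else nn.Embedding(vocab_size, d_model)

shared = nn.Embedding(100, 16)
encoder = TinyEncoder(embed_tokens=shared)  # encoder and decoder can reuse one table
```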
@@ -2270,7 +2269,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder({{cookiecutter.camelcase_model
         hidden_states = inputs_embeds + embed_pos
         hidden_states = self.layernorm_embedding(hidden_states)
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)

         # expand attention_mask
         if attention_mask is not None:
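This hunk and the matching decoder hunk further below touch the same embed, layernorm, dropout prologue. In isolation, under assumed dimensions:

```python
import torch
from torch import nn

embed_tokens = nn.Embedding(100, 16)     # vocabulary of 100, hidden size 16
embed_positions = nn.Embedding(32, 16)   # up to 32 positions
layernorm_embedding = nn.LayerNorm(16)

input_ids = torch.randint(0, 100, (2, 10))
inputs_embeds = embed_tokens(input_ids)         # (2, 10, 16)
embed_pos = embed_positions(torch.arange(10))   # (10, 16), broadcasts over the batch

hidden_states = inputs_embeds + embed_pos
hidden_states = layernorm_embedding(hidden_states)
hidden_states = nn.functional.dropout(hidden_states, p=0.1, training=True)
```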
@@ -2337,7 +2336,7 @@ class {{cookiecutter.camelcase_modelname}}Decoder({{cookiecutter.camelcase_model
     Args:
         config: {{cookiecutter.camelcase_modelname}}Config
-        embed_tokens (torch.nn.Embedding): output embedding
+        embed_tokens (nn.Embedding): output embedding
     """

     def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, embed_tokens: Optional[nn.Embedding] = None):
@@ -2506,7 +2505,7 @@ class {{cookiecutter.camelcase_modelname}}Decoder({{cookiecutter.camelcase_model
         hidden_states = inputs_embeds + positions
         hidden_states = self.layernorm_embedding(hidden_states)
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)

         # decoder layers
         all_hidden_states = () if output_hidden_states else None
templates/adding_a_new_model/open_model_proposals/ADD_BIG_BIRD.md

@@ -725,7 +725,7 @@ defined by the name of the class attribute you give the layer. Let's
 define a dummy model in PyTorch, called `SimpleModel` as follows:

 ```python
-import torch.nn as nn
+from torch import nn

 class SimpleModel(nn.Module):
     def __init__(self):