chenpangpang / transformers · Commits

Commit a156da9a (unverified), authored Jun 14, 2021 by Stas Bekman, committed by GitHub on Jun 14, 2021

consistent nn. and nn.functional: p2 templates (#12153)

parent 007be9e4
Showing 3 changed files with 15 additions and 16 deletions (+15 −16)
- templates/adding_a_new_model/ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md (+1 −1)
- templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py (+13 −14)
- templates/adding_a_new_model/open_model_proposals/ADD_BIG_BIRD.md (+1 −1)
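Across the three files the change is the same mechanical substitution: the markdown guides switch `import torch.nn as nn` to `from torch import nn`, the cookiecutter modeling template drops its `import torch.nn.functional as F`, and every `F.softmax` / `F.dropout` call is spelled out as `nn.functional.softmax` / `nn.functional.dropout`. A minimal sketch of the two styles (the dummy tensor below is only for illustration and is not part of the commit):

```python
import torch
from torch import nn  # the single import style the templates standardize on

x = torch.randn(2, 4, 4)  # dummy attention scores, just for illustration

# old style (removed by this commit):  F.softmax(x, dim=-1), F.dropout(...)
# new style (used throughout the templates):
weights = nn.functional.softmax(x, dim=-1)
probs = nn.functional.dropout(weights, p=0.1, training=True)
```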
templates/adding_a_new_model/ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md
@@ -711,7 +711,7 @@ defined by the name of the class attribute you give the layer. Let's
 define a dummy model in PyTorch, called `SimpleModel` as follows:
 
 ```python
-import torch.nn as nn
+from torch import nn
 
 class SimpleModel(nn.Module):
     def __init__(self):
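For context, `SimpleModel` is the guide's dummy PyTorch model; only its import line changes in this commit. A plausible completion of the snippet (the layer names and sizes below are illustrative, not taken from the diff):

```python
from torch import nn  # import style after this commit


class SimpleModel(nn.Module):
    def __init__(self):
        super().__init__()
        # illustrative layers; the guide's actual dummy model may differ
        self.dense = nn.Linear(10, 10)
        self.intermediate = nn.Linear(10, 10)
        self.layer_norm = nn.LayerNorm(10)
```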
templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
@@ -1542,7 +1542,6 @@ import random
 from typing import Optional, Tuple
 
 import torch
-import torch.nn.functional as F
 from torch import nn
 from torch.nn import CrossEntropyLoss
@@ -1743,7 +1742,7 @@ class {{cookiecutter.camelcase_modelname}}Attention(nn.Module):
             attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask
             attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
 
-        attn_weights = F.softmax(attn_weights, dim=-1)
+        attn_weights = nn.functional.softmax(attn_weights, dim=-1)
 
         if layer_head_mask is not None:
             if layer_head_mask.size() != (self.num_heads,):
@@ -1763,7 +1762,7 @@ class {{cookiecutter.camelcase_modelname}}Attention(nn.Module):
         else:
             attn_weights_reshaped = None
 
-        attn_probs = F.dropout(attn_weights, p=self.dropout, training=self.training)
+        attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)
 
         attn_output = torch.bmm(attn_probs, value_states)
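The two attention hunks above only rename the functional calls; the surrounding logic (mask addition, softmax over the key dimension, dropout, then the weighted sum via `torch.bmm`) is unchanged. A self-contained sketch of that core, with made-up shapes purely for illustration:

```python
import torch
from torch import nn

bsz, num_heads, tgt_len, src_len, head_dim = 2, 4, 5, 5, 8  # illustrative sizes
dropout_p, training = 0.1, True

attn_weights = torch.randn(bsz * num_heads, tgt_len, src_len)   # raw attention scores
value_states = torch.randn(bsz * num_heads, src_len, head_dim)
attention_mask = torch.zeros(bsz, 1, tgt_len, src_len)          # 0 = keep, -inf = mask out

# add the mask in (bsz, num_heads, tgt_len, src_len) shape, then flatten back
attn_weights = attn_weights.view(bsz, num_heads, tgt_len, src_len) + attention_mask
attn_weights = attn_weights.view(bsz * num_heads, tgt_len, src_len)

attn_weights = nn.functional.softmax(attn_weights, dim=-1)
attn_probs = nn.functional.dropout(attn_weights, p=dropout_p, training=training)
attn_output = torch.bmm(attn_probs, value_states)               # (bsz * num_heads, tgt_len, head_dim)
```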
@@ -1823,15 +1822,15 @@ class {{cookiecutter.camelcase_modelname}}EncoderLayer(nn.Module):
             layer_head_mask=layer_head_mask,
             output_attentions=output_attentions,
         )
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
         hidden_states = self.self_attn_layer_norm(hidden_states)
 
         residual = hidden_states
         hidden_states = self.activation_fn(self.fc1(hidden_states))
-        hidden_states = F.dropout(hidden_states, p=self.activation_dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.activation_dropout, training=self.training)
         hidden_states = self.fc2(hidden_states)
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
         hidden_states = self.final_layer_norm(hidden_states)
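The encoder and decoder layer hunks all follow the same residual pattern: sub-layer output, functional dropout gated on `self.training`, residual add, then LayerNorm. A minimal standalone module in that style, with invented dimensions and GELU assumed as the activation:

```python
import torch
from torch import nn


class FeedForwardBlock(nn.Module):
    """Illustrative residual feed-forward block in the template's style."""

    def __init__(self, d_model: int = 16, d_ff: int = 64, dropout: float = 0.1):
        super().__init__()
        self.fc1 = nn.Linear(d_model, d_ff)
        self.fc2 = nn.Linear(d_ff, d_model)
        self.activation_fn = nn.functional.gelu
        self.dropout = dropout
        self.final_layer_norm = nn.LayerNorm(d_model)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        residual = hidden_states
        hidden_states = self.activation_fn(self.fc1(hidden_states))
        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
        hidden_states = self.fc2(hidden_states)
        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
        hidden_states = residual + hidden_states
        return self.final_layer_norm(hidden_states)


# usage: FeedForwardBlock()(torch.randn(2, 5, 16)) keeps the (batch, seq, d_model) shape
```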
@@ -1916,7 +1915,7 @@ class {{cookiecutter.camelcase_modelname}}DecoderLayer(nn.Module):
             layer_head_mask=layer_head_mask,
             output_attentions=output_attentions,
         )
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
         hidden_states = self.self_attn_layer_norm(hidden_states)
@@ -1936,7 +1935,7 @@ class {{cookiecutter.camelcase_modelname}}DecoderLayer(nn.Module):
             past_key_value=cross_attn_past_key_value,
             output_attentions=output_attentions,
         )
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
         hidden_states = self.encoder_attn_layer_norm(hidden_states)
@@ -1946,9 +1945,9 @@ class {{cookiecutter.camelcase_modelname}}DecoderLayer(nn.Module):
         # Fully Connected
         residual = hidden_states
         hidden_states = self.activation_fn(self.fc1(hidden_states))
-        hidden_states = F.dropout(hidden_states, p=self.activation_dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.activation_dropout, training=self.training)
         hidden_states = self.fc2(hidden_states)
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
         hidden_states = self.final_layer_norm(hidden_states)
@@ -2171,7 +2170,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder({{cookiecutter.camelcase_model
     Args:
         config: {{cookiecutter.camelcase_modelname}}Config
-        embed_tokens (torch.nn.Embedding): output embedding
+        embed_tokens (nn.Embedding): output embedding
     """
 
     def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, embed_tokens: Optional[nn.Embedding] = None):
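The docstring fix above only shortens `torch.nn.Embedding` to `nn.Embedding`, matching the import style. As a usage sketch (the names and sizes are invented for illustration, and the surrounding Encoder/Decoder classes are cookiecutter placeholders), `embed_tokens` is simply a token embedding module that can be passed in, typically so encoder and decoder can share it:

```python
import torch
from torch import nn

vocab_size, d_model, pad_token_id = 100, 16, 0  # illustrative values

# a token embedding module that would be passed as `embed_tokens`
shared = nn.Embedding(vocab_size, d_model, padding_idx=pad_token_id)

input_ids = torch.tensor([[5, 7, 9, pad_token_id]])
inputs_embeds = shared(input_ids)  # (batch, seq_len, d_model)
```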
@@ -2270,7 +2269,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder({{cookiecutter.camelcase_model
         hidden_states = inputs_embeds + embed_pos
         hidden_states = self.layernorm_embedding(hidden_states)
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
 
         # expand attention_mask
         if attention_mask is not None:
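Both the encoder and decoder forward passes open the same way: token embeddings plus position embeddings, a LayerNorm over the sum, then functional dropout. A rough standalone sketch of that entry point (dimensions and the learned-position choice are assumptions for illustration, not taken from the template):

```python
import torch
from torch import nn

vocab_size, max_positions, d_model, dropout_p = 100, 32, 16, 0.1  # illustrative

embed_tokens = nn.Embedding(vocab_size, d_model)
embed_positions = nn.Embedding(max_positions, d_model)
layernorm_embedding = nn.LayerNorm(d_model)

input_ids = torch.tensor([[5, 7, 9, 2]])
positions = torch.arange(input_ids.shape[1]).unsqueeze(0)

inputs_embeds = embed_tokens(input_ids)
embed_pos = embed_positions(positions)

hidden_states = inputs_embeds + embed_pos
hidden_states = layernorm_embedding(hidden_states)
hidden_states = nn.functional.dropout(hidden_states, p=dropout_p, training=True)
```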
@@ -2337,7 +2336,7 @@ class {{cookiecutter.camelcase_modelname}}Decoder({{cookiecutter.camelcase_model
     Args:
         config: {{cookiecutter.camelcase_modelname}}Config
-        embed_tokens (torch.nn.Embedding): output embedding
+        embed_tokens (nn.Embedding): output embedding
     """
 
     def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, embed_tokens: Optional[nn.Embedding] = None):
@@ -2506,7 +2505,7 @@ class {{cookiecutter.camelcase_modelname}}Decoder({{cookiecutter.camelcase_model
         hidden_states = inputs_embeds + positions
         hidden_states = self.layernorm_embedding(hidden_states)
-        hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
+        hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training)
 
         # decoder layers
         all_hidden_states = () if output_hidden_states else None
templates/adding_a_new_model/open_model_proposals/ADD_BIG_BIRD.md
@@ -725,7 +725,7 @@ defined by the name of the class attribute you give the layer. Let's
 define a dummy model in PyTorch, called `SimpleModel` as follows:
 
 ```python
-import torch.nn as nn
+from torch import nn
 
 class SimpleModel(nn.Module):
     def __init__(self):