Unverified commit c9df1b1d, authored Feb 08, 2021 by Lysandre Debut, committed by GitHub on Feb 08, 2021

Model templates (#10072)

parent 3b7e612a
Changes: 2 changed files with 4 additions and 4 deletions (+4 -4)

+3 -3  templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py
+1 -1  templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py (view file @ c9df1b1d)
@@ -161,7 +161,7 @@ class TF{{cookiecutter.camelcase_modelname}}SelfAttention(tf.keras.layers.Layer)
         self.num_attention_heads = config.num_attention_heads
         self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
         self.all_head_size = self.num_attention_heads * self.attention_head_size
-        self.rsqrt_att_head_size = 1.0 / math.sqrt(self.attention_head_size)
+        self.sqrt_att_head_size = math.sqrt(self.attention_head_size)
 
         self.query = tf.keras.layers.Dense(
             units=self.all_head_size, kernel_initializer=get_initializer(config.initializer_range), name="query"
@@ -201,8 +201,8 @@ class TF{{cookiecutter.camelcase_modelname}}SelfAttention(tf.keras.layers.Layer)
         # attention scores.
         # (batch size, num_heads, seq_len_q, seq_len_k)
         attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
-        dk = tf.cast(self.rsqrt_att_head_size, dtype=attention_scores.dtype)
-        attention_scores = tf.multiply(attention_scores, dk)
+        dk = tf.cast(self.sqrt_att_head_size, dtype=attention_scores.dtype)
+        attention_scores = tf.divide(attention_scores, dk)
 
         if attention_mask is not None:
             # Apply the attention mask is (precomputed for all layers in TF{{cookiecutter.camelcase_modelname}}Model call() function)
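The TF change above swaps a reciprocal scaling factor (multiply by 1 / sqrt(attention_head_size)) for a direct divide by sqrt(attention_head_size); both scale the raw attention scores identically up to floating-point rounding. A minimal sketch of the two forms side by side, using made-up tensor shapes rather than the template's config values:

import math
import tensorflow as tf

# Hypothetical shapes for illustration only; the template derives these from the model config.
batch, num_heads, seq_len, head_size = 2, 4, 8, 16
query_layer = tf.random.normal((batch, num_heads, seq_len, head_size))
key_layer = tf.random.normal((batch, num_heads, seq_len, head_size))

# Raw dot-product scores: (batch, num_heads, seq_len_q, seq_len_k)
attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)

# Form added by this commit: divide by sqrt(d_head)
dk = tf.cast(math.sqrt(head_size), dtype=attention_scores.dtype)
scaled_divide = tf.divide(attention_scores, dk)

# Form removed by this commit: multiply by 1 / sqrt(d_head)
rdk = tf.cast(1.0 / math.sqrt(head_size), dtype=attention_scores.dtype)
scaled_multiply = tf.multiply(attention_scores, rdk)

tf.debugging.assert_near(scaled_divide, scaled_multiply)  # equivalent up to rounding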
templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py (view file @ c9df1b1d)
@@ -593,7 +593,7 @@ class {{cookiecutter.camelcase_modelname}}Encoder(nn.Module):
         )
 
 
-# Copied from transformers.models.bert.modeling_bert.BertPredictionHead with Bert->{{cookiecutter.camelcase_modelname}}
+# Copied from transformers.models.bert.modeling_bert.BertPredictionHeadTransform with Bert->{{cookiecutter.camelcase_modelname}}
 class {{cookiecutter.camelcase_modelname}}PredictionHeadTransform(nn.Module):
     def __init__(self, config):
         super().__init__()
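For reference, the corrected comment points at BertPredictionHeadTransform, a small dense -> activation -> LayerNorm block. A rough sketch of what the generated template class roughly looks like, assuming the standard BERT layout and the ACT2FN activation lookup from transformers.activations (none of this is part of the diff itself):

import torch.nn as nn
from transformers.activations import ACT2FN  # string -> activation function lookup

class PredictionHeadTransform(nn.Module):
    # Sketch of the dense -> activation -> LayerNorm block the "Copied from" comment refers to.
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        # config.hidden_act may be a string key into ACT2FN or a callable
        self.transform_act_fn = ACT2FN[config.hidden_act] if isinstance(config.hidden_act, str) else config.hidden_act
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.transform_act_fn(hidden_states)
        hidden_states = self.LayerNorm(hidden_states)
        return hidden_states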