Unverified Commit 7251a473 authored by Julien Plu, committed by GitHub

Fix template (#9697)

parent 14042d56
@@ -307,7 +307,7 @@ class TFRobertaPooler(tf.keras.layers.Layer):
         return pooled_output


-# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfAttention
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfAttention with Bert->Roberta
 class TFRobertaSelfAttention(tf.keras.layers.Layer):
     def __init__(self, config, **kwargs):
         super().__init__(**kwargs)
@@ -355,7 +355,7 @@ class TFRobertaSelfAttention(tf.keras.layers.Layer):
         attention_scores = tf.einsum("aecd,abcd->acbe", key_layer, query_layer)

         if attention_mask is not None:
-            # Apply the attention mask is (precomputed for all layers in TFBertModel call() function)
+            # Apply the attention mask is (precomputed for all layers in TFRobertaModel call() function)
             attention_scores = attention_scores + attention_mask

         # Normalize the attention scores to probabilities.
@@ -375,7 +375,7 @@ class TFRobertaSelfAttention(tf.keras.layers.Layer):
         return outputs


-# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfOutput with Bert->Roberta
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfOutput
 class TFRobertaSelfOutput(tf.keras.layers.Layer):
     def __init__(self, config, **kwargs):
         super().__init__(**kwargs)
...
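Note on the attention hunk above: in the TF BERT-derived attention layers, the mask that comment refers to is additive — it is precomputed once in the model's `call()` as 0.0 for real tokens and a large negative value for padding, so masked positions get essentially zero weight after the softmax. A minimal sketch of that pattern with toy shapes (simplified illustration, not the library code):

```python
import tensorflow as tf

# Toy shapes: (batch, heads, query_len, key_len) = (1, 1, 3, 3).
attention_scores = tf.random.normal((1, 1, 3, 3))

# The mask is built once per forward pass: 0.0 for real tokens and a large
# negative number for padding (here the last key position is padding).
padding = tf.constant([[0.0, 0.0, -10000.0]])
attention_mask = padding[:, tf.newaxis, tf.newaxis, :]  # (1, 1, 1, key_len), broadcastable

# "Apply the attention mask": plain addition before the softmax.
attention_scores = attention_scores + attention_mask
attention_probs = tf.nn.softmax(attention_scores, axis=-1)

print(attention_probs.numpy()[..., -1])  # ~0 everywhere: the masked key gets no weight
```

Adding the mask before the softmax is equivalent to zeroing those attention probabilities, without any conditional logic on the einsum path.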
@@ -241,7 +241,7 @@ class TF{{cookiecutter.camelcase_modelname}}Embeddings(tf.keras.layers.Layer):
-# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfAttention
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfAttention with Bert->{{cookiecutter.camelcase_modelname}}
 class TF{{cookiecutter.camelcase_modelname}}SelfAttention(tf.keras.layers.Layer):
     def __init__(self, config, **kwargs):
         super().__init__(**kwargs)
@@ -309,7 +309,7 @@ class TF{{cookiecutter.camelcase_modelname}}SelfAttention(tf.keras.layers.Layer)
         return outputs


-# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfOutput with Bert->{{cookiecutter.camelcase_modelname}}
+# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfOutput
 class TF{{cookiecutter.camelcase_modelname}}SelfOutput(tf.keras.layers.Layer):
     def __init__(self, config, **kwargs):
         super().__init__(**kwargs)
...
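For context, the `# Copied from ... with Bert->Roberta` (or `with Bert->{{cookiecutter.camelcase_modelname}}`) suffix tells the repository's copy-consistency check which identifier substitutions to apply before comparing a copied class against its BERT source, which is why the annotations above have to match what each class actually renames. A rough sketch of that substitute-and-compare idea — `check_copy` here is a hypothetical helper for illustration, not the real `utils/check_copies.py` logic:

```python
def check_copy(original_source: str, copied_source: str, replacements: str) -> bool:
    """Apply the 'with Old->New' substitutions from a '# Copied from' comment to
    the original source, then require an exact match. Hypothetical, simplified
    helper for illustration -- not the actual utils/check_copies.py implementation."""
    transformed = original_source
    for pair in replacements.split(","):
        old, new = pair.strip().split("->")
        transformed = transformed.replace(old, new)
    return transformed == copied_source


bert_src = (
    "class TFBertSelfAttention(tf.keras.layers.Layer):\n"
    "    def __init__(self, config, **kwargs):\n"
    "        super().__init__(**kwargs)\n"
)
roberta_src = bert_src.replace("Bert", "Roberta")

# The annotation's substitutions make the copy line up with its source.
assert check_copy(bert_src, roberta_src, "Bert->Roberta")
# Without the right substitution the copy no longer matches.
assert not check_copy(bert_src, roberta_src, "Bert->Bert")
```

In the repository this kind of check runs with the quality scripts (e.g. `make fix-copies` can regenerate the copied code from the annotated source), so an annotation that is out of sync either fails CI or rewrites the class incorrectly.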