Commit dc17f2a1
Authored Jan 16, 2020 by Thomas Wolf, committed by GitHub on Jan 16, 2020

Merge pull request #2538 from huggingface/py3_super

💄 super

Parents: 88085484, a98b2ca8
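The merge is a mechanical Python 3 cleanup: every two-argument `super(ClassName, self).__init__(...)` call (the spelling Python 2 required) becomes the zero-argument `super().__init__(...)` that Python 3 resolves from the enclosing class, with no change in behavior. A minimal sketch of the before/after pattern, using a hypothetical `Block` module rather than any class from this diff:

import torch.nn as nn


class Block(nn.Module):  # hypothetical example class, not part of this commit
    def __init__(self, hidden_size):
        # Before (Python 2 compatible): super(Block, self).__init__()
        # After (Python 3 only), as applied throughout this commit:
        super().__init__()
        self.dense = nn.Linear(hidden_size, hidden_size)

The zero-argument form avoids repeating the class name and keeps working if the class is renamed, which is what makes the rewrite safe to apply mechanically across the whole commit.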
Changes: 75 files
Showing 20 changed files with 53 additions and 54 deletions (+53 / -54).
src/transformers/modeling_utils.py  +8 -8
src/transformers/modeling_xlm.py  +9 -9
src/transformers/modeling_xlnet.py  +10 -10
src/transformers/optimization.py  +1 -1
src/transformers/optimization_tf.py  +8 -8
src/transformers/tokenization_albert.py  +1 -1
src/transformers/tokenization_bert.py  +2 -2
src/transformers/tokenization_bert_japanese.py  +1 -0
src/transformers/tokenization_camembert.py  +1 -1
src/transformers/tokenization_ctrl.py  +1 -1
src/transformers/tokenization_gpt2.py  +2 -4
src/transformers/tokenization_openai.py  +1 -1
src/transformers/tokenization_roberta.py  +1 -1
src/transformers/tokenization_t5.py  +1 -1
src/transformers/tokenization_transfo_xl.py  +1 -1
src/transformers/tokenization_utils.py  +1 -1
src/transformers/tokenization_xlm.py  +1 -1
src/transformers/tokenization_xlm_roberta.py  +1 -1
src/transformers/tokenization_xlnet.py  +1 -1
templates/adding_a_new_model/configuration_xxx.py  +1 -1
src/transformers/modeling_utils.py

@@ -47,7 +47,7 @@ except ImportError:
     """

     def __init__(self, *args, **kwargs):
-        super(Identity, self).__init__()
+        super().__init__()

     def forward(self, input):
         return input
@@ -97,7 +97,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
         return {"input_ids": torch.tensor(DUMMY_INPUTS)}

     def __init__(self, config, *inputs, **kwargs):
-        super(PreTrainedModel, self).__init__()
+        super().__init__()
         if not isinstance(config, PretrainedConfig):
             raise ValueError(
                 "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. "
@@ -1102,7 +1102,7 @@ class Conv1D(nn.Module):
         """ Conv1D layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2)
             Basically works like a Linear layer but the weights are transposed
         """
-        super(Conv1D, self).__init__()
+        super().__init__()
         self.nf = nf
         w = torch.empty(nx, nf)
         nn.init.normal_(w, std=0.02)
@@ -1120,7 +1120,7 @@ class PoolerStartLogits(nn.Module):
     """ Compute SQuAD start_logits from sequence hidden states. """

     def __init__(self, config):
-        super(PoolerStartLogits, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, 1)

     def forward(self, hidden_states, p_mask=None):
@@ -1145,7 +1145,7 @@ class PoolerEndLogits(nn.Module):
     """

     def __init__(self, config):
-        super(PoolerEndLogits, self).__init__()
+        super().__init__()
         self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
         self.activation = nn.Tanh()
         self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
@@ -1191,7 +1191,7 @@ class PoolerAnswerClass(nn.Module):
     """ Compute SQuAD 2.0 answer class from classification and start tokens hidden states. """

     def __init__(self, config):
-        super(PoolerAnswerClass, self).__init__()
+        super().__init__()
         self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
         self.activation = nn.Tanh()
         self.dense_1 = nn.Linear(config.hidden_size, 1, bias=False)
@@ -1276,7 +1276,7 @@ class SQuADHead(nn.Module):
     """

     def __init__(self, config):
-        super(SQuADHead, self).__init__()
+        super().__init__()
         self.start_n_top = config.start_n_top
         self.end_n_top = config.end_n_top
@@ -1368,7 +1368,7 @@ class SequenceSummary(nn.Module):
     """

     def __init__(self, config):
-        super(SequenceSummary, self).__init__()
+        super().__init__()

         self.summary_type = config.summary_type if hasattr(config, "summary_type") else "last"
         if self.summary_type == "attn":
src/transformers/modeling_xlm.py

@@ -96,7 +96,7 @@ class MultiHeadAttention(nn.Module):
     NEW_ID = itertools.count()

     def __init__(self, n_heads, dim, config):
-        super(MultiHeadAttention, self).__init__()
+        super().__init__()
         self.layer_id = next(MultiHeadAttention.NEW_ID)
         self.output_attentions = config.output_attentions
         self.dim = dim
@@ -197,7 +197,7 @@ class MultiHeadAttention(nn.Module):
 class TransformerFFN(nn.Module):
     def __init__(self, in_dim, dim_hidden, out_dim, config):
-        super(TransformerFFN, self).__init__()
+        super().__init__()
         self.dropout = config.dropout
         self.lin1 = nn.Linear(in_dim, dim_hidden)
         self.lin2 = nn.Linear(dim_hidden, out_dim)
@@ -222,7 +222,7 @@ class XLMPreTrainedModel(PreTrainedModel):
     base_model_prefix = "transformer"

     def __init__(self, *inputs, **kwargs):
-        super(XLMPreTrainedModel, self).__init__(*inputs, **kwargs)
+        super().__init__(*inputs, **kwargs)

     @property
     def dummy_inputs(self):
@@ -354,7 +354,7 @@ class XLMModel(XLMPreTrainedModel):
     """

     def __init__(self, config):  # , dico, is_encoder, with_output):
-        super(XLMModel, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states
@@ -585,7 +585,7 @@ class XLMPredLayer(nn.Module):
     """

     def __init__(self, config):
-        super(XLMPredLayer, self).__init__()
+        super().__init__()
         self.asm = config.asm
         self.n_words = config.n_words
         self.pad_index = config.pad_index
@@ -661,7 +661,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLMWithLMHeadModel, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLMModel(config)
         self.pred_layer = XLMPredLayer(config)
@@ -754,7 +754,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLMForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLMModel(config)
@@ -856,7 +856,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLMForQuestionAnsweringSimple, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLMModel(config)
         self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
@@ -973,7 +973,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLMForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLMModel(config)
         self.qa_outputs = SQuADHead(config)
src/transformers/modeling_xlnet.py

@@ -204,7 +204,7 @@ XLNetLayerNorm = nn.LayerNorm
 class XLNetRelativeAttention(nn.Module):
     def __init__(self, config):
-        super(XLNetRelativeAttention, self).__init__()
+        super().__init__()
         self.output_attentions = config.output_attentions

         if config.d_model % config.n_head != 0:
@@ -414,7 +414,7 @@ class XLNetRelativeAttention(nn.Module):
 class XLNetFeedForward(nn.Module):
     def __init__(self, config):
-        super(XLNetFeedForward, self).__init__()
+        super().__init__()
         self.layer_norm = XLNetLayerNorm(config.d_model, eps=config.layer_norm_eps)
         self.layer_1 = nn.Linear(config.d_model, config.d_inner)
         self.layer_2 = nn.Linear(config.d_inner, config.d_model)
@@ -437,7 +437,7 @@ class XLNetFeedForward(nn.Module):
 class XLNetLayer(nn.Module):
     def __init__(self, config):
-        super(XLNetLayer, self).__init__()
+        super().__init__()
         self.rel_attn = XLNetRelativeAttention(config)
         self.ff = XLNetFeedForward(config)
         self.dropout = nn.Dropout(config.dropout)
@@ -631,7 +631,7 @@ class XLNetModel(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetModel, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states
         self.output_past = config.output_past
@@ -996,7 +996,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetLMHeadModel, self).__init__(config)
+        super().__init__(config)
         self.attn_type = config.attn_type
         self.same_length = config.same_length
@@ -1119,7 +1119,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLNetModel(config)
@@ -1234,7 +1234,7 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetForTokenClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLNetModel(config)
@@ -1355,7 +1355,7 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetForMultipleChoice, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLNetModel(config)
         self.sequence_summary = SequenceSummary(config)
@@ -1463,7 +1463,7 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetForQuestionAnsweringSimple, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLNetModel(config)
@@ -1595,7 +1595,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.start_n_top = config.start_n_top
         self.end_n_top = config.end_n_top
src/transformers/optimization.py

@@ -114,7 +114,7 @@ class AdamW(Optimizer):
         if not 0.0 <= eps:
             raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(eps))
         defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, correct_bias=correct_bias)
-        super(AdamW, self).__init__(params, defaults)
+        super().__init__(params, defaults)

     def step(self, closure=None):
         """Performs a single optimization step.
src/transformers/optimization_tf.py

@@ -24,7 +24,7 @@ class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
     """Applys a warmup schedule on a given learning rate decay schedule."""

     def __init__(self, initial_learning_rate, decay_schedule_fn, warmup_steps, power=1.0, name=None):
-        super(WarmUp, self).__init__()
+        super().__init__()
         self.initial_learning_rate = initial_learning_rate
         self.warmup_steps = warmup_steps
         self.power = power
@@ -102,7 +102,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
         name="AdamWeightDecay",
         **kwargs
     ):
-        super(AdamWeightDecay, self).__init__(learning_rate, beta_1, beta_2, epsilon, amsgrad, name, **kwargs)
+        super().__init__(learning_rate, beta_1, beta_2, epsilon, amsgrad, name, **kwargs)
         self.weight_decay_rate = weight_decay_rate
         self._include_in_weight_decay = include_in_weight_decay
         self._exclude_from_weight_decay = exclude_from_weight_decay
@@ -111,10 +111,10 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
     def from_config(cls, config):
         """Creates an optimizer from its config with WarmUp custom object."""
         custom_objects = {"WarmUp": WarmUp}
-        return super(AdamWeightDecay, cls).from_config(config, custom_objects=custom_objects)
+        return super().from_config(config, custom_objects=custom_objects)

     def _prepare_local(self, var_device, var_dtype, apply_state):
-        super(AdamWeightDecay, self)._prepare_local(var_device, var_dtype, apply_state)
+        super()._prepare_local(var_device, var_dtype, apply_state)
         apply_state["weight_decay_rate"] = tf.constant(self.weight_decay_rate, name="adam_weight_decay_rate")

     def _decay_weights_op(self, var, learning_rate, apply_state):
@@ -128,7 +128,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
     def apply_gradients(self, grads_and_vars, clip_norm, name=None):
         grads, tvars = list(zip(*grads_and_vars))
         (grads, _) = tf.clip_by_global_norm(grads, clip_norm=clip_norm)
-        return super(AdamWeightDecay, self).apply_gradients(zip(grads, tvars))
+        return super().apply_gradients(zip(grads, tvars))

     def _get_lr(self, var_device, var_dtype, apply_state):
         """Retrieves the learning rate with the given state."""
@@ -147,16 +147,16 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
         lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
         decay = self._decay_weights_op(var, lr_t, apply_state)
         with tf.control_dependencies([decay]):
-            return super(AdamWeightDecay, self)._resource_apply_dense(grad, var, **kwargs)
+            return super()._resource_apply_dense(grad, var, **kwargs)

     def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
         lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
         decay = self._decay_weights_op(var, lr_t, apply_state)
         with tf.control_dependencies([decay]):
-            return super(AdamWeightDecay, self)._resource_apply_sparse(grad, var, indices, **kwargs)
+            return super()._resource_apply_sparse(grad, var, indices, **kwargs)

     def get_config(self):
-        config = super(AdamWeightDecay, self).get_config()
+        config = super().get_config()
         config.update({"weight_decay_rate": self.weight_decay_rate})
         return config
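One detail worth noting in optimization_tf.py: `from_config` is a classmethod, so its Python 2 spelling was `super(AdamWeightDecay, cls)` rather than `super(AdamWeightDecay, self)`. The zero-argument `super()` covers both cases, because Python 3 fills in the enclosing class and the first argument of the current call automatically. A minimal, self-contained sketch of the classmethod case, with hypothetical Base/Child classes that are not part of this diff:

class Base:
    def __init__(self, lr=0.001, checked=False):
        self.lr = lr
        self.checked = checked

    @classmethod
    def from_config(cls, config):
        # cls is the class the call started on (Child below), so the
        # alternative constructor still builds the right subclass.
        return cls(**config)


class Child(Base):
    @classmethod
    def from_config(cls, config):
        config = {**config, "checked": True}
        # Python 2 spelling: super(Child, cls).from_config(config)
        # The zero-argument form also works inside classmethods:
        return super().from_config(config)


opt = Child.from_config({"lr": 0.01})
print(type(opt).__name__, opt.lr, opt.checked)  # Child 0.01 True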
src/transformers/tokenization_albert.py

@@ -79,7 +79,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
         mask_token="[MASK]",
         **kwargs
     ):
-        super(AlbertTokenizer, self).__init__(
+        super().__init__(
             bos_token=bos_token,
             eos_token=eos_token,
             unk_token=unk_token,
src/transformers/tokenization_bert.py

@@ -163,7 +163,7 @@ class BertTokenizer(PreTrainedTokenizer):
                 This should likely be deactivated for Japanese:
                 see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
         """
-        super(BertTokenizer, self).__init__(
+        super().__init__(
             unk_token=unk_token,
             sep_token=sep_token,
             pad_token=pad_token,
@@ -554,7 +554,7 @@ class BertTokenizerFast(PreTrainedTokenizerFast):
         add_special_tokens=True,
         **kwargs
     ):
-        super(BertTokenizerFast, self).__init__(
+        super().__init__(
             unk_token=unk_token,
             sep_token=sep_token,
             pad_token=pad_token,
src/transformers/tokenization_bert_japanese.py

@@ -115,6 +115,7 @@ class BertJapaneseTokenizer(BertTokenizer):
             mask_token=mask_token,
             **kwargs,
         )
+        # ^^ We call the grandparent's init, not the parent's.

         self.max_len_single_sentence = self.max_len - 2  # take into account special tokens
         self.max_len_sentences_pair = self.max_len - 3  # take into account special tokens
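tokenization_bert_japanese.py is the only file on this page that gains a line without removing one: the new comment flags that the super call above it deliberately invokes the grandparent's `__init__` rather than the parent's, which is presumably why that call keeps the explicit two-argument `super(...)` form and is not rewritten to the zero-argument one. A minimal, self-contained sketch of that "skip the parent in the MRO" pattern, with hypothetical classes A/B/C that are not the actual tokenizer hierarchy:

class A:
    def __init__(self):
        print("A.__init__")


class B(A):
    def __init__(self):
        super().__init__()
        print("B.__init__")


class C(B):
    def __init__(self):
        # Start the MRO lookup *after* B, so A.__init__ runs but B.__init__ is skipped.
        super(B, self).__init__()
        print("C.__init__")


C()  # prints "A.__init__" then "C.__init__"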
src/transformers/tokenization_camembert.py

@@ -66,7 +66,7 @@ class CamembertTokenizer(PreTrainedTokenizer):
         additional_special_tokens=["<s>NOTUSED", "</s>NOTUSED"],
         **kwargs
     ):
-        super(CamembertTokenizer, self).__init__(
+        super().__init__(
             max_len=512,
             bos_token=bos_token,
             eos_token=eos_token,
src/transformers/tokenization_ctrl.py

@@ -126,7 +126,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
     control_codes = CONTROL_CODES

     def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
-        super(CTRLTokenizer, self).__init__(unk_token=unk_token, **kwargs)
+        super().__init__(unk_token=unk_token, **kwargs)
         self.max_len_single_sentence = (
             self.max_len
         )  # no default special tokens - you can update this value if you add special tokens
src/transformers/tokenization_gpt2.py

@@ -122,7 +122,7 @@ class GPT2Tokenizer(PreTrainedTokenizer):
         eos_token="<|endoftext|>",
         **kwargs
     ):
-        super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
+        super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
         self.max_len_single_sentence = (
             self.max_len
         )  # no default special tokens - you can update this value if you add special tokens
@@ -268,9 +268,7 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
         truncation_strategy="longest_first",
         **kwargs
     ):
-        super(GPT2TokenizerFast, self).__init__(
-            bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs
-        )
+        super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)

         self._tokenizer = tk.Tokenizer(tk.models.BPE.from_files(vocab_file, merges_file))
         self._update_special_tokens()
src/transformers/tokenization_openai.py

@@ -82,7 +82,7 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES

     def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
-        super(OpenAIGPTTokenizer, self).__init__(unk_token=unk_token, **kwargs)
+        super().__init__(unk_token=unk_token, **kwargs)
         self.max_len_single_sentence = (
             self.max_len
src/transformers/tokenization_roberta.py

@@ -84,7 +84,7 @@ class RobertaTokenizer(GPT2Tokenizer):
         mask_token="<mask>",
         **kwargs
     ):
-        super(RobertaTokenizer, self).__init__(
+        super().__init__(
             vocab_file=vocab_file,
             merges_file=merges_file,
             errors=errors,
src/transformers/tokenization_t5.py

@@ -91,7 +91,7 @@ class T5Tokenizer(PreTrainedTokenizer):
             additional_special_tokens = []
             additional_special_tokens.extend(["<extra_id_{}>".format(i) for i in range(extra_ids)])

-        super(T5Tokenizer, self).__init__(
+        super().__init__(
             eos_token=eos_token,
             unk_token=unk_token,
             pad_token=pad_token,
src/transformers/tokenization_transfo_xl.py

@@ -78,7 +78,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
         additional_special_tokens=["<formula>"],
         **kwargs
     ):
-        super(TransfoXLTokenizer, self).__init__(
+        super().__init__(
             unk_token=unk_token, eos_token=eos_token, additional_special_tokens=additional_special_tokens, **kwargs
         )
src/transformers/tokenization_utils.py

@@ -1425,7 +1425,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer):
     _decoder = None

     def __init__(self, **kwargs):
-        super(PreTrainedTokenizerFast, self).__init__(**kwargs)
+        super().__init__(**kwargs)

     @property
     def tokenizer(self):
src/transformers/tokenization_xlm.py

@@ -578,7 +578,7 @@ class XLMTokenizer(PreTrainedTokenizer):
         do_lowercase_and_remove_accent=True,
         **kwargs
     ):
-        super(XLMTokenizer, self).__init__(
+        super().__init__(
             unk_token=unk_token,
             bos_token=bos_token,
             sep_token=sep_token,
src/transformers/tokenization_xlm_roberta.py

@@ -75,7 +75,7 @@ class XLMRobertaTokenizer(PreTrainedTokenizer):
         mask_token="<mask>",
         **kwargs
     ):
-        super(XLMRobertaTokenizer, self).__init__(
+        super().__init__(
             bos_token=bos_token,
             eos_token=eos_token,
             unk_token=unk_token,
src/transformers/tokenization_xlnet.py

@@ -77,7 +77,7 @@ class XLNetTokenizer(PreTrainedTokenizer):
         additional_special_tokens=["<eop>", "<eod>"],
         **kwargs
     ):
-        super(XLNetTokenizer, self).__init__(
+        super().__init__(
             bos_token=bos_token,
             eos_token=eos_token,
             unk_token=unk_token,
templates/adding_a_new_model/configuration_xxx.py

@@ -80,7 +80,7 @@ class XxxConfig(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(XxxConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
         self.n_positions = n_positions