chenpangpang / transformers · Commits · dc17f2a1

Unverified commit dc17f2a1, authored Jan 16, 2020 by Thomas Wolf, committed via GitHub on Jan 16, 2020.

Merge pull request #2538 from huggingface/py3_super

💄 super

Parents: 88085484, a98b2ca8

Changes: 75 files in the full commit; this page shows 20 changed files with 53 additions and 54 deletions (+53 -54).
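Every hunk on this page applies the same mechanical change: the Python 2 compatible two-argument call super(ClassName, self) is replaced by the zero-argument super() that Python 3 resolves from the enclosing class. A minimal sketch of the pattern (the class and attribute names here are illustrative, not taken from the diff):

    import torch.nn as nn

    class MyBlock(nn.Module):  # illustrative name, not from the diff
        def __init__(self, hidden_size):
            # Python 2 compatible form removed by this commit:
            #   super(MyBlock, self).__init__()
            # Python 3 form introduced everywhere:
            super().__init__()
            self.dense = nn.Linear(hidden_size, hidden_size)

Both forms call the same method in the same MRO order; the zero-argument form is simply shorter and stays correct if the class is later renamed.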
Files changed on this page:

src/transformers/modeling_utils.py                  +8  -8
src/transformers/modeling_xlm.py                    +9  -9
src/transformers/modeling_xlnet.py                  +10 -10
src/transformers/optimization.py                    +1  -1
src/transformers/optimization_tf.py                 +8  -8
src/transformers/tokenization_albert.py             +1  -1
src/transformers/tokenization_bert.py               +2  -2
src/transformers/tokenization_bert_japanese.py      +1  -0
src/transformers/tokenization_camembert.py          +1  -1
src/transformers/tokenization_ctrl.py               +1  -1
src/transformers/tokenization_gpt2.py               +2  -4
src/transformers/tokenization_openai.py             +1  -1
src/transformers/tokenization_roberta.py            +1  -1
src/transformers/tokenization_t5.py                 +1  -1
src/transformers/tokenization_transfo_xl.py         +1  -1
src/transformers/tokenization_utils.py              +1  -1
src/transformers/tokenization_xlm.py                +1  -1
src/transformers/tokenization_xlm_roberta.py        +1  -1
src/transformers/tokenization_xlnet.py              +1  -1
templates/adding_a_new_model/configuration_xxx.py   +1  -1
src/transformers/modeling_utils.py (view file @ dc17f2a1)

@@ -47,7 +47,7 @@ except ImportError:
         """

         def __init__(self, *args, **kwargs):
-            super(Identity, self).__init__()
+            super().__init__()

         def forward(self, input):
             return input
@@ -97,7 +97,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
         return {"input_ids": torch.tensor(DUMMY_INPUTS)}

     def __init__(self, config, *inputs, **kwargs):
-        super(PreTrainedModel, self).__init__()
+        super().__init__()
         if not isinstance(config, PretrainedConfig):
             raise ValueError(
                 "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. "
@@ -1102,7 +1102,7 @@ class Conv1D(nn.Module):
         """ Conv1D layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2)
             Basically works like a Linear layer but the weights are transposed
         """
-        super(Conv1D, self).__init__()
+        super().__init__()
         self.nf = nf
         w = torch.empty(nx, nf)
         nn.init.normal_(w, std=0.02)
@@ -1120,7 +1120,7 @@ class PoolerStartLogits(nn.Module):
     """ Compute SQuAD start_logits from sequence hidden states. """

     def __init__(self, config):
-        super(PoolerStartLogits, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, 1)

     def forward(self, hidden_states, p_mask=None):
@@ -1145,7 +1145,7 @@ class PoolerEndLogits(nn.Module):
     """

     def __init__(self, config):
-        super(PoolerEndLogits, self).__init__()
+        super().__init__()
         self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
         self.activation = nn.Tanh()
         self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
@@ -1191,7 +1191,7 @@ class PoolerAnswerClass(nn.Module):
     """ Compute SQuAD 2.0 answer class from classification and start tokens hidden states. """

     def __init__(self, config):
-        super(PoolerAnswerClass, self).__init__()
+        super().__init__()
         self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
         self.activation = nn.Tanh()
         self.dense_1 = nn.Linear(config.hidden_size, 1, bias=False)
@@ -1276,7 +1276,7 @@ class SQuADHead(nn.Module):
     """

     def __init__(self, config):
-        super(SQuADHead, self).__init__()
+        super().__init__()
         self.start_n_top = config.start_n_top
         self.end_n_top = config.end_n_top
@@ -1368,7 +1368,7 @@ class SequenceSummary(nn.Module):
     """

     def __init__(self, config):
-        super(SequenceSummary, self).__init__()
+        super().__init__()
         self.summary_type = config.summary_type if hasattr(config, "summary_type") else "last"
         if self.summary_type == "attn":
src/transformers/modeling_xlm.py (view file @ dc17f2a1)

@@ -96,7 +96,7 @@ class MultiHeadAttention(nn.Module):
     NEW_ID = itertools.count()

     def __init__(self, n_heads, dim, config):
-        super(MultiHeadAttention, self).__init__()
+        super().__init__()
         self.layer_id = next(MultiHeadAttention.NEW_ID)
         self.output_attentions = config.output_attentions
         self.dim = dim
@@ -197,7 +197,7 @@ class MultiHeadAttention(nn.Module):
 class TransformerFFN(nn.Module):
     def __init__(self, in_dim, dim_hidden, out_dim, config):
-        super(TransformerFFN, self).__init__()
+        super().__init__()
         self.dropout = config.dropout
         self.lin1 = nn.Linear(in_dim, dim_hidden)
         self.lin2 = nn.Linear(dim_hidden, out_dim)
@@ -222,7 +222,7 @@ class XLMPreTrainedModel(PreTrainedModel):
     base_model_prefix = "transformer"

     def __init__(self, *inputs, **kwargs):
-        super(XLMPreTrainedModel, self).__init__(*inputs, **kwargs)
+        super().__init__(*inputs, **kwargs)

     @property
     def dummy_inputs(self):
@@ -354,7 +354,7 @@ class XLMModel(XLMPreTrainedModel):
     """

     def __init__(self, config):  # , dico, is_encoder, with_output):
-        super(XLMModel, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states
@@ -585,7 +585,7 @@ class XLMPredLayer(nn.Module):
     """

     def __init__(self, config):
-        super(XLMPredLayer, self).__init__()
+        super().__init__()
         self.asm = config.asm
         self.n_words = config.n_words
         self.pad_index = config.pad_index
@@ -661,7 +661,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLMWithLMHeadModel, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLMModel(config)
         self.pred_layer = XLMPredLayer(config)
@@ -754,7 +754,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLMForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLMModel(config)
@@ -856,7 +856,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLMForQuestionAnsweringSimple, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLMModel(config)
         self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
@@ -973,7 +973,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLMForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLMModel(config)
         self.qa_outputs = SQuADHead(config)
src/transformers/modeling_xlnet.py (view file @ dc17f2a1)

@@ -204,7 +204,7 @@ XLNetLayerNorm = nn.LayerNorm
 class XLNetRelativeAttention(nn.Module):
     def __init__(self, config):
-        super(XLNetRelativeAttention, self).__init__()
+        super().__init__()
         self.output_attentions = config.output_attentions

         if config.d_model % config.n_head != 0:
@@ -414,7 +414,7 @@ class XLNetRelativeAttention(nn.Module):
 class XLNetFeedForward(nn.Module):
     def __init__(self, config):
-        super(XLNetFeedForward, self).__init__()
+        super().__init__()
         self.layer_norm = XLNetLayerNorm(config.d_model, eps=config.layer_norm_eps)
         self.layer_1 = nn.Linear(config.d_model, config.d_inner)
         self.layer_2 = nn.Linear(config.d_inner, config.d_model)
@@ -437,7 +437,7 @@ class XLNetFeedForward(nn.Module):
 class XLNetLayer(nn.Module):
     def __init__(self, config):
-        super(XLNetLayer, self).__init__()
+        super().__init__()
         self.rel_attn = XLNetRelativeAttention(config)
         self.ff = XLNetFeedForward(config)
         self.dropout = nn.Dropout(config.dropout)
@@ -631,7 +631,7 @@ class XLNetModel(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetModel, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states
         self.output_past = config.output_past
@@ -996,7 +996,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetLMHeadModel, self).__init__(config)
+        super().__init__(config)
         self.attn_type = config.attn_type
         self.same_length = config.same_length
@@ -1119,7 +1119,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLNetModel(config)
@@ -1234,7 +1234,7 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetForTokenClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLNetModel(config)
@@ -1355,7 +1355,7 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetForMultipleChoice, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLNetModel(config)
         self.sequence_summary = SequenceSummary(config)
@@ -1463,7 +1463,7 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetForQuestionAnsweringSimple, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLNetModel(config)
@@ -1595,7 +1595,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
     """

     def __init__(self, config):
-        super(XLNetForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.start_n_top = config.start_n_top
         self.end_n_top = config.end_n_top
src/transformers/optimization.py (view file @ dc17f2a1)

@@ -114,7 +114,7 @@ class AdamW(Optimizer):
         if not 0.0 <= eps:
             raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(eps))
         defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, correct_bias=correct_bias)
-        super(AdamW, self).__init__(params, defaults)
+        super().__init__(params, defaults)

     def step(self, closure=None):
         """Performs a single optimization step.
src/transformers/optimization_tf.py (view file @ dc17f2a1)

@@ -24,7 +24,7 @@ class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
     """Applys a warmup schedule on a given learning rate decay schedule."""

     def __init__(self, initial_learning_rate, decay_schedule_fn, warmup_steps, power=1.0, name=None):
-        super(WarmUp, self).__init__()
+        super().__init__()
         self.initial_learning_rate = initial_learning_rate
         self.warmup_steps = warmup_steps
         self.power = power
@@ -102,7 +102,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
         name="AdamWeightDecay",
         **kwargs
     ):
-        super(AdamWeightDecay, self).__init__(learning_rate, beta_1, beta_2, epsilon, amsgrad, name, **kwargs)
+        super().__init__(learning_rate, beta_1, beta_2, epsilon, amsgrad, name, **kwargs)
         self.weight_decay_rate = weight_decay_rate
         self._include_in_weight_decay = include_in_weight_decay
         self._exclude_from_weight_decay = exclude_from_weight_decay
@@ -111,10 +111,10 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
     def from_config(cls, config):
         """Creates an optimizer from its config with WarmUp custom object."""
         custom_objects = {"WarmUp": WarmUp}
-        return super(AdamWeightDecay, cls).from_config(config, custom_objects=custom_objects)
+        return super().from_config(config, custom_objects=custom_objects)

     def _prepare_local(self, var_device, var_dtype, apply_state):
-        super(AdamWeightDecay, self)._prepare_local(var_device, var_dtype, apply_state)
+        super()._prepare_local(var_device, var_dtype, apply_state)
         apply_state["weight_decay_rate"] = tf.constant(self.weight_decay_rate, name="adam_weight_decay_rate")

     def _decay_weights_op(self, var, learning_rate, apply_state):
@@ -128,7 +128,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
     def apply_gradients(self, grads_and_vars, clip_norm, name=None):
         grads, tvars = list(zip(*grads_and_vars))
         (grads, _) = tf.clip_by_global_norm(grads, clip_norm=clip_norm)
-        return super(AdamWeightDecay, self).apply_gradients(zip(grads, tvars))
+        return super().apply_gradients(zip(grads, tvars))

     def _get_lr(self, var_device, var_dtype, apply_state):
         """Retrieves the learning rate with the given state."""
@@ -147,16 +147,16 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
         lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
         decay = self._decay_weights_op(var, lr_t, apply_state)
         with tf.control_dependencies([decay]):
-            return super(AdamWeightDecay, self)._resource_apply_dense(grad, var, **kwargs)
+            return super()._resource_apply_dense(grad, var, **kwargs)

     def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
         lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
         decay = self._decay_weights_op(var, lr_t, apply_state)
         with tf.control_dependencies([decay]):
-            return super(AdamWeightDecay, self)._resource_apply_sparse(grad, var, indices, **kwargs)
+            return super()._resource_apply_sparse(grad, var, indices, **kwargs)

     def get_config(self):
-        config = super(AdamWeightDecay, self).get_config()
+        config = super().get_config()
         config.update({"weight_decay_rate": self.weight_decay_rate})
         return config
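The optimization_tf.py hunks above also rewrite a classmethod (from_config) and several non-__init__ overrides. The zero-argument form behaves identically there, because super() picks up the __class__ cell of the enclosing method together with its first argument, whether that is self or cls. A small self-contained illustration (Base and Derived are made-up names for this sketch, not part of the diff):

    class Base:
        @classmethod
        def from_config(cls, config):
            # cls is the class the call started from, e.g. Derived below.
            return cls(**config)

    class Derived(Base):
        @classmethod
        def from_config(cls, config):
            # Equivalent to super(Derived, cls).from_config(config)
            return super().from_config(config)

    obj = Derived.from_config({})  # returns a Derived instance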
src/transformers/tokenization_albert.py (view file @ dc17f2a1)

@@ -79,7 +79,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
         mask_token="[MASK]",
         **kwargs
     ):
-        super(AlbertTokenizer, self).__init__(
+        super().__init__(
             bos_token=bos_token,
             eos_token=eos_token,
             unk_token=unk_token,
src/transformers/tokenization_bert.py (view file @ dc17f2a1)

@@ -163,7 +163,7 @@ class BertTokenizer(PreTrainedTokenizer):
                 This should likely be deactivated for Japanese:
                 see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
         """
-        super(BertTokenizer, self).__init__(
+        super().__init__(
             unk_token=unk_token,
             sep_token=sep_token,
             pad_token=pad_token,
@@ -554,7 +554,7 @@ class BertTokenizerFast(PreTrainedTokenizerFast):
         add_special_tokens=True,
         **kwargs
     ):
-        super(BertTokenizerFast, self).__init__(
+        super().__init__(
             unk_token=unk_token,
             sep_token=sep_token,
             pad_token=pad_token,
src/transformers/tokenization_bert_japanese.py (view file @ dc17f2a1)

@@ -115,6 +115,7 @@ class BertJapaneseTokenizer(BertTokenizer):
             mask_token=mask_token,
             **kwargs,
         )
+        # ^^ We call the grandparent's init, not the parent's.
         self.max_len_single_sentence = self.max_len - 2  # take into account special tokens
         self.max_len_sentences_pair = self.max_len - 3  # take into account special tokens
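tokenization_bert_japanese.py is the one file where the explicit two-argument form is kept on purpose: as the added comment says, the call above this hunk runs the grandparent's __init__ rather than BertTokenizer's, and skipping a parent is something the zero-argument super() cannot express, so the commit only adds a clarifying comment here. A reduced sketch of the skip pattern (all class names below are illustrative, not from the diff):

    class GrandParent:
        def __init__(self):
            print("GrandParent.__init__")

    class Parent(GrandParent):
        def __init__(self):
            print("Parent.__init__")

    class Child(Parent):
        def __init__(self):
            # Start the MRO search *after* Parent, so Parent.__init__ is skipped.
            super(Parent, self).__init__()

    Child()  # prints only "GrandParent.__init__"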
src/transformers/tokenization_camembert.py (view file @ dc17f2a1)

@@ -66,7 +66,7 @@ class CamembertTokenizer(PreTrainedTokenizer):
         additional_special_tokens=["<s>NOTUSED", "</s>NOTUSED"],
         **kwargs
     ):
-        super(CamembertTokenizer, self).__init__(
+        super().__init__(
             max_len=512,
             bos_token=bos_token,
             eos_token=eos_token,
src/transformers/tokenization_ctrl.py (view file @ dc17f2a1)

@@ -126,7 +126,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
     control_codes = CONTROL_CODES

     def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
-        super(CTRLTokenizer, self).__init__(unk_token=unk_token, **kwargs)
+        super().__init__(unk_token=unk_token, **kwargs)
         self.max_len_single_sentence = (
             self.max_len
         )  # no default special tokens - you can update this value if you add special tokens
src/transformers/tokenization_gpt2.py (view file @ dc17f2a1)

@@ -122,7 +122,7 @@ class GPT2Tokenizer(PreTrainedTokenizer):
         eos_token="<|endoftext|>",
         **kwargs
     ):
-        super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
+        super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
         self.max_len_single_sentence = (
             self.max_len
         )  # no default special tokens - you can update this value if you add special tokens
@@ -268,9 +268,7 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
         truncation_strategy="longest_first",
         **kwargs
     ):
-        super(GPT2TokenizerFast, self).__init__(
-            bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs
-        )
+        super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
         self._tokenizer = tk.Tokenizer(tk.models.BPE.from_files(vocab_file, merges_file))
         self._update_special_tokens()
src/transformers/tokenization_openai.py (view file @ dc17f2a1)

@@ -82,7 +82,7 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES

     def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
-        super(OpenAIGPTTokenizer, self).__init__(unk_token=unk_token, **kwargs)
+        super().__init__(unk_token=unk_token, **kwargs)
         self.max_len_single_sentence = (
             self.max_len
src/transformers/tokenization_roberta.py (view file @ dc17f2a1)

@@ -84,7 +84,7 @@ class RobertaTokenizer(GPT2Tokenizer):
         mask_token="<mask>",
         **kwargs
     ):
-        super(RobertaTokenizer, self).__init__(
+        super().__init__(
             vocab_file=vocab_file,
             merges_file=merges_file,
             errors=errors,
src/transformers/tokenization_t5.py (view file @ dc17f2a1)

@@ -91,7 +91,7 @@ class T5Tokenizer(PreTrainedTokenizer):
                 additional_special_tokens = []
             additional_special_tokens.extend(["<extra_id_{}>".format(i) for i in range(extra_ids)])

-        super(T5Tokenizer, self).__init__(
+        super().__init__(
             eos_token=eos_token,
             unk_token=unk_token,
             pad_token=pad_token,
src/transformers/tokenization_transfo_xl.py (view file @ dc17f2a1)

@@ -78,7 +78,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
         additional_special_tokens=["<formula>"],
         **kwargs
     ):
-        super(TransfoXLTokenizer, self).__init__(
+        super().__init__(
             unk_token=unk_token, eos_token=eos_token, additional_special_tokens=additional_special_tokens, **kwargs
         )
src/transformers/tokenization_utils.py (view file @ dc17f2a1)

@@ -1425,7 +1425,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer):
     _decoder = None

     def __init__(self, **kwargs):
-        super(PreTrainedTokenizerFast, self).__init__(**kwargs)
+        super().__init__(**kwargs)

     @property
     def tokenizer(self):
src/transformers/tokenization_xlm.py (view file @ dc17f2a1)

@@ -578,7 +578,7 @@ class XLMTokenizer(PreTrainedTokenizer):
         do_lowercase_and_remove_accent=True,
         **kwargs
     ):
-        super(XLMTokenizer, self).__init__(
+        super().__init__(
             unk_token=unk_token,
             bos_token=bos_token,
             sep_token=sep_token,
src/transformers/tokenization_xlm_roberta.py (view file @ dc17f2a1)

@@ -75,7 +75,7 @@ class XLMRobertaTokenizer(PreTrainedTokenizer):
         mask_token="<mask>",
         **kwargs
     ):
-        super(XLMRobertaTokenizer, self).__init__(
+        super().__init__(
             bos_token=bos_token,
             eos_token=eos_token,
             unk_token=unk_token,
src/transformers/tokenization_xlnet.py (view file @ dc17f2a1)

@@ -77,7 +77,7 @@ class XLNetTokenizer(PreTrainedTokenizer):
         additional_special_tokens=["<eop>", "<eod>"],
         **kwargs
     ):
-        super(XLNetTokenizer, self).__init__(
+        super().__init__(
             bos_token=bos_token,
             eos_token=eos_token,
             unk_token=unk_token,
templates/adding_a_new_model/configuration_xxx.py (view file @ dc17f2a1)

@@ -80,7 +80,7 @@ class XxxConfig(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(XxxConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
         self.n_positions = n_positions