Commit 83a41d39 authored by Julien Chaumond

💄 super

parent cd51893d
@@ -47,7 +47,7 @@ except ImportError:
         """
         def __init__(self, *args, **kwargs):
-            super(Identity, self).__init__()
+            super().__init__()
         def forward(self, input):
             return input
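
For reference, a minimal, self-contained sketch (hypothetical classes, not taken from this diff) of the rewrite applied throughout this commit: inside an instance method, Python 3's zero-argument super() is equivalent to the explicit super(CurrentClass, self) form, including when arguments such as config or *inputs, **kwargs are forwarded.

class Base:
    def __init__(self, name="base"):
        self.name = name


class OldStyle(Base):
    def __init__(self, name):
        # Python 2 compatible spelling, as on the left-hand side of the diff
        super(OldStyle, self).__init__(name)


class NewStyle(Base):
    def __init__(self, name):
        # Python 3 only spelling, as on the right-hand side of the diff
        super().__init__(name)


assert OldStyle("x").name == NewStyle("x").name == "x"
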
@@ -97,7 +97,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
         return {"input_ids": torch.tensor(DUMMY_INPUTS)}
     def __init__(self, config, *inputs, **kwargs):
-        super(PreTrainedModel, self).__init__()
+        super().__init__()
         if not isinstance(config, PretrainedConfig):
             raise ValueError(
                 "Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. "
@@ -1102,7 +1102,7 @@ class Conv1D(nn.Module):
         """ Conv1D layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2)
             Basically works like a Linear layer but the weights are transposed
         """
-        super(Conv1D, self).__init__()
+        super().__init__()
         self.nf = nf
         w = torch.empty(nx, nf)
         nn.init.normal_(w, std=0.02)
@@ -1120,7 +1120,7 @@ class PoolerStartLogits(nn.Module):
     """ Compute SQuAD start_logits from sequence hidden states. """
     def __init__(self, config):
-        super(PoolerStartLogits, self).__init__()
+        super().__init__()
         self.dense = nn.Linear(config.hidden_size, 1)
     def forward(self, hidden_states, p_mask=None):
@@ -1145,7 +1145,7 @@ class PoolerEndLogits(nn.Module):
     """
     def __init__(self, config):
-        super(PoolerEndLogits, self).__init__()
+        super().__init__()
         self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
         self.activation = nn.Tanh()
         self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
@@ -1191,7 +1191,7 @@ class PoolerAnswerClass(nn.Module):
     """ Compute SQuAD 2.0 answer class from classification and start tokens hidden states. """
     def __init__(self, config):
-        super(PoolerAnswerClass, self).__init__()
+        super().__init__()
         self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
         self.activation = nn.Tanh()
         self.dense_1 = nn.Linear(config.hidden_size, 1, bias=False)
@@ -1276,7 +1276,7 @@ class SQuADHead(nn.Module):
     """
     def __init__(self, config):
-        super(SQuADHead, self).__init__()
+        super().__init__()
         self.start_n_top = config.start_n_top
         self.end_n_top = config.end_n_top
@@ -1368,7 +1368,7 @@ class SequenceSummary(nn.Module):
     """
     def __init__(self, config):
-        super(SequenceSummary, self).__init__()
+        super().__init__()
         self.summary_type = config.summary_type if hasattr(config, "summary_type") else "last"
         if self.summary_type == "attn":
...
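
The PreTrainedModel hunk above involves two base classes (nn.Module and ModuleUtilsMixin). A minimal sketch with hypothetical classes of how the zero-argument form resolves in that situation: super() follows the method resolution order (MRO) of the instance's class, so the first class after the current one that defines __init__ is the one that runs.

class Mixin:
    # utility methods only; no __init__ of its own
    def describe(self):
        return type(self).__name__


class Base:
    def __init__(self):
        self.initialized = "Base"


class Child(Base, Mixin):
    def __init__(self):
        # Equivalent to super(Child, self).__init__(); lookup walks Child's MRO
        # (Child -> Base -> Mixin -> object) and finds Base.__init__ first.
        super().__init__()


c = Child()
assert Child.__mro__ == (Child, Base, Mixin, object)
assert c.initialized == "Base" and c.describe() == "Child"
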
@@ -96,7 +96,7 @@ class MultiHeadAttention(nn.Module):
     NEW_ID = itertools.count()
     def __init__(self, n_heads, dim, config):
-        super(MultiHeadAttention, self).__init__()
+        super().__init__()
         self.layer_id = next(MultiHeadAttention.NEW_ID)
         self.output_attentions = config.output_attentions
         self.dim = dim
@@ -197,7 +197,7 @@ class MultiHeadAttention(nn.Module):
 class TransformerFFN(nn.Module):
     def __init__(self, in_dim, dim_hidden, out_dim, config):
-        super(TransformerFFN, self).__init__()
+        super().__init__()
         self.dropout = config.dropout
         self.lin1 = nn.Linear(in_dim, dim_hidden)
         self.lin2 = nn.Linear(dim_hidden, out_dim)
@@ -222,7 +222,7 @@ class XLMPreTrainedModel(PreTrainedModel):
     base_model_prefix = "transformer"
     def __init__(self, *inputs, **kwargs):
-        super(XLMPreTrainedModel, self).__init__(*inputs, **kwargs)
+        super().__init__(*inputs, **kwargs)
     @property
     def dummy_inputs(self):
@@ -354,7 +354,7 @@ class XLMModel(XLMPreTrainedModel):
     """
     def __init__(self, config): # , dico, is_encoder, with_output):
-        super(XLMModel, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states
@@ -585,7 +585,7 @@ class XLMPredLayer(nn.Module):
     """
     def __init__(self, config):
-        super(XLMPredLayer, self).__init__()
+        super().__init__()
         self.asm = config.asm
         self.n_words = config.n_words
         self.pad_index = config.pad_index
@@ -661,7 +661,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
     """
     def __init__(self, config):
-        super(XLMWithLMHeadModel, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLMModel(config)
         self.pred_layer = XLMPredLayer(config)
@@ -754,7 +754,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
     """
     def __init__(self, config):
-        super(XLMForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLMModel(config)
@@ -856,7 +856,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
     """
     def __init__(self, config):
-        super(XLMForQuestionAnsweringSimple, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLMModel(config)
         self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
@@ -973,7 +973,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
     """
     def __init__(self, config):
-        super(XLMForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLMModel(config)
         self.qa_outputs = SQuADHead(config)
...
@@ -204,7 +204,7 @@ XLNetLayerNorm = nn.LayerNorm
 class XLNetRelativeAttention(nn.Module):
     def __init__(self, config):
-        super(XLNetRelativeAttention, self).__init__()
+        super().__init__()
         self.output_attentions = config.output_attentions
         if config.d_model % config.n_head != 0:
@@ -414,7 +414,7 @@ class XLNetRelativeAttention(nn.Module):
 class XLNetFeedForward(nn.Module):
     def __init__(self, config):
-        super(XLNetFeedForward, self).__init__()
+        super().__init__()
         self.layer_norm = XLNetLayerNorm(config.d_model, eps=config.layer_norm_eps)
         self.layer_1 = nn.Linear(config.d_model, config.d_inner)
         self.layer_2 = nn.Linear(config.d_inner, config.d_model)
@@ -437,7 +437,7 @@ class XLNetFeedForward(nn.Module):
 class XLNetLayer(nn.Module):
     def __init__(self, config):
-        super(XLNetLayer, self).__init__()
+        super().__init__()
         self.rel_attn = XLNetRelativeAttention(config)
         self.ff = XLNetFeedForward(config)
         self.dropout = nn.Dropout(config.dropout)
@@ -631,7 +631,7 @@ class XLNetModel(XLNetPreTrainedModel):
     """
     def __init__(self, config):
-        super(XLNetModel, self).__init__(config)
+        super().__init__(config)
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states
         self.output_past = config.output_past
@@ -996,7 +996,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
     """
     def __init__(self, config):
-        super(XLNetLMHeadModel, self).__init__(config)
+        super().__init__(config)
         self.attn_type = config.attn_type
         self.same_length = config.same_length
@@ -1119,7 +1119,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
     """
     def __init__(self, config):
-        super(XLNetForSequenceClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLNetModel(config)
@@ -1234,7 +1234,7 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
     """
     def __init__(self, config):
-        super(XLNetForTokenClassification, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLNetModel(config)
@@ -1355,7 +1355,7 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
     """
     def __init__(self, config):
-        super(XLNetForMultipleChoice, self).__init__(config)
+        super().__init__(config)
         self.transformer = XLNetModel(config)
         self.sequence_summary = SequenceSummary(config)
@@ -1463,7 +1463,7 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
     """
     def __init__(self, config):
-        super(XLNetForQuestionAnsweringSimple, self).__init__(config)
+        super().__init__(config)
         self.num_labels = config.num_labels
         self.transformer = XLNetModel(config)
@@ -1595,7 +1595,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
     """
     def __init__(self, config):
-        super(XLNetForQuestionAnswering, self).__init__(config)
+        super().__init__(config)
         self.start_n_top = config.start_n_top
         self.end_n_top = config.end_n_top
...
@@ -114,7 +114,7 @@ class AdamW(Optimizer):
         if not 0.0 <= eps:
             raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(eps))
         defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, correct_bias=correct_bias)
-        super(AdamW, self).__init__(params, defaults)
+        super().__init__(params, defaults)
     def step(self, closure=None):
         """Performs a single optimization step.
...
@@ -24,7 +24,7 @@ class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
     """Applys a warmup schedule on a given learning rate decay schedule."""
     def __init__(self, initial_learning_rate, decay_schedule_fn, warmup_steps, power=1.0, name=None):
-        super(WarmUp, self).__init__()
+        super().__init__()
         self.initial_learning_rate = initial_learning_rate
         self.warmup_steps = warmup_steps
         self.power = power
@@ -102,7 +102,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
         name="AdamWeightDecay",
         **kwargs
     ):
-        super(AdamWeightDecay, self).__init__(learning_rate, beta_1, beta_2, epsilon, amsgrad, name, **kwargs)
+        super().__init__(learning_rate, beta_1, beta_2, epsilon, amsgrad, name, **kwargs)
         self.weight_decay_rate = weight_decay_rate
         self._include_in_weight_decay = include_in_weight_decay
         self._exclude_from_weight_decay = exclude_from_weight_decay
@@ -111,10 +111,10 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
     def from_config(cls, config):
         """Creates an optimizer from its config with WarmUp custom object."""
         custom_objects = {"WarmUp": WarmUp}
-        return super(AdamWeightDecay, cls).from_config(config, custom_objects=custom_objects)
+        return super().from_config(config, custom_objects=custom_objects)
     def _prepare_local(self, var_device, var_dtype, apply_state):
-        super(AdamWeightDecay, self)._prepare_local(var_device, var_dtype, apply_state)
+        super()._prepare_local(var_device, var_dtype, apply_state)
         apply_state["weight_decay_rate"] = tf.constant(self.weight_decay_rate, name="adam_weight_decay_rate")
     def _decay_weights_op(self, var, learning_rate, apply_state):
@@ -128,7 +128,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
     def apply_gradients(self, grads_and_vars, clip_norm, name=None):
         grads, tvars = list(zip(*grads_and_vars))
         (grads, _) = tf.clip_by_global_norm(grads, clip_norm=clip_norm)
-        return super(AdamWeightDecay, self).apply_gradients(zip(grads, tvars))
+        return super().apply_gradients(zip(grads, tvars))
     def _get_lr(self, var_device, var_dtype, apply_state):
         """Retrieves the learning rate with the given state."""
@@ -147,16 +147,16 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
         lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
         decay = self._decay_weights_op(var, lr_t, apply_state)
         with tf.control_dependencies([decay]):
-            return super(AdamWeightDecay, self)._resource_apply_dense(grad, var, **kwargs)
+            return super()._resource_apply_dense(grad, var, **kwargs)
     def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
         lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
         decay = self._decay_weights_op(var, lr_t, apply_state)
         with tf.control_dependencies([decay]):
-            return super(AdamWeightDecay, self)._resource_apply_sparse(grad, var, indices, **kwargs)
+            return super()._resource_apply_sparse(grad, var, indices, **kwargs)
     def get_config(self):
-        config = super(AdamWeightDecay, self).get_config()
+        config = super().get_config()
         config.update({"weight_decay_rate": self.weight_decay_rate})
         return config
...
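
The AdamWeightDecay hunks above also touch a classmethod (from_config) and several ordinary methods (_prepare_local, apply_gradients, get_config). A minimal sketch with hypothetical classes showing that the zero-argument form covers those cases too: inside a classmethod it binds to the current cls, and inside an instance method to the current self, exactly like the explicit two-argument spellings it replaces.

class Serializable:
    @classmethod
    def from_config(cls, config):
        obj = cls()
        obj.config = dict(config)
        return obj

    def get_config(self):
        return dict(self.config)


class WithDefaults(Serializable):
    @classmethod
    def from_config(cls, config):
        # Old spelling: super(WithDefaults, cls).from_config(...)
        return super().from_config({"scale": 1.0, **config})

    def get_config(self):
        # Old spelling: super(WithDefaults, self).get_config()
        config = super().get_config()
        config.update({"extra": True})
        return config


opt = WithDefaults.from_config({"rate": 0.01})
assert opt.get_config() == {"scale": 1.0, "rate": 0.01, "extra": True}
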
@@ -79,7 +79,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
         mask_token="[MASK]",
         **kwargs
     ):
-        super(AlbertTokenizer, self).__init__(
+        super().__init__(
             bos_token=bos_token,
             eos_token=eos_token,
             unk_token=unk_token,
...
@@ -163,7 +163,7 @@ class BertTokenizer(PreTrainedTokenizer):
                 This should likely be deactivated for Japanese:
                 see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
         """
-        super(BertTokenizer, self).__init__(
+        super().__init__(
             unk_token=unk_token,
             sep_token=sep_token,
             pad_token=pad_token,
@@ -554,7 +554,7 @@ class BertTokenizerFast(PreTrainedTokenizerFast):
         add_special_tokens=True,
         **kwargs
     ):
-        super(BertTokenizerFast, self).__init__(
+        super().__init__(
             unk_token=unk_token,
             sep_token=sep_token,
             pad_token=pad_token,
...
@@ -107,7 +107,7 @@ class BertJapaneseTokenizer(BertTokenizer):
             **subword_tokenizer_type**: (`optional`) string (default "wordpiece")
                 Type of subword tokenizer.
         """
-        super(BertTokenizer, self).__init__(
+        super().__init__(
             unk_token=unk_token,
             sep_token=sep_token,
             pad_token=pad_token,
...
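
The BertJapaneseTokenizer hunk above is the one place in this diff where the old call named a class other than the one defining the method (BertTokenizer rather than BertJapaneseTokenizer). A minimal sketch with hypothetical classes of what that class argument controls: two-argument super() starts the MRO lookup just after the named class, while the zero-argument form starts just after the class in which the method is written.

class Grandparent:
    def __init__(self):
        self.initialized_by = "Grandparent"


class Parent(Grandparent):
    def __init__(self):
        super().__init__()
        self.initialized_by = "Parent"


class Child(Parent):
    def __init__(self):
        # Zero-argument form: next class after Child in the MRO, i.e. Parent.
        super().__init__()
        assert self.initialized_by == "Parent"

        # Naming Parent explicitly: lookup starts after Parent, so
        # Grandparent.__init__ runs and Parent.__init__ is skipped.
        super(Parent, self).__init__()
        assert self.initialized_by == "Grandparent"


Child()
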
@@ -66,7 +66,7 @@ class CamembertTokenizer(PreTrainedTokenizer):
         additional_special_tokens=["<s>NOTUSED", "</s>NOTUSED"],
         **kwargs
     ):
-        super(CamembertTokenizer, self).__init__(
+        super().__init__(
             max_len=512,
             bos_token=bos_token,
             eos_token=eos_token,
...
@@ -126,7 +126,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
     control_codes = CONTROL_CODES
     def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
-        super(CTRLTokenizer, self).__init__(unk_token=unk_token, **kwargs)
+        super().__init__(unk_token=unk_token, **kwargs)
         self.max_len_single_sentence = (
             self.max_len
         ) # no default special tokens - you can update this value if you add special tokens
...
@@ -122,7 +122,7 @@ class GPT2Tokenizer(PreTrainedTokenizer):
         eos_token="<|endoftext|>",
         **kwargs
     ):
-        super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
+        super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
         self.max_len_single_sentence = (
             self.max_len
         ) # no default special tokens - you can update this value if you add special tokens
@@ -268,7 +268,7 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
         truncation_strategy="longest_first",
         **kwargs
     ):
-        super(GPT2TokenizerFast, self).__init__(
+        super().__init__(
             bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs
         )
...
@@ -82,7 +82,7 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
     def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
-        super(OpenAIGPTTokenizer, self).__init__(unk_token=unk_token, **kwargs)
+        super().__init__(unk_token=unk_token, **kwargs)
         self.max_len_single_sentence = (
             self.max_len
...
@@ -84,7 +84,7 @@ class RobertaTokenizer(GPT2Tokenizer):
         mask_token="<mask>",
         **kwargs
     ):
-        super(RobertaTokenizer, self).__init__(
+        super().__init__(
             vocab_file=vocab_file,
             merges_file=merges_file,
             errors=errors,
...
@@ -91,7 +91,7 @@ class T5Tokenizer(PreTrainedTokenizer):
             additional_special_tokens = []
             additional_special_tokens.extend(["<extra_id_{}>".format(i) for i in range(extra_ids)])
-        super(T5Tokenizer, self).__init__(
+        super().__init__(
             eos_token=eos_token,
             unk_token=unk_token,
             pad_token=pad_token,
...
@@ -78,7 +78,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
         additional_special_tokens=["<formula>"],
         **kwargs
     ):
-        super(TransfoXLTokenizer, self).__init__(
+        super().__init__(
             unk_token=unk_token, eos_token=eos_token, additional_special_tokens=additional_special_tokens, **kwargs
         )
...
@@ -1425,7 +1425,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer):
     _decoder = None
     def __init__(self, **kwargs):
-        super(PreTrainedTokenizerFast, self).__init__(**kwargs)
+        super().__init__(**kwargs)
     @property
     def tokenizer(self):
...
@@ -578,7 +578,7 @@ class XLMTokenizer(PreTrainedTokenizer):
         do_lowercase_and_remove_accent=True,
         **kwargs
     ):
-        super(XLMTokenizer, self).__init__(
+        super().__init__(
             unk_token=unk_token,
             bos_token=bos_token,
             sep_token=sep_token,
...
@@ -75,7 +75,7 @@ class XLMRobertaTokenizer(PreTrainedTokenizer):
         mask_token="<mask>",
         **kwargs
     ):
-        super(XLMRobertaTokenizer, self).__init__(
+        super().__init__(
             bos_token=bos_token,
             eos_token=eos_token,
             unk_token=unk_token,
...
@@ -77,7 +77,7 @@ class XLNetTokenizer(PreTrainedTokenizer):
         additional_special_tokens=["<eop>", "<eod>"],
         **kwargs
     ):
-        super(XLNetTokenizer, self).__init__(
+        super().__init__(
             bos_token=bos_token,
             eos_token=eos_token,
             unk_token=unk_token,
...
@@ -80,7 +80,7 @@ class XxxConfig(PretrainedConfig):
         summary_first_dropout=0.1,
         **kwargs
     ):
-        super(XxxConfig, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.vocab_size = vocab_size
         self.n_ctx = n_ctx
         self.n_positions = n_positions
...