"git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "d511b2d905177ebc4040186ffb1493414e190a80"
Unverified commit dc17f2a1, authored by Thomas Wolf, committed by GitHub

Merge pull request #2538 from huggingface/py3_super

💄 super
parents 88085484 a98b2ca8
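This PR replaces Python 2-style super calls of the form super(ClassName, self).method(...) with the zero-argument Python 3 form super().method(...) across the modeling, optimization, and tokenization modules; behavior is unchanged. A minimal sketch of the two spellings (the class names here are illustrative, not taken from the diff):

    class Base:
        def __init__(self, value):
            self.value = value

    class Child(Base):
        def __init__(self, value):
            # Old, Python 2 compatible spelling: name the class and the instance explicitly.
            # super(Child, self).__init__(value)
            # New, Python 3 only spelling: the compiler supplies __class__ and the instance.
            super().__init__(value)

Both spellings resolve the same entry in the MRO; the zero-argument form is just shorter and does not break when the class is renamed.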
@@ -47,7 +47,7 @@ except ImportError:
"""
def __init__(self, *args, **kwargs):
- super(Identity, self).__init__()
+ super().__init__()
def forward(self, input):
return input
@@ -97,7 +97,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin):
return {"input_ids": torch.tensor(DUMMY_INPUTS)}
def __init__(self, config, *inputs, **kwargs):
- super(PreTrainedModel, self).__init__()
+ super().__init__()
if not isinstance(config, PretrainedConfig):
raise ValueError(
"Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. "
@@ -1102,7 +1102,7 @@ class Conv1D(nn.Module):
""" Conv1D layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2)
Basically works like a Linear layer but the weights are transposed
"""
- super(Conv1D, self).__init__()
+ super().__init__()
self.nf = nf
w = torch.empty(nx, nf)
nn.init.normal_(w, std=0.02)
@@ -1120,7 +1120,7 @@ class PoolerStartLogits(nn.Module):
""" Compute SQuAD start_logits from sequence hidden states. """
def __init__(self, config):
- super(PoolerStartLogits, self).__init__()
+ super().__init__()
self.dense = nn.Linear(config.hidden_size, 1)
def forward(self, hidden_states, p_mask=None):
@@ -1145,7 +1145,7 @@ class PoolerEndLogits(nn.Module):
"""
def __init__(self, config):
- super(PoolerEndLogits, self).__init__()
+ super().__init__()
self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
self.activation = nn.Tanh()
self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
@@ -1191,7 +1191,7 @@ class PoolerAnswerClass(nn.Module):
""" Compute SQuAD 2.0 answer class from classification and start tokens hidden states. """
def __init__(self, config):
- super(PoolerAnswerClass, self).__init__()
+ super().__init__()
self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
self.activation = nn.Tanh()
self.dense_1 = nn.Linear(config.hidden_size, 1, bias=False)
@@ -1276,7 +1276,7 @@ class SQuADHead(nn.Module):
"""
def __init__(self, config):
- super(SQuADHead, self).__init__()
+ super().__init__()
self.start_n_top = config.start_n_top
self.end_n_top = config.end_n_top
@@ -1368,7 +1368,7 @@ class SequenceSummary(nn.Module):
"""
def __init__(self, config):
- super(SequenceSummary, self).__init__()
+ super().__init__()
self.summary_type = config.summary_type if hasattr(config, "summary_type") else "last"
if self.summary_type == "attn":
@@ -96,7 +96,7 @@ class MultiHeadAttention(nn.Module):
NEW_ID = itertools.count()
def __init__(self, n_heads, dim, config):
- super(MultiHeadAttention, self).__init__()
+ super().__init__()
self.layer_id = next(MultiHeadAttention.NEW_ID)
self.output_attentions = config.output_attentions
self.dim = dim
@@ -197,7 +197,7 @@ class MultiHeadAttention(nn.Module):
class TransformerFFN(nn.Module):
def __init__(self, in_dim, dim_hidden, out_dim, config):
- super(TransformerFFN, self).__init__()
+ super().__init__()
self.dropout = config.dropout
self.lin1 = nn.Linear(in_dim, dim_hidden)
self.lin2 = nn.Linear(dim_hidden, out_dim)
@@ -222,7 +222,7 @@ class XLMPreTrainedModel(PreTrainedModel):
base_model_prefix = "transformer"
def __init__(self, *inputs, **kwargs):
- super(XLMPreTrainedModel, self).__init__(*inputs, **kwargs)
+ super().__init__(*inputs, **kwargs)
@property
def dummy_inputs(self):
@@ -354,7 +354,7 @@ class XLMModel(XLMPreTrainedModel):
"""
def __init__(self, config): # , dico, is_encoder, with_output):
- super(XLMModel, self).__init__(config)
+ super().__init__(config)
self.output_attentions = config.output_attentions
self.output_hidden_states = config.output_hidden_states
@@ -585,7 +585,7 @@ class XLMPredLayer(nn.Module):
"""
def __init__(self, config):
- super(XLMPredLayer, self).__init__()
+ super().__init__()
self.asm = config.asm
self.n_words = config.n_words
self.pad_index = config.pad_index
@@ -661,7 +661,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
"""
def __init__(self, config):
- super(XLMWithLMHeadModel, self).__init__(config)
+ super().__init__(config)
self.transformer = XLMModel(config)
self.pred_layer = XLMPredLayer(config)
@@ -754,7 +754,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
"""
def __init__(self, config):
- super(XLMForSequenceClassification, self).__init__(config)
+ super().__init__(config)
self.num_labels = config.num_labels
self.transformer = XLMModel(config)
@@ -856,7 +856,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
"""
def __init__(self, config):
- super(XLMForQuestionAnsweringSimple, self).__init__(config)
+ super().__init__(config)
self.transformer = XLMModel(config)
self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
@@ -973,7 +973,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
"""
def __init__(self, config):
- super(XLMForQuestionAnswering, self).__init__(config)
+ super().__init__(config)
self.transformer = XLMModel(config)
self.qa_outputs = SQuADHead(config)
@@ -204,7 +204,7 @@ XLNetLayerNorm = nn.LayerNorm
class XLNetRelativeAttention(nn.Module):
def __init__(self, config):
- super(XLNetRelativeAttention, self).__init__()
+ super().__init__()
self.output_attentions = config.output_attentions
if config.d_model % config.n_head != 0:
@@ -414,7 +414,7 @@ class XLNetRelativeAttention(nn.Module):
class XLNetFeedForward(nn.Module):
def __init__(self, config):
- super(XLNetFeedForward, self).__init__()
+ super().__init__()
self.layer_norm = XLNetLayerNorm(config.d_model, eps=config.layer_norm_eps)
self.layer_1 = nn.Linear(config.d_model, config.d_inner)
self.layer_2 = nn.Linear(config.d_inner, config.d_model)
@@ -437,7 +437,7 @@ class XLNetFeedForward(nn.Module):
class XLNetLayer(nn.Module):
def __init__(self, config):
- super(XLNetLayer, self).__init__()
+ super().__init__()
self.rel_attn = XLNetRelativeAttention(config)
self.ff = XLNetFeedForward(config)
self.dropout = nn.Dropout(config.dropout)
@@ -631,7 +631,7 @@ class XLNetModel(XLNetPreTrainedModel):
"""
def __init__(self, config):
- super(XLNetModel, self).__init__(config)
+ super().__init__(config)
self.output_attentions = config.output_attentions
self.output_hidden_states = config.output_hidden_states
self.output_past = config.output_past
@@ -996,7 +996,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
"""
def __init__(self, config):
- super(XLNetLMHeadModel, self).__init__(config)
+ super().__init__(config)
self.attn_type = config.attn_type
self.same_length = config.same_length
@@ -1119,7 +1119,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
"""
def __init__(self, config):
- super(XLNetForSequenceClassification, self).__init__(config)
+ super().__init__(config)
self.num_labels = config.num_labels
self.transformer = XLNetModel(config)
@@ -1234,7 +1234,7 @@ class XLNetForTokenClassification(XLNetPreTrainedModel):
"""
def __init__(self, config):
- super(XLNetForTokenClassification, self).__init__(config)
+ super().__init__(config)
self.num_labels = config.num_labels
self.transformer = XLNetModel(config)
@@ -1355,7 +1355,7 @@ class XLNetForMultipleChoice(XLNetPreTrainedModel):
"""
def __init__(self, config):
- super(XLNetForMultipleChoice, self).__init__(config)
+ super().__init__(config)
self.transformer = XLNetModel(config)
self.sequence_summary = SequenceSummary(config)
@@ -1463,7 +1463,7 @@ class XLNetForQuestionAnsweringSimple(XLNetPreTrainedModel):
"""
def __init__(self, config):
- super(XLNetForQuestionAnsweringSimple, self).__init__(config)
+ super().__init__(config)
self.num_labels = config.num_labels
self.transformer = XLNetModel(config)
@@ -1595,7 +1595,7 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
"""
def __init__(self, config):
- super(XLNetForQuestionAnswering, self).__init__(config)
+ super().__init__(config)
self.start_n_top = config.start_n_top
self.end_n_top = config.end_n_top
@@ -114,7 +114,7 @@ class AdamW(Optimizer):
if not 0.0 <= eps:
raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(eps))
defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, correct_bias=correct_bias)
- super(AdamW, self).__init__(params, defaults)
+ super().__init__(params, defaults)
def step(self, closure=None):
"""Performs a single optimization step.
@@ -24,7 +24,7 @@ class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
"""Applys a warmup schedule on a given learning rate decay schedule."""
def __init__(self, initial_learning_rate, decay_schedule_fn, warmup_steps, power=1.0, name=None):
- super(WarmUp, self).__init__()
+ super().__init__()
self.initial_learning_rate = initial_learning_rate
self.warmup_steps = warmup_steps
self.power = power
@@ -102,7 +102,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
name="AdamWeightDecay",
**kwargs
):
- super(AdamWeightDecay, self).__init__(learning_rate, beta_1, beta_2, epsilon, amsgrad, name, **kwargs)
+ super().__init__(learning_rate, beta_1, beta_2, epsilon, amsgrad, name, **kwargs)
self.weight_decay_rate = weight_decay_rate
self._include_in_weight_decay = include_in_weight_decay
self._exclude_from_weight_decay = exclude_from_weight_decay
@@ -111,10 +111,10 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
def from_config(cls, config):
"""Creates an optimizer from its config with WarmUp custom object."""
custom_objects = {"WarmUp": WarmUp}
- return super(AdamWeightDecay, cls).from_config(config, custom_objects=custom_objects)
+ return super().from_config(config, custom_objects=custom_objects)
def _prepare_local(self, var_device, var_dtype, apply_state):
- super(AdamWeightDecay, self)._prepare_local(var_device, var_dtype, apply_state)
+ super()._prepare_local(var_device, var_dtype, apply_state)
apply_state["weight_decay_rate"] = tf.constant(self.weight_decay_rate, name="adam_weight_decay_rate")
def _decay_weights_op(self, var, learning_rate, apply_state):
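Note that the from_config change above relies on zero-argument super() also working inside a classmethod: the compiler's __class__ cell is still available, and cls rather than self is picked up as the second argument. A small sketch under that assumption (toy class names, not the optimizer code):

    class Schedule:
        @classmethod
        def from_config(cls, config):
            # cls is whatever subclass the call started from.
            return cls(**config)

    class WarmupSchedule(Schedule):
        @classmethod
        def from_config(cls, config):
            # Equivalent to super(WarmupSchedule, cls).from_config(config)
            return super().from_config(config)

    print(WarmupSchedule.from_config({}))  # an instance of WarmupSchedule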
@@ -128,7 +128,7 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
def apply_gradients(self, grads_and_vars, clip_norm, name=None):
grads, tvars = list(zip(*grads_and_vars))
(grads, _) = tf.clip_by_global_norm(grads, clip_norm=clip_norm)
- return super(AdamWeightDecay, self).apply_gradients(zip(grads, tvars))
+ return super().apply_gradients(zip(grads, tvars))
def _get_lr(self, var_device, var_dtype, apply_state):
"""Retrieves the learning rate with the given state."""
@@ -147,16 +147,16 @@ class AdamWeightDecay(tf.keras.optimizers.Adam):
lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
decay = self._decay_weights_op(var, lr_t, apply_state)
with tf.control_dependencies([decay]):
- return super(AdamWeightDecay, self)._resource_apply_dense(grad, var, **kwargs)
+ return super()._resource_apply_dense(grad, var, **kwargs)
def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state)
decay = self._decay_weights_op(var, lr_t, apply_state)
with tf.control_dependencies([decay]):
- return super(AdamWeightDecay, self)._resource_apply_sparse(grad, var, indices, **kwargs)
+ return super()._resource_apply_sparse(grad, var, indices, **kwargs)
def get_config(self):
- config = super(AdamWeightDecay, self).get_config()
+ config = super().get_config()
config.update({"weight_decay_rate": self.weight_decay_rate})
return config
@@ -79,7 +79,7 @@ class AlbertTokenizer(PreTrainedTokenizer):
mask_token="[MASK]",
**kwargs
):
- super(AlbertTokenizer, self).__init__(
+ super().__init__(
bos_token=bos_token,
eos_token=eos_token,
unk_token=unk_token,
@@ -163,7 +163,7 @@ class BertTokenizer(PreTrainedTokenizer):
This should likely be deactivated for Japanese:
see: https://github.com/huggingface/pytorch-pretrained-BERT/issues/328
"""
- super(BertTokenizer, self).__init__(
+ super().__init__(
unk_token=unk_token,
sep_token=sep_token,
pad_token=pad_token,
@@ -554,7 +554,7 @@ class BertTokenizerFast(PreTrainedTokenizerFast):
add_special_tokens=True,
**kwargs
):
- super(BertTokenizerFast, self).__init__(
+ super().__init__(
unk_token=unk_token,
sep_token=sep_token,
pad_token=pad_token,
@@ -115,6 +115,7 @@ class BertJapaneseTokenizer(BertTokenizer):
mask_token=mask_token,
**kwargs,
)
+ # ^^ We call the grandparent's init, not the parent's.
self.max_len_single_sentence = self.max_len - 2 # take into account special tokens
self.max_len_sentences_pair = self.max_len - 3 # take into account special tokens
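The hunk above only adds a comment, because this is the one call the PR cannot rewrite: BertJapaneseTokenizer deliberately invokes its grandparent's __init__ (presumably via super(BertTokenizer, self).__init__(...)), skipping BertTokenizer's own setup, while zero-argument super() always starts the MRO lookup right after the enclosing class. A hedged sketch of the pattern with toy class names, not the real tokenizer code:

    class A:
        def __init__(self):
            print("A.__init__")

    class B(A):
        def __init__(self):
            print("B.__init__")
            super().__init__()

    class C(B):
        def __init__(self):
            # Skip B.__init__ on purpose: start the MRO lookup after B, i.e. at A.
            super(B, self).__init__()

    C()  # prints only "A.__init__"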
@@ -66,7 +66,7 @@ class CamembertTokenizer(PreTrainedTokenizer):
additional_special_tokens=["<s>NOTUSED", "</s>NOTUSED"],
**kwargs
):
- super(CamembertTokenizer, self).__init__(
+ super().__init__(
max_len=512,
bos_token=bos_token,
eos_token=eos_token,
@@ -126,7 +126,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
control_codes = CONTROL_CODES
def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
- super(CTRLTokenizer, self).__init__(unk_token=unk_token, **kwargs)
+ super().__init__(unk_token=unk_token, **kwargs)
self.max_len_single_sentence = (
self.max_len
) # no default special tokens - you can update this value if you add special tokens
@@ -122,7 +122,7 @@ class GPT2Tokenizer(PreTrainedTokenizer):
eos_token="<|endoftext|>",
**kwargs
):
- super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
+ super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
self.max_len_single_sentence = (
self.max_len
) # no default special tokens - you can update this value if you add special tokens
@@ -268,9 +268,7 @@ class GPT2TokenizerFast(PreTrainedTokenizerFast):
truncation_strategy="longest_first",
**kwargs
):
- super(GPT2TokenizerFast, self).__init__(
- bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs
- )
+ super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
self._tokenizer = tk.Tokenizer(tk.models.BPE.from_files(vocab_file, merges_file))
self._update_special_tokens()
@@ -82,7 +82,7 @@ class OpenAIGPTTokenizer(PreTrainedTokenizer):
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
def __init__(self, vocab_file, merges_file, unk_token="<unk>", **kwargs):
- super(OpenAIGPTTokenizer, self).__init__(unk_token=unk_token, **kwargs)
+ super().__init__(unk_token=unk_token, **kwargs)
self.max_len_single_sentence = (
self.max_len
@@ -84,7 +84,7 @@ class RobertaTokenizer(GPT2Tokenizer):
mask_token="<mask>",
**kwargs
):
- super(RobertaTokenizer, self).__init__(
+ super().__init__(
vocab_file=vocab_file,
merges_file=merges_file,
errors=errors,
@@ -91,7 +91,7 @@ class T5Tokenizer(PreTrainedTokenizer):
additional_special_tokens = []
additional_special_tokens.extend(["<extra_id_{}>".format(i) for i in range(extra_ids)])
- super(T5Tokenizer, self).__init__(
+ super().__init__(
eos_token=eos_token,
unk_token=unk_token,
pad_token=pad_token,
@@ -78,7 +78,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
additional_special_tokens=["<formula>"],
**kwargs
):
- super(TransfoXLTokenizer, self).__init__(
+ super().__init__(
unk_token=unk_token, eos_token=eos_token, additional_special_tokens=additional_special_tokens, **kwargs
)
@@ -1425,7 +1425,7 @@ class PreTrainedTokenizerFast(PreTrainedTokenizer):
_decoder = None
def __init__(self, **kwargs):
- super(PreTrainedTokenizerFast, self).__init__(**kwargs)
+ super().__init__(**kwargs)
@property
def tokenizer(self):
@@ -578,7 +578,7 @@ class XLMTokenizer(PreTrainedTokenizer):
do_lowercase_and_remove_accent=True,
**kwargs
):
- super(XLMTokenizer, self).__init__(
+ super().__init__(
unk_token=unk_token,
bos_token=bos_token,
sep_token=sep_token,
@@ -75,7 +75,7 @@ class XLMRobertaTokenizer(PreTrainedTokenizer):
mask_token="<mask>",
**kwargs
):
- super(XLMRobertaTokenizer, self).__init__(
+ super().__init__(
bos_token=bos_token,
eos_token=eos_token,
unk_token=unk_token,
@@ -77,7 +77,7 @@ class XLNetTokenizer(PreTrainedTokenizer):
additional_special_tokens=["<eop>", "<eod>"],
**kwargs
):
- super(XLNetTokenizer, self).__init__(
+ super().__init__(
bos_token=bos_token,
eos_token=eos_token,
unk_token=unk_token,
@@ -80,7 +80,7 @@ class XxxConfig(PretrainedConfig):
summary_first_dropout=0.1,
**kwargs
):
- super(XxxConfig, self).__init__(**kwargs)
+ super().__init__(**kwargs)
self.vocab_size = vocab_size
self.n_ctx = n_ctx
self.n_positions = n_positions