"git@developer.sourcefind.cn:dadigang/Ventoy.git" did not exist on "b1d52370413fe35672ba7aa6f8390de0d7eb1c50"
Commit a75c64d8 authored by Lysandre

Black 20 release

parent e78c1103
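This commit applies the Black 20 formatter (presumably the 20.8b0 release) across the codebase, so the hunks below are mechanical. Two behaviours account for nearly all of the changes: docstrings are normalized (the space after the opening `"""` is dropped and short docstrings are collapsed onto one line), and a pre-existing "magic" trailing comma forces a call or collection to be written one element per line. The sketch below, with made-up function names, only illustrates the formatter's effect; it is not code from this repository.

def transform(x, scale=1, clamp=False):
    """Toy helper so the snippet runs; it stands in for any multi-argument call."""
    y = x * scale
    return min(y, 1.0) if clamp else y


def frobnicate_before(x):
    """ Do the thing.
    """
    return transform(
        x, scale=2, clamp=True,
    )


def frobnicate_after(x):
    """Do the thing."""
    return transform(
        x,
        scale=2,
        clamp=True,
    )


# Same behaviour either way; only the formatting differs.
assert frobnicate_before(0.3) == frobnicate_after(0.3)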
@@ -83,7 +83,7 @@ def create_sinusoidal_embeddings(n_pos, dim, out):
def gelu(x):
- """ Gaussian Error Linear Unit.
+ """Gaussian Error Linear Unit.
Original Implementation of the gelu activation function in Google Bert repo when initially created.
For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
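As a standalone reference, the GPT-style approximation quoted in this docstring can be written as the snippet below (a minimal sketch; `gelu_gpt_approx` is a name chosen here, not one from the diff).

import math

import torch


def gelu_gpt_approx(x: torch.Tensor) -> torch.Tensor:
    # Tanh approximation quoted in the docstring above (the OpenAI GPT variant).
    return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))


x = torch.linspace(-3, 3, 7)
# The exact GELU uses the Gaussian CDF; the approximation agrees to a few decimal places.
print(gelu_gpt_approx(x))
print(torch.nn.functional.gelu(x))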
@@ -333,9 +333,9 @@ class TFXLMMainLayer(tf.keras.layers.Layer):
raise NotImplementedError
def _prune_heads(self, heads_to_prune):
- """ Prunes heads of the model.
+ """Prunes heads of the model.
heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
See base class PreTrainedModel
"""
raise NotImplementedError
@@ -516,8 +516,8 @@ class TFXLMMainLayer(tf.keras.layers.Layer):
class TFXLMPreTrainedModel(TFPreTrainedModel):
- """ An abstract class to handle weights initialization and
+ """An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""
config_class = XLMConfig
@@ -858,7 +858,7 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
@property
def dummy_inputs(self):
- """ Dummy inputs to build the network.
+ """Dummy inputs to build the network.
Returns:
tf.Tensor with dummy inputs
...
@@ -77,7 +77,8 @@ class TFXLMRobertaModel(TFRobertaModel):
@add_start_docstrings(
- """XLM-RoBERTa Model with a `language modeling` head on top. """, XLM_ROBERTA_START_DOCSTRING,
+ """XLM-RoBERTa Model with a `language modeling` head on top. """,
+ XLM_ROBERTA_START_DOCSTRING,
)
class TFXLMRobertaForMaskedLM(TFRobertaForMaskedLM):
"""
...
@@ -62,9 +62,9 @@ TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = [
def gelu(x):
- """ Implementation of the gelu activation function.
+ """Implementation of the gelu activation function.
XLNet is using OpenAI GPT's gelu
Also see https://arxiv.org/abs/1606.08415
"""
cdf = 0.5 * (1.0 + tf.tanh((np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
return x * cdf
@@ -807,8 +807,8 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
class TFXLNetPreTrainedModel(TFPreTrainedModel):
- """ An abstract class to handle weights initialization and
+ """An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""
config_class = XLNetConfig
@@ -1213,33 +1213,33 @@ class TFXLNetLMHeadModel(TFXLNetPreTrainedModel, TFCausalLanguageModelingLoss):
training=False,
):
r"""
labels (:obj:`tf.Tensor` of shape :obj:`(batch_size, sequence_length)`, `optional`, defaults to :obj:`None`):
Labels for computing the cross entropy classification loss.
Indices should be in ``[0, ..., config.vocab_size - 1]``.
Return:
Examples::
import tensorflow as tf
import numpy as np
from transformers import XLNetTokenizer, TFXLNetLMHeadModel
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = TFXLNetLMHeadModel.from_pretrained('xlnet-large-cased')
# We show how to setup inputs to predict a next token using a bi-directional context.
input_ids = tf.constant(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=True))[None, :] # We will predict the masked token
perm_mask = np.zeros((1, input_ids.shape[1], input_ids.shape[1]))
perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
target_mapping = np.zeros((1, 1, input_ids.shape[1])) # Shape [1, 1, seq_length] => let's predict one token
target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
outputs = model(input_ids, perm_mask=tf.constant(perm_mask, dtype=tf.float32), target_mapping=tf.constant(target_mapping, dtype=tf.float32))
next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
"""
return_dict = return_dict if return_dict is not None else self.transformer.return_dict
@@ -1401,7 +1401,7 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss):
@property
def dummy_inputs(self):
- """ Dummy inputs to build the network.
+ """Dummy inputs to build the network.
Returns:
tf.Tensor with dummy inputs
...
@@ -45,8 +45,8 @@ TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = [
def build_tf_to_pytorch_map(model, config):
- """ A map of modules from TF to PyTorch.
+ """A map of modules from TF to PyTorch.
This time I use a map to keep the PyTorch model as identical to the original PyTorch model as possible.
"""
tf_to_pt_map = {}
@@ -112,8 +112,7 @@ def build_tf_to_pytorch_map(model, config):
def load_tf_weights_in_transfo_xl(model, config, tf_path):
- """ Load tf checkpoints in a pytorch model
- """
+ """Load tf checkpoints in a pytorch model"""
try:
import numpy as np
import tensorflow as tf
@@ -386,7 +385,12 @@ class RelPartialLearnableDecoderLayer(nn.Module):
def forward(self, dec_inp, r, dec_attn_mask=None, mems=None, head_mask=None, output_attentions=False):
attn_outputs = self.dec_attn(
- dec_inp, r, attn_mask=dec_attn_mask, mems=mems, head_mask=head_mask, output_attentions=output_attentions,
+ dec_inp,
+ r,
+ attn_mask=dec_attn_mask,
+ mems=mems,
+ head_mask=head_mask,
+ output_attentions=output_attentions,
)
ff_output = self.pos_ff(attn_outputs[0])
@@ -456,8 +460,8 @@ class AdaptiveEmbedding(nn.Module):
class TransfoXLPreTrainedModel(PreTrainedModel):
- """ An abstract class to handle weights initialization and
+ """An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""
config_class = TransfoXLConfig
@@ -474,8 +478,7 @@ class TransfoXLPreTrainedModel(PreTrainedModel):
nn.init.constant_(bias, 0.0)
def _init_weights(self, m):
- """ Initialize the weights.
- """
+ """Initialize the weights."""
classname = m.__class__.__name__
if classname.find("Linear") != -1:
if hasattr(m, "weight") and m.weight is not None:
@@ -515,7 +518,7 @@ class TransfoXLPreTrainedModel(PreTrainedModel):
self._init_bias(m.r_bias)
def resize_token_embeddings(self, new_num_tokens: Optional[int] = None, layer: Optional[int] = -1):
- """ Resize input token embeddings matrix of the model if new_num_tokens != config.vocab_size.
+ """Resize input token embeddings matrix of the model if new_num_tokens != config.vocab_size.
Take care of tying weights embeddings afterwards if the model class has a `tie_weights()` method.
Arguments:
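Since the hunk above only touches the docstring, a brief usage sketch of the method may help. The checkpoint name and added token below are illustrative, and for Transformer-XL the default `layer=-1` grows the last cluster of the adaptive embedding.

from transformers import TransfoXLLMHeadModel, TransfoXLTokenizer

tokenizer = TransfoXLTokenizer.from_pretrained("transfo-xl-wt103")
model = TransfoXLLMHeadModel.from_pretrained("transfo-xl-wt103")

# Hypothetical new token; the embedding matrix is grown to match the tokenizer.
tokenizer.add_tokens(["<report>"])
model.resize_token_embeddings(len(tokenizer))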
@@ -948,7 +951,10 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
return tuple(v for v in [core_out, new_mems, hids, attentions] if v is not None)
return TransfoXLModelOutput(
- last_hidden_state=core_out, mems=new_mems, hidden_states=hids, attentions=attentions,
+ last_hidden_state=core_out,
+ mems=new_mems,
+ hidden_states=hids,
+ attentions=attentions,
)
@@ -1064,8 +1070,7 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
)
def get_output_embeddings(self):
- """ Double-check if you are using adaptive softmax.
- """
+ """Double-check if you are using adaptive softmax."""
if self.sample_softmax > 0:
return self.out_layer
else:
...
@@ -85,17 +85,17 @@ class ProjectedAdaptiveLogSoftmax(nn.Module):
def forward(self, hidden, labels=None, keep_order=False):
"""
Params:
hidden :: [len*bsz x d_proj]
labels :: [len*bsz]
Return:
if labels is None:
out :: [len*bsz x n_tokens] log probabilities of tokens over the vocabulary
else:
out :: [(len-1)*bsz] Negative log likelihood
We could replace this implementation by the native PyTorch one
if their's had an option to set bias on all clusters in the native one.
here: https://github.com/pytorch/pytorch/blob/dbe6a7a9ff1a364a8706bf5df58a1ca96d2fd9da/torch/nn/modules/adaptive.py#L138
"""
if labels is not None:
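The docstring above mentions the native PyTorch counterpart; a minimal sketch of that module (with made-up sizes) shows the interface the custom class mirrors, minus the per-cluster bias option.

import torch
from torch import nn

# nn.AdaptiveLogSoftmaxWithLoss only exposes a bias on the head cluster, which is
# why the custom ProjectedAdaptiveLogSoftmax above is kept around.
asoft = nn.AdaptiveLogSoftmaxWithLoss(in_features=64, n_classes=1000, cutoffs=[100, 500])
hidden = torch.randn(8, 64)                # [len*bsz x d_proj]
labels = torch.randint(0, 1000, (8,))      # [len*bsz]
output, loss = asoft(hidden, labels)       # log-prob of each target, and the mean NLL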
@@ -191,7 +191,7 @@ class ProjectedAdaptiveLogSoftmax(nn.Module):
return out
def log_prob(self, hidden):
- r""" Computes log probabilities for all :math:`n\_classes`
+ r"""Computes log probabilities for all :math:`n\_classes`
From: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/adaptive.py
Args:
hidden (Tensor): a minibatch of examples
...
@@ -51,8 +51,7 @@ try:
except ImportError:
# Older PyTorch compatibility
class Identity(nn.Module):
- r"""A placeholder identity operator that is argument-insensitive.
- """
+ r"""A placeholder identity operator that is argument-insensitive."""
def __init__(self, *args, **kwargs):
super().__init__()
@@ -488,8 +487,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
)
def _tie_or_clone_weights(self, output_embeddings, input_embeddings):
- """ Tie or clone module weights depending of whether we are using TorchScript or not
- """
+ """Tie or clone module weights depending of whether we are using TorchScript or not"""
if self.config.torchscript:
output_embeddings.weight = nn.Parameter(input_embeddings.weight.clone())
else:
@@ -498,7 +496,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
if getattr(output_embeddings, "bias", None) is not None:
output_embeddings.bias.data = torch.nn.functional.pad(
output_embeddings.bias.data,
- (0, output_embeddings.weight.shape[0] - output_embeddings.bias.shape[0],),
+ (
+ 0,
+ output_embeddings.weight.shape[0] - output_embeddings.bias.shape[0],
+ ),
"constant",
0,
)
@@ -906,7 +907,13 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
def load(module: nn.Module, prefix=""):
local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
module._load_from_state_dict(
- state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs,
+ state_dict,
+ prefix,
+ local_metadata,
+ True,
+ missing_keys,
+ unexpected_keys,
+ error_msgs,
)
for name, child in module._modules.items():
if child is not None:
@@ -1242,24 +1249,24 @@ class SQuADHead(nn.Module):
return_dict: bool = False,
) -> Union[SquadHeadOutput, Tuple[torch.FloatTensor]]:
"""
Args:
hidden_states (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, seq_len, hidden_size)`):
Final hidden states of the model on the sequence tokens.
start_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
Positions of the first token for the labeled span.
end_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
Positions of the last token for the labeled span.
cls_index (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
Position of the CLS token for each sentence in the batch. If :obj:`None`, takes the last token.
is_impossible (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
Whether the question has a possible answer in the paragraph or not.
p_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, seq_len)`, `optional`):
Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS).
1.0 means token should be masked.
return_dict (:obj:`bool`, `optional`, defaults to :obj:`False`):
Whether or not to return a :class:`~transformers.file_utils.ModelOuput` instead of a plain tuple.
Returns:
"""
start_logits = self.start_logits(hidden_states, p_mask=p_mask)
@@ -1375,7 +1382,7 @@ class SequenceSummary(nn.Module):
self.summary = nn.Linear(config.hidden_size, num_classes)
activation_string = getattr(config, "summary_activation", None)
- self.activation: Callable = (get_activation(activation_string) if activation_string else Identity())
+ self.activation: Callable = get_activation(activation_string) if activation_string else Identity()
self.first_dropout = Identity()
if hasattr(config, "summary_first_dropout") and config.summary_first_dropout > 0:
@@ -1409,7 +1416,11 @@ class SequenceSummary(nn.Module):
output = hidden_states.mean(dim=1)
elif self.summary_type == "cls_index":
if cls_index is None:
- cls_index = torch.full_like(hidden_states[..., :1, :], hidden_states.shape[-2] - 1, dtype=torch.long,)
+ cls_index = torch.full_like(
+ hidden_states[..., :1, :],
+ hidden_states.shape[-2] - 1,
+ dtype=torch.long,
+ )
else:
cls_index = cls_index.unsqueeze(-1).unsqueeze(-1)
cls_index = cls_index.expand((-1,) * (cls_index.dim() - 1) + (hidden_states.size(-1),))
...
@@ -228,8 +228,8 @@ class TransformerFFN(nn.Module):
class XLMPreTrainedModel(PreTrainedModel):
- """ An abstract class to handle weights initialization and
+ """An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""
config_class = XLMConfig
@@ -462,9 +462,9 @@ class XLMModel(XLMPreTrainedModel):
self.embeddings = new_embeddings
def _prune_heads(self, heads_to_prune):
- """ Prunes heads of the model.
+ """Prunes heads of the model.
heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
See base class PreTrainedModel
"""
for layer, heads in heads_to_prune.items():
self.attentions[layer].prune_heads(heads)
@@ -572,7 +572,11 @@ class XLMModel(XLMPreTrainedModel):
# self attention
attn_outputs = self.attentions[i](
- tensor, attn_mask, cache=cache, head_mask=head_mask[i], output_attentions=output_attentions,
+ tensor,
+ attn_mask,
+ cache=cache,
+ head_mask=head_mask[i],
+ output_attentions=output_attentions,
)
attn = attn_outputs[0]
if output_attentions:
@@ -633,8 +637,7 @@ class XLMPredLayer(nn.Module):
)
def forward(self, x, y=None):
- """ Compute the loss, and optionally the scores.
- """
+ """Compute the loss, and optionally the scores."""
outputs = ()
if self.asm is False:
scores = self.proj(x)
@@ -969,38 +972,38 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
return_dict=None,
):
r"""
start_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`, defaults to :obj:`None`):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
Positions are clamped to the length of the sequence (`sequence_length`).
Position outside of the sequence are not taken into account for computing the loss.
end_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`, defaults to :obj:`None`):
Labels for position (index) of the end of the labelled span for computing the token classification loss.
Positions are clamped to the length of the sequence (`sequence_length`).
Position outside of the sequence are not taken into account for computing the loss.
is_impossible (``torch.LongTensor`` of shape ``(batch_size,)``, `optional`, defaults to :obj:`None`):
Labels whether a question has an answer or no answer (SQuAD 2.0)
cls_index (``torch.LongTensor`` of shape ``(batch_size,)``, `optional`, defaults to :obj:`None`):
Labels for position (index) of the classification token to use as input for computing plausibility of the answer.
p_mask (``torch.FloatTensor`` of shape ``(batch_size, sequence_length)``, `optional`, defaults to :obj:`None`):
Optional mask of tokens which can't be in answers (e.g. [CLS], [PAD], ...).
1.0 means token should be masked. 0.0 mean token is not masked.
Returns:
Example::
>>> from transformers import XLMTokenizer, XLMForQuestionAnswering
>>> import torch
>>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
>>> model = XLMForQuestionAnswering.from_pretrained('xlm-mlm-en-2048', return_dict=True)
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
>>> start_positions = torch.tensor([1])
>>> end_positions = torch.tensor([3])
>>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
>>> loss = outputs.loss
"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -1131,7 +1134,10 @@ class XLMForTokenClassification(XLMPreTrainedModel):
return ((loss,) + output) if loss is not None else output
return TokenClassifierOutput(
- loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+ loss=loss,
+ logits=logits,
+ hidden_states=outputs.hidden_states,
+ attentions=outputs.attentions,
)
...
@@ -68,7 +68,8 @@ class XLMRobertaModel(RobertaModel):
@add_start_docstrings(
- """XLM-RoBERTa Model with a `language modeling` head on top. """, XLM_ROBERTA_START_DOCSTRING,
+ """XLM-RoBERTa Model with a `language modeling` head on top. """,
+ XLM_ROBERTA_START_DOCSTRING,
)
class XLMRobertaForMaskedLM(RobertaForMaskedLM):
"""
...
@@ -58,9 +58,9 @@ XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = [
def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None):
- """ A map of modules from TF to PyTorch.
+ """A map of modules from TF to PyTorch.
I use a map to keep the PyTorch model as
identical to the original PyTorch model as possible.
"""
tf_to_pt_map = {}
@@ -141,8 +141,7 @@ def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None):
def load_tf_weights_in_xlnet(model, config, tf_path):
- """ Load tf checkpoints in a pytorch model
- """
+ """Load tf checkpoints in a pytorch model"""
try:
import numpy as np
import tensorflow as tf
@@ -548,8 +547,8 @@ class XLNetLayer(nn.Module):
class XLNetPreTrainedModel(PreTrainedModel):
- """ An abstract class to handle weights initialization and
+ """An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""
config_class = XLNetConfig
@@ -557,8 +556,7 @@ class XLNetPreTrainedModel(PreTrainedModel):
base_model_prefix = "transformer"
def _init_weights(self, module):
- """ Initialize the weights.
- """
+ """Initialize the weights."""
if isinstance(module, (nn.Linear, nn.Embedding)):
# Slightly different from the TF version which uses truncated_normal for initialization
# cf https://github.com/pytorch/pytorch/pull/5617
@@ -1350,46 +1348,46 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
return_dict=None,
):
r"""
labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, num_predict)`, `optional`, defaults to :obj:`None`):
Labels for masked language modeling.
`num_predict` corresponds to `target_mapping.shape[1]`. If `target_mapping` is `None`, then `num_predict` corresponds to `sequence_length`.
The labels should correspond to the masked input words that should be predicted and depends on `target_mapping`. Note in order to perform standard auto-regressive language modeling a `<mask>` token has to be added to the `input_ids` (see `prepare_inputs_for_generation` fn and examples below)
Indices are selected in ``[-100, 0, ..., config.vocab_size]``
All labels set to ``-100`` are ignored, the loss is only
computed for labels in ``[0, ..., config.vocab_size]``
Return:
Examples::
from transformers import XLNetTokenizer, XLNetLMHeadModel
import torch
tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
model = XLNetLMHeadModel.from_pretrained('xlnet-large-cased', return_dict=True)
# We show how to setup inputs to predict a next token using a bi-directional context.
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)).unsqueeze(0) # We will predict the masked token
perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token
target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping)
next_token_logits = outputs[0] # Output has shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
# The same way can the XLNetLMHeadModel be used to be trained by standard auto-regressive language modeling.
input_ids = torch.tensor(tokenizer.encode("Hello, my dog is very <mask>", add_special_tokens=False)).unsqueeze(0) # We will predict the masked token
labels = torch.tensor(tokenizer.encode("cute", add_special_tokens=False)).unsqueeze(0)
assert labels.shape[0] == 1, 'only one word will be predicted'
perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float)
perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token as is done in standard auto-regressive lm training
target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float) # Shape [1, 1, seq_length] => let's predict one token
target_mapping[0, 0, -1] = 1.0 # Our first (and only) prediction will be the last token of the sequence (the masked token)
outputs = model(input_ids, perm_mask=perm_mask, target_mapping=target_mapping, labels=labels)
loss = outputs.loss
next_token_logits = outputs.logits # Logits have shape [target_mapping.size(0), target_mapping.size(1), config.vocab_size]
"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
use_cache = self.training or (use_cache if use_cache is not None else self.config.use_cache)
@@ -1867,38 +1865,38 @@ class XLNetForQuestionAnswering(XLNetPreTrainedModel):
return_dict=None,
):
r"""
start_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`, defaults to :obj:`None`):
Labels for position (index) of the start of the labelled span for computing the token classification loss.
Positions are clamped to the length of the sequence (`sequence_length`).
Position outside of the sequence are not taken into account for computing the loss.
end_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`, defaults to :obj:`None`):
Labels for position (index) of the end of the labelled span for computing the token classification loss.
Positions are clamped to the length of the sequence (`sequence_length`).
Position outside of the sequence are not taken into account for computing the loss.
is_impossible (``torch.LongTensor`` of shape ``(batch_size,)``, `optional`, defaults to :obj:`None`):
Labels whether a question has an answer or no answer (SQuAD 2.0)
cls_index (``torch.LongTensor`` of shape ``(batch_size,)``, `optional`, defaults to :obj:`None`):
Labels for position (index) of the classification token to use as input for computing plausibility of the answer.
p_mask (``torch.FloatTensor`` of shape ``(batch_size, sequence_length)``, `optional`, defaults to :obj:`None`):
Optional mask of tokens which can't be in answers (e.g. [CLS], [PAD], ...).
1.0 means token should be masked. 0.0 mean token is not masked.
Returns:
Example::
>>> from transformers import XLNetTokenizer, XLNetForQuestionAnswering
>>> import torch
>>> tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
>>> model = XLNetForQuestionAnswering.from_pretrained('xlnet-base-cased', return_dict=True)
>>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0) # Batch size 1
>>> start_positions = torch.tensor([1])
>>> end_positions = torch.tensor([3])
>>> outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
>>> loss = outputs.loss
"""
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
use_cache = self.training or (use_cache if use_cache is not None else self.config.use_cache)
...
@@ -122,7 +122,9 @@ def create_optimizer(
)
if num_warmup_steps:
lr_schedule = WarmUp(
- initial_learning_rate=init_lr, decay_schedule_fn=lr_schedule, warmup_steps=num_warmup_steps,
+ initial_learning_rate=init_lr,
+ decay_schedule_fn=lr_schedule,
+ warmup_steps=num_warmup_steps,
)
if weight_decay_rate > 0.0:
optimizer = AdamWeightDecay(
...
@@ -208,7 +208,11 @@ class PipelineDataFormat:
SUPPORTED_FORMATS = ["json", "csv", "pipe"]
def __init__(
- self, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite: bool = False,
+ self,
+ output_path: Optional[str],
+ input_path: Optional[str],
+ column: Optional[str],
+ overwrite: bool = False,
):
self.output_path = output_path
self.input_path = input_path
@@ -261,7 +265,11 @@ class PipelineDataFormat:
@staticmethod
def from_str(
- format: str, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite=False,
+ format: str,
+ output_path: Optional[str],
+ input_path: Optional[str],
+ column: Optional[str],
+ overwrite=False,
) -> "PipelineDataFormat":
"""
Creates an instance of the right subclass of :class:`~transformers.pipelines.PipelineDataFormat` depending
@@ -305,7 +313,11 @@ class CsvPipelineDataFormat(PipelineDataFormat):
"""
def __init__(
- self, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite=False,
+ self,
+ output_path: Optional[str],
+ input_path: Optional[str],
+ column: Optional[str],
+ overwrite=False,
):
super().__init__(output_path, input_path, column, overwrite=overwrite)
@@ -346,7 +358,11 @@ class JsonPipelineDataFormat(PipelineDataFormat):
"""
def __init__(
- self, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite=False,
+ self,
+ output_path: Optional[str],
+ input_path: Optional[str],
+ column: Optional[str],
+ overwrite=False,
):
super().__init__(output_path, input_path, column, overwrite=overwrite)
@@ -610,7 +626,10 @@ class Pipeline(_ScikitCompat):
# Parse arguments
inputs = self._args_parser(*args, **kwargs)
inputs = self.tokenizer(
- inputs, add_special_tokens=add_special_tokens, return_tensors=self.framework, padding=padding,
+ inputs,
+ add_special_tokens=add_special_tokens,
+ return_tensors=self.framework,
+ padding=padding,
)
return inputs
@@ -1349,7 +1368,10 @@ class TokenClassificationPipeline(Pipeline):
with self.device_placement():
tokens = self.tokenizer(
- sentence, return_attention_mask=False, return_tensors=self.framework, truncation=True,
+ sentence,
+ return_attention_mask=False,
+ return_tensors=self.framework,
+ truncation=True,
)
# Forward
@@ -1925,7 +1947,9 @@ class SummarizationPipeline(Pipeline):
)
summaries = self.model.generate(
- inputs["input_ids"], attention_mask=inputs["attention_mask"], **generate_kwargs,
+ inputs["input_ids"],
+ attention_mask=inputs["attention_mask"],
+ **generate_kwargs,
)
results = []
@@ -1935,7 +1959,9 @@ class SummarizationPipeline(Pipeline):
record["summary_token_ids"] = summary
if return_text:
record["summary_text"] = self.tokenizer.decode(
- summary, skip_special_tokens=True, clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+ summary,
+ skip_special_tokens=True,
+ clean_up_tokenization_spaces=clean_up_tokenization_spaces,
)
results.append(record)
return results
@@ -2032,7 +2058,9 @@ class TranslationPipeline(Pipeline):
)
translations = self.model.generate(
- inputs["input_ids"], attention_mask=inputs["attention_mask"], **generate_kwargs,
+ inputs["input_ids"],
+ attention_mask=inputs["attention_mask"],
+ **generate_kwargs,
)
results = []
for translation in translations:
@@ -2271,7 +2299,9 @@ class ConversationalPipeline(Pipeline):
"You might consider trimming the early phase of the conversation".format(input_length, max_length)
)
generated_responses = self.model.generate(
- inputs["input_ids"], attention_mask=inputs["attention_mask"], **generate_kwargs,
+ inputs["input_ids"],
+ attention_mask=inputs["attention_mask"],
+ **generate_kwargs,
)
cleaned_history = self._clean_padding_history(generated_responses)
@@ -2355,7 +2385,8 @@ class ConversationalPipeline(Pipeline):
max_len = max([len(item) for item in outputs])
outputs = [output + [self.pad_token_id] * (max_len - len(output)) for output in outputs]
outputs = BatchEncoding(
- {"input_ids": outputs, "attention_mask": [[1] * len(outputs)]}, tensor_type=self.framework,
+ {"input_ids": outputs, "attention_mask": [[1] * len(outputs)]},
+ tensor_type=self.framework,
)
return outputs
...
@@ -169,7 +169,7 @@ def assert_screenout(out, what):
class CaptureStd:
- """ Context manager to capture:
+ """Context manager to capture:
stdout, clean it up and make it available via obj.out
stderr, and make it available via obj.err
...
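For reference, a hedged usage sketch of this test helper; the import path follows the file layout at the time of this commit and may differ in later versions.

from transformers.testing_utils import CaptureStd

with CaptureStd() as cs:
    print("hello from stdout")

# Per the docstring above, the captured streams are exposed afterwards as attributes.
assert "hello from stdout" in cs.out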
@@ -105,31 +105,31 @@ TOKENIZER_MAPPING = OrderedDict(
class AutoTokenizer:
r""":class:`~transformers.AutoTokenizer` is a generic tokenizer class
that will be instantiated as one of the tokenizer classes of the library
when created with the `AutoTokenizer.from_pretrained(pretrained_model_name_or_path)`
class method.
The `from_pretrained()` method takes care of returning the correct tokenizer class instance
based on the `model_type` property of the config object, or when it's missing,
falling back to using pattern matching on the `pretrained_model_name_or_path` string:
- `t5`: T5Tokenizer (T5 model)
- `distilbert`: DistilBertTokenizer (DistilBert model)
- `albert`: AlbertTokenizer (ALBERT model)
- `camembert`: CamembertTokenizer (CamemBERT model)
- `xlm-roberta`: XLMRobertaTokenizer (XLM-RoBERTa model)
- `longformer`: LongformerTokenizer (AllenAI Longformer model)
- `roberta`: RobertaTokenizer (RoBERTa model)
- `bert`: BertTokenizer (Bert model)
- `openai-gpt`: OpenAIGPTTokenizer (OpenAI GPT model)
- `gpt2`: GPT2Tokenizer (OpenAI GPT-2 model)
- `transfo-xl`: TransfoXLTokenizer (Transformer-XL model)
- `xlnet`: XLNetTokenizer (XLNet model)
- `xlm`: XLMTokenizer (XLM model)
- `ctrl`: CTRLTokenizer (Salesforce CTRL model)
- `electra`: ElectraTokenizer (Google ELECTRA model)
This class cannot be instantiated using `__init__()` (throw an error).
"""
def __init__(self):
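The dispatch described in the docstring above is easiest to see in a short usage sketch; the checkpoint name is illustrative.

from transformers import AutoTokenizer

# "bert-base-uncased" resolves to a BERT tokenizer via the config's model_type
# (the exact class may be the slow or fast variant, depending on the version and flags).
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
print(type(tokenizer).__name__)
print(tokenizer.tokenize("Hello, world!"))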
@@ -140,7 +140,7 @@ class AutoTokenizer:
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
- r""" Instantiate one of the tokenizer classes of the library
+ r"""Instantiate one of the tokenizer classes of the library
from a pre-trained model vocabulary.
The tokenizer class to instantiate is selected
...
@@ -359,7 +359,7 @@ class BasicTokenizer(object):
"""Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
def __init__(self, do_lower_case=True, never_split=None, tokenize_chinese_chars=True, strip_accents=None):
- """ Constructs a BasicTokenizer.
+ """Constructs a BasicTokenizer.
Args:
**do_lower_case**: Whether to lower case the input.
@@ -383,7 +383,7 @@ class BasicTokenizer(object):
self.strip_accents = strip_accents
def tokenize(self, text, never_split=None):
- """ Basic Tokenization of a piece of text.
+ """Basic Tokenization of a piece of text.
Split on "white spaces" only, for sub-word tokenization, see WordPieceTokenizer.
Args:
...
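A short sketch of the tokenizer documented above; the import path matches the 2020 module layout and is an assumption for newer versions.

from transformers.tokenization_bert import BasicTokenizer

bt = BasicTokenizer(do_lower_case=True)
# Splits on whitespace and punctuation only; WordPiece runs as a separate step.
print(bt.tokenize("Hello, World!"))   # expected: ['hello', ',', 'world', '!']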
@@ -202,8 +202,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
return word
def _tokenize(self, text):
- """ Tokenize a string.
- """
+ """Tokenize a string."""
split_tokens = []
words = re.findall(r"\S+\n?", text)
...
@@ -330,7 +330,11 @@ class CustomDPRReaderTokenizerMixin:
return nbest_spans_predictions[:num_spans]
def _get_best_spans(
- self, start_logits: List[int], end_logits: List[int], max_answer_length: int, top_spans: int,
+ self,
+ start_logits: List[int],
+ end_logits: List[int],
+ max_answer_length: int,
+ top_spans: int,
) -> List[DPRSpanPrediction]:
"""
Finds the best answer span for the extractive Q&A model for one passage.
...
@@ -137,9 +137,7 @@ class MarianTokenizer(PreTrainedTokenizer):
padding="longest",
**unused,
) -> BatchEncoding:
- """Prepare model inputs for translation. For best performance, translate one sentence at a time.
- """
+ """Prepare model inputs for translation. For best performance, translate one sentence at a time."""
if "" in src_texts:
raise ValueError(f"found empty string in src_texts: {src_texts}")
self.current_spm = self.spm_source
...
...@@ -53,29 +53,29 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { ...@@ -53,29 +53,29 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
class ReformerTokenizer(PreTrainedTokenizer): class ReformerTokenizer(PreTrainedTokenizer):
""" """
Constructs an Reformer tokenizer. Based on `SentencePiece <https://github.com/google/sentencepiece>`__ . Constructs an Reformer tokenizer. Based on `SentencePiece <https://github.com/google/sentencepiece>`__ .
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the methods. Users This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the methods. Users
should refer to the superclass for more information regarding methods. should refer to the superclass for more information regarding methods.
Args: Args:
vocab_file (:obj:`string`): vocab_file (:obj:`string`):
`SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm` extension) that `SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm` extension) that
contains the vocabulary necessary to instantiate a tokenizer. contains the vocabulary necessary to instantiate a tokenizer.
eos_token (:obj:`string`, `optional`, defaults to "</s>"): eos_token (:obj:`string`, `optional`, defaults to "</s>"):
The end of sequence token. The end of sequence token.
.. note:: .. note::
When building a sequence using special tokens, this is not the token that is used for the end When building a sequence using special tokens, this is not the token that is used for the end
of sequence. The token used is the :obj:`sep_token`. of sequence. The token used is the :obj:`sep_token`.
unk_token (:obj:`string`, `optional`, defaults to "<unk>"): unk_token (:obj:`string`, `optional`, defaults to "<unk>"):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead. token instead.
pad_token (:obj:`string`, `optional`, defaults to "<pad>"): pad_token (:obj:`string`, `optional`, defaults to "<pad>"):
The token used for padding, for example when batching sequences of different lengths. The token used for padding, for example when batching sequences of different lengths.
additional_special_tokens (:obj:`List[str]`, `optional`, defaults to :obj:`None`): additional_special_tokens (:obj:`List[str]`, `optional`, defaults to :obj:`None`):
Additional special tokens used by the tokenizer. Additional special tokens used by the tokenizer.
""" """
vocab_files_names = VOCAB_FILES_NAMES vocab_files_names = VOCAB_FILES_NAMES
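Going by the argument list documented above, instantiating the tokenizer only requires a SentencePiece model file plus the optional special tokens. A hedged usage sketch ("spiece.model" is a placeholder path, not a file shipped with the library):

from transformers import ReformerTokenizer

# "spiece.model" is a hypothetical path to a trained SentencePiece model file.
tokenizer = ReformerTokenizer(
    vocab_file="spiece.model",
    eos_token="</s>",
    unk_token="<unk>",
    pad_token="<pad>",
)
tokens = tokenizer.tokenize("Hello world")  # list of sub-word pieces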
...@@ -142,8 +142,7 @@ class ReformerTokenizer(PreTrainedTokenizer): ...@@ -142,8 +142,7 @@ class ReformerTokenizer(PreTrainedTokenizer):
self.sp_model.Load(self.vocab_file) self.sp_model.Load(self.vocab_file)
def _tokenize(self, text, sample=False): def _tokenize(self, text, sample=False):
""" Take as input a string and return a list of strings (tokens) for words/sub-words """Take as input a string and return a list of strings (tokens) for words/sub-words"""
"""
if not sample: if not sample:
pieces = self.sp_model.EncodeAsPieces(text) pieces = self.sp_model.EncodeAsPieces(text)
else: else:
...@@ -166,8 +165,8 @@ class ReformerTokenizer(PreTrainedTokenizer): ...@@ -166,8 +165,8 @@ class ReformerTokenizer(PreTrainedTokenizer):
return out_string return out_string
def save_vocabulary(self, save_directory): def save_vocabulary(self, save_directory):
""" Save the sentencepiece vocabulary (copy original file) and special tokens file """Save the sentencepiece vocabulary (copy original file) and special tokens file
to a directory. to a directory.
""" """
if not os.path.isdir(save_directory): if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
......
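Both hunks above lean on the raw SentencePiece API: `_tokenize` either segments deterministically or samples a segmentation, and `save_vocabulary` simply copies the original `.spm` file. A hedged sketch against `sentencepiece` directly (the model path is a placeholder, and the original logs an error rather than raising):

import os
import shutil
import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.Load("spiece.model")  # hypothetical path to a trained model

pieces = sp.EncodeAsPieces("Hello world")                   # deterministic segmentation
sampled = sp.SampleEncodeAsPieces("Hello world", 64, 0.1)   # stochastic, used when sample=True

def save_vocab_copy(vocab_file, save_directory):
    # Saving the "vocabulary" is just copying the original .spm file,
    # as in the save_vocabulary hunk above.
    if not os.path.isdir(save_directory):
        raise ValueError(f"Vocabulary path ({save_directory}) should be a directory")
    target = os.path.join(save_directory, os.path.basename(vocab_file))
    shutil.copyfile(vocab_file, target)
    return target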
...@@ -63,34 +63,34 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { ...@@ -63,34 +63,34 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
class T5Tokenizer(PreTrainedTokenizer): class T5Tokenizer(PreTrainedTokenizer):
""" """
Constructs a T5 tokenizer. Based on `SentencePiece <https://github.com/google/sentencepiece>`__ . Constructs a T5 tokenizer. Based on `SentencePiece <https://github.com/google/sentencepiece>`__ .
This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the methods. Users This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the methods. Users
should refer to the superclass for more information regarding methods. should refer to the superclass for more information regarding methods.
Args: Args:
vocab_file (:obj:`string`): vocab_file (:obj:`string`):
`SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm` extension) that `SentencePiece <https://github.com/google/sentencepiece>`__ file (generally has a `.spm` extension) that
contains the vocabulary necessary to instantiate a tokenizer. contains the vocabulary necessary to instantiate a tokenizer.
eos_token (:obj:`string`, `optional`, defaults to "</s>"): eos_token (:obj:`string`, `optional`, defaults to "</s>"):
The end of sequence token. The end of sequence token.
.. note:: .. note::
When building a sequence using special tokens, this is not the token that is used for the end When building a sequence using special tokens, this is not the token that is used for the end
of sequence. The token used is the :obj:`sep_token`. of sequence. The token used is the :obj:`sep_token`.
unk_token (:obj:`string`, `optional`, defaults to "<unk>"): unk_token (:obj:`string`, `optional`, defaults to "<unk>"):
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead. token instead.
pad_token (:obj:`string`, `optional`, defaults to "<pad>"): pad_token (:obj:`string`, `optional`, defaults to "<pad>"):
The token used for padding, for example when batching sequences of different lengths. The token used for padding, for example when batching sequences of different lengths.
extra_ids (:obj:`List[str]`, `optional`, defaults to :obj:`100`): extra_ids (:obj:`List[str]`, `optional`, defaults to :obj:`100`):
Add a number of extra ids added to the end of the vocabulary for use as sentinels. Add a number of extra ids added to the end of the vocabulary for use as sentinels.
These tokens are accessible as "<extra_id_{%d}>" where "{%d}" is a number between 0 and extra_ids-1. These tokens are accessible as "<extra_id_{%d}>" where "{%d}" is a number between 0 and extra_ids-1.
Extra tokens are indexed from the end of the vocabulary up to the beginning ("<extra_id_0>" is the last token in the vocabulary like in T5 preprocessing Extra tokens are indexed from the end of the vocabulary up to the beginning ("<extra_id_0>" is the last token in the vocabulary like in T5 preprocessing
see: https://github.com/google-research/text-to-text-transfer-transformer/blob/9fd7b14a769417be33bc6c850f9598764913c833/t5/data/preprocessors.py#L2117) see: https://github.com/google-research/text-to-text-transfer-transformer/blob/9fd7b14a769417be33bc6c850f9598764913c833/t5/data/preprocessors.py#L2117)
additional_special_tokens (:obj:`List[str]`, `optional`, defaults to :obj:`None`): additional_special_tokens (:obj:`List[str]`, `optional`, defaults to :obj:`None`):
Additional special tokens used by the tokenizer. Additional special tokens used by the tokenizer.
""" """
vocab_files_names = VOCAB_FILES_NAMES vocab_files_names = VOCAB_FILES_NAMES
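The `extra_ids` description above is easiest to see with concrete numbers; a hedged sketch of the indexing rule (the vocab size of 32000 is assumed purely for illustration):

vocab_size = 32000   # size of the underlying SentencePiece vocabulary (illustrative)
extra_ids = 100      # the default documented above

def sentinel_id(n):
    # "<extra_id_0>" is the last id of the extended vocabulary and the sentinels
    # count down from there, matching the T5 preprocessing note above.
    return vocab_size + extra_ids - 1 - n

print(sentinel_id(0))   # 32099 -> id of "<extra_id_0>"
print(sentinel_id(99))  # 32000 -> id of "<extra_id_99>"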
...@@ -236,8 +236,7 @@ class T5Tokenizer(PreTrainedTokenizer): ...@@ -236,8 +236,7 @@ class T5Tokenizer(PreTrainedTokenizer):
self.sp_model.Load(self.vocab_file) self.sp_model.Load(self.vocab_file)
def _tokenize(self, text, sample=False): def _tokenize(self, text, sample=False):
""" Take as input a string and return a list of strings (tokens) for words/sub-words """Take as input a string and return a list of strings (tokens) for words/sub-words"""
"""
if not sample: if not sample:
pieces = self.sp_model.EncodeAsPieces(text) pieces = self.sp_model.EncodeAsPieces(text)
else: else:
...@@ -266,8 +265,8 @@ class T5Tokenizer(PreTrainedTokenizer): ...@@ -266,8 +265,8 @@ class T5Tokenizer(PreTrainedTokenizer):
return out_string return out_string
def save_vocabulary(self, save_directory): def save_vocabulary(self, save_directory):
""" Save the sentencepiece vocabulary (copy original file) and special tokens file """Save the sentencepiece vocabulary (copy original file) and special tokens file
to a directory. to a directory.
""" """
if not os.path.isdir(save_directory): if not os.path.isdir(save_directory):
logger.error("Vocabulary path ({}) should be a directory".format(save_directory)) logger.error("Vocabulary path ({}) should be a directory".format(save_directory))
......
...@@ -163,7 +163,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer): ...@@ -163,7 +163,7 @@ class TransfoXLTokenizer(PreTrainedTokenizer):
def count_sents(self, sents, verbose=False): def count_sents(self, sents, verbose=False):
""" """
sents : a list of sentences, each a list of tokenized symbols sents : a list of sentences, each a list of tokenized symbols
""" """
if verbose: if verbose:
logger.info("counting {} sents ...".format(len(sents))) logger.info("counting {} sents ...".format(len(sents)))
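Since `count_sents` just tallies symbols over already-tokenized sentences, here is a hedged standalone sketch with `collections.Counter` (the real method updates the tokenizer's internal counter rather than returning a new one):

from collections import Counter

def count_sents(sents, verbose=False):
    # sents: a list of sentences, each a list of tokenized symbols.
    counter = Counter()
    for idx, symbols in enumerate(sents):
        if verbose and idx > 0 and idx % 500000 == 0:
            print(f"    line {idx}")
        counter.update(symbols)
    return counter

print(count_sents([["hello", "world"], ["hello"]]))
# Counter({'hello': 2, 'world': 1})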
...@@ -496,7 +496,7 @@ class TransfoXLTokenizerFast(PreTrainedTokenizerFast): ...@@ -496,7 +496,7 @@ class TransfoXLTokenizerFast(PreTrainedTokenizerFast):
class LMOrderedIterator(object): class LMOrderedIterator(object):
def __init__(self, data, bsz, bptt, device="cpu", ext_len=None): def __init__(self, data, bsz, bptt, device="cpu", ext_len=None):
""" """
data -- LongTensor -- the LongTensor is strictly ordered data -- LongTensor -- the LongTensor is strictly ordered
""" """
self.bsz = bsz self.bsz = bsz
self.bptt = bptt self.bptt = bptt
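The docstring above only says the data is one strictly ordered LongTensor; a hedged sketch of what an ordered LM iterator does with `bsz` and `bptt` (the standard batchify-then-window pattern, not the class's exact code):

import torch

def batchify(data, bsz):
    # Trim the ordered token stream so it divides evenly into bsz columns,
    # then reshape to (n_step, bsz) so each column is a contiguous sub-stream.
    n_step = data.size(0) // bsz
    data = data[: n_step * bsz]
    return data.view(bsz, n_step).t().contiguous()

stream = torch.arange(10)            # toy ordered token ids
batches = batchify(stream, bsz=2)    # shape (5, 2)
bptt = 3
for i in range(0, batches.size(0) - 1, bptt):
    seq_len = min(bptt, batches.size(0) - 1 - i)
    inp = batches[i : i + seq_len]           # inputs of length seq_len
    tgt = batches[i + 1 : i + 1 + seq_len]   # next-token targets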
...@@ -555,7 +555,7 @@ class LMOrderedIterator(object): ...@@ -555,7 +555,7 @@ class LMOrderedIterator(object):
class LMShuffledIterator(object): class LMShuffledIterator(object):
def __init__(self, data, bsz, bptt, device="cpu", ext_len=None, shuffle=False): def __init__(self, data, bsz, bptt, device="cpu", ext_len=None, shuffle=False):
""" """
data -- list[LongTensor] -- there is no order among the LongTensors data -- list[LongTensor] -- there is no order among the LongTensors
""" """
self.data = data self.data = data
......
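By contrast, the shuffled iterator's data is a list of independent LongTensors with no order among them; a hedged sketch of how the sentence order might be drawn each epoch (names are illustrative):

import numpy as np

def sent_stream(data, shuffle=False):
    # Yield the per-sentence tensors in random order when shuffle=True,
    # otherwise in their original order.
    order = np.random.permutation(len(data)) if shuffle else np.arange(len(data))
    for idx in order:
        yield data[idx]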