chenpangpang / transformers · Commits
"test/gtest-1.11.0/googletest/samples/sample7_unittest.cc" did not exist on "5e24f35816ed2e256efc15cd5d28c147119cf394"
Commit a75c64d8 · authored Aug 26, 2020 by Lysandre

Black 20 release

Parent: e78c1103

Changes: 191
Showing 20 changed files with 368 additions and 314 deletions (+368 −314).
src/transformers/modeling_tf_xlm.py                 +7   −7
src/transformers/modeling_tf_xlm_roberta.py         +2   −1
src/transformers/modeling_tf_xlnet.py               +24  −24
src/transformers/modeling_transfo_xl.py             +18  −13
src/transformers/modeling_transfo_xl_utilities.py   +12  −12
src/transformers/modeling_utils.py                  +36  −25
src/transformers/modeling_xlm.py                    +47  −41
src/transformers/modeling_xlm_roberta.py            +2   −1
src/transformers/modeling_xlnet.py                  +79  −81
src/transformers/optimization_tf.py                 +3   −1
src/transformers/pipelines.py                       +42  −11
src/transformers/testing_utils.py                   +1   −1
src/transformers/tokenization_auto.py               +26  −26
src/transformers/tokenization_bert.py               +2   −2
src/transformers/tokenization_ctrl.py               +1   −2
src/transformers/tokenization_dpr.py                +5   −1
src/transformers/tokenization_marian.py             +1   −3
src/transformers/tokenization_reformer.py           +26  −27
src/transformers/tokenization_t5.py                 +31  −32
src/transformers/tokenization_transfo_xl.py         +3   −3
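For context: this commit applies the 2020 stable release of the Black formatter across the codebase. A minimal sketch of reproducing the new docstring style on a source string, assuming black==20.8b0 and a 119-character line length (both are assumptions, not stated on this page):

    import black

    src = 'def f(a, b):\n    """ Tokenize a string.\n    """\n    return a\n'

    # Black 20.8 strips the stray space after the opening quotes and collapses
    # the one-line docstring onto a single line, as seen throughout this diff.
    print(black.format_str(src, mode=black.FileMode(line_length=119)))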
src/transformers/modeling_tf_xlm.py

@@ -83,7 +83,7 @@ def create_sinusoidal_embeddings(n_pos, dim, out):
 def gelu(x):
-    """ Gaussian Error Linear Unit.
+    """Gaussian Error Linear Unit.
     Original Implementation of the gelu activation function in Google Bert repo when initially created.
     For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
     0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
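For reference, the two GELU variants the docstring contrasts, written out as a standalone sketch (TensorFlow here, since this is a TF module; the function names are illustrative, not the repo's):

    import math
    import tensorflow as tf

    def gelu_exact(x):
        # BERT's original GELU: x * Phi(x), with Phi the standard normal CDF.
        return x * 0.5 * (1.0 + tf.math.erf(x / tf.math.sqrt(2.0)))

    def gelu_gpt(x):
        # OpenAI GPT's tanh approximation, the formula quoted in the docstring above.
        return 0.5 * x * (1.0 + tf.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * tf.pow(x, 3.0))))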
@@ -333,7 +333,7 @@ class TFXLMMainLayer(tf.keras.layers.Layer):
         raise NotImplementedError

     def _prune_heads(self, heads_to_prune):
-        """ Prunes heads of the model.
+        """Prunes heads of the model.
         heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
         See base class PreTrainedModel
         """
@@ -516,7 +516,7 @@ class TFXLMMainLayer(tf.keras.layers.Layer):
 class TFXLMPreTrainedModel(TFPreTrainedModel):
-    """ An abstract class to handle weights initialization and
+    """An abstract class to handle weights initialization and
     a simple interface for downloading and loading pretrained models.
     """
@@ -858,7 +858,7 @@ class TFXLMForMultipleChoice(TFXLMPreTrainedModel, TFMultipleChoiceLoss):
     @property
     def dummy_inputs(self):
-        """ Dummy inputs to build the network.
+        """Dummy inputs to build the network.

         Returns:
             tf.Tensor with dummy inputs
src/transformers/modeling_tf_xlm_roberta.py

@@ -77,7 +77,8 @@ class TFXLMRobertaModel(TFRobertaModel):
 @add_start_docstrings(
-    """XLM-RoBERTa Model with a `language modeling` head on top. """, XLM_ROBERTA_START_DOCSTRING,
+    """XLM-RoBERTa Model with a `language modeling` head on top. """,
+    XLM_ROBERTA_START_DOCSTRING,
 )
 class TFXLMRobertaForMaskedLM(TFRobertaForMaskedLM):
     """
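Most of the one-line-to-multi-line rewrites in this commit come from Black 20.8's new "magic trailing comma": a call or collection that already ends in a trailing comma is kept exploded, one element per line, instead of being collapsed. A minimal before/after illustration, assuming black>=20.8b0 (frobnicate is a hypothetical function):

    # Input ending in a trailing comma: Black 20.8 keeps it one-argument-per-line.
    result = frobnicate(
        first_argument,
        second_argument,
    )

    # Without the trailing comma, the call is collapsed when it fits the line length:
    result = frobnicate(first_argument, second_argument)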
src/transformers/modeling_tf_xlnet.py

@@ -62,7 +62,7 @@ TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = [
 def gelu(x):
-    """ Implementation of the gelu activation function.
+    """Implementation of the gelu activation function.
     XLNet is using OpenAI GPT's gelu
     Also see https://arxiv.org/abs/1606.08415
     """

@@ -807,7 +807,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
 class TFXLNetPreTrainedModel(TFPreTrainedModel):
-    """ An abstract class to handle weights initialization and
+    """An abstract class to handle weights initialization and
     a simple interface for downloading and loading pretrained models.
     """

@@ -1401,7 +1401,7 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss):
     @property
     def dummy_inputs(self):
-        """ Dummy inputs to build the network.
+        """Dummy inputs to build the network.

         Returns:
             tf.Tensor with dummy inputs
src/transformers/modeling_transfo_xl.py

@@ -45,7 +45,7 @@ TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST = [
 def build_tf_to_pytorch_map(model, config):
-    """ A map of modules from TF to PyTorch.
+    """A map of modules from TF to PyTorch.
     This time I use a map to keep the PyTorch model as identical to the original PyTorch model as possible.
     """
     tf_to_pt_map = {}

@@ -112,8 +112,7 @@ def build_tf_to_pytorch_map(model, config):
 def load_tf_weights_in_transfo_xl(model, config, tf_path):
-    """ Load tf checkpoints in a pytorch model
-    """
+    """Load tf checkpoints in a pytorch model"""
     try:
         import numpy as np
         import tensorflow as tf
@@ -386,7 +385,12 @@ class RelPartialLearnableDecoderLayer(nn.Module):
     def forward(self, dec_inp, r, dec_attn_mask=None, mems=None, head_mask=None, output_attentions=False):
         attn_outputs = self.dec_attn(
-            dec_inp, r, attn_mask=dec_attn_mask, mems=mems, head_mask=head_mask, output_attentions=output_attentions,
+            dec_inp,
+            r,
+            attn_mask=dec_attn_mask,
+            mems=mems,
+            head_mask=head_mask,
+            output_attentions=output_attentions,
         )
         ff_output = self.pos_ff(attn_outputs[0])
@@ -456,7 +460,7 @@ class AdaptiveEmbedding(nn.Module):
 class TransfoXLPreTrainedModel(PreTrainedModel):
-    """ An abstract class to handle weights initialization and
+    """An abstract class to handle weights initialization and
     a simple interface for downloading and loading pretrained models.
     """
@@ -474,8 +478,7 @@ class TransfoXLPreTrainedModel(PreTrainedModel):
             nn.init.constant_(bias, 0.0)

     def _init_weights(self, m):
-        """ Initialize the weights.
-        """
+        """Initialize the weights."""
         classname = m.__class__.__name__
         if classname.find("Linear") != -1:
             if hasattr(m, "weight") and m.weight is not None:
@@ -515,7 +518,7 @@ class TransfoXLPreTrainedModel(PreTrainedModel):
             self._init_bias(m.r_bias)

     def resize_token_embeddings(self, new_num_tokens: Optional[int] = None, layer: Optional[int] = -1):
-        """ Resize input token embeddings matrix of the model if new_num_tokens != config.vocab_size.
+        """Resize input token embeddings matrix of the model if new_num_tokens != config.vocab_size.
         Take care of tying weights embeddings afterwards if the model class has a `tie_weights()` method.

         Arguments:
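A hypothetical use of the method above, growing the embedding matrix after new tokens are added to a tokenizer (the token strings and variables are illustrative; Transfo-XL's override additionally accepts a layer index into its adaptive embedding):

    # Extend the vocabulary, then resize the model's input embeddings to match.
    num_added = tokenizer.add_tokens(["<tech>", "<law>"])
    model.resize_token_embeddings(len(tokenizer))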
@@ -948,7 +951,10 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
             return tuple(v for v in [core_out, new_mems, hids, attentions] if v is not None)

         return TransfoXLModelOutput(
-            last_hidden_state=core_out, mems=new_mems, hidden_states=hids, attentions=attentions,
+            last_hidden_state=core_out,
+            mems=new_mems,
+            hidden_states=hids,
+            attentions=attentions,
         )
@@ -1064,8 +1070,7 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
     )
     def get_output_embeddings(self):
-        """ Double-check if you are using adaptive softmax.
-        """
+        """Double-check if you are using adaptive softmax."""
         if self.sample_softmax > 0:
             return self.out_layer
         else:
src/transformers/modeling_transfo_xl_utilities.py

@@ -191,7 +191,7 @@ class ProjectedAdaptiveLogSoftmax(nn.Module):
         return out

     def log_prob(self, hidden):
-        r""" Computes log probabilities for all :math:`n\_classes`
+        r"""Computes log probabilities for all :math:`n\_classes`
         From: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/adaptive.py

         Args:
             hidden (Tensor): a minibatch of examples
src/transformers/modeling_utils.py

@@ -51,8 +51,7 @@ try:
 except ImportError:
     # Older PyTorch compatibility
     class Identity(nn.Module):
-        r"""A placeholder identity operator that is argument-insensitive.
-        """
+        r"""A placeholder identity operator that is argument-insensitive."""

         def __init__(self, *args, **kwargs):
             super().__init__()
@@ -488,8 +487,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
     )
     def _tie_or_clone_weights(self, output_embeddings, input_embeddings):
-        """ Tie or clone module weights depending of whether we are using TorchScript or not
-        """
+        """Tie or clone module weights depending of whether we are using TorchScript or not"""
         if self.config.torchscript:
             output_embeddings.weight = nn.Parameter(input_embeddings.weight.clone())
         else:
@@ -498,7 +496,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
         if getattr(output_embeddings, "bias", None) is not None:
             output_embeddings.bias.data = torch.nn.functional.pad(
                 output_embeddings.bias.data,
-                (0, output_embeddings.weight.shape[0] - output_embeddings.bias.shape[0],),
+                (
+                    0,
+                    output_embeddings.weight.shape[0] - output_embeddings.bias.shape[0],
+                ),
                 "constant",
                 0,
             )
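The torch.nn.functional.pad call above right-pads the tied output bias with zeros so it matches a resized vocabulary; a standalone sketch with illustrative sizes:

    import torch
    import torch.nn.functional as F

    old_bias = torch.zeros(30000)   # bias sized for the old vocabulary
    new_vocab = 30002               # vocabulary after adding two tokens

    # (0, n) pads nothing on the left and n zeros on the right of the last dim.
    new_bias = F.pad(old_bias, (0, new_vocab - old_bias.shape[0]), "constant", 0)
    assert new_bias.shape[0] == new_vocab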
@@ -906,7 +907,13 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin):
         def load(module: nn.Module, prefix=""):
             local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
             module._load_from_state_dict(
-                state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs,
+                state_dict,
+                prefix,
+                local_metadata,
+                True,
+                missing_keys,
+                unexpected_keys,
+                error_msgs,
             )
             for name, child in module._modules.items():
                 if child is not None:
@@ -1375,7 +1382,7 @@ class SequenceSummary(nn.Module):
             self.summary = nn.Linear(config.hidden_size, num_classes)

         activation_string = getattr(config, "summary_activation", None)
-        self.activation: Callable = (get_activation(activation_string) if activation_string else Identity())
+        self.activation: Callable = get_activation(activation_string) if activation_string else Identity()

         self.first_dropout = Identity()
         if hasattr(config, "summary_first_dropout") and config.summary_first_dropout > 0:
@@ -1409,7 +1416,11 @@ class SequenceSummary(nn.Module):
             output = hidden_states.mean(dim=1)
         elif self.summary_type == "cls_index":
             if cls_index is None:
-                cls_index = torch.full_like(hidden_states[..., :1, :], hidden_states.shape[-2] - 1, dtype=torch.long,)
+                cls_index = torch.full_like(
+                    hidden_states[..., :1, :],
+                    hidden_states.shape[-2] - 1,
+                    dtype=torch.long,
+                )
             else:
                 cls_index = cls_index.unsqueeze(-1).unsqueeze(-1)
                 cls_index = cls_index.expand((-1,) * (cls_index.dim() - 1) + (hidden_states.size(-1),))
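In the cls_index branch above, a missing index defaults to the last token of every sequence, and the chosen token's hidden state is then gathered out. A shape-level sketch of that default path (the sizes and the gather step are illustrative, not the repo's verbatim code):

    import torch

    hidden_states = torch.randn(2, 5, 8)  # (batch, seq_len, hidden)

    # Default index: the last position, broadcast to (batch, 1, hidden).
    cls_index = torch.full_like(
        hidden_states[..., :1, :],
        hidden_states.shape[-2] - 1,
        dtype=torch.long,
    )

    # Select one hidden state per example: (batch, 1, hidden) -> (batch, hidden).
    output = hidden_states.gather(-2, cls_index).squeeze(-2)
    assert output.shape == (2, 8)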
src/transformers/modeling_xlm.py

@@ -228,7 +228,7 @@ class TransformerFFN(nn.Module):
 class XLMPreTrainedModel(PreTrainedModel):
-    """ An abstract class to handle weights initialization and
+    """An abstract class to handle weights initialization and
     a simple interface for downloading and loading pretrained models.
     """
@@ -462,7 +462,7 @@ class XLMModel(XLMPreTrainedModel):
         self.embeddings = new_embeddings

     def _prune_heads(self, heads_to_prune):
-        """ Prunes heads of the model.
+        """Prunes heads of the model.
         heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
         See base class PreTrainedModel
         """
@@ -572,7 +572,11 @@ class XLMModel(XLMPreTrainedModel):
             # self attention
             attn_outputs = self.attentions[i](
-                tensor, attn_mask, cache=cache, head_mask=head_mask[i], output_attentions=output_attentions,
+                tensor,
+                attn_mask,
+                cache=cache,
+                head_mask=head_mask[i],
+                output_attentions=output_attentions,
             )
             attn = attn_outputs[0]
             if output_attentions:
@@ -633,8 +637,7 @@ class XLMPredLayer(nn.Module):
     )
     def forward(self, x, y=None):
-        """ Compute the loss, and optionally the scores.
-        """
+        """Compute the loss, and optionally the scores."""
         outputs = ()
         if self.asm is False:
             scores = self.proj(x)
@@ -1131,7 +1134,10 @@ class XLMForTokenClassification(XLMPreTrainedModel):
             return ((loss,) + output) if loss is not None else output

         return TokenClassifierOutput(
-            loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions,
+            loss=loss,
+            logits=logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
         )
src/transformers/modeling_xlm_roberta.py

@@ -68,7 +68,8 @@ class XLMRobertaModel(RobertaModel):
 @add_start_docstrings(
-    """XLM-RoBERTa Model with a `language modeling` head on top. """, XLM_ROBERTA_START_DOCSTRING,
+    """XLM-RoBERTa Model with a `language modeling` head on top. """,
+    XLM_ROBERTA_START_DOCSTRING,
 )
 class XLMRobertaForMaskedLM(RobertaForMaskedLM):
     """
src/transformers/modeling_xlnet.py

@@ -58,7 +58,7 @@ XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = [
 def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None):
-    """ A map of modules from TF to PyTorch.
+    """A map of modules from TF to PyTorch.
     I use a map to keep the PyTorch model as
     identical to the original PyTorch model as possible.
     """

@@ -141,8 +141,7 @@ def build_tf_xlnet_to_pytorch_map(model, config, tf_weights=None):
 def load_tf_weights_in_xlnet(model, config, tf_path):
-    """ Load tf checkpoints in a pytorch model
-    """
+    """Load tf checkpoints in a pytorch model"""
     try:
         import numpy as np
         import tensorflow as tf

@@ -548,7 +547,7 @@ class XLNetLayer(nn.Module):
 class XLNetPreTrainedModel(PreTrainedModel):
-    """ An abstract class to handle weights initialization and
+    """An abstract class to handle weights initialization and
     a simple interface for downloading and loading pretrained models.
     """

@@ -557,8 +556,7 @@ class XLNetPreTrainedModel(PreTrainedModel):
     base_model_prefix = "transformer"

     def _init_weights(self, module):
-        """ Initialize the weights.
-        """
+        """Initialize the weights."""
         if isinstance(module, (nn.Linear, nn.Embedding)):
             # Slightly different from the TF version which uses truncated_normal for initialization
             # cf https://github.com/pytorch/pytorch/pull/5617
src/transformers/optimization_tf.py

@@ -122,7 +122,9 @@ def create_optimizer(
     )

     if num_warmup_steps:
         lr_schedule = WarmUp(
-            initial_learning_rate=init_lr, decay_schedule_fn=lr_schedule, warmup_steps=num_warmup_steps,
+            initial_learning_rate=init_lr,
+            decay_schedule_fn=lr_schedule,
+            warmup_steps=num_warmup_steps,
         )

     if weight_decay_rate > 0.0:
         optimizer = AdamWeightDecay(
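The WarmUp schedule being reformatted above wraps a decay schedule with a linear warmup phase; a hypothetical standalone use (the import path mirrors the file shown here, and the decay schedule and step counts are illustrative):

    import tensorflow as tf
    from transformers.optimization_tf import WarmUp

    decay = tf.keras.optimizers.schedules.PolynomialDecay(
        initial_learning_rate=5e-5, decay_steps=9000, end_learning_rate=0.0
    )
    lr_schedule = WarmUp(
        initial_learning_rate=5e-5,
        decay_schedule_fn=decay,
        warmup_steps=1000,
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)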
src/transformers/pipelines.py

@@ -208,7 +208,11 @@ class PipelineDataFormat:
     SUPPORTED_FORMATS = ["json", "csv", "pipe"]

     def __init__(
-        self, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite: bool = False,
+        self,
+        output_path: Optional[str],
+        input_path: Optional[str],
+        column: Optional[str],
+        overwrite: bool = False,
     ):
         self.output_path = output_path
         self.input_path = input_path
@@ -261,7 +265,11 @@ class PipelineDataFormat:
     @staticmethod
     def from_str(
-        format: str, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite=False,
+        format: str,
+        output_path: Optional[str],
+        input_path: Optional[str],
+        column: Optional[str],
+        overwrite=False,
     ) -> "PipelineDataFormat":
         """
         Creates an instance of the right subclass of :class:`~transformers.pipelines.PipelineDataFormat` depending
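A hypothetical call to the factory shown above, selecting the CSV reader/writer (the paths and column name are illustrative):

    data_format = PipelineDataFormat.from_str(
        "csv",
        output_path="predictions.csv",
        input_path="inputs.csv",
        column="text",
        overwrite=False,
    )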
@@ -305,7 +313,11 @@ class CsvPipelineDataFormat(PipelineDataFormat):
     """

     def __init__(
-        self, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite=False,
+        self,
+        output_path: Optional[str],
+        input_path: Optional[str],
+        column: Optional[str],
+        overwrite=False,
     ):
         super().__init__(output_path, input_path, column, overwrite=overwrite)
@@ -346,7 +358,11 @@ class JsonPipelineDataFormat(PipelineDataFormat):
     """

     def __init__(
-        self, output_path: Optional[str], input_path: Optional[str], column: Optional[str], overwrite=False,
+        self,
+        output_path: Optional[str],
+        input_path: Optional[str],
+        column: Optional[str],
+        overwrite=False,
     ):
         super().__init__(output_path, input_path, column, overwrite=overwrite)
@@ -610,7 +626,10 @@ class Pipeline(_ScikitCompat):
         # Parse arguments
         inputs = self._args_parser(*args, **kwargs)
         inputs = self.tokenizer(
-            inputs, add_special_tokens=add_special_tokens, return_tensors=self.framework, padding=padding,
+            inputs,
+            add_special_tokens=add_special_tokens,
+            return_tensors=self.framework,
+            padding=padding,
         )

         return inputs
@@ -1349,7 +1368,10 @@ class TokenClassificationPipeline(Pipeline):
             with self.device_placement():
                 tokens = self.tokenizer(
-                    sentence, return_attention_mask=False, return_tensors=self.framework, truncation=True,
+                    sentence,
+                    return_attention_mask=False,
+                    return_tensors=self.framework,
+                    truncation=True,
                 )

                 # Forward
@@ -1925,7 +1947,9 @@ class SummarizationPipeline(Pipeline):
             )

             summaries = self.model.generate(
-                inputs["input_ids"], attention_mask=inputs["attention_mask"], **generate_kwargs,
+                inputs["input_ids"],
+                attention_mask=inputs["attention_mask"],
+                **generate_kwargs,
             )

             results = []
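Because **generate_kwargs is forwarded verbatim to model.generate above, any generation option can be passed straight through the pipeline; a hypothetical call (the text and option values are illustrative):

    from transformers import pipeline

    summarizer = pipeline("summarization")
    summarizer(
        "Long article text ...",
        max_length=60,      # forwarded to model.generate via **generate_kwargs
        min_length=20,
        do_sample=False,
    )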
@@ -1935,7 +1959,9 @@ class SummarizationPipeline(Pipeline):
                     record["summary_token_ids"] = summary
                 if return_text:
                     record["summary_text"] = self.tokenizer.decode(
-                        summary, skip_special_tokens=True, clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+                        summary,
+                        skip_special_tokens=True,
+                        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
                     )
                 results.append(record)
             return results
@@ -2032,7 +2058,9 @@ class TranslationPipeline(Pipeline):
             )

             translations = self.model.generate(
-                inputs["input_ids"], attention_mask=inputs["attention_mask"], **generate_kwargs,
+                inputs["input_ids"],
+                attention_mask=inputs["attention_mask"],
+                **generate_kwargs,
             )
             results = []
             for translation in translations:
@@ -2271,7 +2299,9 @@ class ConversationalPipeline(Pipeline):
                         "You might consider trimming the early phase of the conversation".format(input_length, max_length)
                     )
             generated_responses = self.model.generate(
-                inputs["input_ids"], attention_mask=inputs["attention_mask"], **generate_kwargs,
+                inputs["input_ids"],
+                attention_mask=inputs["attention_mask"],
+                **generate_kwargs,
             )

             cleaned_history = self._clean_padding_history(generated_responses)
@@ -2355,7 +2385,8 @@ class ConversationalPipeline(Pipeline):
         max_len = max([len(item) for item in outputs])
         outputs = [output + [self.pad_token_id] * (max_len - len(output)) for output in outputs]
         outputs = BatchEncoding(
-            {"input_ids": outputs, "attention_mask": [[1] * len(outputs)]}, tensor_type=self.framework,
+            {"input_ids": outputs, "attention_mask": [[1] * len(outputs)]},
+            tensor_type=self.framework,
         )
         return outputs
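A standalone sketch of the right-padding applied above before wrapping the batch in BatchEncoding (the token ids are illustrative and pad_token_id is assumed to be 0):

    outputs = [[5, 6, 7], [8, 9]]
    pad_token_id = 0

    max_len = max(len(item) for item in outputs)
    outputs = [o + [pad_token_id] * (max_len - len(o)) for o in outputs]
    assert outputs == [[5, 6, 7], [8, 9, 0]]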
src/transformers/testing_utils.py

@@ -169,7 +169,7 @@ def assert_screenout(out, what):
 class CaptureStd:
-    """ Context manager to capture:
+    """Context manager to capture:
     stdout, clean it up and make it available via obj.out
     stderr, and make it available via obj.err
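A hypothetical use of the context manager, matching the behavior its docstring describes:

    from transformers.testing_utils import CaptureStd

    with CaptureStd() as cs:
        print("hello")
    assert "hello" in cs.out  # captured (and cleaned-up) stdout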
src/transformers/tokenization_auto.py

@@ -140,7 +140,7 @@ class AutoTokenizer:
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
-        r""" Instantiate one of the tokenizer classes of the library
+        r"""Instantiate one of the tokenizer classes of the library
         from a pre-trained model vocabulary.

         The tokenizer class to instantiate is selected
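A typical call to the classmethod above; the checkpoint name is illustrative:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    tokenizer.tokenize("Hello world")  # -> ["hello", "world"]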
src/transformers/tokenization_bert.py

@@ -359,7 +359,7 @@ class BasicTokenizer(object):
     """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""

     def __init__(self, do_lower_case=True, never_split=None, tokenize_chinese_chars=True, strip_accents=None):
-        """ Constructs a BasicTokenizer.
+        """Constructs a BasicTokenizer.

         Args:
             **do_lower_case**: Whether to lower case the input.

@@ -383,7 +383,7 @@ class BasicTokenizer(object):
         self.strip_accents = strip_accents

     def tokenize(self, text, never_split=None):
-        """ Basic Tokenization of a piece of text.
+        """Basic Tokenization of a piece of text.
         Split on "white spaces" only, for sub-word tokenization, see WordPieceTokenizer.

         Args:
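A hypothetical use of BasicTokenizer showing the punctuation splitting and lower-casing described above:

    from transformers.tokenization_bert import BasicTokenizer

    tokenizer = BasicTokenizer(do_lower_case=True)
    tokenizer.tokenize("Hello, WORLD!")  # -> ["hello", ",", "world", "!"]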
src/transformers/tokenization_ctrl.py

@@ -202,8 +202,7 @@ class CTRLTokenizer(PreTrainedTokenizer):
         return word

     def _tokenize(self, text):
-        """ Tokenize a string.
-        """
+        """Tokenize a string."""
         split_tokens = []

         words = re.findall(r"\S+\n?", text)
src/transformers/tokenization_dpr.py

@@ -330,7 +330,11 @@ class CustomDPRReaderTokenizerMixin:
         return nbest_spans_predictions[:num_spans]

     def _get_best_spans(
-        self, start_logits: List[int], end_logits: List[int], max_answer_length: int, top_spans: int,
+        self,
+        start_logits: List[int],
+        end_logits: List[int],
+        max_answer_length: int,
+        top_spans: int,
     ) -> List[DPRSpanPrediction]:
         """
         Finds the best answer span for the extractive Q&A model for one passage.
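A sketch of the span-scoring idea behind _get_best_spans: every (start, end) pair no longer than max_answer_length is scored as start_logits[start] + end_logits[end], and the top-scoring spans are kept. This is the standard extractive-QA recipe, not the repo's exact implementation (which also filters overlapping spans):

    from typing import List, Tuple

    def best_spans(start_logits: List[float], end_logits: List[float],
                   max_answer_length: int, top_spans: int) -> List[Tuple[int, int]]:
        scored = []
        for start, s_score in enumerate(start_logits):
            for end in range(start, min(start + max_answer_length, len(end_logits))):
                scored.append(((start, end), s_score + end_logits[end]))
        scored.sort(key=lambda item: item[1], reverse=True)
        return [span for span, _ in scored[:top_spans]]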
src/transformers/tokenization_marian.py

@@ -137,9 +137,7 @@ class MarianTokenizer(PreTrainedTokenizer):
         padding="longest",
         **unused,
     ) -> BatchEncoding:
-        """Prepare model inputs for translation. For best performance, translate one sentence at a time.
-        """
+        """Prepare model inputs for translation. For best performance, translate one sentence at a time."""
         if "" in src_texts:
             raise ValueError(f"found empty string in src_texts: {src_texts}")
         self.current_spm = self.spm_source
src/transformers/tokenization_reformer.py

@@ -142,8 +142,7 @@ class ReformerTokenizer(PreTrainedTokenizer):
         self.sp_model.Load(self.vocab_file)

     def _tokenize(self, text, sample=False):
-        """ Take as input a string and return a list of strings (tokens) for words/sub-words
-        """
+        """Take as input a string and return a list of strings (tokens) for words/sub-words"""
         if not sample:
             pieces = self.sp_model.EncodeAsPieces(text)
         else:
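The sample flag above switches the SentencePiece model from deterministic segmentation to sampled segmentation (subword regularization); a sketch assuming a loaded sentencepiece processor (the model path and the nbest/alpha values are illustrative):

    import sentencepiece as spm

    sp_model = spm.SentencePieceProcessor()
    sp_model.Load("spiece.model")

    pieces = sp_model.EncodeAsPieces("New York")                  # deterministic
    sampled = sp_model.SampleEncodeAsPieces("New York", 64, 0.1)  # sampled segmentation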
@@ -166,7 +165,7 @@ class ReformerTokenizer(PreTrainedTokenizer):
         return out_string

     def save_vocabulary(self, save_directory):
-        """ Save the sentencepiece vocabulary (copy original file) and special tokens file
+        """Save the sentencepiece vocabulary (copy original file) and special tokens file
         to a directory.
         """
         if not os.path.isdir(save_directory):
src/transformers/tokenization_t5.py

@@ -236,8 +236,7 @@ class T5Tokenizer(PreTrainedTokenizer):
         self.sp_model.Load(self.vocab_file)

     def _tokenize(self, text, sample=False):
-        """ Take as input a string and return a list of strings (tokens) for words/sub-words
-        """
+        """Take as input a string and return a list of strings (tokens) for words/sub-words"""
         if not sample:
             pieces = self.sp_model.EncodeAsPieces(text)
         else:

@@ -266,7 +265,7 @@ class T5Tokenizer(PreTrainedTokenizer):
         return out_string

     def save_vocabulary(self, save_directory):
-        """ Save the sentencepiece vocabulary (copy original file) and special tokens file
+        """Save the sentencepiece vocabulary (copy original file) and special tokens file
         to a directory.
         """
         if not os.path.isdir(save_directory):
src/transformers/tokenization_transfo_xl.py

(diff truncated: page 5 of 10)