Commit 42f63e38 authored by Sylvain Gugger

Merge remote-tracking branch 'origin/master'

parents bb03a14e 4df6b593
@@ -21,6 +21,12 @@ from transformers import (
     TFTrainer,
     TFTrainingArguments,
 )
+from transformers.utils import logging as hf_logging
+
+
+hf_logging.set_verbosity_info()
+hf_logging.enable_default_handler()
+hf_logging.enable_explicit_format()


 def get_tfds(
...
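For context, the `hf_logging` helpers configured above come from the library's own `transformers.utils.logging` module. A minimal standalone sketch of the pattern the TF example scripts adopt here (the logged message is illustrative):

```python
from transformers.utils import logging as hf_logging

hf_logging.set_verbosity_info()      # surface INFO-level messages from the library
hf_logging.enable_default_handler()  # attach the library's default stream handler
hf_logging.enable_explicit_format()  # switch to a more explicit per-line log format

logger = hf_logging.get_logger(__name__)
logger.info("transformers logging configured")
```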
@@ -29,6 +29,7 @@ from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, Tenso
 from torch.utils.data.distributed import DistributedSampler
 from tqdm import tqdm, trange

+import transformers
 from transformers import (
     WEIGHTS_NAME,
     AdamW,
@@ -41,6 +42,7 @@ from transformers import glue_convert_examples_to_features as convert_examples_t
 from transformers import xnli_compute_metrics as compute_metrics
 from transformers import xnli_output_modes as output_modes
 from transformers import xnli_processors as processors
+from transformers.trainer_utils import is_main_process


 try:
@@ -526,7 +528,11 @@ def main():
         bool(args.local_rank != -1),
         args.fp16,
     )
+    # Set the verbosity to info of the Transformers logger (on main process only):
+    if is_main_process(args.local_rank):
+        transformers.utils.logging.set_verbosity_info()
+        transformers.utils.logging.enable_default_handler()
+        transformers.utils.logging.enable_explicit_format()

     # Set seed
     set_seed(args)
...
@@ -163,6 +163,8 @@ def main():
     # Set the verbosity to info of the Transformers logger (on main process only):
     if is_main_process(training_args.local_rank):
         transformers.utils.logging.set_verbosity_info()
+        transformers.utils.logging.enable_default_handler()
+        transformers.utils.logging.enable_explicit_format()
     logger.info("Training/evaluation parameters %s", training_args)

     # Set seed before initializing model.
...
@@ -25,6 +25,7 @@ import numpy as np
 from seqeval.metrics import accuracy_score, f1_score, precision_score, recall_score
 from torch import nn

+import transformers
 from transformers import (
     AutoConfig,
     AutoModelForTokenClassification,
@@ -35,6 +36,7 @@ from transformers import (
     TrainingArguments,
     set_seed,
 )
+from transformers.trainer_utils import is_main_process
 from utils_ner import Split, TokenClassificationDataset, TokenClassificationTask
@@ -139,6 +141,11 @@ def main():
         bool(training_args.local_rank != -1),
         training_args.fp16,
     )
+    # Set the verbosity to info of the Transformers logger (on main process only):
+    if is_main_process(training_args.local_rank):
+        transformers.utils.logging.set_verbosity_info()
+        transformers.utils.logging.enable_default_handler()
+        transformers.utils.logging.enable_explicit_format()
     logger.info("Training/evaluation parameters %s", training_args)

     # Set seed
...
@@ -33,9 +33,15 @@ from transformers import (
     TFTrainer,
     TFTrainingArguments,
 )
+from transformers.utils import logging as hf_logging
 from utils_ner import Split, TFTokenClassificationDataset, TokenClassificationTask

+
+hf_logging.set_verbosity_info()
+hf_logging.enable_default_handler()
+hf_logging.enable_explicit_format()
+
+
 logger = logging.getLogger(__name__)
...
@@ -5,7 +5,7 @@ datasets:
 # roberta-base for QA

-NOTE: This model has been superseded by deepset/roberta-base-squad2-v2. For an explanation of why, see [this github issue](https://github.com/deepset-ai/FARM/issues/552) from the FARM repository.
+NOTE: This is version 2 of the model. See [this github issue](https://github.com/deepset-ai/FARM/issues/552) from the FARM repository for an explanation of why we updated. If you'd like to use version 1, specify `revision="v1.0"` when loading the model in Transformers 3.5.

 ## Overview
 **Language model:** roberta-base
@@ -19,10 +19,10 @@ NOTE: This model has been superseded by deepset/roberta-base-squad2-v2. For an e
 ## Hyperparameters
 ```
-batch_size = 50
-n_epochs = 3
+batch_size = 96
+n_epochs = 2
 base_LM_model = "roberta-base"
-max_seq_len = 384
+max_seq_len = 386
 learning_rate = 3e-5
 lr_schedule = LinearWarmup
 warmup_proportion = 0.2
@@ -32,9 +32,18 @@ max_query_length=64
 ## Performance
 Evaluated on the SQuAD 2.0 dev set with the [official eval script](https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/).
 ```
-"exact": 78.49743114629833,
-"f1": 81.73092721240889
+"exact": 79.97136359807968
+"f1": 83.00449234495325
+"total": 11873
+"HasAns_exact": 78.03643724696356
+"HasAns_f1": 84.11139298441825
+"HasAns_total": 5928
+"NoAns_exact": 81.90075693860386
+"NoAns_f1": 81.90075693860386
+"NoAns_total": 5945
 ```

 ## Usage
@@ -85,7 +94,7 @@ For doing QA at scale (i.e. many docs instead of single paragraph), you can load
 ```python
 reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
 # or
-reader = TransformersReader(model="deepset/roberta-base-squad2",tokenizer="deepset/roberta-base-squad2")
+reader = TransformersReader(model_name_or_path="deepset/roberta-base-squad2",tokenizer="deepset/roberta-base-squad2")
 ```
...
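The updated note mentions pinning version 1 with `revision="v1.0"`. A minimal sketch of what that looks like with the Transformers 3.5 `from_pretrained` API (the model id and revision tag come from the card itself; the rest is illustrative):

```python
from transformers import AutoModelForQuestionAnswering, AutoTokenizer

# Default: the current (version 2) weights
model_v2 = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")

# Pin version 1 explicitly via the git-style revision tag
model_v1 = AutoModelForQuestionAnswering.from_pretrained(
    "deepset/roberta-base-squad2", revision="v1.0"
)
tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2", revision="v1.0")
```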
@@ -28,7 +28,7 @@ if is_sklearn_available():
 DEPRECATION_WARNING = (
     "This metric will be removed from the library soon, metrics should be handled with the 🤗 Datasets "
     "library. You can have a look at this example script for pointers: "
-    "https://github.com/huggingface/transformers/blob/master/examples/text-classification/run_glue.py",
+    "https://github.com/huggingface/transformers/blob/master/examples/text-classification/run_glue.py"
 )
...
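The removed trailing comma is the whole fix here: with it, the parentheses build a one-element tuple of the concatenated string rather than a plain `str`, so whatever consumes `DEPRECATION_WARNING` (presumably a `warnings.warn` call) would receive a tuple. A quick illustration of the pitfall:

```python
# With a trailing comma the parentheses make a 1-tuple of the implicitly
# concatenated string; without it they are just grouping parentheses.
as_tuple = (
    "part one "
    "part two",
)
as_str = (
    "part one "
    "part two"
)
print(type(as_tuple).__name__, type(as_str).__name__)  # tuple str
```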
@@ -89,6 +89,38 @@ TF_BERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
 ]

+class TFBertPreTrainingLoss:
+    """
+    Loss function suitable for BERT-like pre-training, that is, the task of pretraining a language model by combining
+    NSP + MLM. .. note:: Any label of -100 will be ignored (along with the corresponding logits) in the loss
+    computation.
+    """
+
+    def compute_loss(self, labels, logits):
+        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
+            from_logits=True, reduction=tf.keras.losses.Reduction.NONE
+        )
+        # make sure only labels that are not equal to -100
+        # are taken into account as loss
+        masked_lm_active_loss = tf.not_equal(tf.reshape(labels["labels"], (-1,)), -100)
+        masked_lm_reduced_logits = tf.boolean_mask(
+            tf.reshape(logits[0], (-1, shape_list(logits[0])[2])),
+            masked_lm_active_loss,
+        )
+        masked_lm_labels = tf.boolean_mask(tf.reshape(labels["labels"], (-1,)), masked_lm_active_loss)
+        next_sentence_active_loss = tf.not_equal(tf.reshape(labels["next_sentence_label"], (-1,)), -100)
+        next_sentence_reduced_logits = tf.boolean_mask(tf.reshape(logits[1], (-1, 2)), next_sentence_active_loss)
+        next_sentence_label = tf.boolean_mask(
+            tf.reshape(labels["next_sentence_label"], (-1,)), mask=next_sentence_active_loss
+        )
+        masked_lm_loss = loss_fn(masked_lm_labels, masked_lm_reduced_logits)
+        next_sentence_loss = loss_fn(next_sentence_label, next_sentence_reduced_logits)
+        masked_lm_loss = tf.reshape(masked_lm_loss, (-1, shape_list(next_sentence_loss)[0]))
+        masked_lm_loss = tf.reduce_mean(masked_lm_loss, 0)
+
+        return masked_lm_loss + next_sentence_loss
+
+
 class TFBertEmbeddings(tf.keras.layers.Layer):
     """Construct the embeddings from word, position and token_type embeddings."""
@@ -688,6 +720,7 @@ class TFBertForPreTrainingOutput(ModelOutput):
             heads.
     """

+    loss: Optional[tf.Tensor] = None
     prediction_logits: tf.Tensor = None
     seq_relationship_logits: tf.Tensor = None
     hidden_states: Optional[Tuple[tf.Tensor]] = None
@@ -814,7 +847,7 @@ Bert Model with two heads on top as done during the pre-training:
     """,
     BERT_START_DOCSTRING,
 )
-class TFBertForPreTraining(TFBertPreTrainedModel):
+class TFBertForPreTraining(TFBertPreTrainedModel, TFBertPreTrainingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
@@ -827,7 +860,21 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
     @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @replace_return_docstrings(output_type=TFBertForPreTrainingOutput, config_class=_CONFIG_FOR_DOC)
-    def call(self, inputs, **kwargs):
+    def call(
+        self,
+        inputs=None,
+        attention_mask=None,
+        token_type_ids=None,
+        position_ids=None,
+        head_mask=None,
+        inputs_embeds=None,
+        output_attentions=None,
+        output_hidden_states=None,
+        return_dict=None,
+        labels=None,
+        next_sentence_label=None,
+        training=False,
+    ):
         r"""
         Return:
@@ -843,17 +890,44 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
         >>> prediction_scores, seq_relationship_scores = outputs[:2]
         """
-        return_dict = kwargs.get("return_dict")
         return_dict = return_dict if return_dict is not None else self.bert.return_dict
-        outputs = self.bert(inputs, **kwargs)
+
+        if isinstance(inputs, (tuple, list)):
+            labels = inputs[9] if len(inputs) > 9 else labels
+            next_sentence_label = inputs[10] if len(inputs) > 10 else next_sentence_label
+            if len(inputs) > 9:
+                inputs = inputs[:9]
+        elif isinstance(inputs, (dict, BatchEncoding)):
+            labels = inputs.pop("labels", labels)
+            next_sentence_label = inputs.pop("next_sentence_label", next_sentence_label)
+
+        outputs = self.bert(
+            inputs,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+            training=training,
+        )
         sequence_output, pooled_output = outputs[:2]
-        prediction_scores = self.mlm(sequence_output, training=kwargs.get("training", False))
+        prediction_scores = self.mlm(sequence_output, training=training)
         seq_relationship_score = self.nsp(pooled_output)
+        total_loss = None
+
+        if labels is not None and next_sentence_label is not None:
+            d_labels = {"labels": labels}
+            d_labels["next_sentence_label"] = next_sentence_label
+            total_loss = self.compute_loss(labels=d_labels, logits=(prediction_scores, seq_relationship_score))

         if not return_dict:
             return (prediction_scores, seq_relationship_score) + outputs[2:]

         return TFBertForPreTrainingOutput(
+            loss=total_loss,
             prediction_logits=prediction_scores,
             seq_relationship_logits=seq_relationship_score,
             hidden_states=outputs.hidden_states,
...
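The heart of the new `TFBertPreTrainingLoss` is the `-100` masking applied before the sparse cross-entropy. A self-contained toy sketch of that mechanism (shapes and values are made up for illustration, not taken from the library):

```python
import tensorflow as tf

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction=tf.keras.losses.Reduction.NONE
)

labels = tf.constant([2, -100, 5])   # -100 marks positions excluded from the loss
logits = tf.random.normal((3, 10))   # one 10-way prediction per position

active = tf.not_equal(labels, -100)  # boolean mask of positions that count
per_position_loss = loss_fn(
    tf.boolean_mask(labels, active),
    tf.boolean_mask(logits, active),
)
print(per_position_loss.shape)       # (2,): the -100 position was dropped
```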
@@ -26,6 +26,7 @@ from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
 if is_tf_available():
     import tensorflow as tf

+    from transformers import TF_MODEL_FOR_PRETRAINING_MAPPING
     from transformers.modeling_tf_bert import (
         TFBertForMaskedLM,
         TFBertForMultipleChoice,
@@ -274,6 +275,16 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
         else ()
     )

+    # special case for ForPreTraining model
+    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
+        inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
+
+        if return_labels:
+            if model_class in TF_MODEL_FOR_PRETRAINING_MAPPING.values():
+                inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
+
+        return inputs_dict
+
     def setUp(self):
         self.model_tester = TFBertModelTester(self)
         self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)
...
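Put together, the test override above feeds both `labels` and `next_sentence_label`, which is exactly what end users would now pass. With the explicit `call` signature and the loss mixin, supplying both label tensors should make `TFBertForPreTraining` return the combined MLM + NSP loss; a usage sketch (the checkpoint name and toy labels are illustrative, not from the diff):

```python
import tensorflow as tf
from transformers import BertTokenizer, TFBertForPreTraining

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = TFBertForPreTraining.from_pretrained("bert-base-uncased")

inputs = tokenizer("The capital of France is Paris.", return_tensors="tf")
labels = inputs["input_ids"]            # toy MLM targets (no -100 masking here)
next_sentence_label = tf.constant([0])  # 0 = sentence B follows sentence A

outputs = model(
    inputs, labels=labels, next_sentence_label=next_sentence_label, return_dict=True
)
print(outputs.loss, outputs.prediction_logits.shape)
```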
@@ -36,6 +36,7 @@ if is_tf_available():
         TF_MODEL_FOR_MASKED_LM_MAPPING,
         TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
         TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING,
+        TF_MODEL_FOR_PRETRAINING_MAPPING,
         TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
         TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
         TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
@@ -102,6 +103,7 @@ class TFModelTesterMixin:
             *TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values(),
             *TF_MODEL_FOR_CAUSAL_LM_MAPPING.values(),
             *TF_MODEL_FOR_MASKED_LM_MAPPING.values(),
+            *TF_MODEL_FOR_PRETRAINING_MAPPING.values(),
             *TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values(),
         ]:
             inputs_dict["labels"] = tf.zeros(
@@ -834,7 +836,9 @@ class TFModelTesterMixin:
             if getattr(model, "compute_loss", None):
                 # The number of elements in the loss should be the same as the number of elements in the label
                 prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
-                added_label = prepared_for_class[list(prepared_for_class.keys() - inputs_dict.keys())[0]]
+                added_label = prepared_for_class[
+                    sorted(list(prepared_for_class.keys() - inputs_dict.keys()), reverse=True)[0]
+                ]
                 loss_size = tf.size(added_label)

                 if model.__class__ in TF_MODEL_FOR_CAUSAL_LM_MAPPING.values():
@@ -859,23 +863,30 @@ class TFModelTesterMixin:
                 # Get keys that were added with the _prepare_for_class function
                 label_keys = prepared_for_class.keys() - inputs_dict.keys()
-                signature = inspect.getfullargspec(model.call)[0]
+                signature = inspect.signature(model.call).parameters
+                signature_names = list(signature.keys())
+
                 # Create a dictionary holding the location of the tensors in the tuple
-                tuple_index_mapping = {1: "input_ids"}
+                tuple_index_mapping = {0: "input_ids"}
                 for label_key in label_keys:
-                    label_key_index = signature.index(label_key)
+                    label_key_index = signature_names.index(label_key)
                     tuple_index_mapping[label_key_index] = label_key
                 sorted_tuple_index_mapping = sorted(tuple_index_mapping.items())

-                # Initialize a list with None, update the values and convert to a tuple
-                list_input = [None] * sorted_tuple_index_mapping[-1][0]
+                # Initialize a list with their default values, update the values and convert to a tuple
+                list_input = []
+
+                for name in signature_names:
+                    if name != "kwargs":
+                        list_input.append(signature[name].default)
+
                 for index, value in sorted_tuple_index_mapping:
-                    list_input[index - 1] = prepared_for_class[value]
+                    list_input[index] = prepared_for_class[value]
+
                 tuple_input = tuple(list_input)

                 # Send to model
-                loss = model(tuple_input)[0]
+                loss = model(tuple_input[:-1])[0]
+
                 self.assertEqual(loss.shape, [loss_size])

     def _generate_random_bad_tokens(self, num_bad_tokens, model):
...
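The switch from `inspect.getfullargspec(model.call)[0]` to `inspect.signature(model.call).parameters` is what lets the test seed the input tuple with each argument's declared default instead of `None`. A small illustration of the difference (the `call` stub here is hypothetical, not the library's):

```python
import inspect

class Model:
    def call(self, inputs=None, attention_mask=None, labels=None, training=False):
        pass

m = Model()

# Old approach: names only ("self" included), no defaults attached
print(inspect.getfullargspec(m.call)[0])
# ['self', 'inputs', 'attention_mask', 'labels', 'training']

# New approach: an ordered mapping of name -> Parameter, defaults included,
# and "self" already stripped for the bound method
params = inspect.signature(m.call).parameters
print(list(params))                # ['inputs', 'attention_mask', 'labels', 'training']
print(params["training"].default)  # False
```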
@@ -42,7 +42,7 @@ def find_code_in_transformers(object_name):
             f"`object_name` should begin with the name of a module of transformers but got {object_name}."
         )

-    with open(os.path.join(TRANSFORMERS_PATH, f"{module}.py"), "r", encoding="utf-8") as f:
+    with open(os.path.join(TRANSFORMERS_PATH, f"{module}.py"), "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()

     # Now let's find the class / func in the code!
@@ -82,10 +82,10 @@ def blackify(code):
         code = f"class Bla:\n{code}"
     with tempfile.TemporaryDirectory() as d:
         fname = os.path.join(d, "tmp.py")
-        with open(fname, "w", encoding="utf-8") as f:
+        with open(fname, "w", encoding="utf-8", newline="\n") as f:
             f.write(code)
         os.system(f"black -q --line-length 119 --target-version py35 {fname}")
-        with open(fname, "r", encoding="utf-8") as f:
+        with open(fname, "r", encoding="utf-8", newline="\n") as f:
             result = f.read()
     return result[len("class Bla:\n") :] if has_indent else result
@@ -96,7 +96,7 @@ def is_copy_consistent(filename, overwrite=False):
     Return the differences or overwrites the content depending on `overwrite`.
     """
-    with open(filename, "r", encoding="utf-8") as f:
+    with open(filename, "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()
     diffs = []
     line_index = 0
@@ -150,7 +150,7 @@ def is_copy_consistent(filename, overwrite=False):
     if overwrite and len(diffs) > 0:
         # Warn the user a file has been modified.
         print(f"Detected changes, rewriting {filename}.")
-        with open(filename, "w", encoding="utf-8") as f:
+        with open(filename, "w", encoding="utf-8", newline="\n") as f:
             f.writelines(lines)
     return diffs
@@ -176,7 +176,7 @@ def get_model_list():
     # If the introduction or the conclusion of the list change, the prompts may need to be updated.
     _start_prompt = "🤗 Transformers currently provides the following architectures"
     _end_prompt = "1. Want to contribute a new model?"
-    with open(os.path.join(REPO_PATH, "README.md"), "r", encoding="utf-8") as f:
+    with open(os.path.join(REPO_PATH, "README.md"), "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()
     # Find the start of the list.
     start_index = 0
@@ -254,7 +254,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
     """ Check the model lists in the README and index.rst are consistent and maybe `overwrite`. """
     _start_prompt = " This list is updated automatically from the README"
     _end_prompt = ".. toctree::"
-    with open(os.path.join(PATH_TO_DOCS, "index.rst"), "r", encoding="utf-8") as f:
+    with open(os.path.join(PATH_TO_DOCS, "index.rst"), "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()
     # Find the start of the list.
     start_index = 0
@@ -279,7 +279,7 @@ def check_model_list_copy(overwrite=False, max_per_line=119):
     if converted_list != rst_list:
         if overwrite:
-            with open(os.path.join(PATH_TO_DOCS, "index.rst"), "w", encoding="utf-8") as f:
+            with open(os.path.join(PATH_TO_DOCS, "index.rst"), "w", encoding="utf-8", newline="\n") as f:
                 f.writelines(lines[:start_index] + [converted_list] + lines[end_index:])
         else:
             raise ValueError(
...
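All of the `open` calls in these repo-consistency scripts gain `newline="\n"` for the same reason: on Windows, Python's default universal-newline handling translates `\n` to `\r\n` on write (and normalizes on read), which makes byte-for-byte comparisons between generated and checked-in files report spurious diffs. A minimal sketch of the behavior (the file name is illustrative):

```python
# Without newline="\n", writing "print('hello')\n" on Windows produces
# b"print('hello')\r\n" on disk; newline="\n" disables the translation
# so generated files stay LF-only on every platform.
with open("generated.py", "w", encoding="utf-8", newline="\n") as f:
    f.write("print('hello')\n")

# Reading with newline="\n" likewise leaves any "\r" characters in place
# instead of silently normalizing them, keeping comparisons byte-faithful.
with open("generated.py", "r", encoding="utf-8", newline="\n") as f:
    content = f.read()
assert content == "print('hello')\n"
```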
@@ -166,7 +166,7 @@ DUMMY_FUNCTION = {
 def read_init():
     """ Read the init and extracts PyTorch, TensorFlow, SentencePiece and Tokenizers objects. """
-    with open(os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), "r", encoding="utf-8") as f:
+    with open(os.path.join(PATH_TO_TRANSFORMERS, "__init__.py"), "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()

     line_index = 0
@@ -321,21 +321,21 @@ def check_dummies(overwrite=False):
     tf_file = os.path.join(path, "dummy_tf_objects.py")
     flax_file = os.path.join(path, "dummy_flax_objects.py")

-    with open(sentencepiece_file, "r", encoding="utf-8") as f:
+    with open(sentencepiece_file, "r", encoding="utf-8", newline="\n") as f:
         actual_sentencepiece_dummies = f.read()
-    with open(tokenizers_file, "r", encoding="utf-8") as f:
+    with open(tokenizers_file, "r", encoding="utf-8", newline="\n") as f:
         actual_tokenizers_dummies = f.read()
-    with open(pt_file, "r", encoding="utf-8") as f:
+    with open(pt_file, "r", encoding="utf-8", newline="\n") as f:
         actual_pt_dummies = f.read()
-    with open(tf_file, "r", encoding="utf-8") as f:
+    with open(tf_file, "r", encoding="utf-8", newline="\n") as f:
         actual_tf_dummies = f.read()
-    with open(flax_file, "r", encoding="utf-8") as f:
+    with open(flax_file, "r", encoding="utf-8", newline="\n") as f:
         actual_flax_dummies = f.read()

     if sentencepiece_dummies != actual_sentencepiece_dummies:
         if overwrite:
             print("Updating transformers.utils.dummy_sentencepiece_objects.py as the main __init__ has new objects.")
-            with open(sentencepiece_file, "w", encoding="utf-8") as f:
+            with open(sentencepiece_file, "w", encoding="utf-8", newline="\n") as f:
                 f.write(sentencepiece_dummies)
         else:
             raise ValueError(
@@ -346,7 +346,7 @@ def check_dummies(overwrite=False):
     if tokenizers_dummies != actual_tokenizers_dummies:
         if overwrite:
             print("Updating transformers.utils.dummy_tokenizers_objects.py as the main __init__ has new objects.")
-            with open(tokenizers_file, "w", encoding="utf-8") as f:
+            with open(tokenizers_file, "w", encoding="utf-8", newline="\n") as f:
                 f.write(tokenizers_dummies)
         else:
             raise ValueError(
@@ -357,7 +357,7 @@ def check_dummies(overwrite=False):
     if pt_dummies != actual_pt_dummies:
         if overwrite:
             print("Updating transformers.utils.dummy_pt_objects.py as the main __init__ has new objects.")
-            with open(pt_file, "w", encoding="utf-8") as f:
+            with open(pt_file, "w", encoding="utf-8", newline="\n") as f:
                 f.write(pt_dummies)
         else:
             raise ValueError(
@@ -368,7 +368,7 @@ def check_dummies(overwrite=False):
     if tf_dummies != actual_tf_dummies:
         if overwrite:
             print("Updating transformers.utils.dummy_tf_objects.py as the main __init__ has new objects.")
-            with open(tf_file, "w", encoding="utf-8") as f:
+            with open(tf_file, "w", encoding="utf-8", newline="\n") as f:
                 f.write(tf_dummies)
         else:
             raise ValueError(
@@ -379,7 +379,7 @@ def check_dummies(overwrite=False):
     if flax_dummies != actual_flax_dummies:
         if overwrite:
             print("Updating transformers.utils.dummy_flax_objects.py as the main __init__ has new objects.")
-            with open(flax_file, "w", encoding="utf-8") as f:
+            with open(flax_file, "w", encoding="utf-8", newline="\n") as f:
                 f.write(flax_dummies)
         else:
             raise ValueError(
...
@@ -197,7 +197,7 @@ def get_model_doc_files():
 def find_tested_models(test_file):
     """ Parse the content of test_file to detect what's in all_model_classes"""
     # This is a bit hacky but I didn't find a way to import the test_file as a module and read inside the class
-    with open(os.path.join(PATH_TO_TESTS, test_file), "r", encoding="utf-8") as f:
+    with open(os.path.join(PATH_TO_TESTS, test_file), "r", encoding="utf-8", newline="\n") as f:
         content = f.read()
     all_models = re.findall(r"all_model_classes\s+=\s+\(\s*\(([^\)]*)\)", content)
     # Check with one less parenthesis
@@ -255,7 +255,7 @@ def check_all_models_are_tested():
 def find_documented_classes(doc_file):
     """ Parse the content of doc_file to detect which classes it documents"""
-    with open(os.path.join(PATH_TO_DOC, doc_file), "r", encoding="utf-8") as f:
+    with open(os.path.join(PATH_TO_DOC, doc_file), "r", encoding="utf-8", newline="\n") as f:
         content = f.read()
     return re.findall(r"autoclass:: transformers.(\S+)\s+", content)
@@ -360,7 +360,7 @@ _re_decorator = re.compile(r"^\s*@(\S+)\s+$")
 def check_decorator_order(filename):
     """ Check that in the test file `filename` the slow decorator is always last."""
-    with open(filename, "r", encoding="utf-8") as f:
+    with open(filename, "r", encoding="utf-8", newline="\n") as f:
         lines = f.readlines()
     decorator_before = None
     errors = []
...
@@ -357,14 +357,14 @@ doc_styler = DocstringStyler()
 def style_rst_file(doc_file, max_len=119, check_only=False):
     """ Style one rst file `doc_file` to `max_len`."""
-    with open(doc_file, "r", encoding="utf-8") as f:
+    with open(doc_file, "r", encoding="utf-8", newline="\n") as f:
         doc = f.read()

     clean_doc = rst_styler.style(doc, max_len=max_len)

     diff = clean_doc != doc
     if not check_only and diff:
         print(f"Overwriting content of {doc_file}.")
-        with open(doc_file, "w", encoding="utf-8") as f:
+        with open(doc_file, "w", encoding="utf-8", newline="\n") as f:
             f.write(clean_doc)

     return diff
@@ -404,7 +404,7 @@ def style_docstring(docstring, max_len=119):
 def style_file_docstrings(code_file, max_len=119, check_only=False):
     """Style all docstrings in `code_file` to `max_len`."""
-    with open(code_file, "r", encoding="utf-8") as f:
+    with open(code_file, "r", encoding="utf-8", newline="\n") as f:
         code = f.read()
     splits = code.split('"""')
     splits = [
@@ -416,7 +416,7 @@ def style_file_docstrings(code_file, max_len=119, check_only=False):
     diff = clean_code != code
     if not check_only and diff:
         print(f"Overwriting content of {code_file}.")
-        with open(code_file, "w", encoding="utf-8") as f:
+        with open(code_file, "w", encoding="utf-8", newline="\n") as f:
             f.write(clean_code)

     return diff
...