Unverified commit ce50305e authored by Aymeric Augustin, committed by GitHub

Merge pull request #2270 from aaugustin/remove-python-2

Remove support for Python 2
parents b6ea0f43 1a948d70
version: 2
jobs:
-run_tests_py3_torch_and_tf:
+run_tests_torch_and_tf:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@@ -17,7 +17,7 @@ jobs:
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
-run_tests_py3_torch:
+run_tests_torch:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@@ -33,7 +33,7 @@ jobs:
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
-run_tests_py3_tf:
+run_tests_tf:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@@ -49,7 +49,7 @@ jobs:
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
-run_tests_py3_custom_tokenizers:
+run_tests_custom_tokenizers:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@@ -59,7 +59,7 @@ jobs:
- run: sudo pip install pytest pytest-xdist
- run: sudo pip install mecab-python3
- run: RUN_CUSTOM_TOKENIZERS=1 python -m pytest -sv ./tests/test_tokenization_bert_japanese.py
-run_examples_py3_torch:
+run_examples_torch:
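The `RUN_CUSTOM_TOKENIZERS=1` step above gates the Japanese tokenizer tests, which need the optional `mecab-python3` dependency. A minimal sketch of how such an opt-in gate can be written (the decorator placement and class name here are hypothetical, not copied from the test file):

``` python
import os
import unittest

# Hypothetical sketch: skip the whole suite unless the opt-in flag is set.
@unittest.skipUnless(os.getenv("RUN_CUSTOM_TOKENIZERS"), "custom tokenizer tests are opt-in")
class BertJapaneseTokenizationTest(unittest.TestCase):
    def test_tokenize(self):
        ...
```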
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@@ -121,9 +121,9 @@ workflows:
jobs:
- check_code_quality
- check_repository_consistency
-- run_examples_py3_torch
-- run_tests_py3_custom_tokenizers
-- run_tests_py3_torch_and_tf
-- run_tests_py3_torch
-- run_tests_py3_tf
+- run_examples_torch
+- run_tests_custom_tokenizers
+- run_tests_torch_and_tf
+- run_tests_torch
+- run_tests_tf
- deploy_doc: *workflow_filters
@@ -64,7 +64,7 @@ Choose the right framework for every part of a model's lifetime
## Installation
-This repo is tested on Python 2.7 and 3.5+ (examples are tested only on Python 3.5+), PyTorch 1.0.0+ and TensorFlow 2.0.0-rc1
+This repo is tested on Python 3.5+, PyTorch 1.0.0+ and TensorFlow 2.0.0-rc1
### With pip
# Installation
-Transformers is tested on Python 2.7 and 3.5+ (examples are tested only on Python 3.5+) and PyTorch 1.1.0
+Transformers is tested on Python 3.5+ and PyTorch 1.1.0
## With pip
@@ -44,7 +44,7 @@ By default, slow tests are skipped. Set the `RUN_SLOW` environment variable to `
## OpenAI GPT original tokenization workflow
-If you want to reproduce the original tokenization process of the `OpenAI GPT` paper, you will need to install `ftfy` (use version 4.4.3 if you are using Python 2) and `SpaCy`:
+If you want to reproduce the original tokenization process of the `OpenAI GPT` paper, you will need to install `ftfy` and `SpaCy`:
``` bash
pip install spacy ftfy==4.4.3
```
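For reference, a minimal sketch of the normalize-then-tokenize flow these two packages provide (the spaCy model name and sample text are illustrative, not from the repo):

``` python
# Illustrative only: repair mojibake with ftfy, then tokenize with spaCy.
import ftfy
import spacy

nlp = spacy.load("en_core_web_sm")  # assumes this spaCy model is installed
text = ftfy.fix_text("The quick brown fox doesnÃ¢â‚¬â„¢t jump")  # garbled input
print([token.text for token in nlp(text)])
```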
@@ -16,7 +16,7 @@
"""BERT finetuning runner.
Finetuning the library models for multiple choice on SWAG (Bert).
"""
-from __future__ import absolute_import, division, print_function
import argparse
import csv
@@ -24,7 +24,6 @@ import glob
import logging
import os
import random
-import sys
import numpy as np
import torch
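The deleted `from __future__` line (and the `sys` import used for the version check below) are Python 2 compatibility shims; on Python 3 these imports are no-ops, so removing them changes nothing. What they did on Python 2, in brief:

``` python
# On Python 2, `/` truncated integers and `print` was a statement; these
# imports opted in to the Python 3 behavior. On Python 3 they do nothing.
from __future__ import division, print_function

print(3 / 2)  # 1.5 under both interpreters once division is imported
```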
@@ -104,12 +103,7 @@ class InputFeatures(object):
def read_swag_examples(input_file, is_training=True):
    with open(input_file, "r", encoding="utf-8") as f:
-        reader = csv.reader(f)
-        lines = []
-        for line in reader:
-            if sys.version_info[0] == 2:
-                line = list(unicode(cell, "utf-8") for cell in line)  # noqa: F821
-            lines.append(line)
+        lines = list(csv.reader(f))
    if is_training and lines[0][-1] != "label":
        raise ValueError("For training, the input file must contain a label column.")
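The removed loop existed only to decode each cell to `unicode` under Python 2; on Python 3, `csv.reader` already yields lists of `str`, so it collapses to a one-liner. The same idiom standalone (the file name is illustrative):

``` python
import csv

# Python 3: open() handles decoding, csv.reader yields lists of str.
with open("train.csv", "r", encoding="utf-8") as f:
    lines = list(csv.reader(f))
```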
@@ -347,7 +341,7 @@ def train(args, train_dataset, model, tokenizer):
tr_loss, logging_loss = 0.0, 0.0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
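The `set_seed` helper referenced in these training loops seeds every RNG the run touches; in the example scripts it looks roughly like this (an approximation, not the verbatim source):

``` python
import random

import numpy as np
import torch

def set_seed(args):
    # Seed Python, NumPy and PyTorch so repeated runs are reproducible.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)
```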
@@ -19,7 +19,7 @@
This script with default values evaluates a pretrained Transformer-XL on WikiText 103
"""
-from __future__ import absolute_import, division, print_function, unicode_literals
import argparse
import logging
@@ -15,7 +15,6 @@
# limitations under the License.
""" This is the exact same script as `examples/run_squad.py` (as of 2019, October 4th) with an additional and optional step of distillation."""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -160,7 +159,7 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
tr_loss, logging_loss = 0.0, 0.0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for multimodal multiclass prediction on MM-IMDB dataset."""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -165,7 +164,7 @@ def train(args, train_dataset, model, tokenizer, criterion):
best_f1, n_no_improve = 0, 0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -16,7 +16,7 @@
# limitations under the License.
""" Conditional text generation with the auto-regressive models of the library (GPT/GPT-2/CTRL/Transformer-XL/XLNet)
"""
-from __future__ import absolute_import, division, print_function, unicode_literals
import argparse
import logging
@@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for sequence classification on GLUE (Bert, XLM, XLNet, RoBERTa)."""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -186,7 +185,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -19,7 +19,6 @@ GPT and GPT-2 are fine-tuned using a causal language modeling (CLM) loss while B
using a masked language modeling (MLM) loss.
"""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -282,7 +281,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for multiple choice (Bert, Roberta, XLNet)."""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -146,7 +145,7 @@ def train(args, train_dataset, model, tokenizer):
best_steps = 0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -15,7 +15,6 @@
# limitations under the License.
""" Fine-tuning the library models for named entity recognition on CoNLL-2003 (Bert or Roberta). """
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -170,7 +169,7 @@ def train(args, train_dataset, model, tokenizer, labels, pad_token_label_id):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for question-answering on SQuAD (DistilBERT, Bert, XLM, XLNet)."""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -186,7 +185,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
-# Added here for reproducibility (even between python 2 and 3)
+# Added here for reproducibility
set_seed(args)
for _ in train_iterator:
@@ -16,7 +16,6 @@
""" Finetuning multi-lingual models on XNLI (Bert, DistilBERT, XLM).
Adapted from `examples/run_glue.py`"""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -165,7 +164,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -94,7 +94,7 @@ def process_story(raw_story):
def _add_missing_period(line):
-END_TOKENS = [".", "!", "?", "...", "'", "`", '"', u"\u2019", u"\u2019", ")"]
+END_TOKENS = [".", "!", "?", "...", "'", "`", '"', "\u2019", "\u2019", ")"]
if line.startswith("@highlight"):
return line
if line[-1] in END_TOKENS:
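Dropping the `u` prefixes is safe because every Python 3 string literal is already unicode; the prefix survives only as inert compatibility syntax:

``` python
# In Python 3 the u prefix is accepted but meaningless.
assert u"\u2019" == "\u2019"
assert type("\u2019") is str
```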
@@ -12,25 +12,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import absolute_import, division, print_function
import argparse
import logging
import sys
import unittest
+from unittest.mock import patch
import run_generation
import run_glue
import run_squad
-try:
-    # python 3.4+ can use builtin unittest.mock instead of mock package
-    from unittest.mock import patch
-except ImportError:
-    from mock import patch
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
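`unittest.mock` has shipped with the standard library since Python 3.3, so the `try`/`except ImportError` fallback to the external `mock` package is dead code once Python 2 support is gone. A sketch of how these tests typically use `patch` (the argv values are illustrative):

``` python
import sys
from unittest.mock import patch

# Illustrative: substitute sys.argv while invoking a script's main().
testargs = ["run_glue.py", "--model_name_or_path", "bert-base-uncased"]
with patch.object(sys, "argv", testargs):
    pass  # run_glue.main() would see the patched argv here
```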
@@ -15,15 +15,12 @@
# limitations under the License.
""" Multiple choice fine-tuning: utilities to work with multiple choice tasks of reading comprehension """
-from __future__ import absolute_import, division, print_function
import csv
import glob
import json
import logging
import os
-import sys
-from io import open
from typing import List
import tqdm
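`from io import open` was another Python 2 shim: it backported an `open()` that accepts an `encoding` argument. The Python 3 builtin already does, so the import can simply go:

``` python
# Python 3's builtin open() takes encoding directly; no io import needed.
with open("data.csv", encoding="utf-8") as f:
    contents = f.read()
```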
@@ -180,13 +177,7 @@ class SwagProcessor(DataProcessor):
    def _read_csv(self, input_file):
        with open(input_file, "r", encoding="utf-8") as f:
-            reader = csv.reader(f)
-            lines = []
-            for line in reader:
-                if sys.version_info[0] == 2:
-                    line = list(unicode(cell, "utf-8") for cell in line)  # noqa: F821
-                lines.append(line)
-            return lines
+            return list(csv.reader(f))

    def _create_examples(self, lines: List[List[str]], type: str):
        """Creates examples for the training and dev sets."""
@@ -15,11 +15,9 @@
# limitations under the License.
""" Named entity recognition fine-tuning: utilities to work with CoNLL-2003 task. """
-from __future__ import absolute_import, division, print_function
import logging
import os
-from io import open
logger = logging.getLogger(__name__)
@@ -14,7 +14,7 @@ To create the package for pypi.
creating the wheel and the source distribution (obviously).
For the wheel, run: "python setup.py bdist_wheel" in the top level directory.
-(this will build a wheel for the python version you use to build it - make sure you use python 3.x).
+(this will build a wheel for the python version you use to build it).
For the sources, run: "python setup.py sdist"
You should now have a /dist directory with both .whl and .tar.gz source versions.
@@ -33,7 +33,6 @@ To create the package for pypi.
7. Copy the release notes from RELEASE.md to the tag in github once everything is looking hunky-dory.
"""
-from io import open
from setuptools import find_packages, setup
@@ -34,8 +34,8 @@ class ANSI:
Helper for en.wikipedia.org/wiki/ANSI_escape_code
"""
-_bold = u"\u001b[1m"
-_reset = u"\u001b[0m"
+_bold = "\u001b[1m"
+_reset = "\u001b[0m"
@classmethod
def bold(cls, s):
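The only change here is, again, dropping the redundant `u` prefixes; the escape-code mechanics are untouched. For context, a self-contained sketch of the same bold-text pattern:

``` python
# ANSI escape codes: \u001b[1m turns bold on, \u001b[0m resets styling.
BOLD = "\u001b[1m"
RESET = "\u001b[0m"

def bold(s: str) -> str:
    return f"{BOLD}{s}{RESET}"

print(bold("hello"))
```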