Unverified Commit ce50305e authored by Aymeric Augustin, committed by GitHub

Merge pull request #2270 from aaugustin/remove-python-2

Remove support for Python 2
parents b6ea0f43 1a948d70
@@ -14,7 +14,6 @@
 # limitations under the License.
 """Convert RoBERTa checkpoint."""
-from __future__ import absolute_import, division, print_function
 import argparse
 import logging
...
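The same one-line deletion recurs in nearly every file below: `from __future__` imports only change behavior on Python 2, so on a Python-3-only codebase they are dead code. A minimal illustration (not part of the diff) of what those imports used to toggle:

```python
# On Python 3 these semantics are always on; the __future__ import is a no-op.
from __future__ import division, print_function

print(1 / 2)   # 0.5 -- true division (Python 2 printed 0 without the import)
print(1 // 2)  # 0   -- floor division is spelled // on both versions
```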
@@ -14,7 +14,6 @@
 # limitations under the License.
 """Convert T5 checkpoint."""
-from __future__ import absolute_import, division, print_function
 import argparse
 import logging
...
@@ -14,13 +14,12 @@
 # limitations under the License.
 """Convert Transformer XL checkpoint and datasets."""
-from __future__ import absolute_import, division, print_function
 import argparse
 import logging
 import os
+import pickle
 import sys
-from io import open
 import torch
@@ -35,12 +34,6 @@ from transformers import (
 from transformers.tokenization_transfo_xl import CORPUS_NAME, VOCAB_FILES_NAMES
-if sys.version_info[0] == 2:
-    import cPickle as pickle
-else:
-    import pickle
 logging.basicConfig(level=logging.INFO)
 # We do this to be able to load python 2 datasets pickles
...
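The `cPickle`/`pickle` conditional collapses to a plain `import pickle`, and the kept comment still applies: Python 3's `pickle.load` can read Python 2 pickles via its `encoding` parameter. A sketch of the usual pattern (the filename is hypothetical):

```python
import pickle

# Python 2 wrote pickles whose str payloads are raw bytes; "latin1" maps
# every byte to a code point, so Python 2 str data round-trips safely.
with open("corpus-cache.pkl", "rb") as f:  # hypothetical file
    corpus = pickle.load(f, encoding="latin1")
```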
@@ -14,12 +14,10 @@
 # limitations under the License.
 """Convert OpenAI GPT checkpoint."""
-from __future__ import absolute_import, division, print_function
 import argparse
 import json
 import logging
-from io import open
 import numpy
 import torch
...
@@ -14,7 +14,6 @@
 # limitations under the License.
 """Convert BERT checkpoint."""
-from __future__ import absolute_import, division, print_function
 import argparse
 import logging
...
@@ -14,7 +14,6 @@ import logging
 import math
 import re
 import string
-from io import open
 from transformers.tokenization_bert import BasicTokenizer
...
@@ -18,7 +18,6 @@ import copy
 import csv
 import json
 import logging
-import sys
 from ...file_utils import is_tf_available, is_torch_available
@@ -98,13 +97,7 @@ class DataProcessor(object):
     def _read_tsv(cls, input_file, quotechar=None):
         """Reads a tab separated value file."""
         with open(input_file, "r", encoding="utf-8-sig") as f:
-            reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
-            lines = []
-            for line in reader:
-                if sys.version_info[0] == 2:
-                    line = list(unicode(cell, "utf-8") for cell in line)  # noqa: F821
-                lines.append(line)
-            return lines
+            return list(csv.reader(f, delimiter="\t", quotechar=quotechar))
 class SingleSentenceClassificationProcessor(DataProcessor):
...
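The seven-line loop existed only to re-decode each cell on Python 2, where `csv` yields byte strings; on Python 3 `csv.reader` yields `str` already, so the body collapses to one line. A quick check of the simplified reader (the file path is hypothetical):

```python
import csv

with open("train.tsv", "r", encoding="utf-8-sig") as f:  # hypothetical file
    lines = list(csv.reader(f, delimiter="\t", quotechar='"'))
# Each element of `lines` is a list of str cells -- no per-cell decoding needed.
```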
@@ -15,7 +15,6 @@
 # limitations under the License.
 """ XNLI utils (dataset loading and evaluation) """
-from __future__ import absolute_import, division, print_function
 import logging
 import os
...
@@ -3,7 +3,7 @@ Utilities for working with the local dataset cache.
 This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp
 Copyright by the AllenNLP authors.
 """
-from __future__ import absolute_import, division, print_function, unicode_literals
 import fnmatch
 import json
@@ -14,11 +14,10 @@ import tempfile
 from contextlib import contextmanager
 from functools import partial, wraps
 from hashlib import sha256
-from io import open
+from urllib.parse import urlparse
 import boto3
 import requests
-import six
 from botocore.config import Config
 from botocore.exceptions import ClientError
 from filelock import FileLock
@@ -66,10 +65,6 @@ except ImportError:
 )
 default_cache_path = os.path.join(torch_cache_home, "transformers")
-try:
-    from urllib.parse import urlparse
-except ImportError:
-    from urlparse import urlparse
 try:
     from pathlib import Path
@@ -107,36 +102,20 @@ def is_tf_available():
     return _tf_available
-if not six.PY2:
-
-    def add_start_docstrings(*docstr):
-        def docstring_decorator(fn):
-            fn.__doc__ = "".join(docstr) + fn.__doc__
-            return fn
-
-        return docstring_decorator
-
-    def add_end_docstrings(*docstr):
-        def docstring_decorator(fn):
-            fn.__doc__ = fn.__doc__ + "".join(docstr)
-            return fn
-
-        return docstring_decorator
-
-else:
-    # Not possible to update class docstrings on python2
-    def add_start_docstrings(*docstr):
-        def docstring_decorator(fn):
-            return fn
-
-        return docstring_decorator
-
-    def add_end_docstrings(*docstr):
-        def docstring_decorator(fn):
-            return fn
-
-        return docstring_decorator
+def add_start_docstrings(*docstr):
+    def docstring_decorator(fn):
+        fn.__doc__ = "".join(docstr) + fn.__doc__
+        return fn
+
+    return docstring_decorator
+
+
+def add_end_docstrings(*docstr):
+    def docstring_decorator(fn):
+        fn.__doc__ = fn.__doc__ + "".join(docstr)
+        return fn
+
+    return docstring_decorator
 def is_remote_url(url_or_filename):
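With the `six.PY2` branch gone, the decorators always mutate `__doc__` (the old else-branch was a no-op because Python 2 could not update class docstrings). A small usage sketch, not part of the diff, with the Python-3-only decorator inlined for self-containment:

```python
def add_start_docstrings(*docstr):  # copy of the surviving version above
    def docstring_decorator(fn):
        fn.__doc__ = "".join(docstr) + fn.__doc__
        return fn

    return docstring_decorator


@add_start_docstrings("Shared intro for a family of models. ")
def forward(x):
    """Layer-specific details."""
    return x

print(forward.__doc__)  # Shared intro for a family of models. Layer-specific details.
```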
@@ -183,7 +162,7 @@ def filename_to_url(filename, cache_dir=None):
     """
     if cache_dir is None:
         cache_dir = TRANSFORMERS_CACHE
-    if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
+    if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
     cache_path = os.path.join(cache_dir, filename)
@@ -218,9 +197,9 @@ def cached_path(
     """
     if cache_dir is None:
         cache_dir = TRANSFORMERS_CACHE
-    if sys.version_info[0] == 3 and isinstance(url_or_filename, Path):
+    if isinstance(url_or_filename, Path):
         url_or_filename = str(url_or_filename)
-    if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
+    if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
     if is_remote_url(url_or_filename):
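Dropping the version guard also makes the contract easier to state: these helpers accept `pathlib.Path` or `str` unconditionally and normalize once. A sketch of the pattern (paths are illustrative):

```python
from pathlib import Path

for cache_dir in ("/tmp/hf-cache", Path("/tmp/hf-cache")):  # illustrative paths
    if isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)
    print(type(cache_dir))  # always <class 'str'> for downstream os.path calls
```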
@@ -297,7 +276,7 @@ def http_get(url, temp_file, proxies=None, resume_size=0, user_agent=None):
     ua = "transformers/{}; python/{}".format(__version__, sys.version.split()[0])
     if isinstance(user_agent, dict):
         ua += "; " + "; ".join("{}/{}".format(k, v) for k, v in user_agent.items())
-    elif isinstance(user_agent, six.string_types):
+    elif isinstance(user_agent, str):
         ua += "; " + user_agent
     headers = {"user-agent": ua}
     if resume_size > 0:
@@ -331,9 +310,7 @@ def get_from_cache(
     """
     if cache_dir is None:
         cache_dir = TRANSFORMERS_CACHE
-    if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
-        cache_dir = str(cache_dir)
-    if sys.version_info[0] == 2 and not isinstance(cache_dir, str):
+    if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
     if not os.path.exists(cache_dir):
@@ -352,8 +329,6 @@ def get_from_cache(
     except (EnvironmentError, requests.exceptions.Timeout):
         etag = None
-    if sys.version_info[0] == 2 and etag is not None:
-        etag = etag.decode("utf-8")
     filename = url_to_filename(url, etag)
     # get cache path to put the file
@@ -417,9 +392,6 @@ def get_from_cache(
     meta = {"url": url, "etag": etag}
     meta_path = cache_path + ".json"
     with open(meta_path, "w") as meta_file:
-        output_string = json.dumps(meta)
-        if sys.version_info[0] == 2 and isinstance(output_string, str):
-            output_string = unicode(output_string, "utf-8")  # noqa: F821
-        meta_file.write(output_string)
+        json.dump(meta, meta_file)
     return cache_path
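The four removed lines were a Python 2 dance around `json.dumps` returning bytes-flavored `str`; on Python 3, `json.dump` writes text straight to the handle. The two forms are equivalent (values below are illustrative):

```python
import json

meta = {"url": "https://example.com/model.bin", "etag": '"abc123"'}  # illustrative

with open("model.bin.json", "w") as meta_file:  # illustrative path
    json.dump(meta, meta_file)  # new: serialize directly to the file
    # old: meta_file.write(json.dumps(meta)) plus a Python 2 re-decode step
```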
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, division, print_function
 import io
 import os
@@ -20,7 +20,6 @@ from os.path import expanduser
 from typing import List
 import requests
-import six
 from tqdm import tqdm
@@ -28,14 +27,7 @@ ENDPOINT = "https://huggingface.co"
 class S3Obj:
-    def __init__(
-        self,
-        filename,  # type: str
-        LastModified,  # type: str
-        ETag,  # type: str
-        Size,  # type: int
-        **kwargs
-    ):
+    def __init__(self, filename: str, LastModified: str, ETag: str, Size: int, **kwargs):
         self.filename = filename
         self.LastModified = LastModified
         self.ETag = ETag
@@ -43,13 +35,7 @@ class S3Obj:
 class PresignedUrl:
-    def __init__(
-        self,
-        write,  # type: str
-        access,  # type: str
-        type,  # type: str
-        **kwargs
-    ):
+    def __init__(self, write: str, access: str, type: str, **kwargs):
         self.write = write
         self.access = access
         self.type = type  # mime-type to send to S3.
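Every signature change in this file follows the same recipe: `# type:` comments (the only annotation syntax Python 2 could parse) become inline PEP 484 annotations, which also lets the multi-line parameter lists collapse. One practical gain, shown in a self-contained sketch with a trimmed copy of `S3Obj`: inline annotations are introspectable at runtime, comment types never were.

```python
import inspect

class S3Obj:  # trimmed copy of the class above, for illustration
    def __init__(self, filename: str, LastModified: str, ETag: str, Size: int, **kwargs):
        self.filename = filename

sig = inspect.signature(S3Obj.__init__)
print(sig.parameters["Size"].annotation)  # <class 'int'>
```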
@@ -59,12 +45,7 @@ class HfApi:
     def __init__(self, endpoint=None):
         self.endpoint = endpoint if endpoint is not None else ENDPOINT
-    def login(
-        self,
-        username,  # type: str
-        password,  # type: str
-    ):
-        # type: (...) -> str
+    def login(self, username: str, password: str) -> str:
         """
         Call HF API to sign in a user and get a token if credentials are valid.
@@ -80,10 +61,7 @@ class HfApi:
         d = r.json()
         return d["token"]
-    def whoami(
-        self, token,  # type: str
-    ):
-        # type: (...) -> str
+    def whoami(self, token: str) -> str:
         """
         Call HF API to know "whoami"
         """
@@ -93,8 +71,7 @@ class HfApi:
         d = r.json()
         return d["user"]
-    def logout(self, token):
-        # type: (...) -> None
+    def logout(self, token: str) -> None:
         """
         Call HF API to log out.
         """
@@ -102,19 +79,17 @@ class HfApi:
         r = requests.post(path, headers={"authorization": "Bearer {}".format(token)})
         r.raise_for_status()
-    def presign(self, token, filename):
-        # type: (...) -> PresignedUrl
+    def presign(self, token: str, filename) -> PresignedUrl:
         """
         Call HF API to get a presigned url to upload `filename` to S3.
         """
         path = "{}/api/presign".format(self.endpoint)
-        r = requests.post(path, headers={"authorization": "Bearer {}".format(token)}, json={"filename": filename},)
+        r = requests.post(path, headers={"authorization": "Bearer {}".format(token)}, json={"filename": filename})
         r.raise_for_status()
         d = r.json()
         return PresignedUrl(**d)
-    def presign_and_upload(self, token, filename, filepath):
-        # type: (...) -> str
+    def presign_and_upload(self, token: str, filename, filepath) -> str:
         """
         Get a presigned url, then upload file to S3.
@@ -158,13 +133,10 @@ class TqdmProgressFileReader:
     def __init__(self, f: io.BufferedReader):
         self.f = f
-        self.total_size = os.fstat(f.fileno()).st_size  # type: int
+        self.total_size = os.fstat(f.fileno()).st_size
         self.pbar = tqdm(total=self.total_size, leave=False)
-        if six.PY3:
-            # does not work unless PY3
-            # no big deal as the CLI does not currently support PY2 anyways.
-            self.read = f.read
-            f.read = self._read
+        self.read = f.read
+        f.read = self._read
     def _read(self, n=-1):
         self.pbar.update(n)
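The `six.PY3` guard around the `f.read` swap disappears because the trick now always works: keep a reference to the original `read`, then shadow it with an instance attribute that ticks the progress bar. A standalone sketch of the same technique (not part of the diff):

```python
import os
import tempfile
from tqdm import tqdm

f = tempfile.TemporaryFile()  # stand-in for the file being uploaded
f.write(b"x" * 1024)
f.seek(0)

pbar = tqdm(total=os.fstat(f.fileno()).st_size, leave=False)
original_read = f.read

def counting_read(n=-1):
    pbar.update(n)  # mirrors _read above; callers pass the chunk size
    return original_read(n)

f.read = counting_read  # instance attribute shadows the method, so any
                        # uploader that calls f.read() now reports progress
```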
@@ -182,16 +154,7 @@ class HfFolder:
         """
         Save token, creating folder as needed.
         """
-        if six.PY3:
-            os.makedirs(os.path.dirname(cls.path_token), exist_ok=True)
-        else:
-            # Python 2
-            try:
-                os.makedirs(os.path.dirname(cls.path_token))
-            except OSError as e:
-                if e.errno != os.errno.EEXIST:
-                    raise e
-            pass
+        os.makedirs(os.path.dirname(cls.path_token), exist_ok=True)
         with open(cls.path_token, "w+") as f:
             f.write(token)
...
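`exist_ok=True` (added in Python 3.2) replaces the classic EAFP block the else-branch carried. For reference, the two idioms side by side, using the standard `errno` module rather than the removed code's `os.errno` alias (the path is illustrative):

```python
import errno
import os

path = "/tmp/demo/token-dir"  # illustrative path

# Python 2 idiom: create, then swallow only the "already exists" error.
try:
    os.makedirs(path)
except OSError as e:
    if e.errno != errno.EEXIST:
        raise

# Python 3 idiom: one call, no exception handling needed.
os.makedirs(path, exist_ok=True)
```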
@@ -14,13 +14,11 @@
 # limitations under the License.
 """ Configuration base class and utilities."""
-from __future__ import absolute_import, division, print_function, unicode_literals
 import copy
 import json
 import logging
 import os
-from io import open
 from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP
 from .file_utils import (
...
@@ -14,7 +14,6 @@
 # limitations under the License.
 """ Auto Model class. """
-from __future__ import absolute_import, division, print_function, unicode_literals
 import logging
...
@@ -15,12 +15,10 @@
 # limitations under the License.
 """PyTorch BERT model. """
-from __future__ import absolute_import, division, print_function, unicode_literals
 import logging
 import math
 import os
-import sys
 import torch
 from torch import nn
@@ -339,9 +337,7 @@ class BertIntermediate(nn.Module):
     def __init__(self, config):
         super(BertIntermediate, self).__init__()
         self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
-        if isinstance(config.hidden_act, str) or (
-            sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)  # noqa: F821
-        ):
+        if isinstance(config.hidden_act, str):
             self.intermediate_act_fn = ACT2FN[config.hidden_act]
         else:
             self.intermediate_act_fn = config.hidden_act
@@ -461,9 +457,7 @@ class BertPredictionHeadTransform(nn.Module):
     def __init__(self, config):
         super(BertPredictionHeadTransform, self).__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
-        if isinstance(config.hidden_act, str) or (
-            sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)  # noqa: F821
-        ):
+        if isinstance(config.hidden_act, str):
             self.transform_act_fn = ACT2FN[config.hidden_act]
         else:
             self.transform_act_fn = config.hidden_act
...
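Both `BertIntermediate` and `BertPredictionHeadTransform` use the same dispatch: `config.hidden_act` may be either a name or a callable, and on Python 3 `str` covers all text, so the extra `unicode` arm is dead. A reduced sketch of the dispatch (the `ACT2FN` table here is an illustrative subset, not the library's full mapping):

```python
import torch
import torch.nn.functional as F

ACT2FN = {"relu": F.relu, "tanh": torch.tanh}  # illustrative subset

def resolve_act(hidden_act):
    """A string names an activation in the table; a callable passes through."""
    return ACT2FN[hidden_act] if isinstance(hidden_act, str) else hidden_act

print(resolve_act("relu"))         # <function relu ...>
print(resolve_act(torch.sigmoid))  # <built-in method sigmoid ...>
```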
@@ -15,7 +15,6 @@
 # limitations under the License.
 """PyTorch CamemBERT model. """
-from __future__ import absolute_import, division, print_function, unicode_literals
 import logging
...
@@ -15,7 +15,6 @@
 # limitations under the License.
 """ PyTorch CTRL model."""
-from __future__ import absolute_import, division, print_function, unicode_literals
 import logging
...
@@ -16,7 +16,7 @@
 adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM)
 and in part from HuggingFace PyTorch version of Google AI Bert model (https://github.com/google-research/bert)
 """
-from __future__ import absolute_import, division, print_function, unicode_literals
 import copy
 import logging
...
@@ -14,7 +14,6 @@
 # limitations under the License.
 """ Classes to support Encoder-Decoder architectures """
-from __future__ import absolute_import, division, print_function, unicode_literals
 import logging
 import os
...
@@ -15,7 +15,6 @@
 # limitations under the License.
 """PyTorch OpenAI GPT-2 model."""
-from __future__ import absolute_import, division, print_function, unicode_literals
 import logging
 import math
...
@@ -15,7 +15,6 @@
 # limitations under the License.
 """PyTorch MMBT model. """
-from __future__ import absolute_import, division, print_function, unicode_literals
 import logging
...
@@ -15,13 +15,11 @@
 # limitations under the License.
 """PyTorch OpenAI GPT model."""
-from __future__ import absolute_import, division, print_function, unicode_literals
 import json
 import logging
 import math
 import os
-from io import open
 import torch
 import torch.nn as nn
...