Unverified commit ce50305e authored by Aymeric Augustin, committed by GitHub

Merge pull request #2270 from aaugustin/remove-python-2

Remove support for Python 2
parents b6ea0f43 1a948d70
@@ -14,7 +14,6 @@
 # limitations under the License.
 """Convert RoBERTa checkpoint."""
-from __future__ import absolute_import, division, print_function

 import argparse
 import logging
......
@@ -14,7 +14,6 @@
 # limitations under the License.
 """Convert T5 checkpoint."""
-from __future__ import absolute_import, division, print_function

 import argparse
 import logging
......
@@ -14,13 +14,12 @@
 # limitations under the License.
 """Convert Transformer XL checkpoint and datasets."""
-from __future__ import absolute_import, division, print_function

 import argparse
 import logging
 import os
+import pickle
 import sys
-from io import open

 import torch
@@ -35,12 +34,6 @@ from transformers import (
 from transformers.tokenization_transfo_xl import CORPUS_NAME, VOCAB_FILES_NAMES

-if sys.version_info[0] == 2:
-    import cPickle as pickle
-else:
-    import pickle
-
 logging.basicConfig(level=logging.INFO)

 # We do this to be able to load python 2 datasets pickles
......
@@ -14,12 +14,10 @@
 # limitations under the License.
 """Convert OpenAI GPT checkpoint."""
-from __future__ import absolute_import, division, print_function

 import argparse
 import json
 import logging
-from io import open

 import numpy
 import torch
......
@@ -14,7 +14,6 @@
 # limitations under the License.
 """Convert BERT checkpoint."""
-from __future__ import absolute_import, division, print_function

 import argparse
 import logging
......
@@ -14,7 +14,6 @@ import logging
 import math
 import re
 import string
-from io import open

 from transformers.tokenization_bert import BasicTokenizer
......
@@ -18,7 +18,6 @@ import copy
 import csv
 import json
 import logging
-import sys

 from ...file_utils import is_tf_available, is_torch_available
@@ -98,13 +97,7 @@ class DataProcessor(object):
     def _read_tsv(cls, input_file, quotechar=None):
         """Reads a tab separated value file."""
         with open(input_file, "r", encoding="utf-8-sig") as f:
-            reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
-            lines = []
-            for line in reader:
-                if sys.version_info[0] == 2:
-                    line = list(unicode(cell, "utf-8") for cell in line)  # noqa: F821
-                lines.append(line)
-            return lines
+            return list(csv.reader(f, delimiter="\t", quotechar=quotechar))


 class SingleSentenceClassificationProcessor(DataProcessor):
......
@@ -15,7 +15,6 @@
 # limitations under the License.
 """ XNLI utils (dataset loading and evaluation) """
-from __future__ import absolute_import, division, print_function

 import logging
 import os
......
@@ -3,7 +3,7 @@ Utilities for working with the local dataset cache.
 This file is adapted from the AllenNLP library at https://github.com/allenai/allennlp
 Copyright by the AllenNLP authors.
 """
-from __future__ import absolute_import, division, print_function, unicode_literals

 import fnmatch
 import json
@@ -14,11 +14,10 @@ import tempfile
 from contextlib import contextmanager
 from functools import partial, wraps
 from hashlib import sha256
-from io import open
+from urllib.parse import urlparse

 import boto3
 import requests
-import six
 from botocore.config import Config
 from botocore.exceptions import ClientError
 from filelock import FileLock
@@ -66,10 +65,6 @@ except ImportError:
     )

 default_cache_path = os.path.join(torch_cache_home, "transformers")

-try:
-    from urllib.parse import urlparse
-except ImportError:
-    from urlparse import urlparse

 try:
     from pathlib import Path
@@ -107,36 +102,20 @@ def is_tf_available():
     return _tf_available


-if not six.PY2:
-
-    def add_start_docstrings(*docstr):
-        def docstring_decorator(fn):
-            fn.__doc__ = "".join(docstr) + fn.__doc__
-            return fn
-
-        return docstring_decorator
-
-    def add_end_docstrings(*docstr):
-        def docstring_decorator(fn):
-            fn.__doc__ = fn.__doc__ + "".join(docstr)
-            return fn
-
-        return docstring_decorator
-
-
-else:
-    # Not possible to update class docstrings on python2
-    def add_start_docstrings(*docstr):
-        def docstring_decorator(fn):
-            return fn
-
-        return docstring_decorator
-
-    def add_end_docstrings(*docstr):
-        def docstring_decorator(fn):
-            return fn
-
-        return docstring_decorator
+def add_start_docstrings(*docstr):
+    def docstring_decorator(fn):
+        fn.__doc__ = "".join(docstr) + fn.__doc__
+        return fn
+
+    return docstring_decorator
+
+
+def add_end_docstrings(*docstr):
+    def docstring_decorator(fn):
+        fn.__doc__ = fn.__doc__ + "".join(docstr)
+        return fn
+
+    return docstring_decorator


 def is_remote_url(url_or_filename):
@@ -183,7 +162,7 @@ def filename_to_url(filename, cache_dir=None):
     """
     if cache_dir is None:
         cache_dir = TRANSFORMERS_CACHE
-    if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
+    if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)

     cache_path = os.path.join(cache_dir, filename)
@@ -218,9 +197,9 @@ def cached_path(
     """
     if cache_dir is None:
         cache_dir = TRANSFORMERS_CACHE
-    if sys.version_info[0] == 3 and isinstance(url_or_filename, Path):
+    if isinstance(url_or_filename, Path):
         url_or_filename = str(url_or_filename)
-    if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
+    if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)

     if is_remote_url(url_or_filename):
@@ -297,7 +276,7 @@ def http_get(url, temp_file, proxies=None, resume_size=0, user_agent=None):
     ua = "transformers/{}; python/{}".format(__version__, sys.version.split()[0])
     if isinstance(user_agent, dict):
         ua += "; " + "; ".join("{}/{}".format(k, v) for k, v in user_agent.items())
-    elif isinstance(user_agent, six.string_types):
+    elif isinstance(user_agent, str):
         ua += "; " + user_agent
     headers = {"user-agent": ua}
     if resume_size > 0:
@@ -331,9 +310,7 @@ def get_from_cache(
     """
     if cache_dir is None:
         cache_dir = TRANSFORMERS_CACHE
-    if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
-        cache_dir = str(cache_dir)
-    if sys.version_info[0] == 2 and not isinstance(cache_dir, str):
+    if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)

     if not os.path.exists(cache_dir):
@@ -352,8 +329,6 @@ def get_from_cache(
        except (EnvironmentError, requests.exceptions.Timeout):
            etag = None

-    if sys.version_info[0] == 2 and etag is not None:
-        etag = etag.decode("utf-8")
    filename = url_to_filename(url, etag)

    # get cache path to put the file
@@ -417,9 +392,6 @@ def get_from_cache(
            meta = {"url": url, "etag": etag}
            meta_path = cache_path + ".json"
            with open(meta_path, "w") as meta_file:
-                output_string = json.dumps(meta)
-                if sys.version_info[0] == 2 and isinstance(output_string, str):
-                    output_string = unicode(output_string, "utf-8")  # noqa: F821
-                meta_file.write(output_string)
+                json.dump(meta, meta_file)

    return cache_path
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from __future__ import absolute_import, division, print_function

 import io
 import os
@@ -20,7 +20,6 @@ from os.path import expanduser
 from typing import List

 import requests
-import six
 from tqdm import tqdm
@@ -28,14 +27,7 @@ ENDPOINT = "https://huggingface.co"


 class S3Obj:
-    def __init__(
-        self,
-        filename,  # type: str
-        LastModified,  # type: str
-        ETag,  # type: str
-        Size,  # type: int
-        **kwargs
-    ):
+    def __init__(self, filename: str, LastModified: str, ETag: str, Size: int, **kwargs):
         self.filename = filename
         self.LastModified = LastModified
         self.ETag = ETag
@@ -43,13 +35,7 @@ class S3Obj:


 class PresignedUrl:
-    def __init__(
-        self,
-        write,  # type: str
-        access,  # type: str
-        type,  # type: str
-        **kwargs
-    ):
+    def __init__(self, write: str, access: str, type: str, **kwargs):
         self.write = write
         self.access = access
         self.type = type  # mime-type to send to S3.
@@ -59,12 +45,7 @@ class HfApi:
     def __init__(self, endpoint=None):
         self.endpoint = endpoint if endpoint is not None else ENDPOINT

-    def login(
-        self,
-        username,  # type: str
-        password,  # type: str
-    ):
-        # type: (...) -> str
+    def login(self, username: str, password: str) -> str:
         """
         Call HF API to sign in a user and get a token if credentials are valid.
@@ -80,10 +61,7 @@ class HfApi:
         d = r.json()
         return d["token"]

-    def whoami(
-        self, token,  # type: str
-    ):
-        # type: (...) -> str
+    def whoami(self, token: str) -> str:
         """
         Call HF API to know "whoami"
         """
@@ -93,8 +71,7 @@
         d = r.json()
         return d["user"]

-    def logout(self, token):
-        # type: (...) -> None
+    def logout(self, token: str) -> None:
         """
         Call HF API to log out.
         """
@@ -102,19 +79,17 @@
         r = requests.post(path, headers={"authorization": "Bearer {}".format(token)})
         r.raise_for_status()

-    def presign(self, token, filename):
-        # type: (...) -> PresignedUrl
+    def presign(self, token: str, filename) -> PresignedUrl:
         """
         Call HF API to get a presigned url to upload `filename` to S3.
         """
         path = "{}/api/presign".format(self.endpoint)
-        r = requests.post(path, headers={"authorization": "Bearer {}".format(token)}, json={"filename": filename},)
+        r = requests.post(path, headers={"authorization": "Bearer {}".format(token)}, json={"filename": filename})
         r.raise_for_status()
         d = r.json()
         return PresignedUrl(**d)

-    def presign_and_upload(self, token, filename, filepath):
-        # type: (...) -> str
+    def presign_and_upload(self, token: str, filename, filepath) -> str:
         """
         Get a presigned url, then upload file to S3.
@@ -158,13 +133,10 @@ class TqdmProgressFileReader:
     def __init__(self, f: io.BufferedReader):
         self.f = f
-        self.total_size = os.fstat(f.fileno()).st_size  # type: int
+        self.total_size = os.fstat(f.fileno()).st_size
         self.pbar = tqdm(total=self.total_size, leave=False)
-        if six.PY3:
-            # does not work unless PY3
-            # no big deal as the CLI does not currently support PY2 anyways.
-            self.read = f.read
-            f.read = self._read
+        self.read = f.read
+        f.read = self._read

     def _read(self, n=-1):
         self.pbar.update(n)
@@ -182,16 +154,7 @@ class HfFolder:
         """
         Save token, creating folder as needed.
         """
-        if six.PY3:
-            os.makedirs(os.path.dirname(cls.path_token), exist_ok=True)
-        else:
-            # Python 2
-            try:
-                os.makedirs(os.path.dirname(cls.path_token))
-            except OSError as e:
-                if e.errno != os.errno.EEXIST:
-                    raise e
-                pass
+        os.makedirs(os.path.dirname(cls.path_token), exist_ok=True)
         with open(cls.path_token, "w+") as f:
             f.write(token)
......
@@ -14,13 +14,11 @@
 # limitations under the License.
 """ Configuration base class and utilities."""
-from __future__ import absolute_import, division, print_function, unicode_literals

 import copy
 import json
 import logging
 import os
-from io import open

 from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP
 from .file_utils import (
......
@@ -14,7 +14,6 @@
 # limitations under the License.
 """ Auto Model class. """
-from __future__ import absolute_import, division, print_function, unicode_literals

 import logging
......
@@ -15,12 +15,10 @@
 # limitations under the License.
 """PyTorch BERT model. """
-from __future__ import absolute_import, division, print_function, unicode_literals

 import logging
 import math
 import os
-import sys

 import torch
 from torch import nn
@@ -339,9 +337,7 @@ class BertIntermediate(nn.Module):
     def __init__(self, config):
         super(BertIntermediate, self).__init__()
         self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
-        if isinstance(config.hidden_act, str) or (
-            sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)  # noqa: F821
-        ):
+        if isinstance(config.hidden_act, str):
             self.intermediate_act_fn = ACT2FN[config.hidden_act]
         else:
             self.intermediate_act_fn = config.hidden_act
@@ -461,9 +457,7 @@ class BertPredictionHeadTransform(nn.Module):
     def __init__(self, config):
         super(BertPredictionHeadTransform, self).__init__()
         self.dense = nn.Linear(config.hidden_size, config.hidden_size)
-        if isinstance(config.hidden_act, str) or (
-            sys.version_info[0] == 2 and isinstance(config.hidden_act, unicode)  # noqa: F821
-        ):
+        if isinstance(config.hidden_act, str):
             self.transform_act_fn = ACT2FN[config.hidden_act]
         else:
             self.transform_act_fn = config.hidden_act
......
@@ -15,7 +15,6 @@
 # limitations under the License.
 """PyTorch CamemBERT model. """
-from __future__ import absolute_import, division, print_function, unicode_literals

 import logging
......
@@ -15,7 +15,6 @@
 # limitations under the License.
 """ PyTorch CTRL model."""
-from __future__ import absolute_import, division, print_function, unicode_literals

 import logging
......
@@ -16,7 +16,7 @@
 adapted in part from Facebook, Inc XLM model (https://github.com/facebookresearch/XLM)
 and in part from HuggingFace PyTorch version of Google AI Bert model (https://github.com/google-research/bert)
 """
-from __future__ import absolute_import, division, print_function, unicode_literals

 import copy
 import logging
......
@@ -14,7 +14,6 @@
 # limitations under the License.
 """ Classes to support Encoder-Decoder architectures """
-from __future__ import absolute_import, division, print_function, unicode_literals

 import logging
 import os
......
@@ -15,7 +15,6 @@
 # limitations under the License.
 """PyTorch OpenAI GPT-2 model."""
-from __future__ import absolute_import, division, print_function, unicode_literals

 import logging
 import math
......
@@ -15,7 +15,6 @@
 # limitations under the License.
 """PyTorch MMBT model. """
-from __future__ import absolute_import, division, print_function, unicode_literals

 import logging
......
@@ -15,13 +15,11 @@
 # limitations under the License.
 """PyTorch OpenAI GPT model."""
-from __future__ import absolute_import, division, print_function, unicode_literals

 import json
 import logging
 import math
 import os
-from io import open

 import torch
 import torch.nn as nn
......