Unverified commit ce50305e authored by Aymeric Augustin, committed by GitHub

Merge pull request #2270 from aaugustin/remove-python-2

Remove support for Python 2
parents b6ea0f43 1a948d70
version: 2
jobs:
-run_tests_py3_torch_and_tf:
+run_tests_torch_and_tf:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@@ -17,7 +17,7 @@ jobs:
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
-run_tests_py3_torch:
+run_tests_torch:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@@ -33,7 +33,7 @@ jobs:
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
-run_tests_py3_tf:
+run_tests_tf:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@@ -49,7 +49,7 @@ jobs:
- run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
- run: codecov
-run_tests_py3_custom_tokenizers:
+run_tests_custom_tokenizers:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@@ -59,7 +59,7 @@ jobs:
- run: sudo pip install pytest pytest-xdist
- run: sudo pip install mecab-python3
- run: RUN_CUSTOM_TOKENIZERS=1 python -m pytest -sv ./tests/test_tokenization_bert_japanese.py
-run_examples_py3_torch:
+run_examples_torch:
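The `RUN_CUSTOM_TOKENIZERS=1` step above gates the Japanese tokenizer tests, which need the optional `mecab-python3` dependency. A minimal sketch of how such an opt-in gate can be written (the decorator placement and class name here are hypothetical, not copied from the test file):

``` python
import os
import unittest

# Hypothetical sketch: skip the whole suite unless the opt-in flag is set.
@unittest.skipUnless(os.getenv("RUN_CUSTOM_TOKENIZERS"), "custom tokenizer tests are opt-in")
class BertJapaneseTokenizationTest(unittest.TestCase):
    def test_tokenize(self):
        ...
```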
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
@@ -121,9 +121,9 @@ workflows:
jobs:
- check_code_quality
- check_repository_consistency
-- run_examples_py3_torch
-- run_tests_py3_custom_tokenizers
-- run_tests_py3_torch_and_tf
-- run_tests_py3_torch
-- run_tests_py3_tf
+- run_examples_torch
+- run_tests_custom_tokenizers
+- run_tests_torch_and_tf
+- run_tests_torch
+- run_tests_tf
- deploy_doc: *workflow_filters
@@ -64,7 +64,7 @@ Choose the right framework for every part of a model's lifetime
## Installation
-This repo is tested on Python 2.7 and 3.5+ (examples are tested only on Python 3.5+), PyTorch 1.0.0+ and TensorFlow 2.0.0-rc1
+This repo is tested on Python 3.5+, PyTorch 1.0.0+ and TensorFlow 2.0.0-rc1
### With pip
# Installation
-Transformers is tested on Python 2.7 and 3.5+ (examples are tested only on Python 3.5+) and PyTorch 1.1.0
+Transformers is tested on Python 3.5+ and PyTorch 1.1.0
## With pip
@@ -44,7 +44,7 @@ By default, slow tests are skipped. Set the `RUN_SLOW` environment variable to `
## OpenAI GPT original tokenization workflow
-If you want to reproduce the original tokenization process of the `OpenAI GPT` paper, you will need to install `ftfy` (use version 4.4.3 if you are using Python 2) and `SpaCy`:
+If you want to reproduce the original tokenization process of the `OpenAI GPT` paper, you will need to install `ftfy` and `SpaCy`:
``` bash
pip install spacy ftfy==4.4.3
```
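For reference, a minimal sketch of the normalize-then-tokenize flow these two packages provide (the spaCy model name and sample text are illustrative, not from the repo):

``` python
# Illustrative only: repair mojibake with ftfy, then tokenize with spaCy.
import ftfy
import spacy

nlp = spacy.load("en_core_web_sm")  # assumes this spaCy model is installed
text = ftfy.fix_text("The quick brown fox doesnÃ¢â‚¬â„¢t jump")  # garbled input
print([token.text for token in nlp(text)])
```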
@@ -16,7 +16,7 @@
"""BERT finetuning runner.
Finetuning the library models for multiple choice on SWAG (Bert).
"""
-from __future__ import absolute_import, division, print_function
import argparse
import csv
@@ -24,7 +24,6 @@ import glob
import logging
import os
import random
-import sys
import numpy as np
import torch
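The deleted `from __future__` line (and the `sys` import used for the version check below) are Python 2 compatibility shims; on Python 3 these imports are no-ops, so removing them changes nothing. What they did on Python 2, in brief:

``` python
# On Python 2, `/` truncated integers and `print` was a statement; these
# imports opted in to the Python 3 behavior. On Python 3 they do nothing.
from __future__ import division, print_function

print(3 / 2)  # 1.5 under both interpreters once division is imported
```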
@@ -104,12 +103,7 @@ class InputFeatures(object):
def read_swag_examples(input_file, is_training=True):
    with open(input_file, "r", encoding="utf-8") as f:
-        reader = csv.reader(f)
-        lines = []
-        for line in reader:
-            if sys.version_info[0] == 2:
-                line = list(unicode(cell, "utf-8") for cell in line)  # noqa: F821
-            lines.append(line)
+        lines = list(csv.reader(f))
    if is_training and lines[0][-1] != "label":
        raise ValueError("For training, the input file must contain a label column.")
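The removed loop existed only to decode each cell to `unicode` under Python 2; on Python 3, `csv.reader` already yields lists of `str`, so it collapses to a one-liner. The same idiom standalone (the file name is illustrative):

``` python
import csv

# Python 3: open() handles decoding, csv.reader yields lists of str.
with open("train.csv", "r", encoding="utf-8") as f:
    lines = list(csv.reader(f))
```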
@@ -347,7 +341,7 @@ def train(args, train_dataset, model, tokenizer):
tr_loss, logging_loss = 0.0, 0.0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
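The `set_seed` helper referenced in these training loops seeds every RNG the run touches; in the example scripts it looks roughly like this (an approximation, not the verbatim source):

``` python
import random

import numpy as np
import torch

def set_seed(args):
    # Seed Python, NumPy and PyTorch so repeated runs are reproducible.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)
```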
@@ -19,7 +19,7 @@
This script with default values evaluates a pretrained Transformer-XL on WikiText 103
"""
-from __future__ import absolute_import, division, print_function, unicode_literals
import argparse
import logging
@@ -15,7 +15,6 @@
# limitations under the License.
""" This is the exact same script as `examples/run_squad.py` (as of 2019, October 4th) with an additional and optional step of distillation."""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -160,7 +159,7 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
tr_loss, logging_loss = 0.0, 0.0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for multimodal multiclass prediction on MM-IMDB dataset."""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -165,7 +164,7 @@ def train(args, train_dataset, model, tokenizer, criterion):
best_f1, n_no_improve = 0, 0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -16,7 +16,7 @@
# limitations under the License.
""" Conditional text generation with the auto-regressive models of the library (GPT/GPT-2/CTRL/Transformer-XL/XLNet)
"""
-from __future__ import absolute_import, division, print_function, unicode_literals
import argparse
import logging
@@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for sequence classification on GLUE (Bert, XLM, XLNet, RoBERTa)."""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -186,7 +185,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -19,7 +19,6 @@ GPT and GPT-2 are fine-tuned using a causal language modeling (CLM) loss while B
using a masked language modeling (MLM) loss.
"""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -282,7 +281,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for multiple choice (Bert, Roberta, XLNet)."""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -146,7 +145,7 @@ def train(args, train_dataset, model, tokenizer):
best_steps = 0
model.zero_grad()
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -15,7 +15,6 @@
# limitations under the License.
""" Fine-tuning the library models for named entity recognition on CoNLL-2003 (Bert or Roberta). """
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -170,7 +169,7 @@ def train(args, train_dataset, model, tokenizer, labels, pad_token_label_id):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -15,7 +15,6 @@
# limitations under the License.
""" Finetuning the library models for question-answering on SQuAD (DistilBERT, Bert, XLM, XLNet)."""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -186,7 +185,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
-# Added here for reproducibility (even between python 2 and 3)
+# Added here for reproducibility
set_seed(args)
for _ in train_iterator:
@@ -16,7 +16,6 @@
""" Finetuning multi-lingual models on XNLI (Bert, DistilBERT, XLM).
Adapted from `examples/run_glue.py`"""
-from __future__ import absolute_import, division, print_function
import argparse
import glob
@@ -165,7 +164,7 @@ def train(args, train_dataset, model, tokenizer):
train_iterator = trange(
epochs_trained, int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0]
)
-set_seed(args) # Added here for reproducibility (even between python 2 and 3)
+set_seed(args) # Added here for reproducibility
for _ in train_iterator:
epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
for step, batch in enumerate(epoch_iterator):
@@ -94,7 +94,7 @@ def process_story(raw_story):
def _add_missing_period(line):
-END_TOKENS = [".", "!", "?", "...", "'", "`", '"', u"\u2019", u"\u2019", ")"]
+END_TOKENS = [".", "!", "?", "...", "'", "`", '"', "\u2019", "\u2019", ")"]
if line.startswith("@highlight"):
return line
if line[-1] in END_TOKENS:
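Dropping the `u` prefixes is safe because every Python 3 string literal is already unicode; the prefix survives only as inert compatibility syntax:

``` python
# In Python 3 the u prefix is accepted but meaningless.
assert u"\u2019" == "\u2019"
assert type("\u2019") is str
```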
@@ -12,25 +12,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-from __future__ import absolute_import, division, print_function
import argparse
import logging
import sys
import unittest
+from unittest.mock import patch
import run_generation
import run_glue
import run_squad
-try:
-    # python 3.4+ can use builtin unittest.mock instead of mock package
-    from unittest.mock import patch
-except ImportError:
-    from mock import patch
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
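`unittest.mock` has shipped with the standard library since Python 3.3, so the `try`/`except ImportError` fallback to the external `mock` package is dead code once Python 2 support is gone. A sketch of how these tests typically use `patch` (the argv values are illustrative):

``` python
import sys
from unittest.mock import patch

# Illustrative: substitute sys.argv while invoking a script's main().
testargs = ["run_glue.py", "--model_name_or_path", "bert-base-uncased"]
with patch.object(sys, "argv", testargs):
    pass  # run_glue.main() would see the patched argv here
```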
@@ -15,15 +15,12 @@
# limitations under the License.
""" Multiple choice fine-tuning: utilities to work with multiple choice tasks of reading comprehension """
-from __future__ import absolute_import, division, print_function
import csv
import glob
import json
import logging
import os
-import sys
-from io import open
from typing import List
import tqdm
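`from io import open` was another Python 2 shim: it backported an `open()` that accepts an `encoding` argument. The Python 3 builtin already does, so the import can simply go:

``` python
# Python 3's builtin open() takes encoding directly; no io import needed.
with open("data.csv", encoding="utf-8") as f:
    contents = f.read()
```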
@@ -180,13 +177,7 @@ class SwagProcessor(DataProcessor):
    def _read_csv(self, input_file):
        with open(input_file, "r", encoding="utf-8") as f:
-            reader = csv.reader(f)
-            lines = []
-            for line in reader:
-                if sys.version_info[0] == 2:
-                    line = list(unicode(cell, "utf-8") for cell in line)  # noqa: F821
-                lines.append(line)
-            return lines
+            return list(csv.reader(f))

    def _create_examples(self, lines: List[List[str]], type: str):
        """Creates examples for the training and dev sets."""
@@ -15,11 +15,9 @@
# limitations under the License.
""" Named entity recognition fine-tuning: utilities to work with CoNLL-2003 task. """
-from __future__ import absolute_import, division, print_function
import logging
import os
-from io import open
logger = logging.getLogger(__name__)
@@ -14,7 +14,7 @@ To create the package for pypi.
creating the wheel and the source distribution (obviously).
For the wheel, run: "python setup.py bdist_wheel" in the top level directory.
-(this will build a wheel for the python version you use to build it - make sure you use python 3.x).
+(this will build a wheel for the python version you use to build it).
For the sources, run: "python setup.py sdist"
You should now have a /dist directory with both .whl and .tar.gz source versions.
@@ -33,7 +33,6 @@ To create the package for pypi.
7. Copy the release notes from RELEASE.md to the tag in github once everything is looking hunky-dory.
"""
-from io import open
from setuptools import find_packages, setup
@@ -34,8 +34,8 @@ class ANSI:
Helper for en.wikipedia.org/wiki/ANSI_escape_code
"""
-_bold = u"\u001b[1m"
-_reset = u"\u001b[0m"
+_bold = "\u001b[1m"
+_reset = "\u001b[0m"
@classmethod
def bold(cls, s):
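The only change here is, again, dropping the redundant `u` prefixes; the escape-code mechanics are untouched. For context, a self-contained sketch of the same bold-text pattern:

``` python
# ANSI escape codes: \u001b[1m turns bold on, \u001b[0m resets styling.
BOLD = "\u001b[1m"
RESET = "\u001b[0m"

def bold(s: str) -> str:
    return f"{BOLD}{s}{RESET}"

print(bold("hello"))
```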