Unverified commit 54abc67a authored by Thomas Wolf, committed by GitHub

Merge pull request #2255 from aaugustin/implement-best-practices

Implement some Python best practices
parents 645713e2 c11b3e29
......@@ -21,6 +21,7 @@ import logging
import os
from io import open
logger = logging.getLogger(__name__)
......@@ -61,9 +62,7 @@ def read_examples_from_file(data_dir, mode):
for line in f:
if line.startswith("-DOCSTART-") or line == "" or line == "\n":
if words:
examples.append(InputExample(guid="{}-{}".format(mode, guid_index),
words=words,
labels=labels))
examples.append(InputExample(guid="{}-{}".format(mode, guid_index), words=words, labels=labels))
guid_index += 1
words = []
labels = []
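For context, a minimal sketch of the CoNLL-style input this loop expects, using hypothetical data (one `token label` pair per line; blank lines and `-DOCSTART-` markers close a sentence):

sample = "-DOCSTART- O\n\nEU B-ORG\nrejects O\nGerman B-MISC\n"
for line in sample.splitlines(keepends=True):
    if line.startswith("-DOCSTART-") or line == "" or line == "\n":
        print("sentence boundary")  # flush accumulated words/labels here
    else:
        splits = line.split()
        print(splits[0], splits[-1])  # word, label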
......@@ -76,13 +75,12 @@ def read_examples_from_file(data_dir, mode):
# Examples could have no label for mode = "test"
labels.append("O")
if words:
examples.append(InputExample(guid="%s-%d".format(mode, guid_index),
words=words,
labels=labels))
examples.append(InputExample(guid="%s-%d".format(mode, guid_index), words=words, labels=labels))
return examples
def convert_examples_to_features(examples,
def convert_examples_to_features(
examples,
label_list,
max_seq_length,
tokenizer,
......@@ -96,7 +94,8 @@ def convert_examples_to_features(examples,
pad_token_segment_id=0,
pad_token_label_id=-100,
sequence_a_segment_id=0,
mask_padding_with_zero=True):
mask_padding_with_zero=True,
):
""" Loads a data file into a list of `InputBatch`s
`cls_token_at_end` defines the location of the CLS token:
- False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
......@@ -122,8 +121,8 @@ def convert_examples_to_features(examples,
# Account for [CLS] and [SEP] with "- 2" and with "- 3" for RoBERTa.
special_tokens_count = 3 if sep_token_extra else 2
if len(tokens) > max_seq_length - special_tokens_count:
tokens = tokens[:(max_seq_length - special_tokens_count)]
label_ids = label_ids[:(max_seq_length - special_tokens_count)]
tokens = tokens[: (max_seq_length - special_tokens_count)]
label_ids = label_ids[: (max_seq_length - special_tokens_count)]
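A minimal illustration of the special-token accounting above, assuming max_seq_length = 8:

max_seq_length = 8
for sep_token_extra in (False, True):  # False: BERT-style, True: RoBERTa-style
    special_tokens_count = 3 if sep_token_extra else 2
    print(max_seq_length - special_tokens_count)  # 6 word tokens kept, then 5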
# The convention in BERT is:
# (a) For sequence pairs:
......@@ -174,10 +173,10 @@ def convert_examples_to_features(examples,
segment_ids = ([pad_token_segment_id] * padding_length) + segment_ids
label_ids = ([pad_token_label_id] * padding_length) + label_ids
else:
input_ids += ([pad_token] * padding_length)
input_mask += ([0 if mask_padding_with_zero else 1] * padding_length)
segment_ids += ([pad_token_segment_id] * padding_length)
label_ids += ([pad_token_label_id] * padding_length)
input_ids += [pad_token] * padding_length
input_mask += [0 if mask_padding_with_zero else 1] * padding_length
segment_ids += [pad_token_segment_id] * padding_length
label_ids += [pad_token_label_id] * padding_length
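A small sketch of the pad-on-the-right branch above, with hypothetical values (pad_token = 0, mask_padding_with_zero = True):

input_ids = [101, 7592, 102]  # hypothetical token ids
max_seq_length, pad_token = 5, 0
padding_length = max_seq_length - len(input_ids)
input_mask = [1] * len(input_ids)  # real tokens get 1 when mask_padding_with_zero is True
input_ids += [pad_token] * padding_length
input_mask += [0] * padding_length
print(input_ids, input_mask)  # [101, 7592, 102, 0, 0] [1, 1, 1, 0, 0]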
assert len(input_ids) == max_seq_length
assert len(input_mask) == max_seq_length
......@@ -194,10 +193,8 @@ def convert_examples_to_features(examples,
logger.info("label_ids: %s", " ".join([str(x) for x in label_ids]))
features.append(
InputFeatures(input_ids=input_ids,
input_mask=input_mask,
segment_ids=segment_ids,
label_ids=label_ids))
InputFeatures(input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids, label_ids=label_ids)
)
return features
......
from transformers import (
AutoTokenizer, AutoConfig, AutoModel, AutoModelWithLMHead, AutoModelForSequenceClassification, AutoModelForQuestionAnswering
AutoConfig,
AutoModel,
AutoModelForQuestionAnswering,
AutoModelForSequenceClassification,
AutoModelWithLMHead,
AutoTokenizer,
)
from transformers.file_utils import add_start_docstrings
dependencies = ['torch', 'tqdm', 'boto3', 'requests', 'regex', 'sentencepiece', 'sacremoses']
dependencies = ["torch", "tqdm", "boto3", "requests", "regex", "sentencepiece", "sacremoses"]
@add_start_docstrings(AutoConfig.__doc__)
def config(*args, **kwargs):
......@@ -57,6 +64,7 @@ def model(*args, **kwargs):
return AutoModel.from_pretrained(*args, **kwargs)
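These functions are torch.hub entry points; a hedged usage sketch (the repo path and model name are illustrative, and network access is required):

import torch

# Loads the entry points defined above through the hub machinery.
config = torch.hub.load("huggingface/transformers", "config", "bert-base-uncased")
model = torch.hub.load("huggingface/transformers", "model", "bert-base-uncased")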
@add_start_docstrings(AutoModelWithLMHead.__doc__)
def modelWithLMHead(*args, **kwargs):
r"""
......
[isort]
ensure_newline_before_comments = True
force_grid_wrap = 0
include_trailing_comma = True
known_first_party = transformers
known_third_party =
fairseq
fastprogress
git
nltk
packaging
PIL
psutil
seqeval
sklearn
tensorboardX
tensorflow_datasets
torchtext
torchvision
line_length = 119
lines_after_imports = 2
multi_line_output = 3
use_parentheses = True
[flake8]
ignore = E203, E501, F841, W503
max-line-length = 119
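For reference, multi_line_output = 3 is isort's vertical-hanging-indent mode; together with use_parentheses and include_trailing_comma it yields the import style used throughout this diff, e.g.:

from transformers import (
    AutoConfig,
    AutoModel,
    AutoTokenizer,
)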
......@@ -34,15 +34,16 @@ To create the package for pypi.
"""
from io import open
from setuptools import find_packages, setup
extras = {
'serving': ['pydantic', 'uvicorn', 'fastapi'],
'serving-tf': ['pydantic', 'uvicorn', 'fastapi', 'tensorflow'],
'serving-torch': ['pydantic', 'uvicorn', 'fastapi', 'torch']
"serving": ["pydantic", "uvicorn", "fastapi"],
"serving-tf": ["pydantic", "uvicorn", "fastapi", "tensorflow"],
"serving-torch": ["pydantic", "uvicorn", "fastapi", "torch"],
}
extras['all'] = [package for package in extras.values()]
extras["all"] = [package for package in extras.values()]
setup(
name="transformers",
......@@ -50,30 +51,29 @@ setup(
author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
author_email="thomas@huggingface.co",
description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",
long_description=open("README.md", "r", encoding='utf-8').read(),
long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",
keywords='NLP deep learning transformer pytorch tensorflow BERT GPT GPT-2 google openai CMU',
license='Apache',
keywords="NLP deep learning transformer pytorch tensorflow BERT GPT GPT-2 google openai CMU",
license="Apache",
url="https://github.com/huggingface/transformers",
packages=find_packages(exclude=["*.tests", "*.tests.*",
"tests.*", "tests"]),
install_requires=['numpy',
'boto3',
'filelock',
'requests',
'tqdm',
'regex != 2019.12.17',
'sentencepiece',
'sacremoses'],
extras_require=extras,
scripts=[
'transformers-cli'
packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
install_requires=[
"numpy",
"boto3",
"filelock",
"requests",
"tqdm",
"regex != 2019.12.17",
"sentencepiece",
"sacremoses",
],
extras_require=extras,
scripts=["transformers-cli"],
# python_requires='>=3.5.0',
classifiers=[
'Intended Audience :: Science/Research',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 3',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
)
......@@ -17,55 +17,70 @@
from __future__ import absolute_import, division, print_function
import argparse
import glob
import logging
import os
import random
import glob
import numpy as np
import torch
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
TensorDataset)
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from torch.utils.data.distributed import DistributedSampler
try:
from torch.utils.tensorboard import SummaryWriter
except:
from tensorboardX import SummaryWriter
from tqdm import tqdm, trange
from transformers import (WEIGHTS_NAME, BertConfig,
BertForQuestionAnswering, BertTokenizer,
XLMConfig, XLMForQuestionAnswering,
XLMTokenizer, XLNetConfig,
from transformers import (
WEIGHTS_NAME,
AdamW,
BertConfig,
BertForQuestionAnswering,
BertTokenizer,
DistilBertConfig,
DistilBertForQuestionAnswering,
DistilBertTokenizer,
XLMConfig,
XLMForQuestionAnswering,
XLMTokenizer,
XLNetConfig,
XLNetForQuestionAnswering,
XLNetTokenizer,
DistilBertConfig, DistilBertForQuestionAnswering, DistilBertTokenizer)
from transformers import AdamW, get_linear_schedule_with_warmup
from utils_squad import (read_squad_examples, convert_examples_to_features,
RawResult, write_predictions,
RawResultExtended, write_predictions_extended)
get_linear_schedule_with_warmup,
)
from utils_squad import (
RawResult,
RawResultExtended,
convert_examples_to_features,
read_squad_examples,
write_predictions,
write_predictions_extended,
)
# The following import is the official SQuAD evaluation script (2.0).
# You can remove it from the dependencies if you are using this script outside of the library
# We've added it here for automated tests (see examples/test_examples.py file)
from utils_squad_evaluate import EVAL_OPTS, main as evaluate_on_squad
from utils_squad_evaluate import EVAL_OPTS
from utils_squad_evaluate import main as evaluate_on_squad
try:
from torch.utils.tensorboard import SummaryWriter
except ImportError:
from tensorboardX import SummaryWriter
logger = logging.getLogger(__name__)
ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) \
for conf in (BertConfig, XLNetConfig, XLMConfig)), ())
ALL_MODELS = sum(
(tuple(conf.pretrained_config_archive_map.keys()) for conf in (BertConfig, XLNetConfig, XLMConfig)), ()
)
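The sum(..., ()) idiom above concatenates the per-config key tuples into one flat tuple; a minimal illustration:

print(sum(((1, 2), (3,), (4, 5)), ()))  # (1, 2, 3, 4, 5)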
MODEL_CLASSES = {
'bert': (BertConfig, BertForQuestionAnswering, BertTokenizer),
'xlnet': (XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer),
'xlm': (XLMConfig, XLMForQuestionAnswering, XLMTokenizer),
'distilbert': (DistilBertConfig, DistilBertForQuestionAnswering, DistilBertTokenizer)
"bert": (BertConfig, BertForQuestionAnswering, BertTokenizer),
"xlnet": (XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer),
"xlm": (XLMConfig, XLMForQuestionAnswering, XLMTokenizer),
"distilbert": (DistilBertConfig, DistilBertForQuestionAnswering, DistilBertTokenizer),
}
def set_seed(args):
random.seed(args.seed)
np.random.seed(args.seed)
......@@ -73,9 +88,11 @@ def set_seed(args):
if args.n_gpu > 0:
torch.cuda.manual_seed_all(args.seed)
def to_list(tensor):
return tensor.detach().cpu().tolist()
def train(args, train_dataset, model, tokenizer):
""" Train the model """
if args.local_rank in [-1, 0]:
......@@ -92,13 +109,18 @@ def train(args, train_dataset, model, tokenizer):
t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
# Prepare optimizer and schedule (linear warmup and decay)
no_decay = ['bias', 'LayerNorm.weight']
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
{'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': args.weight_decay},
{'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
{
"params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
"weight_decay": args.weight_decay,
},
{"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
]
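A minimal sketch of how the no_decay filter above splits parameters, using a toy module (parameter names are illustrative):

import torch

no_decay = ["bias", "LayerNorm.weight"]
toy = torch.nn.Linear(4, 2)  # parameters named "weight" and "bias"
decayed = [n for n, p in toy.named_parameters() if not any(nd in n for nd in no_decay)]
exempt = [n for n, p in toy.named_parameters() if any(nd in n for nd in no_decay)]
print(decayed, exempt)  # ['weight'] ['bias']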
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total)
scheduler = get_linear_schedule_with_warmup(
optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total
)
if args.fp16:
try:
from apex import amp
......@@ -112,17 +134,21 @@ def train(args, train_dataset, model, tokenizer):
# Distributed training (should be after apex fp16 initialization)
if args.local_rank != -1:
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
output_device=args.local_rank,
find_unused_parameters=True)
model = torch.nn.parallel.DistributedDataParallel(
model, device_ids=[args.local_rank], output_device=args.local_rank, find_unused_parameters=True
)
# Train!
logger.info("***** Running training *****")
logger.info(" Num examples = %d", len(train_dataset))
logger.info(" Num Epochs = %d", args.num_train_epochs)
logger.info(" Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size)
logger.info(" Total train batch size (w. parallel, distributed & accumulation) = %d",
args.train_batch_size * args.gradient_accumulation_steps * (torch.distributed.get_world_size() if args.local_rank != -1 else 1))
logger.info(
" Total train batch size (w. parallel, distributed & accumulation) = %d",
args.train_batch_size
* args.gradient_accumulation_steps
* (torch.distributed.get_world_size() if args.local_rank != -1 else 1),
)
logger.info(" Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
logger.info(" Total optimization steps = %d", t_total)
......@@ -136,15 +162,16 @@ def train(args, train_dataset, model, tokenizer):
for step, batch in enumerate(epoch_iterator):
model.train()
batch = tuple(t.to(args.device) for t in batch)
inputs = {'input_ids': batch[0],
'attention_mask': batch[1],
'start_positions': batch[3],
'end_positions': batch[4]}
if args.model_type != 'distilbert':
inputs['token_type_ids'] = None if args.model_type == 'xlm' else batch[2]
if args.model_type in ['xlnet', 'xlm']:
inputs.update({'cls_index': batch[5],
'p_mask': batch[6]})
inputs = {
"input_ids": batch[0],
"attention_mask": batch[1],
"start_positions": batch[3],
"end_positions": batch[4],
}
if args.model_type != "distilbert":
inputs["token_type_ids"] = None if args.model_type == "xlm" else batch[2]
if args.model_type in ["xlnet", "xlm"]:
inputs.update({"cls_index": batch[5], "p_mask": batch[6]})
outputs = model(**inputs)
loss = outputs[0] # model outputs are always a tuple in transformers (see doc)
......@@ -173,22 +200,26 @@ def train(args, train_dataset, model, tokenizer):
if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
# Log metrics
if args.local_rank == -1 and args.evaluate_during_training: # Only evaluate when single GPU otherwise metrics may not average well
if (
args.local_rank == -1 and args.evaluate_during_training
): # Only evaluate when using a single GPU, otherwise metrics may not average well
results = evaluate(args, model, tokenizer)
for key, value in results.items():
tb_writer.add_scalar('eval_{}'.format(key), value, global_step)
tb_writer.add_scalar('lr', scheduler.get_lr()[0], global_step)
tb_writer.add_scalar('loss', (tr_loss - logging_loss)/args.logging_steps, global_step)
tb_writer.add_scalar("eval_{}".format(key), value, global_step)
tb_writer.add_scalar("lr", scheduler.get_lr()[0], global_step)
tb_writer.add_scalar("loss", (tr_loss - logging_loss) / args.logging_steps, global_step)
logging_loss = tr_loss
if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
# Save model checkpoint
output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step))
output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
if not os.path.exists(output_dir):
os.makedirs(output_dir)
model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training
model_to_save = (
model.module if hasattr(model, "module") else model
) # Take care of distributed/parallel training
model_to_save.save_pretrained(output_dir)
torch.save(args, os.path.join(output_dir, 'training_args.bin'))
torch.save(args, os.path.join(output_dir, "training_args.bin"))
logger.info("Saving model checkpoint to %s", output_dir)
if args.max_steps > 0 and global_step > args.max_steps:
......@@ -224,32 +255,31 @@ def evaluate(args, model, tokenizer, prefix=""):
model.eval()
batch = tuple(t.to(args.device) for t in batch)
with torch.no_grad():
inputs = {'input_ids': batch[0],
'attention_mask': batch[1]
}
if args.model_type != 'distilbert':
inputs['token_type_ids'] = None if args.model_type == 'xlm' else batch[2] # XLM don't use segment_ids
inputs = {"input_ids": batch[0], "attention_mask": batch[1]}
if args.model_type != "distilbert":
inputs["token_type_ids"] = None if args.model_type == "xlm" else batch[2] # XLM don't use segment_ids
example_indices = batch[3]
if args.model_type in ['xlnet', 'xlm']:
inputs.update({'cls_index': batch[4],
'p_mask': batch[5]})
if args.model_type in ["xlnet", "xlm"]:
inputs.update({"cls_index": batch[4], "p_mask": batch[5]})
outputs = model(**inputs)
for i, example_index in enumerate(example_indices):
eval_feature = features[example_index.item()]
unique_id = int(eval_feature.unique_id)
if args.model_type in ['xlnet', 'xlm']:
if args.model_type in ["xlnet", "xlm"]:
# XLNet uses a more complex post-processing procedure
result = RawResultExtended(unique_id = unique_id,
start_top_log_probs = to_list(outputs[0][i]),
start_top_index = to_list(outputs[1][i]),
end_top_log_probs = to_list(outputs[2][i]),
end_top_index = to_list(outputs[3][i]),
cls_logits = to_list(outputs[4][i]))
result = RawResultExtended(
unique_id=unique_id,
start_top_log_probs=to_list(outputs[0][i]),
start_top_index=to_list(outputs[1][i]),
end_top_log_probs=to_list(outputs[2][i]),
end_top_index=to_list(outputs[3][i]),
cls_logits=to_list(outputs[4][i]),
)
else:
result = RawResult(unique_id = unique_id,
start_logits = to_list(outputs[0][i]),
end_logits = to_list(outputs[1][i]))
result = RawResult(
unique_id=unique_id, start_logits=to_list(outputs[0][i]), end_logits=to_list(outputs[1][i])
)
all_results.append(result)
# Compute predictions
......@@ -260,23 +290,44 @@ def evaluate(args, model, tokenizer, prefix=""):
else:
output_null_log_odds_file = None
if args.model_type in ['xlnet', 'xlm']:
if args.model_type in ["xlnet", "xlm"]:
# XLNet uses a more complex post-processing procedure
write_predictions_extended(examples, features, all_results, args.n_best_size,
args.max_answer_length, output_prediction_file,
output_nbest_file, output_null_log_odds_file, args.predict_file,
model.config.start_n_top, model.config.end_n_top,
args.version_2_with_negative, tokenizer, args.verbose_logging)
write_predictions_extended(
examples,
features,
all_results,
args.n_best_size,
args.max_answer_length,
output_prediction_file,
output_nbest_file,
output_null_log_odds_file,
args.predict_file,
model.config.start_n_top,
model.config.end_n_top,
args.version_2_with_negative,
tokenizer,
args.verbose_logging,
)
else:
write_predictions(examples, features, all_results, args.n_best_size,
args.max_answer_length, args.do_lower_case, output_prediction_file,
output_nbest_file, output_null_log_odds_file, args.verbose_logging,
args.version_2_with_negative, args.null_score_diff_threshold)
write_predictions(
examples,
features,
all_results,
args.n_best_size,
args.max_answer_length,
args.do_lower_case,
output_prediction_file,
output_nbest_file,
output_null_log_odds_file,
args.verbose_logging,
args.version_2_with_negative,
args.null_score_diff_threshold,
)
# Evaluate with the official SQuAD script
evaluate_options = EVAL_OPTS(data_file=args.predict_file,
pred_file=output_prediction_file,
na_prob_file=output_null_log_odds_file)
evaluate_options = EVAL_OPTS(
data_file=args.predict_file, pred_file=output_prediction_file, na_prob_file=output_null_log_odds_file
)
results = evaluate_on_squad(evaluate_options)
return results
......@@ -287,24 +338,30 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
# Load data features from cache or dataset file
input_file = args.predict_file if evaluate else args.train_file
cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format(
'dev' if evaluate else 'train',
list(filter(None, args.model_name_or_path.split('/'))).pop(),
str(args.max_seq_length)))
cached_features_file = os.path.join(
os.path.dirname(input_file),
"cached_{}_{}_{}".format(
"dev" if evaluate else "train",
list(filter(None, args.model_name_or_path.split("/"))).pop(),
str(args.max_seq_length),
),
)
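With hypothetical arguments --model_name_or_path bert-base-uncased and --max_seq_length 384, the cache name above resolves as follows:

print("cached_{}_{}_{}".format("train", "bert-base-uncased", str(384)))
# cached_train_bert-base-uncased_384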
if os.path.exists(cached_features_file) and not args.overwrite_cache and not output_examples:
logger.info("Loading features from cached file %s", cached_features_file)
features = torch.load(cached_features_file)
else:
logger.info("Creating features from dataset file at %s", input_file)
examples = read_squad_examples(input_file=input_file,
is_training=not evaluate,
version_2_with_negative=args.version_2_with_negative)
features = convert_examples_to_features(examples=examples,
examples = read_squad_examples(
input_file=input_file, is_training=not evaluate, version_2_with_negative=args.version_2_with_negative
)
features = convert_examples_to_features(
examples=examples,
tokenizer=tokenizer,
max_seq_length=args.max_seq_length,
doc_stride=args.doc_stride,
max_query_length=args.max_query_length,
is_training=not evaluate)
is_training=not evaluate,
)
if args.local_rank in [-1, 0]:
logger.info("Saving features into cached file %s", cached_features_file)
torch.save(features, cached_features_file)
......@@ -320,14 +377,21 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
all_p_mask = torch.tensor([f.p_mask for f in features], dtype=torch.float)
if evaluate:
all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
all_example_index, all_cls_index, all_p_mask)
dataset = TensorDataset(
all_input_ids, all_input_mask, all_segment_ids, all_example_index, all_cls_index, all_p_mask
)
else:
all_start_positions = torch.tensor([f.start_position for f in features], dtype=torch.long)
all_end_positions = torch.tensor([f.end_position for f in features], dtype=torch.long)
dataset = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
all_start_positions, all_end_positions,
all_cls_index, all_p_mask)
dataset = TensorDataset(
all_input_ids,
all_input_mask,
all_segment_ids,
all_start_positions,
all_end_positions,
all_cls_index,
all_p_mask,
)
if output_examples:
return dataset, examples, features
......@@ -337,110 +401,191 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=Fal
def main():
parser = argparse.ArgumentParser()
## Required parameters
parser.add_argument("--train_file", default=None, type=str, required=True,
help="SQuAD json for training. E.g., train-v1.1.json")
parser.add_argument("--predict_file", default=None, type=str, required=True,
help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json")
parser.add_argument("--model_type", default=None, type=str, required=True,
help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS))
parser.add_argument("--output_dir", default=None, type=str, required=True,
help="The output directory where the model checkpoints and predictions will be written.")
## Other parameters
parser.add_argument("--config_name", default="", type=str,
help="Pretrained config name or path if not the same as model_name")
parser.add_argument("--tokenizer_name", default="", type=str,
help="Pretrained tokenizer name or path if not the same as model_name")
parser.add_argument("--cache_dir", default="", type=str,
help="Where do you want to store the pre-trained models downloaded from s3")
parser.add_argument('--version_2_with_negative', action='store_true',
help='If true, the SQuAD examples contain some that do not have an answer.')
parser.add_argument('--null_score_diff_threshold', type=float, default=0.0,
help="If null_score - best_non_null is greater than the threshold predict null.")
parser.add_argument("--max_seq_length", default=384, type=int,
# Required parameters
parser.add_argument(
"--train_file", default=None, type=str, required=True, help="SQuAD json for training. E.g., train-v1.1.json"
)
parser.add_argument(
"--predict_file",
default=None,
type=str,
required=True,
help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json",
)
parser.add_argument(
"--model_type",
default=None,
type=str,
required=True,
help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()),
)
parser.add_argument(
"--model_name_or_path",
default=None,
type=str,
required=True,
help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS),
)
parser.add_argument(
"--output_dir",
default=None,
type=str,
required=True,
help="The output directory where the model checkpoints and predictions will be written.",
)
# Other parameters
parser.add_argument(
"--config_name", default="", type=str, help="Pretrained config name or path if not the same as model_name"
)
parser.add_argument(
"--tokenizer_name",
default="",
type=str,
help="Pretrained tokenizer name or path if not the same as model_name",
)
parser.add_argument(
"--cache_dir",
default="",
type=str,
help="Where do you want to store the pre-trained models downloaded from s3",
)
parser.add_argument(
"--version_2_with_negative",
action="store_true",
help="If true, the SQuAD examples contain some that do not have an answer.",
)
parser.add_argument(
"--null_score_diff_threshold",
type=float,
default=0.0,
help="If null_score - best_non_null is greater than the threshold predict null.",
)
parser.add_argument(
"--max_seq_length",
default=384,
type=int,
help="The maximum total input sequence length after WordPiece tokenization. Sequences "
"longer than this will be truncated, and sequences shorter than this will be padded.")
parser.add_argument("--doc_stride", default=128, type=int,
help="When splitting up a long document into chunks, how much stride to take between chunks.")
parser.add_argument("--max_query_length", default=64, type=int,
"longer than this will be truncated, and sequences shorter than this will be padded.",
)
parser.add_argument(
"--doc_stride",
default=128,
type=int,
help="When splitting up a long document into chunks, how much stride to take between chunks.",
)
parser.add_argument(
"--max_query_length",
default=64,
type=int,
help="The maximum number of tokens for the question. Questions longer than this will "
"be truncated to this length.")
parser.add_argument("--do_train", action='store_true',
help="Whether to run training.")
parser.add_argument("--do_eval", action='store_true',
help="Whether to run eval on the dev set.")
parser.add_argument("--evaluate_during_training", action='store_true',
help="Rul evaluation during training at each logging step.")
parser.add_argument("--do_lower_case", action='store_true',
help="Set this flag if you are using an uncased model.")
parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
help="Batch size per GPU/CPU for training.")
parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
help="Batch size per GPU/CPU for evaluation.")
parser.add_argument("--learning_rate", default=5e-5, type=float,
help="The initial learning rate for Adam.")
parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
help="Number of updates steps to accumulate before performing a backward/update pass.")
parser.add_argument("--weight_decay", default=0.0, type=float,
help="Weight deay if we apply some.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float,
help="Epsilon for Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float,
help="Max gradient norm.")
parser.add_argument("--num_train_epochs", default=3.0, type=float,
help="Total number of training epochs to perform.")
parser.add_argument("--max_steps", default=-1, type=int,
help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
parser.add_argument("--warmup_steps", default=0, type=int,
help="Linear warmup over warmup_steps.")
parser.add_argument("--n_best_size", default=20, type=int,
help="The total number of n-best predictions to generate in the nbest_predictions.json output file.")
parser.add_argument("--max_answer_length", default=30, type=int,
"be truncated to this length.",
)
parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
parser.add_argument(
"--evaluate_during_training", action="store_true", help="Rul evaluation during training at each logging step."
)
parser.add_argument(
"--do_lower_case", action="store_true", help="Set this flag if you are using an uncased model."
)
parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.")
parser.add_argument(
"--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for evaluation."
)
parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
parser.add_argument(
"--gradient_accumulation_steps",
type=int,
default=1,
help="Number of updates steps to accumulate before performing a backward/update pass.",
)
parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight deay if we apply some.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
parser.add_argument(
"--num_train_epochs", default=3.0, type=float, help="Total number of training epochs to perform."
)
parser.add_argument(
"--max_steps",
default=-1,
type=int,
help="If > 0: set total number of training steps to perform. Override num_train_epochs.",
)
parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
parser.add_argument(
"--n_best_size",
default=20,
type=int,
help="The total number of n-best predictions to generate in the nbest_predictions.json output file.",
)
parser.add_argument(
"--max_answer_length",
default=30,
type=int,
help="The maximum length of an answer that can be generated. This is needed because the start "
"and end predictions are not conditioned on one another.")
parser.add_argument("--verbose_logging", action='store_true',
"and end predictions are not conditioned on one another.",
)
parser.add_argument(
"--verbose_logging",
action="store_true",
help="If true, all of the warnings related to data processing will be printed. "
"A number of warnings are expected for a normal SQuAD evaluation.")
parser.add_argument('--logging_steps', type=int, default=50,
help="Log every X updates steps.")
parser.add_argument('--save_steps', type=int, default=50,
help="Save checkpoint every X updates steps.")
parser.add_argument("--eval_all_checkpoints", action='store_true',
help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number")
parser.add_argument("--no_cuda", action='store_true',
help="Whether not to use CUDA when available")
parser.add_argument('--overwrite_output_dir', action='store_true',
help="Overwrite the content of the output directory")
parser.add_argument('--overwrite_cache', action='store_true',
help="Overwrite the cached training and evaluation sets")
parser.add_argument('--seed', type=int, default=42,
help="random seed for initialization")
parser.add_argument("--local_rank", type=int, default=-1,
help="local_rank for distributed training on gpus")
parser.add_argument('--fp16', action='store_true',
help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit")
parser.add_argument('--fp16_opt_level', type=str, default='O1',
"A number of warnings are expected for a normal SQuAD evaluation.",
)
parser.add_argument("--logging_steps", type=int, default=50, help="Log every X updates steps.")
parser.add_argument("--save_steps", type=int, default=50, help="Save checkpoint every X updates steps.")
parser.add_argument(
"--eval_all_checkpoints",
action="store_true",
help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number",
)
parser.add_argument("--no_cuda", action="store_true", help="Whether not to use CUDA when available")
parser.add_argument(
"--overwrite_output_dir", action="store_true", help="Overwrite the content of the output directory"
)
parser.add_argument(
"--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets"
)
parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
parser.add_argument("--local_rank", type=int, default=-1, help="local_rank for distributed training on gpus")
parser.add_argument(
"--fp16",
action="store_true",
help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
)
parser.add_argument(
"--fp16_opt_level",
type=str,
default="O1",
help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
"See details at https://nvidia.github.io/apex/amp.html")
parser.add_argument('--server_ip', type=str, default='', help="Can be used for distant debugging.")
parser.add_argument('--server_port', type=str, default='', help="Can be used for distant debugging.")
"See details at https://nvidia.github.io/apex/amp.html",
)
parser.add_argument("--server_ip", type=str, default="", help="Can be used for distant debugging.")
parser.add_argument("--server_port", type=str, default="", help="Can be used for distant debugging.")
args = parser.parse_args()
if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir:
raise ValueError("Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(args.output_dir))
if (
os.path.exists(args.output_dir)
and os.listdir(args.output_dir)
and args.do_train
and not args.overwrite_output_dir
):
raise ValueError(
"Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
args.output_dir
)
)
# Setup distant debugging if needed
if args.server_ip and args.server_port:
# Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
import ptvsd
print("Waiting for debugger attach")
ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
ptvsd.wait_for_attach()
......@@ -452,16 +597,24 @@ def main():
else: # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
torch.cuda.set_device(args.local_rank)
device = torch.device("cuda", args.local_rank)
torch.distributed.init_process_group(backend='nccl')
torch.distributed.init_process_group(backend="nccl")
args.n_gpu = 1
args.device = device
# Setup logging
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
datefmt = '%m/%d/%Y %H:%M:%S',
level = logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16)
logging.basicConfig(
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
)
logger.warning(
"Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
args.local_rank,
device,
args.n_gpu,
bool(args.local_rank != -1),
args.fp16,
)
# Set seed
set_seed(args)
......@@ -472,15 +625,21 @@ def main():
args.model_type = args.model_type.lower()
config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name_or_path,
cache_dir=args.cache_dir if args.cache_dir else None)
tokenizer = tokenizer_class.from_pretrained(args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
config = config_class.from_pretrained(
args.config_name if args.config_name else args.model_name_or_path,
cache_dir=args.cache_dir if args.cache_dir else None,
)
tokenizer = tokenizer_class.from_pretrained(
args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
do_lower_case=args.do_lower_case,
cache_dir=args.cache_dir if args.cache_dir else None)
model = model_class.from_pretrained(args.model_name_or_path,
from_tf=bool('.ckpt' in args.model_name_or_path),
cache_dir=args.cache_dir if args.cache_dir else None,
)
model = model_class.from_pretrained(
args.model_name_or_path,
from_tf=bool(".ckpt" in args.model_name_or_path),
config=config,
cache_dir=args.cache_dir if args.cache_dir else None)
cache_dir=args.cache_dir if args.cache_dir else None,
)
if args.local_rank == 0:
torch.distributed.barrier() # Make sure only the first process in distributed training will download model & vocab
......@@ -495,7 +654,8 @@ def main():
if args.fp16:
try:
import apex
apex.amp.register_half_function(torch, 'einsum')
apex.amp.register_half_function(torch, "einsum")
except ImportError:
raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
......@@ -505,7 +665,6 @@ def main():
global_step, tr_loss = train(args, train_dataset, model, tokenizer)
logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)
# Save the trained model and the tokenizer
if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
# Create output directory if needed
......@@ -515,39 +674,42 @@ def main():
logger.info("Saving model checkpoint to %s", args.output_dir)
# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
model_to_save = model.module if hasattr(model, 'module') else model # Take care of distributed/parallel training
model_to_save = (
model.module if hasattr(model, "module") else model
) # Take care of distributed/parallel training
model_to_save.save_pretrained(args.output_dir)
tokenizer.save_pretrained(args.output_dir)
# Good practice: save your training arguments together with the trained model
torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))
torch.save(args, os.path.join(args.output_dir, "training_args.bin"))
# Load a trained model and vocabulary that you have fine-tuned
model = model_class.from_pretrained(args.output_dir)
tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
model.to(args.device)
# Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
results = {}
if args.do_eval and args.local_rank in [-1, 0]:
checkpoints = [args.output_dir]
if args.eval_all_checkpoints:
checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
checkpoints = list(
os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))
)
logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN) # Reduce model loading logs
logger.info("Evaluate the following checkpoints: %s", checkpoints)
for checkpoint in checkpoints:
# Reload the model
global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else ""
model = model_class.from_pretrained(checkpoint)
model.to(args.device)
# Evaluate
result = evaluate(args, model, tokenizer, prefix=global_step)
result = dict((k + ('_{}'.format(global_step) if global_step else ''), v) for k, v in result.items())
result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items())
results.update(result)
logger.info("Results: {}".format(results))
......
# coding=utf-8
# Copyright 2018 XXX. All rights reserved.
#
......@@ -17,16 +16,17 @@
from __future__ import absolute_import, division, print_function
import collections
import json
import logging
import math
import collections
from io import open
from transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize
# Required by XLNet evaluation method to compute optimal threshold (see write_predictions_extended() method)
from utils_squad_evaluate import find_all_best_thresh_v2, make_qid_to_has_ans, get_raw_scores
from utils_squad_evaluate import find_all_best_thresh_v2, get_raw_scores, make_qid_to_has_ans
logger = logging.getLogger(__name__)
......@@ -37,14 +37,16 @@ class SquadExample(object):
For examples without an answer, the start and end position are -1.
"""
def __init__(self,
def __init__(
self,
qas_id,
question_text,
doc_tokens,
orig_answer_text=None,
start_position=None,
end_position=None,
is_impossible=None):
is_impossible=None,
):
self.qas_id = qas_id
self.question_text = question_text
self.doc_tokens = doc_tokens
......@@ -59,8 +61,7 @@ class SquadExample(object):
def __repr__(self):
s = ""
s += "qas_id: %s" % (self.qas_id)
s += ", question_text: %s" % (
self.question_text)
s += ", question_text: %s" % (self.question_text)
s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
if self.start_position:
s += ", start_position: %d" % (self.start_position)
......@@ -74,7 +75,8 @@ class SquadExample(object):
class InputFeatures(object):
"""A single set of features of data."""
def __init__(self,
def __init__(
self,
unique_id,
example_index,
doc_span_index,
......@@ -89,7 +91,8 @@ class InputFeatures(object):
paragraph_len,
start_position=None,
end_position=None,
is_impossible=None):
is_impossible=None,
):
self.unique_id = unique_id
self.example_index = example_index
self.doc_span_index = doc_span_index
......@@ -109,7 +112,7 @@ class InputFeatures(object):
def read_squad_examples(input_file, is_training, version_2_with_negative):
"""Read a SQuAD json file into a list of SquadExample."""
with open(input_file, "r", encoding='utf-8') as reader:
with open(input_file, "r", encoding="utf-8") as reader:
input_data = json.load(reader)["data"]
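A trimmed, hypothetical sketch of the SQuAD v2.0 JSON layout this reader assumes (field values are illustrative):

input_data = [
    {
        "title": "Example",
        "paragraphs": [
            {
                "context": "The leader was John Smith (1895-1943).",
                "qas": [
                    {
                        "id": "example-0",
                        "question": "When was John Smith born?",
                        "is_impossible": False,  # only present in v2.0-style data
                        "answers": [{"text": "1895", "answer_start": 27}],
                    }
                ],
            }
        ],
    }
]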
def is_whitespace(c):
......@@ -146,8 +149,7 @@ def read_squad_examples(input_file, is_training, version_2_with_negative):
if version_2_with_negative:
is_impossible = qa["is_impossible"]
if (len(qa["answers"]) != 1) and (not is_impossible):
raise ValueError(
"For training, each question should have exactly 1 answer.")
raise ValueError("For training, each question should have exactly 1 answer.")
if not is_impossible:
answer = qa["answers"][0]
orig_answer_text = answer["text"]
......@@ -161,12 +163,10 @@ def read_squad_examples(input_file, is_training, version_2_with_negative):
#
# Note that this means for training mode, every example is NOT
# guaranteed to be preserved.
actual_text = " ".join(doc_tokens[start_position:(end_position + 1)])
cleaned_answer_text = " ".join(
whitespace_tokenize(orig_answer_text))
actual_text = " ".join(doc_tokens[start_position : (end_position + 1)])
cleaned_answer_text = " ".join(whitespace_tokenize(orig_answer_text))
if actual_text.find(cleaned_answer_text) == -1:
logger.warning("Could not find answer: '%s' vs. '%s'",
actual_text, cleaned_answer_text)
logger.warning("Could not find answer: '%s' vs. '%s'", actual_text, cleaned_answer_text)
continue
else:
start_position = -1
......@@ -180,18 +180,29 @@ def read_squad_examples(input_file, is_training, version_2_with_negative):
orig_answer_text=orig_answer_text,
start_position=start_position,
end_position=end_position,
is_impossible=is_impossible)
is_impossible=is_impossible,
)
examples.append(example)
return examples
def convert_examples_to_features(examples, tokenizer, max_seq_length,
doc_stride, max_query_length, is_training,
def convert_examples_to_features(
examples,
tokenizer,
max_seq_length,
doc_stride,
max_query_length,
is_training,
cls_token_at_end=False,
cls_token='[CLS]', sep_token='[SEP]', pad_token=0,
sequence_a_segment_id=0, sequence_b_segment_id=1,
cls_token_segment_id=0, pad_token_segment_id=0,
mask_padding_with_zero=True):
cls_token="[CLS]",
sep_token="[SEP]",
pad_token=0,
sequence_a_segment_id=0,
sequence_b_segment_id=1,
cls_token_segment_id=0,
pad_token_segment_id=0,
mask_padding_with_zero=True,
):
"""Loads a data file into a list of `InputBatch`s."""
unique_id = 1000000000
......@@ -232,8 +243,8 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
else:
tok_end_position = len(all_doc_tokens) - 1
(tok_start_position, tok_end_position) = _improve_answer_span(
all_doc_tokens, tok_start_position, tok_end_position, tokenizer,
example.orig_answer_text)
all_doc_tokens, tok_start_position, tok_end_position, tokenizer, example.orig_answer_text
)
# The -3 accounts for [CLS], [SEP] and [SEP]
max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
......@@ -241,8 +252,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
# We can have documents that are longer than the maximum sequence length.
# To deal with this we do a sliding window approach, where we take chunks
# of up to our max length with a stride of `doc_stride`.
_DocSpan = collections.namedtuple( # pylint: disable=invalid-name
"DocSpan", ["start", "length"])
_DocSpan = collections.namedtuple("DocSpan", ["start", "length"]) # pylint: disable=invalid-name
doc_spans = []
start_offset = 0
while start_offset < len(all_doc_tokens):
......@@ -287,8 +297,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
split_token_index = doc_span.start + i
token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]
is_max_context = _check_is_max_context(doc_spans, doc_span_index,
split_token_index)
is_max_context = _check_is_max_context(doc_spans, doc_span_index, split_token_index)
token_is_max_context[len(tokens)] = is_max_context
tokens.append(all_doc_tokens[split_token_index])
segment_ids.append(sequence_b_segment_id)
......@@ -333,8 +342,7 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
doc_start = doc_span.start
doc_end = doc_span.start + doc_span.length - 1
out_of_span = False
if not (tok_start_position >= doc_start and
tok_end_position <= doc_end):
if not (tok_start_position >= doc_start and tok_end_position <= doc_end):
out_of_span = True
if out_of_span:
start_position = 0
......@@ -355,24 +363,23 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
logger.info("example_index: %s" % (example_index))
logger.info("doc_span_index: %s" % (doc_span_index))
logger.info("tokens: %s" % " ".join(tokens))
logger.info("token_to_orig_map: %s" % " ".join([
"%d:%d" % (x, y) for (x, y) in token_to_orig_map.items()]))
logger.info("token_is_max_context: %s" % " ".join([
"%d:%s" % (x, y) for (x, y) in token_is_max_context.items()
]))
logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
logger.info(
"input_mask: %s" % " ".join([str(x) for x in input_mask]))
"token_to_orig_map: %s" % " ".join(["%d:%d" % (x, y) for (x, y) in token_to_orig_map.items()])
)
logger.info(
"segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
"token_is_max_context: %s"
% " ".join(["%d:%s" % (x, y) for (x, y) in token_is_max_context.items()])
)
logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
logger.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
logger.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
if is_training and span_is_impossible:
logger.info("impossible example")
if is_training and not span_is_impossible:
answer_text = " ".join(tokens[start_position:(end_position + 1)])
answer_text = " ".join(tokens[start_position : (end_position + 1)])
logger.info("start_position: %d" % (start_position))
logger.info("end_position: %d" % (end_position))
logger.info(
"answer: %s" % (answer_text))
logger.info("answer: %s" % (answer_text))
features.append(
InputFeatures(
......@@ -390,14 +397,15 @@ def convert_examples_to_features(examples, tokenizer, max_seq_length,
paragraph_len=paragraph_len,
start_position=start_position,
end_position=end_position,
is_impossible=span_is_impossible))
is_impossible=span_is_impossible,
)
)
unique_id += 1
return features
def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
orig_answer_text):
def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, orig_answer_text):
"""Returns tokenized answer spans that better match the annotated answer."""
# The SQuAD annotations are character based. We first project them to
......@@ -426,7 +434,7 @@ def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
for new_start in range(input_start, input_end + 1):
for new_end in range(input_end, new_start - 1, -1):
text_span = " ".join(doc_tokens[new_start:(new_end + 1)])
text_span = " ".join(doc_tokens[new_start : (new_end + 1)])
if text_span == tok_answer_text:
return (new_start, new_end)
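A worked example of the span-tightening search above: the annotated span covers "( 1895 - 1943 )", while the (whitespace-tokenized) answer is just "1895":

doc_tokens = ["(", "1895", "-", "1943", ")"]
tok_answer_text = "1895"
input_start, input_end = 0, 4
for new_start in range(input_start, input_end + 1):
    for new_end in range(input_end, new_start - 1, -1):
        text_span = " ".join(doc_tokens[new_start : (new_end + 1)])
        if text_span == tok_answer_text:
            print(new_start, new_end)  # 1 1 -> the span is tightened to just "1895"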
......@@ -470,13 +478,23 @@ def _check_is_max_context(doc_spans, cur_span_index, position):
return cur_span_index == best_span_index
RawResult = collections.namedtuple("RawResult",
["unique_id", "start_logits", "end_logits"])
RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])
def write_predictions(all_examples, all_features, all_results, n_best_size,
max_answer_length, do_lower_case, output_prediction_file,
output_nbest_file, output_null_log_odds_file, verbose_logging,
version_2_with_negative, null_score_diff_threshold):
def write_predictions(
all_examples,
all_features,
all_results,
n_best_size,
max_answer_length,
do_lower_case,
output_prediction_file,
output_nbest_file,
output_null_log_odds_file,
verbose_logging,
version_2_with_negative,
null_score_diff_threshold,
):
"""Write final predictions to the json file and log-odds of null if needed."""
logger.info("Writing predictions to: %s" % (output_prediction_file))
logger.info("Writing nbest to: %s" % (output_nbest_file))
......@@ -490,8 +508,8 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
unique_id_to_result[result.unique_id] = result
_PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name
"PrelimPrediction",
["feature_index", "start_index", "end_index", "start_logit", "end_logit"])
"PrelimPrediction", ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]
)
all_predictions = collections.OrderedDict()
all_nbest_json = collections.OrderedDict()
......@@ -544,7 +562,9 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
start_index=start_index,
end_index=end_index,
start_logit=result.start_logits[start_index],
end_logit=result.end_logits[end_index]))
end_logit=result.end_logits[end_index],
)
)
if version_2_with_negative:
prelim_predictions.append(
_PrelimPrediction(
......@@ -552,14 +572,14 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
start_index=0,
end_index=0,
start_logit=null_start_logit,
end_logit=null_end_logit))
prelim_predictions = sorted(
prelim_predictions,
key=lambda x: (x.start_logit + x.end_logit),
reverse=True)
end_logit=null_end_logit,
)
)
prelim_predictions = sorted(prelim_predictions, key=lambda x: (x.start_logit + x.end_logit), reverse=True)
_NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name
"NbestPrediction", ["text", "start_logit", "end_logit"])
"NbestPrediction", ["text", "start_logit", "end_logit"]
)
seen_predictions = {}
nbest = []
......@@ -568,10 +588,10 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
break
feature = features[pred.feature_index]
if pred.start_index > 0: # this is a non-null prediction
tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
tok_tokens = feature.tokens[pred.start_index : (pred.end_index + 1)]
orig_doc_start = feature.token_to_orig_map[pred.start_index]
orig_doc_end = feature.token_to_orig_map[pred.end_index]
orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
orig_tokens = example.doc_tokens[orig_doc_start : (orig_doc_end + 1)]
tok_text = " ".join(tok_tokens)
# De-tokenize WordPieces that have been split off.
......@@ -592,31 +612,21 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
final_text = ""
seen_predictions[final_text] = True
nbest.append(
_NbestPrediction(
text=final_text,
start_logit=pred.start_logit,
end_logit=pred.end_logit))
nbest.append(_NbestPrediction(text=final_text, start_logit=pred.start_logit, end_logit=pred.end_logit))
# if we didn't include the empty option in the n-best, include it
if version_2_with_negative:
if "" not in seen_predictions:
nbest.append(
_NbestPrediction(
text="",
start_logit=null_start_logit,
end_logit=null_end_logit))
nbest.append(_NbestPrediction(text="", start_logit=null_start_logit, end_logit=null_end_logit))
# In very rare edge cases we could have only a single null prediction.
# So we just create a nonce prediction in this case to avoid failure.
if len(nbest)==1:
nbest.insert(0,
_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
if len(nbest) == 1:
nbest.insert(0, _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
# In very rare edge cases we could have no valid predictions. So we
# just create a nonce prediction in this case to avoid failure.
if not nbest:
nbest.append(
_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
nbest.append(_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
assert len(nbest) >= 1
......@@ -645,8 +655,7 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
all_predictions[example.qas_id] = nbest_json[0]["text"]
else:
# predict "" iff the null score - the score of best non-null > threshold
score_diff = score_null - best_non_null_entry.start_logit - (
best_non_null_entry.end_logit)
score_diff = score_null - best_non_null_entry.start_logit - (best_non_null_entry.end_logit)
scores_diff_json[example.qas_id] = score_diff
if score_diff > null_score_diff_threshold:
all_predictions[example.qas_id] = ""
......@@ -668,29 +677,40 @@ def write_predictions(all_examples, all_features, all_results, n_best_size,
# For XLNet (and XLM which uses the same head)
RawResultExtended = collections.namedtuple("RawResultExtended",
["unique_id", "start_top_log_probs", "start_top_index",
"end_top_log_probs", "end_top_index", "cls_logits"])
def write_predictions_extended(all_examples, all_features, all_results, n_best_size,
max_answer_length, output_prediction_file,
RawResultExtended = collections.namedtuple(
"RawResultExtended",
["unique_id", "start_top_log_probs", "start_top_index", "end_top_log_probs", "end_top_index", "cls_logits"],
)
def write_predictions_extended(
all_examples,
all_features,
all_results,
n_best_size,
max_answer_length,
output_prediction_file,
output_nbest_file,
output_null_log_odds_file, orig_data_file,
start_n_top, end_n_top, version_2_with_negative,
tokenizer, verbose_logging):
output_null_log_odds_file,
orig_data_file,
start_n_top,
end_n_top,
version_2_with_negative,
tokenizer,
verbose_logging,
):
""" XLNet write prediction logic (more complex than Bert's).
Write final predictions to the json file and log-odds of null if needed.
Requires utils_squad_evaluate.py
"""
_PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name
"PrelimPrediction",
["feature_index", "start_index", "end_index",
"start_log_prob", "end_log_prob"])
"PrelimPrediction", ["feature_index", "start_index", "end_index", "start_log_prob", "end_log_prob"]
)
_NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name
"NbestPrediction", ["text", "start_log_prob", "end_log_prob"])
"NbestPrediction", ["text", "start_log_prob", "end_log_prob"]
)
logger.info("Writing predictions to: %s", output_prediction_file)
# logger.info("Writing nbest to: %s" % (output_nbest_file))
......@@ -754,12 +774,13 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s
start_index=start_index,
end_index=end_index,
start_log_prob=start_log_prob,
end_log_prob=end_log_prob))
end_log_prob=end_log_prob,
)
)
prelim_predictions = sorted(
prelim_predictions,
key=lambda x: (x.start_log_prob + x.end_log_prob),
reverse=True)
prelim_predictions, key=lambda x: (x.start_log_prob + x.end_log_prob), reverse=True
)
seen_predictions = {}
nbest = []
......@@ -779,10 +800,10 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s
# final_text = paragraph_text[start_orig_pos: end_orig_pos + 1].strip()
# Previously used Bert untokenizer
tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
tok_tokens = feature.tokens[pred.start_index : (pred.end_index + 1)]
orig_doc_start = feature.token_to_orig_map[pred.start_index]
orig_doc_end = feature.token_to_orig_map[pred.end_index]
orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
orig_tokens = example.doc_tokens[orig_doc_start : (orig_doc_end + 1)]
tok_text = tokenizer.convert_tokens_to_string(tok_tokens)
# Clean whitespace
......@@ -790,8 +811,7 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s
tok_text = " ".join(tok_text.split())
orig_text = " ".join(orig_tokens)
final_text = get_final_text(tok_text, orig_text, tokenizer.do_lower_case,
verbose_logging)
final_text = get_final_text(tok_text, orig_text, tokenizer.do_lower_case, verbose_logging)
if final_text in seen_predictions:
continue
......@@ -799,17 +819,13 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s
seen_predictions[final_text] = True
nbest.append(
_NbestPrediction(
text=final_text,
start_log_prob=pred.start_log_prob,
end_log_prob=pred.end_log_prob))
_NbestPrediction(text=final_text, start_log_prob=pred.start_log_prob, end_log_prob=pred.end_log_prob)
)
# In very rare edge cases we could have no valid predictions. So we
# just create a nonce prediction in this case to avoid failure.
if not nbest:
nbest.append(
_NbestPrediction(text="", start_log_prob=-1e6,
end_log_prob=-1e6))
nbest.append(_NbestPrediction(text="", start_log_prob=-1e6, end_log_prob=-1e6))
total_scores = []
best_non_null_entry = None
......@@ -850,7 +866,7 @@ def write_predictions_extended(all_examples, all_features, all_results, n_best_s
with open(output_null_log_odds_file, "w") as writer:
writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
with open(orig_data_file, "r", encoding='utf-8') as reader:
with open(orig_data_file, "r", encoding="utf-8") as reader:
orig_data = json.load(reader)["data"]
qid_to_has_ans = make_qid_to_has_ans(orig_data)
......@@ -914,8 +930,7 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False):
start_position = tok_text.find(pred_text)
if start_position == -1:
if verbose_logging:
logger.info(
"Unable to find text: '%s' in '%s'" % (pred_text, orig_text))
logger.info("Unable to find text: '%s' in '%s'" % (pred_text, orig_text))
return orig_text
end_position = start_position + len(pred_text) - 1
......@@ -924,8 +939,7 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False):
if len(orig_ns_text) != len(tok_ns_text):
if verbose_logging:
logger.info("Length not equal after stripping spaces: '%s' vs '%s'",
orig_ns_text, tok_ns_text)
logger.info("Length not equal after stripping spaces: '%s' vs '%s'", orig_ns_text, tok_ns_text)
return orig_text
# We then project the characters in `pred_text` back to `orig_text` using
......@@ -956,7 +970,7 @@ def get_final_text(pred_text, orig_text, do_lower_case, verbose_logging=False):
logger.info("Couldn't map end position")
return orig_text
output_text = orig_text[orig_start_position:(orig_end_position + 1)]
output_text = orig_text[orig_start_position : (orig_end_position + 1)]
return output_text
......
......@@ -16,19 +16,16 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import logging
import sys
import six
from io import open
from .configuration_utils import PretrainedConfig
logger = logging.getLogger(__name__)
XXX_PRETRAINED_CONFIG_ARCHIVE_MAP = {
'xxx-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-base-uncased-config.json",
'xxx-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-large-uncased-config.json",
"xxx-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-base-uncased-config.json",
"xxx-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-large-uncased-config.json",
}
......@@ -63,7 +60,8 @@ class XxxConfig(PretrainedConfig):
"""
pretrained_config_archive_map = XXX_PRETRAINED_CONFIG_ARCHIVE_MAP
def __init__(self,
def __init__(
self,
vocab_size=50257,
n_positions=1024,
n_ctx=1024,
......@@ -75,12 +73,13 @@ class XxxConfig(PretrainedConfig):
attn_pdrop=0.1,
layer_norm_epsilon=1e-5,
initializer_range=0.02,
summary_type='cls_index',
summary_type="cls_index",
summary_use_proj=True,
summary_activation=None,
summary_proj_to_labels=True,
summary_first_dropout=0.1,
**kwargs):
**kwargs
):
super(XxxConfig, self).__init__(**kwargs)
self.vocab_size = vocab_size
self.n_ctx = n_ctx
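# [editor's note, not part of the diff] The template config is a plain hyperparameter
# container: anything not passed explicitly keeps the defaults in the signature above,
# and unknown keyword arguments flow through **kwargs to PretrainedConfig. A
# hypothetical instantiation (names taken from this template):
#
#     config = XxxConfig(vocab_size=32000, summary_type="cls_index")
#     assert config.n_ctx == 1024  # default preserved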
......
......@@ -14,18 +14,19 @@
# limitations under the License.
"""Convert XXX checkpoint."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import argparse
import logging
import torch
from transformers import XxxConfig, XxxForPreTraining, load_tf_weights_in_xxx
import logging
logging.basicConfig(level=logging.INFO)
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_dump_path):
# Initialise PyTorch model
config = XxxConfig.from_json_file(config_file)
......@@ -42,24 +43,20 @@ def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, config_file, pytorch_du
if __name__ == "__main__":
parser = argparse.ArgumentParser()
## Required parameters
parser.add_argument("--tf_checkpoint_path",
default = None,
type = str,
required = True,
help = "Path to the TensorFlow checkpoint path.")
parser.add_argument("--config_file",
default = None,
type = str,
required = True,
help = "The config json file corresponding to the pre-trained model. \n"
"This specifies the model architecture.")
parser.add_argument("--pytorch_dump_path",
default = None,
type = str,
required = True,
help = "Path to the output PyTorch model.")
# Required parameters
parser.add_argument(
"--tf_checkpoint_path", default=None, type=str, required=True, help="Path to the TensorFlow checkpoint path."
)
parser.add_argument(
"--config_file",
default=None,
type=str,
required=True,
help="The config json file corresponding to the pre-trained model. \n"
"This specifies the model architecture.",
)
parser.add_argument(
"--pytorch_dump_path", default=None, type=str, required=True, help="Path to the output PyTorch model."
)
args = parser.parse_args()
convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path,
args.config_file,
args.pytorch_dump_path)
convert_tf_checkpoint_to_pytorch(args.tf_checkpoint_path, args.config_file, args.pytorch_dump_path)
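# [editor's note, not part of the diff] A hypothetical invocation of this conversion
# script; the file name and all paths below are placeholders, not real artifacts:
#
#     python convert_xxx_original_tf_checkpoint_to_pytorch.py \
#         --tf_checkpoint_path /path/to/model.ckpt \
#         --config_file /path/to/config.json \
#         --pytorch_dump_path /path/to/pytorch_model.bin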
......@@ -21,21 +21,14 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import logging
import math
import os
import sys
import copy
import itertools
from io import open
import numpy as np
import tensorflow as tf
from .configuration_xxx import XxxConfig
from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list
from .file_utils import add_start_docstrings
from .modeling_tf_utils import TFPreTrainedModel, get_initializer, shape_list
logger = logging.getLogger(__name__)
......@@ -44,10 +37,11 @@ logger = logging.getLogger(__name__)
# for the pretrained weights provided with the models
####################################################
TF_XXX_PRETRAINED_MODEL_ARCHIVE_MAP = {
'xxx-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-base-uncased-tf_model.h5",
'xxx-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-large-uncased-tf_model.h5",
"xxx-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-base-uncased-tf_model.h5",
"xxx-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-large-uncased-tf_model.h5",
}
####################################################
# TF 2.0 Models are constructed using Keras imperative API by sub-classing
# - tf.keras.layers.Layer for the layers and
......@@ -66,12 +60,20 @@ TF_XXX_PRETRAINED_MODEL_ARCHIVE_MAP = {
#
# See the conversion methods in modeling_tf_pytorch_utils.py for more details
####################################################
TFXxxAttention = tf.keras.layers.Layer
TFXxxIntermediate = tf.keras.layers.Layer
TFXxxOutput = tf.keras.layers.Layer
class TFXxxLayer(tf.keras.layers.Layer):
def __init__(self, config, **kwargs):
super(TFXxxLayer, self).__init__(**kwargs)
self.attention = TFXxxAttention(config, name='attention')
self.intermediate = TFXxxIntermediate(config, name='intermediate')
self.transformer_output = TFXxxOutput(config, name='output')
self.attention = TFXxxAttention(config, name="attention")
self.intermediate = TFXxxIntermediate(config, name="intermediate")
self.transformer_output = TFXxxOutput(config, name="output")
def call(self, inputs, training=False):
hidden_states, attention_mask, head_mask = inputs
......@@ -98,7 +100,9 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
def _prune_heads(self, heads_to_prune):
raise NotImplementedError # Not implemented yet in the library for TF 2.0 models
def call(self, inputs, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, training=False):
def call(
self, inputs, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, training=False
):
# We allow three types of multi-inputs:
# - traditional keyword arguments in the call method
# - all the arguments provided as a dict in the first positional argument of call
......@@ -113,11 +117,11 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
head_mask = inputs[4] if len(inputs) > 4 else head_mask
assert len(inputs) <= 5, "Too many inputs."
elif isinstance(inputs, dict):
input_ids = inputs.get('input_ids')
attention_mask = inputs.get('attention_mask', attention_mask)
token_type_ids = inputs.get('token_type_ids', token_type_ids)
position_ids = inputs.get('position_ids', position_ids)
head_mask = inputs.get('head_mask', head_mask)
input_ids = inputs.get("input_ids")
attention_mask = inputs.get("attention_mask", attention_mask)
token_type_ids = inputs.get("token_type_ids", token_type_ids)
position_ids = inputs.get("position_ids", position_ids)
head_mask = inputs.get("head_mask", head_mask)
assert len(inputs) <= 5, "Too many inputs."
else:
input_ids = inputs
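# [editor's note, not part of the diff] Given the branching above, the following
# hypothetical calls are equivalent ways to feed the same tensors to a built model:
#
#     model(input_ids, attention_mask=mask)                     # plain keyword arguments
#     model([input_ids, mask])                                  # list/tuple of positional inputs
#     model({"input_ids": input_ids, "attention_mask": mask})   # dict keyed by argument name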
......@@ -148,7 +152,7 @@ class TFXxxMainLayer(tf.keras.layers.Layer):
# attention_probs has shape bsz x n_heads x N x N
# input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
# and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
if not head_mask is None:
if head_mask is not None:
raise NotImplementedError
else:
head_mask = [None] * self.num_hidden_layers
......@@ -175,6 +179,7 @@ class TFXxxPreTrainedModel(TFPreTrainedModel):
""" An abstract class to handle weights initialization and
a simple interface for dowloading and loading pretrained models.
"""
config_class = XxxConfig
pretrained_model_archive_map = TF_XXX_PRETRAINED_MODEL_ARCHIVE_MAP
base_model_prefix = "transformer"
......@@ -263,8 +268,12 @@ XXX_INPUTS_DOCSTRING = r"""
than the model's internal embedding lookup matrix.
"""
@add_start_docstrings("The bare Xxx Model transformer outputing raw hidden-states without any specific head on top.",
XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING)
@add_start_docstrings(
"The bare Xxx Model transformer outputing raw hidden-states without any specific head on top.",
XXX_START_DOCSTRING,
XXX_INPUTS_DOCSTRING,
)
class TFXxxModel(TFXxxPreTrainedModel):
r"""
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
......@@ -297,17 +306,22 @@ class TFXxxModel(TFXxxPreTrainedModel):
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
"""
def __init__(self, config, *inputs, **kwargs):
super(TFXxxModel, self).__init__(config, *inputs, **kwargs)
self.transformer = TFXxxMainLayer(config, name='transformer')
self.transformer = TFXxxMainLayer(config, name="transformer")
def call(self, inputs, **kwargs):
outputs = self.transformer(inputs, **kwargs)
return outputs
@add_start_docstrings("""Xxx Model with a `language modeling` head on top. """,
XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING)
TFXxxMLMHead = tf.keras.layers.Layer
@add_start_docstrings(
"""Xxx Model with a `language modeling` head on top. """, XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING
)
class TFXxxForMaskedLM(TFXxxPreTrainedModel):
r"""
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
......@@ -333,26 +347,30 @@ class TFXxxForMaskedLM(TFXxxPreTrainedModel):
prediction_scores = outputs[0]
"""
def __init__(self, config, *inputs, **kwargs):
super(TFXxxForMaskedLM, self).__init__(config, *inputs, **kwargs)
self.transformer = TFXxxMainLayer(config, name='transformer')
self.mlm = TFXxxMLMHead(config, self.transformer.embeddings, name='mlm')
self.transformer = TFXxxMainLayer(config, name="transformer")
self.mlm = TFXxxMLMHead(config, self.transformer.embeddings, name="mlm")
def call(self, inputs, **kwargs):
outputs = self.transformer(inputs, **kwargs)
sequence_output = outputs[0]
prediction_scores = self.mlm(sequence_output, training=kwargs.get('training', False))
prediction_scores = self.mlm(sequence_output, training=kwargs.get("training", False))
outputs = (prediction_scores,) + outputs[2:] # Add hidden states and attention if they are here
return outputs # prediction_scores, (hidden_states), (attentions)
@add_start_docstrings("""Xxx Model transformer with a sequence classification/regression head on top (a linear layer on top of
@add_start_docstrings(
"""Xxx Model transformer with a sequence classification/regression head on top (a linear layer on top of
the pooled output) e.g. for GLUE tasks. """,
XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING)
XXX_START_DOCSTRING,
XXX_INPUTS_DOCSTRING,
)
class TFXxxForSequenceClassification(TFXxxPreTrainedModel):
r"""
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
......@@ -378,22 +396,23 @@ class TFXxxForSequenceClassification(TFXxxPreTrainedModel):
logits = outputs[0]
"""
def __init__(self, config, *inputs, **kwargs):
super(TFXxxForSequenceClassification, self).__init__(config, *inputs, **kwargs)
self.num_labels = config.num_labels
self.transformer = TFXxxMainLayer(config, name='transformer')
self.transformer = TFXxxMainLayer(config, name="transformer")
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
self.classifier = tf.keras.layers.Dense(config.num_labels,
kernel_initializer=get_initializer(config.initializer_range),
name='classifier')
self.classifier = tf.keras.layers.Dense(
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
)
def call(self, inputs, **kwargs):
outputs = self.transformer(inputs, **kwargs)
pooled_output = outputs[1]
pooled_output = self.dropout(pooled_output, training=kwargs.get('training', False))
pooled_output = self.dropout(pooled_output, training=kwargs.get("training", False))
logits = self.classifier(pooled_output)
outputs = (logits,) + outputs[2:] # add hidden states and attention if they are here
......@@ -401,9 +420,12 @@ class TFXxxForSequenceClassification(TFXxxPreTrainedModel):
return outputs # logits, (hidden_states), (attentions)
@add_start_docstrings("""Xxx Model with a token classification head on top (a linear layer on top of
@add_start_docstrings(
"""Xxx Model with a token classification head on top (a linear layer on top of
the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """,
XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING)
XXX_START_DOCSTRING,
XXX_INPUTS_DOCSTRING,
)
class TFXxxForTokenClassification(TFXxxPreTrainedModel):
r"""
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
......@@ -429,22 +451,23 @@ class TFXxxForTokenClassification(TFXxxPreTrainedModel):
scores = outputs[0]
"""
def __init__(self, config, *inputs, **kwargs):
super(TFXxxForTokenClassification, self).__init__(config, *inputs, **kwargs)
self.num_labels = config.num_labels
self.transformer = TFXxxMainLayer(config, name='transformer')
self.transformer = TFXxxMainLayer(config, name="transformer")
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
self.classifier = tf.keras.layers.Dense(config.num_labels,
kernel_initializer=get_initializer(config.initializer_range),
name='classifier')
self.classifier = tf.keras.layers.Dense(
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
)
def call(self, inputs, **kwargs):
outputs = self.transformer(inputs, **kwargs)
sequence_output = outputs[0]
sequence_output = self.dropout(sequence_output, training=kwargs.get('training', False))
sequence_output = self.dropout(sequence_output, training=kwargs.get("training", False))
logits = self.classifier(sequence_output)
outputs = (logits,) + outputs[2:] # add hidden states and attention if they are here
......@@ -452,9 +475,12 @@ class TFXxxForTokenClassification(TFXxxPreTrainedModel):
return outputs # scores, (hidden_states), (attentions)
@add_start_docstrings("""Xxx Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of
@add_start_docstrings(
"""Xxx Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of
the hidden-states output to compute `span start logits` and `span end logits`). """,
XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING)
XXX_START_DOCSTRING,
XXX_INPUTS_DOCSTRING,
)
class TFXxxForQuestionAnswering(TFXxxPreTrainedModel):
r"""
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
......@@ -482,14 +508,15 @@ class TFXxxForQuestionAnswering(TFXxxPreTrainedModel):
start_scores, end_scores = outputs[:2]
"""
def __init__(self, config, *inputs, **kwargs):
super(TFXxxForQuestionAnswering, self).__init__(config, *inputs, **kwargs)
self.num_labels = config.num_labels
self.transformer = TFXxxMainLayer(config, name='transformer')
self.qa_outputs = tf.keras.layers.Dense(config.num_labels,
kernel_initializer=get_initializer(config.initializer_range),
name='qa_outputs')
self.transformer = TFXxxMainLayer(config, name="transformer")
self.qa_outputs = tf.keras.layers.Dense(
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
)
def call(self, inputs, **kwargs):
outputs = self.transformer(inputs, **kwargs)
......
......@@ -20,22 +20,17 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import json
import logging
import math
import os
import sys
import copy
import itertools
from io import open
import torch
from torch import nn
from torch.nn import CrossEntropyLoss, MSELoss
from .modeling_utils import PreTrainedModel, prune_linear_layer
from .configuration_xxx import XxxConfig
from .file_utils import add_start_docstrings
from .modeling_utils import PreTrainedModel
logger = logging.getLogger(__name__)
......@@ -44,10 +39,11 @@ logger = logging.getLogger(__name__)
# for the pretrained weights provided with the models
####################################################
XXX_PRETRAINED_MODEL_ARCHIVE_MAP = {
'xxx-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-base-uncased-pytorch_model.bin",
'xxx-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-large-uncased-pytorch_model.bin",
"xxx-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-base-uncased-pytorch_model.bin",
"xxx-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-large-uncased-pytorch_model.bin",
}
####################################################
# This is a conversion method from TF 1.0 to PyTorch
# More details: https://medium.com/huggingface/from-tensorflow-to-pytorch-265f40ef2a28
......@@ -60,8 +56,10 @@ def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
import numpy as np
import tensorflow as tf
except ImportError:
logger.error("Loading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see "
"https://www.tensorflow.org/install/ for installation instructions.")
logger.error(
"Loading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see "
"https://www.tensorflow.org/install/ for installation instructions."
)
raise
tf_path = os.path.abspath(tf_checkpoint_path)
logger.info("Converting TensorFlow checkpoint from {}".format(tf_path))
......@@ -76,7 +74,7 @@ def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
arrays.append(array)
for name, array in zip(names, arrays):
name = name.split('/')
name = name.split("/")
# adam_v and adam_m are variables used in AdamWeightDecayOptimizer to calculate m and v
# which are not required for using pretrained model
if any(n in ["adam_v", "adam_m", "global_step"] for n in name):
......@@ -84,30 +82,30 @@ def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
continue
pointer = model
for m_name in name:
if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
l = re.split(r'_(\d+)', m_name)
if re.fullmatch(r"[A-Za-z]+_\d+", m_name):
scope_names = re.split(r"_(\d+)", m_name)
else:
l = [m_name]
if l[0] == 'kernel' or l[0] == 'gamma':
pointer = getattr(pointer, 'weight')
elif l[0] == 'output_bias' or l[0] == 'beta':
pointer = getattr(pointer, 'bias')
elif l[0] == 'output_weights':
pointer = getattr(pointer, 'weight')
elif l[0] == 'squad':
pointer = getattr(pointer, 'classifier')
scope_names = [m_name]
if scope_names[0] == "kernel" or scope_names[0] == "gamma":
pointer = getattr(pointer, "weight")
elif scope_names[0] == "output_bias" or scope_names[0] == "beta":
pointer = getattr(pointer, "bias")
elif scope_names[0] == "output_weights":
pointer = getattr(pointer, "weight")
elif scope_names[0] == "squad":
pointer = getattr(pointer, "classifier")
else:
try:
pointer = getattr(pointer, l[0])
pointer = getattr(pointer, scope_names[0])
except AttributeError:
logger.info("Skipping {}".format("/".join(name)))
continue
if len(l) >= 2:
num = int(l[1])
if len(scope_names) >= 2:
num = int(scope_names[1])
pointer = pointer[num]
if m_name[-11:] == '_embeddings':
pointer = getattr(pointer, 'weight')
elif m_name == 'kernel':
if m_name[-11:] == "_embeddings":
pointer = getattr(pointer, "weight")
elif m_name == "kernel":
array = np.transpose(array)
try:
assert pointer.shape == array.shape
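# [editor's sketch, not part of the diff] How the scope-name splitting above behaves,
# runnable on its own:
import re
assert re.fullmatch(r"[A-Za-z]+_\d+", "layer_11") is not None
assert re.split(r"_(\d+)", "layer_11") == ["layer", "11", ""]  # base name, index, empty tail
# [end of editor's sketch]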
......@@ -131,6 +129,14 @@ def load_tf_weights_in_xxx(model, config, tf_checkpoint_path):
#
# See the conversion methods in modeling_tf_pytorch_utils.py for more details
####################################################
XxxAttention = nn.Module
XxxIntermediate = nn.Module
XxxOutput = nn.Module
class XxxLayer(nn.Module):
def __init__(self, config):
super(XxxLayer, self).__init__()
......@@ -147,7 +153,6 @@ class XxxLayer(nn.Module):
return outputs
####################################################
# PreTrainedModel is a sub-class of torch.nn.Module
# which take care of loading and saving pretrained weights
......@@ -157,10 +162,21 @@ class XxxLayer(nn.Module):
# pointers for your model and the weights initialization
# method if its not fully covered by PreTrainedModel's default method
####################################################
XxxLayerNorm = torch.nn.LayerNorm
XxxEmbeddings = nn.Module
XxxEncoder = nn.Module
XxxPooler = nn.Module
class XxxPreTrainedModel(PreTrainedModel):
""" An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
"""
config_class = XxxConfig
pretrained_model_archive_map = XXX_PRETRAINED_MODEL_ARCHIVE_MAP
load_tf_weights = load_tf_weights_in_xxx
......@@ -246,8 +262,12 @@ XXX_INPUTS_DOCSTRING = r"""
than the model's internal embedding lookup matrix.
"""
@add_start_docstrings("The bare Xxx Model transformer outputting raw hidden-states without any specific head on top.",
XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING)
@add_start_docstrings(
"The bare Xxx Model transformer outputting raw hidden-states without any specific head on top.",
XXX_START_DOCSTRING,
XXX_INPUTS_DOCSTRING,
)
class XxxModel(XxxPreTrainedModel):
r"""
Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
......@@ -277,6 +297,7 @@ class XxxModel(XxxPreTrainedModel):
last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple
"""
def __init__(self, config):
super(XxxModel, self).__init__(config)
......@@ -300,7 +321,15 @@ class XxxModel(XxxPreTrainedModel):
for layer, heads in heads_to_prune.items():
self.encoder.layer[layer].attention.prune_heads(heads)
def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, inputs_embeds=None):
def forward(
self,
input_ids=None,
attention_mask=None,
token_type_ids=None,
position_ids=None,
head_mask=None,
inputs_embeds=None,
):
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
elif input_ids is not None:
......@@ -342,14 +371,20 @@ class XxxModel(XxxPreTrainedModel):
head_mask = head_mask.unsqueeze(0).unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
head_mask = head_mask.expand(self.config.num_hidden_layers, -1, -1, -1, -1)
elif head_mask.dim() == 2:
head_mask = head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(-1) # We can specify head_mask for each layer
head_mask = head_mask.to(dtype=next(self.parameters()).dtype) # switch to fload if need + fp16 compatibility
head_mask = (
head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(-1)
) # We can specify head_mask for each layer
head_mask = head_mask.to(
dtype=next(self.parameters()).dtype
) # switch to float if needed + fp16 compatibility
else:
head_mask = [None] * self.config.num_hidden_layers
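# [editor's sketch, not part of the diff] The unsqueeze/expand chain above broadcasts a
# 1-D per-head mask into one mask per layer; a standalone shape check:
import torch
num_layers, num_heads = 4, 12
hm = torch.ones(num_heads)  # the head_mask.dim() == 1 case
hm = hm.unsqueeze(0).unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
hm = hm.expand(num_layers, -1, -1, -1, -1)
assert hm.shape == (num_layers, 1, num_heads, 1, 1)
# [end of editor's sketch]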
##################################
# Replace this with your model code
embedding_output = self.embeddings(input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids, inputs_embeds=inputs_embeds)
embedding_output = self.embeddings(
input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids, inputs_embeds=inputs_embeds
)
encoder_outputs = self.encoder(embedding_output, extended_attention_mask, head_mask=head_mask)
sequence_output = encoder_outputs[0]
outputs = (sequence_output,) + encoder_outputs[1:] # add hidden_states and attentions if they are here
......@@ -357,8 +392,9 @@ class XxxModel(XxxPreTrainedModel):
return outputs # sequence_output, (hidden_states), (attentions)
@add_start_docstrings("""Xxx Model with a `language modeling` head on top. """,
XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING)
@add_start_docstrings(
"""Xxx Model with a `language modeling` head on top. """, XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING
)
class XxxForMaskedLM(XxxPreTrainedModel):
r"""
**masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
......@@ -389,6 +425,7 @@ class XxxForMaskedLM(XxxPreTrainedModel):
loss, prediction_scores = outputs[:2]
"""
def __init__(self, config):
super(XxxForMaskedLM, self).__init__(config)
......@@ -400,15 +437,25 @@ class XxxForMaskedLM(XxxPreTrainedModel):
def get_output_embeddings(self):
return self.lm_head
def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, inputs_embeds=None,
masked_lm_labels=None):
outputs = self.transformer(input_ids,
def forward(
self,
input_ids=None,
attention_mask=None,
token_type_ids=None,
position_ids=None,
head_mask=None,
inputs_embeds=None,
masked_lm_labels=None,
):
outputs = self.transformer(
input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
position_ids=position_ids,
head_mask=head_mask,
inputs_embeds=inputs_embeds)
inputs_embeds=inputs_embeds,
)
sequence_output = outputs[0]
prediction_scores = self.cls(sequence_output)
......@@ -422,9 +469,12 @@ class XxxForMaskedLM(XxxPreTrainedModel):
return outputs # (masked_lm_loss), prediction_scores, (hidden_states), (attentions)
@add_start_docstrings("""Xxx Model transformer with a sequence classification/regression head on top (a linear layer on top of
@add_start_docstrings(
"""Xxx Model transformer with a sequence classification/regression head on top (a linear layer on top of
the pooled output) e.g. for GLUE tasks. """,
XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING)
XXX_START_DOCSTRING,
XXX_INPUTS_DOCSTRING,
)
class XxxForSequenceClassification(XxxPreTrainedModel):
r"""
**labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
......@@ -456,6 +506,7 @@ class XxxForSequenceClassification(XxxPreTrainedModel):
loss, logits = outputs[:2]
"""
def __init__(self, config):
super(XxxForSequenceClassification, self).__init__(config)
self.num_labels = config.num_labels
......@@ -466,15 +517,25 @@ class XxxForSequenceClassification(XxxPreTrainedModel):
self.init_weights()
def forward(self, input_ids=None, attention_mask=None, token_type_ids=None,
position_ids=None, head_mask=None, inputs_embeds=None, labels=None):
outputs = self.transformer(input_ids,
def forward(
self,
input_ids=None,
attention_mask=None,
token_type_ids=None,
position_ids=None,
head_mask=None,
inputs_embeds=None,
labels=None,
):
outputs = self.transformer(
input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
position_ids=position_ids,
head_mask=head_mask,
inputs_embeds=inputs_embeds)
inputs_embeds=inputs_embeds,
)
pooled_output = outputs[1]
......@@ -496,9 +557,12 @@ class XxxForSequenceClassification(XxxPreTrainedModel):
return outputs # (loss), logits, (hidden_states), (attentions)
@add_start_docstrings("""Xxx Model with a token classification head on top (a linear layer on top of
@add_start_docstrings(
"""Xxx Model with a token classification head on top (a linear layer on top of
the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """,
XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING)
XXX_START_DOCSTRING,
XXX_INPUTS_DOCSTRING,
)
class XxxForTokenClassification(XxxPreTrainedModel):
r"""
**labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
......@@ -528,6 +592,7 @@ class XxxForTokenClassification(XxxPreTrainedModel):
loss, scores = outputs[:2]
"""
def __init__(self, config):
super(XxxForTokenClassification, self).__init__(config)
self.num_labels = config.num_labels
......@@ -538,15 +603,25 @@ class XxxForTokenClassification(XxxPreTrainedModel):
self.init_weights()
def forward(self, input_ids=None, attention_mask=None, token_type_ids=None,
position_ids=None, head_mask=None, inputs_embeds=None, labels=None):
outputs = self.transformer(input_ids,
def forward(
self,
input_ids=None,
attention_mask=None,
token_type_ids=None,
position_ids=None,
head_mask=None,
inputs_embeds=None,
labels=None,
):
outputs = self.transformer(
input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
position_ids=position_ids,
head_mask=head_mask,
inputs_embeds=inputs_embeds)
inputs_embeds=inputs_embeds,
)
sequence_output = outputs[0]
......@@ -569,9 +644,12 @@ class XxxForTokenClassification(XxxPreTrainedModel):
return outputs # (loss), scores, (hidden_states), (attentions)
@add_start_docstrings("""Xxx Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of
@add_start_docstrings(
"""Xxx Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of
the hidden-states output to compute `span start logits` and `span end logits`). """,
XXX_START_DOCSTRING, XXX_INPUTS_DOCSTRING)
XXX_START_DOCSTRING,
XXX_INPUTS_DOCSTRING,
)
class XxxForQuestionAnswering(XxxPreTrainedModel):
r"""
**start_positions**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size,)``:
......@@ -613,6 +691,7 @@ class XxxForQuestionAnswering(XxxPreTrainedModel):
"""
def __init__(self, config):
super(XxxForQuestionAnswering, self).__init__(config)
self.num_labels = config.num_labels
......@@ -622,15 +701,26 @@ class XxxForQuestionAnswering(XxxPreTrainedModel):
self.init_weights()
def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None, inputs_embeds=None,
start_positions=None, end_positions=None):
outputs = self.transformer(input_ids,
def forward(
self,
input_ids=None,
attention_mask=None,
token_type_ids=None,
position_ids=None,
head_mask=None,
inputs_embeds=None,
start_positions=None,
end_positions=None,
):
outputs = self.transformer(
input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
position_ids=position_ids,
head_mask=head_mask,
inputs_embeds=inputs_embeds)
inputs_embeds=inputs_embeds,
)
sequence_output = outputs[0]
......
......@@ -12,38 +12,45 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import unittest
import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from transformers import XxxConfig, is_tf_available
from .configuration_common_test import ConfigTester
from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_tf, slow
from transformers import XxxConfig, is_tf_available
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_xxx import (TFXxxModel, TFXxxForMaskedLM,
from transformers.modeling_tf_xxx import (
TFXxxModel,
TFXxxForMaskedLM,
TFXxxForSequenceClassification,
TFXxxForTokenClassification,
TFXxxForQuestionAnswering,
TF_XXX_PRETRAINED_MODEL_ARCHIVE_MAP)
)
@require_tf
class TFXxxModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFXxxModel, TFXxxForMaskedLM, TFXxxForQuestionAnswering,
all_model_classes = (
(
TFXxxModel,
TFXxxForMaskedLM,
TFXxxForQuestionAnswering,
TFXxxForSequenceClassification,
TFXxxForTokenClassification) if is_tf_available() else ()
TFXxxForTokenClassification,
)
if is_tf_available()
else ()
)
class TFXxxModelTester(object):
def __init__(self,
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
......@@ -120,15 +127,16 @@ class TFXxxModelTest(TFCommonTestCases.TFCommonModelTester):
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range)
initializer_range=self.initializer_range,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def create_and_check_xxx_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
def create_and_check_xxx_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFXxxModel(config=config)
inputs = {'input_ids': input_ids,
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
sequence_output, pooled_output = model(inputs)
inputs = [input_ids, input_mask]
......@@ -141,78 +149,74 @@ class TFXxxModelTest(TFCommonTestCases.TFCommonModelTester):
"pooled_output": pooled_output.numpy(),
}
self.parent.assertListEqual(
list(result["sequence_output"].shape),
[self.batch_size, self.seq_length, self.hidden_size])
list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
)
self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])
def create_and_check_xxx_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
def create_and_check_xxx_for_masked_lm(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFXxxForMaskedLM(config=config)
inputs = {'input_ids': input_ids,
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
prediction_scores, = model(inputs)
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
(prediction_scores,) = model(inputs)
result = {
"prediction_scores": prediction_scores.numpy(),
}
self.parent.assertListEqual(
list(result["prediction_scores"].shape),
[self.batch_size, self.seq_length, self.vocab_size])
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
)
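# [editor's note, not part of the diff] `(prediction_scores,) = model(inputs)` is the
# reformatted spelling of single-element tuple unpacking; the parentheses make the
# easy-to-miss trailing comma explicit. Standalone equivalent:
(_only,) = ("hello",)
assert _only == "hello"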
def create_and_check_xxx_for_sequence_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
def create_and_check_xxx_for_sequence_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = TFXxxForSequenceClassification(config=config)
inputs = {'input_ids': input_ids,
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
logits, = model(inputs)
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
(logits,) = model(inputs)
result = {
"logits": logits.numpy(),
}
self.parent.assertListEqual(
list(result["logits"].shape),
[self.batch_size, self.num_labels])
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
def create_and_check_xxx_for_token_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
def create_and_check_xxx_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = TFXxxForTokenClassification(config=config)
inputs = {'input_ids': input_ids,
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
logits, = model(inputs)
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
(logits,) = model(inputs)
result = {
"logits": logits.numpy(),
}
self.parent.assertListEqual(
list(result["logits"].shape),
[self.batch_size, self.seq_length, self.num_labels])
list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
)
def create_and_check_xxx_for_question_answering(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
def create_and_check_xxx_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFXxxForQuestionAnswering(config=config)
inputs = {'input_ids': input_ids,
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
start_logits, end_logits = model(inputs)
result = {
"start_logits": start_logits.numpy(),
"end_logits": end_logits.numpy(),
}
self.parent.assertListEqual(
list(result["start_logits"].shape),
[self.batch_size, self.seq_length])
self.parent.assertListEqual(
list(result["end_logits"].shape),
[self.batch_size, self.seq_length])
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, token_type_ids, input_mask,
sequence_labels, token_labels, choice_labels) = config_and_inputs
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask}
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
def setUp(self):
......@@ -244,9 +248,10 @@ class TFXxxModelTest(TFCommonTestCases.TFCommonModelTester):
@slow
def test_model_from_pretrained(self):
for model_name in ['xxx-base-uncased']:
for model_name in ["xxx-base-uncased"]:
model = TFXxxModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
self.assertIsNotNone(model)
if __name__ == "__main__":
unittest.main()
......@@ -12,36 +12,41 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import unittest
from transformers import is_torch_available
from .modeling_common_test import (CommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester
from .modeling_common_test import CommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_torch, slow, torch_device
if is_torch_available():
from transformers import (XxxConfig, XxxModel, XxxForMaskedLM,
XxxForNextSentencePrediction, XxxForPreTraining,
XxxForQuestionAnswering, XxxForSequenceClassification,
XxxForTokenClassification, XxxForMultipleChoice)
from transformers import (
XxxConfig,
XxxModel,
XxxForMaskedLM,
XxxForQuestionAnswering,
XxxForSequenceClassification,
XxxForTokenClassification,
)
from transformers.modeling_xxx import XXX_PRETRAINED_MODEL_ARCHIVE_MAP
@require_torch
class XxxModelTest(CommonTestCases.CommonModelTester):
all_model_classes = (XxxModel, XxxForMaskedLM, XxxForQuestionAnswering,
XxxForSequenceClassification,
XxxForTokenClassification) if is_torch_available() else ()
all_model_classes = (
(XxxModel, XxxForMaskedLM, XxxForQuestionAnswering, XxxForSequenceClassification, XxxForTokenClassification)
if is_torch_available()
else ()
)
class XxxModelTester(object):
def __init__(self,
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
......@@ -118,16 +123,17 @@ class XxxModelTest(CommonTestCases.CommonModelTester):
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range)
initializer_range=self.initializer_range,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def check_loss_output(self, result):
self.parent.assertListEqual(
list(result["loss"].size()),
[])
self.parent.assertListEqual(list(result["loss"].size()), [])
def create_and_check_xxx_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
def create_and_check_xxx_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = XxxModel(config=config)
model.to(torch_device)
model.eval()
......@@ -140,83 +146,98 @@ class XxxModelTest(CommonTestCases.CommonModelTester):
"pooled_output": pooled_output,
}
self.parent.assertListEqual(
list(result["sequence_output"].size()),
[self.batch_size, self.seq_length, self.hidden_size])
list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
)
self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size])
def create_and_check_xxx_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
def create_and_check_xxx_for_masked_lm(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = XxxForMaskedLM(config=config)
model.to(torch_device)
model.eval()
loss, prediction_scores = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels)
loss, prediction_scores = model(
input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels
)
result = {
"loss": loss,
"prediction_scores": prediction_scores,
}
self.parent.assertListEqual(
list(result["prediction_scores"].size()),
[self.batch_size, self.seq_length, self.vocab_size])
list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
)
self.check_loss_output(result)
def create_and_check_xxx_for_question_answering(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
def create_and_check_xxx_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = XxxForQuestionAnswering(config=config)
model.to(torch_device)
model.eval()
loss, start_logits, end_logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids,
start_positions=sequence_labels, end_positions=sequence_labels)
loss, start_logits, end_logits = model(
input_ids,
attention_mask=input_mask,
token_type_ids=token_type_ids,
start_positions=sequence_labels,
end_positions=sequence_labels,
)
result = {
"loss": loss,
"start_logits": start_logits,
"end_logits": end_logits,
}
self.parent.assertListEqual(
list(result["start_logits"].size()),
[self.batch_size, self.seq_length])
self.parent.assertListEqual(
list(result["end_logits"].size()),
[self.batch_size, self.seq_length])
self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
self.check_loss_output(result)
def create_and_check_xxx_for_sequence_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
def create_and_check_xxx_for_sequence_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = XxxForSequenceClassification(config)
model.to(torch_device)
model.eval()
loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
loss, logits = model(
input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels
)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(
list(result["logits"].size()),
[self.batch_size, self.num_labels])
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels])
self.check_loss_output(result)
def create_and_check_xxx_for_token_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
def create_and_check_xxx_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels
model = XxxForTokenClassification(config=config)
model.to(torch_device)
model.eval()
loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
loss, logits = model(
input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(
list(result["logits"].size()),
[self.batch_size, self.seq_length, self.num_labels])
list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
)
self.check_loss_output(result)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, token_type_ids, input_mask,
sequence_labels, token_labels, choice_labels) = config_and_inputs
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask}
(
config,
input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
def setUp(self):
......@@ -252,5 +273,6 @@ class XxxModelTest(CommonTestCases.CommonModelTester):
model = XxxModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
self.assertIsNotNone(model)
if __name__ == "__main__":
unittest.main()
......@@ -18,10 +18,11 @@ import os
import unittest
from io import open
from transformers.tokenization_bert import (XxxTokenizer, VOCAB_FILES_NAMES)
from transformers.tokenization_bert import VOCAB_FILES_NAMES, XxxTokenizer
from .tokenization_tests_commons import CommonTestCases
class XxxTokenizationTest(CommonTestCases.CommonTokenizerTester):
tokenizer_class = XxxTokenizer
......@@ -30,28 +31,39 @@ class XxxTokenizationTest(CommonTestCases.CommonTokenizerTester):
super(XxxTokenizationTest, self).setUp()
vocab_tokens = [
"[UNK]", "[CLS]", "[SEP]", "want", "##want", "##ed", "wa", "un", "runn",
"##ing", ",", "low", "lowest",
"[UNK]",
"[CLS]",
"[SEP]",
"want",
"##want",
"##ed",
"wa",
"un",
"runn",
"##ing",
",",
"low",
"lowest",
]
self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES['vocab_file'])
with open(self.vocab_file, "w", encoding='utf-8') as vocab_writer:
self.vocab_file = os.path.join(self.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
def get_tokenizer(self, **kwargs):
return XxxTokenizer.from_pretrained(self.tmpdirname, **kwargs)
def get_input_output_texts(self):
input_text = u"UNwant\u00E9d,running"
output_text = u"unwanted, running"
input_text = "UNwant\u00E9d,running"
output_text = "unwanted, running"
return input_text, output_text
def test_full_tokenizer(self):
tokenizer = self.tokenizer_class(self.vocab_file)
tokens = tokenizer.tokenize(u"UNwant\u00E9d,running")
tokens = tokenizer.tokenize("UNwant\u00E9d,running")
self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])
self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])
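# [editor's note, not part of the diff] The expected ids follow directly from the
# position of each token in `vocab_tokens` above: "un"=7, "##want"=4, "##ed"=5,
# ","=10, "runn"=8, "##ing"=9.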
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
......@@ -19,11 +19,11 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import collections
import logging
import os
import unicodedata
from io import open
from .tokenization_utils import PreTrainedTokenizer
logger = logging.getLogger(__name__)
####################################################
......@@ -34,17 +34,16 @@ logger = logging.getLogger(__name__)
# Mapping from the keyword arguments names of Tokenizer `__init__`
# to file names for serializing Tokenizer instances
####################################################
VOCAB_FILES_NAMES = {'vocab_file': 'vocab.txt'}
VOCAB_FILES_NAMES = {"vocab_file": "vocab.txt"}
####################################################
# Mapping from the keyword arguments names of Tokenizer `__init__`
# to pretrained vocabulary URL for all the model shortcut names.
####################################################
PRETRAINED_VOCAB_FILES_MAP = {
'vocab_file':
{
'xxx-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-base-uncased-vocab.txt",
'xxx-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-large-uncased-vocab.txt",
"vocab_file": {
"xxx-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-base-uncased-vocab.txt",
"xxx-large-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/xxx-large-uncased-vocab.txt",
}
}
......@@ -52,8 +51,8 @@ PRETRAINED_VOCAB_FILES_MAP = {
# Mapping from model shortcut names to max length of inputs
####################################################
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
'xxx-base-uncased': 512,
'xxx-large-uncased': 512,
"xxx-base-uncased": 512,
"xxx-large-uncased": 512,
}
####################################################
......@@ -62,8 +61,8 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
# To be used for checkpoint specific configurations.
####################################################
PRETRAINED_INIT_CONFIGURATION = {
'xxx-base-uncased': {'do_lower_case': True},
'xxx-large-uncased': {'do_lower_case': True},
"xxx-base-uncased": {"do_lower_case": True},
"xxx-large-uncased": {"do_lower_case": True},
}
......@@ -73,7 +72,7 @@ def load_vocab(vocab_file):
with open(vocab_file, "r", encoding="utf-8") as reader:
tokens = reader.readlines()
for index, token in enumerate(tokens):
token = token.rstrip('\n')
token = token.rstrip("\n")
vocab[token] = index
return vocab
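# [editor's sketch, not part of the diff] load_vocab assigns each vocabulary line its
# line number; an equivalent in-memory version:
import collections
_lines = ["[UNK]", "[CLS]", "hello"]
_vocab = collections.OrderedDict((tok, i) for i, tok in enumerate(_lines))
assert _vocab["hello"] == 2
# [end of editor's sketch]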
......@@ -93,9 +92,17 @@ class XxxTokenizer(PreTrainedTokenizer):
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
def __init__(self, vocab_file, do_lower_case=True,
unk_token="[UNK]", sep_token="[SEP]", pad_token="[PAD]", cls_token="[CLS]",
mask_token="[MASK]", **kwargs):
def __init__(
self,
vocab_file,
do_lower_case=True,
unk_token="[UNK]",
sep_token="[SEP]",
pad_token="[PAD]",
cls_token="[CLS]",
mask_token="[MASK]",
**kwargs
):
"""Constructs a XxxTokenizer.
Args:
......@@ -104,16 +111,22 @@ class XxxTokenizer(PreTrainedTokenizer):
Whether to lower case the input
Only has an effect when do_basic_tokenize=True
"""
super(XxxTokenizer, self).__init__(unk_token=unk_token, sep_token=sep_token,
pad_token=pad_token, cls_token=cls_token,
mask_token=mask_token, **kwargs)
super(XxxTokenizer, self).__init__(
unk_token=unk_token,
sep_token=sep_token,
pad_token=pad_token,
cls_token=cls_token,
mask_token=mask_token,
**kwargs
)
self.max_len_single_sentence = self.max_len - 2 # take into account special tokens
self.max_len_sentences_pair = self.max_len - 3 # take into account special tokens
if not os.path.isfile(vocab_file):
raise ValueError(
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = XxxTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))
"model use `tokenizer = XxxTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)
)
self.vocab = load_vocab(vocab_file)
@property
......@@ -142,7 +155,7 @@ class XxxTokenizer(PreTrainedTokenizer):
def convert_tokens_to_string(self, tokens):
""" Converts a sequence of tokens (string) in a single string. """
out_string = ' '.join(tokens).replace(' ##', '').strip()
out_string = " ".join(tokens).replace(" ##", "").strip()
return out_string
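# [editor's sketch, not part of the diff] What the join/replace above does to
# WordPiece output, runnable standalone:
_tokens = ["un", "##want", "##ed", ",", "runn", "##ing"]
_out = " ".join(_tokens).replace(" ##", "").strip()
assert _out == "unwanted , running"
# [end of editor's sketch]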
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
......@@ -177,8 +190,10 @@ class XxxTokenizer(PreTrainedTokenizer):
if already_has_special_tokens:
if token_ids_1 is not None:
raise ValueError("You should not supply a second sequence if the provided sequence of "
"ids is already formated with special tokens for the model.")
raise ValueError(
"You should not supply a second sequence if the provided sequence of "
"ids is already formated with special tokens for the model."
)
return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
if token_ids_1 is not None:
......@@ -204,15 +219,17 @@ class XxxTokenizer(PreTrainedTokenizer):
"""Save the tokenizer vocabulary to a directory or file."""
index = 0
if os.path.isdir(vocab_path):
vocab_file = os.path.join(vocab_path, VOCAB_FILES_NAMES['vocab_file'])
vocab_file = os.path.join(vocab_path, VOCAB_FILES_NAMES["vocab_file"])
else:
vocab_file = vocab_path
with open(vocab_file, "w", encoding="utf-8") as writer:
for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
if index != token_index:
logger.warning("Saving vocabulary to {}: vocabulary indices are not consecutive."
" Please check that the vocabulary is not corrupted!".format(vocab_file))
logger.warning(
"Saving vocabulary to {}: vocabulary indices are not consecutive."
" Please check that the vocabulary is not corrupted!".format(vocab_file)
)
index = token_index
writer.write(token + u'\n')
writer.write(token + "\n")
index += 1
return (vocab_file,)
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.
__version__ = "2.3.0"
# Work around to update TensorFlow's absl.logging threshold which alters the
......@@ -6,212 +10,377 @@ __version__ = "2.3.0"
# and: https://github.com/tensorflow/tensorflow/issues/26691#issuecomment-500369493
try:
import absl.logging
absl.logging.set_verbosity('info')
absl.logging.set_stderrthreshold('info')
absl.logging._warn_preinit_stderr = False
except:
except ImportError:
pass
else:
absl.logging.set_verbosity("info")
absl.logging.set_stderrthreshold("info")
absl.logging._warn_preinit_stderr = False
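# [editor's note, not part of the diff] The refactor above narrows a bare `except:` to
# `except ImportError` and moves the three configuration calls into `else`, so only a
# missing absl package is silenced; errors raised by the calls themselves now propagate.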
import logging
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
# Files and general utilities
from .file_utils import (TRANSFORMERS_CACHE, PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE,
cached_path, add_start_docstrings, add_end_docstrings,
WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME, CONFIG_NAME, MODEL_CARD_NAME,
is_tf_available, is_torch_available)
from .configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig
from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP, AutoConfig
from .configuration_bert import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BertConfig
from .configuration_camembert import CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, CamembertConfig
from .configuration_ctrl import CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP, CTRLConfig
from .configuration_distilbert import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DistilBertConfig
from .configuration_gpt2 import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2Config
from .configuration_mmbt import MMBTConfig
from .configuration_openai import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OpenAIGPTConfig
from .configuration_roberta import ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, RobertaConfig
from .configuration_t5 import T5_PRETRAINED_CONFIG_ARCHIVE_MAP, T5Config
from .configuration_transfo_xl import TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP, TransfoXLConfig
from .data import (is_sklearn_available,
InputExample, InputFeatures, DataProcessor,
# Configurations
from .configuration_utils import PretrainedConfig
from .configuration_xlm import XLM_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMConfig
from .configuration_xlm_roberta import XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, XLMRobertaConfig
from .configuration_xlnet import XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP, XLNetConfig
from .data import (
DataProcessor,
InputExample,
InputFeatures,
SingleSentenceClassificationProcessor,
glue_output_modes, glue_convert_examples_to_features,
glue_processors, glue_tasks_num_labels,
xnli_output_modes, xnli_processors, xnli_tasks_num_labels,
squad_convert_examples_to_features, SquadFeatures,
SquadExample, SquadV1Processor, SquadV2Processor)
SquadExample,
SquadFeatures,
SquadV1Processor,
SquadV2Processor,
glue_convert_examples_to_features,
glue_output_modes,
glue_processors,
glue_tasks_num_labels,
is_sklearn_available,
squad_convert_examples_to_features,
xnli_output_modes,
xnli_processors,
xnli_tasks_num_labels,
)
if is_sklearn_available():
from .data import glue_compute_metrics, xnli_compute_metrics
# Files and general utilities
from .file_utils import (
CONFIG_NAME,
MODEL_CARD_NAME,
PYTORCH_PRETRAINED_BERT_CACHE,
PYTORCH_TRANSFORMERS_CACHE,
TF2_WEIGHTS_NAME,
TF_WEIGHTS_NAME,
TRANSFORMERS_CACHE,
WEIGHTS_NAME,
add_end_docstrings,
add_start_docstrings,
cached_path,
is_tf_available,
is_torch_available,
)
# Model Cards
from .modelcard import ModelCard
# Tokenizers
from .tokenization_utils import (PreTrainedTokenizer)
# TF 2.0 <=> PyTorch conversion utilities
from .modeling_tf_pytorch_utils import (
convert_tf_weight_name_to_pt_weight_name,
load_pytorch_checkpoint_in_tf2_model,
load_pytorch_model_in_tf2_model,
load_pytorch_weights_in_tf2_model,
load_tf2_checkpoint_in_pytorch_model,
load_tf2_model_in_pytorch_model,
load_tf2_weights_in_pytorch_model,
)
# Pipelines
from .pipelines import (
CsvPipelineDataFormat,
FeatureExtractionPipeline,
JsonPipelineDataFormat,
NerPipeline,
PipedPipelineDataFormat,
Pipeline,
PipelineDataFormat,
QuestionAnsweringPipeline,
TextClassificationPipeline,
pipeline,
)
from .tokenization_albert import AlbertTokenizer
from .tokenization_auto import AutoTokenizer
from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
from .tokenization_bert_japanese import BertJapaneseTokenizer, MecabTokenizer, CharacterTokenizer
from .tokenization_openai import OpenAIGPTTokenizer
from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
from .tokenization_gpt2 import GPT2Tokenizer
from .tokenization_bert import BasicTokenizer, BertTokenizer, WordpieceTokenizer
from .tokenization_bert_japanese import BertJapaneseTokenizer, CharacterTokenizer, MecabTokenizer
from .tokenization_camembert import CamembertTokenizer
from .tokenization_ctrl import CTRLTokenizer
from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
from .tokenization_xlm import XLMTokenizer
from .tokenization_roberta import RobertaTokenizer
from .tokenization_distilbert import DistilBertTokenizer
from .tokenization_albert import AlbertTokenizer
from .tokenization_camembert import CamembertTokenizer
from .tokenization_gpt2 import GPT2Tokenizer
from .tokenization_openai import OpenAIGPTTokenizer
from .tokenization_roberta import RobertaTokenizer
from .tokenization_t5 import T5Tokenizer
from .tokenization_transfo_xl import TransfoXLCorpus, TransfoXLTokenizer
# Tokenizers
from .tokenization_utils import PreTrainedTokenizer
from .tokenization_xlm import XLMTokenizer
from .tokenization_xlm_roberta import XLMRobertaTokenizer
from .tokenization_xlnet import SPIECE_UNDERLINE, XLNetTokenizer
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
if is_sklearn_available():
from .data import glue_compute_metrics, xnli_compute_metrics
# Configurations
from .configuration_utils import PretrainedConfig
from .configuration_auto import AutoConfig, ALL_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_bert import BertConfig, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_openai import OpenAIGPTConfig, OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_transfo_xl import TransfoXLConfig, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_gpt2 import GPT2Config, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_ctrl import CTRLConfig, CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_xlnet import XLNetConfig, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_xlm import XLMConfig, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_roberta import RobertaConfig, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_distilbert import DistilBertConfig, DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_albert import AlbertConfig, ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_camembert import CamembertConfig, CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_t5 import T5Config, T5_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_xlm_roberta import XLMRobertaConfig, XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_mmbt import MMBTConfig
# Modeling
if is_torch_available():
from .modeling_utils import (PreTrainedModel, prune_layer, Conv1D)
from .modeling_auto import (AutoModel, AutoModelForSequenceClassification, AutoModelForQuestionAnswering,
AutoModelWithLMHead, AutoModelForTokenClassification, ALL_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_bert import (BertPreTrainedModel, BertModel, BertForPreTraining,
BertForMaskedLM, BertForNextSentencePrediction,
BertForSequenceClassification, BertForMultipleChoice,
BertForTokenClassification, BertForQuestionAnswering,
load_tf_weights_in_bert, BERT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_openai import (OpenAIGPTPreTrainedModel, OpenAIGPTModel,
OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel,
load_tf_weights_in_openai_gpt, OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_transfo_xl import (TransfoXLPreTrainedModel, TransfoXLModel, TransfoXLLMHeadModel,
from .modeling_utils import PreTrainedModel, prune_layer, Conv1D
from .modeling_auto import (
AutoModel,
AutoModelForSequenceClassification,
AutoModelForQuestionAnswering,
AutoModelWithLMHead,
AutoModelForTokenClassification,
ALL_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_bert import (
BertPreTrainedModel,
BertModel,
BertForPreTraining,
BertForMaskedLM,
BertForNextSentencePrediction,
BertForSequenceClassification,
BertForMultipleChoice,
BertForTokenClassification,
BertForQuestionAnswering,
load_tf_weights_in_bert,
BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_openai import (
OpenAIGPTPreTrainedModel,
OpenAIGPTModel,
OpenAIGPTLMHeadModel,
OpenAIGPTDoubleHeadsModel,
load_tf_weights_in_openai_gpt,
OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_transfo_xl import (
TransfoXLPreTrainedModel,
TransfoXLModel,
TransfoXLLMHeadModel,
AdaptiveEmbedding,
load_tf_weights_in_transfo_xl, TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_gpt2 import (GPT2PreTrainedModel, GPT2Model,
GPT2LMHeadModel, GPT2DoubleHeadsModel,
load_tf_weights_in_gpt2, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_ctrl import (CTRLPreTrainedModel, CTRLModel,
CTRLLMHeadModel,
CTRL_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_xlnet import (XLNetPreTrainedModel, XLNetModel, XLNetLMHeadModel,
XLNetForSequenceClassification, XLNetForTokenClassification,
XLNetForMultipleChoice, XLNetForQuestionAnsweringSimple,
XLNetForQuestionAnswering, load_tf_weights_in_xlnet,
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_xlm import (XLMPreTrainedModel , XLMModel,
XLMWithLMHeadModel, XLMForSequenceClassification,
XLMForQuestionAnswering, XLMForQuestionAnsweringSimple,
XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_roberta import (RobertaForMaskedLM, RobertaModel,
RobertaForSequenceClassification, RobertaForMultipleChoice,
RobertaForTokenClassification, RobertaForQuestionAnswering,
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_distilbert import (DistilBertPreTrainedModel, DistilBertForMaskedLM, DistilBertModel,
DistilBertForSequenceClassification, DistilBertForQuestionAnswering,
load_tf_weights_in_transfo_xl,
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_gpt2 import (
GPT2PreTrainedModel,
GPT2Model,
GPT2LMHeadModel,
GPT2DoubleHeadsModel,
load_tf_weights_in_gpt2,
GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_ctrl import CTRLPreTrainedModel, CTRLModel, CTRLLMHeadModel, CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
from .modeling_xlnet import (
XLNetPreTrainedModel,
XLNetModel,
XLNetLMHeadModel,
XLNetForSequenceClassification,
XLNetForTokenClassification,
XLNetForMultipleChoice,
XLNetForQuestionAnsweringSimple,
XLNetForQuestionAnswering,
load_tf_weights_in_xlnet,
XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_xlm import (
XLMPreTrainedModel,
XLMModel,
XLMWithLMHeadModel,
XLMForSequenceClassification,
XLMForQuestionAnswering,
XLMForQuestionAnsweringSimple,
XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_roberta import (
RobertaForMaskedLM,
RobertaModel,
RobertaForSequenceClassification,
RobertaForMultipleChoice,
RobertaForTokenClassification,
RobertaForQuestionAnswering,
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_distilbert import (
DistilBertPreTrainedModel,
DistilBertForMaskedLM,
DistilBertModel,
DistilBertForSequenceClassification,
DistilBertForQuestionAnswering,
DistilBertForTokenClassification,
DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_camembert import (CamembertForMaskedLM, CamembertModel,
CamembertForSequenceClassification, CamembertForMultipleChoice,
DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_camembert import (
CamembertForMaskedLM,
CamembertModel,
CamembertForSequenceClassification,
CamembertForMultipleChoice,
CamembertForTokenClassification,
CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_encoder_decoder import PreTrainedEncoderDecoder, Model2Model
from .modeling_t5 import (T5PreTrainedModel, T5Model, T5WithLMHeadModel,
from .modeling_t5 import (
T5PreTrainedModel,
T5Model,
T5WithLMHeadModel,
load_tf_weights_in_t5,
T5_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_albert import (AlbertPreTrainedModel, AlbertModel, AlbertForMaskedLM, AlbertForSequenceClassification,
T5_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_albert import (
AlbertPreTrainedModel,
AlbertModel,
AlbertForMaskedLM,
AlbertForSequenceClassification,
AlbertForQuestionAnswering,
load_tf_weights_in_albert, ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_xlm_roberta import (XLMRobertaForMaskedLM, XLMRobertaModel, XLMRobertaForMultipleChoice,
XLMRobertaForSequenceClassification, XLMRobertaForTokenClassification)
load_tf_weights_in_albert,
ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_xlm_roberta import (
XLMRobertaForMaskedLM,
XLMRobertaModel,
XLMRobertaForMultipleChoice,
XLMRobertaForSequenceClassification,
XLMRobertaForTokenClassification,
)
from .modeling_mmbt import ModalEmbeddings, MMBTModel, MMBTForClassification
# Optimization
from .optimization import (AdamW, get_constant_schedule, get_constant_schedule_with_warmup, get_cosine_schedule_with_warmup,
get_cosine_with_hard_restarts_schedule_with_warmup, get_linear_schedule_with_warmup)
from .optimization import (
AdamW,
get_constant_schedule,
get_constant_schedule_with_warmup,
get_cosine_schedule_with_warmup,
get_cosine_with_hard_restarts_schedule_with_warmup,
get_linear_schedule_with_warmup,
)
# TensorFlow
if is_tf_available():
from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, TFSequenceSummary, shape_list
from .modeling_tf_auto import (TFAutoModel, TFAutoModelForSequenceClassification, TFAutoModelForQuestionAnswering,
TFAutoModelWithLMHead, TFAutoModelForTokenClassification, TF_ALL_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_bert import (TFBertPreTrainedModel, TFBertMainLayer, TFBertEmbeddings,
TFBertModel, TFBertForPreTraining,
TFBertForMaskedLM, TFBertForNextSentencePrediction,
TFBertForSequenceClassification, TFBertForMultipleChoice,
TFBertForTokenClassification, TFBertForQuestionAnswering,
TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_gpt2 import (TFGPT2PreTrainedModel, TFGPT2MainLayer,
TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel,
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_openai import (TFOpenAIGPTPreTrainedModel, TFOpenAIGPTMainLayer,
TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel,
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_transfo_xl import (TFTransfoXLPreTrainedModel, TFTransfoXLMainLayer,
TFTransfoXLModel, TFTransfoXLLMHeadModel,
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_xlnet import (TFXLNetPreTrainedModel, TFXLNetMainLayer,
TFXLNetModel, TFXLNetLMHeadModel,
from .modeling_tf_auto import (
TFAutoModel,
TFAutoModelForSequenceClassification,
TFAutoModelForQuestionAnswering,
TFAutoModelWithLMHead,
TFAutoModelForTokenClassification,
TF_ALL_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_bert import (
TFBertPreTrainedModel,
TFBertMainLayer,
TFBertEmbeddings,
TFBertModel,
TFBertForPreTraining,
TFBertForMaskedLM,
TFBertForNextSentencePrediction,
TFBertForSequenceClassification,
TFBertForMultipleChoice,
TFBertForTokenClassification,
TFBertForQuestionAnswering,
TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_gpt2 import (
TFGPT2PreTrainedModel,
TFGPT2MainLayer,
TFGPT2Model,
TFGPT2LMHeadModel,
TFGPT2DoubleHeadsModel,
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_openai import (
TFOpenAIGPTPreTrainedModel,
TFOpenAIGPTMainLayer,
TFOpenAIGPTModel,
TFOpenAIGPTLMHeadModel,
TFOpenAIGPTDoubleHeadsModel,
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_transfo_xl import (
TFTransfoXLPreTrainedModel,
TFTransfoXLMainLayer,
TFTransfoXLModel,
TFTransfoXLLMHeadModel,
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_xlnet import (
TFXLNetPreTrainedModel,
TFXLNetMainLayer,
TFXLNetModel,
TFXLNetLMHeadModel,
TFXLNetForSequenceClassification,
TFXLNetForTokenClassification,
TFXLNetForQuestionAnsweringSimple,
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP)
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_xlm import (TFXLMPreTrainedModel, TFXLMMainLayer,
TFXLMModel, TFXLMWithLMHeadModel,
from .modeling_tf_xlm import (
TFXLMPreTrainedModel,
TFXLMMainLayer,
TFXLMModel,
TFXLMWithLMHeadModel,
TFXLMForSequenceClassification,
TFXLMForQuestionAnsweringSimple,
TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_roberta import (TFRobertaPreTrainedModel, TFRobertaMainLayer,
TFRobertaModel, TFRobertaForMaskedLM,
from .modeling_tf_roberta import (
TFRobertaPreTrainedModel,
TFRobertaMainLayer,
TFRobertaModel,
TFRobertaForMaskedLM,
TFRobertaForSequenceClassification,
TFRobertaForTokenClassification,
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_distilbert import (TFDistilBertPreTrainedModel, TFDistilBertMainLayer,
TFDistilBertModel, TFDistilBertForMaskedLM,
from .modeling_tf_distilbert import (
TFDistilBertPreTrainedModel,
TFDistilBertMainLayer,
TFDistilBertModel,
TFDistilBertForMaskedLM,
TFDistilBertForSequenceClassification,
TFDistilBertForTokenClassification,
TFDistilBertForQuestionAnswering,
TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_ctrl import (TFCTRLPreTrainedModel, TFCTRLModel,
from .modeling_tf_ctrl import (
TFCTRLPreTrainedModel,
TFCTRLModel,
TFCTRLLMHeadModel,
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP)
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_albert import (TFAlbertPreTrainedModel, TFAlbertModel, TFAlbertForMaskedLM,
from .modeling_tf_albert import (
TFAlbertPreTrainedModel,
TFAlbertModel,
TFAlbertForMaskedLM,
TFAlbertForSequenceClassification,
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_t5 import (TFT5PreTrainedModel, TFT5Model, TFT5WithLMHeadModel,
TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_t5 import TFT5PreTrainedModel, TFT5Model, TFT5WithLMHeadModel, TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP
# Optimization
from .optimization_tf import (WarmUp, create_optimizer, AdamWeightDecay, GradientAccumulator)
# TF 2.0 <=> PyTorch conversion utilities
from .modeling_tf_pytorch_utils import (convert_tf_weight_name_to_pt_weight_name,
load_pytorch_checkpoint_in_tf2_model,
load_pytorch_weights_in_tf2_model,
load_pytorch_model_in_tf2_model,
load_tf2_checkpoint_in_pytorch_model,
load_tf2_weights_in_pytorch_model,
load_tf2_model_in_pytorch_model)
from .optimization_tf import WarmUp, create_optimizer, AdamWeightDecay, GradientAccumulator
# Pipelines
from .pipelines import pipeline, PipelineDataFormat, CsvPipelineDataFormat, JsonPipelineDataFormat, PipedPipelineDataFormat, \
Pipeline, FeatureExtractionPipeline, QuestionAnsweringPipeline, NerPipeline, TextClassificationPipeline
if not is_tf_available() and not is_torch_available():
logger.warning("Neither PyTorch nor TensorFlow >= 2.0 have been found."
logger.warning(
"Neither PyTorch nor TensorFlow >= 2.0 have been found."
"Models won't be available and only tokenizers, configuration"
"and file/data utilities can be used.")
"and file/data utilities can be used."
)
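For orientation, a minimal sketch of how downstream code can gate on the availability helpers exported above (hypothetical caller code, not part of this diff):

# Hypothetical caller: prefer the PyTorch classes, fall back to the TF 2.0 ones.
from transformers import is_tf_available, is_torch_available

if is_torch_available():
    from transformers import BertModel as Model
elif is_tf_available():
    from transformers import TFBertModel as Model
else:
    raise RuntimeError("Install PyTorch or TensorFlow >= 2.0 to load models.")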
# coding: utf8
def main():
import sys
if len(sys.argv) < 2 or sys.argv[1] not in ["convert", "train", "predict", "serve"]:
print(
"First argument to `transformers` command line interface should be one of: \n"
">> convert serve train predict")
">> convert serve train predict"
)
if sys.argv[1] == "convert":
from transformers.commands import convert
convert(sys.argv)
elif sys.argv[1] == "train":
from transformers.commands import train
train(sys.argv)
elif sys.argv[1] == "serve":
pass
......@@ -19,7 +24,6 @@ def main():
# parser = ArgumentParser('Transformers CLI tool', usage='transformers serve <command> [<args>]')
# commands_parser = parser.add_subparsers(help='transformers-cli command helpers')
# # Register commands
# ServeCommand.register_subcommand(commands_parser)
......@@ -33,5 +37,6 @@ def main():
# service = args.func(args)
# service.run()
if __name__ == '__main__':
if __name__ == "__main__":
main()
from abc import ABC, abstractmethod
from argparse import ArgumentParser
class BaseTransformersCLICommand(ABC):
@staticmethod
@abstractmethod
......
from argparse import ArgumentParser, Namespace
from logging import getLogger
from transformers import AutoModel, AutoTokenizer
from transformers.commands import BaseTransformersCLICommand
......@@ -11,12 +9,12 @@ def convert_command_factory(args: Namespace):
Factory function used to convert a TF 1.0 model checkpoint into a PyTorch checkpoint.
:return: ConvertCommand
"""
return ConvertCommand(args.model_type, args.tf_checkpoint, args.pytorch_dump_output,
args.config, args.finetuning_task_name)
return ConvertCommand(
args.model_type, args.tf_checkpoint, args.pytorch_dump_output, args.config, args.finetuning_task_name
)
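For orientation, a minimal sketch (placeholder paths, hypothetical values) of the command object this factory builds, using the constructor defined below:

# Hypothetical: roughly what convert_command_factory(args) returns for a BERT checkpoint.
cmd = ConvertCommand(
    "bert",                    # model_type
    "/tmp/bert_model.ckpt",    # tf_checkpoint (placeholder path)
    "/tmp/pytorch_model.bin",  # pytorch_dump_output (placeholder path)
    "/tmp/bert_config.json",   # config (placeholder path)
    None,                      # finetuning_task_name
)
cmd.run()  # dispatches on model_type; see run() below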
class ConvertCommand(BaseTransformersCLICommand):
@staticmethod
def register_subcommand(parser: ArgumentParser):
"""
......@@ -24,25 +22,39 @@ class ConvertCommand(BaseTransformersCLICommand):
:param parser: Root parser to register command-specific arguments
:return:
"""
train_parser = parser.add_parser('convert', help="CLI tool to convert a model from original "
"author checkpoints to Transformers PyTorch checkpoints.")
train_parser.add_argument('--model_type', type=str, required=True,
help='Model\'s type.')
train_parser.add_argument('--tf_checkpoint', type=str, required=True,
help='TensorFlow checkpoint path or folder.')
train_parser.add_argument('--pytorch_dump_output', type=str, required=True,
help='Path to the PyTorch saved model output.')
train_parser.add_argument('--config', type=str, default="",
help='Configuration file path or folder.')
train_parser.add_argument('--finetuning_task_name', type=str, default=None,
help='Optional fine-tuning task name if the TF model was a finetuned model.')
train_parser = parser.add_parser(
"convert",
help="CLI tool to run convert model from original "
"author checkpoints to Transformesr PyTorch checkpoints.",
)
train_parser.add_argument("--model_type", type=str, required=True, help="Model's type.")
train_parser.add_argument(
"--tf_checkpoint", type=str, required=True, help="TensorFlow checkpoint path or folder."
)
train_parser.add_argument(
"--pytorch_dump_output", type=str, required=True, help="Path to the PyTorch savd model output."
)
train_parser.add_argument("--config", type=str, default="", help="Configuration file path or folder.")
train_parser.add_argument(
"--finetuning_task_name",
type=str,
default=None,
help="Optional fine-tuning task name if the TF model was a finetuned model.",
)
train_parser.set_defaults(func=convert_command_factory)
def __init__(self, model_type: str, tf_checkpoint: str, pytorch_dump_output: str,
config: str, finetuning_task_name: str, *args):
self._logger = getLogger('transformers-cli/converting')
def __init__(
self,
model_type: str,
tf_checkpoint: str,
pytorch_dump_output: str,
config: str,
finetuning_task_name: str,
*args
):
self._logger = getLogger("transformers-cli/converting")
self._logger.info('Loading model {}'.format(model_type))
self._logger.info("Loading model {}".format(model_type))
self._model_type = model_type
self._tf_checkpoint = tf_checkpoint
self._pytorch_dump_output = pytorch_dump_output
......@@ -52,63 +64,80 @@ class ConvertCommand(BaseTransformersCLICommand):
def run(self):
if self._model_type == "bert":
try:
from transformers.convert_bert_original_tf_checkpoint_to_pytorch import convert_tf_checkpoint_to_pytorch
from transformers.convert_bert_original_tf_checkpoint_to_pytorch import (
convert_tf_checkpoint_to_pytorch,
)
except ImportError:
msg = "transformers can only be used from the commandline to convert TensorFlow models in PyTorch, " \
"In that case, it requires TensorFlow to be installed. Please see " \
msg = (
"transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
"In that case, it requires TensorFlow to be installed. Please see "
"https://www.tensorflow.org/install/ for installation instructions."
)
raise ImportError(msg)
convert_tf_checkpoint_to_pytorch(self._tf_checkpoint, self._config, self._pytorch_dump_output)
elif self._model_type == "gpt":
from transformers.convert_openai_original_tf_checkpoint_to_pytorch import convert_openai_checkpoint_to_pytorch
convert_openai_checkpoint_to_pytorch(self._tf_checkpoint,
self._config,
self._pytorch_dump_output)
from transformers.convert_openai_original_tf_checkpoint_to_pytorch import (
convert_openai_checkpoint_to_pytorch,
)
convert_openai_checkpoint_to_pytorch(self._tf_checkpoint, self._config, self._pytorch_dump_output)
elif self._model_type == "transfo_xl":
try:
from transformers.convert_transfo_xl_original_tf_checkpoint_to_pytorch import convert_transfo_xl_checkpoint_to_pytorch
from transformers.convert_transfo_xl_original_tf_checkpoint_to_pytorch import (
convert_transfo_xl_checkpoint_to_pytorch,
)
except ImportError:
msg = "transformers can only be used from the commandline to convert TensorFlow models in PyTorch, " \
"In that case, it requires TensorFlow to be installed. Please see " \
msg = (
"transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
"In that case, it requires TensorFlow to be installed. Please see "
"https://www.tensorflow.org/install/ for installation instructions."
)
raise ImportError(msg)
if 'ckpt' in self._tf_checkpoint.lower():
if "ckpt" in self._tf_checkpoint.lower():
TF_CHECKPOINT = self._tf_checkpoint
TF_DATASET_FILE = ""
else:
TF_DATASET_FILE = self._tf_checkpoint
TF_CHECKPOINT = ""
convert_transfo_xl_checkpoint_to_pytorch(TF_CHECKPOINT,
self._config,
self._pytorch_dump_output,
TF_DATASET_FILE)
convert_transfo_xl_checkpoint_to_pytorch(
TF_CHECKPOINT, self._config, self._pytorch_dump_output, TF_DATASET_FILE
)
elif self._model_type == "gpt2":
try:
from transformers.convert_gpt2_original_tf_checkpoint_to_pytorch import convert_gpt2_checkpoint_to_pytorch
from transformers.convert_gpt2_original_tf_checkpoint_to_pytorch import (
convert_gpt2_checkpoint_to_pytorch,
)
except ImportError:
msg = "transformers can only be used from the commandline to convert TensorFlow models in PyTorch, " \
"In that case, it requires TensorFlow to be installed. Please see " \
msg = (
"transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
"In that case, it requires TensorFlow to be installed. Please see "
"https://www.tensorflow.org/install/ for installation instructions."
)
raise ImportError(msg)
convert_gpt2_checkpoint_to_pytorch(self._tf_checkpoint, self._config, self._pytorch_dump_output)
elif self._model_type == "xlnet":
try:
from transformers.convert_xlnet_original_tf_checkpoint_to_pytorch import convert_xlnet_checkpoint_to_pytorch
from transformers.convert_xlnet_original_tf_checkpoint_to_pytorch import (
convert_xlnet_checkpoint_to_pytorch,
)
except ImportError:
msg = "transformers can only be used from the commandline to convert TensorFlow models in PyTorch, " \
"In that case, it requires TensorFlow to be installed. Please see " \
msg = (
"transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
"In that case, it requires TensorFlow to be installed. Please see "
"https://www.tensorflow.org/install/ for installation instructions."
)
raise ImportError(msg)
convert_xlnet_checkpoint_to_pytorch(self._tf_checkpoint,
self._config,
self._pytorch_dump_output,
self._finetuning_task_name)
convert_xlnet_checkpoint_to_pytorch(
self._tf_checkpoint, self._config, self._pytorch_dump_output, self._finetuning_task_name
)
elif self._model_type == "xlm":
from transformers.convert_xlm_original_pytorch_checkpoint_to_pytorch import convert_xlm_checkpoint_to_pytorch
from transformers.convert_xlm_original_pytorch_checkpoint_to_pytorch import (
convert_xlm_checkpoint_to_pytorch,
)
convert_xlm_checkpoint_to_pytorch(self._tf_checkpoint, self._pytorch_dump_output)
else:
......
......@@ -8,13 +8,16 @@ def download_command_factory(args):
class DownloadCommand(BaseTransformersCLICommand):
@staticmethod
def register_subcommand(parser: ArgumentParser):
download_parser = parser.add_parser('download')
download_parser.add_argument('--cache-dir', type=str, default=None, help='Path to location to store the models')
download_parser.add_argument('--force', action='store_true', help='Force the model to be downloaded even if already in cache-dir')
download_parser.add_argument('model', type=str, help='Name of the model to download')
download_parser = parser.add_parser("download")
download_parser.add_argument(
"--cache-dir", type=str, default=None, help="Path to location to store the models"
)
download_parser.add_argument(
"--force", action="store_true", help="Force the model to be download even if already in cache-dir"
)
download_parser.add_argument("model", type=str, help="Name of the model to download")
download_parser.set_defaults(func=download_command_factory)
def __init__(self, model: str, cache: str, force: bool):
......
......@@ -2,7 +2,7 @@ import logging
from argparse import ArgumentParser
from transformers.commands import BaseTransformersCLICommand
from transformers.pipelines import pipeline, Pipeline, PipelineDataFormat, SUPPORTED_TASKS
from transformers.pipelines import SUPPORTED_TASKS, Pipeline, PipelineDataFormat, pipeline
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
......@@ -10,52 +10,72 @@ logger = logging.getLogger(__name__) # pylint: disable=invalid-name
def try_infer_format_from_ext(path: str):
if not path:
return 'pipe'
return "pipe"
for ext in PipelineDataFormat.SUPPORTED_FORMATS:
if path.endswith(ext):
return ext
raise Exception(
'Unable to determine file format from file extension {}. '
'Please provide the format through --format {}'.format(path, PipelineDataFormat.SUPPORTED_FORMATS)
"Unable to determine file format from file extension {}. "
"Please provide the format through --format {}".format(path, PipelineDataFormat.SUPPORTED_FORMATS)
)
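A minimal sketch of the inference rules above (file names hypothetical; assumes "csv" is among PipelineDataFormat.SUPPORTED_FORMATS):

try_infer_format_from_ext("")          # -> "pipe": an empty path means stdin/stdout piping
try_infer_format_from_ext("data.csv")  # -> "csv", matched on the extension
try_infer_format_from_ext("data.xyz")  # raises: extension not in SUPPORTED_FORMATS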
def run_command_factory(args):
nlp = pipeline(task=args.task,
nlp = pipeline(
task=args.task,
model=args.model if args.model else None,
config=args.config,
tokenizer=args.tokenizer,
device=args.device)
format = try_infer_format_from_ext(args.input) if args.format == 'infer' else args.format
reader = PipelineDataFormat.from_str(format=format,
device=args.device,
)
format = try_infer_format_from_ext(args.input) if args.format == "infer" else args.format
reader = PipelineDataFormat.from_str(
format=format,
output_path=args.output,
input_path=args.input,
column=args.column if args.column else nlp.default_input_names,
overwrite=args.overwrite)
overwrite=args.overwrite,
)
return RunCommand(nlp, reader)
class RunCommand(BaseTransformersCLICommand):
def __init__(self, nlp: Pipeline, reader: PipelineDataFormat):
self._nlp = nlp
self._reader = reader
@staticmethod
def register_subcommand(parser: ArgumentParser):
run_parser = parser.add_parser('run', help="Run a pipeline through the CLI")
run_parser.add_argument('--task', choices=SUPPORTED_TASKS.keys(), help='Task to run')
run_parser.add_argument('--input', type=str, help='Path to the file to use for inference')
run_parser.add_argument('--output', type=str, help='Path to the file where results will be written.')
run_parser.add_argument('--model', type=str, help='Name or path to the model to instantiate.')
run_parser.add_argument('--config', type=str, help='Name or path to the model\'s config to instantiate.')
run_parser.add_argument('--tokenizer', type=str, help='Name of the tokenizer to use. (default: same as the model name)')
run_parser.add_argument('--column', type=str, help='Name of the column to use as input. (For multi-column inputs such as QA, use column1,column2)')
run_parser.add_argument('--format', type=str, default='infer', choices=PipelineDataFormat.SUPPORTED_FORMATS, help='Input format to read from')
run_parser.add_argument('--device', type=int, default=-1, help='Device to run on: -1 for CPU, >= 0 for the corresponding GPU (default: -1)')
run_parser.add_argument('--overwrite', action='store_true', help='Allow overwriting the output file.')
run_parser = parser.add_parser("run", help="Run a pipeline through the CLI")
run_parser.add_argument("--task", choices=SUPPORTED_TASKS.keys(), help="Task to run")
run_parser.add_argument("--input", type=str, help="Path to the file to use for inference")
run_parser.add_argument("--output", type=str, help="Path to the file that will be used post to write results.")
run_parser.add_argument("--model", type=str, help="Name or path to the model to instantiate.")
run_parser.add_argument("--config", type=str, help="Name or path to the model's config to instantiate.")
run_parser.add_argument(
"--tokenizer", type=str, help="Name of the tokenizer to use. (default: same as the model name)"
)
run_parser.add_argument(
"--column",
type=str,
help="Name of the column to use as input. (For multi columns input as QA use column1,columns2)",
)
run_parser.add_argument(
"--format",
type=str,
default="infer",
choices=PipelineDataFormat.SUPPORTED_FORMATS,
help="Input format to read from",
)
run_parser.add_argument(
"--device",
type=int,
default=-1,
help="Indicate the device to run onto, -1 indicates CPU, >= 0 indicates GPU (default: -1)",
)
run_parser.add_argument("--overwrite", action="store_true", help="Allow overwriting the output file.")
run_parser.set_defaults(func=run_command_factory)
def run(self):
......@@ -71,9 +91,6 @@ class RunCommand(BaseTransformersCLICommand):
# Saving data
if self._nlp.binary_output:
binary_path = self._reader.save_binary(outputs)
logger.warning('Current pipeline requires output to be in binary format, saving at {}'.format(binary_path))
logger.warning("Current pipeline requires output to be in binary format, saving at {}".format(binary_path))
else:
self._reader.save(outputs)