Commit 31c23bd5 authored by thomwolf

[BIG] pytorch-transformers => transformers

parent 2f071fcb
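
For downstream code, the rename is a straight package-name swap: every `pytorch_transformers` import becomes `transformers`, as the hunks below show. A minimal before/after sketch (tokenizer call as used in the README snippet further down):

# before: pytorch-transformers 1.2.0
from pytorch_transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

# after: transformers 2.0.0
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')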
@@ -26,12 +26,12 @@ import torch
 import torch.nn.functional as F
 import numpy as np
-from pytorch_transformers import GPT2Config, OpenAIGPTConfig, XLNetConfig, TransfoXLConfig
+from transformers import GPT2Config, OpenAIGPTConfig, XLNetConfig, TransfoXLConfig
-from pytorch_transformers import GPT2LMHeadModel, GPT2Tokenizer
+from transformers import GPT2LMHeadModel, GPT2Tokenizer
-from pytorch_transformers import OpenAIGPTLMHeadModel, OpenAIGPTTokenizer
+from transformers import OpenAIGPTLMHeadModel, OpenAIGPTTokenizer
-from pytorch_transformers import XLNetLMHeadModel, XLNetTokenizer
+from transformers import XLNetLMHeadModel, XLNetTokenizer
-from pytorch_transformers import TransfoXLLMHeadModel, TransfoXLTokenizer
+from transformers import TransfoXLLMHeadModel, TransfoXLTokenizer
 logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
...
@@ -31,7 +31,7 @@ from torch.utils.data.distributed import DistributedSampler
 from tensorboardX import SummaryWriter
 from tqdm import tqdm, trange
-from pytorch_transformers import (WEIGHTS_NAME, BertConfig,
+from transformers import (WEIGHTS_NAME, BertConfig,
                                   BertForSequenceClassification, BertTokenizer,
                                   RobertaConfig,
                                   RobertaForSequenceClassification,
@@ -44,12 +44,12 @@ from pytorch_transformers import (WEIGHTS_NAME, BertConfig,
                                   DistilBertForSequenceClassification,
                                   DistilBertTokenizer)
-from pytorch_transformers import AdamW, WarmupLinearSchedule
+from transformers import AdamW, WarmupLinearSchedule
-from pytorch_transformers import glue_compute_metrics as compute_metrics
+from transformers import glue_compute_metrics as compute_metrics
-from pytorch_transformers import glue_output_modes as output_modes
+from transformers import glue_output_modes as output_modes
-from pytorch_transformers import glue_processors as processors
+from transformers import glue_processors as processors
-from pytorch_transformers import glue_convert_examples_to_features as convert_examples_to_features
+from transformers import glue_convert_examples_to_features as convert_examples_to_features
 logger = logging.getLogger(__name__)
@@ -137,7 +137,7 @@ def train(args, train_dataset, model, tokenizer):
                       'token_type_ids': batch[2] if args.model_type in ['bert', 'xlnet'] else None,  # XLM, DistilBERT and RoBERTa don't use segment_ids
                       'labels':         batch[3]}
             outputs = model(**inputs)
-            loss = outputs[0]  # model outputs are always tuple in pytorch-transformers (see doc)
+            loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
             if args.n_gpu > 1:
                 loss = loss.mean()  # mean() to average on multi-gpu parallel training
@@ -483,7 +483,7 @@ def main():
         checkpoints = [args.output_dir]
         if args.eval_all_checkpoints:
             checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
-            logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
+            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
         logger.info("Evaluate the following checkpoints: %s", checkpoints)
         for checkpoint in checkpoints:
             global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
...
@@ -35,7 +35,7 @@ from torch.utils.data.distributed import DistributedSampler
 from tensorboardX import SummaryWriter
 from tqdm import tqdm, trange
-from pytorch_transformers import (WEIGHTS_NAME, AdamW, WarmupLinearSchedule,
+from transformers import (WEIGHTS_NAME, AdamW, WarmupLinearSchedule,
                                   BertConfig, BertForMaskedLM, BertTokenizer,
                                   GPT2Config, GPT2LMHeadModel, GPT2Tokenizer,
                                   OpenAIGPTConfig, OpenAIGPTLMHeadModel, OpenAIGPTTokenizer,
@@ -188,7 +188,7 @@ def train(args, train_dataset, model, tokenizer):
             labels = labels.to(args.device)
             model.train()
             outputs = model(inputs, masked_lm_labels=labels) if args.mlm else model(inputs, labels=labels)
-            loss = outputs[0]  # model outputs are always tuple in pytorch-transformers (see doc)
+            loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
             if args.n_gpu > 1:
                 loss = loss.mean()  # mean() to average on multi-gpu parallel training
@@ -481,7 +481,7 @@ def main():
         checkpoints = [args.output_dir]
         if args.eval_all_checkpoints:
             checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
-            logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
+            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
         logger.info("Evaluate the following checkpoints: %s", checkpoints)
         for checkpoint in checkpoints:
             global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
...
@@ -32,13 +32,13 @@ from torch.utils.data.distributed import DistributedSampler
 from tensorboardX import SummaryWriter
 from tqdm import tqdm, trange
-from pytorch_transformers import (WEIGHTS_NAME, BertConfig,
+from transformers import (WEIGHTS_NAME, BertConfig,
                                   BertForMultipleChoice, BertTokenizer,
                                   XLNetConfig, XLNetForMultipleChoice,
                                   XLNetTokenizer, RobertaConfig,
                                   RobertaForMultipleChoice, RobertaTokenizer)
-from pytorch_transformers import AdamW, WarmupLinearSchedule
+from transformers import AdamW, WarmupLinearSchedule
 from utils_multiple_choice import (convert_examples_to_features, processors)
@@ -141,7 +141,7 @@ def train(args, train_dataset, model, tokenizer):
                       'token_type_ids': batch[2] if args.model_type in ['bert', 'xlnet'] else None,  # XLM don't use segment_ids
                       'labels':         batch[3]}
             outputs = model(**inputs)
-            loss = outputs[0]  # model outputs are always tuple in pytorch-transformers (see doc)
+            loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
             if args.n_gpu > 1:
                 loss = loss.mean()  # mean() to average on multi-gpu parallel training
@@ -508,7 +508,7 @@ def main():
         checkpoints = [args.output_dir]
         if args.eval_all_checkpoints:
             checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
-            logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
+            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
         logger.info("Evaluate the following checkpoints: %s", checkpoints)
         for checkpoint in checkpoints:
             global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
@@ -524,7 +524,7 @@ def main():
         checkpoints = [args.output_dir]
         # if args.eval_all_checkpoints: # can not use this to do test!!
         #     checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
-        #     logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
+        #     logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
         logger.info("Evaluate the following checkpoints: %s", checkpoints)
         for checkpoint in checkpoints:
             global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
...
@@ -32,7 +32,7 @@ from tqdm import tqdm, trange
 from tensorboardX import SummaryWriter
-from pytorch_transformers import (WEIGHTS_NAME, BertConfig,
+from transformers import (WEIGHTS_NAME, BertConfig,
                                   BertForQuestionAnswering, BertTokenizer,
                                   XLMConfig, XLMForQuestionAnswering,
                                   XLMTokenizer, XLNetConfig,
@@ -40,7 +40,7 @@ from pytorch_transformers import (WEIGHTS_NAME, BertConfig,
                                   XLNetTokenizer,
                                   DistilBertConfig, DistilBertForQuestionAnswering, DistilBertTokenizer)
-from pytorch_transformers import AdamW, WarmupLinearSchedule
+from transformers import AdamW, WarmupLinearSchedule
 from utils_squad import (read_squad_examples, convert_examples_to_features,
                          RawResult, write_predictions,
@@ -142,7 +142,7 @@ def train(args, train_dataset, model, tokenizer):
                 inputs.update({'cls_index': batch[5],
                                'p_mask': batch[6]})
             outputs = model(**inputs)
-            loss = outputs[0]  # model outputs are always tuple in pytorch-transformers (see doc)
+            loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
             if args.n_gpu > 1:
                 loss = loss.mean()  # mean() to average on multi-gpu parallel (not distributed) training
@@ -510,7 +510,7 @@ def main():
         checkpoints = [args.output_dir]
         if args.eval_all_checkpoints:
             checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
-            logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN)  # Reduce model loading logs
+            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce model loading logs
         logger.info("Evaluate the following checkpoints: %s", checkpoints)
...
 import tensorflow as tf
 import tensorflow_datasets
-from pytorch_transformers import *
+from transformers import *
 # Load dataset, tokenizer, model from pretrained model/vocabulary
 tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
...
@@ -24,7 +24,7 @@ import math
 import collections
 from io import open
-from pytorch_transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize
+from transformers.tokenization_bert import BasicTokenizer, whitespace_tokenize
 # Required by XLNet evaluation method to compute optimal threshold (see write_predictions_extended() method)
 from utils_squad_evaluate import find_all_best_thresh_v2, make_qid_to_has_ans, get_raw_scores
...
-from pytorch_transformers import (
+from transformers import (
     AutoTokenizer, AutoConfig, AutoModel, AutoModelWithLMHead, AutoModelForSequenceClassification, AutoModelForQuestionAnswering
 )
-from pytorch_transformers.file_utils import add_start_docstrings
+from transformers.file_utils import add_start_docstrings
 dependencies = ['torch', 'tqdm', 'boto3', 'requests', 'regex', 'sentencepiece', 'sacremoses']
@@ -11,12 +11,12 @@ def config(*args, **kwargs):
         # Using torch.hub !
         import torch
-        config = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased')  # Download configuration from S3 and cache.
+        config = torch.hub.load('huggingface/transformers', 'config', 'bert-base-uncased')  # Download configuration from S3 and cache.
-        config = torch.hub.load('huggingface/pytorch-transformers', 'config', './test/bert_saved_model/')  # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
+        config = torch.hub.load('huggingface/transformers', 'config', './test/bert_saved_model/')  # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
-        config = torch.hub.load('huggingface/pytorch-transformers', 'config', './test/bert_saved_model/my_configuration.json')
+        config = torch.hub.load('huggingface/transformers', 'config', './test/bert_saved_model/my_configuration.json')
-        config = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False)
+        config = torch.hub.load('huggingface/transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False)
         assert config.output_attention == True
-        config, unused_kwargs = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False, return_unused_kwargs=True)
+        config, unused_kwargs = torch.hub.load('huggingface/transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False, return_unused_kwargs=True)
         assert config.output_attention == True
         assert unused_kwargs == {'foo': False}
@@ -31,8 +31,8 @@ def tokenizer(*args, **kwargs):
         # Using torch.hub !
         import torch
-        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'bert-base-uncased')  # Download vocabulary from S3 and cache.
+        tokenizer = torch.hub.load('huggingface/transformers', 'tokenizer', 'bert-base-uncased')  # Download vocabulary from S3 and cache.
-        tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', './test/bert_saved_model/')  # E.g. tokenizer was saved using `save_pretrained('./test/saved_model/')`
+        tokenizer = torch.hub.load('huggingface/transformers', 'tokenizer', './test/bert_saved_model/')  # E.g. tokenizer was saved using `save_pretrained('./test/saved_model/')`
     """
@@ -45,13 +45,13 @@ def model(*args, **kwargs):
         # Using torch.hub !
         import torch
-        model = torch.hub.load('huggingface/pytorch-transformers', 'model', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
+        model = torch.hub.load('huggingface/transformers', 'model', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
-        model = torch.hub.load('huggingface/pytorch-transformers', 'model', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+        model = torch.hub.load('huggingface/transformers', 'model', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-        model = torch.hub.load('huggingface/pytorch-transformers', 'model', 'bert-base-uncased', output_attention=True)  # Update configuration during loading
+        model = torch.hub.load('huggingface/transformers', 'model', 'bert-base-uncased', output_attention=True)  # Update configuration during loading
         assert model.config.output_attention == True
         # Loading from a TF checkpoint file instead of a PyTorch model (slower)
         config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-        model = torch.hub.load('huggingface/pytorch-transformers', 'model', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+        model = torch.hub.load('huggingface/transformers', 'model', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
     """
@@ -63,13 +63,13 @@ def modelWithLMHead(*args, **kwargs):
         # Using torch.hub !
         import torch
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
+        model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+        model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', 'bert-base-uncased', output_attention=True)  # Update configuration during loading
+        model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', 'bert-base-uncased', output_attention=True)  # Update configuration during loading
         assert model.config.output_attention == True
         # Loading from a TF checkpoint file instead of a PyTorch model (slower)
         config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+        model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
     """
     return AutoModelWithLMHead.from_pretrained(*args, **kwargs)
@@ -81,13 +81,13 @@ def modelForSequenceClassification(*args, **kwargs):
         # Using torch.hub !
         import torch
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
+        model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+        model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', 'bert-base-uncased', output_attention=True)  # Update configuration during loading
+        model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', 'bert-base-uncased', output_attention=True)  # Update configuration during loading
         assert model.config.output_attention == True
         # Loading from a TF checkpoint file instead of a PyTorch model (slower)
         config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+        model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
     """
@@ -100,13 +100,13 @@ def modelForQuestionAnswering(*args, **kwargs):
         # Using torch.hub !
         import torch
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
+        model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', 'bert-base-uncased')  # Download model and configuration from S3 and cache.
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+        model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', './test/bert_model/')  # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', 'bert-base-uncased', output_attention=True)  # Update configuration during loading
+        model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', 'bert-base-uncased', output_attention=True)  # Update configuration during loading
         assert model.config.output_attention == True
         # Loading from a TF checkpoint file instead of a PyTorch model (slower)
         config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-        model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+        model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
     """
     return AutoModelForQuestionAnswering.from_pretrained(*args, **kwargs)
@@ -25,7 +25,7 @@ To create the package for pypi.
    (pypi suggest using twine as other methods upload files via plaintext.)
    Check that you can install it in a virtualenv by running:
-   pip install -i https://testpypi.python.org/pypi pytorch-transformers
+   pip install -i https://testpypi.python.org/pypi transformers
 6. Upload the final version to actual pypi:
    twine upload dist/* -r pypi
@@ -37,8 +37,8 @@ from io import open
 from setuptools import find_packages, setup
 setup(
-    name="pytorch_transformers",
+    name="transformers",
-    version="1.2.0",
+    version="2.0.0",
     author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Google AI Language Team Authors, Open AI team Authors",
     author_email="thomas@huggingface.co",
     description="Repository of pre-trained NLP Transformer models: BERT & RoBERTa, GPT & GPT-2, Transformer-XL, XLNet and XLM",
@@ -46,7 +46,7 @@ setup(
     long_description_content_type="text/markdown",
     keywords='NLP deep learning transformer pytorch BERT GPT GPT-2 google openai CMU',
     license='Apache',
-    url="https://github.com/huggingface/pytorch-transformers",
+    url="https://github.com/huggingface/transformers",
     packages=find_packages(exclude=["*.tests", "*.tests.*",
                                     "tests.*", "tests"]),
     install_requires=['numpy',
@@ -58,7 +58,7 @@ setup(
                       'sacremoses'],
     entry_points={
         'console_scripts': [
-            "pytorch_transformers=pytorch_transformers.__main__:main",
+            "transformers=transformers.__main__:main",
         ]
     },
     # python_requires='>=3.5.0',
...
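
With the renamed package and the console entry point defined in entry_points above, installation and invocation would look roughly as follows; a sketch, where the checkpoint arguments are the placeholders from the usage strings printed by __main__.py below:

pip install transformers==2.0.0
transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT   # TF-to-PyTorch checkpoint conversion CLI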
__version__ = "1.2.0" __version__ = "2.0.0"
# Work around to update TensorFlow's absl.logging threshold which alters the # Work around to update TensorFlow's absl.logging threshold which alters the
# default Python logging output behavior when present. # default Python logging output behavior when present.
...@@ -17,7 +17,7 @@ import logging ...@@ -17,7 +17,7 @@ import logging
logger = logging.getLogger(__name__) # pylint: disable=invalid-name logger = logging.getLogger(__name__) # pylint: disable=invalid-name
# Files and general utilities # Files and general utilities
from .file_utils import (PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE, from .file_utils import (TRANSFORMERS_CACHE, PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE,
cached_path, add_start_docstrings, add_end_docstrings, cached_path, add_start_docstrings, add_end_docstrings,
WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME, CONFIG_NAME, WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME, CONFIG_NAME,
is_tf_available, is_torch_available) is_tf_available, is_torch_available)
......
@@ -5,25 +5,25 @@ def main():
     print(
     "This command line utility let you convert original (author released) model checkpoint to pytorch.\n"
     "It should be used as one of: \n"
-    ">> pytorch_transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT, \n"
+    ">> transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT, \n"
-    ">> pytorch_transformers gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG], \n"
+    ">> transformers gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG], \n"
-    ">> pytorch_transformers transfo_xl TF_CHECKPOINT_OR_DATASET PYTORCH_DUMP_OUTPUT [TF_CONFIG] or \n"
+    ">> transformers transfo_xl TF_CHECKPOINT_OR_DATASET PYTORCH_DUMP_OUTPUT [TF_CONFIG] or \n"
-    ">> pytorch_transformers gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [GPT2_CONFIG] or \n"
+    ">> transformers gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [GPT2_CONFIG] or \n"
-    ">> pytorch_transformers xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME] or \n"
+    ">> transformers xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME] or \n"
-    ">> pytorch_transformers xlm XLM_CHECKPOINT_PATH PYTORCH_DUMP_OUTPUT")
+    ">> transformers xlm XLM_CHECKPOINT_PATH PYTORCH_DUMP_OUTPUT")
     else:
         if sys.argv[1] == "bert":
             try:
                 from .convert_bert_original_tf_checkpoint_to_pytorch import convert_tf_checkpoint_to_pytorch
             except ImportError:
-                print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
+                print("transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
                       "In that case, it requires TensorFlow to be installed. Please see "
                       "https://www.tensorflow.org/install/ for installation instructions.")
                 raise
             if len(sys.argv) != 5:
                 # pylint: disable=line-too-long
-                print("Should be used as `pytorch_transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT`")
+                print("Should be used as `transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT`")
             else:
                 PYTORCH_DUMP_OUTPUT = sys.argv.pop()
                 TF_CONFIG = sys.argv.pop()
@@ -33,7 +33,7 @@ def main():
             from .convert_openai_original_tf_checkpoint_to_pytorch import convert_openai_checkpoint_to_pytorch
             if len(sys.argv) < 4 or len(sys.argv) > 5:
                 # pylint: disable=line-too-long
-                print("Should be used as `pytorch_transformers gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG]`")
+                print("Should be used as `transformers gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG]`")
             else:
                 OPENAI_GPT_CHECKPOINT_FOLDER_PATH = sys.argv[2]
                 PYTORCH_DUMP_OUTPUT = sys.argv[3]
@@ -48,13 +48,13 @@ def main():
             try:
                 from .convert_transfo_xl_original_tf_checkpoint_to_pytorch import convert_transfo_xl_checkpoint_to_pytorch
             except ImportError:
-                print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
+                print("transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
                       "In that case, it requires TensorFlow to be installed. Please see "
                       "https://www.tensorflow.org/install/ for installation instructions.")
                 raise
             if len(sys.argv) < 4 or len(sys.argv) > 5:
                 # pylint: disable=line-too-long
-                print("Should be used as `pytorch_transformers transfo_xl TF_CHECKPOINT/TF_DATASET_FILE PYTORCH_DUMP_OUTPUT [TF_CONFIG]`")
+                print("Should be used as `transformers transfo_xl TF_CHECKPOINT/TF_DATASET_FILE PYTORCH_DUMP_OUTPUT [TF_CONFIG]`")
             else:
                 if 'ckpt' in sys.argv[2].lower():
                     TF_CHECKPOINT = sys.argv[2]
try: try:
from .convert_gpt2_original_tf_checkpoint_to_pytorch import convert_gpt2_checkpoint_to_pytorch from .convert_gpt2_original_tf_checkpoint_to_pytorch import convert_gpt2_checkpoint_to_pytorch
except ImportError: except ImportError:
print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, " print("transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
"In that case, it requires TensorFlow to be installed. Please see " "In that case, it requires TensorFlow to be installed. Please see "
"https://www.tensorflow.org/install/ for installation instructions.") "https://www.tensorflow.org/install/ for installation instructions.")
raise raise
if len(sys.argv) < 4 or len(sys.argv) > 5: if len(sys.argv) < 4 or len(sys.argv) > 5:
# pylint: disable=line-too-long # pylint: disable=line-too-long
print("Should be used as `pytorch_transformers gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [TF_CONFIG]`") print("Should be used as `transformers gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [TF_CONFIG]`")
else: else:
TF_CHECKPOINT = sys.argv[2] TF_CHECKPOINT = sys.argv[2]
PYTORCH_DUMP_OUTPUT = sys.argv[3] PYTORCH_DUMP_OUTPUT = sys.argv[3]
...@@ -92,14 +92,14 @@ def main(): ...@@ -92,14 +92,14 @@ def main():
try: try:
from .convert_xlnet_original_tf_checkpoint_to_pytorch import convert_xlnet_checkpoint_to_pytorch from .convert_xlnet_original_tf_checkpoint_to_pytorch import convert_xlnet_checkpoint_to_pytorch
except ImportError: except ImportError:
print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, " print("transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
"In that case, it requires TensorFlow to be installed. Please see " "In that case, it requires TensorFlow to be installed. Please see "
"https://www.tensorflow.org/install/ for installation instructions.") "https://www.tensorflow.org/install/ for installation instructions.")
raise raise
if len(sys.argv) < 5 or len(sys.argv) > 6: if len(sys.argv) < 5 or len(sys.argv) > 6:
# pylint: disable=line-too-long # pylint: disable=line-too-long
print("Should be used as `pytorch_transformers xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME]`") print("Should be used as `transformers xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME]`")
else: else:
TF_CHECKPOINT = sys.argv[2] TF_CHECKPOINT = sys.argv[2]
TF_CONFIG = sys.argv[3] TF_CONFIG = sys.argv[3]
...@@ -118,7 +118,7 @@ def main(): ...@@ -118,7 +118,7 @@ def main():
if len(sys.argv) != 4: if len(sys.argv) != 4:
# pylint: disable=line-too-long # pylint: disable=line-too-long
print("Should be used as `pytorch_transformers xlm XLM_CHECKPOINT_PATH PYTORCH_DUMP_OUTPUT`") print("Should be used as `transformers xlm XLM_CHECKPOINT_PATH PYTORCH_DUMP_OUTPUT`")
else: else:
XLM_CHECKPOINT_PATH = sys.argv[2] XLM_CHECKPOINT_PATH = sys.argv[2]
PYTORCH_DUMP_OUTPUT = sys.argv[3] PYTORCH_DUMP_OUTPUT = sys.argv[3]
......
@@ -31,7 +31,7 @@ logger = logging.getLogger(__name__)
 class AutoConfig(object):
-    r""":class:`~pytorch_transformers.AutoConfig` is a generic configuration class
+    r""":class:`~transformers.AutoConfig` is a generic configuration class
         that will be instantiated as one of the configuration classes of the library
         when created with the `AutoConfig.from_pretrained(pretrained_model_name_or_path)`
         class method.
@@ -76,7 +76,7 @@ class AutoConfig(object):
             pretrained_model_name_or_path: either:
                 - a string with the `shortcut name` of a pre-trained model configuration to load from cache or download, e.g.: ``bert-base-uncased``.
-                - a path to a `directory` containing a configuration file saved using the :func:`~pytorch_transformers.PretrainedConfig.save_pretrained` method, e.g.: ``./my_model_directory/``.
+                - a path to a `directory` containing a configuration file saved using the :func:`~transformers.PretrainedConfig.save_pretrained` method, e.g.: ``./my_model_directory/``.
                 - a path or url to a saved configuration JSON `file`, e.g.: ``./my_model_directory/configuration.json``.
             cache_dir: (`optional`) string:
...
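
The three input forms listed in the docstring above, as a usage sketch (the local paths are hypothetical):

from transformers import AutoConfig
config = AutoConfig.from_pretrained('bert-base-uncased')                        # shortcut name: download and cache
config = AutoConfig.from_pretrained('./my_model_directory/')                    # directory written by save_pretrained()
config = AutoConfig.from_pretrained('./my_model_directory/configuration.json')  # a saved configuration JSON file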
@@ -45,7 +45,7 @@ BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
 class BertConfig(PretrainedConfig):
     r"""
-        :class:`~pytorch_transformers.BertConfig` is the configuration class to store the configuration of a
+        :class:`~transformers.BertConfig` is the configuration class to store the configuration of a
         `BertModel`.
...
@@ -59,7 +59,7 @@ class PretrainedConfig(object):
     def save_pretrained(self, save_directory):
         """ Save a configuration object to the directory `save_directory`, so that it
-            can be re-loaded using the :func:`~pytorch_transformers.PretrainedConfig.from_pretrained` class method.
+            can be re-loaded using the :func:`~transformers.PretrainedConfig.from_pretrained` class method.
         """
         assert os.path.isdir(save_directory), "Saving path should be a directory where the model and configuration can be saved"
@@ -71,13 +71,13 @@ class PretrainedConfig(object):
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
-        r""" Instantiate a :class:`~pytorch_transformers.PretrainedConfig` (or a derived class) from a pre-trained model configuration.
+        r""" Instantiate a :class:`~transformers.PretrainedConfig` (or a derived class) from a pre-trained model configuration.
         Parameters:
             pretrained_model_name_or_path: either:
                 - a string with the `shortcut name` of a pre-trained model configuration to load from cache or download, e.g.: ``bert-base-uncased``.
-                - a path to a `directory` containing a configuration file saved using the :func:`~pytorch_transformers.PretrainedConfig.save_pretrained` method, e.g.: ``./my_model_directory/``.
+                - a path to a `directory` containing a configuration file saved using the :func:`~transformers.PretrainedConfig.save_pretrained` method, e.g.: ``./my_model_directory/``.
                 - a path or url to a saved configuration JSON `file`, e.g.: ``./my_model_directory/configuration.json``.
             cache_dir: (`optional`) string:
...
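
A round-trip sketch of the save/load pair documented above (directory path hypothetical; per the assert in save_pretrained, the target directory must already exist):

import os
from transformers import BertConfig

config = BertConfig.from_pretrained('bert-base-uncased')
os.makedirs('./my_model_directory/', exist_ok=True)  # save_pretrained asserts the path is an existing directory
config.save_pretrained('./my_model_directory/')      # writes the configuration JSON into the directory
reloaded = BertConfig.from_pretrained('./my_model_directory/')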