Unverified commit d3f24dfa, authored by Lysandre Debut and committed by GitHub

Merge branch 'master' into master

parents 4b543c30 ecc4f1bd
-from pytorch_transformers import (
+from transformers import (
     AutoTokenizer, AutoConfig, AutoModel, AutoModelWithLMHead, AutoModelForSequenceClassification, AutoModelForQuestionAnswering
 )
-from pytorch_transformers.file_utils import add_start_docstrings
+from transformers.file_utils import add_start_docstrings

 dependencies = ['torch', 'tqdm', 'boto3', 'requests', 'regex', 'sentencepiece', 'sacremoses']
@@ -11,12 +11,12 @@ def config(*args, **kwargs):
     # Using torch.hub !
     import torch
-    config = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased') # Download configuration from S3 and cache.
-    config = torch.hub.load('huggingface/pytorch-transformers', 'config', './test/bert_saved_model/') # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
-    config = torch.hub.load('huggingface/pytorch-transformers', 'config', './test/bert_saved_model/my_configuration.json')
-    config = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False)
+    config = torch.hub.load('huggingface/transformers', 'config', 'bert-base-uncased') # Download configuration from S3 and cache.
+    config = torch.hub.load('huggingface/transformers', 'config', './test/bert_saved_model/') # E.g. config (or model) was saved using `save_pretrained('./test/saved_model/')`
+    config = torch.hub.load('huggingface/transformers', 'config', './test/bert_saved_model/my_configuration.json')
+    config = torch.hub.load('huggingface/transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False)
     assert config.output_attention == True
-    config, unused_kwargs = torch.hub.load('huggingface/pytorch-transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False, return_unused_kwargs=True)
+    config, unused_kwargs = torch.hub.load('huggingface/transformers', 'config', 'bert-base-uncased', output_attention=True, foo=False, return_unused_kwargs=True)
     assert config.output_attention == True
     assert unused_kwargs == {'foo': False}
@@ -31,8 +31,8 @@ def tokenizer(*args, **kwargs):
     # Using torch.hub !
     import torch
-    tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'bert-base-uncased') # Download vocabulary from S3 and cache.
-    tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', './test/bert_saved_model/') # E.g. tokenizer was saved using `save_pretrained('./test/saved_model/')`
+    tokenizer = torch.hub.load('huggingface/transformers', 'tokenizer', 'bert-base-uncased') # Download vocabulary from S3 and cache.
+    tokenizer = torch.hub.load('huggingface/transformers', 'tokenizer', './test/bert_saved_model/') # E.g. tokenizer was saved using `save_pretrained('./test/saved_model/')`
     """
@@ -45,13 +45,13 @@ def model(*args, **kwargs):
     # Using torch.hub !
     import torch
-    model = torch.hub.load('huggingface/pytorch-transformers', 'model', 'bert-base-uncased') # Download model and configuration from S3 and cache.
-    model = torch.hub.load('huggingface/pytorch-transformers', 'model', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-    model = torch.hub.load('huggingface/pytorch-transformers', 'model', 'bert-base-uncased', output_attention=True) # Update configuration during loading
+    model = torch.hub.load('huggingface/transformers', 'model', 'bert-base-uncased') # Download model and configuration from S3 and cache.
+    model = torch.hub.load('huggingface/transformers', 'model', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+    model = torch.hub.load('huggingface/transformers', 'model', 'bert-base-uncased', output_attention=True) # Update configuration during loading
     assert model.config.output_attention == True
     # Loading from a TF checkpoint file instead of a PyTorch model (slower)
     config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-    model = torch.hub.load('huggingface/pytorch-transformers', 'model', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+    model = torch.hub.load('huggingface/transformers', 'model', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
     """
@@ -63,13 +63,13 @@ def modelWithLMHead(*args, **kwargs):
     # Using torch.hub !
     import torch
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', 'bert-base-uncased') # Download model and configuration from S3 and cache.
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', 'bert-base-uncased', output_attention=True) # Update configuration during loading
+    model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', 'bert-base-uncased') # Download model and configuration from S3 and cache.
+    model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+    model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', 'bert-base-uncased', output_attention=True) # Update configuration during loading
     assert model.config.output_attention == True
     # Loading from a TF checkpoint file instead of a PyTorch model (slower)
     config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelWithLMHead', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+    model = torch.hub.load('huggingface/transformers', 'modelWithLMHead', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
     """
     return AutoModelWithLMHead.from_pretrained(*args, **kwargs)
@@ -81,13 +81,13 @@ def modelForSequenceClassification(*args, **kwargs):
     # Using torch.hub !
     import torch
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', 'bert-base-uncased') # Download model and configuration from S3 and cache.
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', 'bert-base-uncased', output_attention=True) # Update configuration during loading
+    model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', 'bert-base-uncased') # Download model and configuration from S3 and cache.
+    model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+    model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', 'bert-base-uncased', output_attention=True) # Update configuration during loading
     assert model.config.output_attention == True
     # Loading from a TF checkpoint file instead of a PyTorch model (slower)
     config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelForSequenceClassification', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+    model = torch.hub.load('huggingface/transformers', 'modelForSequenceClassification', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
     """
@@ -100,13 +100,13 @@ def modelForQuestionAnswering(*args, **kwargs):
     # Using torch.hub !
     import torch
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', 'bert-base-uncased') # Download model and configuration from S3 and cache.
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', 'bert-base-uncased', output_attention=True) # Update configuration during loading
+    model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', 'bert-base-uncased') # Download model and configuration from S3 and cache.
+    model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', './test/bert_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
+    model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', 'bert-base-uncased', output_attention=True) # Update configuration during loading
     assert model.config.output_attention == True
     # Loading from a TF checkpoint file instead of a PyTorch model (slower)
     config = AutoConfig.from_json_file('./tf_model/bert_tf_model_config.json')
-    model = torch.hub.load('huggingface/pytorch-transformers', 'modelForQuestionAnswering', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
+    model = torch.hub.load('huggingface/transformers', 'modelForQuestionAnswering', './tf_model/bert_tf_checkpoint.ckpt.index', from_tf=True, config=config)
     """
     return AutoModelForQuestionAnswering.from_pretrained(*args, **kwargs)
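In practice, the only thing hub users need to change is the repository path passed to torch.hub.load; the entry points themselves are unchanged. A minimal sketch of the post-rename call (the model name is illustrative):

import torch

# Same hub entry points ('config', 'tokenizer', 'model', ...) as before;
# only the GitHub repository name changed.
tokenizer = torch.hub.load('huggingface/transformers', 'tokenizer', 'bert-base-uncased')
model = torch.hub.load('huggingface/transformers', 'model', 'bert-base-uncased')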
__version__ = "1.2.0"
# Work around to update TensorFlow's absl.logging threshold which alters the
# default Python logging output behavior when present.
# see: https://github.com/abseil/abseil-py/issues/99
# and: https://github.com/tensorflow/tensorflow/issues/26691#issuecomment-500369493
try:
import absl.logging
absl.logging.set_verbosity('info')
absl.logging.set_stderrthreshold('info')
absl.logging._warn_preinit_stderr = False
except:
pass
# Tokenizer
from .tokenization_utils import (PreTrainedTokenizer)
from .tokenization_auto import AutoTokenizer
from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
from .tokenization_openai import OpenAIGPTTokenizer
from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
from .tokenization_gpt2 import GPT2Tokenizer
from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
from .tokenization_xlm import XLMTokenizer
from .tokenization_roberta import RobertaTokenizer
from .tokenization_distilbert import DistilBertTokenizer
# Configurations
from .configuration_utils import PretrainedConfig
from .configuration_auto import AutoConfig
from .configuration_bert import BertConfig, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_openai import OpenAIGPTConfig, OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_transfo_xl import TransfoXLConfig, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_gpt2 import GPT2Config, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_xlnet import XLNetConfig, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_xlm import XLMConfig, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_roberta import RobertaConfig, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_distilbert import DistilBertConfig, DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
# Modeling
from .modeling_utils import (PreTrainedModel, prune_layer, Conv1D)
from .modeling_auto import (AutoModel, AutoModelForSequenceClassification, AutoModelForQuestionAnswering,
AutoModelWithLMHead)
from .modeling_bert import (BertPreTrainedModel, BertModel, BertForPreTraining,
BertForMaskedLM, BertForNextSentencePrediction,
BertForSequenceClassification, BertForMultipleChoice,
BertForTokenClassification, BertForQuestionAnswering,
load_tf_weights_in_bert, BERT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_openai import (OpenAIGPTPreTrainedModel, OpenAIGPTModel,
OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel,
load_tf_weights_in_openai_gpt, OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_transfo_xl import (TransfoXLPreTrainedModel, TransfoXLModel, TransfoXLLMHeadModel,
load_tf_weights_in_transfo_xl, TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_gpt2 import (GPT2PreTrainedModel, GPT2Model,
GPT2LMHeadModel, GPT2DoubleHeadsModel,
load_tf_weights_in_gpt2, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_xlnet import (XLNetPreTrainedModel, XLNetModel, XLNetLMHeadModel,
XLNetForSequenceClassification, XLNetForQuestionAnswering, XLNetForMultipleChoice,
load_tf_weights_in_xlnet, XLNET_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_xlm import (XLMPreTrainedModel, XLMModel,
XLMWithLMHeadModel, XLMForSequenceClassification,
XLMForQuestionAnswering, XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_roberta import (RobertaForMaskedLM, RobertaModel, RobertaForSequenceClassification,
RobertaForMultipleChoice, ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_distilbert import (DistilBertForMaskedLM, DistilBertModel,
DistilBertForSequenceClassification, DistilBertForQuestionAnswering,
DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
# Optimization
from .optimization import (AdamW, ConstantLRSchedule, WarmupConstantSchedule, WarmupCosineSchedule,
WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule)
# Files and general utilities
from .file_utils import (PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE,
cached_path, add_start_docstrings, add_end_docstrings,
WEIGHTS_NAME, TF_WEIGHTS_NAME, CONFIG_NAME)
-# PyTorch
-torch>=1.0.0
 # progress bars in model download and training scripts
 tqdm
 # Accessing files from S3 directly.
...
@@ -13,11 +13,11 @@ To create the package for pypi.
 4. Build both the sources and the wheel. Do not change anything in setup.py between
    creating the wheel and the source distribution (obviously).
-   For the wheel, run: "python setup.py bdist_wheel" in the top level allennlp directory.
+   For the wheel, run: "python setup.py bdist_wheel" in the top level directory.
    (this will build a wheel for the python version you use to build it - make sure you use python 3.x).
    For the sources, run: "python setup.py sdist"
-   You should now have a /dist directory with both .whl and .tar.gz source versions of allennlp.
+   You should now have a /dist directory with both .whl and .tar.gz source versions.
 5. Check that everything looks correct by uploading the package to the pypi test server:
@@ -25,7 +25,7 @@ To create the package for pypi.
    (pypi suggests using twine as other methods upload files via plaintext.)
    Check that you can install it in a virtualenv by running:
-   pip install -i https://testpypi.python.org/pypi pytorch-transformers
+   pip install -i https://testpypi.python.org/pypi transformers
 6. Upload the final version to actual pypi:
    twine upload dist/* -r pypi
@@ -37,20 +37,19 @@ from io import open
 from setuptools import find_packages, setup

 setup(
-    name="pytorch_transformers",
-    version="1.2.0",
-    author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Google AI Language Team Authors, Open AI team Authors",
+    name="transformers",
+    version="2.0.0",
+    author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
     author_email="thomas@huggingface.co",
-    description="Repository of pre-trained NLP Transformer models: BERT & RoBERTa, GPT & GPT-2, Transformer-XL, XLNet and XLM",
+    description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",
     long_description=open("README.md", "r", encoding='utf-8').read(),
     long_description_content_type="text/markdown",
-    keywords='NLP deep learning transformer pytorch BERT GPT GPT-2 google openai CMU',
+    keywords='NLP deep learning transformer pytorch tensorflow BERT GPT GPT-2 google openai CMU',
     license='Apache',
-    url="https://github.com/huggingface/pytorch-transformers",
+    url="https://github.com/huggingface/transformers",
     packages=find_packages(exclude=["*.tests", "*.tests.*",
                                     "tests.*", "tests"]),
-    install_requires=['torch>=1.0.0',
-                      'numpy',
+    install_requires=['numpy',
                       'boto3',
                       'requests',
                       'tqdm',
@@ -59,7 +58,7 @@ setup(
                       'sacremoses'],
     entry_points={
         'console_scripts': [
-            "pytorch_transformers=pytorch_transformers.__main__:main",
+            "transformers=transformers.__main__:main",
         ]
     },
     # python_requires='>=3.5.0',
...
__version__ = "2.0.0"
# Work around to update TensorFlow's absl.logging threshold which alters the
# default Python logging output behavior when present.
# see: https://github.com/abseil/abseil-py/issues/99
# and: https://github.com/tensorflow/tensorflow/issues/26691#issuecomment-500369493
try:
import absl.logging
absl.logging.set_verbosity('info')
absl.logging.set_stderrthreshold('info')
absl.logging._warn_preinit_stderr = False
except:
pass
import logging
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
# Files and general utilities
from .file_utils import (TRANSFORMERS_CACHE, PYTORCH_TRANSFORMERS_CACHE, PYTORCH_PRETRAINED_BERT_CACHE,
cached_path, add_start_docstrings, add_end_docstrings,
WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME, CONFIG_NAME,
is_tf_available, is_torch_available)
from .data import (is_sklearn_available,
InputExample, InputFeatures, DataProcessor,
glue_output_modes, glue_convert_examples_to_features,
glue_processors, glue_tasks_num_labels)
if is_sklearn_available():
from .data import glue_compute_metrics
# Tokenizers
from .tokenization_utils import (PreTrainedTokenizer)
from .tokenization_auto import AutoTokenizer
from .tokenization_bert import BertTokenizer, BasicTokenizer, WordpieceTokenizer
from .tokenization_openai import OpenAIGPTTokenizer
from .tokenization_transfo_xl import (TransfoXLTokenizer, TransfoXLCorpus)
from .tokenization_gpt2 import GPT2Tokenizer
from .tokenization_xlnet import XLNetTokenizer, SPIECE_UNDERLINE
from .tokenization_xlm import XLMTokenizer
from .tokenization_roberta import RobertaTokenizer
from .tokenization_distilbert import DistilBertTokenizer
# Configurations
from .configuration_utils import PretrainedConfig
from .configuration_auto import AutoConfig
from .configuration_bert import BertConfig, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_openai import OpenAIGPTConfig, OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_transfo_xl import TransfoXLConfig, TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_gpt2 import GPT2Config, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_xlnet import XLNetConfig, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_xlm import XLMConfig, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_roberta import RobertaConfig, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
from .configuration_distilbert import DistilBertConfig, DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
# Modeling
if is_torch_available():
from .modeling_utils import (PreTrainedModel, prune_layer, Conv1D)
from .modeling_auto import (AutoModel, AutoModelForSequenceClassification, AutoModelForQuestionAnswering,
AutoModelWithLMHead)
from .modeling_bert import (BertPreTrainedModel, BertModel, BertForPreTraining,
BertForMaskedLM, BertForNextSentencePrediction,
BertForSequenceClassification, BertForMultipleChoice,
BertForTokenClassification, BertForQuestionAnswering,
load_tf_weights_in_bert, BERT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_openai import (OpenAIGPTPreTrainedModel, OpenAIGPTModel,
OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel,
load_tf_weights_in_openai_gpt, OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_transfo_xl import (TransfoXLPreTrainedModel, TransfoXLModel, TransfoXLLMHeadModel,
load_tf_weights_in_transfo_xl, TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_gpt2 import (GPT2PreTrainedModel, GPT2Model,
GPT2LMHeadModel, GPT2DoubleHeadsModel,
load_tf_weights_in_gpt2, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_xlnet import (XLNetPreTrainedModel, XLNetModel, XLNetLMHeadModel,
XLNetForSequenceClassification, XLNetForMultipleChoice,
XLNetForQuestionAnsweringSimple, XLNetForQuestionAnswering,
load_tf_weights_in_xlnet, XLNET_PRETRAINED_MODEL_ARCHIVE_MAP)
    from .modeling_xlm import (XLMPreTrainedModel, XLMModel,
XLMWithLMHeadModel, XLMForSequenceClassification,
XLMForQuestionAnswering, XLMForQuestionAnsweringSimple,
XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_roberta import (RobertaForMaskedLM, RobertaModel,
RobertaForSequenceClassification, RobertaForMultipleChoice,
ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_distilbert import (DistilBertForMaskedLM, DistilBertModel,
DistilBertForSequenceClassification, DistilBertForQuestionAnswering,
DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
# Optimization
from .optimization import (AdamW, ConstantLRSchedule, WarmupConstantSchedule, WarmupCosineSchedule,
WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule)
# TensorFlow
if is_tf_available():
from .modeling_tf_utils import TFPreTrainedModel, TFSharedEmbeddings, TFSequenceSummary
from .modeling_tf_auto import (TFAutoModel, TFAutoModelForSequenceClassification, TFAutoModelForQuestionAnswering,
TFAutoModelWithLMHead)
from .modeling_tf_bert import (TFBertPreTrainedModel, TFBertMainLayer, TFBertEmbeddings,
TFBertModel, TFBertForPreTraining,
TFBertForMaskedLM, TFBertForNextSentencePrediction,
TFBertForSequenceClassification, TFBertForMultipleChoice,
TFBertForTokenClassification, TFBertForQuestionAnswering,
load_bert_pt_weights_in_tf2,
TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_gpt2 import (TFGPT2PreTrainedModel, TFGPT2MainLayer,
TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel,
load_gpt2_pt_weights_in_tf2,
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_openai import (TFOpenAIGPTPreTrainedModel, TFOpenAIGPTMainLayer,
TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel,
load_openai_gpt_pt_weights_in_tf2,
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_transfo_xl import (TFTransfoXLPreTrainedModel, TFTransfoXLMainLayer,
TFTransfoXLModel, TFTransfoXLLMHeadModel,
load_transfo_xl_pt_weights_in_tf2,
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_xlnet import (TFXLNetPreTrainedModel, TFXLNetMainLayer,
TFXLNetModel, TFXLNetLMHeadModel,
TFXLNetForSequenceClassification,
TFXLNetForQuestionAnsweringSimple,
load_xlnet_pt_weights_in_tf2,
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_xlm import (TFXLMPreTrainedModel, TFXLMMainLayer,
TFXLMModel, TFXLMWithLMHeadModel,
TFXLMForSequenceClassification,
TFXLMForQuestionAnsweringSimple,
load_xlm_pt_weights_in_tf2,
TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_roberta import (TFRobertaPreTrainedModel, TFRobertaMainLayer,
TFRobertaModel, TFRobertaForMaskedLM,
TFRobertaForSequenceClassification,
load_roberta_pt_weights_in_tf2,
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP)
from .modeling_tf_distilbert import (TFDistilBertPreTrainedModel, TFDistilBertMainLayer,
TFDistilBertModel, TFDistilBertForMaskedLM,
TFDistilBertForSequenceClassification,
TFDistilBertForQuestionAnswering,
load_distilbert_pt_weights_in_tf2,
TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
# TF 2.0 <=> PyTorch conversion utilities
if is_tf_available() and is_torch_available():
from .modeling_tf_pytorch_utils import (convert_tf_weight_name_to_pt_weight_name,
load_pytorch_checkpoint_in_tf2_model,
load_pytorch_weights_in_tf2_model,
load_pytorch_model_in_tf2_model,
load_tf2_checkpoint_in_pytorch_model,
load_tf2_weights_in_pytorch_model,
load_tf2_model_in_pytorch_model)
if not is_tf_available() and not is_torch_available():
logger.warning("Neither PyTorch nor TensorFlow >= 2.0 have been found."
"Models won't be available and only tokenizers, configuration"
"and file/data utilities can be used.")
@@ -3,36 +3,37 @@ def main():
    import sys
    if (len(sys.argv) < 4 or len(sys.argv) > 6) or sys.argv[1] not in ["bert", "gpt", "transfo_xl", "gpt2", "xlnet", "xlm"]:
        print(
-        "Should be used as one of: \n"
-        ">> pytorch_transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT, \n"
-        ">> pytorch_transformers gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG], \n"
-        ">> pytorch_transformers transfo_xl TF_CHECKPOINT_OR_DATASET PYTORCH_DUMP_OUTPUT [TF_CONFIG] or \n"
-        ">> pytorch_transformers gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [GPT2_CONFIG] or \n"
-        ">> pytorch_transformers xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME] or \n"
-        ">> pytorch_transformers xlm XLM_CHECKPOINT_PATH PYTORCH_DUMP_OUTPUT")
+        "This command line utility lets you convert an original (author released) model checkpoint to pytorch.\n"
+        "It should be used as one of: \n"
+        ">> transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT, \n"
+        ">> transformers gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG], \n"
+        ">> transformers transfo_xl TF_CHECKPOINT_OR_DATASET PYTORCH_DUMP_OUTPUT [TF_CONFIG] or \n"
+        ">> transformers gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [GPT2_CONFIG] or \n"
+        ">> transformers xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME] or \n"
+        ">> transformers xlm XLM_CHECKPOINT_PATH PYTORCH_DUMP_OUTPUT")
    else:
        if sys.argv[1] == "bert":
            try:
-                from .convert_tf_checkpoint_to_pytorch import convert_tf_checkpoint_to_pytorch
+                from .convert_bert_original_tf_checkpoint_to_pytorch import convert_tf_checkpoint_to_pytorch
            except ImportError:
-                print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
+                print("transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
                      "In that case, it requires TensorFlow to be installed. Please see "
                      "https://www.tensorflow.org/install/ for installation instructions.")
                raise
            if len(sys.argv) != 5:
                # pylint: disable=line-too-long
-                print("Should be used as `pytorch_transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT`")
+                print("Should be used as `transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT`")
            else:
                PYTORCH_DUMP_OUTPUT = sys.argv.pop()
                TF_CONFIG = sys.argv.pop()
                TF_CHECKPOINT = sys.argv.pop()
                convert_tf_checkpoint_to_pytorch(TF_CHECKPOINT, TF_CONFIG, PYTORCH_DUMP_OUTPUT)
        elif sys.argv[1] == "gpt":
-            from .convert_openai_checkpoint_to_pytorch import convert_openai_checkpoint_to_pytorch
+            from .convert_openai_original_tf_checkpoint_to_pytorch import convert_openai_checkpoint_to_pytorch
            if len(sys.argv) < 4 or len(sys.argv) > 5:
                # pylint: disable=line-too-long
-                print("Should be used as `pytorch_transformers gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG]`")
+                print("Should be used as `transformers gpt OPENAI_GPT_CHECKPOINT_FOLDER_PATH PYTORCH_DUMP_OUTPUT [OPENAI_GPT_CONFIG]`")
            else:
                OPENAI_GPT_CHECKPOINT_FOLDER_PATH = sys.argv[2]
                PYTORCH_DUMP_OUTPUT = sys.argv[3]
@@ -45,15 +46,15 @@ def main():
                                                 PYTORCH_DUMP_OUTPUT)
        elif sys.argv[1] == "transfo_xl":
            try:
-                from .convert_transfo_xl_checkpoint_to_pytorch import convert_transfo_xl_checkpoint_to_pytorch
+                from .convert_transfo_xl_original_tf_checkpoint_to_pytorch import convert_transfo_xl_checkpoint_to_pytorch
            except ImportError:
-                print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
+                print("transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
                      "In that case, it requires TensorFlow to be installed. Please see "
                      "https://www.tensorflow.org/install/ for installation instructions.")
                raise
            if len(sys.argv) < 4 or len(sys.argv) > 5:
                # pylint: disable=line-too-long
-                print("Should be used as `pytorch_transformers transfo_xl TF_CHECKPOINT/TF_DATASET_FILE PYTORCH_DUMP_OUTPUT [TF_CONFIG]`")
+                print("Should be used as `transformers transfo_xl TF_CHECKPOINT/TF_DATASET_FILE PYTORCH_DUMP_OUTPUT [TF_CONFIG]`")
            else:
                if 'ckpt' in sys.argv[2].lower():
                    TF_CHECKPOINT = sys.argv[2]
@@ -69,16 +70,16 @@ def main():
            convert_transfo_xl_checkpoint_to_pytorch(TF_CHECKPOINT, TF_CONFIG, PYTORCH_DUMP_OUTPUT, TF_DATASET_FILE)
        elif sys.argv[1] == "gpt2":
            try:
-                from .convert_gpt2_checkpoint_to_pytorch import convert_gpt2_checkpoint_to_pytorch
+                from .convert_gpt2_original_tf_checkpoint_to_pytorch import convert_gpt2_checkpoint_to_pytorch
            except ImportError:
-                print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
+                print("transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
                      "In that case, it requires TensorFlow to be installed. Please see "
                      "https://www.tensorflow.org/install/ for installation instructions.")
                raise
            if len(sys.argv) < 4 or len(sys.argv) > 5:
                # pylint: disable=line-too-long
-                print("Should be used as `pytorch_transformers gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [TF_CONFIG]`")
+                print("Should be used as `transformers gpt2 TF_CHECKPOINT PYTORCH_DUMP_OUTPUT [TF_CONFIG]`")
            else:
                TF_CHECKPOINT = sys.argv[2]
                PYTORCH_DUMP_OUTPUT = sys.argv[3]
@@ -89,16 +90,16 @@ def main():
            convert_gpt2_checkpoint_to_pytorch(TF_CHECKPOINT, TF_CONFIG, PYTORCH_DUMP_OUTPUT)
        elif sys.argv[1] == "xlnet":
            try:
-                from .convert_xlnet_checkpoint_to_pytorch import convert_xlnet_checkpoint_to_pytorch
+                from .convert_xlnet_original_tf_checkpoint_to_pytorch import convert_xlnet_checkpoint_to_pytorch
            except ImportError:
-                print("pytorch_transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
+                print("transformers can only be used from the commandline to convert TensorFlow models in PyTorch, "
                      "In that case, it requires TensorFlow to be installed. Please see "
                      "https://www.tensorflow.org/install/ for installation instructions.")
                raise
            if len(sys.argv) < 5 or len(sys.argv) > 6:
                # pylint: disable=line-too-long
-                print("Should be used as `pytorch_transformers xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME]`")
+                print("Should be used as `transformers xlnet TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT [FINETUNING_TASK_NAME]`")
            else:
                TF_CHECKPOINT = sys.argv[2]
                TF_CONFIG = sys.argv[3]
@@ -113,11 +114,11 @@ def main():
                                             PYTORCH_DUMP_OUTPUT,
                                             FINETUNING_TASK)
        elif sys.argv[1] == "xlm":
-            from .convert_xlm_checkpoint_to_pytorch import convert_xlm_checkpoint_to_pytorch
+            from .convert_xlm_original_pytorch_checkpoint_to_pytorch import convert_xlm_checkpoint_to_pytorch
            if len(sys.argv) != 4:
                # pylint: disable=line-too-long
-                print("Should be used as `pytorch_transformers xlm XLM_CHECKPOINT_PATH PYTORCH_DUMP_OUTPUT`")
+                print("Should be used as `transformers xlm XLM_CHECKPOINT_PATH PYTORCH_DUMP_OUTPUT`")
            else:
                XLM_CHECKPOINT_PATH = sys.argv[2]
                PYTORCH_DUMP_OUTPUT = sys.argv[3]
...
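Since setup.py maps the console script to transformers.__main__:main, the same conversion can also be driven programmatically through the converter module named in the import above. A hedged sketch for the bert case (all paths are illustrative):

from transformers.convert_bert_original_tf_checkpoint_to_pytorch import (
    convert_tf_checkpoint_to_pytorch,
)

# Equivalent to: transformers bert TF_CHECKPOINT TF_CONFIG PYTORCH_DUMP_OUTPUT
convert_tf_checkpoint_to_pytorch(
    './bert_model.ckpt',      # TF_CHECKPOINT
    './bert_config.json',     # TF_CONFIG
    './pytorch_model.bin',    # PYTORCH_DUMP_OUTPUT
)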
@@ -31,7 +31,7 @@ logger = logging.getLogger(__name__)
 class AutoConfig(object):
-    r""":class:`~pytorch_transformers.AutoConfig` is a generic configuration class
+    r""":class:`~transformers.AutoConfig` is a generic configuration class
        that will be instantiated as one of the configuration classes of the library
        when created with the `AutoConfig.from_pretrained(pretrained_model_name_or_path)`
        class method.
@@ -76,7 +76,7 @@ class AutoConfig(object):
            pretrained_model_name_or_path: either:
                - a string with the `shortcut name` of a pre-trained model configuration to load from cache or download, e.g.: ``bert-base-uncased``.
-                - a path to a `directory` containing a configuration file saved using the :func:`~pytorch_transformers.PretrainedConfig.save_pretrained` method, e.g.: ``./my_model_directory/``.
+                - a path to a `directory` containing a configuration file saved using the :func:`~transformers.PretrainedConfig.save_pretrained` method, e.g.: ``./my_model_directory/``.
                - a path or url to a saved configuration JSON `file`, e.g.: ``./my_model_directory/configuration.json``.
            cache_dir: (`optional`) string:
...
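For reference, a minimal sketch of the documented entry point under the new package name (the identifiers and paths mirror the docstring above):

from transformers import AutoConfig

# Shortcut name: downloads and caches the configuration.
config = AutoConfig.from_pretrained('bert-base-uncased')
# Local directory previously populated with `save_pretrained(...)`.
config = AutoConfig.from_pretrained('./my_model_directory/')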
@@ -45,7 +45,7 @@ BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
 class BertConfig(PretrainedConfig):
     r"""
-    :class:`~pytorch_transformers.BertConfig` is the configuration class to store the configuration of a
+    :class:`~transformers.BertConfig` is the configuration class to store the configuration of a
     `BertModel`.
@@ -58,7 +58,7 @@ class BertConfig(PretrainedConfig):
        intermediate_size: The size of the "intermediate" (i.e., feed-forward)
            layer in the Transformer encoder.
        hidden_act: The non-linear activation function (function or string) in the
-            encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
+            encoder and pooler. If string, "gelu", "relu", "swish" and "gelu_new" are supported.
        hidden_dropout_prob: The dropout probability for all fully connected
            layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob: The dropout ratio for the attention
...
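A small sketch of the newly documented activation option (all other fields are left at their defaults):

from transformers import BertConfig

# "gelu_new" is the additional string value now accepted by hidden_act.
config = BertConfig(hidden_act="gelu_new")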
@@ -37,7 +37,7 @@ class DistilBertConfig(PretrainedConfig):
     def __init__(self,
                  vocab_size_or_config_json_file=30522,
                  max_position_embeddings=512,
-                 sinusoidal_pos_embds=True,
+                 sinusoidal_pos_embds=False,
                  n_layers=6,
                  n_heads=12,
                  dim=768,
...
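The visible effect of this default flip, as a hedged sketch:

from transformers import DistilBertConfig

# Learned position embeddings are now the default; sinusoidal ones
# must be requested explicitly.
assert DistilBertConfig().sinusoidal_pos_embds is False
config = DistilBertConfig(sinusoidal_pos_embds=True)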
@@ -28,7 +28,8 @@ logger = logging.getLogger(__name__)
 GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP = {"gpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-config.json",
                                       "gpt2-medium": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-medium-config.json",
-                                      "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-config.json"}
+                                      "gpt2-large": "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-large-config.json",
+                                      "distilgpt2": "https://s3.amazonaws.com/models.huggingface.co/bert/distilgpt2-config.json",}

 class GPT2Config(PretrainedConfig):
     """Configuration class to store the configuration of a `GPT2Model`.
...
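With the new archive-map entry, the distilled checkpoint's shortcut name resolves like the existing GPT-2 sizes; a minimal sketch:

from transformers import GPT2Config

# 'distilgpt2' now maps to a hosted config file, same as
# 'gpt2' / 'gpt2-medium' / 'gpt2-large'.
config = GPT2Config.from_pretrained('distilgpt2')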
@@ -36,7 +36,6 @@ class OpenAIGPTConfig(PretrainedConfig):
     Args:
        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `OpenAIGPTModel` or a configuration json file.
-        n_special: The number of special tokens to learn during fine-tuning ('[SEP]', '[CLF]', ...)
        n_positions: Number of positional embeddings.
        n_ctx: Size of the causal mask (usually same as n_positions).
        n_embd: Dimensionality of the embeddings and hidden states.
...
@@ -95,10 +95,43 @@ class TransfoXLConfig(PretrainedConfig):
                 init_range=0.01,
                 proj_init_std=0.01,
                 init_std=0.02,
+                 layer_norm_epsilon=1e-5,
                 **kwargs):
        """Constructs TransfoXLConfig.
        """
        super(TransfoXLConfig, self).__init__(**kwargs)
+        self.n_token = vocab_size_or_config_json_file if isinstance(vocab_size_or_config_json_file, int) else -1
+        self.cutoffs = []
+        self.cutoffs.extend(cutoffs)
+        self.tie_weight = tie_weight
+        if proj_share_all_but_first:
+            self.tie_projs = [False] + [True] * len(self.cutoffs)
+        else:
+            self.tie_projs = [False] + [False] * len(self.cutoffs)
+        self.d_model = d_model
+        self.d_embed = d_embed
+        self.d_head = d_head
+        self.d_inner = d_inner
+        self.div_val = div_val
+        self.pre_lnorm = pre_lnorm
+        self.n_layer = n_layer
+        self.n_head = n_head
+        self.tgt_len = tgt_len
+        self.ext_len = ext_len
+        self.mem_len = mem_len
+        self.same_length = same_length
+        self.attn_type = attn_type
+        self.clamp_len = clamp_len
+        self.sample_softmax = sample_softmax
+        self.adaptive = adaptive
+        self.dropout = dropout
+        self.dropatt = dropatt
+        self.untie_r = untie_r
+        self.init = init
+        self.init_range = init_range
+        self.proj_init_std = proj_init_std
+        self.init_std = init_std
+        self.layer_norm_epsilon = layer_norm_epsilon
        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
                and isinstance(vocab_size_or_config_json_file, unicode)):
@@ -106,39 +139,7 @@ class TransfoXLConfig(PretrainedConfig):
            json_config = json.loads(reader.read())
            for key, value in json_config.items():
                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.n_token = vocab_size_or_config_json_file
-            self.cutoffs = []
-            self.cutoffs.extend(cutoffs)
-            self.tie_weight = tie_weight
-            if proj_share_all_but_first:
-                self.tie_projs = [False] + [True] * len(self.cutoffs)
-            else:
-                self.tie_projs = [False] + [False] * len(self.cutoffs)
-            self.d_model = d_model
-            self.d_embed = d_embed
-            self.d_head = d_head
-            self.d_inner = d_inner
-            self.div_val = div_val
-            self.pre_lnorm = pre_lnorm
-            self.n_layer = n_layer
-            self.n_head = n_head
-            self.tgt_len = tgt_len
-            self.ext_len = ext_len
-            self.mem_len = mem_len
-            self.same_length = same_length
-            self.attn_type = attn_type
-            self.clamp_len = clamp_len
-            self.sample_softmax = sample_softmax
-            self.adaptive = adaptive
-            self.dropout = dropout
-            self.dropatt = dropatt
-            self.untie_r = untie_r
-            self.init = init
-            self.init_range = init_range
-            self.proj_init_std = proj_init_std
-            self.init_std = init_std
-        else:
+        elif not isinstance(vocab_size_or_config_json_file, int):
            raise ValueError("First argument must be either a vocabulary size (int)"
                             " or the path to a pretrained model config file (str)")
...
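The reordering above changes when defaults apply: every attribute is now assigned before the JSON branch runs, so a saved config file only needs to carry overrides, and new fields such as layer_norm_epsilon are always present. A hedged sketch of the visible behavior:

from transformers import TransfoXLConfig

# Defaults (including the new layer_norm_epsilon) are set unconditionally;
# any keys found in a JSON config file then overwrite them.
config = TransfoXLConfig()
assert config.layer_norm_epsilon == 1e-5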
@@ -54,11 +54,12 @@ class PretrainedConfig(object):
        self.output_attentions = kwargs.pop('output_attentions', False)
        self.output_hidden_states = kwargs.pop('output_hidden_states', False)
        self.torchscript = kwargs.pop('torchscript', False)
+        self.use_bfloat16 = kwargs.pop('use_bfloat16', False)
        self.pruned_heads = kwargs.pop('pruned_heads', {})

    def save_pretrained(self, save_directory):
        """ Save a configuration object to the directory `save_directory`, so that it
-            can be re-loaded using the :func:`~pytorch_transformers.PretrainedConfig.from_pretrained` class method.
+            can be re-loaded using the :func:`~transformers.PretrainedConfig.from_pretrained` class method.
        """
        assert os.path.isdir(save_directory), "Saving path should be a directory where the model and configuration can be saved"
@@ -66,16 +67,17 @@ class PretrainedConfig(object):
        output_config_file = os.path.join(save_directory, CONFIG_NAME)

        self.to_json_file(output_config_file)
+        logger.info("Configuration saved in {}".format(output_config_file))

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
-        r""" Instantiate a :class:`~pytorch_transformers.PretrainedConfig` (or a derived class) from a pre-trained model configuration.
+        r""" Instantiate a :class:`~transformers.PretrainedConfig` (or a derived class) from a pre-trained model configuration.

        Parameters:
            pretrained_model_name_or_path: either:
                - a string with the `shortcut name` of a pre-trained model configuration to load from cache or download, e.g.: ``bert-base-uncased``.
-                - a path to a `directory` containing a configuration file saved using the :func:`~pytorch_transformers.PretrainedConfig.save_pretrained` method, e.g.: ``./my_model_directory/``.
+                - a path to a `directory` containing a configuration file saved using the :func:`~transformers.PretrainedConfig.save_pretrained` method, e.g.: ``./my_model_directory/``.
                - a path or url to a saved configuration JSON `file`, e.g.: ``./my_model_directory/configuration.json``.
            cache_dir: (`optional`) string:
@@ -174,7 +176,7 @@ class PretrainedConfig(object):
        """Constructs a `Config` from a Python dictionary of parameters."""
        config = cls(vocab_size_or_config_json_file=-1)
        for key, value in json_object.items():
-            config.__dict__[key] = value
+            setattr(config, key, value)
        return config

    @classmethod
...
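The switch from a raw __dict__ write to setattr matters because setattr goes through normal attribute assignment, so a subclass can intercept keys with a property setter. A small illustrative sketch (the Demo class is hypothetical, not part of the library):

class Demo:
    """Hypothetical config-like class with a validating property."""
    @property
    def num_labels(self):
        return self._num_labels

    @num_labels.setter
    def num_labels(self, value):
        self._num_labels = int(value)   # validation/conversion logic

d = Demo()
setattr(d, 'num_labels', '3')   # goes through the setter: value is converted
assert d.num_labels == 3
# d.__dict__['num_labels'] = '3' would skip the setter entirely, leaving
# the property's backing store untouched.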
@@ -56,8 +56,6 @@ class XLMConfig(PretrainedConfig):
        dropout: The dropout probability for all fully connected
            layers in the embeddings, encoder, and pooler.
-        dropatt: The dropout ratio for the attention
-            probabilities.
        max_position_embeddings: The maximum sequence length that this model might
            ever be used with. Typically set this to something large just in case
            (e.g., 512 or 1024 or 2048).
@@ -66,7 +64,6 @@ class XLMConfig(PretrainedConfig):
        layer_norm_eps: The epsilon used by LayerNorm.
        dropout: float, dropout rate.
-        dropatt: float, dropout rate on attention probabilities.
        init: str, the initialization scheme, either "normal" or "uniform".
        init_range: float, initialize the parameters with a uniform distribution
            in [-init_range, init_range]. Only effective when init="uniform".
...
@@ -49,14 +49,11 @@ class XLNetConfig(PretrainedConfig):
        dropout: The dropout probability for all fully connected
            layers in the embeddings, encoder, and pooler.
-        dropatt: The dropout ratio for the attention
-            probabilities.
        initializer_range: The stddev of the truncated_normal_initializer for
            initializing all weight matrices.
        layer_norm_eps: The epsilon used by LayerNorm.
        dropout: float, dropout rate.
-        dropatt: float, dropout rate on attention probabilities.
        init: str, the initialization scheme, either "normal" or "uniform".
        init_range: float, initialize the parameters with a uniform distribution
            in [-init_range, init_range]. Only effective when init="uniform".
@@ -80,6 +77,7 @@ class XLNetConfig(PretrainedConfig):
                 n_layer=24,
                 n_head=16,
                 d_inner=4096,
+                 max_position_embeddings=512,
                 ff_activation="gelu",
                 untie_r=True,
                 attn_type="bi",
@@ -112,7 +110,7 @@ class XLNetConfig(PretrainedConfig):
            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
                json_config = json.loads(reader.read())
            for key, value in json_config.items():
-                self.__dict__[key] = value
+                setattr(self, key, value)
        elif isinstance(vocab_size_or_config_json_file, int):
            self.n_token = vocab_size_or_config_json_file
            self.d_model = d_model
...
@@ -21,7 +21,7 @@ from __future__ import print_function
 import argparse
 import torch

-from pytorch_transformers import BertConfig, BertForPreTraining, load_tf_weights_in_bert
+from transformers import BertConfig, BertForPreTraining, load_tf_weights_in_bert

 import logging
 logging.basicConfig(level=logging.INFO)
...
@@ -20,7 +20,7 @@ import argparse
 import torch
 import numpy as np
 import tensorflow as tf
-from pytorch_transformers import BertModel
+from transformers import BertModel

 def convert_pytorch_checkpoint_to_tf(model:BertModel, ckpt_dir:str, model_name:str):
...
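Going by the signature above, a hedged usage sketch (paths and model name are illustrative, and it assumes convert_pytorch_checkpoint_to_tf is in scope, e.g. when running inside this script):

from transformers import BertModel

# Export a PyTorch BERT as TensorFlow checkpoint files under ckpt_dir,
# named after model_name (per the signature above).
model = BertModel.from_pretrained('bert-base-uncased')
convert_pytorch_checkpoint_to_tf(model, ckpt_dir='./tf_export', model_name='bert-base-uncased')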
@@ -21,7 +21,7 @@ from io import open
 import torch

-from pytorch_transformers import (CONFIG_NAME, WEIGHTS_NAME,
+from transformers import (CONFIG_NAME, WEIGHTS_NAME,
                           GPT2Config,
                           GPT2Model,
                           load_tf_weights_in_gpt2)
...
@@ -21,7 +21,7 @@ from io import open
 import torch

-from pytorch_transformers import (CONFIG_NAME, WEIGHTS_NAME,
+from transformers import (CONFIG_NAME, WEIGHTS_NAME,
                           OpenAIGPTConfig,
                           OpenAIGPTModel,
                           load_tf_weights_in_openai_gpt)
...