"...lm-evaluation-harness.git" did not exist on "7a76696e3aad5bcbf51657fa4ece8c1315179dc6"
Commit d7395789 authored by danai-antoniou


Merge branch 'master' of https://github.com/danai-antoniou/pytorch-transformers into add-duplicate-tokens-error
parents 2e6797cc 391db836
@@ -12,5 +12,5 @@ The base class ``PreTrainedTokenizer`` implements the common methods for loading
``PreTrainedTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.PreTrainedTokenizer
.. autoclass:: transformers.PreTrainedTokenizer
:members:
# Migrating from pytorch-pretrained-bert
Here is a quick summary of what you should take care of when migrating from `pytorch-pretrained-bert` to `pytorch-transformers`
Here is a quick summary of what you should take care of when migrating from `pytorch-pretrained-bert` to `transformers`
### Models always output `tuples`
The main breaking change when migrating from `pytorch-pretrained-bert` to `pytorch-transformers` is that the model's forward method always outputs a `tuple` with various elements depending on the model and the configuration parameters.
The main breaking change when migrating from `pytorch-pretrained-bert` to `transformers` is that the model's forward method always outputs a `tuple` with various elements depending on the model and the configuration parameters.
The exact content of the tuples for each model is detailed in the models' docstrings and the [documentation](https://huggingface.co/pytorch-transformers/).
The exact content of the tuples for each model is detailed in the models' docstrings and the [documentation](https://huggingface.co/transformers/).
In pretty much every case, you will be fine taking the first element of the output as the output you previously used in `pytorch-pretrained-bert`.
Here is a `pytorch-pretrained-bert` to `pytorch-transformers` conversion example for a `BertForSequenceClassification` classification model:
Here is a `pytorch-pretrained-bert` to `transformers` conversion example for a `BertForSequenceClassification` classification model:
```python
# Let's load our model
@@ -20,11 +20,11 @@ model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
# If you used to have this line in pytorch-pretrained-bert:
loss = model(input_ids, labels=labels)
# Now just use this line in pytorch-transformers to extract the loss from the output tuple:
# Now just use this line in transformers to extract the loss from the output tuple:
outputs = model(input_ids, labels=labels)
loss = outputs[0]
# In pytorch-transformers you can also have access to the logits:
# In transformers you can also have access to the logits:
loss, logits = outputs[:2]
# And even the attention weights if you configure the model to output them (and other outputs too, see the docstrings and documentation)
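
# Hedged illustration (not part of the original migration doc): attention
# weights can be requested by enabling them on the configuration at load time;
# the extra outputs are appended to the output tuple.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', output_attentions=True)
outputs = model(input_ids, labels=labels)
loss, logits = outputs[:2]
attentions = outputs[-1]  # a tuple with one tensor per layer: (batch, heads, seq_len, seq_len)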
@@ -96,7 +96,7 @@ for batch in train_data:
loss.backward()
optimizer.step()
### In PyTorch-Transformers, the optimizer and schedules are split and instantiated like this:
### In Transformers, the optimizer and schedules are split and instantiated like this:
optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False) # To reproduce BertAdam specific behavior set correct_bias=False
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=num_warmup_steps, t_total=num_total_steps) # PyTorch scheduler
### and used like this:
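The hunk is truncated right after `### and used like this:`. As a hedged sketch (reusing the `optimizer`, `scheduler`, `model` and `train_data` names from the snippet above; `max_grad_norm` is a hyperparameter you choose), the training loop becomes:

```python
max_grad_norm = 1.0  # typical value; BertAdam used to clip gradients internally

for batch in train_data:
    loss = model(batch)
    loss.backward()
    # With plain AdamW you clip gradients explicitly
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
    optimizer.step()
    scheduler.step()  # advance the warmup/decay learning-rate schedule
    optimizer.zero_grad()
```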
@@ -11,19 +11,19 @@ Instantiating one of ``AutoModel``, ``AutoConfig`` and ``AutoTokenizer`` will di
``AutoConfig``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.AutoConfig
.. autoclass:: transformers.AutoConfig
:members:
``AutoModel``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.AutoModel
.. autoclass:: transformers.AutoModel
:members:
``AutoTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.AutoTokenizer
.. autoclass:: transformers.AutoTokenizer
:members:
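
A minimal sketch of the dispatch behaviour described above (using the standard ``bert-base-uncased`` shortcut):

```python
from transformers import AutoConfig, AutoModel, AutoTokenizer

# The shortcut name is inspected and the matching concrete classes are
# instantiated behind the scenes (here: BertConfig, BertTokenizer, BertModel).
config = AutoConfig.from_pretrained('bert-base-uncased')
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModel.from_pretrained('bert-base-uncased')
```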
@@ -4,69 +4,125 @@ BERT
``BertConfig``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.BertConfig
.. autoclass:: transformers.BertConfig
:members:
``BertTokenizer``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.BertTokenizer
.. autoclass:: transformers.BertTokenizer
:members:
``BertModel``
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.BertModel
.. autoclass:: transformers.BertModel
:members:
``BertForPreTraining``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.BertForPreTraining
.. autoclass:: transformers.BertForPreTraining
:members:
``BertForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.BertForMaskedLM
.. autoclass:: transformers.BertForMaskedLM
:members:
``BertForNextSentencePrediction``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.BertForNextSentencePrediction
.. autoclass:: transformers.BertForNextSentencePrediction
:members:
``BertForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.BertForSequenceClassification
.. autoclass:: transformers.BertForSequenceClassification
:members:
``BertForMultipleChoice``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.BertForMultipleChoice
.. autoclass:: transformers.BertForMultipleChoice
:members:
``BertForTokenClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.BertForTokenClassification
.. autoclass:: transformers.BertForTokenClassification
:members:
``BertForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.BertForQuestionAnswering
.. autoclass:: transformers.BertForQuestionAnswering
:members:
``TFBertModel``
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertModel
:members:
``TFBertForPreTraining``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForPreTraining
:members:
``TFBertForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForMaskedLM
:members:
``TFBertForNextSentencePrediction``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForNextSentencePrediction
:members:
``TFBertForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForSequenceClassification
:members:
``TFBertForMultipleChoice``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForMultipleChoice
:members:
``TFBertForTokenClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForTokenClassification
:members:
``TFBertForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForQuestionAnswering
:members:
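
The ``TF*`` classes above are the TF 2.0 counterparts of the PyTorch classes and load from the same checkpoints. A minimal sketch (the example sentence and the explicit ``add_special_tokens`` flag are illustrative):

```python
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = TFBertModel.from_pretrained('bert-base-uncased')

# add_special_tokens=True inserts the [CLS]/[SEP] markers
input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :]
outputs = model(input_ids)
last_hidden_states = outputs[0]  # (batch_size, seq_len, hidden_size)
```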
@@ -4,40 +4,67 @@ DistilBERT
``DistilBertConfig``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.DistilBertConfig
.. autoclass:: transformers.DistilBertConfig
:members:
``DistilBertTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.DistilBertTokenizer
.. autoclass:: transformers.DistilBertTokenizer
:members:
``DistilBertModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.DistilBertModel
.. autoclass:: transformers.DistilBertModel
:members:
``DistilBertForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.DistilBertForMaskedLM
.. autoclass:: transformers.DistilBertForMaskedLM
:members:
``DistilBertForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.DistilBertForSequenceClassification
.. autoclass:: transformers.DistilBertForSequenceClassification
:members:
``DistilBertForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.DistilBertForQuestionAnswering
.. autoclass:: transformers.DistilBertForQuestionAnswering
:members:
``TFDistilBertModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFDistilBertModel
:members:
``TFDistilBertForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFDistilBertForMaskedLM
:members:
``TFDistilBertForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFDistilBertForSequenceClassification
:members:
``TFDistilBertForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFDistilBertForQuestionAnswering
:members:
@@ -4,33 +4,54 @@ OpenAI GPT
``OpenAIGPTConfig``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.OpenAIGPTConfig
.. autoclass:: transformers.OpenAIGPTConfig
:members:
``OpenAIGPTTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.OpenAIGPTTokenizer
.. autoclass:: transformers.OpenAIGPTTokenizer
:members:
``OpenAIGPTModel``
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.OpenAIGPTModel
.. autoclass:: transformers.OpenAIGPTModel
:members:
``OpenAIGPTLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.OpenAIGPTLMHeadModel
.. autoclass:: transformers.OpenAIGPTLMHeadModel
:members:
``OpenAIGPTDoubleHeadsModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.OpenAIGPTDoubleHeadsModel
.. autoclass:: transformers.OpenAIGPTDoubleHeadsModel
:members:
``TFOpenAIGPTModel``
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFOpenAIGPTModel
:members:
``TFOpenAIGPTLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFOpenAIGPTLMHeadModel
:members:
``TFOpenAIGPTDoubleHeadsModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFOpenAIGPTDoubleHeadsModel
:members:
@@ -4,33 +4,54 @@ OpenAI GPT2
``GPT2Config``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.GPT2Config
.. autoclass:: transformers.GPT2Config
:members:
``GPT2Tokenizer``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.GPT2Tokenizer
.. autoclass:: transformers.GPT2Tokenizer
:members:
``GPT2Model``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.GPT2Model
.. autoclass:: transformers.GPT2Model
:members:
``GPT2LMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.GPT2LMHeadModel
.. autoclass:: transformers.GPT2LMHeadModel
:members:
``GPT2DoubleHeadsModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.GPT2DoubleHeadsModel
.. autoclass:: transformers.GPT2DoubleHeadsModel
:members:
``TFGPT2Model``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFGPT2Model
:members:
``TFGPT2LMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFGPT2LMHeadModel
:members:
``TFGPT2DoubleHeadsModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFGPT2DoubleHeadsModel
:members:
@@ -4,33 +4,54 @@ RoBERTa
``RobertaConfig``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.RobertaConfig
.. autoclass:: transformers.RobertaConfig
:members:
``RobertaTokenizer``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.RobertaTokenizer
.. autoclass:: transformers.RobertaTokenizer
:members:
``RobertaModel``
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.RobertaModel
.. autoclass:: transformers.RobertaModel
:members:
``RobertaForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.RobertaForMaskedLM
.. autoclass:: transformers.RobertaForMaskedLM
:members:
``RobertaForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.RobertaForSequenceClassification
.. autoclass:: transformers.RobertaForSequenceClassification
:members:
``TFRobertaModel``
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFRobertaModel
:members:
``TFRobertaForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFRobertaForMaskedLM
:members:
``TFRobertaForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFRobertaForSequenceClassification
:members:
@@ -5,26 +5,40 @@ Transformer XL
``TransfoXLConfig``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.TransfoXLConfig
.. autoclass:: transformers.TransfoXLConfig
:members:
``TransfoXLTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.TransfoXLTokenizer
.. autoclass:: transformers.TransfoXLTokenizer
:members:
``TransfoXLModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.TransfoXLModel
.. autoclass:: transformers.TransfoXLModel
:members:
``TransfoXLLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.TransfoXLLMHeadModel
.. autoclass:: transformers.TransfoXLLMHeadModel
:members:
``TFTransfoXLModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFTransfoXLModel
:members:
``TFTransfoXLLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFTransfoXLLMHeadModel
:members:
@@ -4,38 +4,66 @@ XLM
``XLMConfig``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLMConfig
.. autoclass:: transformers.XLMConfig
:members:
``XLMTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLMTokenizer
.. autoclass:: transformers.XLMTokenizer
:members:
``XLMModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLMModel
.. autoclass:: transformers.XLMModel
:members:
``XLMWithLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLMWithLMHeadModel
.. autoclass:: transformers.XLMWithLMHeadModel
:members:
``XLMForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLMForSequenceClassification
.. autoclass:: transformers.XLMForSequenceClassification
:members:
``XLMForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLMForQuestionAnswering
.. autoclass:: transformers.XLMForQuestionAnswering
:members:
``TFXLMModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLMModel
:members:
``TFXLMWithLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLMWithLMHeadModel
:members:
``TFXLMForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLMForSequenceClassification
:members:
``TFXLMForQuestionAnsweringSimple``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLMForQuestionAnsweringSimple
:members:
@@ -4,40 +4,68 @@ XLNet
``XLNetConfig``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLNetConfig
.. autoclass:: transformers.XLNetConfig
:members:
``XLNetTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLNetTokenizer
.. autoclass:: transformers.XLNetTokenizer
:members:
``XLNetModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLNetModel
.. autoclass:: transformers.XLNetModel
:members:
``XLNetLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLNetLMHeadModel
.. autoclass:: transformers.XLNetLMHeadModel
:members:
``XLNetForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLNetForSequenceClassification
.. autoclass:: transformers.XLNetForSequenceClassification
:members:
``XLNetForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: pytorch_transformers.XLNetForQuestionAnswering
.. autoclass:: transformers.XLNetForQuestionAnswering
:members:
``TFXLNetModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLNetModel
:members:
``TFXLNetLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLNetLMHeadModel
:members:
``TFXLNetForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLNetForSequenceClassification
:members:
``TFXLNetForQuestionAnsweringSimple``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLNetForQuestionAnsweringSimple
:members:
Notebooks
================================================
We include `three Jupyter Notebooks <https://github.com/huggingface/pytorch-transformers/tree/master/notebooks>`_ that can be used to check that the predictions of the PyTorch model are identical to the predictions of the original TensorFlow model.
We include `three Jupyter Notebooks <https://github.com/huggingface/transformers/tree/master/notebooks>`_ that can be used to check that the predictions of the PyTorch model are identical to the predictions of the original TensorFlow model.
*
The first notebook (\ `Comparing-TF-and-PT-models.ipynb <https://github.com/huggingface/pytorch-transformers/blob/master/notebooks/Comparing-TF-and-PT-models.ipynb>`_\ ) extracts the hidden states of a full sequence on each layer of the TensorFlow and the PyTorch models and computes the standard deviation between them. In the given example, we get a standard deviation of 1.5e-7 to 9e-7 on the various hidden states of the models.
The first notebook (\ `Comparing-TF-and-PT-models.ipynb <https://github.com/huggingface/transformers/blob/master/notebooks/Comparing-TF-and-PT-models.ipynb>`_\ ) extracts the hidden states of a full sequence on each layer of the TensorFlow and the PyTorch models and computes the standard deviation between them. In the given example, we get a standard deviation of 1.5e-7 to 9e-7 on the various hidden states of the models.
*
The second notebook (\ `Comparing-TF-and-PT-models-SQuAD.ipynb <https://github.com/huggingface/pytorch-transformers/blob/master/notebooks/Comparing-TF-and-PT-models-SQuAD.ipynb>`_\ ) compares the loss computed by the TensorFlow and the PyTorch models for identical initialization of the fine-tuning layer of the ``BertForQuestionAnswering`` and computes the standard deviation between them. In the given example, we get a standard deviation of 2.5e-7 between the models.
The second notebook (\ `Comparing-TF-and-PT-models-SQuAD.ipynb <https://github.com/huggingface/transformers/blob/master/notebooks/Comparing-TF-and-PT-models-SQuAD.ipynb>`_\ ) compares the loss computed by the TensorFlow and the PyTorch models for identical initialization of the fine-tuning layer of the ``BertForQuestionAnswering`` and computes the standard deviation between them. In the given example, we get a standard deviation of 2.5e-7 between the models.
*
The third notebook (\ `Comparing-TF-and-PT-models-MLM-NSP.ipynb <https://github.com/huggingface/pytorch-transformers/blob/master/notebooks/Comparing-TF-and-PT-models-MLM-NSP.ipynb>`_\ ) compares the predictions computed by the TensorFlow and the PyTorch models for masked token language modeling using the pre-trained masked language modeling model.
The third notebook (\ `Comparing-TF-and-PT-models-MLM-NSP.ipynb <https://github.com/huggingface/transformers/blob/master/notebooks/Comparing-TF-and-PT-models-MLM-NSP.ipynb>`_\ ) compares the predictions computed by the TensorFlow and the PyTorch models for masked token language modeling using the pre-trained masked language modeling model.
Please follow the instructions given in the notebooks to run and modify them.
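
At its core, the check these notebooks perform is: extract the per-layer hidden states from both frameworks for the same input and measure their numerical discrepancy. A hedged, self-contained sketch with random stand-in arrays (the real notebooks use the actual model outputs):

```python
import numpy as np

# Illustrative stand-ins: one array per layer from each framework for the same input.
rng = np.random.RandomState(0)
pt_hidden_states = [rng.randn(1, 14, 768).astype(np.float32) for _ in range(12)]
tf_hidden_states = [h + 1e-7 * rng.randn(*h.shape).astype(np.float32) for h in pt_hidden_states]

for layer, (tf_h, pt_h) in enumerate(zip(tf_hidden_states, pt_hidden_states)):
    diff = tf_h - pt_h
    print("layer %2d: std %.2e, max abs diff %.2e" % (layer, diff.std(), np.abs(diff).max()))
```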
@@ -44,15 +44,15 @@ Here is the full list of the currently provided pretrained models together with
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| | ``bert-large-uncased-whole-word-masking-finetuned-squad`` | | 24-layer, 1024-hidden, 16-heads, 340M parameters. |
| | | | The ``bert-large-uncased-whole-word-masking`` model fine-tuned on SQuAD |
| | | (see details of fine-tuning in the `example section <https://github.com/huggingface/pytorch-transformers/tree/master/examples>`__). |
| | | (see details of fine-tuning in the `example section <https://github.com/huggingface/transformers/tree/master/examples>`__). |
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| | ``bert-large-cased-whole-word-masking-finetuned-squad`` | | 24-layer, 1024-hidden, 16-heads, 340M parameters |
| | | | The ``bert-large-cased-whole-word-masking`` model fine-tuned on SQuAD |
| | | (see `details of fine-tuning in the example section <https://huggingface.co/pytorch-transformers/examples.html>`__) |
| | | (see `details of fine-tuning in the example section <https://huggingface.co/transformers/examples.html>`__) |
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| | ``bert-base-cased-finetuned-mrpc`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. |
| | | | The ``bert-base-cased`` model fine-tuned on MRPC |
| | | (see `details of fine-tuning in the example section <https://huggingface.co/pytorch-transformers/examples.html>`__) |
| | | (see `details of fine-tuning in the example section <https://huggingface.co/transformers/examples.html>`__) |
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| GPT | ``openai-gpt`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. |
| | | | OpenAI GPT English model |
@@ -120,4 +120,4 @@ Here is the full list of the currently provided pretrained models together with
| | | (see `details <https://medium.com/huggingface/distilbert-8cf3380435b5>`__) |
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
.. <https://huggingface.co/pytorch-transformers/examples.html>`__
\ No newline at end of file
.. <https://huggingface.co/transformers/examples.html>`__
\ No newline at end of file
@@ -2,7 +2,7 @@
## Philosophy
PyTorch-Transformers is an opinionated library built for NLP researchers seeking to use/study/extend large-scale transformer models.
Transformers is an opinionated library built for NLP researchers seeking to use/study/extend large-scale transformer models.
The library was designed with two strong goals in mind:
@@ -19,12 +19,12 @@ The library was designed with two strong goals in mind:
A few other goals:
- expose the models internals as consistently as possible:
- expose the models' internals as consistently as possible:
- we give access, using a single API, to the full hidden states and attention weights,
- the tokenizers' and base models' APIs are standardized to make it easy to switch between models.
- incorporate a subjective selection of promising tools for fine-tuning/investiguating these models:
- incorporate a subjective selection of promising tools for fine-tuning/investigating these models:
- a simple/consistent way to add new tokens to the vocabulary and embeddings for fine-tuning,
- simple ways to mask and prune transformer heads.
@@ -33,13 +33,13 @@ A few other goals:
The library is built around three types of classes for each model:
- **model classes** which are PyTorch models (`torch.nn.Modules`) of the 6 model architectures currently provided in the library, e.g. `BertModel`
- **model classes** which are PyTorch models (`torch.nn.Modules`) of the 8 model architectures currently provided in the library, e.g. `BertModel`
- **configuration classes** which store all the parameters required to build a model, e.g. `BertConfig`. You don't always need to instantiate these yourself; in particular, if you are using a pretrained model without any modification, creating the model will automatically take care of instantiating the configuration (which is part of the model)
- **tokenizer classes** which store the vocabulary for each model and provide methods for encoding/decoding strings into lists of token embedding indices to be fed to a model, e.g. `BertTokenizer`
All these classes can be instantiated from pretrained instances and saved locally using two methods:
- `from_pretrained()` lets you instantiate a model/configuration/tokenizer from a pretrained version either provided by the library itself (currently 27 models are provided as listed [here](https://huggingface.co/pytorch-transformers/pretrained_models.html)) or stored locally (or on a server) by the user,
- `from_pretrained()` lets you instantiate a model/configuration/tokenizer from a pretrained version either provided by the library itself (currently 27 models are provided as listed [here](https://huggingface.co/transformers/pretrained_models.html)) or stored locally (or on a server) by the user,
- `save_pretrained()` lets you save a model/configuration/tokenizer locally so that it can be reloaded using `from_pretrained()`.
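
A minimal round-trip sketch of these two methods (the `./my_bert/` directory is illustrative):

```python
from transformers import BertModel, BertTokenizer

model = BertModel.from_pretrained('bert-base-uncased')         # downloaded and cached
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

model.save_pretrained('./my_bert/')       # writes the weights and the configuration
tokenizer.save_pretrained('./my_bert/')   # writes the vocabulary files

model = BertModel.from_pretrained('./my_bert/')  # reload from the local copy
```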
We'll finish this tour with a few quick-start examples showing how to instantiate and use these classes. The rest of the documentation is organized in two parts:
@@ -51,7 +51,7 @@ We'll finish this quickstart tour by going through a few simple quick-start exam
Here are two examples showcasing a few `Bert` and `GPT2` classes and pre-trained models.
See full API reference for examples for each model classe.
See full API reference for examples for each model class.
### BERT example
@@ -59,7 +59,7 @@ Let's start by preparing a tokenized input (a list of token embeddings indices t
```python
import torch
from pytorch_transformers import BertTokenizer, BertModel, BertForMaskedLM
from transformers import BertTokenizer, BertModel, BertForMaskedLM
# OPTIONAL: if you want to have more information on what's happening under the hood, activate the logger as follows
import logging
@@ -93,8 +93,8 @@ Let's see how we can use `BertModel` to encode our inputs in hidden-states:
# Load pre-trained model (weights)
model = BertModel.from_pretrained('bert-base-uncased')
# Set the model in evaluation mode to desactivate the DropOut modules
# This is IMPORTANT to have reproductible results during evaluation!
# Set the model in evaluation mode to deactivate the DropOut modules
# This is IMPORTANT to have reproducible results during evaluation!
model.eval()
# If you have a GPU, put everything on cuda
@@ -106,7 +106,7 @@ model.to('cuda')
with torch.no_grad():
    # See the model docstrings for details on the inputs
outputs = model(tokens_tensor, token_type_ids=segments_tensors)
# PyTorch-Transformers models always output tuples.
# Transformers models always output tuples.
# See the model docstrings for details on all the outputs
# In our case, the first element is the hidden state of the last layer of the Bert model
encoded_layers = outputs[0]
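
The hunks above elide the tokenization step; here is a hedged, self-contained version of the same encode example (the input text follows the original quickstart):

```python
import torch
from transformers import BertTokenizer, BertModel

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize a sentence pair; the [CLS]/[SEP] markers follow the original quickstart text
text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
tokenized_text = tokenizer.tokenize(text)
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

# Sentence A gets segment id 0 (up to and including the first [SEP]), sentence B gets 1
first_sep = tokenized_text.index('[SEP]') + 1
segments_ids = [0] * first_sep + [1] * (len(tokenized_text) - first_sep)

tokens_tensor = torch.tensor([indexed_tokens])
segments_tensors = torch.tensor([segments_ids])

model = BertModel.from_pretrained('bert-base-uncased')
model.eval()  # deactivate DropOut for reproducible evaluation

with torch.no_grad():
    outputs = model(tokens_tensor, token_type_ids=segments_tensors)
encoded_layers = outputs[0]  # hidden states of the last layer
```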
@@ -145,7 +145,7 @@ First let's prepare a tokenized input from our text string using `GPT2Tokenizer`
```python
import torch
from pytorch_transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# OPTIONAL: if you want to have more information on what's happening, activate the logger as follows
import logging
@@ -168,8 +168,8 @@ Let's see how to use `GPT2LMHeadModel` to generate the next token following our
# Load pre-trained model (weights)
model = GPT2LMHeadModel.from_pretrained('gpt2')
# Set the model in evaluation mode to desactivate the DropOut modules
# This is IMPORTANT to have reproductible results during evaluation!
# Set the model in evaluation mode to deactivate the DropOut modules
# This is IMPORTANT to have reproducible results during evaluation!
model.eval()
# If you have a GPU, put everything on cuda
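The hunk is truncated before the prediction step; a hedged, self-contained completion in the spirit of the original quickstart:

```python
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()  # deactivate DropOut for reproducible evaluation

text = "Who was Jim Henson ? Jim Henson was a"
indexed_tokens = tokenizer.encode(text)
tokens_tensor = torch.tensor([indexed_tokens])

with torch.no_grad():
    outputs = model(tokens_tensor)
    predictions = outputs[0]  # logits, shape (batch, seq_len, vocab_size)

# Greedy pick for the next token
predicted_index = torch.argmax(predictions[0, -1, :]).item()
predicted_text = tokenizer.decode(indexed_tokens + [predicted_index])
print(predicted_text)
```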
@@ -45,7 +45,7 @@ where
* ``bert_config.json`` or ``openai_gpt_config.json``, a configuration file for the model, and
* ``pytorch_model.bin``, a PyTorch dump of a pre-trained instance of ``BertForPreTraining``\ , ``OpenAIGPTModel``\ , ``TransfoXLModel``\ , ``GPT2LMHeadModel`` (saved with the usual ``torch.save()``\ )
If ``PRE_TRAINED_MODEL_NAME_OR_PATH`` is a shortcut name, the pre-trained weights will be downloaded from AWS S3 (see the links `here <https://github.com/huggingface/pytorch-transformers/blob/master/pytorch_transformers/modeling_bert.py>`__\ ) and stored in a cache folder to avoid future downloads (the cache folder can be found at ``~/.pytorch_pretrained_bert/``\ ).
If ``PRE_TRAINED_MODEL_NAME_OR_PATH`` is a shortcut name, the pre-trained weights will be downloaded from AWS S3 (see the links `here <https://github.com/huggingface/transformers/blob/master/transformers/modeling_bert.py>`__\ ) and stored in a cache folder to avoid future downloads (the cache folder can be found at ``~/.pytorch_pretrained_bert/``\ ).
*
``cache_dir`` is an optional path to a specific directory in which to download and cache the pre-trained model weights. This option is useful in particular when you are using distributed training: to avoid concurrent access to the same weights you can, for example, set ``cache_dir='./pretrained_model_{}'.format(args.local_rank)`` (see the section on distributed training for more information).
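
A sketch of that distributed-training pattern (``local_rank`` would normally come from your launch script):

.. code-block:: python

    from transformers import BertForSequenceClassification

    local_rank = 0  # in real distributed training this comes from your launcher

    # One cache directory per process avoids concurrent writes to the same weights
    model = BertForSequenceClassification.from_pretrained(
        'bert-base-uncased',
        cache_dir='./pretrained_model_{}'.format(local_rank))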
@@ -122,7 +122,7 @@ Here is the recommended way of saving the model, configuration and vocabulary to
.. code-block:: python
from pytorch_transformers import WEIGHTS_NAME, CONFIG_NAME
from transformers import WEIGHTS_NAME, CONFIG_NAME
output_dir = "./models/"
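The block above is cut off after ``output_dir``; a hedged completion of the era's save pattern (the model/tokenizer instantiation is illustrative, and ``to_json_file``/``save_vocabulary`` are assumed from that era's API):

.. code-block:: python

    import os
    import torch
    from transformers import WEIGHTS_NAME, CONFIG_NAME, BertModel, BertTokenizer

    model = BertModel.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    output_dir = "./models/"
    os.makedirs(output_dir, exist_ok=True)

    # Unwrap DataParallel/DistributedDataParallel before saving, if applicable
    model_to_save = model.module if hasattr(model, 'module') else model

    torch.save(model_to_save.state_dict(), os.path.join(output_dir, WEIGHTS_NAME))
    model_to_save.config.to_json_file(os.path.join(output_dir, CONFIG_NAME))
    tokenizer.save_vocabulary(output_dir)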
@@ -12,7 +12,7 @@ According to PyTorch's documentation: "TorchScript is a way to create serializab
PyTorch's two modules `JIT and TRACE <https://pytorch.org/docs/stable/jit.html>`_ allow the developer to export
their model to be re-used in other programs, such as efficiency-oriented C++ programs.
We have provided an interface that allows the export of `pytorch-transformers` models to TorchScript so that they can
We have provided an interface that allows the export of `transformers` models to TorchScript so that they can
be reused in a different environment than a PyTorch-based Python program. Here we explain how to use our models so that
they can be exported, and what to be mindful of when using these models with TorchScript.
@@ -74,7 +74,7 @@ according to a ``BertConfig`` class and then saved to disk under the filename ``
.. code-block:: python
from pytorch_transformers import BertModel, BertTokenizer, BertConfig
from transformers import BertModel, BertTokenizer, BertConfig
import torch
enc = BertTokenizer.from_pretrained("bert-base-uncased")
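The hunk stops after the tokenizer; a hedged sketch of the trace-and-save flow this page describes (``torchscript=True`` unties the input/output embeddings so the model can be traced; weights here are randomly initialized, purely for demonstration):

.. code-block:: python

    import torch
    from transformers import BertModel, BertTokenizer, BertConfig

    enc = BertTokenizer.from_pretrained("bert-base-uncased")

    # Dummy input for tracing: the values are placeholders, the shape is what matters
    tokens_tensor = torch.tensor([enc.encode("This is a sample input", add_special_tokens=True)])

    config = BertConfig(torchscript=True)
    model = BertModel(config)
    model.eval()

    # Trace with input_ids only; extend the example inputs if your call site passes more
    traced_model = torch.jit.trace(model, (tokens_tensor,))
    torch.jit.save(traced_model, "traced_bert.pt")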
@@ -13,7 +13,7 @@ similar API between the different models.
## Language model fine-tuning
Based on the script [`run_lm_finetuning.py`](https://github.com/huggingface/pytorch-transformers/blob/master/examples/run_lm_finetuning.py).
Based on the script [`run_lm_finetuning.py`](https://github.com/huggingface/transformers/blob/master/examples/run_lm_finetuning.py).
Fine-tuning the library models for language modeling on a text dataset for GPT, GPT-2, BERT and RoBERTa (DistilBERT
to be added soon). GPT and GPT-2 are fine-tuned using a causal language modeling (CLM) loss while BERT and RoBERTa
@@ -75,7 +75,7 @@ python run_lm_finetuning.py \
## Language generation
Based on the script [`run_generation.py`](https://github.com/huggingface/pytorch-transformers/blob/master/examples/run_generation.py).
Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/master/examples/run_generation.py).
Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, Transformer-XL and XLNet.
A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you
@@ -91,26 +91,26 @@ python run_generation.py \
## GLUE
Based on the script [`run_glue.py`](https://github.com/huggingface/pytorch-transformers/blob/master/examples/run_glue.py).
Based on the script [`run_glue.py`](https://github.com/huggingface/transformers/blob/master/examples/run_glue.py).
Fine-tuning the library models for sequence classification on the GLUE benchmark: [General Language Understanding
Evaluation](https://gluebenchmark.com/). This script can fine-tune the following models: BERT, XLM, XLNet and RoBERTa.
GLUE is made up of a total of 9 different tasks. We get the following results on the dev set of the benchmark with an
uncased BERT base model (the checkpoint `bert-base-uncased`). All experiments ran on 8 V100 GPUs with a total train
batch size of 24. Some of these tasks have a small dataset and training can lead to high variance in the results
between different runs. We report the median on 5 runs (with different seeds) for each of the metrics.
| Task | Metric | Result |
|-------|------------------------------|-------------|
| CoLA  | Matthews corr                | 55.75       |
| SST-2 | Accuracy | 92.09 |
| MRPC | F1/Accuracy | 90.48/86.27 |
| STS-B | Pearson/Spearman corr.       | 89.03/88.64 |
| QQP | Accuracy/F1 | 90.92/87.72 |
| MNLI | Matched acc./Mismatched acc. | 83.74/84.06 |
| QNLI | Accuracy | 91.07 |
| RTE | Accuracy | 68.59 |
| CoLA  | Matthews corr                | 48.87       |
| SST-2 | Accuracy | 91.74 |
| MRPC | F1/Accuracy | 90.70/86.27 |
| STS-B | Pearson/Spearman corr.       | 91.39/91.04 |
| QQP | Accuracy/F1 | 90.79/87.66 |
| MNLI | Matched acc./Mismatched acc. | 83.70/84.83 |
| QNLI | Accuracy | 89.31 |
| RTE | Accuracy | 71.43 |
| WNLI | Accuracy | 43.66 |
Some of these results are significantly different from the ones reported on the test set
@@ -319,7 +319,7 @@ eval_loss = 0.44457291918821606
## SQuAD
Based on the script [`run_squad.py`](https://github.com/huggingface/pytorch-transformers/blob/master/examples/run_squad.py).
Based on the script [`run_squad.py`](https://github.com/huggingface/transformers/blob/master/examples/run_squad.py).
#### Fine-tuning on SQuAD
@@ -39,7 +39,7 @@ import torch
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
TensorDataset)
from pytorch_transformers import (OpenAIGPTDoubleHeadsModel, OpenAIGPTTokenizer,
from transformers import (OpenAIGPTDoubleHeadsModel, OpenAIGPTTokenizer,
AdamW, cached_path, WEIGHTS_NAME, CONFIG_NAME,
WarmupLinearSchedule)
@@ -35,10 +35,10 @@ from tqdm import tqdm, trange
from tensorboardX import SummaryWriter
from pytorch_transformers import (WEIGHTS_NAME, BertConfig,
from transformers import (WEIGHTS_NAME, BertConfig,
BertForMultipleChoice, BertTokenizer)
from pytorch_transformers import AdamW, WarmupLinearSchedule
from transformers import AdamW, WarmupLinearSchedule
logger = logging.getLogger(__name__)
@@ -365,7 +365,7 @@ def train(args, train_dataset, model, tokenizer):
# inputs.update({'cls_index': batch[5],
# 'p_mask': batch[6]})
outputs = model(**inputs)
loss = outputs[0] # model outputs are always tuple in pytorch-transformers (see doc)
loss = outputs[0] # model outputs are always tuple in transformers (see doc)
if args.n_gpu > 1:
loss = loss.mean() # mean() to average on multi-gpu parallel (not distributed) training
@@ -647,7 +647,7 @@ def main():
if args.eval_all_checkpoints:
checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN) # Reduce model loading logs
logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN) # Reduce model loading logs
logger.info("Evaluate the following checkpoints: %s", checkpoints)
@@ -28,7 +28,7 @@ import math
import torch
from pytorch_transformers import TransfoXLLMHeadModel, TransfoXLCorpus, TransfoXLTokenizer
from transformers import TransfoXLLMHeadModel, TransfoXLCorpus, TransfoXLTokenizer
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
datefmt = '%m/%d/%Y %H:%M:%S',