Commit d7395789 authored by danai-antoniou


Merge branch 'master' of https://github.com/danai-antoniou/pytorch-transformers into add-duplicate-tokens-error
parents 2e6797cc 391db836
@@ -12,5 +12,5 @@ The base class ``PreTrainedTokenizer`` implements the common methods for loading
``PreTrainedTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.PreTrainedTokenizer
:members:
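As an illustration of these common methods (a sketch, not part of the generated reference: it assumes a BERT checkpoint and the standard ``add_tokens`` / ``resize_token_embeddings`` API), adding new tokens to a tokenizer and resizing the matching model embeddings looks like this:

.. code-block:: python

    from transformers import BertTokenizer, BertModel

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased')

    # Extend the vocabulary, then resize the model's embedding matrix to match
    num_added = tokenizer.add_tokens(['new_tok1', 'my_new-tok2'])
    model.resize_token_embeddings(len(tokenizer))
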
# Migrating from pytorch-pretrained-bert

Here is a quick summary of what you should take care of when migrating from `pytorch-pretrained-bert` to `transformers`.

### Models always output `tuples`

The main breaking change when migrating from `pytorch-pretrained-bert` to `transformers` is that every model's forward method always outputs a `tuple` with various elements depending on the model and the configuration parameters.
The exact content of the tuples for each model is detailed in the models' docstrings and the [documentation](https://huggingface.co/transformers/).
In pretty much every case, you will be fine by taking the first element of the output as the output you previously used in `pytorch-pretrained-bert`.

Here is a `pytorch-pretrained-bert` to `transformers` conversion example for a `BertForSequenceClassification` classification model:
```python
# Let's load our model
@@ -20,11 +20,11 @@ model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
# If you used to have this line in pytorch-pretrained-bert:
loss = model(input_ids, labels=labels)

# Now just use this line in transformers to extract the loss from the output tuple:
outputs = model(input_ids, labels=labels)
loss = outputs[0]

# In transformers you can also have access to the logits:
loss, logits = outputs[:2]

# And even the attention weights if you configure the model to output them (and other outputs too, see the docstrings and documentation)
```
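If you also want the attention weights, one way to request them (a sketch: it assumes the standard `output_attentions` configuration flag and the tuple ordering described in the docstrings) is:

```python
# Sketch: load the model with attention outputs enabled
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', output_attentions=True)

outputs = model(input_ids, labels=labels)
loss, logits = outputs[:2]
attentions = outputs[-1]  # a tuple with one attention tensor per layer
```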
@@ -96,7 +96,7 @@ for batch in train_data:
loss.backward()
optimizer.step()
### In Transformers, optimizer and schedules are split and instantiated like this:
optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False)  # To reproduce BertAdam specific behavior set correct_bias=False
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=num_warmup_steps, t_total=num_total_steps)  # PyTorch scheduler
### and used like this:
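The usage lines are collapsed in this diff; a minimal sketch of that pattern (assuming `model`, `train_data`, `max_grad_norm` and the objects created above are defined, and that `torch` is imported) is:

```python
for batch in train_data:
    loss = model(batch)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)  # clipping is now done outside the optimizer
    optimizer.step()
    scheduler.step()  # update the learning rate schedule
    optimizer.zero_grad()
```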
@@ -11,19 +11,19 @@ Instantiating one of ``AutoModel``, ``AutoConfig`` and ``AutoTokenizer`` will di
``AutoConfig``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.AutoConfig
:members:
``AutoModel``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.AutoModel
:members:
``AutoTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.AutoTokenizer
:members:
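As a quick illustration of the pattern these classes share (a sketch, not part of the generated reference), the architecture is inferred from the checkpoint name passed to ``from_pretrained``:

.. code-block:: python

    from transformers import AutoConfig, AutoModel, AutoTokenizer

    # The Auto* classes resolve 'bert-base-uncased' to the BERT config, tokenizer and model classes
    config = AutoConfig.from_pretrained('bert-base-uncased')
    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
    model = AutoModel.from_pretrained('bert-base-uncased')
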
@@ -4,69 +4,125 @@ BERT
``BertConfig``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BertConfig
:members:
``BertTokenizer``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BertTokenizer
:members:
``BertModel``
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BertModel
:members:
``BertForPreTraining``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BertForPreTraining
:members:
``BertForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BertForMaskedLM
:members:
``BertForNextSentencePrediction``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BertForNextSentencePrediction
:members:
``BertForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BertForSequenceClassification
:members:
``BertForMultipleChoice``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BertForMultipleChoice
:members:
``BertForTokenClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BertForTokenClassification
:members:
``BertForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BertForQuestionAnswering
:members:
``TFBertModel``
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertModel
:members:
``TFBertForPreTraining``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForPreTraining
:members:
``TFBertForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForMaskedLM
:members:
``TFBertForNextSentencePrediction``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForNextSentencePrediction
:members:
``TFBertForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForSequenceClassification
:members:
``TFBertForMultipleChoice``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForMultipleChoice
:members:
``TFBertForTokenClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForTokenClassification
:members:
``TFBertForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFBertForQuestionAnswering
:members:
@@ -4,40 +4,67 @@ DistilBERT
``DistilBertConfig``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.DistilBertConfig
:members:
``DistilBertTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.DistilBertTokenizer
:members:
``DistilBertModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.DistilBertModel
:members:
``DistilBertForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.DistilBertForMaskedLM
:members:
``DistilBertForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.DistilBertForSequenceClassification
:members:
``DistilBertForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.DistilBertForQuestionAnswering
:members:
``TFDistilBertModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFDistilBertModel
:members:
``TFDistilBertForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFDistilBertForMaskedLM
:members:
``TFDistilBertForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFDistilBertForSequenceClassification
:members:
``TFDistilBertForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFDistilBertForQuestionAnswering
:members:
@@ -4,33 +4,54 @@ OpenAI GPT
``OpenAIGPTConfig``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.OpenAIGPTConfig
:members:
``OpenAIGPTTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.OpenAIGPTTokenizer
:members:
``OpenAIGPTModel``
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.OpenAIGPTModel
:members:
``OpenAIGPTLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.OpenAIGPTLMHeadModel
:members:
``OpenAIGPTDoubleHeadsModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.OpenAIGPTDoubleHeadsModel
:members:
``TFOpenAIGPTModel``
~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFOpenAIGPTModel
:members:
``TFOpenAIGPTLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFOpenAIGPTLMHeadModel
:members:
``TFOpenAIGPTDoubleHeadsModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFOpenAIGPTDoubleHeadsModel
:members:
@@ -4,33 +4,54 @@ OpenAI GPT2
``GPT2Config``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.GPT2Config
:members:
``GPT2Tokenizer``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.GPT2Tokenizer
:members:
``GPT2Model``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.GPT2Model
:members:
``GPT2LMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.GPT2LMHeadModel
:members:
``GPT2DoubleHeadsModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.GPT2DoubleHeadsModel
:members:
``TFGPT2Model``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFGPT2Model
:members:
``TFGPT2LMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFGPT2LMHeadModel
:members:
``TFGPT2DoubleHeadsModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFGPT2DoubleHeadsModel
:members:
@@ -4,33 +4,54 @@ RoBERTa
``RobertaConfig``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.RobertaConfig
:members:
``RobertaTokenizer``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.RobertaTokenizer
:members:
``RobertaModel``
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.RobertaModel
:members:
``RobertaForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.RobertaForMaskedLM
:members:
``RobertaForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.RobertaForSequenceClassification
:members:
``TFRobertaModel``
~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFRobertaModel
:members:
``TFRobertaForMaskedLM``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFRobertaForMaskedLM
:members:
``TFRobertaForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFRobertaForSequenceClassification
:members:
@@ -5,26 +5,40 @@ Transformer XL
``TransfoXLConfig``
~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TransfoXLConfig
:members:
``TransfoXLTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TransfoXLTokenizer
:members:
``TransfoXLModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TransfoXLModel
:members:
``TransfoXLLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TransfoXLLMHeadModel
:members:
``TFTransfoXLModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFTransfoXLModel
:members:
``TFTransfoXLLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFTransfoXLLMHeadModel
:members:
@@ -4,38 +4,66 @@ XLM
``XLMConfig``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLMConfig
:members:
``XLMTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLMTokenizer
:members:
``XLMModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLMModel
:members:
``XLMWithLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLMWithLMHeadModel
:members:
``XLMForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLMForSequenceClassification
:members:
``XLMForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLMForQuestionAnswering
:members:
``TFXLMModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLMModel
:members:
``TFXLMWithLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLMWithLMHeadModel
:members:
``TFXLMForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLMForSequenceClassification
:members:
``TFXLMForQuestionAnsweringSimple``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLMForQuestionAnsweringSimple
:members:
@@ -4,40 +4,68 @@ XLNet
``XLNetConfig``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLNetConfig
:members:
``XLNetTokenizer``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLNetTokenizer
:members:
``XLNetModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLNetModel
:members:
``XLNetLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLNetLMHeadModel
:members:
``XLNetForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLNetForSequenceClassification
:members:
``XLNetForQuestionAnswering``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.XLNetForQuestionAnswering
:members:
``TFXLNetModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLNetModel
:members:
``TFXLNetLMHeadModel``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLNetLMHeadModel
:members:
``TFXLNetForSequenceClassification``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLNetForSequenceClassification
:members:
``TFXLNetForQuestionAnsweringSimple``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TFXLNetForQuestionAnsweringSimple
:members:
Notebooks
================================================
We include `three Jupyter Notebooks <https://github.com/huggingface/transformers/tree/master/notebooks>`_ that can be used to check that the predictions of the PyTorch model are identical to the predictions of the original TensorFlow model.

* The first notebook (\ `Comparing-TF-and-PT-models.ipynb <https://github.com/huggingface/transformers/blob/master/notebooks/Comparing-TF-and-PT-models.ipynb>`_\ ) extracts the hidden states of a full sequence at each layer of the TensorFlow and the PyTorch models and computes the standard deviation between them. In the given example, we get a standard deviation of 1.5e-7 to 9e-7 on the various hidden states of the models.
* The second notebook (\ `Comparing-TF-and-PT-models-SQuAD.ipynb <https://github.com/huggingface/transformers/blob/master/notebooks/Comparing-TF-and-PT-models-SQuAD.ipynb>`_\ ) compares the loss computed by the TensorFlow and the PyTorch models for identical initialization of the fine-tuning layer of ``BertForQuestionAnswering`` and computes the standard deviation between them. In the given example, we get a standard deviation of 2.5e-7 between the models.
* The third notebook (\ `Comparing-TF-and-PT-models-MLM-NSP.ipynb <https://github.com/huggingface/transformers/blob/master/notebooks/Comparing-TF-and-PT-models-MLM-NSP.ipynb>`_\ ) compares the predictions computed by the TensorFlow and the PyTorch models for masked token language modeling using the pre-trained masked language modeling model.

Please follow the instructions given in the notebooks to run and modify them.
@@ -44,15 +44,15 @@ Here is the full list of the currently provided pretrained models together with
|                   +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|                   | ``bert-large-uncased-whole-word-masking-finetuned-squad``   | | 24-layer, 1024-hidden, 16-heads, 340M parameters.                                                                                   |
|                   |                                                              | | The ``bert-large-uncased-whole-word-masking`` model fine-tuned on SQuAD                                                             |
|                   |                                                              |   (see details of fine-tuning in the `example section <https://github.com/huggingface/transformers/tree/master/examples>`__).        |
|                   +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|                   | ``bert-large-cased-whole-word-masking-finetuned-squad``     | | 24-layer, 1024-hidden, 16-heads, 340M parameters                                                                                    |
|                   |                                                              | | The ``bert-large-cased-whole-word-masking`` model fine-tuned on SQuAD                                                               |
|                   |                                                              |   (see `details of fine-tuning in the example section <https://huggingface.co/transformers/examples.html>`__)                        |
|                   +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
|                   | ``bert-base-cased-finetuned-mrpc``                           | | 12-layer, 768-hidden, 12-heads, 110M parameters.                                                                                    |
|                   |                                                              | | The ``bert-base-cased`` model fine-tuned on MRPC                                                                                    |
|                   |                                                              |   (see `details of fine-tuning in the example section <https://huggingface.co/transformers/examples.html>`__)                        |
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| GPT               | ``openai-gpt``                                               | | 12-layer, 768-hidden, 12-heads, 110M parameters.                                                                                    |
|                   |                                                              | | OpenAI GPT English model                                                                                                            |
@@ -120,4 +120,4 @@ Here is the full list of the currently provided pretrained models together with
|                   |                                                              |   (see `details <https://medium.com/huggingface/distilbert-8cf3380435b5>`__)                                                          |
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
.. <https://huggingface.co/transformers/examples.html>`__
\ No newline at end of file
@@ -2,7 +2,7 @@
## Philosophy

Transformers is an opinionated library built for NLP researchers seeking to use/study/extend large-scale transformer models.

The library was designed with two strong goals in mind:
@@ -19,12 +19,12 @@ The library was designed with two strong goals in mind:
A few other goals:

- expose the models' internals as consistently as possible:
  - we give access, using a single API, to the full hidden-states and attention weights,
  - the tokenizer's and base model's APIs are standardized to make it easy to switch between models.
- incorporate a subjective selection of promising tools for fine-tuning/investigating these models:
  - a simple/consistent way to add new tokens to the vocabulary and embeddings for fine-tuning,
  - simple ways to mask and prune transformer heads.
@@ -33,13 +33,13 @@ A few other goals:
The library is built around three types of classes for each model:

- **model classes**, which are PyTorch models (`torch.nn.Modules`) of the 8 model architectures currently provided in the library, e.g. `BertModel`
- **configuration classes**, which store all the parameters required to build a model, e.g. `BertConfig`. You don't always need to instantiate these yourself; in particular, if you are using a pretrained model without any modification, creating the model will automatically take care of instantiating the configuration (which is part of the model)
- **tokenizer classes**, which store the vocabulary for each model and provide methods for encoding/decoding strings into lists of token embedding indices to be fed to a model, e.g. `BertTokenizer`

All these classes can be instantiated from pretrained instances and saved locally using two methods:

- `from_pretrained()` lets you instantiate a model/configuration/tokenizer from a pretrained version either provided by the library itself (currently 27 models are provided, as listed [here](https://huggingface.co/transformers/pretrained_models.html)) or stored locally (or on a server) by the user,
- `save_pretrained()` lets you save a model/configuration/tokenizer locally so that it can be reloaded using `from_pretrained()` (see the short round-trip sketch below).
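A minimal round-trip sketch of these two methods (assuming a BERT checkpoint and a local directory of your choice):

```python
from transformers import BertModel, BertTokenizer

# Download pretrained instances, save them locally, then reload from the local copy
model = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

model.save_pretrained('./my_saved_model/')
tokenizer.save_pretrained('./my_saved_model/')

model = BertModel.from_pretrained('./my_saved_model/')
tokenizer = BertTokenizer.from_pretrained('./my_saved_model/')
```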
We'll finish this quickstart tour by going through a few simple quick-start examples to see how we can instantiate and use these classes. The rest of the documentation is organized in two parts:
@@ -51,7 +51,7 @@ We'll finish this quickstart tour by going through a few simple quick-start exam
Here are two examples showcasing a few `Bert` and `GPT2` classes and pre-trained models.

See the full API reference for examples for each model class.
### BERT example

@@ -59,7 +59,7 @@ Let's start by preparing a tokenized input (a list of token embeddings indices t
```python
import torch
from transformers import BertTokenizer, BertModel, BertForMaskedLM

# OPTIONAL: if you want to have more information on what's happening under the hood, activate the logger as follows
import logging
@@ -93,8 +93,8 @@ Let's see how we can use `BertModel` to encode our inputs in hidden-states:
# Load pre-trained model (weights)
model = BertModel.from_pretrained('bert-base-uncased')

# Set the model in evaluation mode to deactivate the DropOut modules
# This is IMPORTANT to have reproducible results during evaluation!
model.eval()

# If you have a GPU, put everything on cuda
@@ -106,7 +106,7 @@ model.to('cuda')
with torch.no_grad():
    # See the models docstrings for the detail of the inputs
    outputs = model(tokens_tensor, token_type_ids=segments_tensors)
    # Transformers models always output tuples.
    # See the models docstrings for the detail of all the outputs
    # In our case, the first element is the hidden state of the last layer of the Bert model
    encoded_layers = outputs[0]
```
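As a quick sanity check on the shape of this output (a sketch; it reuses `tokens_tensor` from the call above):

```python
# The encoded sequence has shape (batch_size, sequence_length, hidden_size)
assert encoded_layers.shape == (*tokens_tensor.shape, model.config.hidden_size)
```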
@@ -145,7 +145,7 @@ First let's prepare a tokenized input from our text string using `GPT2Tokenizer`
```python
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# OPTIONAL: if you want to have more information on what's happening, activate the logger as follows
import logging
@@ -168,8 +168,8 @@ Let's see how to use `GPT2LMHeadModel` to generate the next token following our
# Load pre-trained model (weights)
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Set the model in evaluation mode to deactivate the DropOut modules
# This is IMPORTANT to have reproducible results during evaluation!
model.eval()

# If you have a GPU, put everything on cuda
```
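The generation step itself is collapsed in this diff; a minimal sketch of it (assuming `tokens_tensor` and the `GPT2Tokenizer` instance `tokenizer` were created in the tokenization step above):

```python
# Predict the most likely next token for the encoded prompt
with torch.no_grad():
    outputs = model(tokens_tensor)
    predictions = outputs[0]

# Take the token with the highest score at the last position and decode it
predicted_index = torch.argmax(predictions[0, -1, :]).item()
predicted_token = tokenizer.decode([predicted_index])
```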
@@ -45,7 +45,7 @@ where
* ``bert_config.json`` or ``openai_gpt_config.json`` a configuration file for the model, and
* ``pytorch_model.bin`` a PyTorch dump of a pre-trained instance of ``BertForPreTraining``\ , ``OpenAIGPTModel``\ , ``TransfoXLModel``\ , ``GPT2LMHeadModel`` (saved with the usual ``torch.save()``\ )

If ``PRE_TRAINED_MODEL_NAME_OR_PATH`` is a shortcut name, the pre-trained weights will be downloaded from AWS S3 (see the links `here <https://github.com/huggingface/transformers/blob/master/transformers/modeling_bert.py>`__\ ) and stored in a cache folder to avoid downloading them again (the cache folder can be found at ``~/.pytorch_pretrained_bert/``\ ).

* ``cache_dir`` can be an optional path to a specific directory to download and cache the pre-trained model weights. This option is useful in particular when you are using distributed training: to avoid concurrent access to the same weights you can set for example ``cache_dir='./pretrained_model_{}'.format(args.local_rank)`` (see the section on distributed training for more information).
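For example, a sketch of the distributed-training pattern described above (``args.local_rank`` comes from your own argument parsing):

.. code-block:: python

    from transformers import BertForPreTraining

    # Give each distributed worker its own cache directory to avoid concurrent writes
    model = BertForPreTraining.from_pretrained(
        'bert-base-uncased',
        cache_dir='./pretrained_model_{}'.format(args.local_rank),
    )
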
@@ -122,7 +122,7 @@ Here is the recommended way of saving the model, configuration and vocabulary to
.. code-block:: python

    from transformers import WEIGHTS_NAME, CONFIG_NAME

    output_dir = "./models/"
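    # The rest of this example is collapsed in the diff; the usual pattern is sketched below
    # (it assumes `model` and `tokenizer` objects from the surrounding guide, plus the imports here).
    import os
    import torch

    model_to_save = model.module if hasattr(model, 'module') else model  # unwrap DataParallel if needed
    torch.save(model_to_save.state_dict(), os.path.join(output_dir, WEIGHTS_NAME))
    model_to_save.config.to_json_file(os.path.join(output_dir, CONFIG_NAME))
    tokenizer.save_vocabulary(output_dir)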
@@ -12,7 +12,7 @@ According to Pytorch's documentation: "TorchScript is a way to create serializab
PyTorch's two modules `JIT and TRACE <https://pytorch.org/docs/stable/jit.html>`_ allow the developer to export
their model to be re-used in other programs, such as efficiency-oriented C++ programs.

We have provided an interface that allows the export of `transformers` models to TorchScript so that they can
be reused in a different environment than a PyTorch-based Python program. Here we explain how to use our models so that
they can be exported, and what to be mindful of when using these models with TorchScript.
@@ -74,7 +74,7 @@ according to a ``BertConfig`` class and then saved to disk under the filename ``
.. code-block:: python

    from transformers import BertModel, BertTokenizer, BertConfig
    import torch

    enc = BertTokenizer.from_pretrained("bert-base-uncased")
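    # The remainder of this example is collapsed in the diff; a sketch of the usual steps
    # (exact input values are illustrative) is: build dummy inputs, instantiate the model
    # with the `torchscript` flag, trace it, and save the traced graph.
    text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
    tokenized_text = enc.tokenize(text)
    indexed_tokens = enc.convert_tokens_to_ids(tokenized_text)
    segments_ids = [0] * len(tokenized_text)
    tokens_tensor = torch.tensor([indexed_tokens])
    segments_tensors = torch.tensor([segments_ids])

    config = BertConfig.from_pretrained("bert-base-uncased", torchscript=True)
    model = BertModel(config)
    model.eval()

    traced_model = torch.jit.trace(model, (tokens_tensor, segments_tensors))
    torch.jit.save(traced_model, "traced_bert.pt")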
@@ -13,7 +13,7 @@ similar API between the different models.
## Language model fine-tuning

Based on the script [`run_lm_finetuning.py`](https://github.com/huggingface/transformers/blob/master/examples/run_lm_finetuning.py).

Fine-tuning the library models for language modeling on a text dataset for GPT, GPT-2, BERT and RoBERTa (DistilBERT
to be added soon). GPT and GPT-2 are fine-tuned using a causal language modeling (CLM) loss while BERT and RoBERTa
@@ -75,7 +75,7 @@ python run_lm_finetuning.py \
## Language generation

Based on the script [`run_generation.py`](https://github.com/huggingface/transformers/blob/master/examples/run_generation.py).

Conditional text generation using the auto-regressive models of the library: GPT, GPT-2, Transformer-XL and XLNet.
A similar script is used for our official demo [Write With Transformer](https://transformer.huggingface.co), where you
@@ -91,7 +91,7 @@ python run_generation.py \
## GLUE

Based on the script [`run_glue.py`](https://github.com/huggingface/transformers/blob/master/examples/run_glue.py).

Fine-tuning the library models for sequence classification on the GLUE benchmark: [General Language Understanding
Evaluation](https://gluebenchmark.com/). This script can fine-tune the following models: BERT, XLM, XLNet and RoBERTa.
@@ -103,14 +103,14 @@ between different runs. We report the median on 5 runs (with different seeds) fo
| Task  | Metric                       | Result      |
|-------|------------------------------|-------------|
| CoLA  | Matthews corr.               | 48.87       |
| SST-2 | Accuracy                     | 91.74       |
| MRPC  | F1/Accuracy                  | 90.70/86.27 |
| STS-B | Pearson/Spearman corr.       | 91.39/91.04 |
| QQP   | Accuracy/F1                  | 90.79/87.66 |
| MNLI  | Matched acc./Mismatched acc. | 83.70/84.83 |
| QNLI  | Accuracy                     | 89.31       |
| RTE   | Accuracy                     | 71.43       |
| WNLI  | Accuracy                     | 43.66       |
Some of these results are significantly different from the ones reported on the test set
@@ -319,7 +319,7 @@ eval_loss = 0.44457291918821606
## SQuAD

Based on the script [`run_squad.py`](https://github.com/huggingface/transformers/blob/master/examples/run_squad.py).

#### Fine-tuning on SQuAD
@@ -39,7 +39,7 @@ import torch
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset)

from transformers import (OpenAIGPTDoubleHeadsModel, OpenAIGPTTokenizer,
                          AdamW, cached_path, WEIGHTS_NAME, CONFIG_NAME,
                          WarmupLinearSchedule)
@@ -35,10 +35,10 @@ from tqdm import tqdm, trange
from tensorboardX import SummaryWriter

from transformers import (WEIGHTS_NAME, BertConfig,
                          BertForMultipleChoice, BertTokenizer)
from transformers import AdamW, WarmupLinearSchedule

logger = logging.getLogger(__name__)
@@ -365,7 +365,7 @@ def train(args, train_dataset, model, tokenizer):
# inputs.update({'cls_index': batch[5],
#                'p_mask': batch[6]})
outputs = model(**inputs)
loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
if args.n_gpu > 1:
    loss = loss.mean()  # mean() to average on multi-gpu parallel (not distributed) training
@@ -647,7 +647,7 @@ def main():
if args.eval_all_checkpoints:
    checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
    logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce model loading logs
logger.info("Evaluate the following checkpoints: %s", checkpoints)
@@ -28,7 +28,7 @@ import math
import torch

from transformers import TransfoXLLMHeadModel, TransfoXLCorpus, TransfoXLTokenizer

logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',