Commit 1ab25c49 authored by thomwolf

Merge branch 'master' into pr/2115

parents df396112 18601c3b
version: 2 version: 2
jobs: jobs:
build_py3_torch_and_tf: run_tests_py3_torch_and_tf:
working_directory: ~/transformers working_directory: ~/transformers
docker: docker:
- image: circleci/python:3.5 - image: circleci/python:3.5
environment:
OMP_NUM_THREADS: 1
resource_class: xlarge resource_class: xlarge
parallelism: 1 parallelism: 1
steps: steps:
...@@ -11,65 +13,67 @@ jobs: ...@@ -11,65 +13,67 @@ jobs:
- run: sudo pip install torch - run: sudo pip install torch
- run: sudo pip install tensorflow - run: sudo pip install tensorflow
- run: sudo pip install --progress-bar off . - run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov - run: sudo pip install pytest codecov pytest-cov pytest-xdist
- run: sudo pip install tensorboardX scikit-learn - run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -sv ./transformers/tests/ --cov - run: python -m pytest -n 8 --dist=loadfile -s -v ./transformers/tests/ --cov
- run: codecov - run: codecov
build_py3_torch: run_tests_py3_torch:
working_directory: ~/transformers working_directory: ~/transformers
docker: docker:
- image: circleci/python:3.5 - image: circleci/python:3.5
environment:
OMP_NUM_THREADS: 1
resource_class: xlarge resource_class: xlarge
parallelism: 1 parallelism: 1
steps: steps:
- checkout - checkout
- run: sudo pip install torch - run: sudo pip install torch
- run: sudo pip install --progress-bar off . - run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov - run: sudo pip install pytest codecov pytest-cov pytest-xdist
- run: sudo pip install tensorboardX scikit-learn - run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -sv ./transformers/tests/ --cov - run: python -m pytest -n 8 --dist=loadfile -s -v ./transformers/tests/ --cov
- run: python -m pytest -sv ./examples/
- run: codecov - run: codecov
build_py3_tf: run_tests_py3_tf:
working_directory: ~/transformers working_directory: ~/transformers
docker: docker:
- image: circleci/python:3.5 - image: circleci/python:3.5
environment:
OMP_NUM_THREADS: 1
resource_class: xlarge resource_class: xlarge
parallelism: 1 parallelism: 1
steps: steps:
- checkout - checkout
- run: sudo pip install tensorflow - run: sudo pip install tensorflow
- run: sudo pip install --progress-bar off . - run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov - run: sudo pip install pytest codecov pytest-cov pytest-xdist
- run: sudo pip install tensorboardX scikit-learn - run: sudo pip install tensorboardX scikit-learn
- run: python -m pytest -sv ./transformers/tests/ --cov - run: python -m pytest -n 8 --dist=loadfile -s -v ./transformers/tests/ --cov
- run: codecov - run: codecov
build_py2_torch: run_tests_py3_custom_tokenizers:
working_directory: ~/transformers working_directory: ~/transformers
resource_class: large
parallelism: 1
docker: docker:
- image: circleci/python:2.7 - image: circleci/python:3.5
steps: steps:
- checkout - checkout
- run: sudo pip install torch
- run: sudo pip install --progress-bar off . - run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov - run: sudo pip install pytest pytest-xdist
- run: python -m pytest -sv ./transformers/tests/ --cov - run: sudo pip install mecab-python3
- run: codecov - run: RUN_CUSTOM_TOKENIZERS=1 python -m pytest -sv ./transformers/tests/tokenization_bert_japanese_test.py
build_py2_tf: run_examples_py3_torch:
working_directory: ~/transformers working_directory: ~/transformers
resource_class: large
parallelism: 1
docker: docker:
- image: circleci/python:2.7 - image: circleci/python:3.5
environment:
OMP_NUM_THREADS: 1
resource_class: xlarge
parallelism: 1
steps: steps:
- checkout - checkout
- run: sudo pip install tensorflow - run: sudo pip install torch
- run: sudo pip install --progress-bar off . - run: sudo pip install --progress-bar off .
- run: sudo pip install pytest codecov pytest-cov - run: sudo pip install pytest pytest-xdist
- run: python -m pytest -sv ./transformers/tests/ --cov - run: sudo pip install tensorboardX scikit-learn
- run: codecov - run: python -m pytest -n 8 --dist=loadfile -s -v ./examples/
deploy_doc: deploy_doc:
working_directory: ~/transformers working_directory: ~/transformers
docker: docker:
...@@ -82,6 +86,16 @@ jobs: ...@@ -82,6 +86,16 @@ jobs:
- run: sudo pip install --progress-bar off -r docs/requirements.txt - run: sudo pip install --progress-bar off -r docs/requirements.txt
- run: sudo pip install --progress-bar off -r requirements.txt - run: sudo pip install --progress-bar off -r requirements.txt
- run: ./.circleci/deploy.sh - run: ./.circleci/deploy.sh
check_repository_consistency:
working_directory: ~/transformers
docker:
- image: circleci/python:3.5
resource_class: small
parallelism: 1
steps:
- checkout
- run: sudo pip install requests
- run: python ./utils/link_tester.py
workflow_filters: &workflow_filters workflow_filters: &workflow_filters
filters: filters:
branches: branches:
...@@ -91,9 +105,10 @@ workflows: ...@@ -91,9 +105,10 @@ workflows:
version: 2 version: 2
build_and_test: build_and_test:
jobs: jobs:
- build_py3_torch_and_tf - check_repository_consistency
- build_py3_torch - run_examples_py3_torch
- build_py3_tf - run_tests_py3_custom_tokenizers
- build_py2_torch - run_tests_py3_torch_and_tf
- build_py2_tf - run_tests_py3_torch
- run_tests_py3_tf
- deploy_doc: *workflow_filters - deploy_doc: *workflow_filters
...@@ -168,7 +168,7 @@ Follow these steps to start contributing: ...@@ -168,7 +168,7 @@ Follow these steps to start contributing:
to be merged; to be merged;
4. Make sure pre-existing tests still pass; 4. Make sure pre-existing tests still pass;
5. Add high-coverage tests. No quality test, no merge; 5. Add high-coverage tests. No quality test, no merge;
6. All public methods must have informative doctrings; 6. All public methods must have informative docstrings;
### Style guide ### Style guide
......
...@@ -55,10 +55,12 @@ Choose the right framework for every part of a model's lifetime ...@@ -55,10 +55,12 @@ Choose the right framework for every part of a model's lifetime
| [Online demo](#online-demo) | Experimenting with this repo’s text generation capabilities | | [Online demo](#online-demo) | Experimenting with this repo’s text generation capabilities |
| [Quick tour: Usage](#quick-tour) | Tokenizers & models usage: Bert and GPT-2 | | [Quick tour: Usage](#quick-tour) | Tokenizers & models usage: Bert and GPT-2 |
| [Quick tour: TF 2.0 and PyTorch ](#Quick-tour-TF-20-training-and-PyTorch-interoperability) | Train a TF 2.0 model in 10 lines of code, load it in PyTorch | | [Quick tour: TF 2.0 and PyTorch ](#Quick-tour-TF-20-training-and-PyTorch-interoperability) | Train a TF 2.0 model in 10 lines of code, load it in PyTorch |
| [Quick tour: pipelines](#quick-tour-of-pipelines) | Using pipelines: wrappers around a tokenizer and a model to use fine-tuned models |
| [Quick tour: Fine-tuning/usage scripts](#quick-tour-of-the-fine-tuningusage-scripts) | Using provided scripts: GLUE, SQuAD and Text generation | | [Quick tour: Fine-tuning/usage scripts](#quick-tour-of-the-fine-tuningusage-scripts) | Using provided scripts: GLUE, SQuAD and Text generation |
| [Quick tour: Share your models ](#Quick-tour-of-model-sharing) | Upload and share your fine-tuned models with the community |
| [Migrating from pytorch-transformers to transformers](#Migrating-from-pytorch-transformers-to-transformers) | Migrating your code from pytorch-transformers to transformers | | [Migrating from pytorch-transformers to transformers](#Migrating-from-pytorch-transformers-to-transformers) | Migrating your code from pytorch-transformers to transformers |
| [Migrating from pytorch-pretrained-bert to pytorch-transformers](#Migrating-from-pytorch-pretrained-bert-to-transformers) | Migrating your code from pytorch-pretrained-bert to transformers | | [Migrating from pytorch-pretrained-bert to pytorch-transformers](#Migrating-from-pytorch-pretrained-bert-to-transformers) | Migrating your code from pytorch-pretrained-bert to transformers |
| [Documentation][(v2.2.0/v2.2.1)](https://huggingface.co/transformers/v2.2.0) [(v2.1.1)](https://huggingface.co/transformers/v2.1.1) [(v2.0.0)](https://huggingface.co/transformers/v2.0.0) [(v1.2.0)](https://huggingface.co/transformers/v1.2.0) [(v1.1.0)](https://huggingface.co/transformers/v1.1.0) [(v1.0.0)](https://huggingface.co/transformers/v1.0.0) [(master)](https://huggingface.co/transformers) | Full API documentation and more | | [Documentation][(v2.3.0)](https://huggingface.co/transformers/v2.3.0)[(v2.2.0/v2.2.1/v2.2.2)](https://huggingface.co/transformers/v2.2.0) [(v2.1.1)](https://huggingface.co/transformers/v2.1.1) [(v2.0.0)](https://huggingface.co/transformers/v2.0.0) [(v1.2.0)](https://huggingface.co/transformers/v1.2.0) [(v1.1.0)](https://huggingface.co/transformers/v1.1.0) [(v1.0.0)](https://huggingface.co/transformers/v1.0.0) [(master)](https://huggingface.co/transformers) | Full API documentation and more |
## Installation ## Installation
...@@ -131,7 +133,7 @@ At some point in the future, you'll be able to seamlessly move from pre-training ...@@ -131,7 +133,7 @@ At some point in the future, you'll be able to seamlessly move from pre-training
## Model architectures ## Model architectures
🤗 Transformers currently provides 10 NLU/NLG architectures: 🤗 Transformers currently provides the following NLU/NLG architectures:
1. **[BERT](https://github.com/google-research/bert)** (from Google) released with the paper [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805) by Jacob Devlin, Ming-Wei Chang, Kenton Lee and Kristina Toutanova. 1. **[BERT](https://github.com/google-research/bert)** (from Google) released with the paper [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805) by Jacob Devlin, Ming-Wei Chang, Kenton Lee and Kristina Toutanova.
2. **[GPT](https://github.com/openai/finetune-transformer-lm)** (from OpenAI) released with the paper [Improving Language Understanding by Generative Pre-Training](https://blog.openai.com/language-unsupervised/) by Alec Radford, Karthik Narasimhan, Tim Salimans and Ilya Sutskever. 2. **[GPT](https://github.com/openai/finetune-transformer-lm)** (from OpenAI) released with the paper [Improving Language Understanding by Generative Pre-Training](https://blog.openai.com/language-unsupervised/) by Alec Radford, Karthik Narasimhan, Tim Salimans and Ilya Sutskever.
...@@ -144,8 +146,10 @@ At some point in the future, you'll be able to seamlessly move from pre-training ...@@ -144,8 +146,10 @@ At some point in the future, you'll be able to seamlessly move from pre-training
9. **[CTRL](https://github.com/salesforce/ctrl/)** (from Salesforce) released with the paper [CTRL: A Conditional Transformer Language Model for Controllable Generation](https://arxiv.org/abs/1909.05858) by Nitish Shirish Keskar*, Bryan McCann*, Lav R. Varshney, Caiming Xiong and Richard Socher. 9. **[CTRL](https://github.com/salesforce/ctrl/)** (from Salesforce) released with the paper [CTRL: A Conditional Transformer Language Model for Controllable Generation](https://arxiv.org/abs/1909.05858) by Nitish Shirish Keskar*, Bryan McCann*, Lav R. Varshney, Caiming Xiong and Richard Socher.
10. **[CamemBERT](https://camembert-model.fr)** (from Inria/Facebook/Sorbonne) released with the paper [CamemBERT: a Tasty French Language Model](https://arxiv.org/abs/1911.03894) by Louis Martin*, Benjamin Muller*, Pedro Javier Ortiz Suárez*, Yoann Dupont, Laurent Romary, Éric Villemonte de la Clergerie, Djamé Seddah and Benoît Sagot. 10. **[CamemBERT](https://camembert-model.fr)** (from Inria/Facebook/Sorbonne) released with the paper [CamemBERT: a Tasty French Language Model](https://arxiv.org/abs/1911.03894) by Louis Martin*, Benjamin Muller*, Pedro Javier Ortiz Suárez*, Yoann Dupont, Laurent Romary, Éric Villemonte de la Clergerie, Djamé Seddah and Benoît Sagot.
11. **[ALBERT](https://github.com/google-research/ALBERT)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut. 11. **[ALBERT](https://github.com/google-research/ALBERT)** (from Google Research and the Toyota Technological Institute at Chicago) released with the paper [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942), by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.
12. **[MMBT](https://github.com/facebookresearch/mmbt/)** (from Facebook), released together with the paper a [Supervised Multimodal Bitransformers for Classifying Images and Text](https://arxiv.org/pdf/1909.02950.pdf) by Douwe Kiela, Suvrat Bhooshan, Hamed Firooz, Davide Testuggine. 12. **[T5](https://github.com/google-research/text-to-text-transfer-transformer)** (from Google AI) released with the paper [Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer](https://arxiv.org/abs/1910.10683) by Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu.
12. Want to contribute a new model? We have added a **detailed guide and templates** to guide you in the process of adding a new model. You can find them in the [`templates`](./templates) folder of the repository. Be sure to check the [contributing guidelines](./CONTRIBUTING.md) and contact the maintainers or open an issue to collect feedbacks before starting your PR. 13. **[XLM-RoBERTa](https://github.com/pytorch/fairseq/tree/master/examples/xlmr)** (from Facebook AI), released together with the paper [Unsupervised Cross-lingual Representation Learning at Scale](https://arxiv.org/abs/1911.02116) by Alexis Conneau*, Kartikay Khandelwal*, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov.
14. **[MMBT](https://github.com/facebookresearch/mmbt/)** (from Facebook), released together with the paper [Supervised Multimodal Bitransformers for Classifying Images and Text](https://arxiv.org/pdf/1909.02950.pdf) by Douwe Kiela, Suvrat Bhooshan, Hamed Firooz, Davide Testuggine.
15. Want to contribute a new model? We have added a **detailed guide and templates** to guide you in the process of adding a new model. You can find them in the [`templates`](./templates) folder of the repository. Be sure to check the [contributing guidelines](./CONTRIBUTING.md) and contact the maintainers or open an issue to collect feedback before starting your PR.
These implementations have been tested on several datasets (see the example scripts) and should match the performances of the original implementations (e.g. ~93 F1 on SQuAD for BERT Whole-Word-Masking, ~88 F1 on RocStories for OpenAI GPT, ~18.3 perplexity on WikiText 103 for Transformer-XL, ~0.916 Pearson R coefficient on STS-B for XLNet). You can find more details on the performances in the Examples section of the [documentation](https://huggingface.co/transformers/examples.html).
...@@ -167,7 +171,7 @@ import torch ...@@ -167,7 +171,7 @@ import torch
from transformers import * from transformers import *
# Transformers has a unified API # Transformers has a unified API
# for 8 transformer architectures and 30 pretrained weights. # for 10 transformer architectures and 30 pretrained weights.
# Model | Tokenizer | Pretrained weights shortcut # Model | Tokenizer | Pretrained weights shortcut
MODELS = [(BertModel, BertTokenizer, 'bert-base-uncased'), MODELS = [(BertModel, BertTokenizer, 'bert-base-uncased'),
(OpenAIGPTModel, OpenAIGPTTokenizer, 'openai-gpt'), (OpenAIGPTModel, OpenAIGPTTokenizer, 'openai-gpt'),
...@@ -177,7 +181,9 @@ MODELS = [(BertModel, BertTokenizer, 'bert-base-uncased'), ...@@ -177,7 +181,9 @@ MODELS = [(BertModel, BertTokenizer, 'bert-base-uncased'),
(XLNetModel, XLNetTokenizer, 'xlnet-base-cased'), (XLNetModel, XLNetTokenizer, 'xlnet-base-cased'),
(XLMModel, XLMTokenizer, 'xlm-mlm-enfr-1024'), (XLMModel, XLMTokenizer, 'xlm-mlm-enfr-1024'),
(DistilBertModel, DistilBertTokenizer, 'distilbert-base-uncased'), (DistilBertModel, DistilBertTokenizer, 'distilbert-base-uncased'),
(RobertaModel, RobertaTokenizer, 'roberta-base')] (RobertaModel, RobertaTokenizer, 'roberta-base'),
(XLMRobertaModel, XLMRobertaTokenizer, 'xlm-roberta-base'),
]
# To use TensorFlow 2.0 versions of the models, simply prefix the class names with 'TF', e.g. `TFRobertaModel` is the TF 2.0 counterpart of the PyTorch model `RobertaModel`
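# A minimal sketch, assuming TF 2.0 weights exist for the checkpoint (which is
# the case for the shortcut names listed above): loading the TF 2.0 counterpart
# works the same way as the PyTorch version.
tf_model = TFRobertaModel.from_pretrained('roberta-base')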
...@@ -446,6 +452,76 @@ python ./examples/run_generation.py \ ...@@ -446,6 +452,76 @@ python ./examples/run_generation.py \
--repetition_penalty=1.2 \ --repetition_penalty=1.2 \
``` ```
## Quick tour of model sharing
New in `v2.2.2`: you can now upload and share your fine-tuned models with the community, using the <abbr title="Command-line interface">CLI</abbr> that's built into the library.
**First, create an account on [https://huggingface.co/join](https://huggingface.co/join)**. Then:
```shell
transformers-cli login
# log in using the same credentials as on huggingface.co
```
Upload your model:
```shell
transformers-cli upload ./path/to/pretrained_model/
# ^^ Upload folder containing weights/tokenizer/config
# saved via `.save_pretrained()`
transformers-cli upload ./config.json [--filename folder/foobar.json]
# ^^ Upload a single file
# (you can optionally override its filename, which can be nested inside a folder)
```
Your model will then be accessible through its identifier, a concatenation of your username and the folder name above:
```python
"username/model_name"
```
Anyone can load it from code:
```python
tokenizer = AutoTokenizer.from_pretrained("username/pretrained_model")
model = AutoModel.from_pretrained("username/pretrained_model")
```
Finally, list all your files on S3:
```shell
transformers-cli ls
# List all your S3 objects.
```
## Quick tour of pipelines
New in version `v2.3`: `Pipeline` objects are high-level wrappers which automatically handle tokenization, run your data through a transformers model
and output the result in a structured object.
You can create `Pipeline` objects for the following downstream tasks:
- `feature-extraction`: Generates a tensor representation for the input sequence.
- `ner`: Generates a named-entity mapping for each word in the input sequence.
- `sentiment-analysis`: Gives the polarity (positive / negative) of the whole input sequence.
- `question-answering`: Given some context and a question referring to the context, it extracts the answer to the question in the context.
```python
from transformers import pipeline
# Allocate a pipeline for sentiment-analysis
nlp = pipeline('sentiment-analysis')
nlp('We are very happy to include pipeline into the transformers repository.')
>>> {'label': 'POSITIVE', 'score': 0.99893874}
# Allocate a pipeline for question-answering
nlp = pipeline('question-answering')
nlp({
'question': 'What is the name of the repository ?',
'context': 'Pipeline have been included in the huggingface/transformers repository'
})
>>> {'score': 0.28756016668193496, 'start': 35, 'end': 59, 'answer': 'huggingface/transformers'}
```
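The same `pipeline` factory covers the other two tasks listed above. A minimal sketch, assuming the default models for `ner` and `feature-extraction` are downloaded on first use (the exact output structure may vary slightly between versions):
```python
from transformers import pipeline

# Allocate a pipeline for named entity recognition
nlp = pipeline('ner')
nlp('Hugging Face is a company based in New York City.')
# Returns one dict per detected entity (word, entity label, score)

# Allocate a pipeline for feature extraction
nlp = pipeline('feature-extraction')
features = nlp('We are very happy to include pipeline into the transformers repository.')
# `features` is a nested list with one embedding vector per token of the input
```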
## Migrating from pytorch-transformers to transformers ## Migrating from pytorch-transformers to transformers
Here is a quick summary of what you should take care of when migrating from `pytorch-transformers` to `transformers`. Here is a quick summary of what you should take care of when migrating from `pytorch-transformers` to `transformers`.
......
...@@ -26,7 +26,7 @@ author = u'huggingface' ...@@ -26,7 +26,7 @@ author = u'huggingface'
# The short X.Y version # The short X.Y version
version = u'' version = u''
# The full version, including alpha/beta/rc tags # The full version, including alpha/beta/rc tags
release = u'2.2.1' release = u'2.3.0'
# -- General configuration --------------------------------------------------- # -- General configuration ---------------------------------------------------
......
...@@ -50,6 +50,7 @@ The library currently contains PyTorch and Tensorflow implementations, pre-train ...@@ -50,6 +50,7 @@ The library currently contains PyTorch and Tensorflow implementations, pre-train
9. `CTRL <https://github.com/pytorch/fairseq/tree/master/examples/ctrl>`_ (from Salesforce), released together with the paper `CTRL: A Conditional Transformer Language Model for Controllable Generation <https://www.github.com/salesforce/ctrl>`_ by Nitish Shirish Keskar*, Bryan McCann*, Lav R. Varshney, Caiming Xiong and Richard Socher. 9. `CTRL <https://github.com/pytorch/fairseq/tree/master/examples/ctrl>`_ (from Salesforce), released together with the paper `CTRL: A Conditional Transformer Language Model for Controllable Generation <https://www.github.com/salesforce/ctrl>`_ by Nitish Shirish Keskar*, Bryan McCann*, Lav R. Varshney, Caiming Xiong and Richard Socher.
10. `CamemBERT <https://huggingface.co/transformers/model_doc/camembert.html>`_ (from FAIR, Inria, Sorbonne Université) released together with the paper `CamemBERT: a Tasty French Language Model <https://arxiv.org/abs/1911.03894>`_ by Louis Martin, Benjamin Muller, Pedro Javier Ortiz Suarez, Yoann Dupont, Laurent Romary, Eric Villemonte de la Clergerie, Djame Seddah, and Benoît Sagot. 10. `CamemBERT <https://huggingface.co/transformers/model_doc/camembert.html>`_ (from FAIR, Inria, Sorbonne Université) released together with the paper `CamemBERT: a Tasty French Language Model <https://arxiv.org/abs/1911.03894>`_ by Louis Martin, Benjamin Muller, Pedro Javier Ortiz Suarez, Yoann Dupont, Laurent Romary, Eric Villemonte de la Clergerie, Djame Seddah, and Benoît Sagot.
11. `ALBERT <https://github.com/google-research/ALBERT>`_ (from Google Research), released together with the paper a `ALBERT: A Lite BERT for Self-supervised Learning of Language Representations <https://arxiv.org/abs/1909.11942>`_ by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut. 11. `ALBERT <https://github.com/google-research/ALBERT>`_ (from Google Research), released together with the paper a `ALBERT: A Lite BERT for Self-supervised Learning of Language Representations <https://arxiv.org/abs/1909.11942>`_ by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut.
12. `XLM-RoBERTa <https://github.com/pytorch/fairseq/tree/master/examples/xlmr>`_ (from Facebook AI), released together with the paper `Unsupervised Cross-lingual Representation Learning at Scale <https://arxiv.org/abs/1911.02116>`_ by Alexis Conneau*, Kartikay Khandelwal*, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov.
.. toctree:: .. toctree::
:maxdepth: 2 :maxdepth: 2
...@@ -58,6 +59,7 @@ The library currently contains PyTorch and Tensorflow implementations, pre-train ...@@ -58,6 +59,7 @@ The library currently contains PyTorch and Tensorflow implementations, pre-train
installation installation
quickstart quickstart
pretrained_models pretrained_models
model_sharing
examples examples
notebooks notebooks
serialization serialization
......
...@@ -54,8 +54,7 @@ Additionally, the following method can be used to load values from a data file ...@@ -54,8 +54,7 @@ Additionally, the following method can be used to load values from a data file
Example usage Example usage
^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^
An example using these processors is given in the An example using these processors is given in the `run_glue.py <https://github.com/huggingface/pytorch-transformers/blob/master/examples/run_glue.py>`__ script.
`run_glue.py <https://github.com/huggingface/pytorch-transformers/blob/master/examples/run_glue.py>`__ script.
XNLI XNLI
...@@ -74,8 +73,81 @@ This library hosts the processor to load the XNLI data: ...@@ -74,8 +73,81 @@ This library hosts the processor to load the XNLI data:
Please note that since the gold labels are available on the test set, evaluation is performed on the test set. Please note that since the gold labels are available on the test set, evaluation is performed on the test set.
An example using these processors is given in the
`run_xnli.py <https://github.com/huggingface/pytorch-transformers/blob/master/examples/run_xnli.py>`__ script.
SQuAD
~~~~~~~~~~~~~~~~~~~~~
`The Stanford Question Answering Dataset (SQuAD) <https://rajpurkar.github.io/SQuAD-explorer//>`__ is a benchmark that evaluates
the performance of models on question answering. Two versions are available, v1.1 and v2.0. The first version (v1.1) was released together with the paper
`SQuAD: 100,000+ Questions for Machine Comprehension of Text <https://arxiv.org/abs/1606.05250>`__. The second version (v2.0) was released alongside
the paper `Know What You Don't Know: Unanswerable Questions for SQuAD <https://arxiv.org/abs/1806.03822>`__.
This library hosts a processor for each of the two versions:
Processors
^^^^^^^^^^^^^^^^^^^^^^^^^
Those processors are:
- :class:`~transformers.data.processors.utils.SquadV1Processor`
- :class:`~transformers.data.processors.utils.SquadV2Processor`
They both inherit from the abstract class :class:`~transformers.data.processors.utils.SquadProcessor`
.. autoclass:: transformers.data.processors.squad.SquadProcessor
:members:
Additionally, the following method can be used to convert SQuAD examples into :class:`~transformers.data.processors.utils.SquadFeatures`
that can be used as model inputs.
.. automethod:: transformers.data.processors.squad.squad_convert_examples_to_features
These processors, as well as the aforementioned method, can be used with files containing the data as well as with the `tensorflow_datasets` package. Examples are given below.
Examples are given below.
Example usage Example usage
^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^
Here is an example using the processors as well as the conversion method using data files:
Example::
# Loading a V2 processor
processor = SquadV2Processor()
examples = processor.get_dev_examples(squad_v2_data_dir)
# Loading a V1 processor
processor = SquadV1Processor()
examples = processor.get_dev_examples(squad_v1_data_dir)
features = squad_convert_examples_to_features(
examples=examples,
tokenizer=tokenizer,
max_seq_length=max_seq_length,
doc_stride=args.doc_stride,
max_query_length=max_query_length,
is_training=not evaluate,
)
Using `tensorflow_datasets` is as easy as using a data file:
Example::
# tensorflow_datasets only handles SQuAD V1.
tfds_examples = tfds.load("squad")
examples = SquadV1Processor().get_examples_from_dataset(tfds_examples, evaluate=evaluate)
features = squad_convert_examples_to_features(
examples=examples,
tokenizer=tokenizer,
max_seq_length=max_seq_length,
doc_stride=args.doc_stride,
max_query_length=max_query_length,
is_training=not evaluate,
)
Another example using these processors is given in the
`run_squad.py <https://github.com/huggingface/transformers/blob/master/examples/run_squad.py>`__ script.
# Model upload and sharing
Starting with `v2.2.2`, you can now upload and share your fine-tuned models with the community, using the <abbr title="Command-line interface">CLI</abbr> that's built into the library.
**First, create an account on [https://huggingface.co/join](https://huggingface.co/join)**. Then:
```shell
transformers-cli login
# log in using the same credentials as on huggingface.co
```
Upload your model:
```shell
transformers-cli upload ./path/to/pretrained_model/
# ^^ Upload folder containing weights/tokenizer/config
# saved via `.save_pretrained()`
transformers-cli upload ./config.json [--filename folder/foobar.json]
# ^^ Upload a single file
# (you can optionally override its filename, which can be nested inside a folder)
```
Your model will then be accessible through its identifier, a concatenation of your username and the folder name above:
```python
"username/pretrained_model"
```
Anyone can load it from code:
```python
tokenizer = AutoTokenizer.from_pretrained("username/pretrained_model")
model = AutoModel.from_pretrained("username/pretrained_model")
```
Finally, list all your files on S3:
```shell
transformers-cli ls
# List all your S3 objects.
```
...@@ -219,4 +219,97 @@ sequence = tokenizer.decode(generated) ...@@ -219,4 +219,97 @@ sequence = tokenizer.decode(generated)
print(sequence) print(sequence)
``` ```
The model only requires a single token as input as all the previous tokens' key/value pairs are contained in the `past`. The model only requires a single token as input as all the previous tokens' key/value pairs are contained in the `past`.
\ No newline at end of file
### Model2Model example
Encoder-decoder architectures require two tokenized inputs: one for the encoder and the other one for the decoder. Let's assume that we want to use `Model2Model` for generative question answering, and start by tokenizing the question and answer that will be fed to the model.
```python
import torch
from transformers import BertTokenizer, Model2Model
# OPTIONAL: if you want to have more information on what's happening under the hood, activate the logger as follows
import logging
logging.basicConfig(level=logging.INFO)
# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Encode the input to the encoder (the question)
question = "Who was Jim Henson?"
encoded_question = tokenizer.encode(question)
# Encode the input to the decoder (the answer)
answer = "Jim Henson was a puppeteer"
encoded_answer = tokenizer.encode(answer)
# Convert inputs to PyTorch tensors
question_tensor = torch.tensor([encoded_question])
answer_tensor = torch.tensor([encoded_answer])
```
Let's see how we can use `Model2Model` to get the value of the loss associated with this (question, answer) pair:
```python
# In order to compute the loss we need to provide language model
# labels (the token ids that the model should have produced) to
# the decoder.
lm_labels = encoded_answer
labels_tensor = torch.tensor([lm_labels])
# Load pre-trained model (weights)
model = Model2Model.from_pretrained('bert-base-uncased')
# Set the model in evaluation mode to deactivate the DropOut modules
# This is IMPORTANT to have reproducible results during evaluation!
model.eval()
# If you have a GPU, put everything on cuda
question_tensor = question_tensor.to('cuda')
answer_tensor = answer_tensor.to('cuda')
labels_tensor = labels_tensor.to('cuda')
model.to('cuda')
# Predict hidden states features for each layer
with torch.no_grad():
# See the models docstrings for the detail of the inputs
outputs = model(question_tensor, answer_tensor, decoder_lm_labels=labels_tensor)
# Transformers models always output tuples.
# See the models docstrings for the detail of all the outputs
# In our case, the first element is the value of the LM loss
lm_loss = outputs[0]
```
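In an actual fine-tuning loop, the loss above would be computed with gradients enabled (so not under `torch.no_grad()` and not in `eval()` mode) and followed by an optimizer step. A minimal sketch, assuming the `AdamW` optimizer shipped with the library:
```python
from transformers import AdamW

model.train()
optimizer = AdamW(model.parameters(), lr=3e-5)

# Same call as above, but with gradients enabled
outputs = model(question_tensor, answer_tensor, decoder_lm_labels=labels_tensor)
loss = outputs[0]

loss.backward()
optimizer.step()
optimizer.zero_grad()
```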
This loss can be used to fine-tune `Model2Model` on the question answering task. Assuming that we fine-tuned the model, let us now see how to generate an answer:
```python
# Let's re-use the previous question
question = "Who was Jim Henson?"
encoded_question = tokenizer.encode(question)
question_tensor = torch.tensor([encoded_question])
# This time we try to generate the answer, so we start with an empty sequence
answer = "[CLS]"
encoded_answer = tokenizer.encode(answer, add_special_tokens=False)
answer_tensor = torch.tensor([encoded_answer])
# Load pre-trained model (weights)
model = Model2Model.from_pretrained('fine-tuned-weights')
model.eval()
# If you have a GPU, put everything on cuda
question_tensor = encoded_question.to('cuda')
answer_tensor = encoded_answer.to('cuda')
model.to('cuda')
# Predict all tokens
with torch.no_grad():
outputs = model(question_tensor, answer_tensor)
predictions = outputs[0]
# confirm we were able to predict 'jim'
predicted_index = torch.argmax(predictions[0, -1]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
assert predicted_token == 'jim'
```
...@@ -24,8 +24,6 @@ pip install -r ./examples/requirements.txt ...@@ -24,8 +24,6 @@ pip install -r ./examples/requirements.txt
| [Multiple Choice](#multiple-choice) | Examples running BERT/XLNet/RoBERTa on the SWAG/RACE/ARC tasks. | [Multiple Choice](#multiple-choice) | Examples running BERT/XLNet/RoBERTa on the SWAG/RACE/ARC tasks.
| [Named Entity Recognition](#named-entity-recognition) | Using BERT for Named Entity Recognition (NER) on the CoNLL 2003 dataset, examples with distributed training. | | [Named Entity Recognition](#named-entity-recognition) | Using BERT for Named Entity Recognition (NER) on the CoNLL 2003 dataset, examples with distributed training. |
| [XNLI](#xnli) | Examples running BERT/XLM on the XNLI benchmark. | | [XNLI](#xnli) | Examples running BERT/XLM on the XNLI benchmark. |
| [Abstractive summarization](#abstractive-summarization) | Using the BertAbs
model finetuned on the CNN/DailyMail dataset to generate summaries. |
## TensorFlow 2.0 Bert models on GLUE ## TensorFlow 2.0 Bert models on GLUE
...@@ -45,7 +43,7 @@ Quick benchmarks from the script (no other modifications): ...@@ -45,7 +43,7 @@ Quick benchmarks from the script (no other modifications):
| Titan V | AMP | 26s | 0.8281/0.8568/0.8411 | | Titan V | AMP | 26s | 0.8281/0.8568/0.8411 |
| V100 | FP32 | 35s | 0.8646/0.8359/0.8464 | | V100 | FP32 | 35s | 0.8646/0.8359/0.8464 |
| V100 | AMP | 22s | 0.8646/0.8385/0.8411 | | V100 | AMP | 22s | 0.8646/0.8385/0.8411 |
| 1080 Ti | FP32 | 55s | - | | 1080 Ti | FP32 | 55s | - |
Mixed precision (AMP) reduces the training time considerably for the same hardware and hyper-parameters (same batch size was used).
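If you want to try AMP in your own TensorFlow 2.0 code, one way to switch it on is through the graph optimizer options; a minimal sketch (the benchmark script above may enable it differently):
```python
import tensorflow as tf

# Ask the optimizer to rewrite the graph for automatic mixed precision:
# compute runs in float16 where safe, variables stay in float32.
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
# Build the Keras model and optimizer as usual after setting this option.
```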
...@@ -359,9 +357,9 @@ eval_loss = 0.44457291918821606 ...@@ -359,9 +357,9 @@ eval_loss = 0.44457291918821606
Based on the script [`run_squad.py`](https://github.com/huggingface/transformers/blob/master/examples/run_squad.py). Based on the script [`run_squad.py`](https://github.com/huggingface/transformers/blob/master/examples/run_squad.py).
#### Fine-tuning on SQuAD #### Fine-tuning BERT on SQuAD1.0
This example code fine-tunes BERT on the SQuAD dataset. It runs in 24 min (with BERT-base) or 68 min (with BERT-large) This example code fine-tunes BERT on the SQuAD1.0 dataset. It runs in 24 min (with BERT-base) or 68 min (with BERT-large)
on a single Tesla V100 16GB. The data for SQuAD can be downloaded with the following links and should be saved in a
$SQUAD_DIR directory.
...@@ -369,6 +367,12 @@ $SQUAD_DIR directory. ...@@ -369,6 +367,12 @@ $SQUAD_DIR directory.
* [dev-v1.1.json](https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json) * [dev-v1.1.json](https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json)
* [evaluate-v1.1.py](https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py) * [evaluate-v1.1.py](https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py)
And for SQuAD2.0, you need to download:
- [train-v2.0.json](https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json)
- [dev-v2.0.json](https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json)
- [evaluate-v2.0.py](https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/)
```bash ```bash
export SQUAD_DIR=/path/to/SQUAD export SQUAD_DIR=/path/to/SQUAD
...@@ -398,7 +402,7 @@ exact_match = 81.22 ...@@ -398,7 +402,7 @@ exact_match = 81.22
#### Distributed training #### Distributed training
Here is an example using distributed training on 8 V100 GPUs and Bert Whole Word Masking uncased model to reach a F1 > 93 on SQuAD: Here is an example using distributed training on 8 V100 GPUs and Bert Whole Word Masking uncased model to reach a F1 > 93 on SQuAD1.0:
```bash ```bash
python -m torch.distributed.launch --nproc_per_node=8 run_squad.py \ python -m torch.distributed.launch --nproc_per_node=8 run_squad.py \
...@@ -430,7 +434,9 @@ This fine-tuned model is available as a checkpoint under the reference ...@@ -430,7 +434,9 @@ This fine-tuned model is available as a checkpoint under the reference
#### Fine-tuning XLNet on SQuAD #### Fine-tuning XLNet on SQuAD
This example code fine-tunes XLNet on the SQuAD dataset. See above to download the data for SQuAD. This example code fine-tunes XLNet on both the SQuAD1.0 and SQuAD2.0 datasets. See above to download the data for SQuAD.
##### Command for SQuAD1.0:
```bash ```bash
export SQUAD_DIR=/path/to/SQUAD export SQUAD_DIR=/path/to/SQUAD
...@@ -453,7 +459,32 @@ python /data/home/hlu/transformers/examples/run_squad.py \ ...@@ -453,7 +459,32 @@ python /data/home/hlu/transformers/examples/run_squad.py \
--save_steps 5000 --save_steps 5000
``` ```
Training with the previously defined hyper-parameters yields the following results: ##### Command for SQuAD2.0:
```bash
export SQUAD_DIR=/path/to/SQUAD
python run_squad.py \
--model_type xlnet \
--model_name_or_path xlnet-large-cased \
--do_train \
--do_eval \
--version_2_with_negative \
--train_file $SQUAD_DIR/train-v2.0.json \
--predict_file $SQUAD_DIR/dev-v2.0.json \
--learning_rate 3e-5 \
--num_train_epochs 4 \
--max_seq_length 384 \
--doc_stride 128 \
--output_dir ./wwm_cased_finetuned_squad/ \
--per_gpu_eval_batch_size=2 \
--per_gpu_train_batch_size=2 \
--save_steps 5000
```
A larger batch size may improve performance at the cost of more memory.
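If GPU memory is the limiting factor, gradient accumulation is a common way to reach a larger effective batch size without increasing per-step memory; a sketch, assuming `run_squad.py` exposes `--gradient_accumulation_steps` like the other example training scripts:
```bash
export SQUAD_DIR=/path/to/SQUAD

# Effective train batch size per GPU = 2 * 16 = 32
python run_squad.py \
    --model_type xlnet \
    --model_name_or_path xlnet-large-cased \
    --do_train \
    --do_eval \
    --version_2_with_negative \
    --train_file $SQUAD_DIR/train-v2.0.json \
    --predict_file $SQUAD_DIR/dev-v2.0.json \
    --learning_rate 3e-5 \
    --num_train_epochs 4 \
    --max_seq_length 384 \
    --doc_stride 128 \
    --output_dir ./wwm_cased_finetuned_squad/ \
    --per_gpu_eval_batch_size=2 \
    --per_gpu_train_batch_size=2 \
    --gradient_accumulation_steps=16 \
    --save_steps 5000
```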
##### Results for SQuAD1.0 with the previously defined hyper-parameters:
```python ```python
{ {
...@@ -466,10 +497,28 @@ Training with the previously defined hyper-parameters yields the following resul ...@@ -466,10 +497,28 @@ Training with the previously defined hyper-parameters yields the following resul
} }
``` ```
##### Results for SQuAD2.0 with the previously defined hyper-parameters:
```python
{
"exact": 80.4177545691906,
"f1": 84.07154997729623,
"total": 11873,
"HasAns_exact": 76.73751686909581,
"HasAns_f1": 84.05558584352873,
"HasAns_total": 5928,
"NoAns_exact": 84.0874684608915,
"NoAns_f1": 84.0874684608915,
"NoAns_total": 5945
}
```
## Named Entity Recognition ## Named Entity Recognition
Based on the scripts [`run_ner.py`](https://github.com/huggingface/transformers/blob/master/examples/run_ner.py) for Pytorch and Based on the scripts [`run_ner.py`](https://github.com/huggingface/transformers/blob/master/examples/run_ner.py) for Pytorch and
[`run_tf_ner.py`(https://github.com/huggingface/transformers/blob/master/examples/run_tf_ner.py)] for Tensorflow 2. [`run_tf_ner.py`](https://github.com/huggingface/transformers/blob/master/examples/run_tf_ner.py) for Tensorflow 2.
This example fine-tunes Bert Multilingual on GermEval 2014 (German NER).
Details and results for the fine-tuning were provided by @stefan-it.
...@@ -646,34 +695,6 @@ micro avg 0.8722 0.8774 0.8748 13869 ...@@ -646,34 +695,6 @@ micro avg 0.8722 0.8774 0.8748 13869
macro avg 0.8712 0.8774 0.8740 13869 macro avg 0.8712 0.8774 0.8740 13869
``` ```
## Abstractive summarization
Based on the script
[`run_summarization_finetuning.py`](https://github.com/huggingface/transformers/blob/master/examples/run_summarization_finetuning.py).
Before running this script you should download **both** CNN and Daily Mail
datasets from [Kyunghyun Cho's website](https://cs.nyu.edu/~kcho/DMQA/) (the
links next to "Stories") in the same folder. Then uncompress the archives by running:
```bash
tar -xvf cnn_stories.tgz && tar -xvf dailymail_stories.tgz
```
note that the finetuning script **will not work** if you do not download both
datasets. We will refer as `$DATA_PATH` the path to where you uncompressed both
archive.
```bash
export DATA_PATH=/path/to/dataset/
python run_summarization_finetuning.py \
--output_dir=output \
--model_type=bert2bert \
--model_name_or_path=bert2bert \
--do_train \
--data_path=$DATA_PATH \
```
## XNLI ## XNLI
Based on the script [`run_xnli.py`](https://github.com/huggingface/transformers/blob/master/examples/run_xnli.py). Based on the script [`run_xnli.py`](https://github.com/huggingface/transformers/blob/master/examples/run_xnli.py).
......
...@@ -20,14 +20,10 @@ from __future__ import absolute_import, division, print_function, unicode_litera ...@@ -20,14 +20,10 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import argparse import argparse
import logging import logging
from tqdm import trange
import torch import torch
import torch.nn.functional as F
import numpy as np import numpy as np
from transformers import GPT2Config, OpenAIGPTConfig, XLNetConfig, TransfoXLConfig, XLMConfig, CTRLConfig
from transformers import GPT2LMHeadModel, GPT2Tokenizer from transformers import GPT2LMHeadModel, GPT2Tokenizer
from transformers import OpenAIGPTLMHeadModel, OpenAIGPTTokenizer from transformers import OpenAIGPTLMHeadModel, OpenAIGPTTokenizer
from transformers import XLNetLMHeadModel, XLNetTokenizer from transformers import XLNetLMHeadModel, XLNetTokenizer
...@@ -36,22 +32,22 @@ from transformers import CTRLLMHeadModel, CTRLTokenizer ...@@ -36,22 +32,22 @@ from transformers import CTRLLMHeadModel, CTRLTokenizer
from transformers import XLMWithLMHeadModel, XLMTokenizer from transformers import XLMWithLMHeadModel, XLMTokenizer
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', logging.basicConfig(
datefmt = '%m/%d/%Y %H:%M:%S', format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
level = logging.INFO) datefmt="%m/%d/%Y %H:%M:%S",
level=logging.INFO,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
MAX_LENGTH = int(10000) # Hardcoded max length to avoid infinite loop MAX_LENGTH = int(10000) # Hardcoded max length to avoid infinite loop
ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf in (GPT2Config, OpenAIGPTConfig, XLNetConfig, TransfoXLConfig, XLMConfig, CTRLConfig)), ())
MODEL_CLASSES = { MODEL_CLASSES = {
'gpt2': (GPT2LMHeadModel, GPT2Tokenizer), "gpt2": (GPT2LMHeadModel, GPT2Tokenizer),
'ctrl': (CTRLLMHeadModel, CTRLTokenizer), "ctrl": (CTRLLMHeadModel, CTRLTokenizer),
'openai-gpt': (OpenAIGPTLMHeadModel, OpenAIGPTTokenizer), "openai-gpt": (OpenAIGPTLMHeadModel, OpenAIGPTTokenizer),
'xlnet': (XLNetLMHeadModel, XLNetTokenizer), "xlnet": (XLNetLMHeadModel, XLNetTokenizer),
'transfo-xl': (TransfoXLLMHeadModel, TransfoXLTokenizer), "transfo-xl": (TransfoXLLMHeadModel, TransfoXLTokenizer),
'xlm': (XLMWithLMHeadModel, XLMTokenizer), "xlm": (XLMWithLMHeadModel, XLMTokenizer),
} }
# Padding text to help Transformer-XL and XLNet with short prompts as proposed by Aman Rusia # Padding text to help Transformer-XL and XLNet with short prompts as proposed by Aman Rusia
...@@ -75,81 +71,79 @@ def set_seed(args): ...@@ -75,81 +71,79 @@ def set_seed(args):
if args.n_gpu > 0: if args.n_gpu > 0:
torch.cuda.manual_seed_all(args.seed) torch.cuda.manual_seed_all(args.seed)
#
# Functions to prepare models' input
#
def prepare_ctrl_input(args, _, tokenizer, prompt_text):
if args.temperature > 0.7:
logger.info(
"CTRL typically works better with lower temperatures (and lower top_k)."
)
encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False)
if not any(encoded_prompt[0] == x for x in tokenizer.control_codes.values()):
logger.info(
"WARNING! You are not starting your generation from a control code so you won't get good results"
)
return prompt_text
def prepare_xlm_input(args, model, tokenizer, prompt_text):
# kwargs = {"language": None, "mask_token_id": None}
# Set the language
use_lang_emb = hasattr(model.config, "use_lang_emb") and model.config.use_lang_emb
if hasattr(model.config, "lang2id") and use_lang_emb:
available_languages = model.config.lang2id.keys()
if args.xlm_language in available_languages:
language = args.xlm_language
else:
language = None
while language not in available_languages:
language = input(
"Using XLM. Select language in "
+ str(list(available_languages))
+ " >>> "
)
# kwargs["language"] = tokenizer.lang2id[language]
# TODO fix mask_token_id setup when configurations will be synchronized between models and tokenizers
# XLM masked-language modeling (MLM) models need masked token
# is_xlm_mlm = "mlm" in args.model_name_or_path
# if is_xlm_mlm:
# kwargs["mask_token_id"] = tokenizer.mask_token_id
return prompt_text
def prepare_xlnet_input(args, _, tokenizer, prompt_text):
prompt_text = (args.padding_text if args.padding_text else PADDING_TEXT) + prompt_text
return prompt_text, {}
def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')): def prepare_transfoxl_input(args, _, tokenizer, prompt_text):
""" Filter a distribution of logits using top-k and/or nucleus (top-p) filtering prompt_text = (args.padding_text if args.padding_text else PADDING_TEXT) + prompt_text
Args: return prompt_text, {}
logits: logits distribution shape (batch size x vocabulary size)
top_k > 0: keep only top k tokens with highest probability (top-k filtering).
top_p > 0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering). PREPROCESSING_FUNCTIONS = {
Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751) "ctrl": prepare_ctrl_input,
From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317 "xlm": prepare_xlm_input,
""" "xlnet": prepare_xlnet_input,
top_k = min(top_k, logits.size(-1)) # Safety check "transfo-xl": prepare_transfoxl_input,
if top_k > 0: }
# Remove all tokens with a probability less than the last token of the top-k
indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None]
logits[indices_to_remove] = filter_value def adjust_length_to_model(length, max_sequence_length):
if length < 0 and max_sequence_length > 0:
if top_p > 0.0: length = max_sequence_length
sorted_logits, sorted_indices = torch.sort(logits, descending=True) elif 0 < max_sequence_length < length:
cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) length = max_sequence_length # No generation bigger than model size
elif length < 0:
# Remove tokens with cumulative probability above the threshold length = MAX_LENGTH # avoid infinite loop
sorted_indices_to_remove = cumulative_probs > top_p return length
# Shift the indices to the right to keep also the first token above the threshold
sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
sorted_indices_to_remove[..., 0] = 0
# scatter sorted tensors to original indexing
indices_to_remove = sorted_indices_to_remove.scatter(dim=1, index=sorted_indices, src=sorted_indices_to_remove)
logits[indices_to_remove] = filter_value
return logits
def sample_sequence(model, length, context, num_samples=1, temperature=1, top_k=0, top_p=0.0, repetition_penalty=1.0,
is_xlnet=False, is_xlm_mlm=False, xlm_mask_token=None, xlm_lang=None, device='cpu'):
context = torch.tensor(context, dtype=torch.long, device=device)
context = context.unsqueeze(0).repeat(num_samples, 1)
generated = context
with torch.no_grad():
for _ in trange(length):
inputs = {'input_ids': generated}
if is_xlnet:
# XLNet is a direct (predict same token, not next token) and bi-directional model by default
# => need one additional dummy token in the input (will be masked), attention mask and target mapping (see model docstring)
input_ids = torch.cat((generated, torch.zeros((1, 1), dtype=torch.long, device=device)), dim=1)
perm_mask = torch.zeros((1, input_ids.shape[1], input_ids.shape[1]), dtype=torch.float, device=device)
perm_mask[:, :, -1] = 1.0 # Previous tokens don't see last token
target_mapping = torch.zeros((1, 1, input_ids.shape[1]), dtype=torch.float, device=device)
target_mapping[0, 0, -1] = 1.0 # predict last token
inputs = {'input_ids': input_ids, 'perm_mask': perm_mask, 'target_mapping': target_mapping}
if is_xlm_mlm and xlm_mask_token:
# XLM MLM models are direct models (predict same token, not next token)
# => need one additional dummy token in the input (will be masked and guessed)
input_ids = torch.cat((generated, torch.full((1, 1), xlm_mask_token, dtype=torch.long, device=device)), dim=1)
inputs = {'input_ids': input_ids}
if xlm_lang is not None:
inputs["langs"] = torch.tensor([xlm_lang] * inputs["input_ids"].shape[1], device=device).view(1, -1)
outputs = model(**inputs) # Note: we could also use 'past' with GPT-2/Transfo-XL/XLNet/CTRL (cached hidden-states)
next_token_logits = outputs[0][:, -1, :] / (temperature if temperature > 0 else 1.)
# repetition penalty from CTRL (https://arxiv.org/abs/1909.05858)
for i in range(num_samples):
for _ in set(generated[i].tolist()):
next_token_logits[i, _] /= repetition_penalty
filtered_logits = top_k_top_p_filtering(next_token_logits, top_k=top_k, top_p=top_p)
if temperature == 0: # greedy sampling:
next_token = torch.argmax(filtered_logits, dim=-1).unsqueeze(-1)
else:
next_token = torch.multinomial(F.softmax(filtered_logits, dim=-1), num_samples=1)
generated = torch.cat((generated, next_token), dim=1)
return generated
def main(): def main():
...@@ -157,104 +151,76 @@ def main(): ...@@ -157,104 +151,76 @@ def main():
parser.add_argument("--model_type", default=None, type=str, required=True, parser.add_argument("--model_type", default=None, type=str, required=True,
help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys())) help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
parser.add_argument("--model_name_or_path", default=None, type=str, required=True, parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS)) help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
parser.add_argument("--prompt", type=str, default="") parser.add_argument("--prompt", type=str, default="")
parser.add_argument("--padding_text", type=str, default="")
parser.add_argument("--xlm_lang", type=str, default="", help="Optional language when used with the XLM model.")
parser.add_argument("--length", type=int, default=20) parser.add_argument("--length", type=int, default=20)
parser.add_argument("--num_samples", type=int, default=1) parser.add_argument("--stop_token", type=str, default=None, help="Token at which text generation is stopped")
parser.add_argument("--temperature", type=float, default=1.0,
help="temperature of 0 implies greedy sampling") parser.add_argument("--temperature", type=float, default=1.0, help="temperature of 1.0 has no effect, lower tend toward greedy sampling")
parser.add_argument("--repetition_penalty", type=float, default=1.0, parser.add_argument("--repetition_penalty", type=float, default=1.0, help="primarily useful for CTRL model; in that case, use 1.2")
help="primarily useful for CTRL model; in that case, use 1.2") parser.add_argument("--k", type=int, default=0)
parser.add_argument("--top_k", type=int, default=0) parser.add_argument("--p", type=float, default=0.9)
parser.add_argument("--top_p", type=float, default=0.9)
parser.add_argument("--no_cuda", action='store_true', parser.add_argument("--padding_text", type=str, default="", help="Padding text for Transfo-XL and XLNet.")
help="Avoid using CUDA when available") parser.add_argument("--xlm_language", type=str, default="", help="Optional language when used with the XLM model.")
parser.add_argument('--seed', type=int, default=42,
help="random seed for initialization") parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
parser.add_argument('--stop_token', type=str, default=None, parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available")
help="Token at which text generation is stopped")
args = parser.parse_args() args = parser.parse_args()
args.device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") args.device = torch.device(
"cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
)
args.n_gpu = torch.cuda.device_count() args.n_gpu = torch.cuda.device_count()
set_seed(args) set_seed(args)
args.model_type = args.model_type.lower() # Initialize the model and tokenizer
model_class, tokenizer_class = MODEL_CLASSES[args.model_type] try:
args.model_type = args.model_type.lower()
model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
except KeyError:
raise KeyError(
"the model {} you specified is not supported. You are welcome to add it and open a PR :)"
)
tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path) tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
model = model_class.from_pretrained(args.model_name_or_path) model = model_class.from_pretrained(args.model_name_or_path)
model.to(args.device) model.to(args.device)
model.eval()
if args.length < 0 and model.config.max_position_embeddings > 0:
args.length = model.config.max_position_embeddings
elif 0 < model.config.max_position_embeddings < args.length:
args.length = model.config.max_position_embeddings # No generation bigger than model size
elif args.length < 0:
args.length = MAX_LENGTH # avoid infinite loop
args.length = adjust_length_to_model(
args.length, max_sequence_length=model.config.max_position_embeddings
)
logger.info(args) logger.info(args)
if args.model_type in ["ctrl"]:
if args.temperature > 0.7:
logger.info('CTRL typically works better with lower temperatures (and lower top_k).')
while True:
xlm_lang = None
# XLM Language usage detailed in the issues #1414
if args.model_type in ["xlm"] and hasattr(tokenizer, 'lang2id') and hasattr(model.config, 'use_lang_emb') \
and model.config.use_lang_emb:
if args.xlm_lang:
language = args.xlm_lang
else:
language = None
while language not in tokenizer.lang2id.keys():
language = input("Using XLM. Select language in " + str(list(tokenizer.lang2id.keys())) + " >>> ")
xlm_lang = tokenizer.lang2id[language]
# XLM masked-language modeling (MLM) models need masked token (see details in sample_sequence)
is_xlm_mlm = args.model_type in ["xlm"] and 'mlm' in args.model_name_or_path
if is_xlm_mlm:
xlm_mask_token = tokenizer.mask_token_id
else:
xlm_mask_token = None
raw_text = args.prompt if args.prompt else input("Model prompt >>> ")
if args.model_type in ["transfo-xl", "xlnet"]:
# Models with memory likes to have a long prompt for short inputs.
raw_text = (args.padding_text if args.padding_text else PADDING_TEXT) + raw_text
context_tokens = tokenizer.encode(raw_text, add_special_tokens=False)
if args.model_type == "ctrl":
if not any(context_tokens[0] == x for x in tokenizer.control_codes.values()):
logger.info("WARNING! You are not starting your generation from a control code so you won't get good results")
out = sample_sequence(
model=model,
context=context_tokens,
num_samples=args.num_samples,
length=args.length,
temperature=args.temperature,
top_k=args.top_k,
top_p=args.top_p,
repetition_penalty=args.repetition_penalty,
is_xlnet=bool(args.model_type == "xlnet"),
is_xlm_mlm=is_xlm_mlm,
xlm_mask_token=xlm_mask_token,
xlm_lang=xlm_lang,
device=args.device,
)
out = out[:, len(context_tokens):].tolist()
for o in out:
text = tokenizer.decode(o, clean_up_tokenization_spaces=True)
text = text[: text.find(args.stop_token) if args.stop_token else None]
print(text) prompt_text = args.prompt if args.prompt else input("Model prompt >>> ")
# Different models need different input formatting and/or extra arguments
requires_preprocessing = args.model_type in PREPROCESSING_FUNCTIONS.keys()
if requires_preprocessing:
prepare_input = PREPROCESSING_FUNCTIONS.get(args.model_type)
prompt_text = prepare_input(args, model, tokenizer, prompt_text)
encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors='pt')
output_sequences = model.generate(
input_ids=encoded_prompt,
max_length=args.length,
temperature=args.temperature,
top_k=args.k,
top_p=args.p,
repetition_penalty=args.repetition_penalty,
)
# Batch size == 1. To return more samples, use num_return_sequences > 1
generated_sequence = output_sequences[0].tolist()
text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
text = text[: text.find(args.stop_token) if args.stop_token else None]
print(text)
if args.prompt:
break
return text
if __name__ == "__main__":
main()
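For readers skimming this diff: the refactor above replaces the hand-rolled `sample_sequence` loop with `model.generate`. Below is a minimal sketch of the new call pattern, assuming a GPT-2 checkpoint and purely illustrative prompt and decoding values (none of this is taken from the commit itself):

```python
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Illustrative checkpoint; any causal LM exposing .generate() works similarly.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()

prompt_text = "The Normans were"
encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt")

with torch.no_grad():
    output_sequences = model.generate(
        input_ids=encoded_prompt,
        max_length=60,          # illustrative value
        temperature=1.0,
        top_k=0,
        top_p=0.9,
        repetition_penalty=1.0,
    )

# Batch size == 1 here, mirroring the script above.
generated_sequence = output_sequences[0].tolist()
print(tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True))
```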
@@ -52,6 +52,9 @@ from transformers import (WEIGHTS_NAME, BertConfig,
AlbertConfig,
AlbertForSequenceClassification,
AlbertTokenizer,
XLMRobertaConfig,
XLMRobertaForSequenceClassification,
XLMRobertaTokenizer,
)
from transformers import AdamW, get_linear_schedule_with_warmup
@@ -72,7 +75,8 @@ MODEL_CLASSES = {
'xlm': (XLMConfig, XLMForSequenceClassification, XLMTokenizer),
'roberta': (RobertaConfig, RobertaForSequenceClassification, RobertaTokenizer),
'distilbert': (DistilBertConfig, DistilBertForSequenceClassification, DistilBertTokenizer),
'albert': (AlbertConfig, AlbertForSequenceClassification, AlbertTokenizer),
'xlmroberta': (XLMRobertaConfig, XLMRobertaForSequenceClassification, XLMRobertaTokenizer),
}
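As a quick illustration of how the new `xlmroberta` entry is consumed (a sketch, not part of this diff; it assumes the `xlm-roberta-base` checkpoint shortcut is available):

```python
from transformers import (XLMRobertaConfig,
                          XLMRobertaForSequenceClassification,
                          XLMRobertaTokenizer)

# Same (config, model, tokenizer) triple layout as MODEL_CLASSES above.
MODEL_CLASSES = {
    'xlmroberta': (XLMRobertaConfig, XLMRobertaForSequenceClassification, XLMRobertaTokenizer),
}

config_class, model_class, tokenizer_class = MODEL_CLASSES['xlmroberta']
tokenizer = tokenizer_class.from_pretrained('xlm-roberta-base')
model = model_class.from_pretrained('xlm-roberta-base')
```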
@@ -304,9 +308,9 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
else:
logger.info("Creating features from dataset file at %s", args.data_dir)
label_list = processor.get_labels()
if task in ['mnli', 'mnli-mm'] and args.model_type in ['roberta', 'xlmroberta']:
# HACK(label indices are swapped in RoBERTa pretrained model)
label_list[1], label_list[2] = label_list[2], label_list[1]
examples = processor.get_dev_examples(args.data_dir) if evaluate else processor.get_train_examples(args.data_dir)
features = convert_examples_to_features(examples,
tokenizer,
@@ -380,7 +384,7 @@ def main():
parser.add_argument("--learning_rate", default=5e-5, type=float,
help="The initial learning rate for Adam.")
parser.add_argument("--weight_decay", default=0.0, type=float,
help="Weight decay if we apply some.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float,
help="Epsilon for Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float,
......
@@ -430,7 +430,7 @@ def main():
parser.add_argument("--learning_rate", default=5e-5, type=float,
help="The initial learning rate for Adam.")
parser.add_argument("--weight_decay", default=0.0, type=float,
help="Weight decay if we apply some.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float,
help="Epsilon for Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float,
......
@@ -38,11 +38,13 @@ from transformers import WEIGHTS_NAME, BertConfig, BertForTokenClassification, B
from transformers import RobertaConfig, RobertaForTokenClassification, RobertaTokenizer
from transformers import DistilBertConfig, DistilBertForTokenClassification, DistilBertTokenizer
from transformers import CamembertConfig, CamembertForTokenClassification, CamembertTokenizer
from transformers import XLMRobertaConfig, XLMRobertaForTokenClassification, XLMRobertaTokenizer
logger = logging.getLogger(__name__)
ALL_MODELS = sum(
(tuple(conf.pretrained_config_archive_map.keys()) for conf in (BertConfig, RobertaConfig, DistilBertConfig,
CamembertConfig, XLMRobertaConfig)),
())
MODEL_CLASSES = {
@@ -50,6 +52,7 @@ MODEL_CLASSES = {
"roberta": (RobertaConfig, RobertaForTokenClassification, RobertaTokenizer),
"distilbert": (DistilBertConfig, DistilBertForTokenClassification, DistilBertTokenizer),
"camembert": (CamembertConfig, CamembertForTokenClassification, CamembertTokenizer),
"xlmroberta": (XLMRobertaConfig, XLMRobertaForTokenClassification, XLMRobertaTokenizer),
}
......
@@ -29,7 +29,7 @@ And move all the stories to the same folder. We will refer as `$DATA_PATH` the p
python run_summarization.py \
--documents_dir $DATA_PATH \
--summaries_output_dir $SUMMARIES_PATH \ # optional
--no_cuda false \
--batch_size 4 \
--min_length 50 \
--max_length 200 \
@@ -39,7 +39,7 @@ python run_summarization.py \
--compute_rouge true
```
The script executes on GPU if one is available and if `no_cuda` is not set to `true`. Inference on multiple GPUs is not supported yet. The ROUGE scores will be displayed in the console at the end of evaluation and written in a `rouge_scores.txt` file. The script takes 30 hours to compute with a single Tesla V100 GPU and a batch size of 10 (300,000 texts to summarize).
## Summarize any text
@@ -49,7 +49,7 @@ Put the documents that you would like to summarize in a folder (the path to whic
python run_summarization.py \
--documents_dir $DATA_PATH \
--summaries_output_dir $SUMMARIES_PATH \ # optional
--no_cuda false \
--batch_size 4 \
--min_length 50 \
--max_length 200 \
......
@@ -33,6 +33,8 @@ class BertAbsConfig(PretrainedConfig):
r""" Class to store the configuration of the BertAbs model.
Arguments:
vocab_size: int
Number of tokens in the vocabulary.
max_pos: int
The maximum sequence length that this model will be used with.
enc_layer: int
@@ -65,7 +67,7 @@ class BertAbsConfig(PretrainedConfig):
def __init__(
self,
vocab_size=30522,
max_pos=512,
enc_layers=6,
enc_hidden_size=512,
@@ -81,39 +83,17 @@ class BertAbsConfig(PretrainedConfig):
):
super(BertAbsConfig, self).__init__(**kwargs)
if self._input_is_path_to_json(vocab_size_or_config_json_file):
path_to_json = vocab_size_or_config_json_file
with open(path_to_json, "r", encoding="utf-8") as reader:
json_config = json.loads(reader.read())
for key, value in json_config.items():
self.__dict__[key] = value
elif isinstance(vocab_size_or_config_json_file, int):
self.vocab_size = vocab_size_or_config_json_file
self.max_pos = max_pos
self.vocab_size = vocab_size
self.max_pos = max_pos
self.enc_layers = enc_layers
self.enc_hidden_size = enc_hidden_size
self.enc_heads = enc_heads
self.enc_ff_size = enc_ff_size
self.enc_dropout = enc_dropout
self.dec_layers = dec_layers
self.dec_hidden_size = dec_hidden_size
self.dec_heads = dec_heads
self.dec_ff_size = dec_ff_size
self.dec_dropout = dec_dropout
else:
raise ValueError(
"First argument must be either a vocabulary size (int)"
"or the path to a pretrained model config file (str)"
)
def _input_is_path_to_json(self, first_argument):
""" Checks whether the first argument passed to config
is the path to a JSON file that contains the config.
"""
is_python_2 = sys.version_info[0] == 2
if is_python_2:
return isinstance(first_argument, unicode)
else:
return isinstance(first_argument, str)
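To make the constructor change above concrete, here is a sketch of the new keyword-only usage. The module path and defaults are assumed from the examples layout and the signature in this diff; loading from a JSON file is now expected to go through the inherited `PretrainedConfig` helpers (e.g. `from_json_file`) rather than the removed path-detection logic.

```python
# Assumes examples/summarization/configuration_bertabs.py is importable.
from configuration_bertabs import BertAbsConfig

config = BertAbsConfig(
    vocab_size=30522,      # formerly `vocab_size_or_config_json_file`
    max_pos=512,
    enc_layers=6,
    enc_hidden_size=512,
)
print(config.vocab_size, config.max_pos)
```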
@@ -72,8 +72,7 @@ class ExamplesTests(unittest.TestCase):
logger.addHandler(stream_handler)
testargs = ["run_squad.py",
"--train_file=./examples/tests_samples/SQUAD/dev-v2.0-small.json",
"--predict_file=./examples/tests_samples/SQUAD/dev-v2.0-small.json",
"--data_dir=./examples/tests_samples/SQUAD",
"--model_name=bert-base-uncased",
"--output_dir=./examples/tests_samples/temp_dir",
"--max_steps=10",
......
{
"version": "v2.0",
"data": [{
"title": "Normans",
"paragraphs": [{
"qas": [{
"question": "In what country is Normandy located?",
"id": "56ddde6b9a695914005b9628",
"answers": [{
"text": "France",
"answer_start": 159
}],
"is_impossible": false
}, {
"question": "When were the Normans in Normandy?",
"id": "56ddde6b9a695914005b9629",
"answers": [{
"text": "10th and 11th centuries",
"answer_start": 94
}],
"is_impossible": false
}, {
"question": "From which countries did the Norse originate?",
"id": "56ddde6b9a695914005b962a",
"answers": [{
"text": "Denmark, Iceland and Norway",
"answer_start": 256
}],
"is_impossible": false
}, {
"plausible_answers": [{
"text": "Rollo",
"answer_start": 308
}],
"question": "Who did King Charles III swear fealty to?",
"id": "5ad39d53604f3c001a3fe8d3",
"answers": [],
"is_impossible": true
}, {
"plausible_answers": [{
"text": "10th century",
"answer_start": 671
}],
"question": "When did the Frankish identity emerge?",
"id": "5ad39d53604f3c001a3fe8d4",
"answers": [],
"is_impossible": true
}],
"context": "The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse (\"Norman\" comes from \"Norseman\") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries."
}, {
"qas": [{
"question": "Who was the duke in the battle of Hastings?",
"id": "56dddf4066d3e219004dad5f",
"answers": [{
"text": "William the Conqueror",
"answer_start": 1022
}],
"is_impossible": false
}, {
"plausible_answers": [{
"text": "Antioch",
"answer_start": 1295
}],
"question": "What principality did William the conquerer found?",
"id": "5ad3a266604f3c001a3fea2b",
"answers": [],
"is_impossible": true
}],
"context": "The Norman dynasty had a major political, cultural and military impact on medieval Europe and even the Near East. The Normans were famed for their martial spirit and eventually for their Christian piety, becoming exponents of the Catholic orthodoxy into which they assimilated. They adopted the Gallo-Romance language of the Frankish land they settled, their dialect becoming known as Norman, Normaund or Norman French, an important literary language. The Duchy of Normandy, which they formed by treaty with the French crown, was a great fief of medieval France, and under Richard I of Normandy was forged into a cohesive and formidable principality in feudal tenure. The Normans are noted both for their culture, such as their unique Romanesque architecture and musical traditions, and for their significant military accomplishments and innovations. Norman adventurers founded the Kingdom of Sicily under Roger II after conquering southern Italy on the Saracens and Byzantines, and an expedition on behalf of their duke, William the Conqueror, led to the Norman conquest of England at the Battle of Hastings in 1066. Norman cultural and military influence spread from these new European centres to the Crusader states of the Near East, where their prince Bohemond I founded the Principality of Antioch in the Levant, to Scotland and Wales in Great Britain, to Ireland, and to the coasts of north Africa and the Canary Islands."
}]
}, {
"title": "Computational_complexity_theory",
"paragraphs": [{
"qas": [{
"question": "What branch of theoretical computer science deals with broadly classifying computational problems by difficulty and class of relationship?",
"id": "56e16182e3433e1400422e28",
"answers": [{
"text": "Computational complexity theory",
"answer_start": 0
}],
"is_impossible": false
}, {
"plausible_answers": [{
"text": "algorithm",
"answer_start": 472
}],
"question": "What is a manual application of mathematical steps?",
"id": "5ad5316b5b96ef001a10ab76",
"answers": [],
"is_impossible": true
}],
"context": "Computational complexity theory is a branch of the theory of computation in theoretical computer science that focuses on classifying computational problems according to their inherent difficulty, and relating those classes to each other. A computational problem is understood to be a task that is in principle amenable to being solved by a computer, which is equivalent to stating that the problem may be solved by mechanical application of mathematical steps, such as an algorithm."
}, {
"qas": [{
"question": "What measure of a computational problem broadly defines the inherent difficulty of the solution?",
"id": "56e16839cd28a01900c67887",
"answers": [{
"text": "if its solution requires significant resources",
"answer_start": 46
}],
"is_impossible": false
}, {
"question": "What method is used to intuitively assess or quantify the amount of resources required to solve a computational problem?",
"id": "56e16839cd28a01900c67888",
"answers": [{
"text": "mathematical models of computation",
"answer_start": 176
}],
"is_impossible": false
}, {
"question": "What are two basic primary resources used to guage complexity?",
"id": "56e16839cd28a01900c67889",
"answers": [{
"text": "time and storage",
"answer_start": 305
}],
"is_impossible": false
}, {
"plausible_answers": [{
"text": "the number of gates in a circuit",
"answer_start": 436
}],
"question": "What unit is measured to determine circuit simplicity?",
"id": "5ad532575b96ef001a10ab7f",
"answers": [],
"is_impossible": true
}, {
"plausible_answers": [{
"text": "the number of processors",
"answer_start": 502
}],
"question": "What number is used in perpendicular computing?",
"id": "5ad532575b96ef001a10ab80",
"answers": [],
"is_impossible": true
}],
"context": "A problem is regarded as inherently difficult if its solution requires significant resources, whatever the algorithm used. The theory formalizes this intuition, by introducing mathematical models of computation to study these problems and quantifying the amount of resources needed to solve them, such as time and storage. Other complexity measures are also used, such as the amount of communication (used in communication complexity), the number of gates in a circuit (used in circuit complexity) and the number of processors (used in parallel computing). One of the roles of computational complexity theory is to determine the practical limits on what computers can and cannot do."
}]
}]
}
\ No newline at end of file
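For reference, a minimal sketch of walking this SQuAD v2.0-style fixture (the file path is assumed from the test arguments above):

```python
import json

with open("examples/tests_samples/SQUAD/dev-v2.0-small.json", encoding="utf-8") as f:
    squad = json.load(f)

# Each article has paragraphs; each paragraph carries a context and its QA pairs.
for article in squad["data"]:
    for paragraph in article["paragraphs"]:
        for qa in paragraph["qas"]:
            print(qa["id"], qa["is_impossible"], qa["question"])
```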