"src/routes/vscode:/vscode.git/clone" did not exist on "d9de426fcd5fb902aa68be64ca80932ac863b61a"
Commit 61d9ee45 authored by Morgan Funtowicz

All tests are green.

parent e516a34a
transformers/pipelines.py

@@ -343,8 +343,9 @@ class Pipeline(_ScikitCompat):
         if 'distilbert' not in model_type and 'xlm' not in model_type:
             args += ['token_type_ids']
-        if 'xlnet' in model_type or 'xlm' in model_type:
-            args += ['cls_index', 'p_mask']
+        # PR #1548 (CLI) There is an issue with attention_mask
+        # if 'xlnet' in model_type or 'xlm' in model_type:
+        #     args += ['cls_index', 'p_mask']

         if isinstance(features, dict):
             return {k: features[k] for k in args}
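
Note: this hunk filters the tokenizer's features down to the argument names the current model accepts, and comments out the extra cls_index/p_mask inputs for XLNet/XLM because of the attention_mask issue noted against PR #1548. A minimal sketch of that filtering idea, assuming args starts from the usual input_ids/attention_mask pair (the starting list and the function wrapper are assumptions, not shown in the commit):

def filter_model_inputs(features: dict, model_type: str) -> dict:
    # Assumed starting point; the hunk above only shows the rules below.
    args = ['input_ids', 'attention_mask']
    # DistilBERT and XLM variants do not consume token_type_ids.
    if 'distilbert' not in model_type and 'xlm' not in model_type:
        args += ['token_type_ids']
    # Keep only the features the selected model understands.
    return {k: features[k] for k in args}

# filter_model_inputs({'input_ids': [1], 'attention_mask': [1], 'token_type_ids': [0]}, 'distilbertmodel')
# -> {'input_ids': [1], 'attention_mask': [1]}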
@@ -380,7 +381,7 @@ class Pipeline(_ScikitCompat):
             predictions = self.model(inputs, training=False)[0]
         else:
             with torch.no_grad():
-                predictions = self.model(**inputs).cpu()[0]
+                predictions = self.model(**inputs)[0].cpu()

         return predictions.numpy()
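
The PyTorch fix reorders indexing and device transfer: the model call returns a tuple of tensors, so the first element must be taken before calling .cpu(), which exists on tensors but not on tuples. A self-contained toy illustration (the random tensor stands in for the model output; none of this is the commit's code):

import torch

outputs = (torch.rand(1, 10),)      # stand-in for self.model(**inputs)
predictions = outputs[0].cpu()      # index into the tuple first, then move the tensor
# The old order, outputs.cpu()[0], raises AttributeError: 'tuple' object has no attribute 'cpu'
print(predictions.numpy().shape)    # (1, 10)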
@@ -444,7 +445,7 @@ class NerPipeline(Pipeline):
         # Forward
         if is_tf_available():
-            entities = self.model(**tokens)[0][0].numpy()
+            entities = self.model(tokens)[0][0].numpy()
         else:
             with torch.no_grad():
                 entities = self.model(**tokens)[0][0].cpu().numpy()
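
The NerPipeline fix is the TensorFlow mirror of the same concern: a Keras model takes the encoder's dict of named tensors as one positional input, while the PyTorch branch unpacks that dict into keyword arguments and detaches the result to CPU. A sketch of the two call conventions, assuming tokenizer, tf_model and pt_model are already-loaded placeholder objects (not defined in this commit):

# TensorFlow: the feature dict is passed positionally, tensors are already eager
tokens = tokenizer.encode_plus('HuggingFace is based in Paris.', return_tensors='tf')
entities = tf_model(tokens)[0][0].numpy()

# PyTorch: the same dict is unpacked into keyword arguments
tokens = tokenizer.encode_plus('HuggingFace is based in Paris.', return_tensors='pt')
entities = pt_model(**tokens)[0][0].cpu().numpy()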
transformers/tests/pipelines_test.py

@@ ... @@
 import unittest
 from unittest.mock import patch
+from typing import Iterable
+
+from transformers import pipeline
+from transformers.tests.utils import require_tf, require_torch

 QA_FINETUNED_MODELS = {
-    'bert-large-uncased-whole-word-masking-finetuned-squad',
-    'bert-large-cased-whole-word-masking-finetuned-squad',
-    'distilbert-base-uncased-distilled-squad',
+    ('bert-base-uncased', 'bert-large-uncased-whole-word-masking-finetuned-squad', None),
+    ('bert-base-cased', 'bert-large-cased-whole-word-masking-finetuned-squad', None),
+    ('bert-base-uncased', 'distilbert-base-uncased-distilled-squad', None)
 }
+
+NER_FINETUNED_MODELS = {
+    (
+        'bert-base-cased',
+        'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-pytorch_model.bin',
+        'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json'
+    )
+}
+
+FEATURE_EXTRACT_FINETUNED_MODELS = {
+    ('bert-base-cased', 'bert-base-cased', None),
+    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crash for TF2
+    ('distilbert-base-uncased', 'distilbert-base-uncased', None)
+}
+
+TEXT_CLASSIF_FINETUNED_MODELS = {
+    (
+        'bert-base-uncased',
+        'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-pytorch_model.bin',
+        'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json'
+    )
+}
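
The new tests import require_tf and require_torch from transformers.tests.utils, whose definitions are not part of this diff. A plausible minimal version of such guards, sketched here under the assumption that the availability checks are importable from the package root, skips a test when the framework is absent:

import unittest

from transformers import is_tf_available, is_torch_available

def require_tf(test_case):
    # Skip the decorated callable entirely when TensorFlow is not installed.
    return unittest.skipUnless(is_tf_available(), 'test requires TensorFlow')(test_case)

def require_torch(test_case):
    # Skip the decorated callable entirely when PyTorch is not installed.
    return unittest.skipUnless(is_torch_available(), 'test requires PyTorch')(test_case)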
-
-
-class QuestionAnsweringPipelineTest(unittest.TestCase):
-    def check_answer_structure(self, answer, batch, topk):
-        self.assertIsInstance(answer, list)
-        self.assertEqual(len(answer), batch)
-        self.assertIsInstance(answer[0], list)
-        self.assertEqual(len(answer[0]), topk)
-        self.assertIsInstance(answer[0][0], dict)
-
-        for item in answer[0]:
-            self.assertTrue('start' in item)
-            self.assertTrue('end' in item)
-            self.assertTrue('score' in item)
-            self.assertTrue('answer' in item)
-
-    def question_answering_pipeline(self, nlp):
-        # Simple case with topk = 1, no batching
-        a = nlp(question='What is the name of the company I\'m working for ?', context='I\'m working for Huggingface.')
-        self.check_answer_structure(a, 1, 1)
-
-        # Simple case with topk = 2, no batching
-        a = nlp(question='What is the name of the company I\'m working for ?', context='I\'m working for Huggingface.', topk=2)
-        self.check_answer_structure(a, 1, 2)
-
-        # Batch case with topk = 1
-        a = nlp(question=['What is the name of the company I\'m working for ?', 'Where is the company based ?'],
-                context=['I\'m working for Huggingface.', 'The company is based in New York and Paris'])
-        self.check_answer_structure(a, 2, 1)
-
-        # Batch case with topk = 2
-        a = nlp(question=['What is the name of the company I\'m working for ?', 'Where is the company based ?'],
-                context=['Where is the company based ?', 'The company is based in New York and Paris'], topk=2)
-        self.check_answer_structure(a, 2, 2)
-
-        # check for data keyword
-        a = nlp(data=nlp.create_sample(question='What is the name of the company I\'m working for ?', context='I\'m working for Huggingface.'))
-        self.check_answer_structure(a, 1, 1)
-
-        a = nlp(data=nlp.create_sample(question='What is the name of the company I\'m working for ?', context='I\'m working for Huggingface.'), topk=2)
-        self.check_answer_structure(a, 1, 2)
-
-        a = nlp(data=[
-            nlp.create_sample(question='What is the name of the company I\'m working for ?', context='I\'m working for Huggingface.'),
-            nlp.create_sample(question='I\'m working for Huggingface.', context='The company is based in New York and Paris'),
-        ])
-        self.check_answer_structure(a, 2, 1)
-
-        a = nlp(data=[
-            {'question': 'What is the name of the company I\'m working for ?', 'context': 'I\'m working for Huggingface.'},
-            {'question': 'Where is the company based ?', 'context': 'The company is based in New York and Paris'},
-        ])
-        self.check_answer_structure(a, 2, 1)
-
-        # X keywords
-        a = nlp(X=nlp.create_sample(
-            question='Where is the company based ?', context='The company is based in New York and Paris'
-        ))
-        self.check_answer_structure(a, 1, 1)
-
-        a = nlp(X=[
-            {'question': 'What is the name of the company I\'m working for ?', 'context': 'I\'m working for Huggingface.'},
-            {'question': 'Where is the company based ?', 'context': 'The company is based in New York and Paris'},
-        ], topk=2)
-        self.check_answer_structure(a, 2, 2)
-
-    @patch('transformers.pipelines.is_torch_available', return_value=False)
-    def test_tf_models(self, is_torch_available):
-        from transformers import pipeline
-        for model in QA_FINETUNED_MODELS:
-            self.question_answering_pipeline(pipeline('question-answering', model))
-
-    @patch('transformers.pipelines.is_tf_available', return_value=False)
-    @patch('transformers.tokenization_utils.is_tf_available', return_value=False)
-    def test_torch_models(self, is_tf_available, _):
-        from transformers import pipeline
-        for model in QA_FINETUNED_MODELS:
-            self.question_answering_pipeline(pipeline('question-answering', model))
-
-
-class AutoPipelineTest(unittest.TestCase):
-    @patch('transformers.pipelines.is_torch_available', return_value=False)
-    def test_tf_qa(self, is_torch_available):
-        from transformers import pipeline
-        from transformers.pipelines import QuestionAnsweringPipeline
-        from transformers.modeling_tf_utils import TFPreTrainedModel
-        for model in QA_FINETUNED_MODELS:
-            nlp = pipeline('question-answering', model)
-            self.assertIsInstance(nlp, QuestionAnsweringPipeline)
-            self.assertIsInstance(nlp.model, TFPreTrainedModel)
-
-    @patch('transformers.pipelines.is_tf_available', return_value=False)
-    def test_torch_qa(self, is_tf_available):
-        from transformers import pipeline
-        from transformers.pipelines import QuestionAnsweringPipeline
-        from transformers.modeling_utils import PreTrainedModel
-        for model in QA_FINETUNED_MODELS:
-            nlp = pipeline('question-answering', model)
-            self.assertIsInstance(nlp, QuestionAnsweringPipeline)
-            self.assertIsInstance(nlp.model, PreTrainedModel)
+
+
+@require_tf
+def tf_pipeline(*args, **kwargs):
+    return pipeline(**kwargs)
+
+
+@require_torch
+def torch_pipeline(*args, **kwargs):
+    return pipeline(**kwargs)
+
+
+class MonoColumnInputTestCase(unittest.TestCase):
+    def _test_mono_column_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
+        self.assertIsNotNone(nlp)
+
+        mono_result = nlp(valid_inputs[0])
+        self.assertIsInstance(mono_result, list)
+        self.assertIsInstance(mono_result[0], (dict, list))
+
+        if isinstance(mono_result[0], list):
+            mono_result = mono_result[0]
+
+        for key in output_keys:
+            self.assertIn(key, mono_result[0])
+
+        multi_result = nlp(valid_inputs)
+        self.assertIsInstance(multi_result, list)
+        self.assertIsInstance(multi_result[0], (dict, list))
+
+        if isinstance(multi_result[0], list):
+            multi_result = multi_result[0]
+
+        for result in multi_result:
+            for key in output_keys:
+                self.assertIn(key, result)
+
+        self.assertRaises(Exception, nlp, invalid_inputs)
+
+    def test_ner(self):
+        mandatory_keys = {'entity', 'word', 'score'}
+        valid_inputs = ['HuggingFace is solving NLP one commit at a time.', 'HuggingFace is based in New-York & Paris']
+        invalid_inputs = [None]
+        for tokenizer, model, config in NER_FINETUNED_MODELS:
+            with patch('transformers.pipelines.is_torch_available', return_value=False):
+                nlp = tf_pipeline(task='ner', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+
+            with patch('transformers.pipelines.is_tf_available', return_value=False):
+                nlp = torch_pipeline(task='ner', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+
+    def test_sentiment_analysis(self):
+        mandatory_keys = {'label'}
+        valid_inputs = ['HuggingFace is solving NLP one commit at a time.', 'HuggingFace is based in New-York & Paris']
+        invalid_inputs = [None]
+        for tokenizer, model, config in TEXT_CLASSIF_FINETUNED_MODELS:
+            with patch('transformers.pipelines.is_torch_available', return_value=False):
+                nlp = tf_pipeline(task='sentiment-analysis', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+
+            with patch('transformers.pipelines.is_tf_available', return_value=False):
+                nlp = torch_pipeline(task='sentiment-analysis', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+
+    def test_features_extraction(self):
+        valid_inputs = ['HuggingFace is solving NLP one commit at a time.', 'HuggingFace is based in New-York & Paris']
+        invalid_inputs = [None]
+        for tokenizer, model, config in FEATURE_EXTRACT_FINETUNED_MODELS:
+            with patch('transformers.pipelines.is_torch_available', return_value=False):
+                nlp = tf_pipeline(task='sentiment-analysis', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
+
+            with patch('transformers.pipelines.is_tf_available', return_value=False):
+                nlp = torch_pipeline(task='sentiment-analysis', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
+
+
+class MultiColumnInputTestCase(unittest.TestCase):
+    def _test_multicolumn_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list, output_keys: Iterable[str]):
+        self.assertIsNotNone(nlp)
+
+        mono_result = nlp(valid_inputs[0])
+        self.assertIsInstance(mono_result, dict)
+
+        for key in output_keys:
+            self.assertIn(key, mono_result)
+
+        multi_result = nlp(valid_inputs)
+        self.assertIsInstance(multi_result, list)
+        self.assertIsInstance(multi_result[0], dict)
+
+        for result in multi_result:
+            for key in output_keys:
+                self.assertIn(key, result)
+
+        self.assertRaises(Exception, nlp, invalid_inputs[0])
+        self.assertRaises(Exception, nlp, invalid_inputs)
+
+    def test_question_answering(self):
+        mandatory_output_keys = {'score', 'answer', 'start', 'end'}
+        valid_samples = [
+            {'question': 'Where was HuggingFace founded ?', 'context': 'HuggingFace was founded in Paris.'},
+            {
+                'question': 'In what field is HuggingFace working ?',
+                'context': 'HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.'
+            }
+        ]
+        invalid_samples = [
+            {'question': '', 'context': 'This is a test to try empty question edge case'},
+            {'question': None, 'context': 'This is a test to try empty question edge case'},
+            {'question': 'What is does with empty context ?', 'context': ''},
+            {'question': 'What is does with empty context ?', 'context': None},
+        ]
+
+        for tokenizer, model, config in QA_FINETUNED_MODELS:
+            # Test for Tensorflow
+            with patch('transformers.pipelines.is_torch_available', return_value=False):
+                nlp = pipeline(task='question-answering', model=model, config=config, tokenizer=tokenizer)
+                self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)
+
+            # Test for PyTorch
+            with patch('transformers.pipelines.is_tf_available', return_value=False):
+                nlp = pipeline(task='question-answering', model=model, config=config, tokenizer=tokenizer)
+                self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)


 if __name__ == '__main__':
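
A note on the strategy used throughout the new tests: every pipeline is built twice, once with transformers.pipelines.is_torch_available patched to return False (forcing the TensorFlow code path) and once with is_tf_available patched to return False (forcing PyTorch), so both backends are exercised in a single environment. A standalone sketch of that technique, reusing a model/tokenizer pair from QA_FINETUNED_MODELS above (the surrounding script is illustrative, not part of the commit):

from unittest.mock import patch

from transformers import pipeline

# Hide PyTorch from the pipeline factory so it must build the TensorFlow backend.
with patch('transformers.pipelines.is_torch_available', return_value=False):
    tf_nlp = pipeline(task='question-answering',
                      model='distilbert-base-uncased-distilled-squad',
                      tokenizer='bert-base-uncased')

# Hide TensorFlow to exercise the PyTorch code path with the same checkpoint.
with patch('transformers.pipelines.is_tf_available', return_value=False):
    pt_nlp = pipeline(task='question-answering',
                      model='distilbert-base-uncased-distilled-squad',
                      tokenizer='bert-base-uncased')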