chenpangpang / transformers / Commits / 61d9ee45

Commit 61d9ee45, authored Dec 20, 2019 by Morgan Funtowicz

    All tests are green.

Parent: e516a34a
Showing 2 changed files with 157 additions and 105 deletions:

    transformers/pipelines.py              +5    -4
    transformers/tests/pipelines_test.py   +152  -101
transformers/pipelines.py

@@ -343,8 +343,9 @@ class Pipeline(_ScikitCompat):
         if 'distilbert' not in model_type and 'xlm' not in model_type:
             args += ['token_type_ids']

+        if 'xlnet' in model_type or 'xlm' in model_type:
+            args += ['cls_index', 'p_mask']
+
         # PR #1548 (CLI) There is an issue with attention_mask
-        # if 'xlnet' in model_type or 'xlm' in model_type:
-        #     args += ['cls_index', 'p_mask']

         if isinstance(features, dict):
             return {k: features[k] for k in args}
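For readers skimming the hunk: the key-selection logic can be exercised on its own. A minimal sketch (ours, not part of the commit; the baseline 'input_ids'/'attention_mask' keys are an assumption, and `features` stands in for the tokenizer's encoded output):

    # Standalone sketch of the argument filtering above (assumed baseline keys).
    def filter_args(model_type, features=None):
        args = ['input_ids', 'attention_mask']
        if 'distilbert' not in model_type and 'xlm' not in model_type:
            args += ['token_type_ids']       # distilbert/xlm models don't take these
        if 'xlnet' in model_type or 'xlm' in model_type:
            args += ['cls_index', 'p_mask']  # extra tensors used by xlnet/xlm QA heads
        if isinstance(features, dict):
            return {k: features[k] for k in args}
        return args

    print(filter_args('distilbert'))  # ['input_ids', 'attention_mask']
    print(filter_args('xlnet'))       # adds 'token_type_ids', 'cls_index', 'p_mask'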
@@ -380,7 +381,7 @@ class Pipeline(_ScikitCompat):
             predictions = self.model(inputs, training=False)[0]
         else:
             with torch.no_grad():
-                predictions = self.model(**inputs).cpu()[0]
+                predictions = self.model(**inputs)[0].cpu()

         return predictions.numpy()
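The swap of `[0]` and `.cpu()` matters because, at this point in the library, a PyTorch model call returns a tuple of tensors; `.cpu()` exists on tensors, not on tuples. A toy reproduction (ours):

    import torch

    outputs = (torch.randn(1, 5),)    # stands in for self.model(**inputs)
    try:
        outputs.cpu()[0]              # old order: a tuple has no .cpu()
    except AttributeError as e:
        print(e)
    predictions = outputs[0].cpu()    # new order: index the tuple, then move the tensor
    print(predictions.numpy().shape)  # (1, 5)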
@@ -444,7 +445,7 @@ class NerPipeline(Pipeline):
             # Forward
             if is_tf_available():
-                entities = self.model(**tokens)[0][0].numpy()
+                entities = self.model(tokens)[0][0].numpy()
             else:
                 with torch.no_grad():
                     entities = self.model(**tokens)[0][0].cpu().numpy()
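The NerPipeline fix reflects the two frameworks' calling conventions: a TF/Keras model takes the whole encoded dict as one positional input (`self.model(tokens)`), while a PyTorch module takes the tensors as keyword arguments (`self.model(**tokens)`). A toy PyTorch-side illustration (hypothetical module, ours):

    import torch

    class ToyModel(torch.nn.Module):
        # Mirrors the keyword-argument signature of the PyTorch models.
        def forward(self, input_ids=None, attention_mask=None):
            return (input_ids,)

    tokens = {'input_ids': torch.ones(1, 4, dtype=torch.long),
              'attention_mask': torch.ones(1, 4, dtype=torch.long)}
    out = ToyModel()(**tokens)  # PyTorch: unpack the dict into keywords
    # A Keras model is instead called as model(tokens): the dict itself is the input.
    print(out[0].shape)         # torch.Size([1, 4])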
transformers/tests/pipelines_test.py

import unittest
from unittest.mock import patch
from typing import Iterable

from transformers import pipeline
from transformers.tests.utils import require_tf, require_torch


-QA_FINETUNED_MODELS = {
-    'bert-large-uncased-whole-word-masking-finetuned-squad',
-    'bert-large-cased-whole-word-masking-finetuned-squad',
-    'distilbert-base-uncased-distilled-squad'
-}
+QA_FINETUNED_MODELS = {
+    ('bert-base-uncased', 'bert-large-uncased-whole-word-masking-finetuned-squad', None),
+    ('bert-base-cased', 'bert-large-cased-whole-word-masking-finetuned-squad', None),
+    ('bert-base-uncased', 'distilbert-base-uncased-distilled-squad', None)
+}

+NER_FINETUNED_MODELS = {
+    ('bert-base-cased',
+     'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-pytorch_model.bin',
+     'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json')
+}

+FEATURE_EXTRACT_FINETUNED_MODELS = {
+    ('bert-base-cased', 'bert-base-cased', None),
+    # ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crashes for TF2
+    ('distilbert-base-uncased', 'distilbert-base-uncased', None)
+}

+TEXT_CLASSIF_FINETUNED_MODELS = {
+    ('bert-base-uncased',
+     'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-pytorch_model.bin',
+     'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json')
+}
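Each entry in the new sets is a (tokenizer, model, config) triple that the tests below unpack straight into pipeline(). A usage sketch (ours, not part of the commit; identifiers taken from QA_FINETUNED_MODELS, and running it downloads the listed weights):

    tokenizer, model, config = ('bert-base-uncased',
                                'bert-large-uncased-whole-word-masking-finetuned-squad',
                                None)
    nlp = pipeline(task='question-answering', model=model, config=config, tokenizer=tokenizer)
    print(nlp({'question': 'Where was HuggingFace founded ?',
               'context': 'HuggingFace was founded in Paris.'}))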
-class QuestionAnsweringPipelineTest(unittest.TestCase):
-    def check_answer_structure(self, answer, batch, topk):
-        self.assertIsInstance(answer, list)
-        self.assertEqual(len(answer), batch)
-        self.assertIsInstance(answer[0], list)
-        self.assertEqual(len(answer[0]), topk)
-        self.assertIsInstance(answer[0][0], dict)
-
-        for item in answer[0]:
-            self.assertTrue('start' in item)
-            self.assertTrue('end' in item)
-            self.assertTrue('score' in item)
-            self.assertTrue('answer' in item)
-
-    def question_answering_pipeline(self, nlp):
-        # Simple case with topk = 1, no batching
-        a = nlp(question='What is the name of the company I\'m working for ?',
-                context='I\'m working for Huggingface.')
-        self.check_answer_structure(a, 1, 1)
-
-        # Simple case with topk = 2, no batching
-        a = nlp(question='What is the name of the company I\'m working for ?',
-                context='I\'m working for Huggingface.', topk=2)
-        self.check_answer_structure(a, 1, 2)
-
-        # Batch case with topk = 1
-        a = nlp(question=['What is the name of the company I\'m working for ?',
-                          'Where is the company based ?'],
-                context=['I\'m working for Huggingface.',
-                         'The company is based in New York and Paris'])
-        self.check_answer_structure(a, 2, 1)
-
-        # Batch case with topk = 2
-        a = nlp(question=['What is the name of the company I\'m working for ?',
-                          'Where is the company based ?'],
-                context=['Where is the company based ?',
-                         'The company is based in New York and Paris'], topk=2)
-        self.check_answer_structure(a, 2, 2)
-
-        # check for data keyword
-        a = nlp(data=nlp.create_sample(question='What is the name of the company I\'m working for ?',
-                                       context='I\'m working for Huggingface.'))
-        self.check_answer_structure(a, 1, 1)
-
-        a = nlp(data=nlp.create_sample(question='What is the name of the company I\'m working for ?',
-                                       context='I\'m working for Huggingface.'), topk=2)
-        self.check_answer_structure(a, 1, 2)
-
-        a = nlp(data=[
-            nlp.create_sample(question='What is the name of the company I\'m working for ?',
-                              context='I\'m working for Huggingface.'),
-            nlp.create_sample(question='I\'m working for Huggingface.',
-                              context='The company is based in New York and Paris'),
-        ])
-        self.check_answer_structure(a, 2, 1)
-
-        a = nlp(data=[
-            {'question': 'What is the name of the company I\'m working for ?',
-             'context': 'I\'m working for Huggingface.'},
-            {'question': 'Where is the company based ?',
-             'context': 'The company is based in New York and Paris'},
-        ])
-        self.check_answer_structure(a, 2, 1)
-
-        # X keywords
-        a = nlp(X=nlp.create_sample(question='Where is the company based ?',
-                                    context='The company is based in New York and Paris'))
-        self.check_answer_structure(a, 1, 1)
-
-        a = nlp(X=[
-            {'question': 'What is the name of the company I\'m working for ?',
-             'context': 'I\'m working for Huggingface.'},
-            {'question': 'Where is the company based ?',
-             'context': 'The company is based in New York and Paris'},
-        ], topk=2)
-        self.check_answer_structure(a, 2, 2)
-
-    @patch('transformers.pipelines.is_torch_available', return_value=False)
-    def test_tf_models(self, is_torch_available):
-        from transformers import pipeline
-        for model in QA_FINETUNED_MODELS:
-            self.question_answering_pipeline(pipeline('question-answering', model))
-
-    @patch('transformers.pipelines.is_tf_available', return_value=False)
-    @patch('transformers.tokenization_utils.is_tf_available', return_value=False)
-    def test_torch_models(self, is_tf_available, _):
-        from transformers import pipeline
-        for model in QA_FINETUNED_MODELS:
-            self.question_answering_pipeline(pipeline('question-answering', model))
-
-
-class AutoPipelineTest(unittest.TestCase):
-    @patch('transformers.pipelines.is_torch_available', return_value=False)
-    def test_tf_qa(self, is_torch_available):
-        from transformers import pipeline
-        from transformers.pipelines import QuestionAnsweringPipeline
-        from transformers.modeling_tf_utils import TFPreTrainedModel
-        for model in QA_FINETUNED_MODELS:
-            nlp = pipeline('question-answering', model)
-            self.assertIsInstance(nlp, QuestionAnsweringPipeline)
-            self.assertIsInstance(nlp.model, TFPreTrainedModel)
-
-    @patch('transformers.pipelines.is_tf_available', return_value=False)
-    def test_torch_qa(self, is_tf_available):
-        from transformers import pipeline
-        from transformers.pipelines import QuestionAnsweringPipeline
-        from transformers.modeling_utils import PreTrainedModel
-        for model in QA_FINETUNED_MODELS:
-            nlp = pipeline('question-answering', model)
-            self.assertIsInstance(nlp, QuestionAnsweringPipeline)
-            self.assertIsInstance(nlp.model, PreTrainedModel)
+@require_tf
+def tf_pipeline(*args, **kwargs):
+    return pipeline(**kwargs)
+
+
+@require_torch
+def torch_pipeline(*args, **kwargs):
+    return pipeline(**kwargs)
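These helpers pair a skip decorator (@require_tf / @require_torch) with the patch() calls used in the test bodies: the decorator skips when the framework is missing, and the patch hides the other framework so pipeline() picks the intended backend. A minimal sketch of the patching half (ours; the patch target string is the one used throughout this diff):

    from unittest.mock import patch

    import transformers.pipelines

    with patch('transformers.pipelines.is_torch_available', return_value=False):
        # Inside this block, pipeline() believes torch is absent and builds TF models.
        assert transformers.pipelines.is_torch_available() is False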
+class MonoColumnInputTestCase(unittest.TestCase):
+    def _test_mono_column_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list,
+                                   output_keys: Iterable[str]):
+        self.assertIsNotNone(nlp)
+
+        mono_result = nlp(valid_inputs[0])
+        self.assertIsInstance(mono_result, list)
+        self.assertIsInstance(mono_result[0], (dict, list))
+
+        if isinstance(mono_result[0], list):
+            mono_result = mono_result[0]
+
+        for key in output_keys:
+            self.assertIn(key, mono_result[0])
+
+        multi_result = nlp(valid_inputs)
+        self.assertIsInstance(multi_result, list)
+        self.assertIsInstance(multi_result[0], (dict, list))
+
+        if isinstance(multi_result[0], list):
+            multi_result = multi_result[0]
+
+        for result in multi_result:
+            for key in output_keys:
+                self.assertIn(key, result)
+
+        self.assertRaises(Exception, nlp, invalid_inputs)
+
+    def test_ner(self):
+        mandatory_keys = {'entity', 'word', 'score'}
+        valid_inputs = ['HuggingFace is solving NLP one commit at a time.',
+                        'HuggingFace is based in New-York & Paris']
+        invalid_inputs = [None]
+        for tokenizer, model, config in NER_FINETUNED_MODELS:
+            with patch('transformers.pipelines.is_torch_available', return_value=False):
+                nlp = tf_pipeline(task='ner', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+
+            with patch('transformers.pipelines.is_tf_available', return_value=False):
+                nlp = torch_pipeline(task='ner', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+
+    def test_sentiment_analysis(self):
+        mandatory_keys = {'label'}
+        valid_inputs = ['HuggingFace is solving NLP one commit at a time.',
+                        'HuggingFace is based in New-York & Paris']
+        invalid_inputs = [None]
+        for tokenizer, model, config in TEXT_CLASSIF_FINETUNED_MODELS:
+            with patch('transformers.pipelines.is_torch_available', return_value=False):
+                nlp = tf_pipeline(task='sentiment-analysis', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+
+            with patch('transformers.pipelines.is_tf_available', return_value=False):
+                nlp = torch_pipeline(task='sentiment-analysis', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, mandatory_keys)
+
+    def test_features_extraction(self):
+        valid_inputs = ['HuggingFace is solving NLP one commit at a time.',
+                        'HuggingFace is based in New-York & Paris']
+        invalid_inputs = [None]
+        for tokenizer, model, config in FEATURE_EXTRACT_FINETUNED_MODELS:
+            with patch('transformers.pipelines.is_torch_available', return_value=False):
+                nlp = tf_pipeline(task='sentiment-analysis', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
+
+            with patch('transformers.pipelines.is_tf_available', return_value=False):
+                nlp = torch_pipeline(task='sentiment-analysis', model=model, config=config, tokenizer=tokenizer)
+                self._test_mono_column_pipeline(nlp, valid_inputs, invalid_inputs, {})
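The unwrapping step in _test_mono_column_pipeline (mono_result = mono_result[0]) exists because the mono-column tasks differ in shape: NER and sentiment analysis return one dict per input, while feature extraction returns a list of per-token vectors per input. A toy illustration of that normalization (sample outputs are ours, shaped after the assertions above):

    ner_like = [{'entity': 'I-ORG', 'word': 'HuggingFace', 'score': 0.99}]
    features_like = [[[0.1, 0.2], [0.3, 0.4]]]  # one list of token vectors per input

    for result in (ner_like, features_like):
        if isinstance(result[0], list):  # feature-extraction case: unwrap the first input
            result = result[0]
        assert isinstance(result[0], (dict, list))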
+class MultiColumnInputTestCase(unittest.TestCase):
+    def _test_multicolumn_pipeline(self, nlp, valid_inputs: list, invalid_inputs: list,
+                                   output_keys: Iterable[str]):
+        self.assertIsNotNone(nlp)
+
+        mono_result = nlp(valid_inputs[0])
+        self.assertIsInstance(mono_result, dict)
+
+        for key in output_keys:
+            self.assertIn(key, mono_result)
+
+        multi_result = nlp(valid_inputs)
+        self.assertIsInstance(multi_result, list)
+        self.assertIsInstance(multi_result[0], dict)
+
+        for result in multi_result:
+            for key in output_keys:
+                self.assertIn(key, result)
+
+        self.assertRaises(Exception, nlp, invalid_inputs[0])
+        self.assertRaises(Exception, nlp, invalid_inputs)
+
+    def test_question_answering(self):
+        mandatory_output_keys = {'score', 'answer', 'start', 'end'}
+        valid_samples = [
+            {'question': 'Where was HuggingFace founded ?',
+             'context': 'HuggingFace was founded in Paris.'},
+            {'question': 'In what field is HuggingFace working ?',
+             'context': 'HuggingFace is a startup based in New-York founded in Paris which is trying to solve NLP.'}
+        ]
+        invalid_samples = [
+            {'question': '', 'context': 'This is a test to try empty question edge case'},
+            {'question': None, 'context': 'This is a test to try empty question edge case'},
+            {'question': 'What is does with empty context ?', 'context': ''},
+            {'question': 'What is does with empty context ?', 'context': None},
+        ]
+
+        for tokenizer, model, config in QA_FINETUNED_MODELS:
+            # Test for Tensorflow
+            with patch('transformers.pipelines.is_torch_available', return_value=False):
+                nlp = pipeline(task='question-answering', model=model, config=config, tokenizer=tokenizer)
+                self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)
+
+            # Test for PyTorch
+            with patch('transformers.pipelines.is_tf_available', return_value=False):
+                nlp = pipeline(task='question-answering', model=model, config=config, tokenizer=tokenizer)
+                self._test_multicolumn_pipeline(nlp, valid_samples, invalid_samples, mandatory_output_keys)
if __name__ == '__main__':
    ...
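To reproduce the commit message's claim locally, the suite can be run through unittest from the repository root. A sketch (ours; it assumes the repository layout at this commit, with the module path taken from the diff):

    import unittest

    from transformers.tests import pipelines_test

    # Load every TestCase defined in the module above and run it verbosely.
    suite = unittest.defaultTestLoader.loadTestsFromModule(pipelines_test)
    unittest.TextTestRunner(verbosity=2).run(suite)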