Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
db0795b5
Commit
db0795b5
authored
Dec 20, 2019
by
thomwolf
Browse files
defaults models for tf and pt - update tests
parent
7f740845
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
50 additions
and
9 deletions
+50
-9
transformers/pipelines.py
transformers/pipelines.py
+18
-5
transformers/tests/pipelines_test.py
transformers/tests/pipelines_test.py
+32
-4
No files found.
transformers/pipelines.py
View file @
db0795b5
...
@@ -776,7 +776,10 @@ SUPPORTED_TASKS = {
...
@@ -776,7 +776,10 @@ SUPPORTED_TASKS = {
'tf'
:
TFAutoModel
if
is_tf_available
()
else
None
,
'tf'
:
TFAutoModel
if
is_tf_available
()
else
None
,
'pt'
:
AutoModel
if
is_torch_available
()
else
None
,
'pt'
:
AutoModel
if
is_torch_available
()
else
None
,
'default'
:
{
'default'
:
{
'model'
:
'distilbert-base-uncased'
,
'model'
:
{
'pt'
:
'distilbert-base-uncased'
,
'tf'
:
'distilbert-base-uncased'
,
},
'config'
:
None
,
'config'
:
None
,
'tokenizer'
:
'distilbert-base-uncased'
'tokenizer'
:
'distilbert-base-uncased'
}
}
...
@@ -786,7 +789,10 @@ SUPPORTED_TASKS = {
...
@@ -786,7 +789,10 @@ SUPPORTED_TASKS = {
'tf'
:
TFAutoModelForSequenceClassification
if
is_tf_available
()
else
None
,
'tf'
:
TFAutoModelForSequenceClassification
if
is_tf_available
()
else
None
,
'pt'
:
AutoModelForSequenceClassification
if
is_torch_available
()
else
None
,
'pt'
:
AutoModelForSequenceClassification
if
is_torch_available
()
else
None
,
'default'
:
{
'default'
:
{
'model'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-pytorch_model.bin'
,
'model'
:
{
'pt'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-pytorch_model.bin'
,
'tf'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-tf_model.h5'
,
},
'config'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json'
,
'config'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json'
,
'tokenizer'
:
'distilbert-base-uncased'
'tokenizer'
:
'distilbert-base-uncased'
}
}
...
@@ -796,7 +802,10 @@ SUPPORTED_TASKS = {
...
@@ -796,7 +802,10 @@ SUPPORTED_TASKS = {
'tf'
:
TFAutoModelForTokenClassification
if
is_tf_available
()
else
None
,
'tf'
:
TFAutoModelForTokenClassification
if
is_tf_available
()
else
None
,
'pt'
:
AutoModelForTokenClassification
if
is_torch_available
()
else
None
,
'pt'
:
AutoModelForTokenClassification
if
is_torch_available
()
else
None
,
'default'
:
{
'default'
:
{
'model'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-pytorch_model.bin'
,
'model'
:
{
'pt'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-pytorch_model.bin'
,
'tf'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-tf_model.h5'
,
},
'config'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json'
,
'config'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json'
,
'tokenizer'
:
'bert-large-cased'
'tokenizer'
:
'bert-large-cased'
}
}
...
@@ -806,7 +815,10 @@ SUPPORTED_TASKS = {
...
@@ -806,7 +815,10 @@ SUPPORTED_TASKS = {
'tf'
:
TFAutoModelForQuestionAnswering
if
is_tf_available
()
else
None
,
'tf'
:
TFAutoModelForQuestionAnswering
if
is_tf_available
()
else
None
,
'pt'
:
AutoModelForQuestionAnswering
if
is_torch_available
()
else
None
,
'pt'
:
AutoModelForQuestionAnswering
if
is_torch_available
()
else
None
,
'default'
:
{
'default'
:
{
'model'
:
'distilbert-base-uncased-distilled-squad'
,
'model'
:
{
'pt'
:
'distilbert-base-uncased-distilled-squad'
,
'tf'
:
'distilbert-base-uncased-distilled-squad'
,
},
'config'
:
None
,
'config'
:
None
,
'tokenizer'
:
'distilbert-base-uncased'
'tokenizer'
:
'distilbert-base-uncased'
}
}
...
@@ -843,7 +855,8 @@ def pipeline(task: str, model: Optional = None,
...
@@ -843,7 +855,8 @@ def pipeline(task: str, model: Optional = None,
# Use default model/config/tokenizer for the task if no model is provided
# Use default model/config/tokenizer for the task if no model is provided
if
model
is
None
:
if
model
is
None
:
model
,
config
,
tokenizer
=
tuple
(
targeted_task
[
'default'
].
values
())
models
,
config
,
tokenizer
=
tuple
(
targeted_task
[
'default'
].
values
())
model
=
models
[
framework
]
# Try to infer tokenizer from model or config name (if provided as str)
# Try to infer tokenizer from model or config name (if provided as str)
if
tokenizer
is
None
:
if
tokenizer
is
None
:
...
...
transformers/tests/pipelines_test.py
View file @
db0795b5
...
@@ -11,6 +11,20 @@ QA_FINETUNED_MODELS = {
...
@@ -11,6 +11,20 @@ QA_FINETUNED_MODELS = {
(
'bert-base-uncased'
,
'distilbert-base-uncased-distilled-squad'
,
None
)
(
'bert-base-uncased'
,
'distilbert-base-uncased-distilled-squad'
,
None
)
}
}
TF_QA_FINETUNED_MODELS
=
{
(
'bert-base-uncased'
,
'bert-large-uncased-whole-word-masking-finetuned-squad'
,
None
),
(
'bert-base-cased'
,
'bert-large-cased-whole-word-masking-finetuned-squad'
,
None
),
(
'bert-base-uncased'
,
'distilbert-base-uncased-distilled-squad'
,
None
)
}
TF_NER_FINETUNED_MODELS
=
{
(
'bert-base-cased'
,
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-tf_model.h5'
,
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json'
)
}
NER_FINETUNED_MODELS
=
{
NER_FINETUNED_MODELS
=
{
(
(
'bert-base-cased'
,
'bert-base-cased'
,
...
@@ -25,6 +39,20 @@ FEATURE_EXTRACT_FINETUNED_MODELS = {
...
@@ -25,6 +39,20 @@ FEATURE_EXTRACT_FINETUNED_MODELS = {
(
'distilbert-base-uncased'
,
'distilbert-base-uncased'
,
None
)
(
'distilbert-base-uncased'
,
'distilbert-base-uncased'
,
None
)
}
}
TF_FEATURE_EXTRACT_FINETUNED_MODELS
=
{
(
'bert-base-cased'
,
'bert-base-cased'
,
None
),
# ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crashes under TF2
(
'distilbert-base-uncased'
,
'distilbert-base-uncased'
,
None
)
}
TF_TEXT_CLASSIF_FINETUNED_MODELS
=
{
(
'bert-base-uncased'
,
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-tf_model.h5'
,
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json'
)
}
TEXT_CLASSIF_FINETUNED_MODELS
=
{
TEXT_CLASSIF_FINETUNED_MODELS
=
{
(
(
'bert-base-uncased'
,
'bert-base-uncased'
,
...
@@ -75,7 +103,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
...
@@ -75,7 +103,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
mandatory_keys
=
{
'entity'
,
'word'
,
'score'
}
mandatory_keys
=
{
'entity'
,
'word'
,
'score'
}
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
invalid_inputs
=
[
None
]
invalid_inputs
=
[
None
]
for
tokenizer
,
model
,
config
in
NER_FINETUNED_MODELS
:
for
tokenizer
,
model
,
config
in
TF_
NER_FINETUNED_MODELS
:
nlp
=
pipeline
(
task
=
'ner'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
nlp
=
pipeline
(
task
=
'ner'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
mandatory_keys
)
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
mandatory_keys
)
...
@@ -93,7 +121,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
...
@@ -93,7 +121,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
mandatory_keys
=
{
'label'
}
mandatory_keys
=
{
'label'
}
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
invalid_inputs
=
[
None
]
invalid_inputs
=
[
None
]
for
tokenizer
,
model
,
config
in
TEXT_CLASSIF_FINETUNED_MODELS
:
for
tokenizer
,
model
,
config
in
TF_
TEXT_CLASSIF_FINETUNED_MODELS
:
nlp
=
pipeline
(
task
=
'sentiment-analysis'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
nlp
=
pipeline
(
task
=
'sentiment-analysis'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
mandatory_keys
)
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
mandatory_keys
)
...
@@ -109,7 +137,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
...
@@ -109,7 +137,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
def
test_tf_features_extraction
(
self
):
def
test_tf_features_extraction
(
self
):
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
invalid_inputs
=
[
None
]
invalid_inputs
=
[
None
]
for
tokenizer
,
model
,
config
in
FEATURE_EXTRACT_FINETUNED_MODELS
:
for
tokenizer
,
model
,
config
in
TF_
FEATURE_EXTRACT_FINETUNED_MODELS
:
nlp
=
pipeline
(
task
=
'sentiment-analysis'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
nlp
=
pipeline
(
task
=
'sentiment-analysis'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
{})
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
{})
...
@@ -173,7 +201,7 @@ class MultiColumnInputTestCase(unittest.TestCase):
...
@@ -173,7 +201,7 @@ class MultiColumnInputTestCase(unittest.TestCase):
{
'question'
:
'What is does with empty context ?'
,
'context'
:
None
},
{
'question'
:
'What is does with empty context ?'
,
'context'
:
None
},
]
]
for
tokenizer
,
model
,
config
in
QA_FINETUNED_MODELS
:
for
tokenizer
,
model
,
config
in
TF_
QA_FINETUNED_MODELS
:
nlp
=
pipeline
(
task
=
'question-answering'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
nlp
=
pipeline
(
task
=
'question-answering'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
self
.
_test_multicolumn_pipeline
(
nlp
,
valid_samples
,
invalid_samples
,
mandatory_output_keys
)
self
.
_test_multicolumn_pipeline
(
nlp
,
valid_samples
,
invalid_samples
,
mandatory_output_keys
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment