Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
db0795b5
Commit
db0795b5
authored
Dec 20, 2019
by
thomwolf
Browse files
defaults models for tf and pt - update tests
parent
7f740845
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
50 additions
and
9 deletions
+50
-9
transformers/pipelines.py
transformers/pipelines.py
+18
-5
transformers/tests/pipelines_test.py
transformers/tests/pipelines_test.py
+32
-4
No files found.
transformers/pipelines.py
View file @
db0795b5
...
@@ -776,7 +776,10 @@ SUPPORTED_TASKS = {
...
@@ -776,7 +776,10 @@ SUPPORTED_TASKS = {
'tf'
:
TFAutoModel
if
is_tf_available
()
else
None
,
'tf'
:
TFAutoModel
if
is_tf_available
()
else
None
,
'pt'
:
AutoModel
if
is_torch_available
()
else
None
,
'pt'
:
AutoModel
if
is_torch_available
()
else
None
,
'default'
:
{
'default'
:
{
'model'
:
'distilbert-base-uncased'
,
'model'
:
{
'pt'
:
'distilbert-base-uncased'
,
'tf'
:
'distilbert-base-uncased'
,
},
'config'
:
None
,
'config'
:
None
,
'tokenizer'
:
'distilbert-base-uncased'
'tokenizer'
:
'distilbert-base-uncased'
}
}
...
@@ -786,7 +789,10 @@ SUPPORTED_TASKS = {
...
@@ -786,7 +789,10 @@ SUPPORTED_TASKS = {
'tf'
:
TFAutoModelForSequenceClassification
if
is_tf_available
()
else
None
,
'tf'
:
TFAutoModelForSequenceClassification
if
is_tf_available
()
else
None
,
'pt'
:
AutoModelForSequenceClassification
if
is_torch_available
()
else
None
,
'pt'
:
AutoModelForSequenceClassification
if
is_torch_available
()
else
None
,
'default'
:
{
'default'
:
{
'model'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-pytorch_model.bin'
,
'model'
:
{
'pt'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-pytorch_model.bin'
,
'tf'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-tf_model.h5'
,
},
'config'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json'
,
'config'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json'
,
'tokenizer'
:
'distilbert-base-uncased'
'tokenizer'
:
'distilbert-base-uncased'
}
}
...
@@ -796,7 +802,10 @@ SUPPORTED_TASKS = {
...
@@ -796,7 +802,10 @@ SUPPORTED_TASKS = {
'tf'
:
TFAutoModelForTokenClassification
if
is_tf_available
()
else
None
,
'tf'
:
TFAutoModelForTokenClassification
if
is_tf_available
()
else
None
,
'pt'
:
AutoModelForTokenClassification
if
is_torch_available
()
else
None
,
'pt'
:
AutoModelForTokenClassification
if
is_torch_available
()
else
None
,
'default'
:
{
'default'
:
{
'model'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-pytorch_model.bin'
,
'model'
:
{
'pt'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-pytorch_model.bin'
,
'tf'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-tf_model.h5'
,
},
'config'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json'
,
'config'
:
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json'
,
'tokenizer'
:
'bert-large-cased'
'tokenizer'
:
'bert-large-cased'
}
}
...
@@ -806,7 +815,10 @@ SUPPORTED_TASKS = {
...
@@ -806,7 +815,10 @@ SUPPORTED_TASKS = {
'tf'
:
TFAutoModelForQuestionAnswering
if
is_tf_available
()
else
None
,
'tf'
:
TFAutoModelForQuestionAnswering
if
is_tf_available
()
else
None
,
'pt'
:
AutoModelForQuestionAnswering
if
is_torch_available
()
else
None
,
'pt'
:
AutoModelForQuestionAnswering
if
is_torch_available
()
else
None
,
'default'
:
{
'default'
:
{
'model'
:
'distilbert-base-uncased-distilled-squad'
,
'model'
:
{
'pt'
:
'distilbert-base-uncased-distilled-squad'
,
'tf'
:
'distilbert-base-uncased-distilled-squad'
,
},
'config'
:
None
,
'config'
:
None
,
'tokenizer'
:
'distilbert-base-uncased'
'tokenizer'
:
'distilbert-base-uncased'
}
}
...
@@ -843,7 +855,8 @@ def pipeline(task: str, model: Optional = None,
...
@@ -843,7 +855,8 @@ def pipeline(task: str, model: Optional = None,
# Use default model/config/tokenizer for the task if no model is provided
# Use default model/config/tokenizer for the task if no model is provided
if
model
is
None
:
if
model
is
None
:
model
,
config
,
tokenizer
=
tuple
(
targeted_task
[
'default'
].
values
())
models
,
config
,
tokenizer
=
tuple
(
targeted_task
[
'default'
].
values
())
model
=
models
[
framework
]
# Try to infer tokenizer from model or config name (if provided as str)
# Try to infer tokenizer from model or config name (if provided as str)
if
tokenizer
is
None
:
if
tokenizer
is
None
:
...
...
transformers/tests/pipelines_test.py
View file @
db0795b5
...
@@ -11,6 +11,20 @@ QA_FINETUNED_MODELS = {
...
@@ -11,6 +11,20 @@ QA_FINETUNED_MODELS = {
(
'bert-base-uncased'
,
'distilbert-base-uncased-distilled-squad'
,
None
)
(
'bert-base-uncased'
,
'distilbert-base-uncased-distilled-squad'
,
None
)
}
}
TF_QA_FINETUNED_MODELS
=
{
(
'bert-base-uncased'
,
'bert-large-uncased-whole-word-masking-finetuned-squad'
,
None
),
(
'bert-base-cased'
,
'bert-large-cased-whole-word-masking-finetuned-squad'
,
None
),
(
'bert-base-uncased'
,
'distilbert-base-uncased-distilled-squad'
,
None
)
}
TF_NER_FINETUNED_MODELS
=
{
(
'bert-base-cased'
,
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-tf_model.h5'
,
'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-finetuned-conll03-english-config.json'
)
}
NER_FINETUNED_MODELS
=
{
NER_FINETUNED_MODELS
=
{
(
(
'bert-base-cased'
,
'bert-base-cased'
,
...
@@ -25,6 +39,20 @@ FEATURE_EXTRACT_FINETUNED_MODELS = {
...
@@ -25,6 +39,20 @@ FEATURE_EXTRACT_FINETUNED_MODELS = {
(
'distilbert-base-uncased'
,
'distilbert-base-uncased'
,
None
)
(
'distilbert-base-uncased'
,
'distilbert-base-uncased'
,
None
)
}
}
TF_FEATURE_EXTRACT_FINETUNED_MODELS
=
{
(
'bert-base-cased'
,
'bert-base-cased'
,
None
),
# ('xlnet-base-cased', 'xlnet-base-cased', None), # Disabled for now as it crashes for TF2
(
'distilbert-base-uncased'
,
'distilbert-base-uncased'
,
None
)
}
TF_TEXT_CLASSIF_FINETUNED_MODELS
=
{
(
'bert-base-uncased'
,
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-tf_model.h5'
,
'https://s3.amazonaws.com/models.huggingface.co/bert/distilbert-base-uncased-finetuned-sst-2-english-config.json'
)
}
TEXT_CLASSIF_FINETUNED_MODELS
=
{
TEXT_CLASSIF_FINETUNED_MODELS
=
{
(
(
'bert-base-uncased'
,
'bert-base-uncased'
,
...
@@ -75,7 +103,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
...
@@ -75,7 +103,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
mandatory_keys
=
{
'entity'
,
'word'
,
'score'
}
mandatory_keys
=
{
'entity'
,
'word'
,
'score'
}
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
invalid_inputs
=
[
None
]
invalid_inputs
=
[
None
]
for
tokenizer
,
model
,
config
in
NER_FINETUNED_MODELS
:
for
tokenizer
,
model
,
config
in
TF_
NER_FINETUNED_MODELS
:
nlp
=
pipeline
(
task
=
'ner'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
nlp
=
pipeline
(
task
=
'ner'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
mandatory_keys
)
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
mandatory_keys
)
...
@@ -93,7 +121,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
...
@@ -93,7 +121,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
mandatory_keys
=
{
'label'
}
mandatory_keys
=
{
'label'
}
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
invalid_inputs
=
[
None
]
invalid_inputs
=
[
None
]
for
tokenizer
,
model
,
config
in
TEXT_CLASSIF_FINETUNED_MODELS
:
for
tokenizer
,
model
,
config
in
TF_
TEXT_CLASSIF_FINETUNED_MODELS
:
nlp
=
pipeline
(
task
=
'sentiment-analysis'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
nlp
=
pipeline
(
task
=
'sentiment-analysis'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
mandatory_keys
)
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
mandatory_keys
)
...
@@ -109,7 +137,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
...
@@ -109,7 +137,7 @@ class MonoColumnInputTestCase(unittest.TestCase):
def
test_tf_features_extraction
(
self
):
def
test_tf_features_extraction
(
self
):
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
valid_inputs
=
[
'HuggingFace is solving NLP one commit at a time.'
,
'HuggingFace is based in New-York & Paris'
]
invalid_inputs
=
[
None
]
invalid_inputs
=
[
None
]
for
tokenizer
,
model
,
config
in
FEATURE_EXTRACT_FINETUNED_MODELS
:
for
tokenizer
,
model
,
config
in
TF_
FEATURE_EXTRACT_FINETUNED_MODELS
:
nlp
=
pipeline
(
task
=
'sentiment-analysis'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
nlp
=
pipeline
(
task
=
'sentiment-analysis'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
{})
self
.
_test_mono_column_pipeline
(
nlp
,
valid_inputs
,
invalid_inputs
,
{})
...
@@ -173,7 +201,7 @@ class MultiColumnInputTestCase(unittest.TestCase):
...
@@ -173,7 +201,7 @@ class MultiColumnInputTestCase(unittest.TestCase):
{
'question'
:
'What is does with empty context ?'
,
'context'
:
None
},
{
'question'
:
'What is does with empty context ?'
,
'context'
:
None
},
]
]
for
tokenizer
,
model
,
config
in
QA_FINETUNED_MODELS
:
for
tokenizer
,
model
,
config
in
TF_
QA_FINETUNED_MODELS
:
nlp
=
pipeline
(
task
=
'question-answering'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
nlp
=
pipeline
(
task
=
'question-answering'
,
model
=
model
,
config
=
config
,
tokenizer
=
tokenizer
)
self
.
_test_multicolumn_pipeline
(
nlp
,
valid_samples
,
invalid_samples
,
mandatory_output_keys
)
self
.
_test_multicolumn_pipeline
(
nlp
,
valid_samples
,
invalid_samples
,
mandatory_output_keys
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment