Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
1fa93ca1
Commit
1fa93ca1
authored
Dec 20, 2019
by
thomwolf
Browse files
Clean up framework handling
parent
ca6bdb28
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
58 additions
and
27 deletions
+58
-27
transformers/pipelines.py
transformers/pipelines.py
+58
-27
No files found.
transformers/pipelines.py
View file @
1fa93ca1
...
@@ -48,6 +48,19 @@ if is_torch_available():
...
@@ -48,6 +48,19 @@ if is_torch_available():
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
def get_framework(model=None):
    """Pick the deep-learning framework ('tf' or 'pt') the pipeline should run on.

    If both frameworks are installed and a model *instance* (not a string id) is
    supplied, the choice is guessed from the model's class name; otherwise the
    first available framework wins, TensorFlow taking precedence.

    Raises:
        ImportError: when neither TensorFlow 2.0 nor PyTorch is installed.
    """
    tf_ok, pt_ok = is_tf_available(), is_torch_available()
    if tf_ok and pt_ok and model is not None and not isinstance(model, str):
        # Both frameworks are available but the user supplied a model class
        # instance — guess from the class name ('TF*' classes are TensorFlow).
        return 'tf' if model.__class__.__name__.startswith('TF') else 'pt'
    if tf_ok:
        return 'tf'
    if pt_ok:
        return 'pt'
    raise ImportError("At least one of TensorFlow 2.0 or PyTorch should be installed. "
                      "To install TensorFlow 2.0, read the instructions at https://www.tensorflow.org/install/ "
                      "To install PyTorch, read the instructions at https://pytorch.org/.")
class
ArgumentHandler
(
ABC
):
class
ArgumentHandler
(
ABC
):
"""
"""
Base interface for handling varargs for each Pipeline
Base interface for handling varargs for each Pipeline
...
@@ -279,19 +292,23 @@ class Pipeline(_ScikitCompat):
...
@@ -279,19 +292,23 @@ class Pipeline(_ScikitCompat):
nlp = QuestionAnsweringPipeline(model=AutoModel.from_pretrained('...'), tokenizer='...')
nlp = QuestionAnsweringPipeline(model=AutoModel.from_pretrained('...'), tokenizer='...')
"""
"""
def __init__(self, model, tokenizer: PreTrainedTokenizer = None,
             modelcard: ModelCard = None,
             framework: Optional[str] = None,
             args_parser: ArgumentHandler = None,
             device: int = -1,
             binary_output: bool = False):
    """Store the pipeline configuration and place the model on the target device.

    Args:
        model: model instance (TF or PT) backing the pipeline.
        tokenizer: tokenizer used to encode inputs.
        modelcard: optional model card describing the model.
        framework: 'tf' or 'pt'; auto-detected via get_framework() when None.
        args_parser: varargs handler; defaults to DefaultArgumentHandler().
        device: -1 for CPU, otherwise the GPU ordinal (PyTorch only here).
        binary_output: whether __call__ returns raw (binary) output.
    """
    # Resolve the framework lazily when the caller did not pin one.
    if framework is None:
        framework = get_framework()

    self.model = model
    self.tokenizer = tokenizer
    self.modelcard = modelcard
    self.framework = framework
    self.device = device
    self.binary_output = binary_output
    self._args_parser = args_parser or DefaultArgumentHandler()

    # Special handling: PyTorch models must be moved to the GPU explicitly.
    if self.device >= 0 and self.framework == 'pt':
        self.model = self.model.to('cuda:{}'.format(self.device))
def
save_pretrained
(
self
,
save_directory
):
def
save_pretrained
(
self
,
save_directory
):
...
@@ -332,7 +349,7 @@ class Pipeline(_ScikitCompat):
...
@@ -332,7 +349,7 @@ class Pipeline(_ScikitCompat):
Returns:
Returns:
Context manager
Context manager
"""
"""
if
is_tf_available
()
:
if
self
.
framework
==
'tf'
:
with
tf
.
device
(
'/CPU:0'
if
self
.
device
==
-
1
else
'/device:GPU:{}'
.
format
(
self
.
device
)):
with
tf
.
device
(
'/CPU:0'
if
self
.
device
==
-
1
else
'/device:GPU:{}'
.
format
(
self
.
device
)):
yield
yield
else
:
else
:
...
@@ -371,7 +388,7 @@ class Pipeline(_ScikitCompat):
...
@@ -371,7 +388,7 @@ class Pipeline(_ScikitCompat):
with
self
.
device_placement
():
with
self
.
device_placement
():
inputs
=
self
.
tokenizer
.
batch_encode_plus
(
inputs
=
self
.
tokenizer
.
batch_encode_plus
(
inputs
,
add_special_tokens
=
True
,
inputs
,
add_special_tokens
=
True
,
return_tensors
=
'tf'
if
is_tf_available
()
else
'pt'
,
return_tensors
=
self
.
framework
,
max_length
=
self
.
tokenizer
.
max_len
max_length
=
self
.
tokenizer
.
max_len
)
)
...
@@ -387,7 +404,7 @@ class Pipeline(_ScikitCompat):
...
@@ -387,7 +404,7 @@ class Pipeline(_ScikitCompat):
Returns:
Returns:
Numpy array
Numpy array
"""
"""
if
is_tf_available
()
:
if
self
.
framework
==
'tf'
:
# TODO trace model
# TODO trace model
predictions
=
self
.
model
(
inputs
,
training
=
False
)[
0
]
predictions
=
self
.
model
(
inputs
,
training
=
False
)[
0
]
else
:
else
:
...
@@ -405,9 +422,16 @@ class FeatureExtractionPipeline(Pipeline):
...
@@ -405,9 +422,16 @@ class FeatureExtractionPipeline(Pipeline):
def __init__(self, model,
             tokenizer: PreTrainedTokenizer = None,
             modelcard: ModelCard = None,
             framework: Optional[str] = None,
             args_parser: ArgumentHandler = None,
             device: int = -1):
    """Build a feature-extraction pipeline.

    Forwards every argument to Pipeline.__init__ by keyword, forcing
    binary_output=True so raw hidden-state tensors are returned.
    """
    super().__init__(model=model,
                     tokenizer=tokenizer,
                     modelcard=modelcard,
                     framework=framework,
                     args_parser=args_parser,
                     device=device,
                     binary_output=True)
def __call__(self, *args, **kwargs):
    """Run the base pipeline and convert its tensor output to nested Python lists."""
    features = super().__call__(*args, **kwargs)
    return features.tolist()
...
@@ -430,10 +454,16 @@ class NerPipeline(Pipeline):
...
@@ -430,10 +454,16 @@ class NerPipeline(Pipeline):
"""
"""
def __init__(self, model, tokenizer: PreTrainedTokenizer = None,
             modelcard: ModelCard = None,
             framework: Optional[str] = None,
             args_parser: ArgumentHandler = None,
             device: int = -1,
             binary_output: bool = False):
    """Build a named-entity-recognition pipeline.

    Delegates configuration to Pipeline.__init__ (by keyword) and keeps a
    case-preserving BasicTokenizer for whitespace/punctuation splitting.
    """
    super().__init__(model=model,
                     tokenizer=tokenizer,
                     modelcard=modelcard,
                     framework=framework,
                     args_parser=args_parser,
                     device=device,
                     binary_output=binary_output)

    # Case must be preserved (do_lower_case=False) so entity spans map back
    # onto the original text.
    self._basic_tokenizer = BasicTokenizer(do_lower_case=False)
...
@@ -452,12 +482,12 @@ class NerPipeline(Pipeline):
...
@@ -452,12 +482,12 @@ class NerPipeline(Pipeline):
tokens
=
self
.
tokenizer
.
encode_plus
(
tokens
=
self
.
tokenizer
.
encode_plus
(
sentence
,
return_attention_mask
=
False
,
sentence
,
return_attention_mask
=
False
,
return_tensors
=
'tf'
if
is_tf_available
()
else
'pt'
,
return_tensors
=
self
.
framework
,
max_length
=
self
.
tokenizer
.
max_len
max_length
=
self
.
tokenizer
.
max_len
)
)
# Forward
# Forward
if
is_tf_available
()
:
if
self
.
framework
==
'tf'
:
entities
=
self
.
model
(
tokens
)[
0
][
0
].
numpy
()
entities
=
self
.
model
(
tokens
)[
0
][
0
].
numpy
()
else
:
else
:
with
torch
.
no_grad
():
with
torch
.
no_grad
():
...
@@ -549,6 +579,18 @@ class QuestionAnsweringPipeline(Pipeline):
...
@@ -549,6 +579,18 @@ class QuestionAnsweringPipeline(Pipeline):
Question Answering pipeline using ModelForQuestionAnswering head.
Question Answering pipeline using ModelForQuestionAnswering head.
"""
"""
def __init__(self, model, tokenizer: Optional[PreTrainedTokenizer],
             modelcard: Optional[ModelCard],
             framework: Optional[str] = None,
             device: int = -1, **kwargs):
    """Build a question-answering pipeline.

    Installs a QuestionAnsweringArgumentHandler as the argument parser and
    forwards everything else to Pipeline.__init__ by keyword.
    """
    super().__init__(model=model,
                     tokenizer=tokenizer,
                     modelcard=modelcard,
                     framework=framework,
                     args_parser=QuestionAnsweringArgumentHandler(),
                     device=device,
                     **kwargs)
@
staticmethod
@
staticmethod
def
create_sample
(
question
:
Union
[
str
,
List
[
str
]],
context
:
Union
[
str
,
List
[
str
]])
->
Union
[
SquadExample
,
List
[
SquadExample
]]:
def
create_sample
(
question
:
Union
[
str
,
List
[
str
]],
context
:
Union
[
str
,
List
[
str
]])
->
Union
[
SquadExample
,
List
[
SquadExample
]]:
"""
"""
...
@@ -567,12 +609,6 @@ class QuestionAnsweringPipeline(Pipeline):
...
@@ -567,12 +609,6 @@ class QuestionAnsweringPipeline(Pipeline):
else
:
else
:
return
SquadExample
(
None
,
question
,
context
,
None
,
None
,
None
)
return
SquadExample
(
None
,
question
,
context
,
None
,
None
,
None
)
def
__init__
(
self
,
model
,
tokenizer
:
Optional
[
PreTrainedTokenizer
],
modelcard
:
Optional
[
ModelCard
],
device
:
int
=
-
1
,
**
kwargs
):
super
().
__init__
(
model
,
tokenizer
,
modelcard
,
args_parser
=
QuestionAnsweringArgumentHandler
(),
device
=
device
,
**
kwargs
)
def
__call__
(
self
,
*
texts
,
**
kwargs
):
def
__call__
(
self
,
*
texts
,
**
kwargs
):
"""
"""
Args:
Args:
...
@@ -608,7 +644,7 @@ class QuestionAnsweringPipeline(Pipeline):
...
@@ -608,7 +644,7 @@ class QuestionAnsweringPipeline(Pipeline):
# Manage tensor allocation on correct device
# Manage tensor allocation on correct device
with
self
.
device_placement
():
with
self
.
device_placement
():
if
is_tf_available
()
:
if
self
.
framework
==
'tf'
:
fw_args
=
{
k
:
tf
.
constant
(
v
)
for
(
k
,
v
)
in
fw_args
.
items
()}
fw_args
=
{
k
:
tf
.
constant
(
v
)
for
(
k
,
v
)
in
fw_args
.
items
()}
start
,
end
=
self
.
model
(
fw_args
)
start
,
end
=
self
.
model
(
fw_args
)
start
,
end
=
start
.
numpy
(),
end
.
numpy
()
start
,
end
=
start
.
numpy
(),
end
.
numpy
()
...
@@ -798,15 +834,10 @@ def pipeline(task: str, model: Optional = None,
...
@@ -798,15 +834,10 @@ def pipeline(task: str, model: Optional = None,
if
task
not
in
SUPPORTED_TASKS
:
if
task
not
in
SUPPORTED_TASKS
:
raise
KeyError
(
"Unknown task {}, available tasks are {}"
.
format
(
task
,
list
(
SUPPORTED_TASKS
.
keys
())))
raise
KeyError
(
"Unknown task {}, available tasks are {}"
.
format
(
task
,
list
(
SUPPORTED_TASKS
.
keys
())))
pipeline_framework
=
'tf'
if
is_tf_available
()
else
(
'pt'
if
is_torch_available
()
else
None
)
framework
=
get_framework
(
model
)
if
pipeline_framework
is
None
:
raise
ImportError
(
"At least one of TensorFlow 2.0 or PyTorch should be installed. "
"To install TensorFlow 2.0, read the instructions at https://www.tensorflow.org/install/ "
"To install PyTorch, read the instructions at https://pytorch.org/."
)
targeted_task
=
SUPPORTED_TASKS
[
task
]
targeted_task
=
SUPPORTED_TASKS
[
task
]
task
,
model_class
=
targeted_task
[
'impl'
],
targeted_task
[
pipeline_
framework
]
task
,
model_class
=
targeted_task
[
'impl'
],
targeted_task
[
framework
]
# Use default model/config/tokenizer for the task if no model is provided
# Use default model/config/tokenizer for the task if no model is provided
if
model
is
None
:
if
model
is
None
:
...
@@ -843,14 +874,14 @@ def pipeline(task: str, model: Optional = None,
...
@@ -843,14 +874,14 @@ def pipeline(task: str, model: Optional = None,
if
isinstance
(
model
,
str
):
if
isinstance
(
model
,
str
):
# Handle transparent TF/PT model conversion
# Handle transparent TF/PT model conversion
model_kwargs
=
{}
model_kwargs
=
{}
if
pipeline_
framework
==
'pt'
and
model
.
endswith
(
'.h5'
):
if
framework
==
'pt'
and
model
.
endswith
(
'.h5'
):
model_kwargs
[
'from_tf'
]
=
True
model_kwargs
[
'from_tf'
]
=
True
logger
.
warning
(
'Model might be a TensorFlow model (ending with `.h5`) but TensorFlow is not available. '
logger
.
warning
(
'Model might be a TensorFlow model (ending with `.h5`) but TensorFlow is not available. '
'Trying to load the model with PyTorch.'
)
'Trying to load the model with PyTorch.'
)
elif
pipeline_
framework
==
'tf'
and
model
.
endswith
(
'.bin'
):
elif
framework
==
'tf'
and
model
.
endswith
(
'.bin'
):
model_kwargs
[
'from_pt'
]
=
True
model_kwargs
[
'from_pt'
]
=
True
logger
.
warning
(
'Model might be a PyTorch model (ending with `.bin`) but PyTorch is not available. '
logger
.
warning
(
'Model might be a PyTorch model (ending with `.bin`) but PyTorch is not available. '
'Trying to load the model with Tensorflow.'
)
'Trying to load the model with Tensorflow.'
)
model
=
model_class
.
from_pretrained
(
model
,
config
=
config
,
**
model_kwargs
)
model
=
model_class
.
from_pretrained
(
model
,
config
=
config
,
**
model_kwargs
)
return
task
(
model
,
tokenizer
,
**
kwargs
)
return
task
(
model
=
model
,
tokenizer
=
tokenizer
,
modelcard
=
modelcard
,
framework
=
framework
,
**
kwargs
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment