Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
896a0eb1
Unverified
Commit
896a0eb1
authored
Jan 13, 2020
by
Thomas Wolf
Committed by
GitHub
Jan 13, 2020
Browse files
Merge pull request #2459 from Perseus14/patch-4
Update pipelines.py
parents
a3085020
0d6c17fc
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
64 additions
and
48 deletions
+64
-48
src/transformers/pipelines.py
src/transformers/pipelines.py
+64
-48
No files found.
src/transformers/pipelines.py
View file @
896a0eb1
...
@@ -705,55 +705,71 @@ class QuestionAnsweringPipeline(Pipeline):
...
@@ -705,55 +705,71 @@ class QuestionAnsweringPipeline(Pipeline):
# Convert inputs to features
# Convert inputs to features
examples
=
self
.
_args_parser
(
*
texts
,
**
kwargs
)
examples
=
self
.
_args_parser
(
*
texts
,
**
kwargs
)
features
=
squad_convert_examples_to_features
(
features_list
=
[
examples
,
self
.
tokenizer
,
kwargs
[
"max_seq_len"
],
kwargs
[
"doc_stride"
],
kwargs
[
"max_question_len"
],
False
squad_convert_examples_to_features
(
)
[
example
],
fw_args
=
self
.
inputs_for_model
([
f
.
__dict__
for
f
in
features
])
self
.
tokenizer
,
kwargs
[
"max_seq_len"
],
kwargs
[
"doc_stride"
],
kwargs
[
"max_question_len"
],
False
,
)
for
example
in
examples
]
all_answers
=
[]
for
features
,
example
in
zip
(
features_list
,
examples
):
fw_args
=
self
.
inputs_for_model
([
f
.
__dict__
for
f
in
features
])
# Manage tensor allocation on correct device
# Manage tensor allocation on correct device
with
self
.
device_placement
():
with
self
.
device_placement
():
if
self
.
framework
==
"tf"
:
if
self
.
framework
==
"tf"
:
fw_args
=
{
k
:
tf
.
constant
(
v
)
for
(
k
,
v
)
in
fw_args
.
items
()}
fw_args
=
{
k
:
tf
.
constant
(
v
)
for
(
k
,
v
)
in
fw_args
.
items
()}
start
,
end
=
self
.
model
(
fw_args
)
start
,
end
=
self
.
model
(
fw_args
)
start
,
end
=
start
.
numpy
(),
end
.
numpy
()
start
,
end
=
start
.
numpy
(),
end
.
numpy
()
else
:
else
:
with
torch
.
no_grad
():
with
torch
.
no_grad
():
# Retrieve the score for the context tokens only (removing question tokens)
# Retrieve the score for the context tokens only (removing question tokens)
fw_args
=
{
k
:
torch
.
tensor
(
v
,
device
=
self
.
device
)
for
(
k
,
v
)
in
fw_args
.
items
()}
fw_args
=
{
k
:
torch
.
tensor
(
v
,
device
=
self
.
device
)
for
(
k
,
v
)
in
fw_args
.
items
()}
start
,
end
=
self
.
model
(
**
fw_args
)
start
,
end
=
self
.
model
(
**
fw_args
)
start
,
end
=
start
.
cpu
().
numpy
(),
end
.
cpu
().
numpy
()
start
,
end
=
start
.
cpu
().
numpy
(),
end
.
cpu
().
numpy
()
answers
=
[]
answers
=
[]
for
(
example
,
feature
,
start_
,
end_
)
in
zip
(
examples
,
features
,
start
,
end
):
for
(
feature
,
start_
,
end_
)
in
zip
(
features
,
start
,
end
):
# Normalize logits and spans to retrieve the answer
# Normalize logits and spans to retrieve the answer
start_
=
np
.
exp
(
start_
)
/
np
.
sum
(
np
.
exp
(
start_
))
start_
=
np
.
exp
(
start_
)
/
np
.
sum
(
np
.
exp
(
start_
))
end_
=
np
.
exp
(
end_
)
/
np
.
sum
(
np
.
exp
(
end_
))
end_
=
np
.
exp
(
end_
)
/
np
.
sum
(
np
.
exp
(
end_
))
# Mask padding and question
# Mask padding and question
start_
,
end_
=
start_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
),
end_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
)
start_
,
end_
=
(
start_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
),
# TODO : What happens if not possible
end_
*
np
.
abs
(
np
.
array
(
feature
.
p_mask
)
-
1
),
# Mask CLS
)
start_
[
0
]
=
end_
[
0
]
=
0
# TODO : What happens if not possible
starts
,
ends
,
scores
=
self
.
decode
(
start_
,
end_
,
kwargs
[
"topk"
],
kwargs
[
"max_answer_len"
])
# Mask CLS
char_to_word
=
np
.
array
(
example
.
char_to_word_offset
)
start_
[
0
]
=
end_
[
0
]
=
0
# Convert the answer (tokens) back to the original text
starts
,
ends
,
scores
=
self
.
decode
(
start_
,
end_
,
kwargs
[
"topk"
],
kwargs
[
"max_answer_len"
])
answers
+=
[
char_to_word
=
np
.
array
(
example
.
char_to_word_offset
)
{
"score"
:
score
.
item
(),
# Convert the answer (tokens) back to the original text
"start"
:
np
.
where
(
char_to_word
==
feature
.
token_to_orig_map
[
s
])[
0
][
0
].
item
(),
answers
+=
[
"end"
:
np
.
where
(
char_to_word
==
feature
.
token_to_orig_map
[
e
])[
0
][
-
1
].
item
(),
{
"answer"
:
" "
.
join
(
"score"
:
score
.
item
(),
example
.
doc_tokens
[
feature
.
token_to_orig_map
[
s
]
:
feature
.
token_to_orig_map
[
e
]
+
1
]
"start"
:
np
.
where
(
char_to_word
==
feature
.
token_to_orig_map
[
s
])[
0
][
0
].
item
(),
),
"end"
:
np
.
where
(
char_to_word
==
feature
.
token_to_orig_map
[
e
])[
0
][
-
1
].
item
(),
}
"answer"
:
" "
.
join
(
for
s
,
e
,
score
in
zip
(
starts
,
ends
,
scores
)
example
.
doc_tokens
[
feature
.
token_to_orig_map
[
s
]
:
feature
.
token_to_orig_map
[
e
]
+
1
]
]
),
if
len
(
answers
)
==
1
:
}
return
answers
[
0
]
for
s
,
e
,
score
in
zip
(
starts
,
ends
,
scores
)
return
answers
]
answers
=
sorted
(
answers
,
key
=
lambda
x
:
x
[
"score"
],
reverse
=
True
)[:
kwargs
[
"topk"
]]
all_answers
+=
answers
if
len
(
all_answers
)
==
1
:
return
all_answers
[
0
]
return
all_answers
def
decode
(
self
,
start
:
np
.
ndarray
,
end
:
np
.
ndarray
,
topk
:
int
,
max_answer_len
:
int
)
->
Tuple
:
def
decode
(
self
,
start
:
np
.
ndarray
,
end
:
np
.
ndarray
,
topk
:
int
,
max_answer_len
:
int
)
->
Tuple
:
"""
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment