Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
bf119c05
"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "d7a4c3252ed5e630b7fb6e4b4616daddfe574fc5"
Commit
bf119c05
authored
Dec 04, 2019
by
LysandreJik
Browse files
TFDS dataset can now be evaluated
parent
9ddc3f1a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
25 additions
and
9 deletions
+25
-9
transformers/data/processors/squad.py
transformers/data/processors/squad.py
+25
-9
No files found.
transformers/data/processors/squad.py
View file @
bf119c05
...
...
@@ -245,22 +245,37 @@ class SquadProcessor(DataProcessor):
train_file
=
None
dev_file
=
None
def
get_example_from_tensor_dict
(
self
,
tensor_dict
):
def
get_example_from_tensor_dict
(
self
,
tensor_dict
,
evaluate
=
False
):
if
not
evaluate
:
answer
=
tensor_dict
[
'answers'
][
'text'
][
0
].
numpy
().
decode
(
'utf-8'
)
answer_start
=
tensor_dict
[
'answers'
][
'answer_start'
][
0
].
numpy
()
answers
=
None
else
:
answers
=
[{
"answer_start"
:
start
.
numpy
(),
"text"
:
text
.
numpy
().
decode
(
'utf-8'
)
}
for
start
,
text
in
zip
(
tensor_dict
[
'answers'
][
"answer_start"
],
tensor_dict
[
'answers'
][
"text"
])]
answer
=
None
answer_start
=
None
return
SquadExample
(
tensor_dict
[
'id'
].
numpy
().
decode
(
"utf-8"
),
tensor_dict
[
'question'
].
numpy
().
decode
(
'utf-8'
),
tensor_dict
[
'context'
].
numpy
().
decode
(
'utf-8'
),
tensor_dict
[
'answers'
][
'text'
][
0
].
numpy
().
decode
(
'utf-8'
),
tensor_dict
[
'answers'
][
'answer_start'
][
0
].
numpy
(),
tensor_dict
[
'title'
].
numpy
().
decode
(
'utf-8'
)
qas_id
=
tensor_dict
[
'id'
].
numpy
().
decode
(
"utf-8"
),
question_text
=
tensor_dict
[
'question'
].
numpy
().
decode
(
'utf-8'
),
context_text
=
tensor_dict
[
'context'
].
numpy
().
decode
(
'utf-8'
),
answer_text
=
answer
,
start_position_character
=
answer_start
,
title
=
tensor_dict
[
'title'
].
numpy
().
decode
(
'utf-8'
),
answers
=
answers
)
def
get_examples_from_dataset
(
self
,
dataset
):
def
get_examples_from_dataset
(
self
,
dataset
,
evaluate
=
False
):
"""See base class."""
examples
=
[]
for
tensor_dict
in
tqdm
(
dataset
):
examples
.
append
(
self
.
get_example_from_tensor_dict
(
tensor_dict
))
examples
.
append
(
self
.
get_example_from_tensor_dict
(
tensor_dict
,
evaluate
=
evaluate
))
return
examples
...
...
@@ -300,6 +315,7 @@ class SquadProcessor(DataProcessor):
question_text
=
qa
[
"question"
]
start_position_character
=
None
answer_text
=
None
answers
=
None
if
"is_impossible"
in
qa
:
is_impossible
=
qa
[
"is_impossible"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment