transformers · Commit 0ebda538 (unverified)

Authored Aug 26, 2021 by Nicolas Patry; committed by GitHub, Aug 26, 2021

Moving `table-question-answering` pipeline to new testing. (#13280)
Parent: 879fe8fa

Showing 2 changed files with 204 additions and 150 deletions (+204, -150)
src/transformers/models/tapas/modeling_tapas.py (+1, -0)
tests/test_pipelines_table_question_answering.py (+203, -150)
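For context, the migrated tests exercise the pipeline through calls of the shape below. This is a minimal usage sketch assembled from the calls that appear in the diff, not code from the commit itself; it assumes a PyTorch environment with torch-scatter and pandas installed (the same requirements the tests declare via @require_torch, @require_torch_scatter and @require_pandas) and uses the google/tapas-base-finetuned-wtq checkpoint that the old test file already listed under large_models.

# Minimal sketch (not part of the commit): querying a table with the
# "table-question-answering" pipeline, mirroring the calls in the tests below.
# Assumes torch, torch-scatter and pandas are installed.
from transformers import pipeline

table_querier = pipeline(
    "table-question-answering",
    model="google/tapas-base-finetuned-wtq",
)

table = {
    "Repository": ["Transformers", "Datasets", "Tokenizers"],
    "Stars": ["36542", "4512", "3934"],
    "Contributors": ["651", "77", "34"],
    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
}

# A single question returns a dict with "answer", "coordinates", "cells"
# (and, for WTQ-style models, an "aggregator" field).
print(table_querier(table=table, query="What is the total amount of stars?"))

# A list of questions returns a list of such dicts; sequential=True answers
# the questions one after another rather than as a single batch, which is
# what the tiny-tapas-random-sqa tests compare against batch mode.
print(
    table_querier(
        table=table,
        query=[
            "What repository has the largest number of stars?",
            "What is the average number of stars?",
        ],
        sequential=True,
    )
)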
src/transformers/models/tapas/modeling_tapas.py

@@ -62,6 +62,7 @@ if is_scatter_available():
 _CONFIG_FOR_DOC = "TapasConfig"
 _TOKENIZER_FOR_DOC = "TapasTokenizer"
 _TOKENIZER_FOR_DOC = "google/tapas-base"
+_CHECKPOINT_FOR_DOC = "google/tapas-base"
 TAPAS_PRETRAINED_MODEL_ARCHIVE_LIST = [
     # large models
...
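The only change to the modeling file is the new module-level _CHECKPOINT_FOR_DOC constant. Presumably it is there so the shared utilities that expect a per-model default checkpoint (such as the code-sample docstring helpers) can resolve one for TAPAS; its value mirrors the google/tapas-base checkpoint already referenced on the preceding line.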
tests/test_pipelines_table_question_answering.py

@@ -14,203 +14,256 @@
Old version of the changed region (removed, built on CustomInputPipelineCommonMixin):

import unittest

from transformers.pipelines import Pipeline, pipeline
from transformers.testing_utils import require_pandas, require_torch, require_torch_scatter, slow

from .test_pipelines_common import CustomInputPipelineCommonMixin


@require_torch_scatter
@require_torch
@require_pandas
class TQAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
    pipeline_task = "table-question-answering"
    pipeline_running_kwargs = {
        "padding": "max_length",
    }
    small_models = [
        "lysandre/tiny-tapas-random-wtq",
        "lysandre/tiny-tapas-random-sqa",
    ]
    large_models = ["google/tapas-base-finetuned-wtq"]  # Models tested with the @slow decorator
    valid_inputs = [
        {
            "table": {
                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
                "age": ["56", "45", "59"],
                "number of movies": ["87", "53", "69"],
                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
            },
            "query": "how many movies has george clooney played in?",
        },
        {
            "table": {
                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
                "age": ["56", "45", "59"],
                "number of movies": ["87", "53", "69"],
                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
            },
            "query": [
                "how many movies has george clooney played in?",
                "how old is he?",
                "what's his date of birth?",
            ],
        },
        {
            "table": {
                "Repository": ["Transformers", "Datasets", "Tokenizers"],
                "Stars": ["36542", "4512", "3934"],
                "Contributors": ["651", "77", "34"],
                "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
            },
            "query": [
                "What repository has the largest number of stars?",
                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
                "What is the number of repositories?",
                "What is the average number of stars?",
                "What is the total amount of stars?",
            ],
        },
    ]

    def _test_pipeline(self, table_querier: Pipeline):
        output_keys = {"answer", "coordinates", "cells"}
        valid_inputs = self.valid_inputs
        invalid_inputs = [
            {"query": "What does it do with empty context ?", "table": ""},
            {"query": "What does it do with empty context ?", "table": None},
        ]
        self.assertIsNotNone(table_querier)

        mono_result = table_querier(valid_inputs[0])
        self.assertIsInstance(mono_result, dict)
        for key in output_keys:
            self.assertIn(key, mono_result)

        multi_result = table_querier(valid_inputs)
        self.assertIsInstance(multi_result, list)
        for result in multi_result:
            self.assertIsInstance(result, (list, dict))

        for result in multi_result:
            if isinstance(result, list):
                for _result in result:
                    for key in output_keys:
                        self.assertIn(key, _result)
            else:
                for key in output_keys:
                    self.assertIn(key, result)

        for bad_input in invalid_inputs:
            self.assertRaises(ValueError, table_querier, bad_input)
        self.assertRaises(ValueError, table_querier, invalid_inputs)

    def test_aggregation(self):
        table_querier = pipeline(
            "table-question-answering",
            model="lysandre/tiny-tapas-random-wtq",
            tokenizer="lysandre/tiny-tapas-random-wtq",
        )
        self.assertIsInstance(table_querier.model.config.aggregation_labels, dict)
        self.assertIsInstance(table_querier.model.config.no_aggregation_label_index, int)

        mono_result = table_querier(self.valid_inputs[0])
        multi_result = table_querier(self.valid_inputs)
        self.assertIn("aggregator", mono_result)

        for result in multi_result:
            if isinstance(result, list):
                for _result in result:
                    self.assertIn("aggregator", _result)
            else:
                self.assertIn("aggregator", result)

    def test_aggregation_with_sequential(self):
        table_querier = pipeline(
            "table-question-answering",
            model="lysandre/tiny-tapas-random-wtq",
            tokenizer="lysandre/tiny-tapas-random-wtq",
        )
        self.assertIsInstance(table_querier.model.config.aggregation_labels, dict)
        self.assertIsInstance(table_querier.model.config.no_aggregation_label_index, int)

        with self.assertRaises(ValueError):
            table_querier(
                {
                    "table": {},
                    "query": "how many movies has george clooney played in?",
                }
            )
        with self.assertRaises(ValueError):
            table_querier(
                {
                    "query": "how many movies has george clooney played in?",
                }
            )
        with self.assertRaises(ValueError):
            table_querier(
                {
                    "table": {
                        "Repository": ["Transformers", "Datasets", "Tokenizers"],
                        "Stars": ["36542", "4512", "3934"],
                        "Contributors": ["651", "77", "34"],
                        "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
                    },
                    "query": "",
                }
            )
        with self.assertRaises(ValueError):
            table_querier(
                {
                    "table": {
                        "Repository": ["Transformers", "Datasets", "Tokenizers"],
                        "Stars": ["36542", "4512", "3934"],
                        "Contributors": ["651", "77", "34"],
                        "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
                    },
                }
            )

    def test_empty_errors(self):
        table_querier = pipeline(
            "table-question-answering",
            model="lysandre/tiny-tapas-random-wtq",
            tokenizer="lysandre/tiny-tapas-random-wtq",
        )
        mono_result = table_querier(self.valid_inputs[0], sequential=True)
        multi_result = table_querier(self.valid_inputs, sequential=True)
        self.assertIn("aggregator", mono_result)

        for result in multi_result:
            if isinstance(result, list):
                for _result in result:
                    self.assertIn("aggregator", _result)
            else:
                self.assertIn("aggregator", result)

    def test_sequential(self):
        table_querier = pipeline(
            "table-question-answering",
            model="lysandre/tiny-tapas-random-sqa",
            tokenizer="lysandre/tiny-tapas-random-sqa",
        )
        sequential_mono_result_0 = table_querier(self.valid_inputs[0], sequential=True)
        sequential_mono_result_1 = table_querier(self.valid_inputs[1], sequential=True)
        sequential_multi_result = table_querier(self.valid_inputs, sequential=True)
        mono_result_0 = table_querier(self.valid_inputs[0])
        mono_result_1 = table_querier(self.valid_inputs[1])
        multi_result = table_querier(self.valid_inputs)

        # First valid input has a single question, the dict should be equal
        self.assertDictEqual(sequential_mono_result_0, mono_result_0)

        # Second valid input has several questions, the questions following the first one should not be equal
        self.assertNotEqual(sequential_mono_result_1, mono_result_1)

        # Assert that we get the same results when passing in several sequences.
        for index, (sequential_multi, multi) in enumerate(zip(sequential_multi_result, multi_result)):
            if index == 0:
                self.assertDictEqual(sequential_multi, multi)
            else:
                self.assertNotEqual(sequential_multi, multi)

    @slow
    def test_integration_wtq(self):
        ...

New version of the changed region (added, built on PipelineTestCaseMeta):

import unittest

from transformers import (
    MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
    AutoModelForTableQuestionAnswering,
    AutoTokenizer,
    TableQuestionAnsweringPipeline,
    pipeline,
)
from transformers.testing_utils import (
    is_pipeline_test,
    require_pandas,
    require_tf,
    require_torch,
    require_torch_scatter,
    slow,
)

from .test_pipelines_common import PipelineTestCaseMeta


@require_torch_scatter
@require_torch
@require_pandas
@is_pipeline_test
class TQAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    # Putting it there for consistency, but TQA do not have fast tokenizer
    # which are needed to generate automatic tests
    model_mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING

    @require_tf
    @unittest.skip("Table question answering not implemented in TF")
    def test_small_model_tf(self):
        pass

    @require_torch
    def test_small_model_pt(self):
        model_id = "lysandre/tiny-tapas-random-wtq"
        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.assertIsInstance(model.config.aggregation_labels, dict)
        self.assertIsInstance(model.config.no_aggregation_label_index, int)

        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
        outputs = table_querier(
            table={
                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
                "age": ["56", "45", "59"],
                "number of movies": ["87", "53", "69"],
                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
            },
            query="how many movies has george clooney played in?",
        )
        self.assertEqual(
            outputs,
            {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
        )

        outputs = table_querier(
            table={
                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
                "age": ["56", "45", "59"],
                "number of movies": ["87", "53", "69"],
                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
            },
            query=[
                "how many movies has george clooney played in?",
                "how old is he?",
                "what's his date of birth?",
            ],
        )
        self.assertEqual(
            outputs,
            [
                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
            ],
        )

        outputs = table_querier(
            table={
                "Repository": ["Transformers", "Datasets", "Tokenizers"],
                "Stars": ["36542", "4512", "3934"],
                "Contributors": ["651", "77", "34"],
                "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
            },
            query=[
                "What repository has the largest number of stars?",
                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
                "What is the number of repositories?",
                "What is the average number of stars?",
                "What is the total amount of stars?",
            ],
        )
        self.assertEqual(
            outputs,
            [
                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
                {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"},
            ],
        )

        with self.assertRaises(ValueError):
            table_querier(query="What does it do with empty context ?", table=None)
        with self.assertRaises(ValueError):
            table_querier(query="What does it do with empty context ?", table="")
        with self.assertRaises(ValueError):
            table_querier(query="What does it do with empty context ?", table={})
        with self.assertRaises(ValueError):
            table_querier(
                table={
                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
                    "Stars": ["36542", "4512", "3934"],
                    "Contributors": ["651", "77", "34"],
                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
                }
            )
        with self.assertRaises(ValueError):
            table_querier(
                query="",
                table={
                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
                    "Stars": ["36542", "4512", "3934"],
                    "Contributors": ["651", "77", "34"],
                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
                },
            )
        with self.assertRaises(ValueError):
            table_querier(
                query=None,
                table={
                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
                    "Stars": ["36542", "4512", "3934"],
                    "Contributors": ["651", "77", "34"],
                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
                },
            )

    def test_slow_tokenizer_sqa(self):
        model_id = "lysandre/tiny-tapas-random-sqa"
        model = AutoModelForTableQuestionAnswering.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)

        inputs = {
            "table": {
                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
                "age": ["56", "45", "59"],
                "number of movies": ["87", "53", "69"],
                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
            },
            "query": ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"],
        }
        sequential_outputs = table_querier(**inputs, sequential=True)
        batch_outputs = table_querier(**inputs, sequential=False)

        self.assertEqual(len(sequential_outputs), 3)
        self.assertEqual(len(batch_outputs), 3)
        self.assertEqual(sequential_outputs[0], batch_outputs[0])
        self.assertNotEqual(sequential_outputs[1], batch_outputs[1])
        # self.assertNotEqual(sequential_outputs[2], batch_outputs[2])

        table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer)
        outputs = table_querier(
            table={
                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
                "age": ["56", "45", "59"],
                "number of movies": ["87", "53", "69"],
                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
            },
            query="how many movies has george clooney played in?",
        )
        self.assertEqual(
            outputs,
            {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
        )

        outputs = table_querier(
            table={
                "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
                "age": ["56", "45", "59"],
                "number of movies": ["87", "53", "69"],
                "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
            },
            query=[
                "how many movies has george clooney played in?",
                "how old is he?",
                "what's his date of birth?",
            ],
        )
        self.assertEqual(
            outputs,
            [
                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
                {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]},
            ],
        )

        outputs = table_querier(
            table={
                "Repository": ["Transformers", "Datasets", "Tokenizers"],
                "Stars": ["36542", "4512", "3934"],
                "Contributors": ["651", "77", "34"],
                "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
            },
            query=[
                "What repository has the largest number of stars?",
                "Given that the numbers of stars defines if a repository is active, what repository is the most active?",
                "What is the number of repositories?",
                "What is the average number of stars?",
                "What is the total amount of stars?",
            ],
        )
        self.assertEqual(
            outputs,
            [
                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
                {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]},
            ],
        )

        with self.assertRaises(ValueError):
            table_querier(query="What does it do with empty context ?", table=None)
        with self.assertRaises(ValueError):
            table_querier(query="What does it do with empty context ?", table="")
        with self.assertRaises(ValueError):
            table_querier(query="What does it do with empty context ?", table={})
        with self.assertRaises(ValueError):
            table_querier(
                table={
                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
                    "Stars": ["36542", "4512", "3934"],
                    "Contributors": ["651", "77", "34"],
                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
                }
            )
        with self.assertRaises(ValueError):
            table_querier(
                query="",
                table={
                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
                    "Stars": ["36542", "4512", "3934"],
                    "Contributors": ["651", "77", "34"],
                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
                },
            )
        with self.assertRaises(ValueError):
            table_querier(
                query=None,
                table={
                    "Repository": ["Transformers", "Datasets", "Tokenizers"],
                    "Stars": ["36542", "4512", "3934"],
                    "Contributors": ["651", "77", "34"],
                    "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
                },
            )

    @slow
    def test_integration_wtq(self):
        ...
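In summary, the hand-written CustomInputPipelineCommonMixin harness (the valid_inputs fixtures and the _test_pipeline driver) is dropped in favour of a test class driven by PipelineTestCaseMeta. The model_mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING attribute is declared for consistency with the other migrated pipeline tests, but, as the inline comment notes, the automatically generated tests need fast tokenizers, which TAPAS does not have; the concrete coverage therefore comes from the explicit test_small_model_pt, test_slow_tokenizer_sqa and the @slow integration tests.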