Unverified Commit d415882b authored by Lysandre Debut, committed by GitHub

Remove tolerance + drop_rows_to_fit by default (#9507)

* Remove tolerance + drop_rows_to_fit by default

* remove drop_rows_to_fit
parent 1243ee7d
@@ -255,7 +255,7 @@ class TapasTokenizer(PreTrainedTokenizer):
             value for :obj:`lowercase` (as in the original BERT).
         cell_trim_length (:obj:`int`, `optional`, defaults to -1):
             If > 0: Trim cells so that the length is <= this value. Also disables further cell trimming, should thus be
-            used with 'drop_rows_to_fit' below.
+            used with :obj:`truncation` set to :obj:`True`.
         max_column_id (:obj:`int`, `optional`):
             Max column id to extract.
         max_row_id (:obj:`int`, `optional`):
@@ -264,8 +264,6 @@ class TapasTokenizer(PreTrainedTokenizer):
             Whether to add empty strings instead of column names.
         update_answer_coordinates (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether to recompute the answer coordinates from the answer text.
-        drop_rows_to_fit (:obj:`bool`, `optional`, defaults to :obj:`False`):
-            Whether to drop the last rows if a table doesn't fit within max sequence length.
     """
@@ -292,7 +290,6 @@ class TapasTokenizer(PreTrainedTokenizer):
         max_row_id: int = None,
         strip_column_names: bool = False,
         update_answer_coordinates: bool = False,
-        drop_rows_to_fit: bool = False,
         model_max_length: int = 512,
         additional_special_tokens: Optional[List[str]] = None,
         **kwargs
@@ -323,7 +320,6 @@ class TapasTokenizer(PreTrainedTokenizer):
            max_row_id=max_row_id,
            strip_column_names=strip_column_names,
            update_answer_coordinates=update_answer_coordinates,
-           drop_rows_to_fit=drop_rows_to_fit,
            model_max_length=model_max_length,
            additional_special_tokens=additional_special_tokens,
            **kwargs,
@@ -352,7 +348,6 @@ class TapasTokenizer(PreTrainedTokenizer):
         self.max_row_id = max_row_id if max_row_id is not None else self.model_max_length
         self.strip_column_names = strip_column_names
         self.update_answer_coordinates = update_answer_coordinates
-        self.drop_rows_to_fit = drop_rows_to_fit

     @property
     def do_lower_case(self):
@@ -1122,7 +1117,7 @@ class TapasTokenizer(PreTrainedTokenizer):
             prev_answer_coordinates = kwargs["prev_answer_coordinates"]
             prev_answer_text = kwargs["prev_answer_text"]
-        num_rows = self._get_num_rows(raw_table, self.drop_rows_to_fit)
+        num_rows = self._get_num_rows(raw_table, truncation != TapasTruncationStrategy.DO_NOT_TRUNCATE)
         num_columns = self._get_num_columns(raw_table)
         _, _, num_tokens = self._get_table_boundaries(tokenized_table)
...
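With drop_rows_to_fit gone from the constructor, dropping table rows is now requested per call through the standard `truncation` argument. A minimal sketch of the new usage, assuming the google/tapas-base-finetuned-wtq checkpoint and an illustrative table (neither is prescribed by this commit):

    import pandas as pd
    from transformers import TapasTokenizer

    tokenizer = TapasTokenizer.from_pretrained("google/tapas-base-finetuned-wtq")

    # Illustrative table and query; TAPAS expects a pandas DataFrame of strings.
    table = pd.DataFrame({"Actors": ["Brad Pitt", "Leonardo Di Caprio"], "Age": ["56", "45"]})
    queries = ["How old is Brad Pitt?"]

    # Before this change: TapasTokenizer.from_pretrained(..., drop_rows_to_fit=True)
    # After: pass the truncation strategy on each call instead.
    inputs = tokenizer(
        table=table,
        queries=queries,
        padding="max_length",
        truncation="drop_rows_to_fit",  # or truncation=True
        return_tensors="pt",
    )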
@@ -540,9 +540,6 @@ def prepare_tapas_batch_inputs_for_training():
     return table, queries, answer_coordinates, answer_text, float_answer

-TOLERANCE = 1

 @require_torch
 @require_scatter
 class TapasModelIntegrationTest(unittest.TestCase):
@@ -574,12 +571,12 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
-        self.assertTrue(torch.allclose(outputs.last_hidden_state[:, :3, :3], expected_slice, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(outputs.last_hidden_state[:, :3, :3], expected_slice, atol=0.0005))

         # test the pooled output
         expected_slice = torch.tensor([[0.987518311, -0.970520139, -0.994303405]], device=torch_device)
-        self.assertTrue(torch.allclose(outputs.pooler_output[:, :3], expected_slice, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(outputs.pooler_output[:, :3], expected_slice, atol=0.0005))

     @unittest.skip(reason="Model not available yet")
     def test_inference_masked_lm(self):
@@ -634,7 +631,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
-        self.assertTrue(torch.allclose(logits, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits, expected_tensor, atol=0.015))

     @slow
     def test_inference_question_answering_head_conversational_absolute_embeddings(self):
@@ -683,7 +680,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
-        self.assertTrue(torch.allclose(logits, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits, expected_tensor, atol=0.01))

     @slow
     def test_inference_question_answering_head_weak_supervision(self):
@@ -710,7 +707,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
-        self.assertTrue(torch.allclose(logits[:, -6:], expected_slice, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits[:, -6:], expected_slice, atol=0.4))

         # test the aggregation logits
         logits_aggregation = outputs.logits_aggregation
@@ -721,7 +718,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
-        self.assertTrue(torch.allclose(logits_aggregation, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits_aggregation, expected_tensor, atol=0.001))

         # test the predicted answer coordinates and aggregation indices
         EXPECTED_PREDICTED_ANSWER_COORDINATES = [[(0, 0)], [(1, 2)]]
@@ -778,7 +775,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
         # test the loss
         loss = outputs.loss
         expected_loss = torch.tensor(3.3527612686157227e-08, device=torch_device)
-        self.assertTrue(torch.allclose(loss, expected_loss, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(loss, expected_loss, atol=1e-6))

         # test the logits on the first example
         logits = outputs.logits
@@ -799,7 +796,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
-        self.assertTrue(torch.allclose(logits[0, -9:], expected_slice, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits[0, -9:], expected_slice, atol=1e-6))

         # test the aggregation logits on the second example
         logits_aggregation = outputs.logits_aggregation
@@ -807,7 +804,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
         self.assertEqual(logits_aggregation.shape, expected_shape)
         expected_slice = torch.tensor([-4.0538, 40.0304, -5.3554, 23.3965], device=torch_device)
-        self.assertTrue(torch.allclose(logits_aggregation[1, -4:], expected_slice, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits_aggregation[1, -4:], expected_slice, atol=1e-4))

     @slow
     def test_inference_question_answering_head_strong_supervision(self):
@@ -854,7 +851,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             device=torch_device,
         )
-        self.assertTrue(torch.allclose(logits, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits, expected_tensor, atol=0.02))

         # test the aggregation logits
         logits_aggregation = outputs.logits_aggregation
@@ -864,7 +861,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             [[16.5659733, -3.06624889, -2.34152961, -0.970244825]], device=torch_device
         ) # PyTorch model outputs [[16.5679, -3.0668, -2.3442, -0.9674]]
-        self.assertTrue(torch.allclose(logits_aggregation, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(logits_aggregation, expected_tensor, atol=0.003))

     @slow
     def test_inference_classification_head(self):
@@ -885,7 +882,7 @@ class TapasModelIntegrationTest(unittest.TestCase):
             [[0.795137286, 9.5572]], device=torch_device
         ) # Note that the PyTorch model outputs [[0.8057, 9.5281]]
-        self.assertTrue(torch.allclose(outputs.logits, expected_tensor, atol=TOLERANCE))
+        self.assertTrue(torch.allclose(outputs.logits, expected_tensor, atol=0.05))

 # Below: tests for Tapas utilities which are defined in modeling_tapas.py.
...
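The test changes above drop the module-level TOLERANCE = 1 and give each assertion its own atol, chosen for the expected tensor it checks. A small self-contained sketch (the numbers below are illustrative, not taken from the test suite) of why a blanket atol of 1 could let real regressions pass:

    import torch

    expected = torch.tensor([0.9875, -0.9705, -0.9943])
    badly_off = expected + 0.5  # clearly wrong activations

    # torch.allclose passes when |actual - expected| <= atol + rtol * |expected|,
    # so atol=1 accepts errors on the order of the values themselves.
    print(torch.allclose(badly_off, expected, atol=1.0))     # True: the error slips through
    print(torch.allclose(badly_off, expected, atol=0.0005))  # False: caught with a tight tolerance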
@@ -290,7 +290,7 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     @slow
     def test_sequence_builders(self):
-        tokenizer = self.tokenizer_class.from_pretrained("nielsr/tapas-base-finetuned-wtq")
+        tokenizer = self.tokenizer_class.from_pretrained("google/tapas-base-finetuned-wtq")

         empty_table = self.get_table(tokenizer, length=0)
         table = self.get_table(tokenizer, length=10)
...