"...git@developer.sourcefind.cn:modelzoo/solov2-pytorch.git" did not exist on "c95c6373d5de2633afaccb96562c19d3b0ba8882"
Unverified commit 59b7334c, authored by Yih-Dar and committed by GitHub
Browse files

Fix `test_tf_encode_plus_sent_to_model` for `TAPAS` (#19559)


Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 1967be98
...@@ -143,8 +143,39 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -143,8 +143,39 @@ class TapasTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
return input_text, output_text return input_text, output_text
@require_tensorflow_probability
@slow
def test_tf_encode_plus_sent_to_model(self):
    """Check that TF encodings from the tokenizer can be fed to its TF model.

    For every tokenizer returned by ``self.get_tokenizers``, the matching
    config/model classes are looked up in the merged model-tokenizer
    mapping, a model is built from a default config, and both a single
    encoding and a batch encoding (``return_tensors="tf"``) are passed
    through the model.

    Unlike the inherited version of this test (previously invoked through
    ``super()``), the tokenizer is called with a table as its first
    argument in addition to the text.
    """
    # Imported locally, as in the original test body.
    from transformers import TF_MODEL_MAPPING, TOKENIZER_MAPPING

    MODEL_TOKENIZER_MAPPING = merge_model_tokenizer_mappings(TF_MODEL_MAPPING, TOKENIZER_MAPPING)

    tokenizers = self.get_tokenizers(do_lower_case=False)
    for tokenizer in tokenizers:
        with self.subTest(f"{tokenizer.__class__.__name__}"):
            # Use `continue` rather than `return`: an unsupported tokenizer
            # must not prevent the remaining tokenizers from being checked.
            if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
                continue

            config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
            config = config_class()

            # Encoder-decoder models and configs without a pad token are
            # not exercised by this test.
            if config.is_encoder_decoder or config.pad_token_id is None:
                continue

            model = model_class(config)

            # Make sure the model contains at least the full vocabulary size in its embedding matrix
            self.assertGreaterEqual(model.config.vocab_size, len(tokenizer))

            # Build sequence
            first_ten_tokens = list(tokenizer.get_vocab().keys())[:10]
            sequence = " ".join(first_ten_tokens)
            # NOTE(review): `get_table` is defined elsewhere in this class;
            # `length=0` presumably yields a table without data rows - confirm.
            table = self.get_table(tokenizer, length=0)
            encoded_sequence = tokenizer.encode_plus(table, sequence, return_tensors="tf")
            batch_encoded_sequence = tokenizer.batch_encode_plus(table, [sequence, sequence], return_tensors="tf")

            # This should not fail
            model(encoded_sequence)
            model(batch_encoded_sequence)
def test_rust_and_python_full_tokenizers(self): def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment