"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "b0ad06951708b782e45b02a4d092f6fcde68a9b9"
Unverified Commit 2977bd52 authored by Sam Shleifer, committed by GitHub

Faster pegasus tokenization test with reduced data size (#7762)

parent 2d6e2ad4
...
@@ -57,7 +57,7 @@ class PegasusTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     @require_torch
     def test_pegasus_large_seq2seq_truncation(self):
-        src_texts = ["This is going to be way too long" * 10000, "short example"]
+        src_texts = ["This is going to be way too long." * 150, "short example"]
         tgt_texts = ["not super long but more than 5 tokens", "tiny"]
         batch = self.pegasus_large_tokenizer.prepare_seq2seq_batch(src_texts, tgt_texts=tgt_texts, max_target_length=5)
         assert batch.input_ids.shape == (2, 1024)
...
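For context, here is a minimal sketch (not part of the commit) of why 150 repetitions of the sentence are still enough to exercise truncation: pegasus-large caps inputs at 1024 tokens, and the repeated sentence tokenizes to well over that, so the `(2, 1024)` shape assertion still holds while the test tokenizes far less text than with 10000 repetitions. The checkpoint name and the standalone tokenizer calls below are assumptions for illustration, not taken from the diff.

```python
# Illustrative sketch only (assumes the "google/pegasus-large" checkpoint and
# network access); it is not the test code from this commit.
from transformers import PegasusTokenizer

tok = PegasusTokenizer.from_pretrained("google/pegasus-large")
long_text = "This is going to be way too long." * 150

# The untruncated input is still longer than the 1024-token model limit...
print(len(tok(long_text).input_ids) > 1024)  # True

# ...so truncating to the model max length still yields exactly 1024 tokens,
# which is what the test's `batch.input_ids.shape == (2, 1024)` assertion checks.
print(len(tok(long_text, truncation=True, max_length=1024).input_ids))  # 1024
```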