Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
563485bf
Unverified
Commit
563485bf
authored
Aug 30, 2020
by
Stas Bekman
Committed by
GitHub
Aug 30, 2020
Browse files
[tests] fix typos in inputs (#6818)
parent
22933e66
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
15 deletions
+15
-15
tests/test_tokenization_bart.py
tests/test_tokenization_bart.py
+7
-7
tests/test_tokenization_t5.py
tests/test_tokenization_t5.py
+8
-8
No files found.
tests/test_tokenization_bart.py
View file @
563485bf
...
...
@@ -69,12 +69,12 @@ class TestTokenizationBart(TokenizerTesterMixin, unittest.TestCase):
@
require_torch
def
test_prepare_seq2seq_batch
(
self
):
src_text
=
[
"A long paragraph for summrization."
,
"Another paragraph for summrization."
]
src_text
=
[
"A long paragraph for summ
a
rization."
,
"Another paragraph for summ
a
rization."
]
tgt_text
=
[
"Summary of the text."
,
"Another summary."
,
]
expected_src_tokens
=
[
0
,
250
,
251
,
17818
,
13
,
3
2933
,
21645
,
1
25
8
,
4
,
2
]
expected_src_tokens
=
[
0
,
250
,
251
,
17818
,
13
,
3
9186
,
1
93
8
,
4
,
2
]
for
tokenizer
in
[
self
.
default_tokenizer
,
self
.
default_tokenizer_fast
]:
batch
=
tokenizer
.
prepare_seq2seq_batch
(
...
...
@@ -82,8 +82,8 @@ class TestTokenizationBart(TokenizerTesterMixin, unittest.TestCase):
)
self
.
assertIsInstance
(
batch
,
BatchEncoding
)
self
.
assertEqual
((
2
,
10
),
batch
.
input_ids
.
shape
)
self
.
assertEqual
((
2
,
10
),
batch
.
attention_mask
.
shape
)
self
.
assertEqual
((
2
,
9
),
batch
.
input_ids
.
shape
)
self
.
assertEqual
((
2
,
9
),
batch
.
attention_mask
.
shape
)
result
=
batch
.
input_ids
.
tolist
()[
0
]
self
.
assertListEqual
(
expected_src_tokens
,
result
)
# Test that special tokens are reset
...
...
@@ -91,7 +91,7 @@ class TestTokenizationBart(TokenizerTesterMixin, unittest.TestCase):
# Test Prepare Seq
@
require_torch
def
test_seq2seq_batch_empty_target_text
(
self
):
src_text
=
[
"A long paragraph for summrization."
,
"Another paragraph for summrization."
]
src_text
=
[
"A long paragraph for summ
a
rization."
,
"Another paragraph for summ
a
rization."
]
for
tokenizer
in
[
self
.
default_tokenizer
,
self
.
default_tokenizer_fast
]:
batch
=
tokenizer
.
prepare_seq2seq_batch
(
src_text
,
return_tensors
=
"pt"
)
# check if input_ids are returned and no labels
...
...
@@ -102,7 +102,7 @@ class TestTokenizationBart(TokenizerTesterMixin, unittest.TestCase):
@
require_torch
def
test_seq2seq_batch_max_target_length
(
self
):
src_text
=
[
"A long paragraph for summrization."
,
"Another paragraph for summrization."
]
src_text
=
[
"A long paragraph for summ
a
rization."
,
"Another paragraph for summ
a
rization."
]
tgt_text
=
[
"Summary of the text."
,
"Another summary."
,
...
...
@@ -131,7 +131,7 @@ class TestTokenizationBart(TokenizerTesterMixin, unittest.TestCase):
@
require_torch
def
test_special_tokens
(
self
):
src_text
=
[
"A long paragraph for summrization."
]
src_text
=
[
"A long paragraph for summ
a
rization."
]
tgt_text
=
[
"Summary of the text."
,
]
...
...
tests/test_tokenization_t5.py
View file @
563485bf
...
...
@@ -120,12 +120,12 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def
test_prepare_seq2seq_batch
(
self
):
tokenizer
=
self
.
t5_base_tokenizer
src_text
=
[
"A long paragraph for summrization."
,
"Another paragraph for summrization."
]
src_text
=
[
"A long paragraph for summ
a
rization."
,
"Another paragraph for summ
a
rization."
]
tgt_text
=
[
"Summary of the text."
,
"Another summary."
,
]
expected_src_tokens
=
[
71
,
307
,
8986
,
21
,
4505
,
51
,
52
,
1707
,
5
,
tokenizer
.
eos_token_id
]
expected_src_tokens
=
[
71
,
307
,
8986
,
21
,
4505
,
1635
,
1707
,
5
,
tokenizer
.
eos_token_id
]
batch
=
tokenizer
.
prepare_seq2seq_batch
(
src_text
,
tgt_texts
=
tgt_text
,
...
...
@@ -135,15 +135,15 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
result
=
list
(
batch
.
input_ids
.
numpy
()[
0
])
self
.
assertListEqual
(
expected_src_tokens
,
result
)
self
.
assertEqual
((
2
,
10
),
batch
.
input_ids
.
shape
)
self
.
assertEqual
((
2
,
10
),
batch
.
attention_mask
.
shape
)
self
.
assertEqual
((
2
,
9
),
batch
.
input_ids
.
shape
)
self
.
assertEqual
((
2
,
9
),
batch
.
attention_mask
.
shape
)
# Test that special tokens are reset
self
.
assertEqual
(
tokenizer
.
prefix_tokens
,
[])
def
test_empty_target_text
(
self
):
tokenizer
=
self
.
t5_base_tokenizer
src_text
=
[
"A long paragraph for summrization."
,
"Another paragraph for summrization."
]
src_text
=
[
"A long paragraph for summ
a
rization."
,
"Another paragraph for summ
a
rization."
]
batch
=
tokenizer
.
prepare_seq2seq_batch
(
src_text
,
return_tensors
=
FRAMEWORK
)
# check if input_ids are returned and no decoder_input_ids
self
.
assertIn
(
"input_ids"
,
batch
)
...
...
@@ -153,7 +153,7 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def
test_max_target_length
(
self
):
tokenizer
=
self
.
t5_base_tokenizer
src_text
=
[
"A short paragraph for summrization."
,
"Another short paragraph for summrization."
]
src_text
=
[
"A short paragraph for summ
a
rization."
,
"Another short paragraph for summ
a
rization."
]
tgt_text
=
[
"Summary of the text."
,
"Another summary."
,
...
...
@@ -180,9 +180,9 @@ class T5TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def
test_eos_in_input
(
self
):
tokenizer
=
self
.
t5_base_tokenizer
src_text
=
[
"A long paragraph for summrization. </s>"
]
src_text
=
[
"A long paragraph for summ
a
rization. </s>"
]
tgt_text
=
[
"Summary of the text. </s>"
]
expected_src_tokens
=
[
71
,
307
,
8986
,
21
,
4505
,
51
,
52
,
1707
,
5
,
1
]
expected_src_tokens
=
[
71
,
307
,
8986
,
21
,
4505
,
1635
,
1707
,
5
,
1
]
expected_tgt_tokens
=
[
0
,
20698
,
13
,
8
,
1499
,
5
,
1
]
batch
=
tokenizer
.
prepare_seq2seq_batch
(
src_text
,
tgt_texts
=
tgt_text
,
return_tensors
=
FRAMEWORK
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment