Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
6f1adc43
Unverified
Commit
6f1adc43
authored
Jul 08, 2021
by
Sylvain Gugger
Committed by
GitHub
Jul 08, 2021
Browse files
Fix group_lengths for short datasets (#12558)
parent
0a6b9048
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
20 additions
and
10 deletions
+20
-10
examples/flax/language-modeling/run_clm_flax.py
examples/flax/language-modeling/run_clm_flax.py
+2
-1
examples/flax/language-modeling/run_mlm_flax.py
examples/flax/language-modeling/run_mlm_flax.py
+2
-1
examples/flax/language-modeling/run_t5_mlm_flax.py
examples/flax/language-modeling/run_t5_mlm_flax.py
+2
-1
examples/pytorch/language-modeling/run_clm.py
examples/pytorch/language-modeling/run_clm.py
+2
-1
examples/pytorch/language-modeling/run_clm_no_trainer.py
examples/pytorch/language-modeling/run_clm_no_trainer.py
+2
-1
examples/pytorch/language-modeling/run_mlm.py
examples/pytorch/language-modeling/run_mlm.py
+2
-1
examples/pytorch/language-modeling/run_mlm_no_trainer.py
examples/pytorch/language-modeling/run_mlm_no_trainer.py
+2
-1
examples/pytorch/language-modeling/run_plm.py
examples/pytorch/language-modeling/run_plm.py
+2
-1
examples/tensorflow/language-modeling/run_clm.py
examples/tensorflow/language-modeling/run_clm.py
+2
-1
examples/tensorflow/language-modeling/run_mlm.py
examples/tensorflow/language-modeling/run_mlm.py
+2
-1
No files found.
examples/flax/language-modeling/run_clm_flax.py
View file @
6f1adc43
...
...
@@ -398,7 +398,8 @@ def main():
total_length
=
len
(
concatenated_examples
[
list
(
examples
.
keys
())[
0
]])
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
# customize this part to your needs.
total_length
=
(
total_length
//
block_size
)
*
block_size
if
total_length
>=
block_size
:
total_length
=
(
total_length
//
block_size
)
*
block_size
# Split by chunks of max_len.
result
=
{
k
:
[
t
[
i
:
i
+
block_size
]
for
i
in
range
(
0
,
total_length
,
block_size
)]
...
...
examples/flax/language-modeling/run_mlm_flax.py
View file @
6f1adc43
...
...
@@ -431,7 +431,8 @@ if __name__ == "__main__":
total_length
=
len
(
concatenated_examples
[
list
(
examples
.
keys
())[
0
]])
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
# customize this part to your needs.
total_length
=
(
total_length
//
max_seq_length
)
*
max_seq_length
if
total_length
>=
max_seq_length
:
total_length
=
(
total_length
//
max_seq_length
)
*
max_seq_length
# Split by chunks of max_len.
result
=
{
k
:
[
t
[
i
:
i
+
max_seq_length
]
for
i
in
range
(
0
,
total_length
,
max_seq_length
)]
...
...
examples/flax/language-modeling/run_t5_mlm_flax.py
View file @
6f1adc43
...
...
@@ -541,7 +541,8 @@ if __name__ == "__main__":
total_length
=
len
(
concatenated_examples
[
list
(
examples
.
keys
())[
0
]])
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
# customize this part to your needs.
total_length
=
(
total_length
//
expanded_inputs_length
)
*
expanded_inputs_length
if
total_length
>=
expanded_inputs_length
:
total_length
=
(
total_length
//
expanded_inputs_length
)
*
expanded_inputs_length
# Split by chunks of max_len.
result
=
{
k
:
[
t
[
i
:
i
+
expanded_inputs_length
]
for
i
in
range
(
0
,
total_length
,
expanded_inputs_length
)]
...
...
examples/pytorch/language-modeling/run_clm.py
View file @
6f1adc43
...
...
@@ -404,7 +404,8 @@ def main():
total_length
=
len
(
concatenated_examples
[
list
(
examples
.
keys
())[
0
]])
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
# customize this part to your needs.
total_length
=
(
total_length
//
block_size
)
*
block_size
if
total_length
>=
block_size
:
total_length
=
(
total_length
//
block_size
)
*
block_size
# Split by chunks of max_len.
result
=
{
k
:
[
t
[
i
:
i
+
block_size
]
for
i
in
range
(
0
,
total_length
,
block_size
)]
...
...
examples/pytorch/language-modeling/run_clm_no_trainer.py
View file @
6f1adc43
...
...
@@ -343,7 +343,8 @@ def main():
total_length
=
len
(
concatenated_examples
[
list
(
examples
.
keys
())[
0
]])
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
# customize this part to your needs.
total_length
=
(
total_length
//
block_size
)
*
block_size
if
total_length
>=
block_size
:
total_length
=
(
total_length
//
block_size
)
*
block_size
# Split by chunks of max_len.
result
=
{
k
:
[
t
[
i
:
i
+
block_size
]
for
i
in
range
(
0
,
total_length
,
block_size
)]
...
...
examples/pytorch/language-modeling/run_mlm.py
View file @
6f1adc43
...
...
@@ -433,7 +433,8 @@ def main():
total_length
=
len
(
concatenated_examples
[
list
(
examples
.
keys
())[
0
]])
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
# customize this part to your needs.
total_length
=
(
total_length
//
max_seq_length
)
*
max_seq_length
if
total_length
>=
max_seq_length
:
total_length
=
(
total_length
//
max_seq_length
)
*
max_seq_length
# Split by chunks of max_len.
result
=
{
k
:
[
t
[
i
:
i
+
max_seq_length
]
for
i
in
range
(
0
,
total_length
,
max_seq_length
)]
...
...
examples/pytorch/language-modeling/run_mlm_no_trainer.py
View file @
6f1adc43
...
...
@@ -387,7 +387,8 @@ def main():
total_length
=
len
(
concatenated_examples
[
list
(
examples
.
keys
())[
0
]])
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
# customize this part to your needs.
total_length
=
(
total_length
//
max_seq_length
)
*
max_seq_length
if
total_length
>=
max_seq_length
:
total_length
=
(
total_length
//
max_seq_length
)
*
max_seq_length
# Split by chunks of max_len.
result
=
{
k
:
[
t
[
i
:
i
+
max_seq_length
]
for
i
in
range
(
0
,
total_length
,
max_seq_length
)]
...
...
examples/pytorch/language-modeling/run_plm.py
View file @
6f1adc43
...
...
@@ -406,7 +406,8 @@ def main():
total_length
=
len
(
concatenated_examples
[
list
(
examples
.
keys
())[
0
]])
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
# customize this part to your needs.
total_length
=
(
total_length
//
max_seq_length
)
*
max_seq_length
if
total_length
>=
max_seq_length
:
total_length
=
(
total_length
//
max_seq_length
)
*
max_seq_length
# Split by chunks of max_len.
result
=
{
k
:
[
t
[
i
:
i
+
max_seq_length
]
for
i
in
range
(
0
,
total_length
,
max_seq_length
)]
...
...
examples/tensorflow/language-modeling/run_clm.py
View file @
6f1adc43
...
...
@@ -405,7 +405,8 @@ def main():
total_length
=
len
(
concatenated_examples
[
list
(
examples
.
keys
())[
0
]])
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
# customize this part to your needs.
total_length
=
(
total_length
//
block_size
)
*
block_size
if
total_length
>=
block_size
:
total_length
=
(
total_length
//
block_size
)
*
block_size
# Split by chunks of max_len.
result
=
{
k
:
[
t
[
i
:
i
+
block_size
]
for
i
in
range
(
0
,
total_length
,
block_size
)]
...
...
examples/tensorflow/language-modeling/run_mlm.py
View file @
6f1adc43
...
...
@@ -466,7 +466,8 @@ def main():
total_length
=
len
(
concatenated_examples
[
list
(
examples
.
keys
())[
0
]])
# We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
# customize this part to your needs.
total_length
=
(
total_length
//
max_seq_length
)
*
max_seq_length
if
total_length
>=
max_seq_length
:
total_length
=
(
total_length
//
max_seq_length
)
*
max_seq_length
# Split by chunks of max_len.
result
=
{
k
:
[
t
[
i
:
i
+
max_seq_length
]
for
i
in
range
(
0
,
total_length
,
max_seq_length
)]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment