Unverified commit 27d0e01d, authored by Pavel Tarashkevich, committed by GitHub
Browse files

Fix classification script: enable dynamic padding with truncation (#9554)


Co-authored-by: Pavel Tarashkevich <Pavel.Tarashkievich@orange.com>
parent 245cdb46
...@@ -283,11 +283,9 @@ def main(): ...@@ -283,11 +283,9 @@ def main():
# Padding strategy # Padding strategy
if data_args.pad_to_max_length: if data_args.pad_to_max_length:
padding = "max_length" padding = "max_length"
max_length = data_args.max_seq_length
else: else:
# We will pad later, dynamically at batch creation, to the max sequence length in each batch # We will pad later, dynamically at batch creation, to the max sequence length in each batch
padding = False padding = False
max_length = None
# Some models have set the order of the labels to use, so let's make sure we do use it. # Some models have set the order of the labels to use, so let's make sure we do use it.
label_to_id = None label_to_id = None
...@@ -314,7 +312,7 @@ def main(): ...@@ -314,7 +312,7 @@ def main():
args = ( args = (
(examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key]) (examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key])
) )
result = tokenizer(*args, padding=padding, max_length=max_length, truncation=True) result = tokenizer(*args, padding=padding, max_length=data_args.max_seq_length, truncation=True)
# Map labels to IDs (not necessary for GLUE tasks) # Map labels to IDs (not necessary for GLUE tasks)
if label_to_id is not None and "label" in examples: if label_to_id is not None and "label" in examples:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment