Unverified commit 2e72bbab, authored by Matt, committed by GitHub
Browse files

Incorrect setting for num_beams in translation and summarization examples (#27519)



* Remove the torch main_process_first context manager from TF examples

* Correctly set num_beams=1 in our examples, and add a guard in GenerationConfig.validate()

* Update src/transformers/generation/configuration_utils.py
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

---------
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
parent e6522e49
...@@ -312,7 +312,7 @@ class DataTrainingArguments: ...@@ -312,7 +312,7 @@ class DataTrainingArguments:
default=False, metadata={"help": "Whether to use generate to calculate generative metrics (ROUGE, BLEU)."} default=False, metadata={"help": "Whether to use generate to calculate generative metrics (ROUGE, BLEU)."}
) )
num_beams: Optional[int] = field( num_beams: Optional[int] = field(
default=None, default=1,
metadata={ metadata={
"help": ( "help": (
"Number of beams to use for evaluation. This argument will be passed to `model.generate`, " "Number of beams to use for evaluation. This argument will be passed to `model.generate`, "
......
...@@ -249,7 +249,7 @@ class DataTrainingArguments: ...@@ -249,7 +249,7 @@ class DataTrainingArguments:
}, },
) )
num_beams: Optional[int] = field( num_beams: Optional[int] = field(
default=None, default=1,
metadata={ metadata={
"help": ( "help": (
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
......
...@@ -217,7 +217,7 @@ class DataTrainingArguments: ...@@ -217,7 +217,7 @@ class DataTrainingArguments:
}, },
) )
num_beams: Optional[int] = field( num_beams: Optional[int] = field(
default=None, default=1,
metadata={ metadata={
"help": ( "help": (
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
......
...@@ -415,13 +415,12 @@ def main(): ...@@ -415,13 +415,12 @@ def main():
if data_args.max_train_samples is not None: if data_args.max_train_samples is not None:
max_train_samples = min(len(train_dataset), data_args.max_train_samples) max_train_samples = min(len(train_dataset), data_args.max_train_samples)
train_dataset = train_dataset.select(range(max_train_samples)) train_dataset = train_dataset.select(range(max_train_samples))
with training_args.main_process_first(desc="train dataset map pre-processing"): train_dataset = train_dataset.map(
train_dataset = train_dataset.map( preprocess_function,
preprocess_function, batched=True,
batched=True, num_proc=data_args.preprocessing_num_workers,
num_proc=data_args.preprocessing_num_workers, load_from_cache_file=not data_args.overwrite_cache,
load_from_cache_file=not data_args.overwrite_cache, )
)
if training_args.do_eval: if training_args.do_eval:
if "validation" not in raw_datasets: if "validation" not in raw_datasets:
...@@ -430,13 +429,12 @@ def main(): ...@@ -430,13 +429,12 @@ def main():
if data_args.max_eval_samples is not None: if data_args.max_eval_samples is not None:
max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
eval_dataset = eval_dataset.select(range(max_eval_samples)) eval_dataset = eval_dataset.select(range(max_eval_samples))
with training_args.main_process_first(desc="validation dataset map pre-processing"): eval_dataset = eval_dataset.map(
eval_dataset = eval_dataset.map( preprocess_function,
preprocess_function, batched=True,
batched=True, num_proc=data_args.preprocessing_num_workers,
num_proc=data_args.preprocessing_num_workers, load_from_cache_file=not data_args.overwrite_cache,
load_from_cache_file=not data_args.overwrite_cache, )
)
if data_args.pad_to_max_length: if data_args.pad_to_max_length:
data_collator = DefaultDataCollator(return_tensors="np") data_collator = DefaultDataCollator(return_tensors="np")
......
...@@ -238,7 +238,7 @@ class DataTrainingArguments: ...@@ -238,7 +238,7 @@ class DataTrainingArguments:
}, },
) )
num_beams: Optional[int] = field( num_beams: Optional[int] = field(
default=None, default=1,
metadata={ metadata={
"help": ( "help": (
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
...@@ -488,15 +488,14 @@ def main(): ...@@ -488,15 +488,14 @@ def main():
if data_args.max_train_samples is not None: if data_args.max_train_samples is not None:
max_train_samples = min(len(train_dataset), data_args.max_train_samples) max_train_samples = min(len(train_dataset), data_args.max_train_samples)
train_dataset = train_dataset.select(range(max_train_samples)) train_dataset = train_dataset.select(range(max_train_samples))
with training_args.main_process_first(desc="train dataset map pre-processing"): train_dataset = train_dataset.map(
train_dataset = train_dataset.map( preprocess_function,
preprocess_function, batched=True,
batched=True, num_proc=data_args.preprocessing_num_workers,
num_proc=data_args.preprocessing_num_workers, remove_columns=column_names,
remove_columns=column_names, load_from_cache_file=not data_args.overwrite_cache,
load_from_cache_file=not data_args.overwrite_cache, desc="Running tokenizer on train dataset",
desc="Running tokenizer on train dataset", )
)
else: else:
train_dataset = None train_dataset = None
...@@ -508,15 +507,14 @@ def main(): ...@@ -508,15 +507,14 @@ def main():
if data_args.max_eval_samples is not None: if data_args.max_eval_samples is not None:
max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
eval_dataset = eval_dataset.select(range(max_eval_samples)) eval_dataset = eval_dataset.select(range(max_eval_samples))
with training_args.main_process_first(desc="validation dataset map pre-processing"): eval_dataset = eval_dataset.map(
eval_dataset = eval_dataset.map( preprocess_function,
preprocess_function, batched=True,
batched=True, num_proc=data_args.preprocessing_num_workers,
num_proc=data_args.preprocessing_num_workers, remove_columns=column_names,
remove_columns=column_names, load_from_cache_file=not data_args.overwrite_cache,
load_from_cache_file=not data_args.overwrite_cache, desc="Running tokenizer on validation dataset",
desc="Running tokenizer on validation dataset", )
)
else: else:
eval_dataset = None eval_dataset = None
# endregion # endregion
......
...@@ -226,7 +226,7 @@ class DataTrainingArguments: ...@@ -226,7 +226,7 @@ class DataTrainingArguments:
}, },
) )
num_beams: Optional[int] = field( num_beams: Optional[int] = field(
default=None, default=1,
metadata={ metadata={
"help": ( "help": (
"Number of beams to use for evaluation. This argument will be passed to ``model.generate``, " "Number of beams to use for evaluation. This argument will be passed to ``model.generate``, "
...@@ -454,15 +454,14 @@ def main(): ...@@ -454,15 +454,14 @@ def main():
if data_args.max_train_samples is not None: if data_args.max_train_samples is not None:
max_train_samples = min(len(train_dataset), data_args.max_train_samples) max_train_samples = min(len(train_dataset), data_args.max_train_samples)
train_dataset = train_dataset.select(range(max_train_samples)) train_dataset = train_dataset.select(range(max_train_samples))
with training_args.main_process_first(desc="train dataset map pre-processing"): train_dataset = train_dataset.map(
train_dataset = train_dataset.map( preprocess_function,
preprocess_function, batched=True,
batched=True, num_proc=data_args.preprocessing_num_workers,
num_proc=data_args.preprocessing_num_workers, remove_columns=column_names,
remove_columns=column_names, load_from_cache_file=not data_args.overwrite_cache,
load_from_cache_file=not data_args.overwrite_cache, desc="Running tokenizer on train dataset",
desc="Running tokenizer on train dataset", )
)
else: else:
train_dataset = None train_dataset = None
...@@ -474,15 +473,14 @@ def main(): ...@@ -474,15 +473,14 @@ def main():
if data_args.max_eval_samples is not None: if data_args.max_eval_samples is not None:
max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
eval_dataset = eval_dataset.select(range(max_eval_samples)) eval_dataset = eval_dataset.select(range(max_eval_samples))
with training_args.main_process_first(desc="validation dataset map pre-processing"): eval_dataset = eval_dataset.map(
eval_dataset = eval_dataset.map( preprocess_function,
preprocess_function, batched=True,
batched=True, num_proc=data_args.preprocessing_num_workers,
num_proc=data_args.preprocessing_num_workers, remove_columns=column_names,
remove_columns=column_names, load_from_cache_file=not data_args.overwrite_cache,
load_from_cache_file=not data_args.overwrite_cache, desc="Running tokenizer on validation dataset",
desc="Running tokenizer on validation dataset", )
)
else: else:
eval_dataset = None eval_dataset = None
# endregion # endregion
......
...@@ -409,6 +409,10 @@ class GenerationConfig(PushToHubMixin): ...@@ -409,6 +409,10 @@ class GenerationConfig(PushToHubMixin):
) )
# 2. detect beam-only parameterization when not in beam mode # 2. detect beam-only parameterization when not in beam mode
if self.num_beams is None:
logging.warning("`num_beams` is set to None - defaulting to 1.", UserWarning)
self.num_beams = 1
if self.num_beams == 1: if self.num_beams == 1:
single_beam_wrong_parameter_msg = ( single_beam_wrong_parameter_msg = (
"`num_beams` is set to 1. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used " "`num_beams` is set to 1. However, `{flag_name}` is set to `{flag_value}` -- this flag is only used "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment