Unverified Commit 60d5f8f9 authored by Zach Mueller, committed by GitHub

🚨🚨🚨Deprecate `evaluation_strategy` to `eval_strategy`🚨🚨🚨 (#30190)

* Alias

* Note alias

* Tests and src

* Rest

* Clean

* Change typing?

* Fix tests

* Deprecation versions
parent c86d020e
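In practice the change is a field rename on `TrainingArguments`: the old keyword keeps working until its removal in v4.46 but now emits a `FutureWarning`. A minimal migration sketch (the `output_dir` value is a placeholder):

    from transformers import TrainingArguments

    # Preferred spelling after this change:
    args = TrainingArguments(
        output_dir="out",        # placeholder path
        eval_strategy="steps",   # formerly evaluation_strategy="steps"
        eval_steps=100,
    )

    # The old keyword still works but warns until it is removed:
    legacy = TrainingArguments(output_dir="out", evaluation_strategy="steps", eval_steps=100)
    assert legacy.eval_strategy == "steps"  # forwarded to the new field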
@@ -5,7 +5,7 @@ python run_asr.py \
--per_device_train_batch_size="1" \
--per_device_eval_batch_size="1" \
--gradient_accumulation_steps="8" \
- --evaluation_strategy="steps" \
+ --eval_strategy="steps" \
--save_steps="500" \
--eval_steps="100" \
--logging_steps="50" \
@@ -6,7 +6,7 @@ python run_common_voice.py \
--overwrite_output_dir \
--num_train_epochs="5" \
--per_device_train_batch_size="16" \
- --evaluation_strategy="steps" \
+ --eval_strategy="steps" \
--learning_rate="3e-4" \
--warmup_steps="500" \
--fp16 \
@@ -161,7 +161,7 @@ class TestDeepSpeedWav2Vec2(TestCasePlus):
--num_train_epochs {str(num_train_epochs)}
--per_device_train_batch_size 2
--per_device_eval_batch_size 2
- --evaluation_strategy steps
+ --eval_strategy steps
--learning_rate 5e-4
--warmup_steps 8
--orthography timit
@@ -90,7 +90,7 @@ python -m torch.distributed.launch \
--gradient_accumulation_steps=2 \
--learning_rate="3e-4" \
--warmup_steps=3000 \
- --evaluation_strategy="steps" \
+ --eval_strategy="steps" \
--max_duration_in_seconds=20 \
--save_steps=500 \
--eval_steps=500 \
@@ -134,7 +134,7 @@ python -m torch.distributed.launch \
--gradient_accumulation_steps=1 \
--learning_rate="3e-4" \
--warmup_steps=1500 \
- --evaluation_strategy="steps" \
+ --eval_strategy="steps" \
--max_duration_in_seconds=30 \
--save_steps=200 \
--eval_steps=200 \
@@ -45,7 +45,7 @@ python run_image_classification.py \
--per_device_eval_batch_size 8 \
--logging_strategy steps \
--logging_steps 10 \
- --evaluation_strategy epoch \
+ --eval_strategy epoch \
--save_strategy epoch \
--load_best_model_at_end True \
--save_total_limit 3 \
@@ -320,13 +320,13 @@ def run_hp_search_ray(trainer, n_trials: int, direction: str, **kwargs) -> BestR
# Check for `do_eval` and `eval_during_training` for schedulers that require intermediate reporting.
if isinstance(
kwargs["scheduler"], (ASHAScheduler, MedianStoppingRule, HyperBandForBOHB, PopulationBasedTraining)
- ) and (not trainer.args.do_eval or trainer.args.evaluation_strategy == IntervalStrategy.NO):
+ ) and (not trainer.args.do_eval or trainer.args.eval_strategy == IntervalStrategy.NO):
raise RuntimeError(
"You are using {cls} as a scheduler but you haven't enabled evaluation during training. "
"This means your trials will not report intermediate results to Ray Tune, and "
"can thus not be stopped early or used to exploit other trials parameters. "
"If this is what you want, do not use {cls}. If you would like to use {cls}, "
"make sure you pass `do_eval=True` and `evaluation_strategy='steps'` in the "
"make sure you pass `do_eval=True` and `eval_strategy='steps'` in the "
"Trainer `args`.".format(cls=type(kwargs["scheduler"]).__name__)
)
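Put differently, pruning schedulers need intermediate metrics, which only exist if evaluation runs during training. A sketch of arguments that satisfy this check (values are placeholders):

    from transformers import TrainingArguments

    # ASHA, HyperBand, PBT, etc. prune on intermediate results, so the
    # trainer must evaluate during training or run_hp_search_ray raises
    # the RuntimeError above.
    args = TrainingArguments(
        output_dir="out",       # placeholder
        do_eval=True,
        eval_strategy="steps",  # anything but "no"
        eval_steps=50,
    )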
@@ -444,7 +444,7 @@ class DefaultFlowCallback(TrainerCallback):
# Evaluate
if (
- args.evaluation_strategy == IntervalStrategy.STEPS
+ args.eval_strategy == IntervalStrategy.STEPS
and state.global_step % state.eval_steps == 0
and args.eval_delay <= state.global_step
):
@@ -470,7 +470,7 @@ class DefaultFlowCallback(TrainerCallback):
control.should_log = True
# Evaluate
- if args.evaluation_strategy == IntervalStrategy.EPOCH and args.eval_delay <= state.epoch:
+ if args.eval_strategy == IntervalStrategy.EPOCH and args.eval_delay <= state.epoch:
control.should_evaluate = True
# Save
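Concretely, the step branch fires whenever the global step is a multiple of `eval_steps` and `eval_delay` has elapsed, while the epoch branch fires once per epoch. A toy restatement of the step condition (not the library's code, just the logic above):

    # eval_strategy="steps", eval_steps=100, eval_delay=0 evaluates at steps 100, 200, ...
    def should_evaluate(global_step: int, eval_steps: int = 100, eval_delay: int = 0) -> bool:
        return global_step % eval_steps == 0 and eval_delay <= global_step

    assert should_evaluate(200)
    assert not should_evaluate(150)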
@@ -586,7 +586,7 @@ class EarlyStoppingCallback(TrainerCallback):
args.metric_for_best_model is not None
), "EarlyStoppingCallback requires metric_for_best_model is defined"
assert (
- args.evaluation_strategy != IntervalStrategy.NO
+ args.eval_strategy != IntervalStrategy.NO
), "EarlyStoppingCallback requires IntervalStrategy of steps or epoch"
def on_evaluate(self, args, state, control, metrics, **kwargs):
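For reference, a minimal configuration that passes these assertions (trainer wiring abbreviated; values are placeholders):

    from transformers import TrainingArguments, EarlyStoppingCallback

    args = TrainingArguments(
        output_dir="out",                   # placeholder
        eval_strategy="epoch",              # must not be "no"
        save_strategy="epoch",              # must match for load_best_model_at_end
        metric_for_best_model="eval_loss",  # required by the callback
        load_best_model_at_end=True,        # also asserted by the callback
    )
    # trainer = Trainer(model, args, ..., callbacks=[EarlyStoppingCallback(early_stopping_patience=3)])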
@@ -226,7 +226,7 @@ class TrainingArguments:
by your training/evaluation scripts instead. See the [example
scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
do_eval (`bool`, *optional*):
- Whether to run evaluation on the validation set or not. Will be set to `True` if `evaluation_strategy` is
+ Whether to run evaluation on the validation set or not. Will be set to `True` if `eval_strategy` is
different from `"no"`. This argument is not directly used by [`Trainer`], it's intended to be used by your
training/evaluation scripts instead. See the [example
scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
@@ -234,7 +234,7 @@ class TrainingArguments:
Whether to run predictions on the test set or not. This argument is not directly used by [`Trainer`], it's
intended to be used by your training/evaluation scripts instead. See the [example
scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
- evaluation_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`):
+ eval_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`):
The evaluation strategy to adopt during training. Possible values are:
- `"no"`: No evaluation is done during training.
@@ -263,7 +263,7 @@ class TrainingArguments:
requires more memory).
eval_delay (`float`, *optional*):
Number of epochs or steps to wait for before the first evaluation can be performed, depending on the
- evaluation_strategy.
+ eval_strategy.
learning_rate (`float`, *optional*, defaults to 5e-5):
The initial learning rate for [`AdamW`] optimizer.
weight_decay (`float`, *optional*, defaults to 0):
@@ -406,7 +406,7 @@ class TrainingArguments:
Whether to drop the last incomplete batch (if the length of the dataset is not divisible by the batch size)
or not.
eval_steps (`int` or `float`, *optional*):
- Number of update steps between two evaluations if `evaluation_strategy="steps"`. Will default to the same
+ Number of update steps between two evaluations if `eval_strategy="steps"`. Will default to the same
value as `logging_steps` if not set. Should be an integer or a float in range `[0,1)`. If smaller than 1,
will be interpreted as ratio of total training steps.
dataloader_num_workers (`int`, *optional*, defaults to 0):
@@ -440,7 +440,7 @@ class TrainingArguments:
<Tip>
- When set to `True`, the parameters `save_strategy` needs to be the same as `evaluation_strategy`, and in
+ When set to `True`, the parameters `save_strategy` needs to be the same as `eval_strategy`, and in
the case it is "steps", `save_steps` must be a round multiple of `eval_steps`.
</Tip>
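A configuration that satisfies both constraints from the tip above, as a sketch (values are placeholders):

    from transformers import TrainingArguments

    args = TrainingArguments(
        output_dir="out",        # placeholder
        load_best_model_at_end=True,
        eval_strategy="steps",
        save_strategy="steps",   # must match eval_strategy
        eval_steps=50,
        save_steps=100,          # a round multiple of eval_steps
    )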
@@ -767,7 +767,7 @@ class TrainingArguments:
do_train: bool = field(default=False, metadata={"help": "Whether to run training."})
do_eval: bool = field(default=False, metadata={"help": "Whether to run eval on the dev set."})
do_predict: bool = field(default=False, metadata={"help": "Whether to run predictions on the test set."})
- evaluation_strategy: Union[IntervalStrategy, str] = field(
+ eval_strategy: Union[IntervalStrategy, str] = field(
default="no",
metadata={"help": "The evaluation strategy to use."},
)
@@ -816,7 +816,7 @@ class TrainingArguments:
metadata={
"help": (
"Number of epochs or steps to wait for before the first evaluation can be performed, depending on the"
" evaluation_strategy."
" eval_strategy."
)
},
)
@@ -1306,6 +1306,10 @@ class TrainingArguments:
"choices": ["auto", "apex", "cpu_amp"],
},
)
+ evaluation_strategy: Union[IntervalStrategy, str] = field(
+ default=None,
+ metadata={"help": "Deprecated. Use `eval_strategy` instead"},
+ )
push_to_hub_model_id: Optional[str] = field(
default=None, metadata={"help": "The name of the repository to which push the `Trainer`."}
)
@@ -1441,14 +1445,21 @@ class TrainingArguments:
if self.disable_tqdm is None:
self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN
- if isinstance(self.evaluation_strategy, EvaluationStrategy):
+ if self.evaluation_strategy is not None:
+ warnings.warn(
+ "`evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead",
+ FutureWarning,
+ )
+ self.eval_strategy = self.evaluation_strategy
+ if isinstance(self.eval_strategy, EvaluationStrategy):
warnings.warn(
- "using `EvaluationStrategy` for `evaluation_strategy` is deprecated and will be removed in version 5"
+ "using `EvaluationStrategy` for `eval_strategy` is deprecated and will be removed in version 5"
" of 🤗 Transformers. Use `IntervalStrategy` instead",
FutureWarning,
)
# Go back to the underlying string or we won't be able to instantiate `IntervalStrategy` on it.
- self.evaluation_strategy = self.evaluation_strategy.value
+ self.eval_strategy = self.eval_strategy.value
if self.no_cuda:
warnings.warn(
"using `no_cuda` is deprecated and will be removed in version 5.0 of 🤗 Transformers. "
@@ -1457,23 +1468,23 @@ class TrainingArguments:
)
self.use_cpu = self.no_cuda
- self.evaluation_strategy = IntervalStrategy(self.evaluation_strategy)
+ self.eval_strategy = IntervalStrategy(self.eval_strategy)
self.logging_strategy = IntervalStrategy(self.logging_strategy)
self.save_strategy = IntervalStrategy(self.save_strategy)
self.hub_strategy = HubStrategy(self.hub_strategy)
self.lr_scheduler_type = SchedulerType(self.lr_scheduler_type)
- if self.do_eval is False and self.evaluation_strategy != IntervalStrategy.NO:
+ if self.do_eval is False and self.eval_strategy != IntervalStrategy.NO:
self.do_eval = True
# eval_steps has to be defined and non-zero, fallbacks to logging_steps if the latter is non-zero
- if self.evaluation_strategy == IntervalStrategy.STEPS and (self.eval_steps is None or self.eval_steps == 0):
+ if self.eval_strategy == IntervalStrategy.STEPS and (self.eval_steps is None or self.eval_steps == 0):
if self.logging_steps > 0:
logger.info(f"using `logging_steps` to initialize `eval_steps` to {self.logging_steps}")
self.eval_steps = self.logging_steps
else:
raise ValueError(
f"evaluation strategy {self.evaluation_strategy} requires either non-zero --eval_steps or"
f"evaluation strategy {self.eval_strategy} requires either non-zero --eval_steps or"
" --logging_steps"
)
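So with the step strategy, `eval_steps` may be omitted as long as `logging_steps` is non-zero; a sketch of the fallback (placeholder values):

    from transformers import TrainingArguments

    args = TrainingArguments(
        output_dir="out",       # placeholder
        eval_strategy="steps",
        logging_steps=25,       # no eval_steps given...
    )
    assert args.eval_steps == 25  # ...so it is initialized from logging_steps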
@@ -1485,7 +1496,7 @@ class TrainingArguments:
if self.logging_steps != int(self.logging_steps):
raise ValueError(f"--logging_steps must be an integer if bigger than 1: {self.logging_steps}")
self.logging_steps = int(self.logging_steps)
- if self.evaluation_strategy == IntervalStrategy.STEPS and self.eval_steps > 1:
+ if self.eval_strategy == IntervalStrategy.STEPS and self.eval_steps > 1:
if self.eval_steps != int(self.eval_steps):
raise ValueError(f"--eval_steps must be an integer if bigger than 1: {self.eval_steps}")
self.eval_steps = int(self.eval_steps)
@@ -1496,12 +1507,12 @@ class TrainingArguments:
# Sanity checks for load_best_model_at_end: we require save and eval strategies to be compatible.
if self.load_best_model_at_end:
- if self.evaluation_strategy != self.save_strategy:
+ if self.eval_strategy != self.save_strategy:
raise ValueError(
"--load_best_model_at_end requires the save and eval strategy to match, but found\n- Evaluation "
f"strategy: {self.evaluation_strategy}\n- Save strategy: {self.save_strategy}"
f"strategy: {self.eval_strategy}\n- Save strategy: {self.save_strategy}"
)
- if self.evaluation_strategy == IntervalStrategy.STEPS and self.save_steps % self.eval_steps != 0:
+ if self.eval_strategy == IntervalStrategy.STEPS and self.save_steps % self.eval_steps != 0:
if self.eval_steps < 1 or self.save_steps < 1:
if not (self.eval_steps < 1 and self.save_steps < 1):
raise ValueError(
@@ -1579,7 +1590,7 @@ class TrainingArguments:
raise ValueError(" `--half_precision_backend apex`: GPU bf16 is not supported by apex.")
if self.lr_scheduler_type == SchedulerType.REDUCE_ON_PLATEAU:
- if self.evaluation_strategy == IntervalStrategy.NO:
+ if self.eval_strategy == IntervalStrategy.NO:
raise ValueError("lr_scheduler_type reduce_lr_on_plateau requires an eval strategy")
if not is_torch_available():
raise ValueError("lr_scheduler_type reduce_lr_on_plateau requires torch>=0.2.0")
@@ -2443,7 +2454,7 @@ class TrainingArguments:
but requires more memory).
delay (`float`, *optional*):
Number of epochs or steps to wait for before the first evaluation can be performed, depending on the
- evaluation_strategy.
+ eval_strategy.
loss_only (`bool`, *optional*, defaults to `False`):
Ignores all outputs except the loss.
jit_mode (`bool`, *optional*):
@@ -2460,10 +2471,10 @@ class TrainingArguments:
100
```
"""
- self.evaluation_strategy = IntervalStrategy(strategy)
- if self.evaluation_strategy == IntervalStrategy.STEPS and steps == 0:
+ self.eval_strategy = IntervalStrategy(strategy)
+ if self.eval_strategy == IntervalStrategy.STEPS and steps == 0:
raise ValueError("Setting `strategy` as 'steps' requires a positive value for `steps`.")
- self.do_eval = self.evaluation_strategy != IntervalStrategy.NO
+ self.do_eval = self.eval_strategy != IntervalStrategy.NO
self.eval_steps = steps
self.per_device_eval_batch_size = batch_size
self.eval_accumulation_steps = accumulation_steps
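Usage of the renamed setter, mirroring the docstring example above (placeholder directory):

    from transformers import TrainingArguments

    args = TrainingArguments(output_dir="working_dir")  # placeholder
    args = args.set_evaluate(strategy="steps", steps=100)
    assert args.eval_steps == 100 and args.do_eval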
@@ -49,7 +49,7 @@ class TFTrainingArguments(TrainingArguments):
by your training/evaluation scripts instead. See the [example
scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
do_eval (`bool`, *optional*):
- Whether to run evaluation on the validation set or not. Will be set to `True` if `evaluation_strategy` is
+ Whether to run evaluation on the validation set or not. Will be set to `True` if `eval_strategy` is
different from `"no"`. This argument is not directly used by [`Trainer`], it's intended to be used by your
training/evaluation scripts instead. See the [example
scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
@@ -57,7 +57,7 @@ class TFTrainingArguments(TrainingArguments):
Whether to run predictions on the test set or not. This argument is not directly used by [`Trainer`], it's
intended to be used by your training/evaluation scripts instead. See the [example
scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
- evaluation_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`):
+ eval_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`):
The evaluation strategy to adopt during training. Possible values are:
- `"no"`: No evaluation is done during training.
@@ -292,11 +292,11 @@ class NotebookProgressCallback(TrainerCallback):
self._force_next_update = False
def on_train_begin(self, args, state, control, **kwargs):
- self.first_column = "Epoch" if args.evaluation_strategy == IntervalStrategy.EPOCH else "Step"
+ self.first_column = "Epoch" if args.eval_strategy == IntervalStrategy.EPOCH else "Step"
self.training_loss = 0
self.last_log = 0
column_names = [self.first_column] + ["Training Loss"]
- if args.evaluation_strategy != IntervalStrategy.NO:
+ if args.eval_strategy != IntervalStrategy.NO:
column_names.append("Validation Loss")
self.training_tracker = NotebookTrainingTracker(state.max_steps, column_names)
@@ -328,7 +328,7 @@ class NotebookProgressCallback(TrainerCallback):
def on_log(self, args, state, control, logs=None, **kwargs):
# Only for when there is no evaluation
- if args.evaluation_strategy == IntervalStrategy.NO and "loss" in logs:
+ if args.eval_strategy == IntervalStrategy.NO and "loss" in logs:
values = {"Training Loss": logs["loss"]}
# First column is necessarily Step since we're not in epoch eval strategy
values["Step"] = state.global_step
@@ -959,7 +959,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
"do_train": True,
"do_eval": True,
"optim": "adafactor",
"evaluation_strategy": "steps",
"eval_strategy": "steps",
"eval_steps": 1,
"save_strategy": "steps",
"save_steps": 1,
@@ -308,7 +308,7 @@ class TestTrainerExt(TestCasePlus):
--per_device_eval_batch_size 4
--max_eval_samples 8
--val_max_target_length {max_len}
- --evaluation_strategy steps
+ --eval_strategy steps
--eval_steps {str(eval_steps)}
""".split()
@@ -308,6 +308,6 @@ class TrainerIntegrationFSDP(TestCasePlus, TrainerIntegrationCommon):
--logging_steps {logging_steps}
--save_strategy epoch
--do_eval
- --evaluation_strategy epoch
+ --eval_strategy epoch
--report_to none
"""
@@ -740,7 +740,7 @@ class TrainerIntegrationPrerunTest(TestCasePlus, TrainerIntegrationCommon):
eval_dataset = RegressionDataset(length=64)
args = TrainingArguments(
"./regression",
- evaluation_strategy="epoch",
+ eval_strategy="epoch",
metric_for_best_model="eval_loss",
)
model = RegressionModel()
@@ -772,7 +772,7 @@ class TrainerIntegrationPrerunTest(TestCasePlus, TrainerIntegrationCommon):
args = TrainingArguments(
"./regression",
lr_scheduler_type="reduce_lr_on_plateau",
- evaluation_strategy="epoch",
+ eval_strategy="epoch",
metric_for_best_model="eval_loss",
num_train_epochs=10,
learning_rate=0.2,
@@ -2210,7 +2210,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
output_dir=tmpdir,
learning_rate=0.1,
eval_steps=5,
- evaluation_strategy="steps",
+ eval_strategy="steps",
save_steps=5,
load_best_model_at_end=True,
)
@@ -2226,7 +2226,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
output_dir=tmpdir,
learning_rate=0.1,
eval_steps=5,
- evaluation_strategy="steps",
+ eval_strategy="steps",
save_steps=5,
load_best_model_at_end=True,
metric_for_best_model="accuracy",
@@ -2243,7 +2243,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
b=2.5,
output_dir=tmpdir,
learning_rate=0.1,
- evaluation_strategy="epoch",
+ eval_strategy="epoch",
save_strategy="epoch",
load_best_model_at_end=True,
metric_for_best_model="accuracy",
@@ -2262,7 +2262,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
output_dir=tmpdir,
learning_rate=0.1,
eval_steps=5,
- evaluation_strategy="steps",
+ eval_strategy="steps",
save_steps=5,
load_best_model_at_end=True,
pretrained=False,
@@ -2283,7 +2283,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
output_dir=tmpdir,
learning_rate=0.1,
eval_steps=5,
- evaluation_strategy="steps",
+ eval_strategy="steps",
save_steps=5,
load_best_model_at_end=True,
save_safetensors=save_safetensors,
@@ -2437,7 +2437,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
gradient_accumulation_steps=1,
per_device_train_batch_size=16,
load_best_model_at_end=True,
- evaluation_strategy=IntervalStrategy.EPOCH,
+ eval_strategy=IntervalStrategy.EPOCH,
save_strategy=IntervalStrategy.EPOCH,
compute_metrics=AlmostAccuracy(),
metric_for_best_model="accuracy",
@@ -2453,7 +2453,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
num_train_epochs=20,
gradient_accumulation_steps=1,
per_device_train_batch_size=16,
- evaluation_strategy=IntervalStrategy.EPOCH,
+ eval_strategy=IntervalStrategy.EPOCH,
compute_metrics=AlmostAccuracy(),
metric_for_best_model="accuracy",
)
@@ -2497,7 +2497,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
# With best model at end
trainer = get_regression_trainer(
- output_dir=tmp_dir, evaluation_strategy="steps", load_best_model_at_end=True, save_total_limit=2
+ output_dir=tmp_dir, eval_strategy="steps", load_best_model_at_end=True, save_total_limit=2
)
trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-5")
self.check_checkpoint_deletion(trainer, tmp_dir, [5, 25])
@@ -2505,7 +2505,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
# Edge case: we don't always honor save_total_limit=1 if load_best_model_at_end=True to be able to resume
# from checkpoint
trainer = get_regression_trainer(
- output_dir=tmp_dir, evaluation_strategy="steps", load_best_model_at_end=True, save_total_limit=1
+ output_dir=tmp_dir, eval_strategy="steps", load_best_model_at_end=True, save_total_limit=1
)
trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-25")
self.check_checkpoint_deletion(trainer, tmp_dir, [25])
@@ -3341,7 +3341,7 @@ class TrainerHyperParameterOptunaIntegrationTest(unittest.TestCase):
output_dir=tmp_dir,
learning_rate=0.1,
logging_steps=1,
- evaluation_strategy=IntervalStrategy.EPOCH,
+ eval_strategy=IntervalStrategy.EPOCH,
save_strategy=IntervalStrategy.EPOCH,
num_train_epochs=4,
disable_tqdm=True,
@@ -3390,7 +3390,7 @@ class TrainerHyperParameterMultiObjectOptunaIntegrationTest(unittest.TestCase):
output_dir=tmp_dir,
learning_rate=0.1,
logging_steps=1,
- evaluation_strategy=IntervalStrategy.EPOCH,
+ eval_strategy=IntervalStrategy.EPOCH,
save_strategy=IntervalStrategy.EPOCH,
num_train_epochs=10,
disable_tqdm=True,
@@ -3448,7 +3448,7 @@ class TrainerHyperParameterRayIntegrationTest(unittest.TestCase):
output_dir=tmp_dir,
learning_rate=0.1,
logging_steps=1,
- evaluation_strategy=IntervalStrategy.EPOCH,
+ eval_strategy=IntervalStrategy.EPOCH,
save_strategy=IntervalStrategy.EPOCH,
num_train_epochs=4,
disable_tqdm=True,
@@ -3511,7 +3511,7 @@ class TrainerHyperParameterSigOptIntegrationTest(unittest.TestCase):
output_dir=tmp_dir,
learning_rate=0.1,
logging_steps=1,
- evaluation_strategy=IntervalStrategy.EPOCH,
+ eval_strategy=IntervalStrategy.EPOCH,
save_strategy=IntervalStrategy.EPOCH,
num_train_epochs=4,
disable_tqdm=True,
@@ -3931,7 +3931,7 @@ class TrainerHyperParameterWandbIntegrationTest(unittest.TestCase):
output_dir=tmp_dir,
learning_rate=0.1,
logging_steps=1,
- evaluation_strategy=IntervalStrategy.EPOCH,
+ eval_strategy=IntervalStrategy.EPOCH,
save_strategy=IntervalStrategy.EPOCH,
num_train_epochs=4,
disable_tqdm=True,
@@ -133,12 +133,12 @@ class TrainerCallbackTest(unittest.TestCase):
expected_events += ["on_step_begin", "on_step_end"]
if step % trainer.args.logging_steps == 0:
expected_events.append("on_log")
- if trainer.args.evaluation_strategy == IntervalStrategy.STEPS and step % trainer.args.eval_steps == 0:
+ if trainer.args.eval_strategy == IntervalStrategy.STEPS and step % trainer.args.eval_steps == 0:
expected_events += evaluation_events.copy()
if step % trainer.args.save_steps == 0:
expected_events.append("on_save")
expected_events.append("on_epoch_end")
- if trainer.args.evaluation_strategy == IntervalStrategy.EPOCH:
+ if trainer.args.eval_strategy == IntervalStrategy.EPOCH:
expected_events += evaluation_events.copy()
expected_events += ["on_log", "on_train_end"]
return expected_events
@@ -215,12 +215,12 @@ class TrainerCallbackTest(unittest.TestCase):
events = trainer.callback_handler.callbacks[-2].events
self.assertEqual(events, self.get_expected_events(trainer))
- trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], eval_steps=5, evaluation_strategy="steps")
+ trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], eval_steps=5, eval_strategy="steps")
trainer.train()
events = trainer.callback_handler.callbacks[-2].events
self.assertEqual(events, self.get_expected_events(trainer))
- trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], evaluation_strategy="epoch")
+ trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], eval_strategy="epoch")
trainer.train()
events = trainer.callback_handler.callbacks[-2].events
self.assertEqual(events, self.get_expected_events(trainer))
@@ -231,7 +231,7 @@ class TrainerCallbackTest(unittest.TestCase):
logging_steps=3,
save_steps=10,
eval_steps=5,
- evaluation_strategy="steps",
+ eval_strategy="steps",
)
trainer.train()
events = trainer.callback_handler.callbacks[-2].events
@@ -113,7 +113,7 @@ class Seq2seqTrainerTester(TestCasePlus):
per_device_train_batch_size=batch_size,
per_device_eval_batch_size=batch_size,
predict_with_generate=True,
- evaluation_strategy="steps",
+ eval_strategy="steps",
do_train=True,
do_eval=True,
warmup_steps=0,