"examples/vscode:/vscode.git/clone" did not exist on "b97cab7e6d3934c566e6cc8daf3e2f76d92397d5"
Unverified commit 60d5f8f9, authored by Zach Mueller, committed by GitHub

🚨🚨🚨Deprecate `evaluation_strategy` to `eval_strategy`🚨🚨🚨 (#30190)

* Alias

* Note alias

* Tests and src

* Rest

* Clean

* Change typing?

* Fix tests

* Deprecation versions
parent c86d020e
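
For downstream users the change is a one-line rename of the keyword. A minimal before/after sketch (`output_dir` and the step counts are placeholder values, not taken from this diff):

```python
from transformers import TrainingArguments

# Before: deprecated spelling, kept as an alias that emits a FutureWarning
# until its removal (scheduled for v4.46 per this PR).
# args = TrainingArguments(output_dir="out", evaluation_strategy="steps", eval_steps=100)

# After: the new canonical name.
args = TrainingArguments(output_dir="out", eval_strategy="steps", eval_steps=100)
```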
@@ -5,7 +5,7 @@ python run_asr.py \
 --per_device_train_batch_size="1" \
 --per_device_eval_batch_size="1" \
 --gradient_accumulation_steps="8" \
---evaluation_strategy="steps" \
+--eval_strategy="steps" \
 --save_steps="500" \
 --eval_steps="100" \
 --logging_steps="50" \
...
@@ -6,7 +6,7 @@ python run_common_voice.py \
 --overwrite_output_dir \
 --num_train_epochs="5" \
 --per_device_train_batch_size="16" \
---evaluation_strategy="steps" \
+--eval_strategy="steps" \
 --learning_rate="3e-4" \
 --warmup_steps="500" \
 --fp16 \
...
@@ -161,7 +161,7 @@ class TestDeepSpeedWav2Vec2(TestCasePlus):
             --num_train_epochs {str(num_train_epochs)}
             --per_device_train_batch_size 2
             --per_device_eval_batch_size 2
-            --evaluation_strategy steps
+            --eval_strategy steps
             --learning_rate 5e-4
             --warmup_steps 8
             --orthography timit
...
@@ -90,7 +90,7 @@ python -m torch.distributed.launch \
 --gradient_accumulation_steps=2 \
 --learning_rate="3e-4" \
 --warmup_steps=3000 \
---evaluation_strategy="steps" \
+--eval_strategy="steps" \
 --max_duration_in_seconds=20 \
 --save_steps=500 \
 --eval_steps=500 \
@@ -134,7 +134,7 @@ python -m torch.distributed.launch \
 --gradient_accumulation_steps=1 \
 --learning_rate="3e-4" \
 --warmup_steps=1500 \
---evaluation_strategy="steps" \
+--eval_strategy="steps" \
 --max_duration_in_seconds=30 \
 --save_steps=200 \
 --eval_steps=200 \
...
@@ -45,7 +45,7 @@ python run_image_classification.py \
 --per_device_eval_batch_size 8 \
 --logging_strategy steps \
 --logging_steps 10 \
---evaluation_strategy epoch \
+--eval_strategy epoch \
 --save_strategy epoch \
 --load_best_model_at_end True \
 --save_total_limit 3 \
...
@@ -320,13 +320,13 @@ def run_hp_search_ray(trainer, n_trials: int, direction: str, **kwargs) -> BestRun:
     # Check for `do_eval` and `eval_during_training` for schedulers that require intermediate reporting.
     if isinstance(
         kwargs["scheduler"], (ASHAScheduler, MedianStoppingRule, HyperBandForBOHB, PopulationBasedTraining)
-    ) and (not trainer.args.do_eval or trainer.args.evaluation_strategy == IntervalStrategy.NO):
+    ) and (not trainer.args.do_eval or trainer.args.eval_strategy == IntervalStrategy.NO):
         raise RuntimeError(
             "You are using {cls} as a scheduler but you haven't enabled evaluation during training. "
             "This means your trials will not report intermediate results to Ray Tune, and "
             "can thus not be stopped early or used to exploit other trials parameters. "
             "If this is what you want, do not use {cls}. If you would like to use {cls}, "
-            "make sure you pass `do_eval=True` and `evaluation_strategy='steps'` in the "
+            "make sure you pass `do_eval=True` and `eval_strategy='steps'` in the "
             "Trainer `args`.".format(cls=type(kwargs["scheduler"]).__name__)
         )
...
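
As the Ray Tune check above spells out, early-stopping schedulers need intermediate eval metrics. A hedged sketch of `TrainingArguments` that satisfy the check (path and step count are placeholders):

```python
from transformers import TrainingArguments

# ASHA-/PBT-style schedulers read intermediate results, so evaluation must run
# during training; do_eval is switched on automatically once eval_strategy != "no".
args = TrainingArguments(
    output_dir="ray_out",   # placeholder path
    eval_strategy="steps",
    eval_steps=50,          # report metrics to Ray Tune every 50 optimizer steps
)
```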
@@ -444,7 +444,7 @@ class DefaultFlowCallback(TrainerCallback):
         # Evaluate
         if (
-            args.evaluation_strategy == IntervalStrategy.STEPS
+            args.eval_strategy == IntervalStrategy.STEPS
             and state.global_step % state.eval_steps == 0
             and args.eval_delay <= state.global_step
         ):
             control.should_evaluate = True
@@ -470,7 +470,7 @@ class DefaultFlowCallback(TrainerCallback):
             control.should_log = True

         # Evaluate
-        if args.evaluation_strategy == IntervalStrategy.EPOCH and args.eval_delay <= state.epoch:
+        if args.eval_strategy == IntervalStrategy.EPOCH and args.eval_delay <= state.epoch:
             control.should_evaluate = True

         # Save
@@ -586,7 +586,7 @@ class EarlyStoppingCallback(TrainerCallback):
             args.metric_for_best_model is not None
         ), "EarlyStoppingCallback requires metric_for_best_model is defined"
         assert (
-            args.evaluation_strategy != IntervalStrategy.NO
+            args.eval_strategy != IntervalStrategy.NO
         ), "EarlyStoppingCallback requires IntervalStrategy of steps or epoch"

     def on_evaluate(self, args, state, control, metrics, **kwargs):
...
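
The `EarlyStoppingCallback` assertions above translate into a minimal configuration like the following sketch (placeholder values; `save_strategy` must also match `eval_strategy` because the callback expects `load_best_model_at_end=True`):

```python
from transformers import EarlyStoppingCallback, TrainingArguments

# Satisfies both assertions: a metric is named and eval_strategy != "no".
args = TrainingArguments(
    output_dir="out",                   # placeholder
    eval_strategy="epoch",
    save_strategy="epoch",              # must match eval_strategy for load_best_model_at_end
    metric_for_best_model="eval_loss",
    load_best_model_at_end=True,        # expected by the callback at train time
)
early_stop = EarlyStoppingCallback(early_stopping_patience=3)
```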
@@ -226,7 +226,7 @@ class TrainingArguments:
             by your training/evaluation scripts instead. See the [example
             scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
         do_eval (`bool`, *optional*):
-            Whether to run evaluation on the validation set or not. Will be set to `True` if `evaluation_strategy` is
+            Whether to run evaluation on the validation set or not. Will be set to `True` if `eval_strategy` is
             different from `"no"`. This argument is not directly used by [`Trainer`], it's intended to be used by your
             training/evaluation scripts instead. See the [example
             scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
@@ -234,7 +234,7 @@ class TrainingArguments:
             Whether to run predictions on the test set or not. This argument is not directly used by [`Trainer`], it's
             intended to be used by your training/evaluation scripts instead. See the [example
             scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
-        evaluation_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`):
+        eval_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`):
             The evaluation strategy to adopt during training. Possible values are:

                 - `"no"`: No evaluation is done during training.
@@ -263,7 +263,7 @@ class TrainingArguments:
             requires more memory).
         eval_delay (`float`, *optional*):
             Number of epochs or steps to wait for before the first evaluation can be performed, depending on the
-            evaluation_strategy.
+            eval_strategy.
         learning_rate (`float`, *optional*, defaults to 5e-5):
             The initial learning rate for [`AdamW`] optimizer.
         weight_decay (`float`, *optional*, defaults to 0):
@@ -406,7 +406,7 @@ class TrainingArguments:
             Whether to drop the last incomplete batch (if the length of the dataset is not divisible by the batch size)
             or not.
         eval_steps (`int` or `float`, *optional*):
-            Number of update steps between two evaluations if `evaluation_strategy="steps"`. Will default to the same
+            Number of update steps between two evaluations if `eval_strategy="steps"`. Will default to the same
             value as `logging_steps` if not set. Should be an integer or a float in range `[0,1)`. If smaller than 1,
             will be interpreted as ratio of total training steps.
         dataloader_num_workers (`int`, *optional*, defaults to 0):
@@ -440,7 +440,7 @@ class TrainingArguments:
             <Tip>

-            When set to `True`, the parameters `save_strategy` needs to be the same as `evaluation_strategy`, and in
+            When set to `True`, the parameters `save_strategy` needs to be the same as `eval_strategy`, and in
             the case it is "steps", `save_steps` must be a round multiple of `eval_steps`.

             </Tip>
@@ -767,7 +767,7 @@ class TrainingArguments:
     do_train: bool = field(default=False, metadata={"help": "Whether to run training."})
     do_eval: bool = field(default=False, metadata={"help": "Whether to run eval on the dev set."})
     do_predict: bool = field(default=False, metadata={"help": "Whether to run predictions on the test set."})
-    evaluation_strategy: Union[IntervalStrategy, str] = field(
+    eval_strategy: Union[IntervalStrategy, str] = field(
         default="no",
         metadata={"help": "The evaluation strategy to use."},
     )
@@ -816,7 +816,7 @@ class TrainingArguments:
         metadata={
             "help": (
                 "Number of epochs or steps to wait for before the first evaluation can be performed, depending on the"
-                " evaluation_strategy."
+                " eval_strategy."
             )
         },
     )
@@ -1306,6 +1306,10 @@ class TrainingArguments:
             "choices": ["auto", "apex", "cpu_amp"],
         },
     )
+    evaluation_strategy: Union[IntervalStrategy, str] = field(
+        default=None,
+        metadata={"help": "Deprecated. Use `eval_strategy` instead"},
+    )
     push_to_hub_model_id: Optional[str] = field(
         default=None, metadata={"help": "The name of the repository to which push the `Trainer`."}
     )
@@ -1441,14 +1445,21 @@ class TrainingArguments:
         if self.disable_tqdm is None:
             self.disable_tqdm = logger.getEffectiveLevel() > logging.WARN

-        if isinstance(self.evaluation_strategy, EvaluationStrategy):
+        if self.evaluation_strategy is not None:
+            warnings.warn(
+                "`evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead",
+                FutureWarning,
+            )
+            self.eval_strategy = self.evaluation_strategy
+
+        if isinstance(self.eval_strategy, EvaluationStrategy):
             warnings.warn(
-                "using `EvaluationStrategy` for `evaluation_strategy` is deprecated and will be removed in version 5"
+                "using `EvaluationStrategy` for `eval_strategy` is deprecated and will be removed in version 5"
                 " of 🤗 Transformers. Use `IntervalStrategy` instead",
                 FutureWarning,
             )
             # Go back to the underlying string or we won't be able to instantiate `IntervalStrategy` on it.
-            self.evaluation_strategy = self.evaluation_strategy.value
+            self.eval_strategy = self.eval_strategy.value

         if self.no_cuda:
             warnings.warn(
                 "using `no_cuda` is deprecated and will be removed in version 5.0 of 🤗 Transformers. "
@@ -1457,23 +1468,23 @@ class TrainingArguments:
             )
             self.use_cpu = self.no_cuda

-        self.evaluation_strategy = IntervalStrategy(self.evaluation_strategy)
+        self.eval_strategy = IntervalStrategy(self.eval_strategy)
         self.logging_strategy = IntervalStrategy(self.logging_strategy)
         self.save_strategy = IntervalStrategy(self.save_strategy)
         self.hub_strategy = HubStrategy(self.hub_strategy)
         self.lr_scheduler_type = SchedulerType(self.lr_scheduler_type)
-        if self.do_eval is False and self.evaluation_strategy != IntervalStrategy.NO:
+        if self.do_eval is False and self.eval_strategy != IntervalStrategy.NO:
             self.do_eval = True

         # eval_steps has to be defined and non-zero, fallbacks to logging_steps if the latter is non-zero
-        if self.evaluation_strategy == IntervalStrategy.STEPS and (self.eval_steps is None or self.eval_steps == 0):
+        if self.eval_strategy == IntervalStrategy.STEPS and (self.eval_steps is None or self.eval_steps == 0):
             if self.logging_steps > 0:
                 logger.info(f"using `logging_steps` to initialize `eval_steps` to {self.logging_steps}")
                 self.eval_steps = self.logging_steps
             else:
                 raise ValueError(
-                    f"evaluation strategy {self.evaluation_strategy} requires either non-zero --eval_steps or"
+                    f"evaluation strategy {self.eval_strategy} requires either non-zero --eval_steps or"
                     " --logging_steps"
                 )
@@ -1485,7 +1496,7 @@ class TrainingArguments:
         if self.logging_steps != int(self.logging_steps):
             raise ValueError(f"--logging_steps must be an integer if bigger than 1: {self.logging_steps}")
         self.logging_steps = int(self.logging_steps)
-        if self.evaluation_strategy == IntervalStrategy.STEPS and self.eval_steps > 1:
+        if self.eval_strategy == IntervalStrategy.STEPS and self.eval_steps > 1:
             if self.eval_steps != int(self.eval_steps):
                 raise ValueError(f"--eval_steps must be an integer if bigger than 1: {self.eval_steps}")
             self.eval_steps = int(self.eval_steps)
@@ -1496,12 +1507,12 @@ class TrainingArguments:
         # Sanity checks for load_best_model_at_end: we require save and eval strategies to be compatible.
         if self.load_best_model_at_end:
-            if self.evaluation_strategy != self.save_strategy:
+            if self.eval_strategy != self.save_strategy:
                 raise ValueError(
                     "--load_best_model_at_end requires the save and eval strategy to match, but found\n- Evaluation "
-                    f"strategy: {self.evaluation_strategy}\n- Save strategy: {self.save_strategy}"
+                    f"strategy: {self.eval_strategy}\n- Save strategy: {self.save_strategy}"
                 )
-            if self.evaluation_strategy == IntervalStrategy.STEPS and self.save_steps % self.eval_steps != 0:
+            if self.eval_strategy == IntervalStrategy.STEPS and self.save_steps % self.eval_steps != 0:
                 if self.eval_steps < 1 or self.save_steps < 1:
                     if not (self.eval_steps < 1 and self.save_steps < 1):
                         raise ValueError(
@@ -1579,7 +1590,7 @@ class TrainingArguments:
             raise ValueError(" `--half_precision_backend apex`: GPU bf16 is not supported by apex.")

         if self.lr_scheduler_type == SchedulerType.REDUCE_ON_PLATEAU:
-            if self.evaluation_strategy == IntervalStrategy.NO:
+            if self.eval_strategy == IntervalStrategy.NO:
                 raise ValueError("lr_scheduler_type reduce_lr_on_plateau requires an eval strategy")
             if not is_torch_available():
                 raise ValueError("lr_scheduler_type reduce_lr_on_plateau requires torch>=0.2.0")
@@ -2443,7 +2454,7 @@ class TrainingArguments:
                 but requires more memory).
             delay (`float`, *optional*):
                 Number of epochs or steps to wait for before the first evaluation can be performed, depending on the
-                evaluation_strategy.
+                eval_strategy.
             loss_only (`bool`, *optional*, defaults to `False`):
                 Ignores all outputs except the loss.
             jit_mode (`bool`, *optional*):
@@ -2460,10 +2471,10 @@ class TrainingArguments:
         100
         ```
         """
-        self.evaluation_strategy = IntervalStrategy(strategy)
-        if self.evaluation_strategy == IntervalStrategy.STEPS and steps == 0:
+        self.eval_strategy = IntervalStrategy(strategy)
+        if self.eval_strategy == IntervalStrategy.STEPS and steps == 0:
             raise ValueError("Setting `strategy` as 'steps' requires a positive value for `steps`.")
-        self.do_eval = self.evaluation_strategy != IntervalStrategy.NO
+        self.do_eval = self.eval_strategy != IntervalStrategy.NO
         self.eval_steps = steps
         self.per_device_eval_batch_size = batch_size
         self.eval_accumulation_steps = accumulation_steps
...
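
The net effect of the `__post_init__` changes above: the old keyword still works, but it routes through the new field and warns. A behavioral sketch of what callers should expect:

```python
import warnings

from transformers import TrainingArguments

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    args = TrainingArguments(output_dir="out", evaluation_strategy="epoch")

# The deprecated value is copied onto the new field (IntervalStrategy is a str
# enum, so comparing against the plain string holds)...
assert args.eval_strategy == "epoch"
# ...and a FutureWarning pointing at `eval_strategy` was emitted.
assert any(issubclass(w.category, FutureWarning) for w in caught)
```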
@@ -49,7 +49,7 @@ class TFTrainingArguments(TrainingArguments):
             by your training/evaluation scripts instead. See the [example
             scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
         do_eval (`bool`, *optional*):
-            Whether to run evaluation on the validation set or not. Will be set to `True` if `evaluation_strategy` is
+            Whether to run evaluation on the validation set or not. Will be set to `True` if `eval_strategy` is
             different from `"no"`. This argument is not directly used by [`Trainer`], it's intended to be used by your
             training/evaluation scripts instead. See the [example
             scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
@@ -57,7 +57,7 @@ class TFTrainingArguments(TrainingArguments):
             Whether to run predictions on the test set or not. This argument is not directly used by [`Trainer`], it's
             intended to be used by your training/evaluation scripts instead. See the [example
             scripts](https://github.com/huggingface/transformers/tree/main/examples) for more details.
-        evaluation_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`):
+        eval_strategy (`str` or [`~trainer_utils.IntervalStrategy`], *optional*, defaults to `"no"`):
             The evaluation strategy to adopt during training. Possible values are:

                 - `"no"`: No evaluation is done during training.
...
@@ -292,11 +292,11 @@ class NotebookProgressCallback(TrainerCallback):
         self._force_next_update = False

     def on_train_begin(self, args, state, control, **kwargs):
-        self.first_column = "Epoch" if args.evaluation_strategy == IntervalStrategy.EPOCH else "Step"
+        self.first_column = "Epoch" if args.eval_strategy == IntervalStrategy.EPOCH else "Step"
         self.training_loss = 0
         self.last_log = 0
         column_names = [self.first_column] + ["Training Loss"]
-        if args.evaluation_strategy != IntervalStrategy.NO:
+        if args.eval_strategy != IntervalStrategy.NO:
             column_names.append("Validation Loss")
         self.training_tracker = NotebookTrainingTracker(state.max_steps, column_names)
@@ -328,7 +328,7 @@ class NotebookProgressCallback(TrainerCallback):
     def on_log(self, args, state, control, logs=None, **kwargs):
         # Only for when there is no evaluation
-        if args.evaluation_strategy == IntervalStrategy.NO and "loss" in logs:
+        if args.eval_strategy == IntervalStrategy.NO and "loss" in logs:
             values = {"Training Loss": logs["loss"]}
             # First column is necessarily Step since we're not in epoch eval strategy
             values["Step"] = state.global_step
...
@@ -959,7 +959,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, TestCasePlus):
             "do_train": True,
             "do_eval": True,
             "optim": "adafactor",
-            "evaluation_strategy": "steps",
+            "eval_strategy": "steps",
             "eval_steps": 1,
             "save_strategy": "steps",
             "save_steps": 1,
...
@@ -308,7 +308,7 @@ class TestTrainerExt(TestCasePlus):
             --per_device_eval_batch_size 4
             --max_eval_samples 8
             --val_max_target_length 79,628
-            --evaluation_strategy steps
+            --eval_strategy steps
             --eval_steps {str(eval_steps)}
         """.split()
...
@@ -308,6 +308,6 @@ class TrainerIntegrationFSDP(TestCasePlus, TrainerIntegrationCommon):
             --logging_steps {logging_steps}
             --save_strategy epoch
             --do_eval
-            --evaluation_strategy epoch
+            --eval_strategy epoch
             --report_to none
         """
@@ -740,7 +740,7 @@ class TrainerIntegrationPrerunTest(TestCasePlus, TrainerIntegrationCommon):
         eval_dataset = RegressionDataset(length=64)
         args = TrainingArguments(
             "./regression",
-            evaluation_strategy="epoch",
+            eval_strategy="epoch",
             metric_for_best_model="eval_loss",
         )
         model = RegressionModel()
@@ -772,7 +772,7 @@ class TrainerIntegrationPrerunTest(TestCasePlus, TrainerIntegrationCommon):
         args = TrainingArguments(
             "./regression",
             lr_scheduler_type="reduce_lr_on_plateau",
-            evaluation_strategy="epoch",
+            eval_strategy="epoch",
             metric_for_best_model="eval_loss",
             num_train_epochs=10,
             learning_rate=0.2,
@@ -2210,7 +2210,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             output_dir=tmpdir,
             learning_rate=0.1,
             eval_steps=5,
-            evaluation_strategy="steps",
+            eval_strategy="steps",
             save_steps=5,
             load_best_model_at_end=True,
         )
@@ -2226,7 +2226,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             output_dir=tmpdir,
             learning_rate=0.1,
             eval_steps=5,
-            evaluation_strategy="steps",
+            eval_strategy="steps",
             save_steps=5,
             load_best_model_at_end=True,
             metric_for_best_model="accuracy",
@@ -2243,7 +2243,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             b=2.5,
             output_dir=tmpdir,
             learning_rate=0.1,
-            evaluation_strategy="epoch",
+            eval_strategy="epoch",
             save_strategy="epoch",
             load_best_model_at_end=True,
             metric_for_best_model="accuracy",
@@ -2262,7 +2262,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             output_dir=tmpdir,
             learning_rate=0.1,
             eval_steps=5,
-            evaluation_strategy="steps",
+            eval_strategy="steps",
             save_steps=5,
             load_best_model_at_end=True,
             pretrained=False,
@@ -2283,7 +2283,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             output_dir=tmpdir,
             learning_rate=0.1,
             eval_steps=5,
-            evaluation_strategy="steps",
+            eval_strategy="steps",
             save_steps=5,
             load_best_model_at_end=True,
             save_safetensors=save_safetensors,
@@ -2437,7 +2437,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             gradient_accumulation_steps=1,
             per_device_train_batch_size=16,
             load_best_model_at_end=True,
-            evaluation_strategy=IntervalStrategy.EPOCH,
+            eval_strategy=IntervalStrategy.EPOCH,
             save_strategy=IntervalStrategy.EPOCH,
             compute_metrics=AlmostAccuracy(),
             metric_for_best_model="accuracy",
@@ -2453,7 +2453,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
             num_train_epochs=20,
             gradient_accumulation_steps=1,
             per_device_train_batch_size=16,
-            evaluation_strategy=IntervalStrategy.EPOCH,
+            eval_strategy=IntervalStrategy.EPOCH,
             compute_metrics=AlmostAccuracy(),
             metric_for_best_model="accuracy",
         )
@@ -2497,7 +2497,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         # With best model at end
         trainer = get_regression_trainer(
-            output_dir=tmp_dir, evaluation_strategy="steps", load_best_model_at_end=True, save_total_limit=2
+            output_dir=tmp_dir, eval_strategy="steps", load_best_model_at_end=True, save_total_limit=2
         )
         trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-5")
         self.check_checkpoint_deletion(trainer, tmp_dir, [5, 25])
@@ -2505,7 +2505,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         # Edge case: we don't always honor save_total_limit=1 if load_best_model_at_end=True to be able to resume
         # from checkpoint
         trainer = get_regression_trainer(
-            output_dir=tmp_dir, evaluation_strategy="steps", load_best_model_at_end=True, save_total_limit=1
+            output_dir=tmp_dir, eval_strategy="steps", load_best_model_at_end=True, save_total_limit=1
         )
         trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-25")
         self.check_checkpoint_deletion(trainer, tmp_dir, [25])
@@ -3341,7 +3341,7 @@ class TrainerHyperParameterOptunaIntegrationTest(unittest.TestCase):
             output_dir=tmp_dir,
             learning_rate=0.1,
             logging_steps=1,
-            evaluation_strategy=IntervalStrategy.EPOCH,
+            eval_strategy=IntervalStrategy.EPOCH,
             save_strategy=IntervalStrategy.EPOCH,
             num_train_epochs=4,
             disable_tqdm=True,
@@ -3390,7 +3390,7 @@ class TrainerHyperParameterMultiObjectOptunaIntegrationTest(unittest.TestCase):
             output_dir=tmp_dir,
             learning_rate=0.1,
             logging_steps=1,
-            evaluation_strategy=IntervalStrategy.EPOCH,
+            eval_strategy=IntervalStrategy.EPOCH,
             save_strategy=IntervalStrategy.EPOCH,
             num_train_epochs=10,
             disable_tqdm=True,
@@ -3448,7 +3448,7 @@ class TrainerHyperParameterRayIntegrationTest(unittest.TestCase):
             output_dir=tmp_dir,
             learning_rate=0.1,
             logging_steps=1,
-            evaluation_strategy=IntervalStrategy.EPOCH,
+            eval_strategy=IntervalStrategy.EPOCH,
             save_strategy=IntervalStrategy.EPOCH,
             num_train_epochs=4,
             disable_tqdm=True,
@@ -3511,7 +3511,7 @@ class TrainerHyperParameterSigOptIntegrationTest(unittest.TestCase):
             output_dir=tmp_dir,
             learning_rate=0.1,
             logging_steps=1,
-            evaluation_strategy=IntervalStrategy.EPOCH,
+            eval_strategy=IntervalStrategy.EPOCH,
             save_strategy=IntervalStrategy.EPOCH,
             num_train_epochs=4,
             disable_tqdm=True,
@@ -3931,7 +3931,7 @@ class TrainerHyperParameterWandbIntegrationTest(unittest.TestCase):
             output_dir=tmp_dir,
             learning_rate=0.1,
             logging_steps=1,
-            evaluation_strategy=IntervalStrategy.EPOCH,
+            eval_strategy=IntervalStrategy.EPOCH,
             save_strategy=IntervalStrategy.EPOCH,
             num_train_epochs=4,
             disable_tqdm=True,
...
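
The `load_best_model_at_end` tests above exercise the sanity checks documented earlier: save and eval strategies must match, and with `"steps"` the save interval must be a round multiple of the eval interval. A configuration the checks accept (values are illustrative, not from this diff):

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="out",                  # placeholder
    eval_strategy="steps",
    eval_steps=5,
    save_strategy="steps",             # matches eval_strategy
    save_steps=10,                     # round multiple of eval_steps
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)
```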
@@ -133,12 +133,12 @@ class TrainerCallbackTest(unittest.TestCase):
                 expected_events += ["on_step_begin", "on_step_end"]
                 if step % trainer.args.logging_steps == 0:
                     expected_events.append("on_log")
-                if trainer.args.evaluation_strategy == IntervalStrategy.STEPS and step % trainer.args.eval_steps == 0:
+                if trainer.args.eval_strategy == IntervalStrategy.STEPS and step % trainer.args.eval_steps == 0:
                     expected_events += evaluation_events.copy()
                 if step % trainer.args.save_steps == 0:
                     expected_events.append("on_save")
             expected_events.append("on_epoch_end")
-            if trainer.args.evaluation_strategy == IntervalStrategy.EPOCH:
+            if trainer.args.eval_strategy == IntervalStrategy.EPOCH:
                 expected_events += evaluation_events.copy()
         expected_events += ["on_log", "on_train_end"]
         return expected_events
@@ -215,12 +215,12 @@ class TrainerCallbackTest(unittest.TestCase):
         events = trainer.callback_handler.callbacks[-2].events
         self.assertEqual(events, self.get_expected_events(trainer))

-        trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], eval_steps=5, evaluation_strategy="steps")
+        trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], eval_steps=5, eval_strategy="steps")
         trainer.train()
         events = trainer.callback_handler.callbacks[-2].events
         self.assertEqual(events, self.get_expected_events(trainer))

-        trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], evaluation_strategy="epoch")
+        trainer = self.get_trainer(callbacks=[MyTestTrainerCallback], eval_strategy="epoch")
         trainer.train()
         events = trainer.callback_handler.callbacks[-2].events
         self.assertEqual(events, self.get_expected_events(trainer))
@@ -231,7 +231,7 @@ class TrainerCallbackTest(unittest.TestCase):
             logging_steps=3,
             save_steps=10,
             eval_steps=5,
-            evaluation_strategy="steps",
+            eval_strategy="steps",
         )
         trainer.train()
         events = trainer.callback_handler.callbacks[-2].events
...
@@ -113,7 +113,7 @@ class Seq2seqTrainerTester(TestCasePlus):
             per_device_train_batch_size=batch_size,
             per_device_eval_batch_size=batch_size,
             predict_with_generate=True,
-            evaluation_strategy="steps",
+            eval_strategy="steps",
             do_train=True,
             do_eval=True,
             warmup_steps=0,
...