Unverified Commit 34fabe16 authored by Sylvain Gugger, committed by GitHub

Move prediction_loss_only to TrainingArguments (#6426)

parent e9c30314
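
In practice, callers now set the flag on TrainingArguments rather than on the Trainer constructor. A minimal migration sketch, assuming a transformers version from the era of this commit (the tiny randomly initialized model is only there to keep the snippet self-contained and avoid downloading weights):

from transformers import BertConfig, BertForSequenceClassification, Trainer, TrainingArguments

# Tiny random model so the example runs standalone.
config = BertConfig(hidden_size=64, num_hidden_layers=2, num_attention_heads=2, intermediate_size=128)
model = BertForSequenceClassification(config)

# Before this commit (now deprecated, emits a FutureWarning):
#   trainer = Trainer(model=model, args=args, prediction_loss_only=True)

# After this commit, the flag is a TrainingArguments field:
args = TrainingArguments(output_dir="out", prediction_loss_only=True)
trainer = Trainer(model=model, args=args)
assert trainer.args.prediction_loss_only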
@@ -66,6 +66,10 @@ PegasusForConditionalGeneration
 This class inherits all functionality from ``BartForConditionalGeneration``, see that page for method signatures.
 Available models are listed at `Model List <https://huggingface.co/models?search=pegasus>`__
+.. autoclass:: transformers.PegasusForConditionalGeneration
+  :members: generate, forward
 PegasusConfig
 ~~~~~~~~~~~~~~~~~~~
 This config fully inherits from ``BartConfig``, but pegasus uses different default values:
...
@@ -159,6 +159,8 @@ class Trainer:
             A tuple containing the optimizer and the scheduler to use. Will default to an instance of
             :class:`~transformers.AdamW` on your model and a scheduler given by
             :func:`~transformers.get_linear_schedule_with_warmup` controlled by :obj:`args`.
+        kwargs:
+            Deprecated keyword arguments.
     """
     def __init__(
@@ -169,9 +171,9 @@ class Trainer:
         train_dataset: Optional[Dataset] = None,
         eval_dataset: Optional[Dataset] = None,
         compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
-        prediction_loss_only=False,
         tb_writer: Optional["SummaryWriter"] = None,
         optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
+        **kwargs,
     ):
         self.model = model.to(args.device)
         self.args = args
@@ -179,9 +181,16 @@ class Trainer:
         self.train_dataset = train_dataset
         self.eval_dataset = eval_dataset
         self.compute_metrics = compute_metrics
-        self.prediction_loss_only = prediction_loss_only
         self.optimizer, self.lr_scheduler = optimizers
         self.tb_writer = tb_writer
+        if "prediction_loss_only" in kwargs:
+            warnings.warn(
+                "Passing `prediction_loss_only` as a keyword argument is deprecated and won't be possible in a future version. Use `args.prediction_loss_only` instead.",
+                FutureWarning,
+            )
+            self.args.prediction_loss_only = kwargs.pop("prediction_loss_only")
+        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         if tb_writer is None and is_tensorboard_available() and self.is_world_process_zero():
             self.tb_writer = SummaryWriter(log_dir=self.args.logging_dir)
         if not is_tensorboard_available():
@@ -951,7 +960,9 @@ class Trainer:
             )
             return self._prediction_loop(dataloader, description, prediction_loss_only=prediction_loss_only)
-        prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else self.prediction_loss_only
+        prediction_loss_only = (
+            prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
+        )
         model = self.model
         # multi-gpu eval
...
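
To make the back-compat path above concrete: passing the old keyword still works for now, but it is rerouted into args and a FutureWarning is raised, while any other unexpected keyword trips the assert. A small check, reusing model and args from the migration sketch after the commit message:

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    trainer = Trainer(model=model, args=args, prediction_loss_only=True)

assert any(issubclass(w.category, FutureWarning) for w in caught)
assert trainer.args.prediction_loss_only is True

Note also the precedence kept by the prediction loop: an explicit prediction_loss_only argument, when not None, still overrides the new args.prediction_loss_only default.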
@@ -44,8 +44,6 @@ class TFTrainer:
         compute_metrics (:obj:`Callable[[EvalPrediction], Dict]`, `optional`):
             The function that will be used to compute metrics at evaluation. Must take a
             :class:`~transformers.EvalPrediction` and return a dictionary string to metric values.
-        prediction_loss_only (:obj:`bool`, `optional`, defaults to `False`):
-            When performing evaluation and predictions, only returns the loss.
         tb_writer (:obj:`tf.summary.SummaryWriter`, `optional`):
             Object to write to TensorBoard.
         optimizers (:obj:`Tuple[tf.keras.optimizers.Optimizer, tf.keras.optimizers.schedules.LearningRateSchedule]`, `optional`):
@@ -54,6 +52,8 @@ class TFTrainer:
             :class:`~transformers.AdamWeightDecay`. The scheduler will default to an instance of
             :class:`tf.keras.optimizers.schedules.PolynomialDecay` if :obj:`args.num_warmup_steps` is 0 else
             an instance of :class:`~transformers.WarmUp`.
+        kwargs:
+            Deprecated keyword arguments.
     """
     def __init__(
@@ -63,12 +63,12 @@ class TFTrainer:
         train_dataset: Optional[tf.data.Dataset] = None,
         eval_dataset: Optional[tf.data.Dataset] = None,
         compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
-        prediction_loss_only=False,
         tb_writer: Optional[tf.summary.SummaryWriter] = None,
         optimizers: Tuple[tf.keras.optimizers.Optimizer, tf.keras.optimizers.schedules.LearningRateSchedule] = (
             None,
             None,
         ),
+        **kwargs,
     ):
         assert parse(tf.__version__).release >= (2, 2, 0), (
             "You need to run the TensorFlow trainer with at least the version 2.2.0, your version is %r "
@@ -80,11 +80,17 @@ class TFTrainer:
         self.train_dataset = train_dataset
         self.eval_dataset = eval_dataset
         self.compute_metrics = compute_metrics
-        self.prediction_loss_only = prediction_loss_only
         self.optimizer, self.lr_scheduler = optimizers
         self.gradient_accumulator = GradientAccumulator()
         self.global_step = 0
         self.epoch_logging = 0
+        if "prediction_loss_only" in kwargs:
+            warnings.warn(
+                "Passing `prediction_loss_only` as a keyword argument is deprecated and won't be possible in a future version. Use `args.prediction_loss_only` instead.",
+                FutureWarning,
+            )
+            self.args.prediction_loss_only = kwargs.pop("prediction_loss_only")
+        assert kwargs == {}, f"Unexpected keyword arguments: {list(kwargs.keys())}."
         if tb_writer is not None:
             self.tb_writer = tb_writer
@@ -282,7 +288,9 @@ class TFTrainer:
                 dataset, steps, num_examples, description, prediction_loss_only=prediction_loss_only
             )
-        prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else self.prediction_loss_only
+        prediction_loss_only = (
+            prediction_loss_only if prediction_loss_only is not None else self.args.prediction_loss_only
+        )
         logger.info("***** Running %s *****", description)
         logger.info("  Num examples = %d", num_examples)
...
@@ -52,6 +52,8 @@ class TrainingArguments:
             Whether to run predictions on the test set or not.
         evaluate_during_training (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether to run evaluation during training at each logging step or not.
+        prediction_loss_only (:obj:`bool`, `optional`, defaults to `False`):
+            When performing evaluation and predictions, only returns the loss.
         per_device_train_batch_size (:obj:`int`, `optional`, defaults to 8):
             The batch size per GPU/TPU core/CPU for training.
         per_device_eval_batch_size (:obj:`int`, `optional`, defaults to 8):
@@ -132,6 +134,9 @@ class TrainingArguments:
     evaluate_during_training: bool = field(
         default=False, metadata={"help": "Run evaluation during training at each logging step."},
     )
+    prediction_loss_only: bool = field(
+        default=False, metadata={"help": "When performing evaluation and predictions, only returns the loss."},
+    )
     per_device_train_batch_size: int = field(
         default=8, metadata={"help": "Batch size per GPU/TPU core/CPU for training."}
...
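
Because the flag is now a dataclass field with help metadata, it should also surface as a command-line option through HfArgumentParser. This relies on an assumption about this version's handling of bool fields (default=False fields typically become store_true-style flags), so treat it as a sketch rather than a guaranteed interface:

from transformers import HfArgumentParser, TrainingArguments

parser = HfArgumentParser(TrainingArguments)
(training_args,) = parser.parse_args_into_dataclasses(
    args=["--output_dir", "out", "--prediction_loss_only"]
)
assert training_args.prediction_loss_only is True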
@@ -29,6 +29,7 @@ TEST_FILES_WITH_NO_COMMON_TESTS = [
     "test_modeling_tf_camembert.py",
     "test_modeling_tf_xlm_roberta.py",
     "test_modeling_xlm_roberta.py",
+    "test_modeling_pegasus.py",
 ]
 # Update this list for models that are not documented with a comment explaining the reason it should not be.
...