Adds trainer and checkpoint exporter as arguments to the run_experiment functions.

PiperOrigin-RevId: 368778443

Adds trainer and checkpoint exporter as arguments to the run_experiment functions.
PiperOrigin-RevId: 368778443
9cfbc813 · Yeqing Li · A. Unique TensorFlower · b9599c26 · 9cfbc813
Commit 9cfbc813 authored Apr 15, 2021 by Yeqing Li Committed by A. Unique TensorFlower Apr 15, 2021
Show whitespace changes
Inline Side-by-side

Showing with 22 additions and 17 deletions

official/core/train_lib.py official/core/train_lib.py +22 -17

No files found.
--- a/official/core/train_lib.py
+++ b/official/core/train_lib.py
@@ -15,7 +15,7 @@
 """TFM common training driver library."""
 # pytype: disable=attribute-error
 import os
-from typing import Any, Mapping, Tuple
+from typing import Any, Mapping, Tuple, Optional
 # Import libraries
 from absl import logging
@@ -23,21 +23,23 @@ import orbit
 import tensorflow as tf
 from official.core import base_task
+from official.core import base_trainer
 from official.core import config_definitions
 from official.core import train_utils
-BestCheckpointExporter = train_utils.BestCheckpointExporter
 maybe_create_best_ckpt_exporter = train_utils.maybe_create_best_ckpt_exporter
-def run_experiment(distribution_strategy: tf.distribute.Strategy,
+def run_experiment(
+    distribution_strategy: tf.distribute.Strategy,
    task: base_task.Task,
    mode: str,
    params: config_definitions.ExperimentConfig,
    model_dir: str,
    run_post_eval: bool = False,
-                   save_summary: bool = True) \
+    save_summary: bool = True,
-    -> Tuple[tf.keras.Model, Mapping[str, Any]]:
+    trainer: Optional[base_trainer.Trainer] = None
+) -> Tuple[tf.keras.Model, Mapping[str, Any]]:
  """Runs train/eval configured by the experiment params.
  Args:
@@ -50,6 +52,8 @@ def run_experiment(distribution_strategy: tf.distribute.Strategy,
    run_post_eval: Whether to run post eval once after training, metrics logs
      are returned.
    save_summary: Whether to save train and validation summary.
+    trainer: the base_trainer.Trainer instance. It should be created within the
+      strategy.scope().
  Returns:
    A 2-tuple of (model, eval_logs).
@@ -59,6 +63,7 @@ def run_experiment(distribution_strategy: tf.distribute.Strategy,
  """
  with distribution_strategy.scope():
+    if not trainer:
      trainer = train_utils.create_trainer(
          params,
          task,