Intermediate commit for argparse move

30fa4ebb · Eli Bixby · 25fe395c · 30fa4ebb · 30fa4ebb · 30fa4ebb
Commit 30fa4ebb authored Aug 15, 2017 by Eli Bixby
4 changed files
--- a/tutorials/image/cifar10_estimator/README.md
+++ b/tutorials/image/cifar10_estimator/README.md
@@ -34,8 +34,8 @@ data_batch_4  data_batch_5  readme.html  test_batch
 ```shell
 # This will generate a tf record for the training and test data available at the input_dir.
 # You can see more details in generate_cifar10_tfrecords.py
-$ python generate_cifar10_tfrecords.py --input_dir=/prefix/to/downloaded/data/cifar-10-batches-py \
+$ python generate_cifar10_tfrecords.py --input-dir=/prefix/to/downloaded/data/cifar-10-batches-py \
-                                       --output_dir=/prefix/to/downloaded/data/cifar-10-batches-py
+                                       --output-dir=/prefix/to/downloaded/data/cifar-10-batches-py
 ```
 After running the command above, you should see the following new files in the output_dir.
@@ -51,30 +51,30 @@ train.tfrecords validation.tfrecords eval.tfrecords
 ```
 # Run the model on CPU only. After training, it runs the evaluation.
-$ python cifar10_main.py --data_dir=/prefix/to/downloaded/data/cifar-10-batches-py \
+$ python cifar10_main.py --data-dir=/prefix/to/downloaded/data/cifar-10-batches-py \
-                         --model_dir=/tmp/cifar10 \
+                         --job-dir=/tmp/cifar10 \
-                         --is_cpu_ps=True \
+                         --is-cpu-ps=True \
-                         --num_gpus=0 \
+                         --num-gpus=0 \
-                         --train_steps=1000
+                         --train-steps=1000
 # Run the model on 2 GPUs using CPU as parameter server. After training, it runs the evaluation.
-$ python cifar10_main.py --data_dir=/prefix/to/downloaded/data/cifar-10-batches-py \
+$ python cifar10_main.py --data-dir=/prefix/to/downloaded/data/cifar-10-batches-py \
-                         --model_dir=/tmp/cifar10 \
+                         --job-dir=/tmp/cifar10 \
-                         --is_cpu_ps=True \
+                         --is-cpu-ps=True \
-                         --force_gpu_compatible=True \
+                         --force-gpu-compatible=True \
-                         --num_gpus=2 \
+                         --num-gpus=2 \
-                         --train_steps=1000
+                         --train-steps=1000
 # Run the model on 2 GPUs using GPU as parameter server.
 # It will run an experiment, which in a local setting basically means it will stop training
 # a couple of times to perform evaluation.
-$ python cifar10_main.py --data_dir=/prefix/to/downloaded/data/cifar-10-batches-bin \
+$ python cifar10_main.py --data-dir=/prefix/to/downloaded/data/cifar-10-batches-bin \
-                         --model_dir=/tmp/cifar10 \
+                         --job-dir=/tmp/cifar10 \
-                         --is_cpu_ps=False \
+                         --is-cpu-ps=False \
-                         --force_gpu_compatible=True \
+                         --force-gpu-compatible=True \
-                         --num_gpus=2 \
+                         --num-gpus=2 \
-                         --train_steps=1000
+                         --train-steps=1000
-                         --run_experiment=True
 # There are more command line flags to play with; check cifar10_main.py for details.
 ```
@@ -105,13 +105,13 @@ gcloud ml-engine jobs submit training cifarmultigpu \
    --region us-central1 \
    --module-name cifar10_estimator.cifar10_main \
    -- \
-    --data_dir=$MY_BUCKET/cifar-10-batches-py \
+    --data-dir=$MY_BUCKET/cifar-10-batches-py \
-    --model_dir=$MY_BUCKLET/model_dirs/cifarmultigpu \
+    --job-dir=$MY_BUCKET/model_dirs/cifarmultigpu \
-    --is_cpu_ps=True \
+    --is-cpu-ps=True \
-    --force_gpu_compatible=True \
+    --force-gpu-compatible=True \
-    --num_gpus=4 \
+    --num-gpus=4 \
-    --train_steps=1000 \
+    --train-steps=1000 \
-    --run_experiment=True
 ```
@@ -188,15 +188,15 @@ Once you have a `TF_CONFIG` configured properly on each host you're ready to run
 # It will run evaluation a couple of times during training.
 # The num_workers argument is used only to update the learning rate correctly.
 # Make sure the model_dir is the same as defined on the TF_CONFIG.
-$ python cifar10_main.py --data_dir=gs://path/cifar-10-batches-py \
+$ python cifar10_main.py --data-dir=gs://path/cifar-10-batches-py \
-                         --model_dir=gs://path/model_dir/ \
+                         --job-dir=gs://path/model_dir/ \
-                         --is_cpu_ps=True \
+                         --is-cpu-ps=True \
-                         --force_gpu_compatible=True \
+                         --force-gpu-compatible=True \
-                         --num_gpus=4 \
+                         --num-gpus=4 \
-                         --train_steps=40000 \
+                         --train-steps=40000 \
                         --sync=True \
-                         --run_experiment=True \
+                          \
-                         --num_workers=2
+                         --num-workers=2
 ```
 *Output:*
@@ -331,14 +331,13 @@ INFO:tensorflow:Saving dict for global step 1: accuracy = 0.0994, global_step =
 # Runs an Experiment in sync mode on 4 GPUs using CPU as parameter server for 40000 steps.
 # It will run evaluation a couple of times during training.
 # Make sure the model_dir is the same as defined on the TF_CONFIG.
-$ python cifar10_main.py --data_dir=gs://path/cifar-10-batches-py \
+$ python cifar10_main.py --data-dir=gs://path/cifar-10-batches-py \
-                         --model_dir=gs://path/model_dir/ \
+                         --job-dir=gs://path/model_dir/ \
-                         --is_cpu_ps=True \
+                         --is-cpu-ps=True \
-                         --force_gpu_compatible=True \
+                         --force-gpu-compatible=True \
-                         --num_gpus=4 \
+                         --num-gpus=4 \
-                         --train_steps=40000 \
+                         --train-steps=40000 \
                         --sync=True
-                         --run_experiment=True
 ```
 *Output:*
@@ -447,7 +446,7 @@ INFO:tensorflow:loss = 27.8453, step = 179 (18.893 sec)
 ```shell
 # Run this on ps:
 # The ps will not do training so most of the arguments won't affect the execution
-$ python cifar10_main.py --run_experiment=True --model_dir=gs://path/model_dir/
+$ python cifar10_main.py --job-dir=gs://path/model_dir/
 # There are more command line flags to play with; check cifar10_main.py for details.
 ```
@@ -480,7 +479,7 @@ You'll see something similar to this if you "point" TensorBoard to the `model_di
 # Check TensorBoard during training or after it.
 # Just point TensorBoard to the model_dir you chose on the previous step
 # by default the model_dir is "sentiment_analysis_output"
-$ tensorboard --log_dir="sentiment_analysis_output"
+$ tensorboard --logdir="sentiment_analysis_output"
 ```
 ## Warnings

--- a/tutorials/image/cifar10_estimator/cifar10_main.py
+++ b/tutorials/image/cifar10_estimator/cifar10_main.py
@@ -29,13 +29,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import argparse
 import functools
 import operator
 import os
-from . import cifar10
-from . import cifar10_model
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 import tensorflow as tf
@@ -44,84 +42,11 @@ from tensorflow.python.training import basic_session_run_hooks
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.training import training_util
+from . import cifar10
+from . import cifar10_model
 tf.logging.set_verbosity(tf.logging.INFO)
-FLAGS = tf.flags.FLAGS
-tf.flags.DEFINE_string('data_dir', '',
-                       'The directory where the CIFAR-10 input data is stored.')
-tf.flags.DEFINE_string('model_dir', '',
-                       'The directory where the model will be stored.')
-tf.flags.DEFINE_boolean('is_cpu_ps', True,
-                        'If using CPU as the parameter server.')
-tf.flags.DEFINE_integer('num_gpus', 1,
-                        'The number of gpus used. Uses only CPU if set to 0.')
-tf.flags.DEFINE_integer('num_layers', 44, 'The number of layers of the model.')
-tf.flags.DEFINE_integer('train_steps', 80000,
-                        'The number of steps to use for training.')
-tf.flags.DEFINE_integer('train_batch_size', 128, 'Batch size for training.')
-tf.flags.DEFINE_integer('eval_batch_size', 100, 'Batch size for validation.')
-tf.flags.DEFINE_float('momentum', 0.9, 'Momentum for MomentumOptimizer.')
-tf.flags.DEFINE_float('weight_decay', 2e-4, 'Weight decay for convolutions.')
-tf.flags.DEFINE_float('learning_rate', 0.1,
-                      'This is the inital learning rate value.'
-                      ' The learning rate will decrease during training.'
-                      ' For more details check the model_fn implementation'
-                      ' in this file.')
-tf.flags.DEFINE_boolean('use_distortion_for_training', True,
-                        'If doing image distortion for training.')
-tf.flags.DEFINE_boolean('run_experiment', False,
-                        'If True will run an experiment,'
-                        ' otherwise will run training and evaluation'
-                        ' using the estimator interface.'
-                        ' Experiments perform training on several workers in'
-                        ' parallel, in other words experiments know how to'
-                        ' invoke train and eval in a sensible fashion for'
-                        ' distributed training.')
-tf.flags.DEFINE_boolean('sync', False,
-                        'If true when running in a distributed environment'
-                        ' will run on sync mode.')
-tf.flags.DEFINE_integer('num_workers', 1, 'Number of workers.')
-# Perf flags
-tf.flags.DEFINE_integer('num_intra_threads', 1,
-                        'Number of threads to use for intra-op parallelism.'
-                        ' If set to 0, the system will pick an appropriate number.'
-                        ' The default is 1 since in this example CPU only handles'
-                        ' the input pipeline and gradient aggregation (when'
-                        ' --is_cpu_ps). Ops that could potentially benefit'
-                        ' from intra-op parallelism are scheduled to run on GPUs.')
-tf.flags.DEFINE_integer('num_inter_threads', 0,
-                        'Number of threads to use for inter-op'
-                        ' parallelism. If set to 0, the system will pick'
-                        ' an appropriate number.')
-tf.flags.DEFINE_boolean('force_gpu_compatible', False,
-                        'Whether to enable force_gpu_compatible in'
-                        ' GPU_Options. Check'
-                        ' tensorflow/core/protobuf/config.proto#L69'
-                        ' for details.')
-# Debugging flags
-tf.flags.DEFINE_boolean('log_device_placement', False,
-                        'Whether to log device placement.')
 class ExamplesPerSecondHook(session_run_hook.SessionRunHook):
  """Hook to print out examples per second.
@@ -221,19 +146,19 @@ class GpuParamServerDeviceSetter(object):
    return device_name
-def _create_device_setter(is_cpu_ps, worker, num_gpus):
+def _create_device_setter(avg_on_gpu, worker, num_gpus):
  """Create device setter object."""
-  if is_cpu_ps:
+  if avg_on_gpu:
+    gpus = ['/gpu:%d' % i for i in range(num_gpus)]
+    return GpuParamServerDeviceSetter(worker, gpus)
+  else:
    # tf.train.replica_device_setter supports placing variables on the CPU, all
    # on one GPU, or on ps_servers defined in a cluster_spec.
    return tf.train.replica_device_setter(
        worker_device=worker, ps_device='/cpu:0', ps_tasks=1)
-  else:
-    gpus = ['/gpu:%d' % i for i in range(num_gpus)]
-    return GpuParamServerDeviceSetter(worker, gpus)
+def get_model_fn(num_gpus, avg_on_gpu, num_workers):
-def _resnet_model_fn(features, labels, mode):
+  def _resnet_model_fn(features, labels, mode, params):
    """Resnet model body.
    Support single host, one or more GPU training. Parameter distribution can be
@@ -246,14 +171,13 @@ def _resnet_model_fn(features, labels, mode):
      features: a list of tensors, one for each tower
      labels: a list of tensors, one for each tower
      mode: ModeKeys.TRAIN or EVAL
+      params: Dictionary of Hyperparameters suitable for tuning
    Returns:
      A EstimatorSpec object.
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
-  is_cpu_ps = FLAGS.is_cpu_ps
+    weight_decay = params['weight_decay']
-  num_gpus = FLAGS.num_gpus
+    momentum = params['momentum']
-  weight_decay = FLAGS.weight_decay
-  momentum = FLAGS.momentum
    tower_features = features
    tower_labels = labels
@@ -264,18 +188,19 @@ def _resnet_model_fn(features, labels, mode):
    if num_gpus != 0:
      for i in range(num_gpus):
        worker = '/gpu:%d' % i
-      device_setter = _create_device_setter(is_cpu_ps, worker, FLAGS.num_gpus)
+        device_setter = _create_device_setter(avg_on_gpu, worker, num_gpus)
        with tf.variable_scope('resnet', reuse=bool(i != 0)):
          with tf.name_scope('tower_%d' % i) as name_scope:
            with tf.device(device_setter):
              _tower_fn(is_training, weight_decay, tower_features[i],
                        tower_labels[i], tower_losses, tower_gradvars,
-                      tower_preds, False)
+                        tower_preds, False, params['num_layers'])
              if i == 0:
-              # Only trigger batch_norm moving mean and variance update from the
+                # Only trigger batch_norm moving mean and variance update from
-              # 1st tower. Ideally, we should grab the updates from all towers
+                # the 1st tower. Ideally, we should grab the updates from all
-              # but these stats accumulate extremely fast so we can ignore the
+                # towers but these stats accumulate extremely fast so we can
-              # other stats from the other towers without significant detriment.
+                # ignore the other stats from the other towers without
+                # significant detriment.
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                               name_scope)
    else:
@@ -287,11 +212,9 @@ def _resnet_model_fn(features, labels, mode):
    # Now compute global loss and gradients.
    gradvars = []
-  # parameter server here isn't necessarily one server storing the model params.
+    # Server that runs the ops to apply global gradient updates.
-  # (For gpu-as-ps case, model params are distributed evenly across all gpus.)
+    avg_device = '/gpu:0' if avg_on_gpu else '/cpu:0'
-  # It's the server that runs the ops to apply global gradient updates.
+    with tf.device(avg_device):
-  ps_device = '/cpu:0' if is_cpu_ps else '/gpu:0'
-  with tf.device(ps_device):
      with tf.name_scope('gradient_averaging'):
        loss = tf.reduce_mean(tower_losses, name='loss')
        for zipped_gradvars in zip(*tower_gradvars):
@@ -309,12 +232,12 @@ def _resnet_model_fn(features, labels, mode):
      # https://github.com/ppwwyyxx/tensorpack/blob/master/examples/ResNet/cifar10-resnet.py#L155
      # users could apply other scheduling.
      num_batches_per_epoch = cifar10.Cifar10DataSet.num_examples_per_epoch(
-        'train') // (FLAGS.train_batch_size * FLAGS.num_workers)
+          'train') // (params['train_batch_size'] * num_workers)
      boundaries = [
          num_batches_per_epoch * x
          for x in np.array([82, 123, 300], dtype=np.int64)
      ]
-    staged_lr = [FLAGS.learning_rate * x for x in [1, 0.1, 0.01, 0.002]]
+      staged_lr = [params['learning_rate'] * x for x in [1, 0.1, 0.01, 0.002]]
      learning_rate = tf.train.piecewise_constant(tf.train.get_global_step(),
                                                  boundaries, staged_lr)
@@ -325,10 +248,10 @@ def _resnet_model_fn(features, labels, mode):
          learning_rate=learning_rate, momentum=momentum)
      chief_hooks = []
-    if FLAGS.sync:
+      if params['sync']:
        optimizer = tf.train.SyncReplicasOptimizer(
            optimizer,
-          replicas_to_aggregate=FLAGS.num_workers)
+            replicas_to_aggregate=num_workers)
        sync_replicas_hook = optimizer.make_session_run_hook(True)
        chief_hooks.append(sync_replicas_hook)
@@ -361,7 +284,7 @@ def _resnet_model_fn(features, labels, mode):
 def _tower_fn(is_training, weight_decay, feature, label, tower_losses,
-              tower_gradvars, tower_preds, is_cpu):
+              tower_gradvars, tower_preds, is_cpu, num_layers):
  """Build computation tower for each device (CPU or GPU).
  Args:
@@ -376,7 +299,7 @@ def _tower_fn(is_training, weight_decay, feature, label, tower_losses,
  """
  data_format = 'channels_last' if is_cpu else 'channels_first'
  model = cifar10_model.ResNetCifar10(
-      FLAGS.num_layers, is_training=is_training, data_format=data_format)
+      num_layers, is_training=is_training, data_format=data_format)
  logits = model.forward_pass(feature, input_data_format='channels_last')
  tower_pred = {
      'classes': tf.argmax(input=logits, axis=1),
@@ -397,7 +320,8 @@ def _tower_fn(is_training, weight_decay, feature, label, tower_losses,
  tower_gradvars.append(zip(tower_grad, model_params))
-def input_fn(subset, num_shards):
+def input_fn(data_dir, subset, num_shards, batch_size,
+             use_distortion_for_training=True):
  """Create input graph for model.
  Args:
@@ -406,16 +330,9 @@ def input_fn(subset, num_shards):
  Returns:
    two lists of tensors for features and labels, each of num_shards length.
  """
-  if subset == 'train':
-    batch_size = FLAGS.train_batch_size
-  elif subset == 'validate' or subset == 'eval':
-    batch_size = FLAGS.eval_batch_size
-  else:
-    raise ValueError('Subset must be one of \'train\''
-                     ', \'validate\' and \'eval\'')
  with tf.device('/cpu:0'):
-    use_distortion = subset == 'train' and FLAGS.use_distortion_for_training
+    use_distortion = subset == 'train' and use_distortion_for_training
-    dataset = cifar10.Cifar10DataSet(FLAGS.data_dir, subset, use_distortion)
+    dataset = cifar10.Cifar10DataSet(data_dir, subset, use_distortion)
    image_batch, label_batch = dataset.make_batch(batch_size)
    if num_shards <= 1:
      # No GPU available or only 1 GPU.
@@ -439,20 +356,73 @@ def input_fn(subset, num_shards):
 # create experiment
-def get_experiment_fn(train_input_fn, eval_input_fn, train_steps, eval_steps,
+def get_experiment_fn(data_dir, num_gpus, is_gpu_ps,
-                      train_hooks):
+                      use_distortion_for_training=True):
  """Returns an Experiment function.
  Experiments perform training on several workers in parallel,
  in other words experiments know how to invoke train and eval in a sensible
-  fashion for distributed training.
+  fashion for distributed training. Arguments passed directly to this
+  function are not tunable, all other arguments should be passed within
+  tf.HParams, passed to the enclosed function.
+  Args:
+      data_dir: str. Location of the data for input_fns.
+      num_gpus: int. Number of GPUs on each worker.
+      is_gpu_ps: bool. If true, average gradients on GPUs.
+      use_distortion_for_training: bool. See cifar10.Cifar10DataSet.
+  Returns:
+      A function (tf.estimator.RunConfig, tf.contrib.training.HParams) ->
+      tf.contrib.learn.Experiment.
+      Suitable for use by tf.contrib.learn.learn_runner, which will run various
+      methods on Experiment (train, evaluate) based on information
+      about the current runner in `run_config`.
  """
  def _experiment_fn(run_config, hparams):
    """Returns an Experiment."""
-    del hparams  # Unused arg.
    # Create estimator.
-    classifier = tf.estimator.Estimator(model_fn=_resnet_model_fn,
+    train_input_fn = functools.partial(
-                                        config=run_config)
+        input_fn,
+        data_dir,
+        subset='train',
+        num_shards=num_gpus,
+        batch_size=hparams.train_batch_size,
+        use_distortion_for_training=use_distortion_for_training
+    )
+    eval_input_fn = functools.partial(
+        input_fn,
+        data_dir,
+        subset='eval',
+        batch_size=hparams.eval_batch_size,
+        num_shards=num_gpus
+    )
+    num_eval_examples = cifar10.Cifar10DataSet.num_examples_per_epoch('eval')
+    if num_eval_examples % hparams.eval_batch_size != 0:
+      raise ValueError('validation set size must be multiple of eval_batch_size')
+    train_steps = hparams.train_steps
+    eval_steps = num_eval_examples // hparams.eval_batch_size
+    examples_sec_hook = ExamplesPerSecondHook(
+      hparams.train_batch_size, every_n_steps=10)
+    tensors_to_log = {'learning_rate': 'learning_rate',
+                      'loss': 'gradient_averaging/loss'}
+    logging_hook = tf.train.LoggingTensorHook(
+      tensors=tensors_to_log, every_n_iter=100)
+    hooks = [logging_hook, examples_sec_hook]
+    classifier = tf.estimator.Estimator(
+        model_fn=get_model_fn(
+            num_gpus, is_gpu_ps, run_config.num_worker_replicas),
+        config=run_config,
+        params=vars(hparams)
+    )
    # Create experiment.
    experiment = tf.contrib.learn.Experiment(
        classifier,
@@ -461,89 +431,189 @@ def get_experiment_fn(train_input_fn, eval_input_fn, train_steps, eval_steps,
        train_steps=train_steps,
        eval_steps=eval_steps)
    # Adding hooks to be used by the estimator on training mode.
-    experiment.extend_train_hooks(train_hooks)
+    experiment.extend_train_hooks(hooks)
    return experiment
  return _experiment_fn
-def main(unused_argv):
+def main(job_dir,
+         data_dir,
+         num_gpus,
+         avg_on_gpu,
+         use_distortion_for_training,
+         log_device_placement,
+         num_intra_threads,
+         force_gpu_compatible,
+         **hparams):
  # The env variable is on deprecation path, default is set to off.
  os.environ['TF_SYNC_ON_FINISH'] = '0'
-  if FLAGS.num_gpus < 0:
-    raise ValueError(
-        'Invalid GPU count: \"num_gpus\" must be 0 or a positive integer.')
-  if FLAGS.num_gpus == 0 and not FLAGS.is_cpu_ps:
-    raise ValueError(
-        'No GPU available for use, must use CPU as parameter server.')
-  if (FLAGS.num_layers - 2) % 6 != 0:
-    raise ValueError('Invalid num_layers parameter.')
-  if FLAGS.num_gpus != 0 and FLAGS.train_batch_size % FLAGS.num_gpus != 0:
-    raise ValueError('train_batch_size must be multiple of num_gpus.')
-  if FLAGS.num_gpus != 0 and FLAGS.eval_batch_size % FLAGS.num_gpus != 0:
-    raise ValueError('eval_batch_size must be multiple of num_gpus.')
-  num_eval_examples = cifar10.Cifar10DataSet.num_examples_per_epoch('eval')
-  if num_eval_examples % FLAGS.eval_batch_size != 0:
-    raise ValueError('validation set size must be multiple of eval_batch_size')
-  train_input_fn = functools.partial(input_fn, subset='train',
-                                     num_shards=FLAGS.num_gpus)
-  eval_input_fn = functools.partial(input_fn, subset='eval',
-                                    num_shards=FLAGS.num_gpus)
-  train_steps = FLAGS.train_steps
-  eval_steps = num_eval_examples // FLAGS.eval_batch_size
  # Session configuration.
  sess_config = tf.ConfigProto(
      allow_soft_placement=True,
-      log_device_placement=FLAGS.log_device_placement,
+      log_device_placement=log_device_placement,
-      intra_op_parallelism_threads=FLAGS.num_intra_threads,
+      intra_op_parallelism_threads=num_intra_threads,
-      inter_op_parallelism_threads=FLAGS.num_inter_threads,
      gpu_options=tf.GPUOptions(
-          force_gpu_compatible=FLAGS.force_gpu_compatible
+          force_gpu_compatible=force_gpu_compatible
      )
  )
-  # Hooks that add extra logging that is useful to see the loss more often in
+  config = tf.contrib.learn.RunConfig(
-  # the console as well as examples per second.
+      session_config=sess_config,
-  tensors_to_log = {'learning_rate': 'learning_rate',
+      model_dir=job_dir)
-                    'loss': 'gradient_averaging/loss'}
-  logging_hook = tf.train.LoggingTensorHook(
-      tensors=tensors_to_log, every_n_iter=100)
-  examples_sec_hook = ExamplesPerSecondHook(
-      FLAGS.train_batch_size, every_n_steps=10)
-  hooks = [logging_hook, examples_sec_hook]
-  if FLAGS.run_experiment:
-    config = tf.contrib.learn.RunConfig(model_dir=FLAGS.model_dir)
-    config = config.replace(session_config=sess_config)
  tf.contrib.learn.learn_runner.run(
-        get_experiment_fn(train_input_fn, eval_input_fn,
+      get_experiment_fn(
-                          train_steps, eval_steps,
+          data_dir,
-                          hooks), run_config=config)
+          num_gpus,
+          avg_on_gpu,
+          use_distortion_for_training
+      ),
+      run_config=config,
+      hparams=tf.contrib.training.HParams(**hparams)
+  )
-  else:
-    config = tf.estimator.RunConfig()
-    config = config.replace(session_config=sess_config)
-    classifier = tf.estimator.Estimator(
-        model_fn=_resnet_model_fn, model_dir=FLAGS.model_dir, config=config)
-    print('Starting to train...')
+if __name__ == '__main__':
-    classifier.train(input_fn=train_input_fn,
+  parser = argparse.ArgumentParser()
-                     steps=train_steps,
+  parser.add_argument(
-                     hooks=hooks)
+      '--data-dir',
+      type=str,
+      required=True,
+      help='The directory where the CIFAR-10 input data is stored.'
+  )
+  parser.add_argument(
+      '--job-dir',
+      type=str,
+      required=True,
+      help='The directory where the model will be stored.'
+  )
+  parser.add_argument(
+      '--avg-on-gpu',
+      action='store_true',
+      default=False,
+      help='If present, use GPU to average gradients.'
+  )
+  parser.add_argument(
+      '--num-gpus',
+      type=int,
+      default=1,
+      help='The number of gpus used. Uses only CPU if set to 0.'
+  )
+  parser.add_argument(
+      '--num-layers',
+      type=int,
+      default=44,
+      help='The number of layers of the model.'
+  )
+  parser.add_argument(
+      '--train-steps',
+      type=int,
+      default=80000,
+      help='The number of steps to use for training.'
+  )
+  parser.add_argument(
+      '--train-batch-size',
+      type=int,
+      default=128,
+      help='Batch size for training.'
+  )
+  parser.add_argument(
+      '--eval-batch-size',
+      type=int,
+      default=100,
+      help='Batch size for validation.'
+  )
+  parser.add_argument(
+      '--momentum',
+      type=float,
+      default=0.9,
+      help='Momentum for MomentumOptimizer.'
+  )
+  parser.add_argument(
+      '--weight-decay',
+      type=float,
+      default=2e-4,
+      help='Weight decay for convolutions.'
+  )
+  parser.add_argument(
+      '--learning-rate',
+      type=float,
+      default=0.1,
+      help="""\
+      This is the inital learning rate value. The learning rate will decrease
+      during training. For more details check the model_fn implementation in
+      this file.\
+      """
+  )
+  parser.add_argument(
+      '--use-distortion-for-training',
+      type=bool,
+      default=True,
+      help='If doing image distortion for training.'
+  )
+  parser.add_argument(
+      '--sync',
+      action='store_true',
+      default=False,
+      help="""\
+      If present when running in a distributed environment will run on sync mode.\
+      """
+  )
+  parser.add_argument(
+      '--num-workers',
+      type=int,
+      default=1,
+      help='Number of workers.'
+  )
+  parser.add_argument(
+      '--num-intra-threads',
+      type=int,
+      default=1,
+      help="""\
+      Number of threads to use for intra-op parallelism. If set to 0, the
+      system will pick an appropriate number. The default is 1 since in this
+      example CPU only handles the input pipeline and gradient aggregation
+      (when --is-cpu-ps). Ops that could potentially benefit from intra-op
+      parallelism are scheduled to run on GPUs.\
+      """
+  )
+  parser.add_argument(
+      '--num-inter-threads',
+      type=int,
+      default=0,
+      help="""\
+      Number of threads to use for inter-op parallelism. If set to 0, the
+      system will pick an appropriate number.\
+      """
+  )
+  parser.add_argument(
+      '--force-gpu-compatible',
+      action='store_true',
+      default=False,
+      help="""\
+      Whether to enable force_gpu_compatible in GPU_Options. Check
+      tensorflow/core/protobuf/config.proto#L69 for details.\
+      """
+  )
+  parser.add_argument(
+      '--log-device-placement',
+      action='store_true',
+      default=False,
+      help='Whether to log device placement.'
+  )
+  args = parser.parse_args()
-    print('Starting to evaluate...')
+  if args.num_gpus < 0:
-    eval_results = classifier.evaluate(
+    raise ValueError(
-        input_fn=eval_input_fn,
+        'Invalid GPU count: \"num_gpus\" must be 0 or a positive integer.')
-        steps=eval_steps)
+  # Gradients cannot be averaged on a GPU when no GPU is in use, so a CPU-only
+  # run must reject --avg-on-gpu (not its absence).
+  if args.num_gpus == 0 and args.avg_on_gpu:
-    print(eval_results)
+    raise ValueError(
+        'No GPU available for use, must use CPU to average gradients.')
+  if (args.num_layers - 2) % 6 != 0:
+    raise ValueError('Invalid num_layers parameter.')
+  if args.num_gpus != 0 and args.train_batch_size % args.num_gpus != 0:
+    raise ValueError('train_batch_size must be multiple of num_gpus.')
+  if args.num_gpus != 0 and args.eval_batch_size % args.num_gpus != 0:
+    raise ValueError('eval_batch_size must be multiple of num_gpus.')
-if __name__ == '__main__':
+  main(**vars(args))
-  tf.app.run()
--- a/tutorials/image/cifar10_estimator/generate_cifar10_tfrecords.py
+++ b/tutorials/image/cifar10_estimator/generate_cifar10_tfrecords.py
@@ -22,19 +22,15 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import argparse
 import cPickle
 import os
 import tensorflow as tf
-FLAGS = tf.flags.FLAGS
+FLAGS = None
-tf.flags.DEFINE_string('input_dir', '',
-                       'Directory where CIFAR10 data is located.')
-tf.flags.DEFINE_string('output_dir', '',
-                       'Directory where TFRecords will be saved.'
-                       'The TFRecords will have the same name as'
-                       ' the CIFAR10 inputs + .tfrecords.')
 def _int64_feature(value):
@@ -91,4 +87,22 @@ def main(unused_argv):
 if __name__ == '__main__':
+  # Register both the dashed spelling (used in the README) and the original
+  # underscored spelling. argparse derives the attribute name from the first
+  # long option, so the parsed values remain `input_dir` / `output_dir`.
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--input-dir', '--input_dir',
+      type=str,
+      default='',
+      help='Directory where CIFAR10 data is located.'
+  )
+  parser.add_argument(
+      '--output-dir', '--output_dir',
+      type=str,
+      default='',
+      help="""\
+      Directory where TFRecords will be saved. The TFRecords will have the same
+      name as the CIFAR10 inputs + .tfrecords.\
+      """
+  )
+  FLAGS = parser.parse_args()
   tf.app.run(main)
--- a/tutorials/image/cifar10_estimator/model_base.py
+++ b/tutorials/image/cifar10_estimator/model_base.py
@@ -23,12 +23,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import argparse
 import tensorflow as tf
-FLAGS = tf.flags.FLAGS
+FLAGS = None
-tf.flags.DEFINE_float('batch_norm_decay', 0.997, 'Decay for batch norm.')
-tf.flags.DEFINE_float('batch_norm_epsilon', 1e-5, 'Epsilon for batch norm.')
 class ResNet(object):