Commit 59c218f5 authored by Yukun Zhu, committed by aquariusjay

Adding quantization support for deeplab (#6681)

* deeplab quantize

* Fix bug in train.py

* Create quantize.md
parent a182abc1
@@ -38,8 +38,8 @@ flags.DEFINE_string('checkpoint_dir', None, 'Directory of model checkpoints.')
flags.DEFINE_integer('eval_batch_size', 1,
'The number of images in each batch during evaluation.')
flags.DEFINE_multi_integer('eval_crop_size', [513, 513],
'Image crop size [height, width] for evaluation.')
flags.DEFINE_list('eval_crop_size', '513,513',
'Image crop size [height, width] for evaluation.')
flags.DEFINE_integer('eval_interval_secs', 60 * 5,
'How often (in seconds) to run evaluation.')
@@ -61,6 +61,10 @@ flags.DEFINE_multi_float('eval_scales', [1.0],
flags.DEFINE_bool('add_flipped_images', False,
'Add flipped images for evaluation or not.')
flags.DEFINE_integer(
'quantize_delay_step', -1,
'Steps to start quantized training. If < 0, will not quantize model.')
# Dataset settings.
flags.DEFINE_string('dataset', 'pascal_voc_seg',
@@ -84,7 +88,7 @@ def main(unused_argv):
split_name=FLAGS.eval_split,
dataset_dir=FLAGS.dataset_dir,
batch_size=FLAGS.eval_batch_size,
crop_size=FLAGS.eval_crop_size,
crop_size=map(int, FLAGS.eval_crop_size),
min_resize_value=FLAGS.min_resize_value,
max_resize_value=FLAGS.max_resize_value,
resize_factor=FLAGS.resize_factor,
@@ -102,15 +106,15 @@ def main(unused_argv):
model_options = common.ModelOptions(
outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
crop_size=FLAGS.eval_crop_size,
crop_size=map(int, FLAGS.eval_crop_size),
atrous_rates=FLAGS.atrous_rates,
output_stride=FLAGS.output_stride)
# Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
samples[common.IMAGE].set_shape(
[FLAGS.eval_batch_size,
FLAGS.eval_crop_size[0],
FLAGS.eval_crop_size[1],
int(FLAGS.eval_crop_size[0]),
int(FLAGS.eval_crop_size[1]),
3])
if tuple(FLAGS.eval_scales) == (1.0,):
tf.logging.info('Performing single-scale test.')
@@ -118,6 +122,10 @@ def main(unused_argv):
image_pyramid=FLAGS.image_pyramid)
else:
tf.logging.info('Performing multi-scale test.')
if FLAGS.quantize_delay_step >= 0:
raise ValueError(
'Quantize mode is not supported with multi-scale test.')
predictions = model.predict_labels_multi_scale(
samples[common.IMAGE],
model_options=model_options,
@@ -154,6 +162,9 @@ def main(unused_argv):
if FLAGS.max_number_of_evaluations > 0:
num_eval_iters = FLAGS.max_number_of_evaluations
if FLAGS.quantize_delay_step >= 0:
tf.contrib.quantize.create_eval_graph()
tf.contrib.tfprof.model_analyzer.print_model_analysis(
tf.get_default_graph(),
tfprof_options=tf.contrib.tfprof.model_analyzer.
......
@@ -53,6 +53,10 @@ flags.DEFINE_multi_float('inference_scales', [1.0],
flags.DEFINE_bool('add_flipped_images', False,
'Add flipped images during inference or not.')
flags.DEFINE_integer(
'quantize_delay_step', -1,
'Steps to start quantized training. If < 0, will not quantize model.')
flags.DEFINE_bool('save_inference_graph', False,
'Save inference graph in text proto.')
@@ -124,6 +128,9 @@ def main(unused_argv):
image_pyramid=FLAGS.image_pyramid)
else:
tf.logging.info('Exported model performs multi-scale inference.')
if FLAGS.quantize_delay_step >= 0:
raise ValueError(
'Quantize mode is not supported with multi-scale test.')
predictions = model.predict_labels_multi_scale(
image,
model_options=model_options,
@@ -150,7 +157,10 @@ def main(unused_argv):
semantic_predictions = _resize_label(semantic_predictions, image_size)
semantic_predictions = tf.identity(semantic_predictions, name=_OUTPUT_NAME)
saver = tf.train.Saver(tf.model_variables())
if FLAGS.quantize_delay_step >= 0:
tf.contrib.quantize.create_eval_graph()
saver = tf.train.Saver(tf.all_variables())
dirname = os.path.dirname(FLAGS.export_path)
tf.gfile.MakeDirs(dirname)
......
@@ -57,8 +57,7 @@ python deeplab/train.py \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--train_crop_size=513 \
--train_crop_size=513 \
--train_crop_size="513,513" \
--train_batch_size=4 \
--min_resize_value=513 \
--max_resize_value=513 \
......
@@ -50,8 +50,7 @@ python deeplab/train.py \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--train_crop_size=769 \
--train_crop_size=769 \
--train_crop_size="769,769" \
--train_batch_size=1 \
--dataset="cityscapes" \
--tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
@@ -103,8 +102,7 @@ python deeplab/eval.py \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--eval_crop_size=1025 \
--eval_crop_size=2049 \
--eval_crop_size="1025,2049" \
--dataset="cityscapes" \
--checkpoint_dir=${PATH_TO_CHECKPOINT} \
--eval_logdir=${PATH_TO_EVAL_DIR} \
@@ -130,8 +128,7 @@ python deeplab/vis.py \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--vis_crop_size=1025 \
--vis_crop_size=2049 \
--vis_crop_size="1025,2049" \
--dataset="cityscapes" \
--colormap_type="cityscapes" \
--checkpoint_dir=${PATH_TO_CHECKPOINT} \
......
@@ -52,8 +52,7 @@ python deeplab/train.py \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--train_crop_size=513 \
--train_crop_size=513 \
--train_crop_size="513,513" \
--train_batch_size=1 \
--dataset="pascal_voc_seg" \
--tf_initial_checkpoint=${PATH_TO_INITIAL_CHECKPOINT} \
@@ -96,8 +95,7 @@ python deeplab/eval.py \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--eval_crop_size=513 \
--eval_crop_size=513 \
--eval_crop_size="513,513" \
--dataset="pascal_voc_seg" \
--checkpoint_dir=${PATH_TO_CHECKPOINT} \
--eval_logdir=${PATH_TO_EVAL_DIR} \
@@ -123,8 +121,7 @@ python deeplab/vis.py \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--vis_crop_size=513 \
--vis_crop_size=513 \
--vis_crop_size="513,513" \
--dataset="pascal_voc_seg" \
--checkpoint_dir=${PATH_TO_CHECKPOINT} \
--vis_logdir=${PATH_TO_VIS_DIR} \
......
# Quantize DeepLab model for faster on-device inference
This page describes the steps required to quantize a DeepLab model and convert
it to TFLite for on-device inference. The main steps include:
1. Quantization-aware training
1. Exporting model
1. Converting to TFLite FlatBuffer
We provide details for each step below.
## Quantization-aware training
DeepLab supports two approaches to quantizing your model:
1. **[Recommended]** Train a non-quantized model until convergence, then
fine-tune the trained float model with quantization using a small learning
rate (on PASCAL we use 3e-5). This fine-tuning step usually takes 2k to 5k
steps to converge.
1. Train a DeepLab float model with delayed quantization. Usually we delay
quantization until the last few thousand steps of training (see the sketch
after this list).
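For concreteness, here is a minimal sketch of how delayed quantization hooks
into a TF 1.x training graph: `tf.contrib.quantize.create_training_graph`
rewrites the graph with fake-quantization ops, and its `quant_delay` argument
(wired to the `quantize_delay_step` flag added in this commit) controls when
quantized training kicks in. The helper below is illustrative, not DeepLab's
actual code:
```
import tensorflow as tf

def build_train_op(loss, learning_rate, quantize_delay_step):
  """Illustrative helper: quantization-aware training with a delay."""
  if quantize_delay_step >= 0:
    # Rewrites the default graph in place, inserting fake-quant ops on
    # weights and activations; quantization becomes active only after
    # `quant_delay` global steps.
    tf.contrib.quantize.create_training_graph(
        quant_delay=quantize_delay_step)
  optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
  return optimizer.minimize(
      loss, global_step=tf.train.get_or_create_global_step())
```
At evaluation and export time, the matching call is
`tf.contrib.quantize.create_eval_graph()`, which is what `eval.py` and
`export_model.py` do in this commit when `quantize_delay_step >= 0`.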
In the current implementation, quantization is supported only with 1)
`num_clones=1` for training and 2) single-scale inference for evaluation,
visualization, and model export. To get the best performance from the quantized
model, we strongly recommend training the float model with a larger
`num_clones` and then fine-tuning it with a single clone.
The command line below quantizes a DeepLab model trained on the PASCAL VOC
dataset using fine-tuning:
```
# From tensorflow/models/research/
python deeplab/train.py \
--logtostderr \
--training_number_of_steps=3000 \
--train_split="train" \
--model_variant="mobilenet_v2" \
--output_stride=16 \
--train_crop_size="513,513" \
--train_batch_size=8 \
--base_learning_rate=3e-5 \
--dataset="pascal_voc_seg" \
--initialize_last_layer \
--quantize_delay_step=0 \
--tf_initial_checkpoint=${PATH_TO_TRAINED_FLOAT_MODEL} \
--train_logdir=${PATH_TO_TRAIN_DIR} \
--dataset_dir=${PATH_TO_DATASET}
```
## Converting to TFLite FlatBuffer
First, use the following command line to export your trained model.
```
# From tensorflow/models/research/
python deeplab/export_model.py \
--checkpoint_path=${CHECKPOINT_PATH} \
--quantize_delay_step=0 \
--export_path=${OUTPUT_DIR}/frozen_inference_graph.pb
```
The command line below shows how to convert the exported GraphDef to a TFLite model.
```
tflite_convert \
--graph_def_file=${OUTPUT_DIR}/frozen_inference_graph.pb \
--output_file=${OUTPUT_DIR}/frozen_inference_graph.tflite \
--output_format=TFLITE \
--input_shape=1,513,513,3 \
--input_arrays="MobilenetV2/MobilenetV2/input" \
--inference_type=QUANTIZED_UINT8 \
--inference_input_type=QUANTIZED_UINT8 \
--std_dev_values=128 \
--mean_values=128 \
--change_concat_input_ranges=true \
--output_arrays="ArgMax"
```
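For reference on the `--mean_values`/`--std_dev_values` pair above: under
TFLite's uint8 quantization convention, a quantized input value `q` represents
the float `(q - mean) / std`, so `mean=128, std=128` maps the uint8 range
[0, 255] to roughly [-1, 1), matching MobileNet-style input scaling. A quick
check:
```
# TFLite uint8 convention: real_value = (q - mean) / std.
mean, std = 128.0, 128.0
for q in (0, 128, 255):
  print('%3d -> %+.3f' % (q, (q - mean) / std))
# prints:   0 -> -1.000, 128 -> +0.000, 255 -> +0.992
```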
**[Important]** Note that the converted model expects a 513x513 RGB input and
doesn't include preprocessing (resizing and padding the input image) or
post-processing (cropping the padded region and resizing to the original input
size). These steps can be implemented outside of the TFLite model, as sketched
below.
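A minimal sketch of those outside-the-model steps, assuming TensorFlow 1.13+
(for `tf.lite.Interpreter`), NumPy, and Pillow; the function name and the pad
value of 128 (which dequantizes to 0.0 under the scaling above) are
illustrative choices, not part of the released model:
```
import numpy as np
import tensorflow as tf
from PIL import Image

_INPUT_SIZE = 513

def run_quantized_deeplab(tflite_path, image_path):
  """Resize/pad the input, run the TFLite model, crop/resize the labels back."""
  image = Image.open(image_path).convert('RGB')
  width, height = image.size
  scale = float(_INPUT_SIZE) / max(width, height)
  new_w, new_h = int(width * scale), int(height * scale)
  resized = image.resize((new_w, new_h), Image.BILINEAR)
  # Pad to 513x513 with 128, which maps to 0.0 after dequantization.
  padded = np.full((_INPUT_SIZE, _INPUT_SIZE, 3), 128, dtype=np.uint8)
  padded[:new_h, :new_w] = np.asarray(resized)

  interpreter = tf.lite.Interpreter(model_path=tflite_path)
  interpreter.allocate_tensors()
  input_detail = interpreter.get_input_details()[0]
  output_detail = interpreter.get_output_details()[0]
  interpreter.set_tensor(input_detail['index'], padded[np.newaxis, ...])
  interpreter.invoke()
  labels = interpreter.get_tensor(output_detail['index'])[0]  # ArgMax output.

  # Crop the padded region and resize back to the original image size.
  labels = Image.fromarray(labels[:new_h, :new_w].astype(np.uint8))
  return np.asarray(labels.resize((width, height), Image.NEAREST))
```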
## Quantized model on PASCAL VOC
We provide float and quantized checkpoints that have been pretrained on the VOC
2012 train_aug set, using a MobileNet-v2 backbone with different depth
multipliers. Quantized models usually see about a 1% drop in mIoU.
For each quantized (8-bit) model, the un-tar'ed directory includes:
* a frozen inference graph (frozen_inference_graph.pb)
* a checkpoint (model.ckpt.data*, model.ckpt.index)
* a converted TFLite FlatBuffer file (frozen_inference_graph.tflite)
Checkpoint name | Eval OS | Eval scales | Left-right Flip | Multiply-Adds | Quantize | PASCAL mIoU | File Size
-------------------------------------------------------------------------------------------------------------------------------------------- | :-----: | :---------: | :-------------: | :-----------: | :------: | :----------: | :-------:
[mobilenetv2_dm05_coco_voc_trainaug](http://download.tensorflow.org/models/deeplabv3_mnv2_dm05_pascal_trainaug_2018_10_01.tar.gz) | 16 | [1.0] | No | 0.88B | No | 70.19% (val) | 7.6MB
[mobilenetv2_dm05_coco_voc_trainaug_8bit](http://download.tensorflow.org/models/deeplabv3_mnv2_dm05_pascal_train_aug_8bit_2019_04_26.tar.gz) | 16 | [1.0] | No | 0.88B | Yes | 69.65% (val) | 8.2MB
[mobilenetv2_coco_voc_trainaug](http://download.tensorflow.org/models/deeplabv3_mnv2_pascal_train_aug_2018_01_29.tar.gz) | 16 | [1.0] | No | 2.75B | No | 75.32% (val) | 23MB
[mobilenetv2_coco_voc_trainaug_8bit](http://download.tensorflow.org/models/deeplabv3_mnv2_pascal_train_aug_8bit_2019_04_26.tar.gz) | 16 | [1.0] | No | 2.75B | Yes | 74.26% (val) | 24MB
Note that you might need the nightly build of TensorFlow (see
[here](https://www.tensorflow.org/install) for installation instructions) to
convert the above quantized models to TFLite.
@@ -82,8 +82,7 @@ python "${WORK_DIR}"/train.py \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--train_crop_size=513 \
--train_crop_size=513 \
--train_crop_size="513,513" \
--train_batch_size=4 \
--training_number_of_steps="${NUM_ITERATIONS}" \
--fine_tune_batch_norm=true \
@@ -103,8 +102,7 @@ python "${WORK_DIR}"/eval.py \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--eval_crop_size=513 \
--eval_crop_size=513 \
--eval_crop_size="513,513" \
--checkpoint_dir="${TRAIN_LOGDIR}" \
--eval_logdir="${EVAL_LOGDIR}" \
--dataset_dir="${PASCAL_DATASET}" \
@@ -120,8 +118,7 @@ python "${WORK_DIR}"/vis.py \
--atrous_rates=18 \
--output_stride=16 \
--decoder_output_stride=4 \
--vis_crop_size=513 \
--vis_crop_size=513 \
--vis_crop_size="513,513" \
--checkpoint_dir="${TRAIN_LOGDIR}" \
--vis_logdir="${VIS_LOGDIR}" \
--dataset_dir="${PASCAL_DATASET}" \
......
@@ -79,8 +79,7 @@ python "${WORK_DIR}"/train.py \
--train_split="trainval" \
--model_variant="mobilenet_v2" \
--output_stride=16 \
--train_crop_size=513 \
--train_crop_size=513 \
--train_crop_size="513,513" \
--train_batch_size=4 \
--training_number_of_steps="${NUM_ITERATIONS}" \
--fine_tune_batch_norm=true \
@@ -95,8 +94,7 @@ python "${WORK_DIR}"/eval.py \
--logtostderr \
--eval_split="val" \
--model_variant="mobilenet_v2" \
--eval_crop_size=513 \
--eval_crop_size=513 \
--eval_crop_size="513,513" \
--checkpoint_dir="${TRAIN_LOGDIR}" \
--eval_logdir="${EVAL_LOGDIR}" \
--dataset_dir="${PASCAL_DATASET}" \
@@ -107,8 +105,7 @@ python "${WORK_DIR}"/vis.py \
--logtostderr \
--vis_split="val" \
--model_variant="mobilenet_v2" \
--vis_crop_size=513 \
--vis_crop_size=513 \
--vis_crop_size="513,513" \
--checkpoint_dir="${TRAIN_LOGDIR}" \
--vis_logdir="${VIS_LOGDIR}" \
--dataset_dir="${PASCAL_DATASET}" \
......
@@ -107,8 +107,8 @@ flags.DEFINE_integer('train_batch_size', 8,
flags.DEFINE_float('weight_decay', 0.00004,
'The value of the weight decay for training.')
flags.DEFINE_multi_integer('train_crop_size', [513, 513],
'Image crop size [height, width] during training.')
flags.DEFINE_list('train_crop_size', '513,513',
'Image crop size [height, width] during training.')
flags.DEFINE_float(
'last_layer_gradient_multiplier', 1.0,
@@ -166,7 +166,6 @@ flags.DEFINE_integer('output_stride', 16,
'The ratio of input to output spatial resolution.')
# Hard example mining related flags.
flags.DEFINE_integer(
'hard_example_mining_step', 0,
'The training step in which exact hard example mining kicks off. Note we '
@@ -181,6 +180,11 @@ flags.DEFINE_float(
'The top k percent pixels (in terms of the loss values) used to compute '
'loss during training. This is useful for hard pixel mining.')
# Quantization setting.
flags.DEFINE_integer(
'quantize_delay_step', -1,
'Steps to start quantized training. If < 0, will not quantize model.')
# Dataset settings.
flags.DEFINE_string('dataset', 'pascal_voc_seg',
'Name of the segmentation dataset.')
@@ -209,7 +213,7 @@ def _build_deeplab(iterator, outputs_to_num_classes, ignore_label):
model_options = common.ModelOptions(
outputs_to_num_classes=outputs_to_num_classes,
crop_size=FLAGS.train_crop_size,
crop_size=map(int, FLAGS.train_crop_size),
atrous_rates=FLAGS.atrous_rates,
output_stride=FLAGS.output_stride)
@@ -344,39 +348,46 @@ def _train_deeplab_model(iterator, num_of_classes, ignore_label):
summary_op: An operation to log the summaries.
"""
global_step = tf.train.get_or_create_global_step()
summaries = []
learning_rate = train_utils.get_model_learning_rate(
FLAGS.learning_policy, FLAGS.base_learning_rate,
FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
FLAGS.training_number_of_steps, FLAGS.learning_power,
FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
summaries.append(tf.summary.scalar('learning_rate', learning_rate))
tf.summary.scalar('learning_rate', learning_rate)
optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
tower_losses = []
tower_grads = []
tower_summaries = None
for i in range(FLAGS.num_clones):
with tf.device('/gpu:%d' % i):
with tf.name_scope('clone_%d' % i) as scope:
# First tower has default name scope.
name_scope = ('clone_%d' % i) if i else ''
with tf.name_scope(name_scope) as scope:
loss = _tower_loss(
iterator=iterator,
num_of_classes=num_of_classes,
ignore_label=ignore_label,
scope=scope,
reuse_variable=(i != 0))
grads = optimizer.compute_gradients(loss)
tower_grads.append(grads)
tower_losses.append(loss)
if FLAGS.quantize_delay_step >= 0:
if FLAGS.num_clones > 1:
raise ValueError('Quantization doesn\'t support multi-clone yet.')
tf.contrib.quantize.create_training_graph(
quant_delay=FLAGS.quantize_delay_step)
# Retain the summaries from the first tower.
if not i:
tower_summaries = tf.summary.merge_all(scope=scope)
for i in range(FLAGS.num_clones):
with tf.device('/gpu:%d' % i):
name_scope = ('clone_%d' % i) if i else ''
with tf.name_scope(name_scope) as scope:
grads = optimizer.compute_gradients(tower_losses[i])
tower_grads.append(grads)
with tf.device('/cpu:0'):
grads_and_vars = _average_gradients(tower_grads)
if tower_summaries is not None:
summaries.append(tower_summaries)
# Modify the gradients for biases and last layer variables.
last_layers = model.get_extra_layer_scopes(
@@ -407,11 +418,12 @@ def _train_deeplab_model(iterator, num_of_classes, ignore_label):
lambda: tf.Print(total_loss, [total_loss], 'Total loss is :'),
lambda: total_loss)
summaries.append(tf.summary.scalar('total_loss', total_loss))
tf.summary.scalar('total_loss', total_loss)
with tf.control_dependencies([update_op]):
train_tensor = tf.identity(total_loss, name='train_op')
summary_op = tf.summary.merge(summaries)
# Excludes summaries from towers other than the first one.
summary_op = tf.summary.merge_all(scope='(?!clone_)')
return train_tensor, summary_op
@@ -434,7 +446,7 @@ def main(unused_argv):
split_name=FLAGS.train_split,
dataset_dir=FLAGS.dataset_dir,
batch_size=clone_batch_size,
crop_size=FLAGS.train_crop_size,
crop_size=map(int, FLAGS.train_crop_size),
min_resize_value=FLAGS.min_resize_value,
max_resize_value=FLAGS.max_resize_value,
resize_factor=FLAGS.resize_factor,
@@ -471,7 +483,8 @@ def main(unused_argv):
summary_op=summary_op,
)
stop_hook = tf.train.StopAtStepHook(FLAGS.training_number_of_steps)
stop_hook = tf.train.StopAtStepHook(
last_step=FLAGS.training_number_of_steps)
profile_dir = FLAGS.profile_logdir
if profile_dir is not None:
......
@@ -43,8 +43,8 @@ flags.DEFINE_string('checkpoint_dir', None, 'Directory of model checkpoints.')
flags.DEFINE_integer('vis_batch_size', 1,
'The number of images in each batch during evaluation.')
flags.DEFINE_multi_integer('vis_crop_size', [513, 513],
'Crop size [height, width] for visualization.')
flags.DEFINE_list('vis_crop_size', '513,513',
'Crop size [height, width] for visualization.')
flags.DEFINE_integer('eval_interval_secs', 60 * 5,
'How often (in seconds) to run evaluation.')
@@ -66,6 +66,10 @@ flags.DEFINE_multi_float('eval_scales', [1.0],
flags.DEFINE_bool('add_flipped_images', False,
'Add flipped images for evaluation or not.')
flags.DEFINE_integer(
'quantize_delay_step', -1,
'Steps to start quantized training. If < 0, will not quantize model.')
# Dataset settings.
flags.DEFINE_string('dataset', 'pascal_voc_seg',
@@ -189,7 +193,7 @@ def main(unused_argv):
split_name=FLAGS.vis_split,
dataset_dir=FLAGS.dataset_dir,
batch_size=FLAGS.vis_batch_size,
crop_size=FLAGS.vis_crop_size,
crop_size=map(int, FLAGS.vis_crop_size),
min_resize_value=FLAGS.min_resize_value,
max_resize_value=FLAGS.max_resize_value,
resize_factor=FLAGS.resize_factor,
@@ -218,7 +222,7 @@ def main(unused_argv):
model_options = common.ModelOptions(
outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
crop_size=FLAGS.vis_crop_size,
crop_size=map(int, FLAGS.vis_crop_size),
atrous_rates=FLAGS.atrous_rates,
output_stride=FLAGS.output_stride)
@@ -230,6 +234,9 @@ def main(unused_argv):
image_pyramid=FLAGS.image_pyramid)
else:
tf.logging.info('Performing multi-scale test.')
if FLAGS.quantize_delay_step >= 0:
raise ValueError(
'Quantize mode is not supported with multi-scale test.')
predictions = model.predict_labels_multi_scale(
samples[common.IMAGE],
model_options=model_options,
@@ -259,6 +266,10 @@ def main(unused_argv):
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
align_corners=True), 3)
tf.train.get_or_create_global_step()
if FLAGS.quantize_delay_step >= 0:
tf.contrib.quantize.create_eval_graph()
num_iteration = 0
max_num_iteration = FLAGS.max_number_of_iterations
@@ -274,8 +285,6 @@ def main(unused_argv):
time.gmtime()))
tf.logging.info('Visualizing with model %s', checkpoint_path)
tf.train.get_or_create_global_step()
scaffold = tf.train.Scaffold(init_op=tf.global_variables_initializer())
session_creator = tf.train.ChiefSessionCreator(
scaffold=scaffold,
......