estimator_cifar_benchmark.py 3.98 KB
Newer Older
Shining Sun's avatar
Shining Sun committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15
"""Executes Estimator benchmarks and accuracy tests."""
Shining Sun's avatar
Shining Sun committed
16
17
18

from __future__ import absolute_import
from __future__ import division
19
20
21
22
23
24
25
26
27
28
from __future__ import print_function

import os

from absl import flags
from absl.testing import flagsaver
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.resnet import cifar10_main as cifar_main

29
# Path assigned to the `data_dir` flag by every benchmark method in this file.
# NOTE(review): presumably the extracted CIFAR-10 binary batches must already
# exist at this location on the benchmark host -- confirm.
DATA_DIR = '/data/cifar10_data/cifar-10-batches-bin'
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113


class EstimatorCifar10BenchmarkTests(object):
  """Benchmarks and accuracy tests for Estimator ResNet56.

  Each public `resnet56_*` method resets the absl flags to their defaults,
  overrides the ones that define the benchmark, runs
  `cifar_main.run_cifar` and copies the resulting accuracy numbers onto
  `self.oss_report_object`.

  Attributes:
    oss_report_object: Object that receives `top_1` and `top_5` attributes
      after a run. It is `None` after construction and must be assigned by
      the caller before a benchmark method is invoked.
    output_dir: Base directory under which each benchmark creates its own
      model directory.
  """

  # Snapshot of the default flag values, captured by the first `_setup()` call
  # and shared by all instances so that later runs can restore the defaults.
  local_flags = None

  def __init__(self, output_dir=None):
    """Creates the benchmark object.

    Args:
      output_dir: Base directory for the per-benchmark model directories.
    """
    self.oss_report_object = None
    self.output_dir = output_dir

  def resnet56_1_gpu(self):
    """Test layers model with Estimator and distribution strategies."""
    self._run_resnet56(num_gpus=1, dtype='fp32', folder_name='resnet56_1_gpu')

  def resnet56_fp16_1_gpu(self):
    """Test layers FP16 model with Estimator and distribution strategies."""
    self._run_resnet56(
        num_gpus=1, dtype='fp16', folder_name='resnet56_fp16_1_gpu')

  def resnet56_2_gpu(self):
    """Test layers model with Estimator and dist_strat. 2 GPUs."""
    # This benchmark previously set num_gpus to 1, which silently repeated the
    # single-GPU run under the 2-GPU name.
    self._run_resnet56(num_gpus=2, dtype='fp32', folder_name='resnet56_2_gpu')

  def resnet56_fp16_2_gpu(self):
    """Test layers FP16 model with Estimator and dist_strat. 2 GPUs."""
    self._run_resnet56(
        num_gpus=2, dtype='fp16', folder_name='resnet56_fp16_2_gpu')

  def _run_resnet56(self, num_gpus, dtype, folder_name):
    """Configures the flags for one ResNet56 run, executes it and reports.

    Args:
      num_gpus: Value assigned to the `num_gpus` flag.
      dtype: Value assigned to the `dtype` flag ('fp32' or 'fp16' here).
      folder_name: Sub-directory of `self.output_dir` used as `model_dir`.

    Raises:
      ValueError: If `self.oss_report_object` has not been set (raised by
        `_fill_report_object` once the run has finished).
    """
    # Start from the default flag values so that overrides made by a previous
    # benchmark in the same process do not leak into this one.
    self._setup()
    flags.FLAGS.num_gpus = num_gpus
    flags.FLAGS.data_dir = DATA_DIR
    flags.FLAGS.batch_size = 128
    flags.FLAGS.train_epochs = 182
    flags.FLAGS.model_dir = self._get_model_dir(folder_name)
    flags.FLAGS.resnet_size = 56
    flags.FLAGS.dtype = dtype
    stats = cifar_main.run_cifar(flags.FLAGS)
    self._fill_report_object(stats)

  def _fill_report_object(self, stats):
    """Copies the accuracy metrics from `stats` onto the report object.

    Args:
      stats: Mapping with 'accuracy' and 'accuracy_top_5' entries whose values
        expose an `item()` method returning the scalar to report.

    Raises:
      ValueError: If `self.oss_report_object` has not been set.
    """
    if not self.oss_report_object:
      raise ValueError('oss_report_object has not been set.')
    # NOTE(review): the original comment says `stats` also carries a
    # "global_step" entry; it is not reported here -- confirm availability.
    self.oss_report_object.top_1 = stats['accuracy'].item()
    self.oss_report_object.top_5 = stats['accuracy_top_5'].item()

  def _get_model_dir(self, folder_name):
    """Returns `folder_name` joined onto `self.output_dir`."""
    return os.path.join(self.output_dir, folder_name)

  def _setup(self):
    """Puts the absl flags into their default state before a benchmark.

    The first call defines the CIFAR flags, parses a dummy command line so the
    defaults get loaded, and stores a snapshot of the flag values on the
    class. Every later call restores that snapshot instead.
    """
    tf.logging.set_verbosity(tf.logging.DEBUG)
    if EstimatorCifar10BenchmarkTests.local_flags is None:
      cifar_main.define_cifar_flags()
      # Loads flags to get defaults to then override.
      flags.FLAGS(['foo'])
      saved_flag_values = flagsaver.save_flag_values()
      EstimatorCifar10BenchmarkTests.local_flags = saved_flag_values
      return
    flagsaver.restore_flag_values(EstimatorCifar10BenchmarkTests.local_flags)