"TensorFlow2x/vscode:/vscode.git/clone" did not exist on "c056df7823f6c8d5ad4234870596871f9ece9df1"
benchmark.py 8.02 KB
Newer Older
huchen's avatar
huchen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Execute benchmark."""
from __future__ import print_function

import argparse
import json
import logging
import multiprocessing
import os
import re
import sys
import time

import perfzero.benchmark_method_runner as benchmark_method_runner
import perfzero.perfzero_config as perfzero_config
import perfzero.tpu_runtime_utils as tpu_runtime_utils
import perfzero.utils as utils


class BenchmarkRunner(object):
  """Execute benchmark and report results."""

  def __init__(self, config):
    self.config = config
    self.project_dir = os.path.abspath(
        os.path.dirname(os.path.dirname(__file__)))
    self.workspace_dir = os.path.join(self.project_dir, config.workspace)
    self.site_packages_dir = os.path.join(self.workspace_dir, 'site-packages')
    self.root_output_dir = os.path.join(self.workspace_dir, 'output')
    self.benchmark_execution_time = {}

  def _setup(self):
    """Download data and checkout git repository."""

    # Activate gcloud service
    start_time = time.time()
    utils.setup_python_path(self.site_packages_dir, self.config.python_path_str)
    utils.active_gcloud_service(self.config.gcloud_key_file_url,
                                self.workspace_dir)
    utils.make_dir_if_not_exist(self.root_output_dir)
    self.benchmark_execution_time['activate_gcloud_service'] = (
        time.time() - start_time)

    # Download data
    start_time = time.time()
    utils.download_data(utils.parse_data_downloads_str(
        self.config.root_data_dir, self.config.gcs_downloads_str))
    utils.download_data(utils.parse_data_downloads_str(
        self.config.root_data_dir, self.config.data_downloads_str))
    self.benchmark_execution_time['download_data'] = time.time() - start_time

    # Checkout git repositories
    start_time = time.time()
    site_package_info = utils.checkout_git_repos(
        self.config.get_git_repos(self.site_packages_dir),
        self.config.use_cached_site_packages)
    self.benchmark_execution_time['checkout_repository'] = (
        time.time() - start_time)

    # Start cloud TPU.
    if self.config.tpu_parameters is not None:
      start_time = time.time()
      utils.setup_tpu(self.config.tpu_parameters)
      tpu_info = tpu_runtime_utils.configure_tpu(self.config.tpu_parameters)
      site_package_info['tpu_version'] = tpu_info
      self.benchmark_execution_time['start_tpu'] = time.time() - start_time
      
    self.stream_handler = logging.StreamHandler(sys.stdout)
    self.stream_handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s: %(message)s'))
    logging.getLogger().addHandler(self.stream_handler)
    return site_package_info

  def _get_benchmark_methods(self):
    """Returns list of benchmark methods to execute."""
    filter_prefix = 'filter:'
    benchmark_methods = []
    for benchmark_method_pattern in self.config.benchmark_method_patterns:
      if filter_prefix not in benchmark_method_pattern:
        benchmark_methods.append(benchmark_method_pattern)
      else:
        index = benchmark_method_pattern.find(filter_prefix)
        benchmark_class = benchmark_method_pattern[:index - 1]
        pattern = benchmark_method_pattern[index + len(filter_prefix):]
        class_instance = utils.instantiate_benchmark_class(
            benchmark_class, '/dev/null', '', None, {},
            benchmark_class_type=self.config.benchmark_class_type)
        
        for benchmark_method_name in dir(class_instance):
          if re.match(pattern, benchmark_method_name):
            benchmark_methods.append(benchmark_class + '.' +
                                     benchmark_method_name)

    logging.info('The following benchmark methods will be executed: %s',
                 benchmark_methods)
    return benchmark_methods

  def _run_benchmarks_trial(self, harness_info, site_package_info,
                            benchmark_methods, trial_id):
    """Runs a single trial of all benchmark methods."""
    # Run each benchmark method in a separate process so that its memory usage
    # does not affect the execution of the other benchmark methods.
    # This is a workaround until the memory leak issues in TensorFlow are fixed.
    has_exception = False
    benchmark_success_results = {}
    benchmark_output_dirs = {}
    benchmark_execution_time = {}
    for benchmark_method in benchmark_methods:
      queue = multiprocessing.Queue()
      process = multiprocessing.Process(target=benchmark_method_runner.run,
                                        args=(benchmark_method,
                                              harness_info,
                                              site_package_info,
                                              self.root_output_dir,
                                              self.config, queue, trial_id))
      process.start()
      process.join()
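      # benchmark_method_runner.run puts a single result tuple on the queue:
      # (has_exception, execution_time, succeeded, output_dir).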
      method_has_exception, method_execution_time, succeeded, output_dir = queue.get()  # pylint: disable=line-too-long
      has_exception |= method_has_exception
      benchmark_execution_time[benchmark_method] = method_execution_time
      benchmark_success_results[benchmark_method] = succeeded
      benchmark_output_dirs[benchmark_method] = output_dir
    return (has_exception, benchmark_success_results,
            benchmark_output_dirs, benchmark_execution_time)

  def run_benchmark(self):
    """Run benchmark."""
    harness_info = utils.get_git_repo_info(self.project_dir)
    has_exception = False
    benchmark_success_results = {}
    benchmark_output_dirs = {}
    num_trials = self.config.benchmark_num_trials

    try:
      site_package_info = self._setup()
      benchmark_methods = self._get_benchmark_methods()

      print('Setup complete. Running {} trials'.format(num_trials))
      for trial_id in range(1, num_trials + 1):
        print('Running trial {} / {}'.format(trial_id, num_trials))
        (trial_has_exception, trial_success_results,
         trial_output_dirs, trial_execution_time) = self._run_benchmarks_trial(
             harness_info, site_package_info, benchmark_methods, trial_id)

        trial_key = 'trial_{}'.format(trial_id)
        has_exception |= trial_has_exception
        self.benchmark_execution_time[trial_key] = trial_execution_time
        benchmark_success_results[trial_key] = trial_success_results
        benchmark_output_dirs[trial_key] = trial_output_dirs
    finally:
      if self.config.tpu_parameters is not None:
        has_exception |= utils.cleanup_tpu(self.config.tpu_parameters)

    print('Benchmark execution time in seconds by operation:\n {}'.format(
        json.dumps(self.benchmark_execution_time, indent=2)))
    print('Benchmark success results:\n{}'.format(
        json.dumps(benchmark_success_results, indent=2)))
    print('Benchmark local output directories:\n{}'.format(
        json.dumps(benchmark_output_dirs, indent=2)))
    if has_exception:
      sys.exit(1)


if __name__ == '__main__':
  parser = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  perfzero_config.add_benchmark_parser_arguments(parser)
  FLAGS, unparsed = parser.parse_known_args()

  level = logging.DEBUG if FLAGS.debug else logging.INFO
  logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                      level=level)

  if unparsed:
    logging.error('Arguments %s are not recognized', unparsed)
    sys.exit(1)

  config_ = perfzero_config.PerfZeroConfig(mode='flags', flags=FLAGS)
  benchmark_runner = BenchmarkRunner(config_)
  benchmark_runner.run_benchmark()