# coding=utf-8
# Copyright 2021 Arm Limited and affiliates.
# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command-line entry point that drives an MLPerf LoadGen run for BERT.

Depending on ``--network`` the script either runs LoadGen against a locally
constructed SUT, serves the SUT through ``network_SUT``, or hands control to
``network_LON``.
"""

import argparse
import os
import subprocess
import sys

from absl import flags
from absl import app
import mlperf_loadgen as lg

# Make the backend modules (current directory) and the ``lon`` helpers
# importable; the backend/network imports further down rely on these entries.
sys.path.insert(0, os.getcwd())
sys.path.insert(0, os.path.join(os.getcwd(), "..", "..", "lon"))


def get_args() -> argparse.Namespace:
    """Parse the command-line options of this script.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--backend",
        choices=["tf", "pytorch", "onnxruntime", "tf_estimator", "ray"],
        default="tf",
        help="Backend",
    )
    parser.add_argument(
        "--scenario",
        choices=["SingleStream", "Offline", "Server", "MultiStream"],
        default="Offline",
        help="Scenario",
    )
    parser.add_argument(
        "--accuracy", action="store_true", help="enable accuracy pass"
    )
    parser.add_argument(
        "--quantized",
        action="store_true",
        help="use quantized model (only valid for onnxruntime backend)",
    )
    parser.add_argument(
        "--profile",
        action="store_true",
        help="enable profiling (only valid for onnxruntime backend)",
    )
    parser.add_argument(
        "--user_conf",
        default="user.conf",
        help="user config for user LoadGen settings such as target QPS",
    )
    parser.add_argument(
        "--audit_conf",
        default="audit.conf",
        help="audit config for LoadGen settings during compliance runs",
    )
    parser.add_argument(
        "--max_examples",
        type=int,
        help="Maximum number of examples to consider (not limited by default)",
    )
    parser.add_argument(
        "--network",
        choices=["sut", "lon", None],
        default=None,
        help="Loadgen network mode",
    )
    parser.add_argument("--node", type=str, default="")
    parser.add_argument("--port", type=int, default=8000)
    parser.add_argument(
        "--sut_server",
        nargs="*",
        default=["http://localhost:8000"],
        help="Address of the server(s) under test.",
    )
    args = parser.parse_args()
    return args


# Maps the ``--scenario`` string onto the LoadGen scenario enum value.
scenario_map = {
    "SingleStream": lg.TestScenario.SingleStream,
    "Offline": lg.TestScenario.Offline,
    "Server": lg.TestScenario.Server,
    "MultiStream": lg.TestScenario.MultiStream,
}

# Backends for which ``--quantized`` and ``--profile`` must be rejected.
_NON_ONNXRUNTIME_BACKENDS = ("pytorch", "tf", "tf_estimator", "ray")


def _reject_onnxruntime_only_flags(args):
    """Raise ``ValueError`` if an onnxruntime-only flag is set.

    Explicit exceptions are used instead of ``assert`` so the validation is
    still performed when Python runs with ``-O``.
    """
    if args.quantized:
        raise ValueError(
            "Quantized model is only supported by onnxruntime backend!"
        )
    if args.profile:
        raise ValueError("Profiling is only supported by onnxruntime backend!")


def _create_sut(args):
    """Build the SUT object for ``args.backend``.

    Backend modules are imported lazily so that only the selected framework
    has to be installed.

    Raises:
        ValueError: for an unknown backend, or when ``--quantized`` /
            ``--profile`` is combined with a backend other than onnxruntime.
    """
    backend = args.backend
    if backend in _NON_ONNXRUNTIME_BACKENDS:
        _reject_onnxruntime_only_flags(args)

    if backend == "pytorch":
        from pytorch_SUT import get_pytorch_sut

        return get_pytorch_sut(args)
    if backend == "tf":
        from tf_SUT import get_tf_sut

        return get_tf_sut(args)
    if backend == "tf_estimator":
        from tf_estimator_SUT import get_tf_estimator_sut

        # This factory is called without arguments, as in the original code.
        return get_tf_estimator_sut()
    if backend == "onnxruntime":
        from onnxruntime_SUT import get_onnxruntime_sut

        return get_onnxruntime_sut(args)
    if backend == "ray":
        from ray_SUT import get_ray_sut

        return get_ray_sut(args)
    raise ValueError("Unknown backend: {:}".format(args.backend))


def _build_test_settings(args):
    """Create the LoadGen ``TestSettings`` from the parsed arguments."""
    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    # mlperf.conf is automatically loaded by the loadgen, so only the user
    # config is applied here.
    settings.FromConfig(args.user_conf, "bert", args.scenario)
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly
    return settings


def _build_log_settings():
    """Create the LoadGen ``LogSettings``, ensuring the log directory exists.

    The directory comes from the ``LOG_PATH`` environment variable and falls
    back to ``build/logs`` when that is unset or empty.
    """
    log_path = os.environ.get("LOG_PATH") or "build/logs"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(log_path, exist_ok=True)

    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True

    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings
    log_settings.enable_trace = True
    return log_settings


def _run_accuracy_check(args):
    """Run ``accuracy-squad.py`` (located next to this file) as a subprocess.

    The command is passed as an argument list without a shell, so a script
    directory containing spaces or shell metacharacters is handled correctly.

    Raises:
        subprocess.CalledProcessError: if the script exits with non-zero status.
    """
    script = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "accuracy-squad.py"
    )
    cmd = ["python3", script]
    if args.max_examples:
        cmd += ["--max_examples", str(args.max_examples)]
    subprocess.check_call(cmd)


def main():
    """Parse arguments, build the SUT, and run or serve the benchmark."""
    args = get_args()

    # In "lon" mode no local SUT is created; it stays None and the teardown
    # at the end is skipped.
    sut = None
    if not args.network or args.network == "sut":
        sut = _create_sut(args)

    settings = _build_test_settings(args)
    log_settings = _build_log_settings()

    if args.network == "lon":
        from network_LON import app as lon_app, set_args, main as lon_main

        set_args(
            args,
            settings,
            log_settings,
            args.audit_conf,
            args.sut_server,
            args.backend,
            args.max_examples,
        )
        lon_app.run(lon_main)
    elif args.network == "sut":
        import network_SUT

        # Assign the module attribute itself: the previous
        # ``from network_SUT import node; node = args.node`` only rebound a
        # local name and never changed the value stored in network_SUT.
        # NOTE(review): presumably network_SUT reads ``node`` when serving
        # requests - confirm against that module.
        network_SUT.node = args.node
        network_SUT.set_backend(sut)
        network_SUT.app.run(debug=False, port=args.port, host="0.0.0.0")
    else:
        print("Running LoadGen test...")
        lg.StartTestWithLogSettings(
            sut.sut, sut.qsl.qsl, settings, log_settings, args.audit_conf
        )
        # Setting SKIP_VERIFY_ACCURACY to any non-empty value skips this step.
        if args.accuracy and not os.environ.get("SKIP_VERIFY_ACCURACY"):
            _run_accuracy_check(args)

    print("Done!")

    if sut:
        print("Destroying SUT...")
        lg.DestroySUT(sut.sut)

        print("Destroying QSL...")
        lg.DestroyQSL(sut.qsl.qsl)


if __name__ == "__main__":
    main()