Commit 3c15726c authored by yangzhong's avatar yangzhong
Browse files

git init

parents
# Copyright 2019 The MLPerf Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
# \file
# \brief MLPerf Inference LoadGen python module setup.
# \details Creates a module that python can import.
# All source files are compiled by python's C++ toolchain without depending
# on a loadgen lib.
#
# This setup.py can be used stand-alone, without the use of an external
# build system. This will pollute your source tree with output files
# and binaries. Use one of the gn build targets instead if you want
# to avoid polluting the source tree.
from setuptools import Extension, setup
from pathlib import Path
from pybind11 import get_include
from pybind11.setup_helpers import Pybind11Extension, build_ext
from version_generator import generate_loadgen_version_definitions
import subprocess
# Generate the version definition source before collecting sources so the
# extension build below can compile it in.
generated_version_source_filename = "generated/version_generated.cc"
generate_loadgen_version_definitions(generated_version_source_filename, ".")

# Headers that make up the public LoadGen API.
public_headers = [
    "loadgen.h",
    "query_sample.h",
    "query_sample_library.h",
    "system_under_test.h",
    "test_settings.h",
    "issue_query_controller.h",
    "early_stopping.h",
    "query_dispatch_library.h",
]

# Internal headers (plus the version generator script) the sources depend on.
lib_headers = [
    "logging.h",
    "test_settings_internal.h",
    "trace_generator.h",
    "utils.h",
    "version.h",
    "results.h",
    "bindings/c_api.h",
    "version_generator.py",
    "mlperf_conf.h",
]

lib_sources = [
    "early_stopping.cc",
    "issue_query_controller.cc",
    "loadgen.cc",
    "logging.cc",
    "test_settings_internal.cc",
    "utils.cc",
    "version.cc",
    "results.cc",
]

lib_bindings = [
    "bindings/c_api.cc",
    "bindings/python_api.cc",
]

this_directory = Path(__file__).parent
mlperf_loadgen_headers = public_headers + lib_headers
mlperf_loadgen_sources_no_gen = lib_sources + lib_bindings
mlperf_loadgen_sources = mlperf_loadgen_sources_no_gen + [
    generated_version_source_filename
]

mlperf_long_description = (
    this_directory / "README.md").read_text(encoding="utf-8")

# Read and validate the package version. Strip whitespace so a trailing
# newline in VERSION.txt does not leak into the version string or the
# MAJOR_VERSION/MINOR_VERSION macros defined below.
with open("VERSION.txt", "r", encoding="utf-8") as f:
    version = f.read().strip()
version_split = version.split(".")
if len(version_split) < 2:
    # Fail fast with a clear message instead of an IndexError further down.
    raise RuntimeError(
        "Version is incomplete. Needs a format like 4.1.1 in VERSION.txt")

# Embed mlperf.conf into a C header so the loadgen carries a built-in default
# config instead of requiring the file at runtime.
try:
    with open("mlperf.conf", "r", encoding="utf-8") as file:
        conf_contents = file.read()
    # Escape backslashes and double quotes
    conf_contents = conf_contents.replace('\\', '\\\\').replace('"', '\\"')
    # Convert newlines so each config line becomes its own C string literal.
    conf_contents = conf_contents.replace('\n', '\\n"\n"')
    formatted_content = f'const char* mlperf_conf =\n"{conf_contents}";\n'
    with open("mlperf_conf.h", "w", encoding="utf-8") as header_file:
        header_file.write(formatted_content)
except IOError as e:
    raise RuntimeError(f"Failed to generate header file: {e}") from e

mlperf_loadgen_module = Pybind11Extension(
    "mlperf_loadgen",
    define_macros=[
        ("MAJOR_VERSION", version_split[0]),
        ("MINOR_VERSION", version_split[1]),
    ],
    include_dirs=[".", get_include()],
    sources=mlperf_loadgen_sources,
    depends=mlperf_loadgen_headers,
)

setup(
    name="mlcommons_loadgen",
    version=version,
    description="MLPerf Inference LoadGen python bindings",
    url="https://mlcommons.org/",
    cmdclass={"build_ext": build_ext},
    ext_modules=[mlperf_loadgen_module],
    packages=['mlcommons_loadgen'],
    package_dir={'mlcommons_loadgen': '.'},
    include_package_data=True,
    long_description=mlperf_long_description,
    long_description_content_type='text/markdown',
)
/* Copyright 2019 The MLPerf Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
/// \file
/// \brief Defines the SystemUnderTest interface.
#ifndef MLPERF_LOADGEN_SYSTEM_UNDER_TEST_H
#define MLPERF_LOADGEN_SYSTEM_UNDER_TEST_H
#include <string>
#include <vector>
#include "query_sample.h"
namespace mlperf {
/// \addtogroup LoadgenAPI
/// @{
/// \brief The interface a client implements for the loadgen to test.
/// \todo Add hook for an untimed warm up period for the SUT.
/// \todo Add hook for an untimed warm up period for the loadgen logic.
/// \todo Support power hooks for cool-down period before running performance
/// traffic.
/// \todo Support power hooks for correlating test timeline with power
/// measurement timeline.
class SystemUnderTest {
 public:
  /// \brief Virtual destructor so implementations can be destroyed through
  /// this interface.
  virtual ~SystemUnderTest() {}

  /// \brief A human-readable string for logging purposes.
  virtual const std::string& Name() = 0;

  /// \brief Lets the loadgen issue N samples to the SUT.
  /// \details The SUT may either a) return immediately and signal completion
  /// at a later time on another thread or b) it may block and signal
  /// completion on the current stack. The load generator will handle both
  /// cases properly.
  /// Note: The data for neighboring samples may or may not be contiguous
  /// depending on the scenario.
  virtual void IssueQuery(const std::vector<QuerySample>& samples) = 0;

  /// \brief Called immediately after the last call to IssueQuery
  /// in a series is made.
  /// \details This doesn't necessarily signify the end of the
  /// test since there may be multiple series involved during a test; for
  /// example in accuracy mode.
  /// Clients can use this to flush any deferred queries immediately, rather
  /// than waiting for some timeout.
  /// This is especially useful in the server scenario.
  virtual void FlushQueries() = 0;
};
/// @}
} // namespace mlperf
#endif // MLPERF_LOADGEN_SYSTEM_UNDER_TEST_H
/* Copyright 2019 The MLPerf Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
/// \file
/// \brief Provides ways for a client to change the behavior and
/// constraints of the load generator.
/// \details Note: The MLPerf specification takes precedent over any of the
/// comments in this file if there are inconsistencies in regards to how the
/// loadgen *should* work.
/// The comments in this file are indicative of the loadgen implementation.
#ifndef MLPERF_LOADGEN_TEST_SETTINGS_H
#define MLPERF_LOADGEN_TEST_SETTINGS_H
#include <cstdint>
#include <string>
namespace mlperf {
/// \addtogroup LoadgenAPI
/// @{
/// \addtogroup LoadgenAPITestSettings Test Settings
/// \brief This page contains a description of all the scenarios, modes,
/// and log settings as implemented by the LoadGen.
/// @{
///
/// \enum TestScenario
/// * **SingleStream**
/// + Issues queries containing a single sample.
/// + The next query is only issued once the previous one has completed.
/// + Internal LoadGen latency between queries is not included in the
/// latency results.
/// + **Final performance result is:** a percentile of the latency.
/// * **MultiStream**
/// + Issues queries containing N samples.
/// - N is specified by \link
/// mlperf::TestSettings::multi_stream_samples_per_query
/// multi_stream_samples_per_query \endlink.
/// + The next query is only issued once the previous one has completed.
/// + The samples of each query are guaranteed to be contiguous with respect
/// to the order they were loaded in the QuerySampleLibrary.
/// + Latency is tracked and reported on a per-query and per-sample basis.
/// + The latency of a query is the maximum latency of its samples, including
/// any cross-thread communication within the loadgen.
/// + Internal LoadGen latency between queries is not included in the
/// latency results.
/// + **Final performance result is:** a percentile of the query latency.
/// * **Server**
/// + Sends queries with a single sample.
/// + Queries have a random poisson (non-uniform) arrival rate that, when
/// averaged, hits the target QPS.
/// + There is no limit on the number of outstanding queries, as long as
/// the latency constraints are met.
/// + **Final performance result is:** PASS if the a percentile of the latency
/// is under a given threshold. FAIL otherwise.
/// - Threshold is specified by \link
/// mlperf::TestSettings::server_target_latency_ns server_target_latency_ns
/// \endlink.
/// * **Offline**
/// + Sends all N samples to the SUT inside of a single query.
/// + The samples of the query are guaranteed to be contiguous with respect
/// to the order they were loaded in the QuerySampleLibrary.
/// + **Final performance result is:** samples per second.
///
enum class TestScenario {
  SingleStream,  ///< Single-sample queries, issued one at a time.
  MultiStream,   ///< N-sample queries, issued one at a time.
  Server,        ///< Single-sample queries with Poisson arrival times.
  Offline,       ///< All N samples sent to the SUT in a single query.
};
///
/// \enum TestMode
/// * **SubmissionRun**
/// + Runs accuracy mode followed by performance mode.
/// + TODO: Implement further requirements as decided by MLPerf.
/// * **AccuracyOnly**
/// + Runs each sample from the QSL through the SUT at least once.
/// + Outputs responses to an accuracy json that can be parsed by a model +
/// sample library specific script.
/// * **PerformanceOnly**
/// + Runs the performance traffic for the given scenario, as described in
/// the comments for TestScenario.
/// * **FindPeakPerformance**
/// + Determines the maximum QPS for the Server scenario.
/// + Not applicable for SingleStream, MultiStream or Offline scenarios.
///
enum class TestMode {
  SubmissionRun,        ///< Accuracy mode followed by performance mode.
  AccuracyOnly,         ///< Each QSL sample goes through the SUT at least once.
  PerformanceOnly,      ///< Performance traffic for the chosen scenario.
  FindPeakPerformance,  ///< Searches for the maximum QPS (Server only).
};
///
/// \brief Top-level struct specifying the modes and parameters of the test.
///
struct TestSettings {
  TestScenario scenario = TestScenario::SingleStream;
  TestMode mode = TestMode::PerformanceOnly;

  // ==================================
  /// \name SingleStream-specific
  /**@{*/
  /// \brief A hint used by the loadgen to pre-generate enough samples to
  /// meet the minimum test duration.
  double single_stream_expected_latency_ns = 1000000;
  /// \brief The latency percentile reported as the final result.
  double single_stream_target_latency_percentile = 0.90;
  /**@}*/

  // ==================================
  /// \name MultiStream-specific
  /**@{*/
  /// \brief A hint used by the loadgen to pre-generate enough samples to
  /// meet the minimum test duration.
  /// \brief MultiStream latency is for query (not sample) latency
  double multi_stream_expected_latency_ns = 8000000;
  /// \brief The latency percentile for MultiStream mode.
  double multi_stream_target_latency_percentile = 0.99;
  /// \brief The number of samples in each query.
  /// \details How many samples are bundled in a query
  uint64_t multi_stream_samples_per_query = 8;
  /**@}*/

  // ==================================
  /// \name Server-specific
  /**@{*/
  /// \brief The average QPS of the poisson distribution.
  /// \details note: This field is used as FindPeakPerformance's lower bound.
  /// When you run FindPeakPerformanceMode, you should make sure that this
  /// value satisfies performance constraints.
  double server_target_qps = 1;
  /// \brief The latency constraint for the Server scenario.
  uint64_t server_target_latency_ns = 100000000;
  /// \brief The latency percentile for server mode. This value is combined
  /// with server_target_latency_ns to determine if a run is valid.
  /// \details 99% is the default value, which is correct for image models.
  /// GNMT should be set to 0.97 (97%) in v0.5. (As always, check the policy
  /// page for updated values for the benchmark you are running.)
  double server_target_latency_percentile = 0.99;
  /// \brief If this flag is set to true, LoadGen will combine samples from
  /// multiple queries into a single query if their scheduled issue times have
  /// passed.
  bool server_coalesce_queries = false;
  /// \brief The decimal places of QPS precision used to terminate
  /// FindPeakPerformance mode.
  int server_find_peak_qps_decimals_of_precision = 1;
  /// \brief A step size (as a fraction of the QPS) used to widen the lower
  /// and upper bounds to find the initial boundaries of binary search.
  double server_find_peak_qps_boundary_step_size = 1;
  /// \brief The maximum number of outstanding queries to allow before exiting
  /// early from a performance run. Useful for performance tuning and speeding
  /// up the FindPeakPerformance mode.
  uint64_t server_max_async_queries = 0;  ///< 0: Infinity.
  /// \brief The number of issue query threads that will be registered and
  /// used to call SUT's IssueQuery(). If this is 0, the same thread calling
  /// StartTest() will be used to call IssueQuery(). See also
  /// mlperf::RegisterIssueQueryThread().
  uint64_t server_num_issue_query_threads = 0;
  /**@}*/

  // ==================================
  /// \name Offline-specific
  /**@{*/
  /// \brief Specifies the QPS the SUT expects to hit for the offline load.
  /// The loadgen generates 10% more queries than it thinks it needs to meet
  /// the minimum test duration.
  double offline_expected_qps = 1;
  /// \brief Affects the order in which the samples of the dataset are chosen.
  /// If false it concatenates a single permutation of the dataset (or part
  /// of it depending on QSL->PerformanceSampleCount()) several times up to
  /// the number of samples requested.
  /// If true it concatenates a multiple permutation of the dataset (or a
  /// part of it depending on QSL->PerformanceSampleCount()) several times
  /// up to the number of samples requested.
  bool sample_concatenate_permutation = false;
  /**@}*/

  // ==================================
  /// \name Test duration
  /// The test runs until **both** min duration and min query count have been
  /// met. However, it will exit before that point if **either** max duration
  /// or max query count have been reached.
  /**@{*/
  uint64_t min_duration_ms = 10000;
  uint64_t max_duration_ms = 0;  ///< 0: Infinity.
  uint64_t min_query_count = 100;
  uint64_t max_query_count = 0;  ///< 0: Infinity.
  /**@}*/

  // ==================================
  /// \name Random number generation
  /// There are 4 separate seeds, so each dimension can be changed
  /// independently.
  /**@{*/
  /// \brief Affects which subset of samples from the QSL are chosen for
  /// the performance sample set and accuracy sample sets.
  uint64_t qsl_rng_seed = 0;
  /// \brief Affects the order in which samples from the performance set will
  /// be included in queries.
  uint64_t sample_index_rng_seed = 0;
  /// \brief Affects the poisson arrival process of the Server scenario.
  /// \details Different seeds will appear to "jitter" the queries
  /// differently in time, but should not affect the average issued QPS.
  uint64_t schedule_rng_seed = 0;
  /// \brief Affects which samples have their query returns logged to the
  /// accuracy log in performance mode.
  uint64_t accuracy_log_rng_seed = 0;
  /// \brief Probability of the query response of a sample being logged to
  /// the accuracy log in performance mode
  double accuracy_log_probability = 0.0;
  /// \brief Target number of samples that will have their results printed to
  /// accuracy log in performance mode for compliance testing
  uint64_t accuracy_log_sampling_target = 0;
  /// \brief Variables for running test05 from native config. A boolean that
  /// determines whether or not to run test05 and three random seeds to run
  /// the test
  bool test05 = false;
  uint64_t test05_qsl_rng_seed = 0;
  uint64_t test05_sample_index_rng_seed = 0;
  uint64_t test05_schedule_rng_seed = 0;
  /// \brief Load mlperf parameter config from file.
  int FromConfig(const std::string &path, const std::string &model,
                 const std::string &scenario, int conf_type = 1);
  /**@}*/

  // ==================================
  /// \name Performance Sample modifiers
  /// \details These settings can be used to Audit Performance mode runs.
  /// In order to detect sample caching by SUT, performance of runs when only
  /// unique queries (with non-repeated samples) are issued can be compared
  /// with that when the same query is repeatedly issued.
  /**@{*/
  /// \brief Prints measurement interval start and stop timestamps to
  /// std::cout for the purpose of comparison against an external timer
  bool print_timestamps = false;
  /// \brief Allows issuing only unique queries in Performance mode of any
  /// scenario \details This can be used to send non-repeat & hence unique
  /// samples to SUT
  bool performance_issue_unique = false;
  /// \brief If true, the same query is chosen repeatedly for Inference.
  /// In offline scenario, the query is filled with the same sample.
  bool performance_issue_same = false;
  /// \brief Offset to control which sample is repeated in
  /// performance_issue_same mode.
  /// Value should be within [0, performance_sample_count)
  uint64_t performance_issue_same_index = 0;
  /// \brief Overrides QSL->PerformanceSampleCount() when non-zero
  uint64_t performance_sample_count_override = 0;
  /// \brief Measure token latencies
  bool use_token_latencies = false;
  /// Token latency parameters
  uint64_t server_ttft_latency = 100000000;
  uint64_t server_tpot_latency = 100000000;
  /// \brief Infer token latencies
  bool infer_token_latencies = false;
  /// \brief Scaling factor used when inferring token latencies.
  /// \details Default-initialized to 1 (identity). This field was previously
  /// the only member with no initializer, so TestSettingsInternal copied an
  /// indeterminate value (undefined behavior) whenever the caller did not set
  /// it explicitly.
  uint64_t token_latency_scaling_factor = 1;
  /**@}*/
};
///
/// \enum LoggingMode
/// Specifies how and when logging should be sampled and stringified at
/// runtime.
/// * **AsyncPoll**
/// + Logs are serialized and output on an IOThread that polls for new logs at
/// a fixed interval. This is the only mode currently implemented.
/// * **EndOfTestOnly**
/// + TODO: Logs are serialized and output only at the end of the test.
/// * **Synchronous**
/// + TODO: Logs are serialized and output inline.
enum class LoggingMode {
  AsyncPoll,      ///< IOThread polls for new logs at a fixed interval.
                  ///< The only mode currently implemented.
  EndOfTestOnly,  ///< TODO: serialize and output only at the end of the test.
  Synchronous,    ///< TODO: serialize and output logs inline.
};
///
/// \brief Specifies where log outputs should go.
///
/// By default, the loadgen outputs its log files to outdir and
/// modifies the filenames of its logs with a prefix and suffix.
/// Filenames will take the form:
/// "<outdir>/<datetime><prefix>summary<suffix>.txt"
///
/// Affordances for outputting logs to stdout are also provided.
///
struct LogOutputSettings {
  // Log filenames take the form:
  // "<outdir>/<datetime><prefix>summary<suffix>.txt".
  std::string outdir = ".";
  std::string prefix = "mlperf_log_";
  std::string suffix = "";
  // If true, a datetime string is prepended to the log filenames.
  bool prefix_with_datetime = false;
  // Mirror the detail/summary logs to stdout in addition to the files.
  bool copy_detail_to_stdout = false;
  bool copy_summary_to_stdout = false;
};
///
/// \brief Top-level log settings.
///
struct LogSettings {
  // Where log files are written; see LogOutputSettings.
  LogOutputSettings log_output;
  // How and when logs are sampled and stringified; see LoggingMode.
  LoggingMode log_mode = LoggingMode::AsyncPoll;
  uint64_t log_mode_async_poll_interval_ms = 1000;  ///< TODO: Implement this.
  // NOTE(review): presumably toggles trace log generation — confirm usage.
  bool enable_trace = true;
};
/// @}
/// @}
} // namespace mlperf
#endif // MLPERF_LOADGEN_TEST_SETTINGS_H
/* Copyright 2019 The MLPerf Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "test_settings_internal.h"
#include <fstream>
#include <map>
#include <sstream>
#include <string>
#include "logging.h"
#include "mlperf_conf.h"
#include "utils.h"
namespace mlperf {
namespace loadgen {
/// \brief Derives the effective internal settings from the user-requested
/// TestSettings plus the QSL's advertised performance sample count, applying
/// scenario-specific defaults and validating inconsistent combinations.
TestSettingsInternal::TestSettingsInternal(
    const TestSettings &requested_settings, size_t qsl_performance_sample_count)
    : requested(requested_settings),
      scenario(requested.scenario),
      mode(requested.mode),
      samples_per_query(1),
      target_qps(1),
      max_async_queries(0),
      target_duration(std::chrono::milliseconds(requested.min_duration_ms)),
      min_duration(std::chrono::milliseconds(requested.min_duration_ms)),
      max_duration(std::chrono::milliseconds(requested.max_duration_ms)),
      min_query_count(requested.min_query_count),
      max_query_count(requested.max_query_count),
      min_sample_count(0),
      qsl_rng_seed(requested.qsl_rng_seed),
      sample_index_rng_seed(requested.sample_index_rng_seed),
      schedule_rng_seed(requested.schedule_rng_seed),
      accuracy_log_rng_seed(requested.accuracy_log_rng_seed),
      accuracy_log_probability(requested.accuracy_log_probability),
      accuracy_log_sampling_target(requested.accuracy_log_sampling_target),
      print_timestamps(requested.print_timestamps),
      performance_issue_unique(requested.performance_issue_unique),
      performance_issue_same(requested.performance_issue_same),
      performance_issue_same_index(requested.performance_issue_same_index),
      performance_sample_count(0),
      sample_concatenate_permutation(false),
      use_token_latencies(requested.use_token_latencies),
      server_ttft_latency(requested.server_ttft_latency),
      server_tpot_latency(requested.server_tpot_latency),
      infer_token_latencies(requested.infer_token_latencies),
      token_latency_scaling_factor(requested.token_latency_scaling_factor) {
  // Target QPS, target latency, and max_async_queries.
  switch (requested.scenario) {
    case TestScenario::SingleStream:
      // QPS is derived from the expected per-sample latency.
      target_qps = static_cast<double>(std::nano::den) /
                   requested.single_stream_expected_latency_ns;
      max_async_queries = 1;
      target_latency_percentile =
          requested.single_stream_target_latency_percentile;
      break;
    case TestScenario::MultiStream:
      // QPS is derived from the expected per-query latency.
      target_qps = static_cast<double>(std::nano::den) /
                   requested.multi_stream_expected_latency_ns;
      max_async_queries = 1;
      target_latency_percentile =
          requested.multi_stream_target_latency_percentile;
      break;
    case TestScenario::Server:
      if (requested.server_target_qps >= 0.0) {
        target_qps = requested.server_target_qps;
      } else {
        // Negative QPS is invalid; log an error and keep the default (1).
        LogDetail([server_target_qps = requested.server_target_qps,
                   target_qps = target_qps](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
          std::stringstream ss;
          ss << "Invalid value for server_target_qps requested."
             << " requested: " << server_target_qps << " using: " << target_qps;
          MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", ss.str());
#else
          detail.Error("Invalid value for server_target_qps requested.",
                       "requested", server_target_qps, "using", target_qps);
#endif
        });
      }
      target_latency =
          std::chrono::nanoseconds(requested.server_target_latency_ns);
      target_latency_percentile = requested.server_target_latency_percentile;
      max_async_queries = requested.server_max_async_queries;
      break;
    case TestScenario::Offline:
      // target_latency_percentile is not used in Offline, but set it to
      // 0.99 anyway to avoid garbage value.
      target_latency_percentile = 0.99;
      if (requested.offline_expected_qps >= 0.0) {
        target_qps = requested.offline_expected_qps;
      } else {
        // Negative QPS is invalid; log an error and keep the default (1).
        LogDetail([offline_expected_qps = requested.offline_expected_qps,
                   target_qps = target_qps](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
          std::stringstream ss;
          ss << "Invalid value for offline_expected_qps requested."
             << " requested: " << offline_expected_qps
             << " using: " << target_qps;
          MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", ss.str());
#else
          detail.Error("Invalid value for offline_expected_qps requested.",
                       "requested", offline_expected_qps, "using", target_qps);
#endif
        });
      }
      max_async_queries = 1;
      break;
  }

  // Performance Sample Count: TestSettings override QSL ->
  // PerformanceSampleCount
  performance_sample_count = (requested.performance_sample_count_override == 0)
                                 ? qsl_performance_sample_count
                                 : requested.performance_sample_count_override;

  // Sample by concatenating several permutations of the dataset
  // (sample_concatenate_permutation).
  sample_concatenate_permutation =
      (requested.sample_concatenate_permutation == 0)
          ? false
          : requested.sample_concatenate_permutation;

  // Samples per query.
  if (requested.scenario == TestScenario::MultiStream) {
    samples_per_query = requested.multi_stream_samples_per_query;
  }

  // In the offline scenario, coalesce all queries into a single query.
  if (requested.scenario == TestScenario::Offline) {
    // TODO: Should the spec require a max duration for large query counts?
    // kSlack is used to make sure we generate enough samples for the SUT
    // to take longer than the minimum test duration required by the
    // MLPerf spec.
    constexpr double kSlack = 1.1;
    uint64_t target_sample_count =
        kSlack * DurationToSeconds(target_duration) * target_qps;
    samples_per_query =
        (requested.performance_issue_unique)
            ? performance_sample_count
            : std::max<uint64_t>(min_query_count, target_sample_count);
    // The single coalesced query satisfies the query count by itself; the
    // duration requirement is folded into samples_per_query above.
    min_query_count = 1;
    target_duration = std::chrono::milliseconds(0);
  }

  // FIXME: Only do this for 3D-UNet SingleStream, for v2.0
  // TODO: consolidate after v2.0
  // make min_queries to be multiple of performance_sample_count
  // performance_sample_count == 0 makes it to be equal to loaded_samples.size()
  if (sample_concatenate_permutation &&
      requested.scenario == TestScenario::SingleStream) {
    // set slack larger for 3D-UNet KiTS19 distribution, i.e. 50% latency << 90%
    // latency
    constexpr double kSlack = 2.0;
    uint64_t expected_queries =
        kSlack * DurationToSeconds(target_duration) * target_qps;
    min_query_count =
        min_query_count > expected_queries ? min_query_count : expected_queries;
    // Round min_query_count up to a multiple of qsl_performance_sample_count.
    min_query_count += qsl_performance_sample_count -
                       (min_query_count % qsl_performance_sample_count);
  }

  min_sample_count = min_query_count * samples_per_query;

  // Validate TestSettings
  if (requested.performance_issue_same &&
      (requested.performance_issue_same_index >= performance_sample_count)) {
    LogDetail([performance_issue_same_index =
                   requested.performance_issue_same_index,
               performance_sample_count =
                   performance_sample_count](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      std::stringstream ss;
      ss << "Sample Idx to be repeated in performance_issue_same mode"
         << " cannot be greater than loaded performance_sample_count."
         << " performance_issue_same_index: " << performance_issue_same_index
         << " performance_sample_count: " << performance_sample_count;
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", ss.str());
#else
      detail.Error(
          "Sample Idx to be repeated in performance_issue_same mode"
          " cannot be greater than loaded performance_sample_count.",
          "performance_issue_same_index", performance_issue_same_index,
          "performance_sample_count", performance_sample_count);
#endif
    });
  }

  if (requested.performance_issue_unique && requested.performance_issue_same) {
    LogDetail([performance_issue_unique = requested.performance_issue_unique,
               performance_issue_same =
                   requested.performance_issue_same](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      std::stringstream ss;
      ss << "Performance_issue_unique and performance_issue_same, both"
         << " cannot be true at the same time."
         << " performance_issue_unique: " << performance_issue_unique
         << " performance_issue_same: " << performance_issue_same;
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", ss.str());
#else
      detail.Error(
          "Performance_issue_unique and performance_issue_same, both"
          " cannot be true at the same time.",
          "performance_issue_unique", performance_issue_unique,
          "performance_issue_same", performance_issue_same);
#endif
    });
  }
}
// Stringifies a TestScenario for logging. The spelling of the two stream
// scenarios depends on which logging format was compiled in.
std::string ToString(TestScenario scenario) {
  if (scenario == TestScenario::Server) return "Server";
  if (scenario == TestScenario::Offline) return "Offline";
#if USE_NEW_LOGGING_FORMAT
  if (scenario == TestScenario::SingleStream) return "SingleStream";
  if (scenario == TestScenario::MultiStream) return "MultiStream";
#else
  if (scenario == TestScenario::SingleStream) return "Single Stream";
  if (scenario == TestScenario::MultiStream) return "Multi Stream";
#endif
  assert(false);
  return "InvalidScenario";
}
// Stringifies a TestMode for logging. The exact spelling depends on which
// logging format was compiled in.
std::string ToString(TestMode mode) {
#if USE_NEW_LOGGING_FORMAT
  if (mode == TestMode::SubmissionRun) return "SubmissionRun";
  if (mode == TestMode::AccuracyOnly) return "AccuracyOnly";
  if (mode == TestMode::PerformanceOnly) return "PerformanceOnly";
  if (mode == TestMode::FindPeakPerformance) return "FindPeakPerformance";
#else
  if (mode == TestMode::SubmissionRun) return "Submission";
  if (mode == TestMode::AccuracyOnly) return "Accuracy";
  if (mode == TestMode::PerformanceOnly) return "Performance";
  if (mode == TestMode::FindPeakPerformance) return "Find Peak Performance";
#endif
  assert(false);
  return "InvalidMode";
}
void LogRequestedTestSettings(const TestSettings &s) {
LogDetail([s](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
MLPERF_LOG(detail, "requested_scenario", ToString(s.scenario));
MLPERF_LOG(detail, "requested_test_mode", ToString(s.mode));
// Scenario-specific
switch (s.scenario) {
case TestScenario::SingleStream:
MLPERF_LOG(detail, "requested_single_stream_expected_latency_ns",
s.single_stream_expected_latency_ns);
MLPERF_LOG(detail, "requested_single_stream_target_latency_percentile",
s.single_stream_target_latency_percentile);
break;
case TestScenario::MultiStream:
MLPERF_LOG(detail, "requested_multi_stream_expected_latency_ns",
s.multi_stream_expected_latency_ns);
MLPERF_LOG(detail, "requested_multi_stream_target_latency_percentile",
s.multi_stream_target_latency_percentile);
MLPERF_LOG(detail, "requested_multi_stream_samples_per_query",
s.multi_stream_samples_per_query);
break;
case TestScenario::Server:
MLPERF_LOG(detail, "requested_server_target_qps", s.server_target_qps);
MLPERF_LOG(detail, "requested_server_target_latency_ns",
s.server_target_latency_ns);
MLPERF_LOG(detail, "requested_server_target_latency_percentile",
s.server_target_latency_percentile);
MLPERF_LOG(detail, "requested_server_coalesce_queries",
s.server_coalesce_queries);
MLPERF_LOG(detail,
"requested_server_find_peak_qps_decimals_of_precision",
s.server_find_peak_qps_decimals_of_precision);
MLPERF_LOG(detail, "requested_server_find_peak_qps_boundary_step_size",
s.server_find_peak_qps_boundary_step_size);
MLPERF_LOG(detail, "requested_server_max_async_queries",
s.server_max_async_queries);
MLPERF_LOG(detail, "requested_server_num_issue_query_threads",
s.server_num_issue_query_threads);
break;
case TestScenario::Offline:
MLPERF_LOG(detail, "requested_offline_expected_qps",
s.offline_expected_qps);
break;
}
// Overrides
MLPERF_LOG(detail, "requested_min_duration_ms", s.min_duration_ms);
MLPERF_LOG(detail, "requested_max_duration_ms", s.max_duration_ms);
MLPERF_LOG(detail, "requested_min_query_count", s.min_query_count);
MLPERF_LOG(detail, "requested_max_query_count", s.max_query_count);
MLPERF_LOG(detail, "requested_qsl_rng_seed", s.qsl_rng_seed);
MLPERF_LOG(detail, "requested_sample_index_rng_seed",
s.sample_index_rng_seed);
MLPERF_LOG(detail, "requested_schedule_rng_seed", s.schedule_rng_seed);
MLPERF_LOG(detail, "requested_accuracy_log_rng_seed",
s.accuracy_log_rng_seed);
MLPERF_LOG(detail, "requested_accuracy_log_probability",
s.accuracy_log_probability);
MLPERF_LOG(detail, "requested_accuracy_log_sampling_target",
s.accuracy_log_sampling_target);
MLPERF_LOG(detail, "requested_print_timestamps", s.print_timestamps);
MLPERF_LOG(detail, "requested_performance_issue_unique",
s.performance_issue_unique);
MLPERF_LOG(detail, "requested_performance_issue_same",
s.performance_issue_same);
MLPERF_LOG(detail, "requested_performance_issue_same_index",
s.performance_issue_same_index);
MLPERF_LOG(detail, "requested_performance_sample_count_override",
s.performance_sample_count_override);
MLPERF_LOG(detail, "requested_sample_concatenate_permutation",
s.sample_concatenate_permutation);
// Token latencies specific values
if (s.use_token_latencies) {
MLPERF_LOG(detail, "requested_use_token_latencies",
s.use_token_latencies);
if (s.scenario != TestScenario::Offline) {
MLPERF_LOG(detail, "requested_server_ttft_latency",
s.server_ttft_latency);
MLPERF_LOG(detail, "requested_server_tpot_latency",
s.server_tpot_latency);
}
}
#else
detail("");
detail("Requested Settings:");
detail("Scenario : " + ToString(s.scenario));
detail("Test mode : " + ToString(s.mode));
// Scenario-specific
switch (s.scenario) {
case TestScenario::SingleStream:
detail("single_stream_expected_latency_ns : ",
s.single_stream_expected_latency_ns);
detail("single_stream_target_latency_percentile : ",
s.single_stream_target_latency_percentile);
break;
case TestScenario::MultiStream:
detail("multi_stream_expected_latency_ns : ",
s.multi_stream_expected_latency_ns);
detail("multi_stream_target_latency_percentile : ",
s.multi_stream_target_latency_percentile);
detail("multi_stream_samples_per_query : ",
s.multi_stream_samples_per_query);
break;
case TestScenario::Server:
detail("server_target_qps : ", s.server_target_qps);
detail("server_target_latency_ns : ", s.server_target_latency_ns);
detail("server_target_latency_percentile : ",
s.server_target_latency_percentile);
detail("server_coalesce_queries : ", s.server_coalesce_queries);
detail("server_find_peak_qps_decimals_of_precision : ",
s.server_find_peak_qps_decimals_of_precision);
detail("server_find_peak_qps_boundary_step_size : ",
s.server_find_peak_qps_boundary_step_size);
detail("server_max_async_queries : ", s.server_max_async_queries);
detail("server_num_issue_query_threads : ",
s.server_num_issue_query_threads);
break;
case TestScenario::Offline:
detail("offline_expected_qps : ", s.offline_expected_qps);
break;
}
// Overrides
detail("min_duration_ms : ", s.min_duration_ms);
detail("max_duration_ms : ", s.max_duration_ms);
detail("min_query_count : ", s.min_query_count);
detail("max_query_count : ", s.max_query_count);
detail("qsl_rng_seed : ", s.qsl_rng_seed);
detail("sample_index_rng_seed : ", s.sample_index_rng_seed);
detail("schedule_rng_seed : ", s.schedule_rng_seed);
detail("accuracy_log_rng_seed : ", s.accuracy_log_rng_seed);
detail("accuracy_log_probability : ", s.accuracy_log_probability);
detail("accuracy_log_sampling_target : ", s.accuracy_log_sampling_target);
detail("print_timestamps : ", s.print_timestamps);
detail("performance_issue_unique : ", s.performance_issue_unique);
detail("performance_issue_same : ", s.performance_issue_same);
detail("performance_issue_same_index : ", s.performance_issue_same_index);
detail("performance_sample_count_override : ",
s.performance_sample_count_override);
detail("");
#endif
});
}
// Logs the effective (post-normalization) settings that the test will
// actually run with, as opposed to the raw user-requested settings.
// The settings are captured by value ([s = *this]) so the asynchronous
// logger never references a possibly-destroyed object.
void TestSettingsInternal::LogEffectiveSettings() const {
  LogDetail([s = *this](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
    // Structured logging: one key/value entry per effective setting.
    MLPERF_LOG(detail, "effective_scenario", ToString(s.scenario));
    MLPERF_LOG(detail, "effective_test_mode", ToString(s.mode));
    MLPERF_LOG(detail, "effective_samples_per_query", s.samples_per_query);
    MLPERF_LOG(detail, "effective_target_qps", s.target_qps);
    MLPERF_LOG(detail, "effective_target_latency_ns", s.target_latency.count());
    MLPERF_LOG(detail, "effective_target_latency_percentile",
               s.target_latency_percentile);
    MLPERF_LOG(detail, "effective_max_async_queries", s.max_async_queries);
    MLPERF_LOG(detail, "effective_target_duration_ms",
               s.target_duration.count());
    MLPERF_LOG(detail, "effective_min_duration_ms", s.min_duration.count());
    MLPERF_LOG(detail, "effective_max_duration_ms", s.max_duration.count());
    MLPERF_LOG(detail, "effective_min_query_count", s.min_query_count);
    MLPERF_LOG(detail, "effective_max_query_count", s.max_query_count);
    MLPERF_LOG(detail, "effective_min_sample_count", s.min_sample_count);
    MLPERF_LOG(detail, "effective_qsl_rng_seed", s.qsl_rng_seed);
    MLPERF_LOG(detail, "effective_sample_index_rng_seed",
               s.sample_index_rng_seed);
    MLPERF_LOG(detail, "effective_schedule_rng_seed", s.schedule_rng_seed);
    MLPERF_LOG(detail, "effective_accuracy_log_rng_seed",
               s.accuracy_log_rng_seed);
    MLPERF_LOG(detail, "effective_accuracy_log_probability",
               s.accuracy_log_probability);
    MLPERF_LOG(detail, "effective_accuracy_log_sampling_target",
               s.accuracy_log_sampling_target);
    MLPERF_LOG(detail, "effective_print_timestamps", s.print_timestamps);
    MLPERF_LOG(detail, "effective_performance_issue_unique",
               s.performance_issue_unique);
    MLPERF_LOG(detail, "effective_performance_issue_same",
               s.performance_issue_same);
    MLPERF_LOG(detail, "effective_performance_issue_same_index",
               s.performance_issue_same_index);
    MLPERF_LOG(detail, "effective_performance_sample_count",
               s.performance_sample_count);
    MLPERF_LOG(detail, "effective_sample_concatenate_permutation",
               s.sample_concatenate_permutation);
#else
    // Legacy free-form text logging format.
    detail("");
    detail("Effective Settings:");
    detail("Scenario : " + ToString(s.scenario));
    detail("Test mode : " + ToString(s.mode));
    detail("samples_per_query : ", s.samples_per_query);
    detail("target_qps : ", s.target_qps);
    detail("target_latency (ns): ", s.target_latency.count());
    detail("target_latency_percentile : ", s.target_latency_percentile);
    detail("max_async_queries : ", s.max_async_queries);
    detail("target_duration (ms): ", s.target_duration.count());
    detail("min_duration (ms): ", s.min_duration.count());
    detail("max_duration (ms): ", s.max_duration.count());
    detail("min_query_count : ", s.min_query_count);
    detail("max_query_count : ", s.max_query_count);
    detail("min_sample_count : ", s.min_sample_count);
    detail("qsl_rng_seed : ", s.qsl_rng_seed);
    detail("sample_index_rng_seed : ", s.sample_index_rng_seed);
    detail("schedule_rng_seed : ", s.schedule_rng_seed);
    detail("accuracy_log_rng_seed : ", s.accuracy_log_rng_seed);
    detail("accuracy_log_probability : ", s.accuracy_log_probability);
    detail("accuracy_log_sampling_target : ", s.accuracy_log_sampling_target);
    detail("print_timestamps : ", s.print_timestamps);
    detail("performance_issue_unique : ", s.performance_issue_unique);
    detail("performance_issue_same : ", s.performance_issue_same);
    detail("performance_issue_same_index : ", s.performance_issue_same_index);
    detail("performance_sample_count : ", s.performance_sample_count);
#endif
  });
}
// Convenience wrapper: logs the user-requested settings first, then the
// effective (normalized) settings, in that fixed order.
void TestSettingsInternal::LogAllSettings() const {
  LogRequestedTestSettings(requested);
  LogEffectiveSettings();
}
// Writes a human-readable summary of the effective settings to the summary
// log. Token-latency runs (LLM benchmarks) report TTFT/TPOT latency targets
// instead of the single target latency.
void TestSettingsInternal::LogSummary(AsyncSummary &summary) const {
  summary("samples_per_query : ", samples_per_query);
  summary("target_qps : ", target_qps);
  if (!use_token_latencies) {
    summary("target_latency (ns): ", target_latency.count());
  } else {
    // Token-latency mode: time-to-first-token and time-per-output-token.
    summary("ttft_latency (ns): ", server_ttft_latency);
    summary("tpot_latency (ns): ", server_tpot_latency);
  }
  summary("max_async_queries : ", max_async_queries);
  summary("min_duration (ms): ", min_duration.count());
  summary("max_duration (ms): ", max_duration.count());
  summary("min_query_count : ", min_query_count);
  summary("max_query_count : ", max_query_count);
  summary("qsl_rng_seed : ", qsl_rng_seed);
  summary("sample_index_rng_seed : ", sample_index_rng_seed);
  summary("schedule_rng_seed : ", schedule_rng_seed);
  summary("accuracy_log_rng_seed : ", accuracy_log_rng_seed);
  summary("accuracy_log_probability : ", accuracy_log_probability);
  summary("accuracy_log_sampling_target : ", accuracy_log_sampling_target);
  summary("print_timestamps : ", print_timestamps);
  summary("performance_issue_unique : ", performance_issue_unique);
  summary("performance_issue_same : ", performance_issue_same);
  summary("performance_issue_same_index : ", performance_issue_same_index);
  summary("performance_sample_count : ", performance_sample_count);
  if (sample_concatenate_permutation) {
    // Warn that the actual samples_per_query may be regenerated; the real
    // value appears as generated_samples_per_query in the detailed log.
    summary(
        "WARNING: sample_concatenate_permutation was set to true. \n"
        "Generated samples per query might be different as the one in the "
        "setting.\n"
        "Check the generated_samples_per_query line in the detailed log for "
        "the real\n"
        "samples_per_query value");
  }
}
} // namespace loadgen
int TestSettings::FromConfig(const std::string &path, const std::string &model,
const std::string &scenario, int conf_type) {
std::map<std::string, std::string> kv;
static int configCount = 0;
if (conf_type == 1) {
if (configCount == 0) {
// Only allow userConf as the single configFile and loadgen loads the
// mlperfConf automatically for perf and accuracy runs
FromConfig("", model, scenario, 0);
}
else {
LogDetail([](AsyncDetail &detail) {
std::stringstream ss;
ss << "Multiple conf files are used. This is not valid for official "
"submission.";
MLPERF_LOG_ERROR(detail, "error_invalid_config", ss.str());
});
}
configCount++;
}
// lookup key/value pairs from config
auto lookupkv = [&](const std::string &model, const std::string &scenario,
const std::string &key, uint64_t *val_l, double *val_d,
double multiplier = 1.0) {
std::map<std::string, std::string>::iterator it;
std::string found;
// lookup exact key first
it = kv.find(model + "." + scenario + "." + key);
if (it != kv.end()) {
found = it->second;
} else {
// lookup key with model wildcard
it = kv.find("*." + scenario + "." + key);
if (it != kv.end()) {
found = it->second;
} else {
it = kv.find(model + ".*." + key);
if (it != kv.end()) {
found = it->second;
} else {
it = kv.find("*.*." + key);
if (it != kv.end()) {
found = it->second;
} else {
return false;
}
}
}
}
// if we get here, found will be set
if (val_l) {
*val_l = strtoull(found.c_str(), nullptr, 0) *
static_cast<uint64_t>(multiplier);
}
if (val_d) *val_d = strtod(found.c_str(), nullptr) * multiplier;
return true;
};
int line_nr = 0;
int errors = 0;
// Declare the input stream before the if-else block
std::unique_ptr<std::istream> fss;
std::string line;
if (conf_type != 0) {
// dirt simple config parser
fss = std::make_unique<std::ifstream>(path);
if (!static_cast<std::ifstream *>(fss.get())->is_open()) {
LogDetail([p = path](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
std::stringstream ss;
ss << "can't open file " << p;
MLPERF_LOG_ERROR(detail, "error_invalid_config", ss.str());
#else
detail.Error("can't open file ", p);
#endif
});
return -ENOENT;
}
} else {
// Convert unsigned char array to std::string
std::string config_str(mlperf_conf);
fss = std::make_unique<std::istringstream>(config_str);
}
while (std::getline(*fss, line)) {
line_nr++;
std::istringstream iss(line);
std::string s, k;
int looking_for = 0; // 0=key, 1=equal, 2=value
while (iss >> s) {
if (s == "#" && looking_for != 2) {
// done with this line
break;
}
if (looking_for == 2) {
// got key and value
const char *start = s.c_str();
char *stop;
(void)strtoul(start, &stop, 0);
if (start + s.size() == stop) {
kv[k] = s;
continue;
}
(void)strtod(start, &stop);
if (start + s.size() == stop) {
kv[k] = s;
continue;
}
errors++;
LogDetail([l = line_nr](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
std::stringstream ss;
ss << "value needs to be integer or double, line=" << l;
MLPERF_LOG_ERROR(detail, "error_invalid_config", ss.str());
#else
detail.Error("value needs to be integer or double, line=", l);
#endif
});
break;
}
if (looking_for == 1 && s != "=") {
errors++;
LogDetail([l = line_nr](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
std::stringstream ss;
ss << "expected 'key=value', line=" << l;
MLPERF_LOG_ERROR(detail, "error_invalid_config", ss.str());
#else
detail.Error("expected 'key=value', line=", l);
#endif
});
break;
}
if (looking_for == 0) k = s;
looking_for++;
}
}
if (errors != 0) return -EINVAL;
uint64_t val;
// keys that apply to all scenarios
if (lookupkv(model, scenario, "mode", &val, nullptr)) {
switch (val) {
case 0:
mode = TestMode::SubmissionRun;
break;
case 1:
mode = TestMode::AccuracyOnly;
break;
case 2:
mode = TestMode::PerformanceOnly;
break;
case 3:
mode = TestMode::FindPeakPerformance;
break;
default:
LogDetail([](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
std::stringstream ss;
ss << "Invalid value passed to Mode key in config.";
MLPERF_LOG_ERROR(detail, "error_invalid_config", ss.str());
#else
detail.Error("Invalid value passed to Mode key in config.");
#endif
});
break;
}
}
if (conf_type == 0) {
lookupkv(model, scenario, "qsl_rng_seed", &qsl_rng_seed, nullptr);
lookupkv(model, scenario, "sample_index_rng_seed", &sample_index_rng_seed,
nullptr);
lookupkv(model, scenario, "schedule_rng_seed", &schedule_rng_seed, nullptr);
lookupkv(model, scenario, "accuracy_log_probability", nullptr,
&accuracy_log_probability, 0.01);
if (lookupkv(model, scenario, "test05", &val, nullptr))
test05 = (val == 1) ? true : false;
lookupkv(model, scenario, "test05_qsl_rng_seed", &test05_qsl_rng_seed,
nullptr);
lookupkv(model, scenario, "test05_sample_index_rng_seed",
&test05_sample_index_rng_seed, nullptr);
lookupkv(model, scenario, "test05_schedule_rng_seed",
&test05_schedule_rng_seed, nullptr);
}
// keys that can be overriden in user.conf but will make the results eligible
// only for open submissions
// keys to measure token metrics
if (lookupkv(model, scenario, "use_token_latencies", &val, nullptr)) {
use_token_latencies = (val == 1) ? true : false;
}
if (use_token_latencies) {
lookupkv(model, "Server", "ttft_latency", &server_ttft_latency, nullptr,
1000 * 1000);
lookupkv(model, "Server", "tpot_latency", &server_tpot_latency, nullptr,
1000 * 1000);
}
// keys to infer token metrics
if (lookupkv(model, scenario, "infer_token_latencies", &val, nullptr)) {
infer_token_latencies = (val == 1) ? true : false;
}
if (infer_token_latencies) {
lookupkv(model, scenario, "token_latency_scaling_factor",
&token_latency_scaling_factor, nullptr, 1);
}
// keys that apply to SingleStream
lookupkv(model, "SingleStream", "target_latency_percentile", nullptr,
&single_stream_target_latency_percentile, 0.01);
// keys that apply to MultiStream
lookupkv(model, "MultiStream", "target_latency_percentile", nullptr,
&multi_stream_target_latency_percentile, 0.01);
lookupkv(model, "MultiStream", "samples_per_query",
&multi_stream_samples_per_query, nullptr, 1);
// keys that apply to Server
lookupkv(model, "Server", "target_latency_percentile", nullptr,
&server_target_latency_percentile, 0.01);
lookupkv(model, "Server", "target_latency", &server_target_latency_ns,
nullptr, 1000 * 1000);
// keys that can be overriden in user.conf (the provided values still need to
// pass the submission checker rules)
if (lookupkv(model, scenario, "performance_issue_unique", &val, nullptr))
performance_issue_unique = (val == 0) ? false : true;
if (lookupkv(model, scenario, "performance_issue_same", &val, nullptr))
performance_issue_same = (val == 0) ? false : true;
lookupkv(model, scenario, "performance_issue_same_index",
&performance_issue_same_index, nullptr);
if (lookupkv(model, scenario, "sample_concatenate_permutation", &val,
nullptr))
sample_concatenate_permutation = (val == 1) ? true : false;
if (lookupkv(model, "Server", "coalesce_queries", &val, nullptr))
server_coalesce_queries = (val == 0) ? false : true;
if (lookupkv(model, "Server", "max_async_queries", &val, nullptr))
server_max_async_queries = int(val);
lookupkv(model, scenario, "min_duration", &min_duration_ms, nullptr);
lookupkv(model, scenario, "max_duration", &max_duration_ms, nullptr);
lookupkv(model, scenario, "min_query_count", &min_query_count, nullptr);
lookupkv(model, scenario, "max_query_count", &max_query_count, nullptr);
lookupkv(model, scenario, "performance_sample_count_override",
&performance_sample_count_override, nullptr);
lookupkv(model, "SingleStream", "target_latency", nullptr,
&single_stream_expected_latency_ns, 1000 * 1000);
lookupkv(model, "MultiStream", "target_latency", nullptr,
&multi_stream_expected_latency_ns, 1000 * 1000);
lookupkv(model, "Server", "target_qps", nullptr, &server_target_qps);
lookupkv(model, "Offline", "target_qps", 0, &offline_expected_qps);
if (lookupkv(model, scenario, "print_timestamps", &val, nullptr))
print_timestamps = (val == 0) ? false : true;
// keys that are used in audit.conf
lookupkv(model, scenario, "accuracy_log_rng_seed", &accuracy_log_rng_seed,
nullptr);
lookupkv(model, scenario, "accuracy_log_sampling_target",
&accuracy_log_sampling_target, nullptr);
return 0;
}
} // namespace mlperf
/* Copyright 2019 The MLPerf Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
/// \file
/// \brief The internal representation of user-provided settings.
#ifndef MLPERF_LOADGEN_TEST_SETTINGS_INTERNAL_H
#define MLPERF_LOADGEN_TEST_SETTINGS_INTERNAL_H
#include <chrono>
#include <cmath>
#include <string>
#include "logging.h"
#include "test_settings.h"
namespace mlperf {
namespace logging {
class AsyncSummary;
}
namespace loadgen {
using AsyncSummary = logging::AsyncSummary;
std::string ToString(TestScenario scenario);
std::string ToString(TestMode mode);
/// \brief takes the user-friendly TestSettings and normalizes it
/// for consumption by the loadgen.
/// \details It does things like remove scenario-specific naming and introduce
/// the concept of target_duration used to pre-generate queries.
struct TestSettingsInternal {
  explicit TestSettingsInternal(const TestSettings &requested_settings,
                                size_t qsl_performance_sample_count);
  // Logging helpers; LogAllSettings logs both requested and effective values.
  void LogEffectiveSettings() const;
  void LogAllSettings() const;
  void LogSummary(AsyncSummary &summary) const;

  // The original user-provided settings, kept for reference/logging.
  const TestSettings requested;
  const TestScenario scenario;  // Copied here for convenience.
  const TestMode mode;          // Copied here for convenience.

  // Normalized, scenario-agnostic counterparts of the scenario-specific
  // fields in TestSettings (e.g. *_expected_latency_ns, *_target_qps).
  uint64_t samples_per_query;
  double target_qps;
  std::chrono::nanoseconds target_latency{0};
  double target_latency_percentile;  // Single, multistream, and server modes.
  uint64_t max_async_queries;

  // Target duration is used to generate queries of a minimum duration before
  // the test run.
  std::chrono::milliseconds target_duration{0};

  // Min duration/query_count/sample_count are used to validate the test
  // duration at the end of the run.
  std::chrono::milliseconds min_duration{0};
  std::chrono::milliseconds max_duration{0};
  uint64_t min_query_count;
  uint64_t max_query_count;
  uint64_t min_sample_count;  // Offline only.

  // RNG seeds controlling sample selection, scheduling, and accuracy logging.
  uint64_t qsl_rng_seed;
  uint64_t sample_index_rng_seed;
  uint64_t schedule_rng_seed;
  uint64_t accuracy_log_rng_seed;
  double accuracy_log_probability;
  uint64_t accuracy_log_sampling_target;
  bool print_timestamps;

  // Performance-issue debugging knobs (replay unique/same samples).
  bool performance_issue_unique;
  bool performance_issue_same;
  uint64_t performance_issue_same_index;
  uint64_t performance_sample_count;
  bool sample_concatenate_permutation;

  // Token-latency (LLM) settings: TTFT/TPOT targets in nanoseconds.
  bool use_token_latencies = false;
  int64_t server_ttft_latency;
  int64_t server_tpot_latency;

  // Inferred token-latency mode and its scaling factor.
  bool infer_token_latencies = false;
  int64_t token_latency_scaling_factor;
};
/// \brief A namespace of collections of FindPeakPerformance helper functions,
/// mainly about binary search.
namespace find_peak_performance {
// Error message emitted when a FindPeakPerformance helper is instantiated
// for any scenario other than Server.
constexpr char const *kNotSupportedMsg =
    "Finding peak performance is only supported in Server scenarios.";
// Returns a copy of the lower-bound settings whose target_qps is the
// midpoint of the lower/upper bounds' target_qps (Server scenario only).
// For any other scenario, logs an error and returns the lower bound as-is.
template <TestScenario scenario>
TestSettingsInternal MidOfBoundaries(
    const TestSettingsInternal &lower_bound_settings,
    const TestSettingsInternal &upper_bound_settings) {
  TestSettingsInternal result = lower_bound_settings;
  if (scenario != TestScenario::Server) {
    LogDetail([](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", kNotSupportedMsg);
#else
      detail(kNotSupportedMsg);
#endif
    });
    return result;
  }
  const double lo = lower_bound_settings.target_qps;
  const double hi = upper_bound_settings.target_qps;
  assert(lo < hi);
  // Overflow-safe midpoint form: lo + (hi - lo) / 2.
  result.target_qps = lo + (hi - lo) / 2;
  return result;
}
// Reports whether the binary search over target_qps has converged: true when
// the lower and upper bounds agree to the requested number of decimal places
// (Server scenario only). Any other scenario logs an error and returns true
// so the caller's loop terminates.
template <TestScenario scenario>
bool IsFinished(const TestSettingsInternal &lower_bound_settings,
                const TestSettingsInternal &upper_bound_settings) {
  if (scenario != TestScenario::Server) {
    LogDetail([](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", kNotSupportedMsg);
#else
      detail(kNotSupportedMsg);
#endif
    });
    return true;
  }
  const uint8_t precision =
      lower_bound_settings.requested.server_find_peak_qps_decimals_of_precision;
  const double scale = std::pow(10, precision);
  const double lo = std::floor(lower_bound_settings.target_qps * scale);
  const double hi = std::floor(upper_bound_settings.target_qps * scale);
  // Converged once the scaled bounds are adjacent (or crossed).
  return lo + 1 >= hi;
}
// Renders the field being searched over as a string: the target_qps for the
// Server scenario. Any other scenario logs an error and falls back to the
// scenario name itself.
template <TestScenario scenario>
std::string ToStringPerformanceField(const TestSettingsInternal &settings) {
  if (scenario != TestScenario::Server) {
    LogDetail([](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", kNotSupportedMsg);
#else
      detail(kNotSupportedMsg);
#endif
    });
    return ToString(settings.scenario);
  }
  return std::to_string(settings.target_qps);
}
// Grows the search boundary by multiplying target_qps by
// (1 + server_find_peak_qps_boundary_step_size) — Server scenario only.
// Any other scenario logs an error and leaves the settings untouched.
template <TestScenario scenario>
void WidenPerformanceField(TestSettingsInternal *settings) {
  if (scenario != TestScenario::Server) {
    LogDetail([](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", kNotSupportedMsg);
#else
      detail(kNotSupportedMsg);
#endif
    });
    return;
  }
  const double growth =
      1 + settings->requested.server_find_peak_qps_boundary_step_size;
  settings->target_qps *= growth;
}
} // namespace find_peak_performance
} // namespace loadgen
} // namespace mlperf
#endif // MLPERF_LOADGEN_TEST_SETTINGS_INTERNAL_H
# GN build targets for the MLPerf LoadGen test and benchmark binaries.

# Shared test harness (header + main()) linked into the C++ loadgen tests.
static_library("mlperf_loadgen_tests_loadgen_test_main") {
  sources = [ "loadgen_test.h", "loadgen_test_main.cc" ]
  configs += [ "//build/config/compiler:exceptions" ]
}

# Performance test binary driving a null (no-op) SUT.
executable("mlperf_loadgen_perftests") {
  sources = [ "perftests_null_sut.cc" ]
  deps = [ "..:mlperf_loadgen" ]
}

# Basic functional tests for the loadgen library.
executable("mlperf_loadgen_tests_basic") {
  sources = [ "basic.cc" ]
  deps = [ "..:mlperf_loadgen",
           ":mlperf_loadgen_tests_loadgen_test_main" ]
  configs += [ "//build/config/compiler:exceptions" ]
}

# Python counterpart of the null-SUT perf test; depends on the wheel lib.
source_set("mlperf_loadgen_perftests_py") {
  sources = [ "perftests_null_sut.py" ]
  deps = [ "../..:loadgen_pymodule_wheel_lib" ]
}

# Documentation files grouped for the build graph.
source_set("docs") {
  sources = [ "README.md" ]
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment