Commit 3c15726c authored by yangzhong's avatar yangzhong
Browse files

git init

parents
# Copyright 2019 The MLPerf Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
# \file
# \brief MLPerf Inference LoadGen python module setup.
# \details Creates a module that python can import.
# All source files are compiled by python's C++ toolchain without depending
# on a loadgen lib.
#
# This setup.py can be used stand-alone, without the use of an external
# build system. This will pollute your source tree with output files
# and binaries. Use one of the gn build targets instead if you want
# to avoid polluting the source tree.
from setuptools import Extension, setup
from pathlib import Path
from pybind11 import get_include
from pybind11.setup_helpers import Pybind11Extension, build_ext
from version_generator import generate_loadgen_version_definitions
import subprocess
# Generate the version definition source before collecting sources so the
# extension build below can compile it in.
generated_version_source_filename = "generated/version_generated.cc"
generate_loadgen_version_definitions(generated_version_source_filename, ".")

# Headers that make up the public LoadGen API.
public_headers = [
    "loadgen.h",
    "query_sample.h",
    "query_sample_library.h",
    "system_under_test.h",
    "test_settings.h",
    "issue_query_controller.h",
    "early_stopping.h",
    "query_dispatch_library.h",
]

# Internal headers (plus the version generator script) the sources depend on.
lib_headers = [
    "logging.h",
    "test_settings_internal.h",
    "trace_generator.h",
    "utils.h",
    "version.h",
    "results.h",
    "bindings/c_api.h",
    "version_generator.py",
    "mlperf_conf.h",
]

lib_sources = [
    "early_stopping.cc",
    "issue_query_controller.cc",
    "loadgen.cc",
    "logging.cc",
    "test_settings_internal.cc",
    "utils.cc",
    "version.cc",
    "results.cc",
]

lib_bindings = [
    "bindings/c_api.cc",
    "bindings/python_api.cc",
]

this_directory = Path(__file__).parent
mlperf_loadgen_headers = public_headers + lib_headers
mlperf_loadgen_sources_no_gen = lib_sources + lib_bindings
mlperf_loadgen_sources = mlperf_loadgen_sources_no_gen + [
    generated_version_source_filename
]

mlperf_long_description = (
    this_directory / "README.md").read_text(encoding="utf-8")

# Read and validate the package version. Strip whitespace so a trailing
# newline in VERSION.txt does not leak into the version string or the
# MAJOR_VERSION/MINOR_VERSION macros defined below.
with open("VERSION.txt", "r", encoding="utf-8") as f:
    version = f.read().strip()
version_split = version.split(".")
if len(version_split) < 2:
    # Fail fast with a clear message instead of an IndexError further down.
    raise RuntimeError(
        "Version is incomplete. Needs a format like 4.1.1 in VERSION.txt")

# Embed mlperf.conf into a C header so the loadgen carries a built-in default
# config instead of requiring the file at runtime.
try:
    with open("mlperf.conf", "r", encoding="utf-8") as file:
        conf_contents = file.read()
    # Escape backslashes and double quotes
    conf_contents = conf_contents.replace('\\', '\\\\').replace('"', '\\"')
    # Convert newlines so each config line becomes its own C string literal.
    conf_contents = conf_contents.replace('\n', '\\n"\n"')
    formatted_content = f'const char* mlperf_conf =\n"{conf_contents}";\n'
    with open("mlperf_conf.h", "w", encoding="utf-8") as header_file:
        header_file.write(formatted_content)
except IOError as e:
    raise RuntimeError(f"Failed to generate header file: {e}") from e

mlperf_loadgen_module = Pybind11Extension(
    "mlperf_loadgen",
    define_macros=[
        ("MAJOR_VERSION", version_split[0]),
        ("MINOR_VERSION", version_split[1]),
    ],
    include_dirs=[".", get_include()],
    sources=mlperf_loadgen_sources,
    depends=mlperf_loadgen_headers,
)

setup(
    name="mlcommons_loadgen",
    version=version,
    description="MLPerf Inference LoadGen python bindings",
    url="https://mlcommons.org/",
    cmdclass={"build_ext": build_ext},
    ext_modules=[mlperf_loadgen_module],
    packages=['mlcommons_loadgen'],
    package_dir={'mlcommons_loadgen': '.'},
    include_package_data=True,
    long_description=mlperf_long_description,
    long_description_content_type='text/markdown',
)
/* Copyright 2019 The MLPerf Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
/// \file
/// \brief Defines the SystemUnderTest interface.
#ifndef MLPERF_LOADGEN_SYSTEM_UNDER_TEST_H
#define MLPERF_LOADGEN_SYSTEM_UNDER_TEST_H
#include <string>
#include <vector>
#include "query_sample.h"
namespace mlperf {
/// \addtogroup LoadgenAPI
/// @{
/// \brief The interface a client implements for the loadgen to test.
/// \todo Add hook for an untimed warm up period for the SUT.
/// \todo Add hook for an untimed warm up period for the loadgen logic.
/// \todo Support power hooks for cool-down period before running performance
/// traffic.
/// \todo Support power hooks for correlating test timeline with power
/// measurement timeline.
class SystemUnderTest {
 public:
  /// \brief Virtual destructor so implementations can be destroyed through
  /// this interface.
  virtual ~SystemUnderTest() {}

  /// \brief A human-readable string for logging purposes.
  virtual const std::string& Name() = 0;

  /// \brief Lets the loadgen issue N samples to the SUT.
  /// \details The SUT may either a) return immediately and signal completion
  /// at a later time on another thread or b) it may block and signal
  /// completion on the current stack. The load generator will handle both
  /// cases properly.
  /// Note: The data for neighboring samples may or may not be contiguous
  /// depending on the scenario.
  virtual void IssueQuery(const std::vector<QuerySample>& samples) = 0;

  /// \brief Called immediately after the last call to IssueQuery
  /// in a series is made.
  /// \details This doesn't necessarily signify the end of the
  /// test since there may be multiple series involved during a test; for
  /// example in accuracy mode.
  /// Clients can use this to flush any deferred queries immediately, rather
  /// than waiting for some timeout.
  /// This is especially useful in the server scenario.
  virtual void FlushQueries() = 0;
};
/// @}
} // namespace mlperf
#endif // MLPERF_LOADGEN_SYSTEM_UNDER_TEST_H
/* Copyright 2019 The MLPerf Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
/// \file
/// \brief Provides ways for a client to change the behavior and
/// constraints of the load generator.
/// \details Note: The MLPerf specification takes precedent over any of the
/// comments in this file if there are inconsistencies in regards to how the
/// loadgen *should* work.
/// The comments in this file are indicative of the loadgen implementation.
#ifndef MLPERF_LOADGEN_TEST_SETTINGS_H
#define MLPERF_LOADGEN_TEST_SETTINGS_H
#include <cstdint>
#include <string>
namespace mlperf {
/// \addtogroup LoadgenAPI
/// @{
/// \addtogroup LoadgenAPITestSettings Test Settings
/// \brief This page contains a description of all the scenarios, modes,
/// and log settings as implemented by the LoadGen.
/// @{
///
/// \enum TestScenario
/// * **SingleStream**
/// + Issues queries containing a single sample.
/// + The next query is only issued once the previous one has completed.
/// + Internal LoadGen latency between queries is not included in the
/// latency results.
/// + **Final performance result is:** a percentile of the latency.
/// * **MultiStream**
/// + Issues queries containing N samples.
/// - N is specified by \link
/// mlperf::TestSettings::multi_stream_samples_per_query
/// multi_stream_samples_per_query \endlink.
/// + The next query is only issued once the previous one has completed.
/// + The samples of each query are guaranteed to be contiguous with respect
/// to the order they were loaded in the QuerySampleLibrary.
/// + Latency is tracked and reported on a per-query and per-sample basis.
/// + The latency of a query is the maximum latency of its samples, including
/// any cross-thread communication within the loadgen.
/// + Internal LoadGen latency between queries is not included in the
/// latency results.
/// + **Final performance result is:** a percentile of the query latency.
/// * **Server**
/// + Sends queries with a single sample.
/// + Queries have a random poisson (non-uniform) arrival rate that, when
/// averaged, hits the target QPS.
/// + There is no limit on the number of outstanding queries, as long as
/// the latency constraints are met.
/// + **Final performance result is:** PASS if the a percentile of the latency
/// is under a given threshold. FAIL otherwise.
/// - Threshold is specified by \link
/// mlperf::TestSettings::server_target_latency_ns server_target_latency_ns
/// \endlink.
/// * **Offline**
/// + Sends all N samples to the SUT inside of a single query.
/// + The samples of the query are guaranteed to be contiguous with respect
/// to the order they were loaded in the QuerySampleLibrary.
/// + **Final performance result is:** samples per second.
///
enum class TestScenario {
  SingleStream,  ///< Single-sample queries, issued one at a time.
  MultiStream,   ///< N-sample queries, issued one at a time.
  Server,        ///< Single-sample queries with Poisson arrival times.
  Offline,       ///< All N samples sent to the SUT in a single query.
};
///
/// \enum TestMode
/// * **SubmissionRun**
/// + Runs accuracy mode followed by performance mode.
/// + TODO: Implement further requirements as decided by MLPerf.
/// * **AccuracyOnly**
/// + Runs each sample from the QSL through the SUT at least once.
/// + Outputs responses to an accuracy json that can be parsed by a model +
/// sample library specific script.
/// * **PerformanceOnly**
/// + Runs the performance traffic for the given scenario, as described in
/// the comments for TestScenario.
/// * **FindPeakPerformance**
/// + Determines the maximum QPS for the Server scenario.
/// + Not applicable for SingleStream, MultiStream or Offline scenarios.
///
enum class TestMode {
  SubmissionRun,        ///< Accuracy mode followed by performance mode.
  AccuracyOnly,         ///< Each QSL sample goes through the SUT at least once.
  PerformanceOnly,      ///< Performance traffic for the chosen scenario.
  FindPeakPerformance,  ///< Searches for the maximum QPS (Server only).
};
///
/// \brief Top-level struct specifying the modes and parameters of the test.
///
struct TestSettings {
  TestScenario scenario = TestScenario::SingleStream;
  TestMode mode = TestMode::PerformanceOnly;

  // ==================================
  /// \name SingleStream-specific
  /**@{*/
  /// \brief A hint used by the loadgen to pre-generate enough samples to
  /// meet the minimum test duration.
  double single_stream_expected_latency_ns = 1000000;
  /// \brief The latency percentile reported as the final result.
  double single_stream_target_latency_percentile = 0.90;
  /**@}*/

  // ==================================
  /// \name MultiStream-specific
  /**@{*/
  /// \brief A hint used by the loadgen to pre-generate enough samples to
  /// meet the minimum test duration.
  /// \brief MultiStream latency is for query (not sample) latency
  double multi_stream_expected_latency_ns = 8000000;
  /// \brief The latency percentile for MultiStream mode.
  double multi_stream_target_latency_percentile = 0.99;
  /// \brief The number of samples in each query.
  /// \details How many samples are bundled in a query
  uint64_t multi_stream_samples_per_query = 8;
  /**@}*/

  // ==================================
  /// \name Server-specific
  /**@{*/
  /// \brief The average QPS of the poisson distribution.
  /// \details note: This field is used as FindPeakPerformance's lower bound.
  /// When you run FindPeakPerformanceMode, you should make sure that this
  /// value satisfies performance constraints.
  double server_target_qps = 1;
  /// \brief The latency constraint for the Server scenario.
  uint64_t server_target_latency_ns = 100000000;
  /// \brief The latency percentile for server mode. This value is combined
  /// with server_target_latency_ns to determine if a run is valid.
  /// \details 99% is the default value, which is correct for image models.
  /// GNMT should be set to 0.97 (97%) in v0.5. (As always, check the policy
  /// page for updated values for the benchmark you are running.)
  double server_target_latency_percentile = 0.99;
  /// \brief If this flag is set to true, LoadGen will combine samples from
  /// multiple queries into a single query if their scheduled issue times have
  /// passed.
  bool server_coalesce_queries = false;
  /// \brief The decimal places of QPS precision used to terminate
  /// FindPeakPerformance mode.
  int server_find_peak_qps_decimals_of_precision = 1;
  /// \brief A step size (as a fraction of the QPS) used to widen the lower
  /// and upper bounds to find the initial boundaries of binary search.
  double server_find_peak_qps_boundary_step_size = 1;
  /// \brief The maximum number of outstanding queries to allow before exiting
  /// early from a performance run. Useful for performance tuning and speeding
  /// up the FindPeakPerformance mode.
  uint64_t server_max_async_queries = 0;  ///< 0: Infinity.
  /// \brief The number of issue query threads that will be registered and
  /// used to call SUT's IssueQuery(). If this is 0, the same thread calling
  /// StartTest() will be used to call IssueQuery(). See also
  /// mlperf::RegisterIssueQueryThread().
  uint64_t server_num_issue_query_threads = 0;
  /**@}*/

  // ==================================
  /// \name Offline-specific
  /**@{*/
  /// \brief Specifies the QPS the SUT expects to hit for the offline load.
  /// The loadgen generates 10% more queries than it thinks it needs to meet
  /// the minimum test duration.
  double offline_expected_qps = 1;
  /// \brief Affects the order in which the samples of the dataset are chosen.
  /// If false it concatenates a single permutation of the dataset (or part
  /// of it depending on QSL->PerformanceSampleCount()) several times up to
  /// the number of samples requested.
  /// If true it concatenates a multiple permutation of the dataset (or a
  /// part of it depending on QSL->PerformanceSampleCount()) several times
  /// up to the number of samples requested.
  bool sample_concatenate_permutation = false;
  /**@}*/

  // ==================================
  /// \name Test duration
  /// The test runs until **both** min duration and min query count have been
  /// met. However, it will exit before that point if **either** max duration
  /// or max query count have been reached.
  /**@{*/
  uint64_t min_duration_ms = 10000;
  uint64_t max_duration_ms = 0;  ///< 0: Infinity.
  uint64_t min_query_count = 100;
  uint64_t max_query_count = 0;  ///< 0: Infinity.
  /**@}*/

  // ==================================
  /// \name Random number generation
  /// There are 4 separate seeds, so each dimension can be changed
  /// independently.
  /**@{*/
  /// \brief Affects which subset of samples from the QSL are chosen for
  /// the performance sample set and accuracy sample sets.
  uint64_t qsl_rng_seed = 0;
  /// \brief Affects the order in which samples from the performance set will
  /// be included in queries.
  uint64_t sample_index_rng_seed = 0;
  /// \brief Affects the poisson arrival process of the Server scenario.
  /// \details Different seeds will appear to "jitter" the queries
  /// differently in time, but should not affect the average issued QPS.
  uint64_t schedule_rng_seed = 0;
  /// \brief Affects which samples have their query returns logged to the
  /// accuracy log in performance mode.
  uint64_t accuracy_log_rng_seed = 0;
  /// \brief Probability of the query response of a sample being logged to
  /// the accuracy log in performance mode
  double accuracy_log_probability = 0.0;
  /// \brief Target number of samples that will have their results printed to
  /// accuracy log in performance mode for compliance testing
  uint64_t accuracy_log_sampling_target = 0;
  /// \brief Variables for running test05 from native config. A boolean that
  /// determines whether or not to run test05 and three random seeds to run
  /// the test
  bool test05 = false;
  uint64_t test05_qsl_rng_seed = 0;
  uint64_t test05_sample_index_rng_seed = 0;
  uint64_t test05_schedule_rng_seed = 0;
  /// \brief Load mlperf parameter config from file.
  int FromConfig(const std::string &path, const std::string &model,
                 const std::string &scenario, int conf_type = 1);
  /**@}*/

  // ==================================
  /// \name Performance Sample modifiers
  /// \details These settings can be used to Audit Performance mode runs.
  /// In order to detect sample caching by SUT, performance of runs when only
  /// unique queries (with non-repeated samples) are issued can be compared
  /// with that when the same query is repeatedly issued.
  /**@{*/
  /// \brief Prints measurement interval start and stop timestamps to
  /// std::cout for the purpose of comparison against an external timer
  bool print_timestamps = false;
  /// \brief Allows issuing only unique queries in Performance mode of any
  /// scenario \details This can be used to send non-repeat & hence unique
  /// samples to SUT
  bool performance_issue_unique = false;
  /// \brief If true, the same query is chosen repeatedly for Inference.
  /// In offline scenario, the query is filled with the same sample.
  bool performance_issue_same = false;
  /// \brief Offset to control which sample is repeated in
  /// performance_issue_same mode.
  /// Value should be within [0, performance_sample_count)
  uint64_t performance_issue_same_index = 0;
  /// \brief Overrides QSL->PerformanceSampleCount() when non-zero
  uint64_t performance_sample_count_override = 0;
  /// \brief Measure token latencies
  bool use_token_latencies = false;
  /// Token latency parameters
  uint64_t server_ttft_latency = 100000000;
  uint64_t server_tpot_latency = 100000000;
  /// \brief Infer token latencies
  bool infer_token_latencies = false;
  /// \brief Scaling factor used when inferring token latencies.
  /// \details Default-initialized to 1 (identity). This field was previously
  /// the only member with no initializer, so TestSettingsInternal copied an
  /// indeterminate value (undefined behavior) whenever the caller did not set
  /// it explicitly.
  uint64_t token_latency_scaling_factor = 1;
  /**@}*/
};
///
/// \enum LoggingMode
/// Specifies how and when logging should be sampled and stringified at
/// runtime.
/// * **AsyncPoll**
/// + Logs are serialized and output on an IOThread that polls for new logs at
/// a fixed interval. This is the only mode currently implemented.
/// * **EndOfTestOnly**
/// + TODO: Logs are serialized and output only at the end of the test.
/// * **Synchronous**
/// + TODO: Logs are serialized and output inline.
enum class LoggingMode {
  AsyncPoll,      ///< IOThread polls for new logs at a fixed interval.
                  ///< The only mode currently implemented.
  EndOfTestOnly,  ///< TODO: serialize and output only at the end of the test.
  Synchronous,    ///< TODO: serialize and output logs inline.
};
///
/// \brief Specifies where log outputs should go.
///
/// By default, the loadgen outputs its log files to outdir and
/// modifies the filenames of its logs with a prefix and suffix.
/// Filenames will take the form:
/// "<outdir>/<datetime><prefix>summary<suffix>.txt"
///
/// Affordances for outputting logs to stdout are also provided.
///
struct LogOutputSettings {
  // Log filenames take the form:
  // "<outdir>/<datetime><prefix>summary<suffix>.txt".
  std::string outdir = ".";
  std::string prefix = "mlperf_log_";
  std::string suffix = "";
  // If true, a datetime string is prepended to the log filenames.
  bool prefix_with_datetime = false;
  // Mirror the detail/summary logs to stdout in addition to the files.
  bool copy_detail_to_stdout = false;
  bool copy_summary_to_stdout = false;
};
///
/// \brief Top-level log settings.
///
struct LogSettings {
  // Where log files are written; see LogOutputSettings.
  LogOutputSettings log_output;
  // How and when logs are sampled and stringified; see LoggingMode.
  LoggingMode log_mode = LoggingMode::AsyncPoll;
  uint64_t log_mode_async_poll_interval_ms = 1000;  ///< TODO: Implement this.
  // NOTE(review): presumably toggles trace log generation — confirm usage.
  bool enable_trace = true;
};
/// @}
/// @}
} // namespace mlperf
#endif // MLPERF_LOADGEN_TEST_SETTINGS_H
/* Copyright 2019 The MLPerf Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "test_settings_internal.h"
#include <fstream>
#include <map>
#include <sstream>
#include <string>
#include "logging.h"
#include "mlperf_conf.h"
#include "utils.h"
namespace mlperf {
namespace loadgen {
/// \brief Derives the effective internal settings from the user-requested
/// TestSettings plus the QSL's advertised performance sample count, applying
/// scenario-specific defaults and validating inconsistent combinations.
TestSettingsInternal::TestSettingsInternal(
    const TestSettings &requested_settings, size_t qsl_performance_sample_count)
    : requested(requested_settings),
      scenario(requested.scenario),
      mode(requested.mode),
      samples_per_query(1),
      target_qps(1),
      max_async_queries(0),
      target_duration(std::chrono::milliseconds(requested.min_duration_ms)),
      min_duration(std::chrono::milliseconds(requested.min_duration_ms)),
      max_duration(std::chrono::milliseconds(requested.max_duration_ms)),
      min_query_count(requested.min_query_count),
      max_query_count(requested.max_query_count),
      min_sample_count(0),
      qsl_rng_seed(requested.qsl_rng_seed),
      sample_index_rng_seed(requested.sample_index_rng_seed),
      schedule_rng_seed(requested.schedule_rng_seed),
      accuracy_log_rng_seed(requested.accuracy_log_rng_seed),
      accuracy_log_probability(requested.accuracy_log_probability),
      accuracy_log_sampling_target(requested.accuracy_log_sampling_target),
      print_timestamps(requested.print_timestamps),
      performance_issue_unique(requested.performance_issue_unique),
      performance_issue_same(requested.performance_issue_same),
      performance_issue_same_index(requested.performance_issue_same_index),
      performance_sample_count(0),
      sample_concatenate_permutation(false),
      use_token_latencies(requested.use_token_latencies),
      server_ttft_latency(requested.server_ttft_latency),
      server_tpot_latency(requested.server_tpot_latency),
      infer_token_latencies(requested.infer_token_latencies),
      token_latency_scaling_factor(requested.token_latency_scaling_factor) {
  // Target QPS, target latency, and max_async_queries.
  switch (requested.scenario) {
    case TestScenario::SingleStream:
      // QPS is derived from the expected per-sample latency.
      target_qps = static_cast<double>(std::nano::den) /
                   requested.single_stream_expected_latency_ns;
      max_async_queries = 1;
      target_latency_percentile =
          requested.single_stream_target_latency_percentile;
      break;
    case TestScenario::MultiStream:
      // QPS is derived from the expected per-query latency.
      target_qps = static_cast<double>(std::nano::den) /
                   requested.multi_stream_expected_latency_ns;
      max_async_queries = 1;
      target_latency_percentile =
          requested.multi_stream_target_latency_percentile;
      break;
    case TestScenario::Server:
      if (requested.server_target_qps >= 0.0) {
        target_qps = requested.server_target_qps;
      } else {
        // Negative QPS is invalid; log an error and keep the default (1).
        LogDetail([server_target_qps = requested.server_target_qps,
                   target_qps = target_qps](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
          std::stringstream ss;
          ss << "Invalid value for server_target_qps requested."
             << " requested: " << server_target_qps << " using: " << target_qps;
          MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", ss.str());
#else
          detail.Error("Invalid value for server_target_qps requested.",
                       "requested", server_target_qps, "using", target_qps);
#endif
        });
      }
      target_latency =
          std::chrono::nanoseconds(requested.server_target_latency_ns);
      target_latency_percentile = requested.server_target_latency_percentile;
      max_async_queries = requested.server_max_async_queries;
      break;
    case TestScenario::Offline:
      // target_latency_percentile is not used in Offline, but set it to
      // 0.99 anyway to avoid garbage value.
      target_latency_percentile = 0.99;
      if (requested.offline_expected_qps >= 0.0) {
        target_qps = requested.offline_expected_qps;
      } else {
        // Negative QPS is invalid; log an error and keep the default (1).
        LogDetail([offline_expected_qps = requested.offline_expected_qps,
                   target_qps = target_qps](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
          std::stringstream ss;
          ss << "Invalid value for offline_expected_qps requested."
             << " requested: " << offline_expected_qps
             << " using: " << target_qps;
          MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", ss.str());
#else
          detail.Error("Invalid value for offline_expected_qps requested.",
                       "requested", offline_expected_qps, "using", target_qps);
#endif
        });
      }
      max_async_queries = 1;
      break;
  }

  // Performance Sample Count: TestSettings override QSL ->
  // PerformanceSampleCount
  performance_sample_count = (requested.performance_sample_count_override == 0)
                                 ? qsl_performance_sample_count
                                 : requested.performance_sample_count_override;

  // Sample by concatenating several permutations of the dataset
  // (sample_concatenate_permutation).
  sample_concatenate_permutation =
      (requested.sample_concatenate_permutation == 0)
          ? false
          : requested.sample_concatenate_permutation;

  // Samples per query.
  if (requested.scenario == TestScenario::MultiStream) {
    samples_per_query = requested.multi_stream_samples_per_query;
  }

  // In the offline scenario, coalesce all queries into a single query.
  if (requested.scenario == TestScenario::Offline) {
    // TODO: Should the spec require a max duration for large query counts?
    // kSlack is used to make sure we generate enough samples for the SUT
    // to take longer than the minimum test duration required by the
    // MLPerf spec.
    constexpr double kSlack = 1.1;
    uint64_t target_sample_count =
        kSlack * DurationToSeconds(target_duration) * target_qps;
    samples_per_query =
        (requested.performance_issue_unique)
            ? performance_sample_count
            : std::max<uint64_t>(min_query_count, target_sample_count);
    // The single coalesced query satisfies the query count by itself; the
    // duration requirement is folded into samples_per_query above.
    min_query_count = 1;
    target_duration = std::chrono::milliseconds(0);
  }

  // FIXME: Only do this for 3D-UNet SingleStream, for v2.0
  // TODO: consolidate after v2.0
  // make min_queries to be multiple of performance_sample_count
  // performance_sample_count == 0 makes it to be equal to loaded_samples.size()
  if (sample_concatenate_permutation &&
      requested.scenario == TestScenario::SingleStream) {
    // set slack larger for 3D-UNet KiTS19 distribution, i.e. 50% latency << 90%
    // latency
    constexpr double kSlack = 2.0;
    uint64_t expected_queries =
        kSlack * DurationToSeconds(target_duration) * target_qps;
    min_query_count =
        min_query_count > expected_queries ? min_query_count : expected_queries;
    // Round min_query_count up to a multiple of qsl_performance_sample_count.
    min_query_count += qsl_performance_sample_count -
                       (min_query_count % qsl_performance_sample_count);
  }

  min_sample_count = min_query_count * samples_per_query;

  // Validate TestSettings
  if (requested.performance_issue_same &&
      (requested.performance_issue_same_index >= performance_sample_count)) {
    LogDetail([performance_issue_same_index =
                   requested.performance_issue_same_index,
               performance_sample_count =
                   performance_sample_count](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      std::stringstream ss;
      ss << "Sample Idx to be repeated in performance_issue_same mode"
         << " cannot be greater than loaded performance_sample_count."
         << " performance_issue_same_index: " << performance_issue_same_index
         << " performance_sample_count: " << performance_sample_count;
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", ss.str());
#else
      detail.Error(
          "Sample Idx to be repeated in performance_issue_same mode"
          " cannot be greater than loaded performance_sample_count.",
          "performance_issue_same_index", performance_issue_same_index,
          "performance_sample_count", performance_sample_count);
#endif
    });
  }

  if (requested.performance_issue_unique && requested.performance_issue_same) {
    LogDetail([performance_issue_unique = requested.performance_issue_unique,
               performance_issue_same =
                   requested.performance_issue_same](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      std::stringstream ss;
      ss << "Performance_issue_unique and performance_issue_same, both"
         << " cannot be true at the same time."
         << " performance_issue_unique: " << performance_issue_unique
         << " performance_issue_same: " << performance_issue_same;
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", ss.str());
#else
      detail.Error(
          "Performance_issue_unique and performance_issue_same, both"
          " cannot be true at the same time.",
          "performance_issue_unique", performance_issue_unique,
          "performance_issue_same", performance_issue_same);
#endif
    });
  }
}
// Stringifies a TestScenario for logging. The spelling of the two stream
// scenarios depends on which logging format was compiled in.
std::string ToString(TestScenario scenario) {
  if (scenario == TestScenario::Server) return "Server";
  if (scenario == TestScenario::Offline) return "Offline";
#if USE_NEW_LOGGING_FORMAT
  if (scenario == TestScenario::SingleStream) return "SingleStream";
  if (scenario == TestScenario::MultiStream) return "MultiStream";
#else
  if (scenario == TestScenario::SingleStream) return "Single Stream";
  if (scenario == TestScenario::MultiStream) return "Multi Stream";
#endif
  assert(false);
  return "InvalidScenario";
}
// Stringifies a TestMode for logging. The exact spelling depends on which
// logging format was compiled in.
std::string ToString(TestMode mode) {
#if USE_NEW_LOGGING_FORMAT
  if (mode == TestMode::SubmissionRun) return "SubmissionRun";
  if (mode == TestMode::AccuracyOnly) return "AccuracyOnly";
  if (mode == TestMode::PerformanceOnly) return "PerformanceOnly";
  if (mode == TestMode::FindPeakPerformance) return "FindPeakPerformance";
#else
  if (mode == TestMode::SubmissionRun) return "Submission";
  if (mode == TestMode::AccuracyOnly) return "Accuracy";
  if (mode == TestMode::PerformanceOnly) return "Performance";
  if (mode == TestMode::FindPeakPerformance) return "Find Peak Performance";
#endif
  assert(false);
  return "InvalidMode";
}
void LogRequestedTestSettings(const TestSettings &s) {
LogDetail([s](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
MLPERF_LOG(detail, "requested_scenario", ToString(s.scenario));
MLPERF_LOG(detail, "requested_test_mode", ToString(s.mode));
// Scenario-specific
switch (s.scenario) {
case TestScenario::SingleStream:
MLPERF_LOG(detail, "requested_single_stream_expected_latency_ns",
s.single_stream_expected_latency_ns);
MLPERF_LOG(detail, "requested_single_stream_target_latency_percentile",
s.single_stream_target_latency_percentile);
break;
case TestScenario::MultiStream:
MLPERF_LOG(detail, "requested_multi_stream_expected_latency_ns",
s.multi_stream_expected_latency_ns);
MLPERF_LOG(detail, "requested_multi_stream_target_latency_percentile",
s.multi_stream_target_latency_percentile);
MLPERF_LOG(detail, "requested_multi_stream_samples_per_query",
s.multi_stream_samples_per_query);
break;
case TestScenario::Server:
MLPERF_LOG(detail, "requested_server_target_qps", s.server_target_qps);
MLPERF_LOG(detail, "requested_server_target_latency_ns",
s.server_target_latency_ns);
MLPERF_LOG(detail, "requested_server_target_latency_percentile",
s.server_target_latency_percentile);
MLPERF_LOG(detail, "requested_server_coalesce_queries",
s.server_coalesce_queries);
MLPERF_LOG(detail,
"requested_server_find_peak_qps_decimals_of_precision",
s.server_find_peak_qps_decimals_of_precision);
MLPERF_LOG(detail, "requested_server_find_peak_qps_boundary_step_size",
s.server_find_peak_qps_boundary_step_size);
MLPERF_LOG(detail, "requested_server_max_async_queries",
s.server_max_async_queries);
MLPERF_LOG(detail, "requested_server_num_issue_query_threads",
s.server_num_issue_query_threads);
break;
case TestScenario::Offline:
MLPERF_LOG(detail, "requested_offline_expected_qps",
s.offline_expected_qps);
break;
}
// Overrides
MLPERF_LOG(detail, "requested_min_duration_ms", s.min_duration_ms);
MLPERF_LOG(detail, "requested_max_duration_ms", s.max_duration_ms);
MLPERF_LOG(detail, "requested_min_query_count", s.min_query_count);
MLPERF_LOG(detail, "requested_max_query_count", s.max_query_count);
MLPERF_LOG(detail, "requested_qsl_rng_seed", s.qsl_rng_seed);
MLPERF_LOG(detail, "requested_sample_index_rng_seed",
s.sample_index_rng_seed);
MLPERF_LOG(detail, "requested_schedule_rng_seed", s.schedule_rng_seed);
MLPERF_LOG(detail, "requested_accuracy_log_rng_seed",
s.accuracy_log_rng_seed);
MLPERF_LOG(detail, "requested_accuracy_log_probability",
s.accuracy_log_probability);
MLPERF_LOG(detail, "requested_accuracy_log_sampling_target",
s.accuracy_log_sampling_target);
MLPERF_LOG(detail, "requested_print_timestamps", s.print_timestamps);
MLPERF_LOG(detail, "requested_performance_issue_unique",
s.performance_issue_unique);
MLPERF_LOG(detail, "requested_performance_issue_same",
s.performance_issue_same);
MLPERF_LOG(detail, "requested_performance_issue_same_index",
s.performance_issue_same_index);
MLPERF_LOG(detail, "requested_performance_sample_count_override",
s.performance_sample_count_override);
MLPERF_LOG(detail, "requested_sample_concatenate_permutation",
s.sample_concatenate_permutation);
// Token latencies specific values
if (s.use_token_latencies) {
MLPERF_LOG(detail, "requested_use_token_latencies",
s.use_token_latencies);
if (s.scenario != TestScenario::Offline) {
MLPERF_LOG(detail, "requested_server_ttft_latency",
s.server_ttft_latency);
MLPERF_LOG(detail, "requested_server_tpot_latency",
s.server_tpot_latency);
}
}
#else
detail("");
detail("Requested Settings:");
detail("Scenario : " + ToString(s.scenario));
detail("Test mode : " + ToString(s.mode));
// Scenario-specific
switch (s.scenario) {
case TestScenario::SingleStream:
detail("single_stream_expected_latency_ns : ",
s.single_stream_expected_latency_ns);
detail("single_stream_target_latency_percentile : ",
s.single_stream_target_latency_percentile);
break;
case TestScenario::MultiStream:
detail("multi_stream_expected_latency_ns : ",
s.multi_stream_expected_latency_ns);
detail("multi_stream_target_latency_percentile : ",
s.multi_stream_target_latency_percentile);
detail("multi_stream_samples_per_query : ",
s.multi_stream_samples_per_query);
break;
case TestScenario::Server:
detail("server_target_qps : ", s.server_target_qps);
detail("server_target_latency_ns : ", s.server_target_latency_ns);
detail("server_target_latency_percentile : ",
s.server_target_latency_percentile);
detail("server_coalesce_queries : ", s.server_coalesce_queries);
detail("server_find_peak_qps_decimals_of_precision : ",
s.server_find_peak_qps_decimals_of_precision);
detail("server_find_peak_qps_boundary_step_size : ",
s.server_find_peak_qps_boundary_step_size);
detail("server_max_async_queries : ", s.server_max_async_queries);
detail("server_num_issue_query_threads : ",
s.server_num_issue_query_threads);
break;
case TestScenario::Offline:
detail("offline_expected_qps : ", s.offline_expected_qps);
break;
}
// Overrides
detail("min_duration_ms : ", s.min_duration_ms);
detail("max_duration_ms : ", s.max_duration_ms);
detail("min_query_count : ", s.min_query_count);
detail("max_query_count : ", s.max_query_count);
detail("qsl_rng_seed : ", s.qsl_rng_seed);
detail("sample_index_rng_seed : ", s.sample_index_rng_seed);
detail("schedule_rng_seed : ", s.schedule_rng_seed);
detail("accuracy_log_rng_seed : ", s.accuracy_log_rng_seed);
detail("accuracy_log_probability : ", s.accuracy_log_probability);
detail("accuracy_log_sampling_target : ", s.accuracy_log_sampling_target);
detail("print_timestamps : ", s.print_timestamps);
detail("performance_issue_unique : ", s.performance_issue_unique);
detail("performance_issue_same : ", s.performance_issue_same);
detail("performance_issue_same_index : ", s.performance_issue_same_index);
detail("performance_sample_count_override : ",
s.performance_sample_count_override);
detail("");
#endif
});
}
// Logs the effective (post-normalization) settings that the test will
// actually run with, as opposed to the raw user-requested settings.
// The settings are captured by value ([s = *this]) so the asynchronous
// logger never references a possibly-destroyed object.
void TestSettingsInternal::LogEffectiveSettings() const {
  LogDetail([s = *this](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
    // Structured logging: one key/value entry per effective setting.
    MLPERF_LOG(detail, "effective_scenario", ToString(s.scenario));
    MLPERF_LOG(detail, "effective_test_mode", ToString(s.mode));
    MLPERF_LOG(detail, "effective_samples_per_query", s.samples_per_query);
    MLPERF_LOG(detail, "effective_target_qps", s.target_qps);
    MLPERF_LOG(detail, "effective_target_latency_ns", s.target_latency.count());
    MLPERF_LOG(detail, "effective_target_latency_percentile",
               s.target_latency_percentile);
    MLPERF_LOG(detail, "effective_max_async_queries", s.max_async_queries);
    MLPERF_LOG(detail, "effective_target_duration_ms",
               s.target_duration.count());
    MLPERF_LOG(detail, "effective_min_duration_ms", s.min_duration.count());
    MLPERF_LOG(detail, "effective_max_duration_ms", s.max_duration.count());
    MLPERF_LOG(detail, "effective_min_query_count", s.min_query_count);
    MLPERF_LOG(detail, "effective_max_query_count", s.max_query_count);
    MLPERF_LOG(detail, "effective_min_sample_count", s.min_sample_count);
    MLPERF_LOG(detail, "effective_qsl_rng_seed", s.qsl_rng_seed);
    MLPERF_LOG(detail, "effective_sample_index_rng_seed",
               s.sample_index_rng_seed);
    MLPERF_LOG(detail, "effective_schedule_rng_seed", s.schedule_rng_seed);
    MLPERF_LOG(detail, "effective_accuracy_log_rng_seed",
               s.accuracy_log_rng_seed);
    MLPERF_LOG(detail, "effective_accuracy_log_probability",
               s.accuracy_log_probability);
    MLPERF_LOG(detail, "effective_accuracy_log_sampling_target",
               s.accuracy_log_sampling_target);
    MLPERF_LOG(detail, "effective_print_timestamps", s.print_timestamps);
    MLPERF_LOG(detail, "effective_performance_issue_unique",
               s.performance_issue_unique);
    MLPERF_LOG(detail, "effective_performance_issue_same",
               s.performance_issue_same);
    MLPERF_LOG(detail, "effective_performance_issue_same_index",
               s.performance_issue_same_index);
    MLPERF_LOG(detail, "effective_performance_sample_count",
               s.performance_sample_count);
    MLPERF_LOG(detail, "effective_sample_concatenate_permutation",
               s.sample_concatenate_permutation);
#else
    // Legacy free-form text logging format.
    detail("");
    detail("Effective Settings:");
    detail("Scenario : " + ToString(s.scenario));
    detail("Test mode : " + ToString(s.mode));
    detail("samples_per_query : ", s.samples_per_query);
    detail("target_qps : ", s.target_qps);
    detail("target_latency (ns): ", s.target_latency.count());
    detail("target_latency_percentile : ", s.target_latency_percentile);
    detail("max_async_queries : ", s.max_async_queries);
    detail("target_duration (ms): ", s.target_duration.count());
    detail("min_duration (ms): ", s.min_duration.count());
    detail("max_duration (ms): ", s.max_duration.count());
    detail("min_query_count : ", s.min_query_count);
    detail("max_query_count : ", s.max_query_count);
    detail("min_sample_count : ", s.min_sample_count);
    detail("qsl_rng_seed : ", s.qsl_rng_seed);
    detail("sample_index_rng_seed : ", s.sample_index_rng_seed);
    detail("schedule_rng_seed : ", s.schedule_rng_seed);
    detail("accuracy_log_rng_seed : ", s.accuracy_log_rng_seed);
    detail("accuracy_log_probability : ", s.accuracy_log_probability);
    detail("accuracy_log_sampling_target : ", s.accuracy_log_sampling_target);
    detail("print_timestamps : ", s.print_timestamps);
    detail("performance_issue_unique : ", s.performance_issue_unique);
    detail("performance_issue_same : ", s.performance_issue_same);
    detail("performance_issue_same_index : ", s.performance_issue_same_index);
    detail("performance_sample_count : ", s.performance_sample_count);
#endif
  });
}
// Convenience wrapper: logs the user-requested settings first, then the
// effective (normalized) settings, in that fixed order.
void TestSettingsInternal::LogAllSettings() const {
  LogRequestedTestSettings(requested);
  LogEffectiveSettings();
}
// Writes a human-readable summary of the effective settings to the summary
// log. Token-latency runs (LLM benchmarks) report TTFT/TPOT latency targets
// instead of the single target latency.
void TestSettingsInternal::LogSummary(AsyncSummary &summary) const {
  summary("samples_per_query : ", samples_per_query);
  summary("target_qps : ", target_qps);
  if (!use_token_latencies) {
    summary("target_latency (ns): ", target_latency.count());
  } else {
    // Token-latency mode: time-to-first-token and time-per-output-token.
    summary("ttft_latency (ns): ", server_ttft_latency);
    summary("tpot_latency (ns): ", server_tpot_latency);
  }
  summary("max_async_queries : ", max_async_queries);
  summary("min_duration (ms): ", min_duration.count());
  summary("max_duration (ms): ", max_duration.count());
  summary("min_query_count : ", min_query_count);
  summary("max_query_count : ", max_query_count);
  summary("qsl_rng_seed : ", qsl_rng_seed);
  summary("sample_index_rng_seed : ", sample_index_rng_seed);
  summary("schedule_rng_seed : ", schedule_rng_seed);
  summary("accuracy_log_rng_seed : ", accuracy_log_rng_seed);
  summary("accuracy_log_probability : ", accuracy_log_probability);
  summary("accuracy_log_sampling_target : ", accuracy_log_sampling_target);
  summary("print_timestamps : ", print_timestamps);
  summary("performance_issue_unique : ", performance_issue_unique);
  summary("performance_issue_same : ", performance_issue_same);
  summary("performance_issue_same_index : ", performance_issue_same_index);
  summary("performance_sample_count : ", performance_sample_count);
  if (sample_concatenate_permutation) {
    // Warn that the actual samples_per_query may be regenerated; the real
    // value appears as generated_samples_per_query in the detailed log.
    summary(
        "WARNING: sample_concatenate_permutation was set to true. \n"
        "Generated samples per query might be different as the one in the "
        "setting.\n"
        "Check the generated_samples_per_query line in the detailed log for "
        "the real\n"
        "samples_per_query value");
  }
}
} // namespace loadgen
int TestSettings::FromConfig(const std::string &path, const std::string &model,
const std::string &scenario, int conf_type) {
std::map<std::string, std::string> kv;
static int configCount = 0;
if (conf_type == 1) {
if (configCount == 0) {
// Only allow userConf as the single configFile and loadgen loads the
// mlperfConf automatically for perf and accuracy runs
FromConfig("", model, scenario, 0);
}
else {
LogDetail([](AsyncDetail &detail) {
std::stringstream ss;
ss << "Multiple conf files are used. This is not valid for official "
"submission.";
MLPERF_LOG_ERROR(detail, "error_invalid_config", ss.str());
});
}
configCount++;
}
// lookup key/value pairs from config
auto lookupkv = [&](const std::string &model, const std::string &scenario,
const std::string &key, uint64_t *val_l, double *val_d,
double multiplier = 1.0) {
std::map<std::string, std::string>::iterator it;
std::string found;
// lookup exact key first
it = kv.find(model + "." + scenario + "." + key);
if (it != kv.end()) {
found = it->second;
} else {
// lookup key with model wildcard
it = kv.find("*." + scenario + "." + key);
if (it != kv.end()) {
found = it->second;
} else {
it = kv.find(model + ".*." + key);
if (it != kv.end()) {
found = it->second;
} else {
it = kv.find("*.*." + key);
if (it != kv.end()) {
found = it->second;
} else {
return false;
}
}
}
}
// if we get here, found will be set
if (val_l) {
*val_l = strtoull(found.c_str(), nullptr, 0) *
static_cast<uint64_t>(multiplier);
}
if (val_d) *val_d = strtod(found.c_str(), nullptr) * multiplier;
return true;
};
int line_nr = 0;
int errors = 0;
// Declare the input stream before the if-else block
std::unique_ptr<std::istream> fss;
std::string line;
if (conf_type != 0) {
// dirt simple config parser
fss = std::make_unique<std::ifstream>(path);
if (!static_cast<std::ifstream *>(fss.get())->is_open()) {
LogDetail([p = path](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
std::stringstream ss;
ss << "can't open file " << p;
MLPERF_LOG_ERROR(detail, "error_invalid_config", ss.str());
#else
detail.Error("can't open file ", p);
#endif
});
return -ENOENT;
}
} else {
// Convert unsigned char array to std::string
std::string config_str(mlperf_conf);
fss = std::make_unique<std::istringstream>(config_str);
}
while (std::getline(*fss, line)) {
line_nr++;
std::istringstream iss(line);
std::string s, k;
int looking_for = 0; // 0=key, 1=equal, 2=value
while (iss >> s) {
if (s == "#" && looking_for != 2) {
// done with this line
break;
}
if (looking_for == 2) {
// got key and value
const char *start = s.c_str();
char *stop;
(void)strtoul(start, &stop, 0);
if (start + s.size() == stop) {
kv[k] = s;
continue;
}
(void)strtod(start, &stop);
if (start + s.size() == stop) {
kv[k] = s;
continue;
}
errors++;
LogDetail([l = line_nr](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
std::stringstream ss;
ss << "value needs to be integer or double, line=" << l;
MLPERF_LOG_ERROR(detail, "error_invalid_config", ss.str());
#else
detail.Error("value needs to be integer or double, line=", l);
#endif
});
break;
}
if (looking_for == 1 && s != "=") {
errors++;
LogDetail([l = line_nr](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
std::stringstream ss;
ss << "expected 'key=value', line=" << l;
MLPERF_LOG_ERROR(detail, "error_invalid_config", ss.str());
#else
detail.Error("expected 'key=value', line=", l);
#endif
});
break;
}
if (looking_for == 0) k = s;
looking_for++;
}
}
if (errors != 0) return -EINVAL;
uint64_t val;
// keys that apply to all scenarios
if (lookupkv(model, scenario, "mode", &val, nullptr)) {
switch (val) {
case 0:
mode = TestMode::SubmissionRun;
break;
case 1:
mode = TestMode::AccuracyOnly;
break;
case 2:
mode = TestMode::PerformanceOnly;
break;
case 3:
mode = TestMode::FindPeakPerformance;
break;
default:
LogDetail([](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
std::stringstream ss;
ss << "Invalid value passed to Mode key in config.";
MLPERF_LOG_ERROR(detail, "error_invalid_config", ss.str());
#else
detail.Error("Invalid value passed to Mode key in config.");
#endif
});
break;
}
}
if (conf_type == 0) {
lookupkv(model, scenario, "qsl_rng_seed", &qsl_rng_seed, nullptr);
lookupkv(model, scenario, "sample_index_rng_seed", &sample_index_rng_seed,
nullptr);
lookupkv(model, scenario, "schedule_rng_seed", &schedule_rng_seed, nullptr);
lookupkv(model, scenario, "accuracy_log_probability", nullptr,
&accuracy_log_probability, 0.01);
if (lookupkv(model, scenario, "test05", &val, nullptr))
test05 = (val == 1) ? true : false;
lookupkv(model, scenario, "test05_qsl_rng_seed", &test05_qsl_rng_seed,
nullptr);
lookupkv(model, scenario, "test05_sample_index_rng_seed",
&test05_sample_index_rng_seed, nullptr);
lookupkv(model, scenario, "test05_schedule_rng_seed",
&test05_schedule_rng_seed, nullptr);
}
// keys that can be overriden in user.conf but will make the results eligible
// only for open submissions
// keys to measure token metrics
if (lookupkv(model, scenario, "use_token_latencies", &val, nullptr)) {
use_token_latencies = (val == 1) ? true : false;
}
if (use_token_latencies) {
lookupkv(model, "Server", "ttft_latency", &server_ttft_latency, nullptr,
1000 * 1000);
lookupkv(model, "Server", "tpot_latency", &server_tpot_latency, nullptr,
1000 * 1000);
}
// keys to infer token metrics
if (lookupkv(model, scenario, "infer_token_latencies", &val, nullptr)) {
infer_token_latencies = (val == 1) ? true : false;
}
if (infer_token_latencies) {
lookupkv(model, scenario, "token_latency_scaling_factor",
&token_latency_scaling_factor, nullptr, 1);
}
// keys that apply to SingleStream
lookupkv(model, "SingleStream", "target_latency_percentile", nullptr,
&single_stream_target_latency_percentile, 0.01);
// keys that apply to MultiStream
lookupkv(model, "MultiStream", "target_latency_percentile", nullptr,
&multi_stream_target_latency_percentile, 0.01);
lookupkv(model, "MultiStream", "samples_per_query",
&multi_stream_samples_per_query, nullptr, 1);
// keys that apply to Server
lookupkv(model, "Server", "target_latency_percentile", nullptr,
&server_target_latency_percentile, 0.01);
lookupkv(model, "Server", "target_latency", &server_target_latency_ns,
nullptr, 1000 * 1000);
// keys that can be overriden in user.conf (the provided values still need to
// pass the submission checker rules)
if (lookupkv(model, scenario, "performance_issue_unique", &val, nullptr))
performance_issue_unique = (val == 0) ? false : true;
if (lookupkv(model, scenario, "performance_issue_same", &val, nullptr))
performance_issue_same = (val == 0) ? false : true;
lookupkv(model, scenario, "performance_issue_same_index",
&performance_issue_same_index, nullptr);
if (lookupkv(model, scenario, "sample_concatenate_permutation", &val,
nullptr))
sample_concatenate_permutation = (val == 1) ? true : false;
if (lookupkv(model, "Server", "coalesce_queries", &val, nullptr))
server_coalesce_queries = (val == 0) ? false : true;
if (lookupkv(model, "Server", "max_async_queries", &val, nullptr))
server_max_async_queries = int(val);
lookupkv(model, scenario, "min_duration", &min_duration_ms, nullptr);
lookupkv(model, scenario, "max_duration", &max_duration_ms, nullptr);
lookupkv(model, scenario, "min_query_count", &min_query_count, nullptr);
lookupkv(model, scenario, "max_query_count", &max_query_count, nullptr);
lookupkv(model, scenario, "performance_sample_count_override",
&performance_sample_count_override, nullptr);
lookupkv(model, "SingleStream", "target_latency", nullptr,
&single_stream_expected_latency_ns, 1000 * 1000);
lookupkv(model, "MultiStream", "target_latency", nullptr,
&multi_stream_expected_latency_ns, 1000 * 1000);
lookupkv(model, "Server", "target_qps", nullptr, &server_target_qps);
lookupkv(model, "Offline", "target_qps", 0, &offline_expected_qps);
if (lookupkv(model, scenario, "print_timestamps", &val, nullptr))
print_timestamps = (val == 0) ? false : true;
// keys that are used in audit.conf
lookupkv(model, scenario, "accuracy_log_rng_seed", &accuracy_log_rng_seed,
nullptr);
lookupkv(model, scenario, "accuracy_log_sampling_target",
&accuracy_log_sampling_target, nullptr);
return 0;
}
} // namespace mlperf
/* Copyright 2019 The MLPerf Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
/// \file
/// \brief The internal representation of user-provided settings.
#ifndef MLPERF_LOADGEN_TEST_SETTINGS_INTERNAL_H
#define MLPERF_LOADGEN_TEST_SETTINGS_INTERNAL_H
#include <chrono>
#include <cmath>
#include <string>
#include "logging.h"
#include "test_settings.h"
namespace mlperf {
namespace logging {
class AsyncSummary;
}
namespace loadgen {
using AsyncSummary = logging::AsyncSummary;
std::string ToString(TestScenario scenario);
std::string ToString(TestMode mode);
/// \brief takes the user-friendly TestSettings and normalizes it
/// for consumption by the loadgen.
/// \details It does things like remove scenario-specific naming and introduce
/// the concept of target_duration used to pre-generate queries.
struct TestSettingsInternal {
  explicit TestSettingsInternal(const TestSettings &requested_settings,
                                size_t qsl_performance_sample_count);
  // Logging helpers; LogAllSettings logs both requested and effective values.
  void LogEffectiveSettings() const;
  void LogAllSettings() const;
  void LogSummary(AsyncSummary &summary) const;

  // The original user-provided settings, kept for reference/logging.
  const TestSettings requested;
  const TestScenario scenario;  // Copied here for convenience.
  const TestMode mode;          // Copied here for convenience.

  // Normalized, scenario-agnostic counterparts of the scenario-specific
  // fields in TestSettings (e.g. *_expected_latency_ns, *_target_qps).
  uint64_t samples_per_query;
  double target_qps;
  std::chrono::nanoseconds target_latency{0};
  double target_latency_percentile;  // Single, multistream, and server modes.
  uint64_t max_async_queries;

  // Target duration is used to generate queries of a minimum duration before
  // the test run.
  std::chrono::milliseconds target_duration{0};

  // Min duration/query_count/sample_count are used to validate the test
  // duration at the end of the run.
  std::chrono::milliseconds min_duration{0};
  std::chrono::milliseconds max_duration{0};
  uint64_t min_query_count;
  uint64_t max_query_count;
  uint64_t min_sample_count;  // Offline only.

  // RNG seeds controlling sample selection, scheduling, and accuracy logging.
  uint64_t qsl_rng_seed;
  uint64_t sample_index_rng_seed;
  uint64_t schedule_rng_seed;
  uint64_t accuracy_log_rng_seed;
  double accuracy_log_probability;
  uint64_t accuracy_log_sampling_target;
  bool print_timestamps;

  // Performance-issue debugging knobs (replay unique/same samples).
  bool performance_issue_unique;
  bool performance_issue_same;
  uint64_t performance_issue_same_index;
  uint64_t performance_sample_count;
  bool sample_concatenate_permutation;

  // Token-latency (LLM) settings: TTFT/TPOT targets in nanoseconds.
  bool use_token_latencies = false;
  int64_t server_ttft_latency;
  int64_t server_tpot_latency;

  // Inferred token-latency mode and its scaling factor.
  bool infer_token_latencies = false;
  int64_t token_latency_scaling_factor;
};
/// \brief A namespace of collections of FindPeakPerformance helper functions,
/// mainly about binary search.
namespace find_peak_performance {
// Error message emitted when a FindPeakPerformance helper is instantiated
// for any scenario other than Server.
constexpr char const *kNotSupportedMsg =
    "Finding peak performance is only supported in Server scenarios.";
// Returns a copy of the lower-bound settings whose target_qps is the
// midpoint of the lower/upper bounds' target_qps (Server scenario only).
// For any other scenario, logs an error and returns the lower bound as-is.
template <TestScenario scenario>
TestSettingsInternal MidOfBoundaries(
    const TestSettingsInternal &lower_bound_settings,
    const TestSettingsInternal &upper_bound_settings) {
  TestSettingsInternal result = lower_bound_settings;
  if (scenario != TestScenario::Server) {
    LogDetail([](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", kNotSupportedMsg);
#else
      detail(kNotSupportedMsg);
#endif
    });
    return result;
  }
  const double lo = lower_bound_settings.target_qps;
  const double hi = upper_bound_settings.target_qps;
  assert(lo < hi);
  // Overflow-safe midpoint form: lo + (hi - lo) / 2.
  result.target_qps = lo + (hi - lo) / 2;
  return result;
}
// Reports whether the binary search over target_qps has converged: true when
// the lower and upper bounds agree to the requested number of decimal places
// (Server scenario only). Any other scenario logs an error and returns true
// so the caller's loop terminates.
template <TestScenario scenario>
bool IsFinished(const TestSettingsInternal &lower_bound_settings,
                const TestSettingsInternal &upper_bound_settings) {
  if (scenario != TestScenario::Server) {
    LogDetail([](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", kNotSupportedMsg);
#else
      detail(kNotSupportedMsg);
#endif
    });
    return true;
  }
  const uint8_t precision =
      lower_bound_settings.requested.server_find_peak_qps_decimals_of_precision;
  const double scale = std::pow(10, precision);
  const double lo = std::floor(lower_bound_settings.target_qps * scale);
  const double hi = std::floor(upper_bound_settings.target_qps * scale);
  // Converged once the scaled bounds are adjacent (or crossed).
  return lo + 1 >= hi;
}
// Renders the field being searched over as a string: the target_qps for the
// Server scenario. Any other scenario logs an error and falls back to the
// scenario name itself.
template <TestScenario scenario>
std::string ToStringPerformanceField(const TestSettingsInternal &settings) {
  if (scenario != TestScenario::Server) {
    LogDetail([](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", kNotSupportedMsg);
#else
      detail(kNotSupportedMsg);
#endif
    });
    return ToString(settings.scenario);
  }
  return std::to_string(settings.target_qps);
}
// Grows the search boundary by multiplying target_qps by
// (1 + server_find_peak_qps_boundary_step_size) — Server scenario only.
// Any other scenario logs an error and leaves the settings untouched.
template <TestScenario scenario>
void WidenPerformanceField(TestSettingsInternal *settings) {
  if (scenario != TestScenario::Server) {
    LogDetail([](AsyncDetail &detail) {
#if USE_NEW_LOGGING_FORMAT
      MLPERF_LOG_ERROR(detail, "error_invalid_test_settings", kNotSupportedMsg);
#else
      detail(kNotSupportedMsg);
#endif
    });
    return;
  }
  const double growth =
      1 + settings->requested.server_find_peak_qps_boundary_step_size;
  settings->target_qps *= growth;
}
} // namespace find_peak_performance
} // namespace loadgen
} // namespace mlperf
#endif // MLPERF_LOADGEN_TEST_SETTINGS_INTERNAL_H
# GN build targets for the MLPerf LoadGen test and benchmark binaries.

# Shared test harness (header + main()) linked into the C++ loadgen tests.
static_library("mlperf_loadgen_tests_loadgen_test_main") {
  sources = [ "loadgen_test.h", "loadgen_test_main.cc" ]
  configs += [ "//build/config/compiler:exceptions" ]
}

# Performance test binary driving a null (no-op) SUT.
executable("mlperf_loadgen_perftests") {
  sources = [ "perftests_null_sut.cc" ]
  deps = [ "..:mlperf_loadgen" ]
}

# Basic functional tests for the loadgen library.
executable("mlperf_loadgen_tests_basic") {
  sources = [ "basic.cc" ]
  deps = [ "..:mlperf_loadgen",
           ":mlperf_loadgen_tests_loadgen_test_main" ]
  configs += [ "//build/config/compiler:exceptions" ]
}

# Python counterpart of the null-SUT perf test; depends on the wheel lib.
source_set("mlperf_loadgen_perftests_py") {
  sources = [ "perftests_null_sut.py" ]
  deps = [ "../..:loadgen_pymodule_wheel_lib" ]
}

# Documentation files grouped for the build graph.
source_set("docs") {
  sources = [ "README.md" ]
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment