// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "data_loader.h"
#include "gmock/gmock.h"
namespace triton { namespace perfanalyzer {
/// Mock DataLoader class used for testing to allow JSON data to be read
/// from a string rather than from a file.
///
class NaggyMockDataLoader : public DataLoader {
public:
NaggyMockDataLoader() { SetupMocks(); }
NaggyMockDataLoader(size_t batch_size) : DataLoader(batch_size)
{
SetupMocks();
}
void SetupMocks()
{
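// Delegate each mocked method to the real DataLoader implementation by
// default, so the mock behaves like the real class unless a test installs
// its own expectation.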
ON_CALL(*this, GetTotalSteps(testing::_))
.WillByDefault([this](size_t stream_id) -> size_t {
return this->DataLoader::GetTotalSteps(stream_id);
});
ON_CALL(*this, ReadFile(testing::_, testing::_))
.WillByDefault(
[this](
const std::string& path,
std::vector<char>* contents) -> cb::Error {
return this->DataLoader::ReadFile(path, contents);
});
ON_CALL(*this, ReadTextFile(testing::_, testing::_))
.WillByDefault(
[this](
const std::string& path,
std::vector<std::string>* contents) -> cb::Error {
return this->DataLoader::ReadTextFile(path, contents);
});
}
MOCK_METHOD(size_t, GetTotalSteps, (size_t), (override));
MOCK_METHOD(cb::Error, ReadFile, (const std::string&, std::vector<char>*));
MOCK_METHOD(
cb::Error, ReadTextFile, (const std::string&, std::vector<std::string>*));
cb::Error ReadDataFromJSON(
const std::shared_ptr<ModelTensorMap>& inputs,
const std::shared_ptr<ModelTensorMap>& outputs,
const std::string& json_file) override
{
return ReadDataFromStr(json_file, inputs, outputs);
}
cb::Error ReadDataFromStr(
const std::string& str, const std::shared_ptr<ModelTensorMap>& inputs,
const std::shared_ptr<ModelTensorMap>& outputs)
{
rapidjson::Document d{};
const unsigned int parseFlags = rapidjson::kParseNanAndInfFlag;
d.Parse<parseFlags>(str.c_str());
return ParseData(d, inputs, outputs);
};
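// Expose base DataLoader state to tests via references.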
std::vector<size_t>& step_num_{DataLoader::step_num_};
size_t& data_stream_cnt_{DataLoader::data_stream_cnt_};
};
// Non-naggy version of Mock Data Loader (won't warn when using default gmock
// mocked function)
using MockDataLoader = testing::NiceMock<NaggyMockDataLoader>;
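// Usage sketch (illustrative only, not part of the original header): a test
// can feed inline JSON through the mock instead of pointing it at a file.
// The JSON payload below is a placeholder; the real schema is whatever
// DataLoader::ParseData expects for the model's inputs.
inline cb::Error
ExampleReadInlineJson(
    const std::shared_ptr<ModelTensorMap>& inputs,
    const std::shared_ptr<ModelTensorMap>& outputs)
{
  MockDataLoader loader;
  const std::string json{R"({"data": [{"INPUT0": [1]}]})"};
  return loader.ReadDataFromStr(json, inputs, outputs);
}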
}} // namespace triton::perfanalyzer
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "infer_context.h"
namespace triton { namespace perfanalyzer {
class NaggyMockInferContext : public InferContext {
public:
NaggyMockInferContext()
{
ON_CALL(*this, SendRequest(testing::_, testing::_, testing::_))
.WillByDefault(
[this](
const uint64_t request_id, const bool delayed,
const uint64_t sequence_id) -> void {
this->InferContext::SendRequest(request_id, delayed, sequence_id);
});
}
MOCK_METHOD(
void, SendRequest, (const uint64_t, const bool, const uint64_t),
(override));
std::shared_ptr<SequenceManager>& sequence_manager_{
InferContext::sequence_manager_};
std::shared_ptr<DataLoader>& data_loader_{InferContext::data_loader_};
std::shared_ptr<IInferDataManager>& infer_data_manager_{
InferContext::infer_data_manager_};
std::shared_ptr<ThreadStat>& thread_stat_{InferContext::thread_stat_};
std::reference_wrapper<const bool>& execute_{InferContext::execute_};
bool& using_json_data_{InferContext::using_json_data_};
bool& async_{InferContext::async_};
bool& streaming_{InferContext::streaming_};
InferData& infer_data_{InferContext::infer_data_};
std::unique_ptr<cb::ClientBackend>& infer_backend_{
InferContext::infer_backend_};
std::function<void(cb::InferResult*)>& async_callback_func_{
InferContext::async_callback_func_};
};
using MockInferContext = testing::NiceMock<NaggyMockInferContext>;
}} // namespace triton::perfanalyzer
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "infer_data_manager.h"
#include "infer_data_manager_shm.h"
#include "mock_client_backend.h"
namespace triton { namespace perfanalyzer {
class MockInferDataManagerShm : public InferDataManagerShm {
public:
MockInferDataManagerShm(
const int32_t batch_size, const SharedMemoryType shared_memory_type,
const size_t output_shm_size, const std::shared_ptr<ModelParser>& parser,
const std::shared_ptr<cb::ClientBackendFactory>& factory,
const std::shared_ptr<DataLoader>& data_loader)
: InferDataManagerShm(
batch_size, shared_memory_type, output_shm_size, parser, factory,
data_loader)
{
}
// Mocked version of the base class's CopySharedMemory method.
// Tracks the mapping of shared memory region name to data.
//
cb::Error CopySharedMemory(
uint8_t* input_shm_ptr, const std::vector<TensorData>& input_datas,
bool is_shape_tensor, std::string& region_name) override
{
std::vector<int32_t> vals;
for (size_t i = 0; i < input_datas.size(); i++) {
int32_t val = *reinterpret_cast<const int32_t*>(input_datas[i].data_ptr);
vals.push_back(val);
}
mocked_shared_memory_regions.insert(std::make_pair(region_name, vals));
return cb::Error::Success;
}
cb::Error CreateInferInput(
cb::InferInput** infer_input, const cb::BackendKind kind,
const std::string& name, const std::vector<int64_t>& dims,
const std::string& datatype) override
{
*infer_input = new cb::MockInferInput(kind, name, dims, datatype);
return cb::Error::Success;
}
// Tracks the mapping of shared memory label to data
std::map<std::string, std::vector<int32_t>> mocked_shared_memory_regions;
};
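// Illustrative helper (not part of the original header): shows how a test
// could inspect the tracked shared-memory regions after CopySharedMemory has
// run on a previously constructed MockInferDataManagerShm.
inline bool
MockRegionHoldsValue(
    const MockInferDataManagerShm& manager, const std::string& region_name,
    int32_t expected)
{
  const auto itr = manager.mocked_shared_memory_regions.find(region_name);
  if (itr == manager.mocked_shared_memory_regions.end()) {
    return false;
  }
  for (const int32_t val : itr->second) {
    if (val == expected) {
      return true;
    }
  }
  return false;
}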
class MockInferDataManager : public InferDataManager {
public:
MockInferDataManager() { SetupMocks(); }
MockInferDataManager(
const size_t max_threads, const int32_t batch_size,
const std::shared_ptr<ModelParser>& parser,
const std::shared_ptr<cb::ClientBackendFactory>& factory,
const std::shared_ptr<DataLoader>& data_loader)
: InferDataManager(max_threads, batch_size, parser, factory, data_loader)
{
SetupMocks();
}
void SetupMocks()
{
ON_CALL(
*this, UpdateInferData(testing::_, testing::_, testing::_, testing::_))
.WillByDefault(
[this](
size_t thread_id, int stream_index, int step_index,
InferData& infer_data) -> cb::Error {
return this->InferDataManager::UpdateInferData(
thread_id, stream_index, step_index, infer_data);
});
}
MOCK_METHOD(
cb::Error, UpdateInferData, (size_t, int, int, InferData&), (override));
cb::Error CreateInferInput(
cb::InferInput** infer_input, const cb::BackendKind kind,
const std::string& name, const std::vector<int64_t>& dims,
const std::string& datatype) override
{
*infer_input = new cb::MockInferInput(kind, name, dims, datatype);
return cb::Error::Success;
}
};
class MockInferDataManagerFactory {
public:
static std::shared_ptr<IInferDataManager> CreateMockInferDataManager(
const size_t max_threads, const int32_t batch_size,
const SharedMemoryType shared_memory_type, const size_t output_shm_size,
const std::shared_ptr<ModelParser>& parser,
const std::shared_ptr<cb::ClientBackendFactory>& factory,
const std::shared_ptr<DataLoader>& data_loader)
{
if (shared_memory_type == SharedMemoryType::NO_SHARED_MEMORY) {
return std::make_shared<testing::NiceMock<MockInferDataManager>>(
max_threads, batch_size, parser, factory, data_loader);
} else {
return std::make_shared<testing::NiceMock<MockInferDataManagerShm>>(
batch_size, shared_memory_type, output_shm_size, parser, factory,
data_loader);
}
}
};
}} // namespace triton::perfanalyzer
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "inference_profiler.h"
namespace triton { namespace perfanalyzer {
class NaggyMockInferenceProfiler : public InferenceProfiler {
public:
NaggyMockInferenceProfiler()
{
ON_CALL(
*this, ValidLatencyMeasurement(
testing::_, testing::_, testing::_, testing::_, testing::_,
testing::_))
.WillByDefault(
[this](
const std::pair<uint64_t, uint64_t>& valid_range,
size_t& valid_sequence_count, size_t& delayed_request_count,
std::vector<uint64_t>* latencies, size_t& response_count,
std::vector<RequestRecord>& valid_requests) -> void {
this->InferenceProfiler::ValidLatencyMeasurement(
valid_range, valid_sequence_count, delayed_request_count,
latencies, response_count, valid_requests);
});
ON_CALL(*this, SummarizeLatency(testing::_, testing::_))
.WillByDefault(
[this](
const std::vector<uint64_t>& latencies,
PerfStatus& summary) -> cb::Error {
return this->InferenceProfiler::SummarizeLatency(
latencies, summary);
});
ON_CALL(*this, MergePerfStatusReports(testing::_, testing::_))
.WillByDefault(
[this](
std::deque<PerfStatus>& perf_status,
PerfStatus& summary_status) -> cb::Error {
return this->InferenceProfiler::MergePerfStatusReports(
perf_status, summary_status);
});
ON_CALL(*this, MergeServerSideStats(testing::_, testing::_))
.WillByDefault(
[this](
std::vector<ServerSideStats>& server_side_stats,
ServerSideStats& server_side_summary) -> cb::Error {
return this->InferenceProfiler::MergeServerSideStats(
server_side_stats, server_side_summary);
});
ON_CALL(
*this, SummarizeClientStat(
testing::_, testing::_, testing::_, testing::_, testing::_,
testing::_, testing::_, testing::_))
.WillByDefault(
[this](
const cb::InferStat& start_stat, const cb::InferStat& end_stat,
const uint64_t duration_ns, const size_t valid_request_count,
const size_t delayed_request_count,
const size_t valid_sequence_count, const size_t response_count,
PerfStatus& summary) -> cb::Error {
return this->InferenceProfiler::SummarizeClientStat(
start_stat, end_stat, duration_ns, valid_request_count,
delayed_request_count, valid_sequence_count, response_count,
summary);
});
};
MOCK_METHOD0(IncludeServerStats, bool());
MOCK_METHOD(
void, ValidLatencyMeasurement,
((const std::pair<uint64_t, uint64_t>&), size_t&, size_t&,
std::vector<uint64_t>*, size_t&, std::vector<RequestRecord>&),
(override));
MOCK_METHOD(
cb::Error, SummarizeLatency, (const std::vector<uint64_t>&, PerfStatus&),
(override));
MOCK_METHOD(
cb::Error, MergePerfStatusReports, (std::deque<PerfStatus>&, PerfStatus&),
(override));
MOCK_METHOD(
cb::Error, MergeServerSideStats,
(std::vector<ServerSideStats>&, ServerSideStats&), (override));
MOCK_METHOD(
cb::Error, SummarizeClientStat,
(const cb::InferStat&, const cb::InferStat&, const uint64_t, const size_t,
const size_t, const size_t, const size_t, PerfStatus&),
(override));
std::shared_ptr<ModelParser>& parser_{InferenceProfiler::parser_};
std::unique_ptr<LoadManager>& manager_{InferenceProfiler::manager_};
bool& include_lib_stats_{InferenceProfiler::include_lib_stats_};
std::vector<RequestRecord>& all_request_records_{
InferenceProfiler::all_request_records_};
};
using MockInferenceProfiler = testing::NiceMock<NaggyMockInferenceProfiler>;
}} // namespace triton::perfanalyzer
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "load_manager.h"
namespace triton { namespace perfanalyzer {
class NaggyMockLoadManager : public LoadManager {};
using MockLoadManager = testing::NiceMock<NaggyMockLoadManager>;
}} // namespace triton::perfanalyzer
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "model_parser.h"
namespace triton { namespace perfanalyzer {
class MockModelParser : public ModelParser {
public:
MockModelParser() : ModelParser(clientbackend::BackendKind::TRITON) {}
MockModelParser(
bool is_sequence_model, bool is_decoupled_model,
size_t max_batch_size = 64)
: ModelParser(clientbackend::BackendKind::TRITON)
{
if (is_sequence_model) {
scheduler_type_ = ModelParser::SEQUENCE;
}
is_decoupled_ = is_decoupled_model;
max_batch_size_ = max_batch_size;
}
// Expose private function
cb::Error GetInt(const rapidjson::Value& value, int64_t* integer_value)
{
return ModelParser::GetInt(value, integer_value);
}
// Expose private function
cb::Error DetermineComposingModelMap(
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
return ModelParser::DetermineComposingModelMap(
bls_composing_models, config, backend);
}
// Expose private function
cb::Error DetermineSchedulerType(
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
return ModelParser::DetermineSchedulerType(config, backend);
}
std::shared_ptr<ComposingModelMap>& composing_models_map_{
ModelParser::composing_models_map_};
std::shared_ptr<ModelTensorMap>& inputs_{ModelParser::inputs_};
};
}} // namespace triton::perfanalyzer
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "profile_data_collector.h"
namespace triton { namespace perfanalyzer {
class NaggyMockProfileDataCollector : public ProfileDataCollector {
public:
NaggyMockProfileDataCollector()
{
ON_CALL(*this, FindExperiment(testing::_))
.WillByDefault(
[this](InferenceLoadMode& id) -> std::vector<Experiment>::iterator {
return this->ProfileDataCollector::FindExperiment(id);
});
}
MOCK_METHOD(
std::vector<Experiment>::iterator, FindExperiment, (InferenceLoadMode&),
(override));
std::vector<Experiment>& experiments_{ProfileDataCollector::experiments_};
};
using MockProfileDataCollector =
testing::NiceMock<NaggyMockProfileDataCollector>;
}} // namespace triton::perfanalyzer
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS"" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "profile_data_exporter.h"
namespace triton { namespace perfanalyzer {
class NaggyMockProfileDataExporter : public ProfileDataExporter {
public:
NaggyMockProfileDataExporter()
{
ON_CALL(*this, ConvertToJson(testing::_, testing::_))
.WillByDefault(
[this](
const std::vector<Experiment>& raw_experiments,
std::string& raw_version) -> void {
return this->ProfileDataExporter::ConvertToJson(
raw_experiments, raw_version);
});
ON_CALL(*this, OutputToFile(testing::_))
.WillByDefault([this](std::string& file_path) -> void {
this->ProfileDataExporter::OutputToFile(file_path);
});
ON_CALL(*this, AddExperiment(testing::_, testing::_, testing::_))
.WillByDefault(
[this](
rapidjson::Value& entry, rapidjson::Value& experiment,
const Experiment& raw_experiment) -> void {
this->ProfileDataExporter::AddExperiment(
entry, experiment, raw_experiment);
});
}
MOCK_METHOD(
void, ConvertToJson, (const std::vector<Experiment>&, std::string&),
(override));
MOCK_METHOD(
void, AddExperiment,
(rapidjson::Value&, rapidjson::Value&, const Experiment&), (override));
MOCK_METHOD(void, OutputToFile, (std::string&), (override));
rapidjson::Document& document_{ProfileDataExporter::document_};
};
using MockProfileDataExporter = testing::NiceMock<NaggyMockProfileDataExporter>;
}} // namespace triton::perfanalyzer
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "request_rate_worker.h"
namespace triton { namespace perfanalyzer {
class NaggyMockRequestRateWorker : public RequestRateWorker {
public:
NaggyMockRequestRateWorker(
uint32_t id, std::shared_ptr<ThreadStat> thread_stat,
std::shared_ptr<ThreadConfig> thread_config,
const std::shared_ptr<ModelParser> parser,
std::shared_ptr<DataLoader> data_loader,
const std::shared_ptr<cb::ClientBackendFactory> factory,
const bool on_sequence_model, const bool async, const size_t max_threads,
const bool using_json_data, const bool streaming,
const int32_t batch_size, std::condition_variable& wake_signal,
std::mutex& wake_mutex, bool& execute,
std::chrono::steady_clock::time_point& start_time,
const bool serial_sequences,
const std::shared_ptr<IInferDataManager>& infer_data_manager,
std::shared_ptr<SequenceManager> sequence_manager)
: RequestRateWorker(
id, thread_stat, thread_config, parser, data_loader, factory,
on_sequence_model, async, max_threads, using_json_data, streaming,
batch_size, wake_signal, wake_mutex, execute, start_time,
serial_sequences, infer_data_manager, sequence_manager)
{
ON_CALL(*this, Infer()).WillByDefault([this]() -> void {
RequestRateWorker::Infer();
});
}
MOCK_METHOD(void, Infer, (), (override));
void CreateContext() override { RequestRateWorker::CreateContext(); }
void SendInferRequest()
{
if (thread_stat_->status_.IsOk()) {
LoadWorker::SendInferRequest(0, false);
}
}
void EmptyInfer() { thread_config_->is_paused_ = true; }
};
// Non-naggy version of Mock (won't warn when using default gmock
// mocked function)
using MockRequestRateWorker = testing::NiceMock<NaggyMockRequestRateWorker>;
}} // namespace triton::perfanalyzer
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "sequence_manager.h"
namespace triton { namespace perfanalyzer {
class NaggyMockSequenceManager : public SequenceManager {
public:
NaggyMockSequenceManager() { SetupMocks(); }
NaggyMockSequenceManager(
const uint64_t start_sequence_id, const uint64_t sequence_id_range,
const size_t sequence_length, const bool sequence_length_specified,
const double sequence_length_variation, const bool using_json_data,
std::shared_ptr<DataLoader> data_loader)
: SequenceManager(
start_sequence_id, sequence_id_range, sequence_length,
sequence_length_specified, sequence_length_variation,
using_json_data, data_loader)
{
SetupMocks();
}
void SetupMocks()
{
ON_CALL(*this, SetInferSequenceOptions(testing::_, testing::_))
.WillByDefault([this](
const uint32_t seq_stat_index,
std::unique_ptr<cb::InferOptions>& options) {
this->SequenceManager::SetInferSequenceOptions(
seq_stat_index, options);
});
ON_CALL(*this, InitNewSequence(testing::_))
.WillByDefault([this](int seq_stat_index) {
this->SequenceManager::InitNewSequence(seq_stat_index);
});
ON_CALL(*this, GetNextSeqId(testing::_))
.WillByDefault([this](int seq_stat_index) -> uint64_t {
return this->SequenceManager::GetNextSeqId(seq_stat_index);
});
ON_CALL(*this, GetRandomSequenceLength(testing::_))
.WillByDefault([this](double offset_ratio) -> size_t {
return this->SequenceManager::GetRandomSequenceLength(offset_ratio);
});
ON_CALL(*this, GetNewDataStreamId()).WillByDefault([this]() -> size_t {
return this->SequenceManager::GetNewDataStreamId();
});
}
MOCK_METHOD(
void, SetInferSequenceOptions,
(const uint32_t, std::unique_ptr<cb::InferOptions>&), (override));
MOCK_METHOD(void, InitNewSequence, (int), (override));
MOCK_METHOD(uint64_t, GetNextSeqId, (int), (override));
MOCK_METHOD(size_t, GetRandomSequenceLength, (double), (override));
MOCK_METHOD(uint64_t, GetNewDataStreamId, (), (override));
std::vector<std::shared_ptr<SequenceStatus>>& sequence_statuses_{
SequenceManager::sequence_statuses_};
std::atomic<uint64_t>& curr_seq_id_{SequenceManager::curr_seq_id_};
};
using MockSequenceManager = testing::NiceMock<NaggyMockSequenceManager>;
}} // namespace triton::perfanalyzer
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model_parser.h"
#include "rapidjson/writer.h"
namespace triton { namespace perfanalyzer {
cb::Error
ModelParser::InitTriton(
const rapidjson::Document& metadata, const rapidjson::Document& config,
const std::string& model_version,
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const std::unordered_map<std::string, std::vector<int64_t>>& input_shapes,
std::unique_ptr<cb::ClientBackend>& backend)
{
model_name_ = metadata["name"].GetString();
model_version_ = model_version;
RETURN_IF_ERROR(
DetermineComposingModelMap(bls_composing_models, config, backend));
RETURN_IF_ERROR(DetermineSchedulerType(config, backend));
max_batch_size_ = 0;
const auto bs_itr = config.FindMember("max_batch_size");
if (bs_itr != config.MemberEnd()) {
int64_t mbs;
RETURN_IF_ERROR(GetInt(bs_itr->value, &mbs));
max_batch_size_ = mbs;
}
const auto txn_itr = config.FindMember("model_transaction_policy");
if (txn_itr != config.MemberEnd()) {
is_decoupled_ = txn_itr->value["decoupled"].GetBool();
}
// Get the information about inputs from metadata
const auto inputs_itr = metadata.FindMember("inputs");
if (inputs_itr != metadata.MemberEnd()) {
for (const auto& input : inputs_itr->value.GetArray()) {
auto it =
inputs_->emplace(input["name"].GetString(), ModelTensor()).first;
it->second.name_ = input["name"].GetString();
it->second.datatype_ = input["datatype"].GetString();
bool is_dynamic = false;
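// When the model supports batching (max_batch_size_ > 0), the first
// dimension reported in the metadata shape is the batch dimension, so it
// is skipped and not stored as part of the tensor shape.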
bool skip = (max_batch_size_ > 0);
for (const auto& dim : input["shape"].GetArray()) {
if (skip) {
skip = false;
continue;
}
int64_t dim_int;
RETURN_IF_ERROR(GetInt(dim, &dim_int));
if (dim_int == -1) {
is_dynamic = true;
}
it->second.shape_.push_back(dim_int);
}
if (is_dynamic) {
const auto user_shape_it = input_shapes.find(it->second.name_);
if (user_shape_it != input_shapes.end()) {
// Update the default shape to be used.
it->second.shape_.clear();
for (const auto dim : user_shape_it->second) {
it->second.shape_.push_back(dim);
}
}
}
}
}
// Check whether the tensor is shape tensor or not from config.
const auto inputs_config_itr = config.FindMember("input");
if (inputs_config_itr != config.MemberEnd()) {
for (const auto& input_config : inputs_config_itr->value.GetArray()) {
const auto name = std::string(
input_config["name"].GetString(),
input_config["name"].GetStringLength());
auto it = inputs_->find(name);
if (it == inputs_->end()) {
return cb::Error(
"no metadata found for input tensor " + name, pa::GENERIC_ERROR);
}
const auto& shape_tensor_itr = input_config.FindMember("is_shape_tensor");
if (shape_tensor_itr != input_config.MemberEnd()) {
it->second.is_shape_tensor_ = shape_tensor_itr->value.GetBool();
}
if (input_config.HasMember("optional")) {
it->second.is_optional_ = input_config["optional"].GetBool();
} else {
it->second.is_optional_ = false;
}
}
}
// Get the information about outputs from metadata
const auto outputs_itr = metadata.FindMember("outputs");
if (outputs_itr != metadata.MemberEnd()) {
for (const auto& output : outputs_itr->value.GetArray()) {
auto it =
outputs_->emplace(output["name"].GetString(), ModelTensor()).first;
it->second.name_ = output["name"].GetString();
it->second.datatype_ = output["datatype"].GetString();
bool skip = (max_batch_size_ > 0);
for (const auto& dim : output["shape"].GetArray()) {
if (skip) {
skip = false;
continue;
}
int64_t dim_int;
RETURN_IF_ERROR(GetInt(dim, &dim_int));
it->second.shape_.push_back(dim_int);
}
}
}
// Check whether the tensor is shape tensor or not from config.
const auto output_config_itr = config.FindMember("output");
if (output_config_itr != config.MemberEnd()) {
for (const auto& output_config : output_config_itr->value.GetArray()) {
const auto name = std::string(
output_config["name"].GetString(),
output_config["name"].GetStringLength());
auto itr = outputs_->find(name);
if (itr == outputs_->end()) {
return cb::Error(
"no metadata found for output tensor " + name, pa::GENERIC_ERROR);
}
const auto& shape_tensor_itr =
output_config.FindMember("is_shape_tensor");
if (shape_tensor_itr != output_config.MemberEnd()) {
itr->second.is_shape_tensor_ = shape_tensor_itr->value.GetBool();
}
}
}
// Check if model has response caching enabled
const auto cache_itr = config.FindMember("response_cache");
// response_cache_enabled_ set globally for reporting purposes if any
// composing model has it enabled, so don't overwrite it if already set
if (cache_itr != config.MemberEnd() && !response_cache_enabled_) {
response_cache_enabled_ = cache_itr->value["enable"].GetBool();
}
return cb::Error::Success;
}
cb::Error
ModelParser::InitTFServe(
const rapidjson::Document& metadata, const std::string& model_name,
const std::string& model_version, const std::string& model_signature_name,
const int32_t batch_size,
const std::unordered_map<std::string, std::vector<int64_t>>& input_shapes,
std::unique_ptr<cb::ClientBackend>& backend)
{
model_name_ = model_name;
model_version_ = model_version;
model_signature_name_ = model_signature_name;
// Get the scheduler type for the model
scheduler_type_ = NONE;
// Will use the user provided batch size as max. Relies on the service
// to throw an error if not supported.
max_batch_size_ = batch_size;
const rapidjson::Value& signature_config =
metadata["metadata"]["signature_def"]["signature_def"];
if (!signature_config.HasMember(model_signature_name.c_str())) {
return cb::Error(
"Failed to find signature_name \"" + model_signature_name +
"\" in the metadata",
pa::GENERIC_ERROR);
}
// Get the information about inputs from metadata
if (signature_config[model_signature_name.c_str()].HasMember("inputs")) {
const rapidjson::Value& inputs =
signature_config[model_signature_name.c_str()]["inputs"];
for (rapidjson::Value::ConstMemberIterator json_itr = inputs.MemberBegin();
json_itr != inputs.MemberEnd(); ++json_itr) {
auto it =
inputs_->emplace(json_itr->name.GetString(), ModelTensor()).first;
it->second.name_ = json_itr->name.GetString();
RETURN_IF_ERROR(ConvertDTypeFromTFS(
json_itr->value["dtype"].GetString(), &it->second.datatype_));
bool is_dynamic = false;
if (json_itr->value["tensor_shape"]["unknown_rank"].GetBool()) {
if (max_batch_size_ != 0) {
return cb::Error(
"Can not specify -b flag for saved model with unknown ranked "
"inputs",
pa::GENERIC_ERROR);
}
is_dynamic = true;
} else {
bool first_dim = true;
for (const auto& dim :
json_itr->value["tensor_shape"]["dim"].GetArray()) {
int64_t dim_int;
RETURN_IF_ERROR(GetInt(dim["size"], &dim_int));
if (first_dim && (max_batch_size_ != 0)) {
if (dim_int != -1) {
return cb::Error(
"Can not specify -b flag for saved model with input not "
"having their first dim as -1",
pa::GENERIC_ERROR);
}
first_dim = false;
} else {
if (dim_int == -1) {
is_dynamic = true;
}
it->second.shape_.push_back(dim_int);
}
}
}
if (is_dynamic) {
const auto user_shape_it = input_shapes.find(it->second.name_);
if (user_shape_it != input_shapes.end()) {
// Update the default shape to be used.
it->second.shape_.clear();
for (const auto dim : user_shape_it->second) {
it->second.shape_.push_back(dim);
}
}
}
}
}
// Will not extract information about the outputs, as by default TensorFlow
// Serving returns all output tensors if none are requested.
// See here
// https://github.com/tensorflow/serving/blob/2.3.0/tensorflow_serving/apis/predict.proto#L27
return cb::Error::Success;
}
cb::Error
ModelParser::InitTorchServe(
const std::string& model_name, const std::string& model_version,
const int32_t batch_size)
{
// TorchServe does not return model metadata, hence we cannot obtain any
// parameters.
model_name_ = model_name;
model_version_ = model_version;
max_batch_size_ = batch_size;
// TorchServe needs to upload a file to the server. The input holds the
// path to the file, which should be provided as JSON to --input-data.
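// For illustration, the --input-data JSON is expected to look roughly like
// the following (exact schema per the perf_analyzer input-data docs):
//   {"data": [{"TORCHSERVE_INPUT": ["path/to/file"]}]}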
auto it = inputs_->emplace("TORCHSERVE_INPUT", ModelTensor()).first;
it->second.name_ = "TORCHSERVE_INPUT";
it->second.datatype_ = "BYTES";
// Supports only a single input file
it->second.shape_.push_back(1);
return cb::Error::Success;
}
cb::Error
ModelParser::DetermineComposingModelMap(
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
RETURN_IF_ERROR(AddBLSComposingModels(bls_composing_models, config, backend));
RETURN_IF_ERROR(AddEnsembleComposingModels(config, backend));
return cb::Error::Success;
}
cb::Error
ModelParser::AddBLSComposingModels(
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
for (auto model : bls_composing_models) {
(*composing_models_map_)[config["name"].GetString()].insert(model);
rapidjson::Document composing_model_config;
RETURN_IF_ERROR(backend->ModelConfig(
&composing_model_config, model.first, model.second));
RETURN_IF_ERROR(
AddEnsembleComposingModels(composing_model_config, backend));
}
return cb::Error::Success;
}
cb::Error
ModelParser::AddEnsembleComposingModels(
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
if (config.HasMember("platform") &&
std::string(config["platform"].GetString()).compare("ensemble") == 0) {
const auto step_itr = config["ensemble_scheduling"].FindMember("step");
for (const auto& step : step_itr->value.GetArray()) {
std::string step_model_version;
int64_t model_version_int;
RETURN_IF_ERROR(GetInt(step["model_version"], &model_version_int));
if (model_version_int == -1) {
step_model_version = "";
} else {
step_model_version = std::to_string(model_version_int);
}
(*composing_models_map_)[config["name"].GetString()].emplace(
std::string(step["model_name"].GetString()), step_model_version);
rapidjson::Document composing_model_config;
RETURN_IF_ERROR(backend->ModelConfig(
&composing_model_config, step["model_name"].GetString(),
step_model_version));
RETURN_IF_ERROR(
AddEnsembleComposingModels(composing_model_config, backend));
}
}
return cb::Error::Success;
}
cb::Error
ModelParser::DetermineSchedulerType(
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend)
{
scheduler_type_ = NONE;
if (composing_models_map_->size() != 0) {
bool is_sequential = false;
RETURN_IF_ERROR(GetComposingSchedulerType(backend, &is_sequential));
if (is_sequential) {
scheduler_type_ = ENSEMBLE_SEQUENCE;
} else {
scheduler_type_ = ENSEMBLE;
}
} else {
const auto& sequence_itr = config.FindMember("sequence_batching");
if (sequence_itr != config.MemberEnd()) {
scheduler_type_ = SEQUENCE;
} else {
const auto& dynamic_itr = config.FindMember("dynamic_batching");
if (dynamic_itr != config.MemberEnd()) {
scheduler_type_ = DYNAMIC;
}
}
}
return cb::Error::Success;
}
cb::Error
ModelParser::GetComposingSchedulerType(
std::unique_ptr<cb::ClientBackend>& backend, bool* is_sequential)
{
for (auto parent_composing_models : *composing_models_map_.get()) {
auto& composing_models = parent_composing_models.second;
for (auto composing_model : composing_models) {
rapidjson::Document config;
RETURN_IF_ERROR(backend->ModelConfig(
&config, composing_model.first, composing_model.second));
const auto& sequence_itr = config.FindMember("sequence_batching");
if (sequence_itr != config.MemberEnd()) {
*is_sequential = true;
}
const auto cache_itr = config.FindMember("response_cache");
// response_cache_enabled_ set globally for reporting purposes if any
// composing model has it enabled, so don't overwrite it if already set
if (cache_itr != config.MemberEnd() && !response_cache_enabled_) {
response_cache_enabled_ = cache_itr->value["enable"].GetBool();
}
}
}
return cb::Error::Success;
}
cb::Error
ModelParser::GetInt(const rapidjson::Value& value, int64_t* integer_value)
{
if (value.IsString()) {
std::string str(value.GetString(), value.GetStringLength());
try {
*integer_value = std::stoll(str.c_str());
}
catch (...) {
return cb::Error(
std::string("unable to convert '") + str + "' to integer",
pa::GENERIC_ERROR);
}
} else if (value.IsInt64()) {
*integer_value = value.GetInt64();
} else if (value.IsInt()) {
*integer_value = value.GetInt();
} else {
return cb::Error("failed to parse the integer value", pa::GENERIC_ERROR);
}
return cb::Error::Success;
}
}} // namespace triton::perfanalyzer
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <unordered_map>
#include "client_backend/client_backend.h"
#include "perf_utils.h"
namespace triton { namespace perfanalyzer {
#ifndef DOCTEST_CONFIG_DISABLE
class TestModelParser;
class MockModelParser;
#endif
struct ModelTensor {
ModelTensor() : is_shape_tensor_(false) {}
std::string name_;
std::string datatype_;
std::vector<int64_t> shape_;
// Indicates if this tensor holds shape information for other tensors
bool is_shape_tensor_;
bool is_optional_;
};
using ModelTensorMap = std::map<std::string, ModelTensor>;
using ComposingModelMap = std::map<std::string, std::set<cb::ModelIdentifier>>;
//==============================================================================
/// ModelParser is a helper class to parse the information about the target
/// model from the metadata and configuration returned by the server.
///
/// Perf Analyzer depends upon the various properties of the model to correctly
/// generate and issue inference requests for the model. An object of this
/// class provides these necessary details.
class ModelParser {
public:
enum ModelSchedulerType {
NONE,
DYNAMIC,
SEQUENCE,
ENSEMBLE,
ENSEMBLE_SEQUENCE
};
explicit ModelParser(cb::BackendKind backend_kind)
: backend_kind_(backend_kind),
inputs_(std::make_shared<ModelTensorMap>()),
outputs_(std::make_shared<ModelTensorMap>()),
composing_models_map_(std::make_shared<ComposingModelMap>()),
scheduler_type_(NONE), max_batch_size_(0), is_decoupled_(false),
response_cache_enabled_(false)
{
}
/// Initializes the ModelParser with the metadata and config rapidjson DOM
/// for the target model obtained from Triton service
/// \param metadata The metadata of the target model.
/// \param config The config of the target model.
/// \param model_version The version of target model.
/// \param bls_composing_models A list of BLS composing model identifiers
/// \param input_shapes The user-provided default shapes which will be used
/// if a certain input has a wildcard in its dimensions.
/// \param backend The backend object.
/// \return cb::Error object indicating success or failure.
cb::Error InitTriton(
const rapidjson::Document& metadata, const rapidjson::Document& config,
const std::string& model_version,
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const std::unordered_map<std::string, std::vector<int64_t>>& input_shapes,
std::unique_ptr<cb::ClientBackend>& backend);
/// Initializes the ModelParser with the metadata and config rapidjson DOM
/// for the target model obtained from TF serving service.
/// \param metadata The metadata of the target model.
/// \param model_name The name of target model.
/// \param model_version The version of target model.
/// \param model_signature_name The signature name of target model.
/// \param input_shapes The user-provided default shapes which will be used
/// if a certain input has a wildcard in its dimensions.
/// \param backend The backend object.
/// \return cb::Error object indicating success or failure.
cb::Error InitTFServe(
const rapidjson::Document& metadata, const std::string& model_name,
const std::string& model_version, const std::string& model_signature_name,
const int32_t batch_size,
const std::unordered_map<std::string, std::vector<int64_t>>& input_shapes,
std::unique_ptr<cb::ClientBackend>& backend);
cb::Error InitTorchServe(
const std::string& model_name, const std::string& model_version,
const int32_t batch_size);
/// Get the name of the target model
/// \return Model name as string
const std::string& ModelName() const { return model_name_; }
/// Get the version of target model
/// \return Model version as string
const std::string& ModelVersion() const { return model_version_; }
/// Get the signature name of target model
/// \return Model signature name as string
const std::string& ModelSignatureName() const
{
return model_signature_name_;
}
/// Get the scheduler type for the model
ModelSchedulerType SchedulerType() const { return scheduler_type_; }
/// Get the max batch size supported by the model. Returns 0 if the model
/// does not support batching.
/// \return The maximum supported batch size.
size_t MaxBatchSize() const { return max_batch_size_; }
/// Returns whether or not the model is decoupled
/// \return the truth value of whether the model is decoupled
bool IsDecoupled() const { return is_decoupled_; }
/// Returns whether or not response cache is enabled for this model
/// \return the truth value of whether response cache is enabled for this
/// model
bool ResponseCacheEnabled() const { return response_cache_enabled_; }
/// Get the details about the model inputs.
/// \return The map with tensor_name and the tensor details
/// stored as key-value pair.
const std::shared_ptr<ModelTensorMap>& Inputs() { return inputs_; }
/// Get the details about the model outputs.
/// \return The map with tensor_name and the tensor details
/// stored as key-value pair.
const std::shared_ptr<ModelTensorMap>& Outputs() { return outputs_; }
/// Get the composing maps for the target model.
/// \return The pointer to the nested map describing the
/// nested flow in the target model.
const std::shared_ptr<ComposingModelMap>& GetComposingModelMap()
{
return composing_models_map_;
}
protected:
ModelSchedulerType scheduler_type_;
bool is_decoupled_;
private:
/// Populate composing_models_map_ based on any bls composing models passed in
/// via the CLI as well as any ensemble or nested ensemble models
cb::Error DetermineComposingModelMap(
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend);
cb::Error AddBLSComposingModels(
const std::vector<cb::ModelIdentifier>& bls_composing_models,
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend);
cb::Error AddEnsembleComposingModels(
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend);
/// Populate scheduler_type_ based on the scheduler type of the parent model
/// as well as any composing models
cb::Error DetermineSchedulerType(
const rapidjson::Document& config,
std::unique_ptr<cb::ClientBackend>& backend);
/// Sets is_sequential to true if any of the composing models are sequential
cb::Error GetComposingSchedulerType(
std::unique_ptr<cb::ClientBackend>& backend, bool* is_sequential);
/// In the json produced by protobuf, int64 and uint64 values are
/// represented as strings. Protobuf doesn't provide an option to
/// disable this (sigh) so we need to correctly parse these fields
/// for ModelParser to receive appropriate requests.
/// \param value The rapidjson value object with the int value.
/// \param integer_value The output integer pointer.
/// \return cb::Error object indicating success or failure.
cb::Error GetInt(const rapidjson::Value& value, int64_t* integer_value);
cb::BackendKind backend_kind_;
std::shared_ptr<ModelTensorMap> inputs_;
std::shared_ptr<ModelTensorMap> outputs_;
std::shared_ptr<ComposingModelMap> composing_models_map_;
std::string model_name_;
std::string model_version_;
std::string model_signature_name_;
size_t max_batch_size_;
bool response_cache_enabled_;
#ifndef DOCTEST_CONFIG_DISABLE
friend TestModelParser;
friend MockModelParser;
public:
ModelParser() = default;
#endif
};
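// Usage sketch (illustrative only, not part of the original header): once a
// parser has been initialized through one of the Init* methods above, callers
// key their request generation off the parsed properties, e.g.:
inline bool
RequiresSequenceHandling(const ModelParser& parser)
{
  return parser.SchedulerType() == ModelParser::SEQUENCE ||
         parser.SchedulerType() == ModelParser::ENSEMBLE_SEQUENCE;
}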
}} // namespace triton::perfanalyzer
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "mpi_utils.h"
#include <dlfcn.h>
#include <iostream>
#include <stdexcept>
namespace triton { namespace perfanalyzer {
MPIDriver::MPIDriver(bool is_enabled) : is_enabled_(is_enabled)
{
if (is_enabled_ == false) {
return;
}
handle_ = dlopen("libmpi.so", RTLD_LAZY | RTLD_GLOBAL);
if (handle_ == nullptr) {
    throw std::runtime_error(
        "Unable to load MPI library. If you are trying to run with "
        "MPI / multiple models, check that 'libmpi.so' is on a path "
        "listed in the `LD_LIBRARY_PATH` environment variable.");
}
CheckMPIImpl();
}
bool
MPIDriver::IsMPIRun()
{
if (is_enabled_ == false) {
return false;
}
if (MPIInitialized() == false) {
throw std::runtime_error("Must call MPI_Init() before calling IsMPIRun().");
}
return MPICommSizeWorld() > 1;
}
void
MPIDriver::MPIInit(int* argc, char*** argv)
{
if (is_enabled_ == false) {
return;
}
int (*MPI_Init)(
int*, char***){(int (*)(int*, char***))dlsym(handle_, "MPI_Init")};
if (MPI_Init == nullptr) {
throw std::runtime_error("Unable to obtain address of `MPI_Init` symbol.");
}
MPI_Init(argc, argv);
}
int
MPIDriver::MPICommSizeWorld()
{
if (is_enabled_ == false) {
return -1;
}
int world_size{1};
int (*MPI_Comm_size)(
void*, int*){(int (*)(void*, int*))dlsym(handle_, "MPI_Comm_size")};
if (MPI_Comm_size == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Comm_size` symbol.");
}
MPI_Comm_size(MPICommWorld(), &world_size);
return world_size;
}
void
MPIDriver::MPIBarrierWorld()
{
if (is_enabled_ == false) {
return;
}
int (*MPI_Barrier)(void*){(int (*)(void*))dlsym(handle_, "MPI_Barrier")};
if (MPI_Barrier == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Barrier` symbol.");
}
MPI_Barrier(MPICommWorld());
}
int
MPIDriver::MPICommRankWorld()
{
if (is_enabled_ == false) {
return -1;
}
int rank{0};
int (*MPI_Comm_rank)(
void*, int*){(int (*)(void*, int*))dlsym(handle_, "MPI_Comm_rank")};
if (MPI_Comm_rank == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Comm_rank` symbol.");
}
MPI_Comm_rank(MPICommWorld(), &rank);
return rank;
}
void
MPIDriver::MPIBcastIntWorld(void* buffer, int count, int root)
{
if (is_enabled_ == false) {
return;
}
int (*MPI_Bcast)(void*, int, void*, int, void*){
(int (*)(void*, int, void*, int, void*))dlsym(handle_, "MPI_Bcast")};
if (MPI_Bcast == nullptr) {
throw std::runtime_error("Unable to obtain address of `MPI_Bcast` symbol.");
}
MPI_Bcast(buffer, count, MPIInt(), root, MPICommWorld());
}
void
MPIDriver::MPIFinalize()
{
if (is_enabled_ == false) {
return;
}
int (*MPI_Finalize)(){(int (*)())dlsym(handle_, "MPI_Finalize")};
if (MPI_Finalize == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Finalize` symbol.");
}
MPI_Finalize();
}
bool
MPIDriver::MPIInitialized()
{
if (is_enabled_ == false) {
return false;
}
int (*MPI_Initialized)(int*){
(int (*)(int*))dlsym(handle_, "MPI_Initialized")};
if (MPI_Initialized == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Initialized` symbol.");
}
int initialized{0};
MPI_Initialized(&initialized);
return initialized != 0;
}
void*
MPIDriver::MPICommWorld()
{
if (is_enabled_ == false) {
return nullptr;
}
void* MPI_COMM_WORLD{dlsym(handle_, "ompi_mpi_comm_world")};
if (MPI_COMM_WORLD == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `ompi_mpi_comm_world` symbol.");
}
return MPI_COMM_WORLD;
}
void*
MPIDriver::MPIInt()
{
if (is_enabled_ == false) {
return nullptr;
}
void* MPI_INT{dlsym(handle_, "ompi_mpi_int")};
if (MPI_INT == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `ompi_mpi_int` symbol.");
}
return MPI_INT;
}
void
MPIDriver::CheckMPIImpl()
{
if (is_enabled_ == false) {
return;
}
int (*MPI_Get_library_version)(char*, int*){
(int (*)(char*, int*))dlsym(handle_, "MPI_Get_library_version")};
if (MPI_Get_library_version == nullptr) {
throw std::runtime_error(
"Unable to obtain address of `MPI_Get_library_version` symbol.");
}
std::string version;
version.resize(MPIVersionStringMaximumLength);
int resultlen{0};
MPI_Get_library_version(&version[0], &resultlen);
if (version.find("Open MPI") != 0) {
throw std::runtime_error(
"Perf Analyzer only supports Open MPI. Please uninstall your current "
"implementation of MPI and install Open MPI.");
}
}
}} // namespace triton::perfanalyzer
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <memory>
namespace triton { namespace perfanalyzer {
class MPIDriver {
public:
// Initializes class. Saves handle to MPI library if MPI library is available.
MPIDriver(bool is_enabled = false);
// Returns true if the current process is an MPI process with world size
// greater than 1.
bool IsMPIRun();
// Attempts to call MPI_Init API.
void MPIInit(int* argc, char*** argv);
// Attempts to call MPI_Comm_size API with MPI_COMM_WORLD communicator.
int MPICommSizeWorld();
// Attempts to call MPI_Barrier API with MPI_COMM_WORLD communicator.
void MPIBarrierWorld();
// Attempts to call MPI_Comm_rank API with MPI_COMM_WORLD communicator.
int MPICommRankWorld();
// Attempts to call MPI_Bcast API with MPI_INT data type and MPI_COMM_WORLD
// communicator.
void MPIBcastIntWorld(void* buffer, int count, int root);
// Attempts to call MPI_Finalize API.
void MPIFinalize();
private:
// Attempts to call MPI_Initialized API.
bool MPIInitialized();
// Returns MPI_COMM_WORLD symbol address if MPI library is available,
// otherwise `nullptr`.
void* MPICommWorld();
// Returns MPI_INT symbol address if MPI library is available, otherwise
// `nullptr`.
void* MPIInt();
// Attempts to check that Open MPI is installed.
void CheckMPIImpl();
// Bool for whether user has opted to attempt to use MPI functionality.
bool is_enabled_{false};
// Loaded object for MPI library.
void* handle_{nullptr};
// Maximum string length for MPI version string.
const uint64_t MPIVersionStringMaximumLength{32768};
};
}} // namespace triton::perfanalyzer
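// A minimal, hypothetical usage sketch of the MPIDriver class above (not part
// of the original header): the driver is constructed enabled, MPI is
// initialized before querying rank/size, and finalized at the end.
// `RunWithMpi` and its wiring are illustrative only, not Perf Analyzer's
// actual entry point.
#include <iostream>

#include "mpi_utils.h"

void
RunWithMpi(int argc, char** argv)
{
  triton::perfanalyzer::MPIDriver mpi_driver{true /* is_enabled */};
  mpi_driver.MPIInit(&argc, &argv);
  if (mpi_driver.IsMPIRun()) {
    // Multi-rank run: each rank reports its position in MPI_COMM_WORLD and
    // then waits for the others before continuing.
    std::cout << "rank " << mpi_driver.MPICommRankWorld() << " of "
              << mpi_driver.MPICommSizeWorld() << std::endl;
    mpi_driver.MPIBarrierWorld();
  }
  mpi_driver.MPIFinalize();
}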
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "perf_analyzer.h"
#include "perf_analyzer_exception.h"
#include "report_writer.h"
#include "request_rate_manager.h"
namespace pa = triton::perfanalyzer;
namespace triton { namespace perfanalyzer {
volatile bool early_exit = false;
void
SignalHandler(int signum)
{
std::cout << "Interrupt signal (" << signum << ") received." << std::endl;
  // On the first invocation of SignalHandler the early_exit flag is set and
  // the analyzer waits for in-flight inferences to complete before exiting.
  // On the second invocation, the program exits immediately.
if (!early_exit) {
std::cout << "Waiting for in-flight inferences to complete." << std::endl;
early_exit = true;
} else {
std::cout << "Exiting immediately..." << std::endl;
exit(0);
}
}
}} // namespace triton::perfanalyzer
PerfAnalyzer::PerfAnalyzer(pa::PAParamsPtr params) : params_(params)
{
CreateAnalyzerObjects();
}
void
PerfAnalyzer::Run()
{
PrerunReport();
Profile();
WriteReport();
GenerateProfileExport();
Finalize();
}
void
PerfAnalyzer::CreateAnalyzerObjects()
{
// trap SIGINT to allow threads to exit gracefully
signal(SIGINT, pa::SignalHandler);
std::shared_ptr<cb::ClientBackendFactory> factory;
FAIL_IF_ERR(
cb::ClientBackendFactory::Create(
params_->kind, params_->url, params_->protocol, params_->ssl_options,
params_->trace_options, params_->compression_algorithm,
params_->http_headers, params_->triton_server_path,
params_->model_repository_path, params_->extra_verbose,
params_->metrics_url, params_->input_tensor_format,
params_->output_tensor_format, &factory),
"failed to create client factory");
FAIL_IF_ERR(
factory->CreateClientBackend(&backend_),
"failed to create triton client backend");
parser_ = std::make_shared<pa::ModelParser>(params_->kind);
if (params_->kind == cb::BackendKind::TRITON ||
params_->kind == cb::BackendKind::TRITON_C_API) {
rapidjson::Document model_metadata;
FAIL_IF_ERR(
backend_->ModelMetadata(
&model_metadata, params_->model_name, params_->model_version),
"failed to get model metadata");
rapidjson::Document model_config;
FAIL_IF_ERR(
backend_->ModelConfig(
&model_config, params_->model_name, params_->model_version),
"failed to get model config");
FAIL_IF_ERR(
parser_->InitTriton(
model_metadata, model_config, params_->model_version,
params_->bls_composing_models, params_->input_shapes, backend_),
"failed to create model parser");
} else if (params_->kind == cb::BackendKind::TENSORFLOW_SERVING) {
rapidjson::Document model_metadata;
FAIL_IF_ERR(
backend_->ModelMetadata(
&model_metadata, params_->model_name, params_->model_version),
"failed to get model metadata");
FAIL_IF_ERR(
parser_->InitTFServe(
model_metadata, params_->model_name, params_->model_version,
params_->model_signature_name, params_->batch_size,
params_->input_shapes, backend_),
"failed to create model parser");
} else if (params_->kind == cb::BackendKind::TORCHSERVE) {
FAIL_IF_ERR(
parser_->InitTorchServe(
params_->model_name, params_->model_version, params_->batch_size),
"failed to create model parser");
} else {
std::cerr << "unsupported client backend kind" << std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
if ((parser_->MaxBatchSize() == 0) && params_->batch_size > 1) {
std::cerr << "can not specify batch size > 1 as the model does not support "
"batching"
<< std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
// Change the default value for the --async option for sequential models
if ((parser_->SchedulerType() == pa::ModelParser::SEQUENCE) ||
(parser_->SchedulerType() == pa::ModelParser::ENSEMBLE_SEQUENCE)) {
if (!params_->async) {
params_->async = params_->forced_sync ? false : true;
}
// Validate the batch_size specification
if (params_->batch_size > 1) {
std::cerr << "can not specify batch size > 1 when using a sequence model"
<< std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
}
if (params_->streaming) {
if (params_->forced_sync) {
std::cerr << "can not use streaming with synchronous API" << std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
params_->async = true;
}
std::unique_ptr<pa::LoadManager> manager;
if (params_->targeting_concurrency()) {
if ((parser_->SchedulerType() == pa::ModelParser::SEQUENCE) ||
(parser_->SchedulerType() == pa::ModelParser::ENSEMBLE_SEQUENCE)) {
if (params_->concurrency_range.end == pa::NO_LIMIT && params_->async) {
std::cerr << "The 'end' concurrency can not be 0 for sequence "
"models when using asynchronous API."
<< std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
}
params_->max_concurrency = std::max(
params_->concurrency_range.start, params_->concurrency_range.end);
if (!params_->async) {
if (params_->concurrency_range.end == pa::NO_LIMIT) {
std::cerr
<< "WARNING: The maximum attainable concurrency will be limited by "
"max_threads specification."
<< std::endl;
params_->concurrency_range.end = params_->max_threads;
} else {
        // Since a thread can generate only one synchronous request at a
        // time, that many threads need to be created to maintain the
        // requested concurrency.
if (params_->max_threads_specified) {
std::cerr
<< "WARNING: Overriding max_threads specification to ensure "
"requested concurrency range."
<< std::endl;
}
params_->max_threads = std::max(
params_->concurrency_range.start, params_->concurrency_range.end);
}
}
if ((params_->sequence_id_range != 0) &&
(params_->sequence_id_range < params_->max_concurrency)) {
std::cerr << "sequence id range specified is smaller than the "
<< "maximum possible concurrency, sequence id collision may "
<< "occur." << std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
FAIL_IF_ERR(
pa::ConcurrencyManager::Create(
params_->async, params_->streaming, params_->batch_size,
params_->max_threads, params_->max_concurrency,
params_->shared_memory_type, params_->output_shm_size, parser_,
factory, &manager),
"failed to create concurrency manager");
} else if (params_->using_request_rate_range) {
if ((params_->sequence_id_range != 0) &&
(params_->sequence_id_range < params_->num_of_sequences)) {
std::cerr
<< "sequence id range specified is smaller than the "
<< "maximum possible number of sequences, sequence id collision "
<< "may occur." << std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
FAIL_IF_ERR(
pa::RequestRateManager::Create(
params_->async, params_->streaming, params_->measurement_window_ms,
params_->max_trials, params_->request_distribution,
params_->batch_size, params_->max_threads,
params_->num_of_sequences, params_->shared_memory_type,
params_->output_shm_size, params_->serial_sequences, parser_,
factory, &manager),
"failed to create request rate manager");
} else {
if ((params_->sequence_id_range != 0) &&
(params_->sequence_id_range < params_->num_of_sequences)) {
std::cerr
<< "sequence id range specified is smaller than the "
<< "maximum possible number of sequences, sequence id collision "
<< "may occur." << std::endl;
throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
}
FAIL_IF_ERR(
pa::CustomLoadManager::Create(
params_->async, params_->streaming, params_->measurement_window_ms,
params_->max_trials, params_->request_intervals_file,
params_->batch_size, params_->max_threads,
params_->num_of_sequences, params_->shared_memory_type,
params_->output_shm_size, params_->serial_sequences, parser_,
factory, &manager),
"failed to create custom load manager");
}
manager->InitManager(
params_->string_length, params_->string_data, params_->zero_input,
params_->user_data, params_->start_sequence_id,
params_->sequence_id_range, params_->sequence_length,
params_->sequence_length_specified, params_->sequence_length_variation);
FAIL_IF_ERR(
pa::ProfileDataCollector::Create(&collector_),
"failed to create profile data collector");
FAIL_IF_ERR(
pa::ProfileDataExporter::Create(&exporter_),
"failed to create profile data exporter");
FAIL_IF_ERR(
pa::InferenceProfiler::Create(
params_->verbose, params_->stability_threshold,
params_->measurement_window_ms, params_->max_trials,
params_->percentile, params_->latency_threshold_ms, params_->protocol,
parser_, std::move(backend_), std::move(manager), &profiler_,
params_->measurement_request_count, params_->measurement_mode,
params_->mpi_driver, params_->metrics_interval_ms,
params_->should_collect_metrics, params_->overhead_pct_threshold,
collector_, !params_->profile_export_file.empty()),
"failed to create profiler");
}
void
PerfAnalyzer::PrerunReport()
{
std::cout << "*** Measurement Settings ***" << std::endl;
if (params_->kind == cb::BackendKind::TRITON || params_->using_batch_size) {
std::cout << " Batch size: " << params_->batch_size << std::endl;
}
if (params_->kind == cb::BackendKind::TRITON_C_API) {
std::cout << " Service Kind: Triton C-API" << std::endl;
} else if (params_->kind == cb::BackendKind::TRITON) {
std::cout << " Service Kind: Triton" << std::endl;
} else if (params_->kind == cb::BackendKind::TORCHSERVE) {
std::cout << " Service Kind: TorchServe" << std::endl;
} else if (params_->kind == cb::BackendKind::TENSORFLOW_SERVING) {
std::cout << " Service Kind: TensorFlow Serving" << std::endl;
}
if (params_->measurement_mode == pa::MeasurementMode::COUNT_WINDOWS) {
std::cout << " Using \"count_windows\" mode for stabilization"
<< std::endl;
} else {
std::cout << " Using \"time_windows\" mode for stabilization" << std::endl;
}
if (params_->measurement_mode == pa::MeasurementMode::TIME_WINDOWS) {
std::cout << " Measurement window: " << params_->measurement_window_ms
<< " msec" << std::endl;
} else if (params_->measurement_mode == pa::MeasurementMode::COUNT_WINDOWS) {
std::cout << " Minimum number of samples in each window: "
<< params_->measurement_request_count << std::endl;
}
if (params_->concurrency_range.end != 1) {
std::cout << " Latency limit: " << params_->latency_threshold_ms << " msec"
<< std::endl;
if (params_->concurrency_range.end != pa::NO_LIMIT) {
std::cout << " Concurrency limit: "
<< std::max(
params_->concurrency_range.start,
params_->concurrency_range.end)
<< " concurrent requests" << std::endl;
}
}
if (params_->request_rate_range[pa::SEARCH_RANGE::kEND] != 1.0) {
std::cout << " Latency limit: " << params_->latency_threshold_ms << " msec"
<< std::endl;
if (params_->request_rate_range[pa::SEARCH_RANGE::kEND] !=
static_cast<double>(pa::NO_LIMIT)) {
std::cout << " Request Rate limit: "
<< std::max(
params_->request_rate_range[pa::SEARCH_RANGE::kSTART],
                       params_->request_rate_range[pa::SEARCH_RANGE::kEND])
                << " requests per second" << std::endl;
}
}
if (params_->using_request_rate_range) {
if (params_->request_distribution == pa::Distribution::POISSON) {
std::cout << " Using poisson distribution on request generation"
<< std::endl;
} else {
std::cout << " Using uniform distribution on request generation"
<< std::endl;
}
}
if (params_->search_mode == pa::SearchMode::BINARY) {
std::cout << " Using Binary Search algorithm" << std::endl;
}
if (params_->async) {
std::cout << " Using asynchronous calls for inference" << std::endl;
} else {
std::cout << " Using synchronous calls for inference" << std::endl;
}
if (parser_->IsDecoupled()) {
std::cout << " Detected decoupled model, using the first response for "
"measuring latency"
<< std::endl;
}
if (params_->percentile == -1) {
std::cout << " Stabilizing using average latency" << std::endl;
} else {
std::cout << " Stabilizing using p" << params_->percentile << " latency"
<< std::endl;
}
std::cout << std::endl;
}
void
PerfAnalyzer::Profile()
{
params_->mpi_driver->MPIBarrierWorld();
cb::Error err;
if (params_->targeting_concurrency()) {
err = profiler_->Profile<size_t>(
params_->concurrency_range.start, params_->concurrency_range.end,
params_->concurrency_range.step, params_->search_mode, perf_statuses_);
} else {
err = profiler_->Profile<double>(
params_->request_rate_range[pa::SEARCH_RANGE::kSTART],
params_->request_rate_range[pa::SEARCH_RANGE::kEND],
params_->request_rate_range[pa::SEARCH_RANGE::kSTEP],
params_->search_mode, perf_statuses_);
}
params_->mpi_driver->MPIBarrierWorld();
if (!err.IsOk()) {
std::cerr << err;
    // In the case of early_exit, do not throw so that execution continues and
    // the summary is still reported.
if (!pa::early_exit) {
throw pa::PerfAnalyzerException(err.Err());
}
}
}
void
PerfAnalyzer::WriteReport()
{
if (!perf_statuses_.size()) {
return;
}
  // More could be printed when verbose is enabled, but it would be too much information.
std::cout << "Inferences/Second vs. Client ";
if (params_->percentile == -1) {
std::cout << "Average Batch Latency" << std::endl;
} else {
std::cout << "p" << params_->percentile << " Batch Latency" << std::endl;
}
for (pa::PerfStatus& status : perf_statuses_) {
if (params_->targeting_concurrency()) {
std::cout << "Concurrency: " << status.concurrency << ", ";
} else {
std::cout << "Request Rate: " << status.request_rate << ", ";
}
std::cout << "throughput: " << status.client_stats.infer_per_sec
<< " infer/sec, latency "
<< (status.stabilizing_latency_ns / 1000) << " usec" << std::endl;
}
bool should_output_metrics{
params_->should_collect_metrics && params_->verbose_csv};
std::unique_ptr<pa::ReportWriter> writer;
FAIL_IF_ERR(
pa::ReportWriter::Create(
params_->filename, params_->targeting_concurrency(), perf_statuses_,
params_->verbose_csv, profiler_->IncludeServerStats(),
params_->percentile, parser_, &writer, should_output_metrics),
"failed to create report writer");
writer->GenerateReport();
}
void
PerfAnalyzer::GenerateProfileExport()
{
if (!params_->profile_export_file.empty()) {
exporter_->Export(
collector_->GetData(), collector_->GetVersion(),
params_->profile_export_file);
}
}
void
PerfAnalyzer::Finalize()
{
params_->mpi_driver->MPIFinalize();
}
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <getopt.h>
#include <signal.h>
#include <algorithm>
#include "command_line_parser.h"
#include "concurrency_manager.h"
#include "custom_load_manager.h"
#include "inference_profiler.h"
#include "model_parser.h"
#include "mpi_utils.h"
#include "perf_utils.h"
#include "profile_data_collector.h"
#include "profile_data_exporter.h"
// Perf Analyzer provides various metrics to measure the performance of
// the inference server. It can either be used to measure throughput,
// latency, and time distribution under a fixed setting (i.e. fixed batch size
// and a fixed number of concurrent requests), or be used to generate
// throughput-latency data points under a dynamic setting (i.e. collecting
// throughput-latency data under different load levels).
//
// The following data is collected and used as part of the metrics:
// - Throughput (infer/sec):
//   The number of inferences processed per second as seen by the analyzer.
//   The number of inferences is the number of requests multiplied by their
//   batch size, and the total time is the time elapsed from when the analyzer
//   starts sending requests to when it has received all responses.
// - Latency (usec):
//   The average elapsed time between when a request is sent and
//   when the response for the request is received. If the 'percentile' flag
//   is specified, the selected percentile value is reported instead of the
//   average value.
//
// Perf Analyzer determines the stability of throughput and latency by observing
// measurements in different trials. If the latency and throughput are within
// the stability percentage (see --stability-percentage option), Perf Analyzer
// reports the average of the throughput and latency numbers observed in the
// last three trials. All the measurements gathered during the last three trials
// are aggregated to generate a single report. The total number of requests is
// the sum of all the requests in the individual measurement windows.
//
// There are broadly three ways to load the server for data collection using
// perf_analyzer:
// - Maintaining Target Concurrency:
//   In this setting, the analyzer maintains a target number of concurrent
//   requests sent to the server (see --concurrency-range option) while
//   taking measurements.
//   The number of requests is the total number of requests sent within
//   the time interval for measurement (see --measurement-interval option) and
//   the latency is the average latency across all requests.
//
//   Besides throughput and latency, which are measured on the client side,
//   the following data measured by the server is also reported
//   in this setting:
//   - Concurrent request: the number of concurrent requests as specified
//     in the --concurrency-range option. Note that to run perf analyzer at
//     a single concurrency, the user must specify --concurrency-range
//     <'start'>, omitting the 'end' and 'step' values.
//   - Batch size: the batch size of each request as specified in the -b option
//   - Inference count: batch size * number of inference requests
//   - Cumulative time: the total time between request received and
//     response sent for the requests sent by perf analyzer.
//   - Average cumulative time: cumulative time / number of inference requests
//   - Compute time: the total time it takes to run inference, including time
//     copying input tensors to GPU memory, time executing the model,
//     and time copying output tensors from GPU memory, for the requests
//     sent by perf analyzer.
//   - Average compute time: compute time / number of inference requests
//   - Queue time: the total time spent waiting for an available model
//     instance for the requests sent by perf analyzer.
//   - Average queue time: queue time / number of inference requests
//   If all fields of --concurrency-range are specified, the analyzer
//   performs the following procedure:
//     1. Follows the procedure in fixed concurrent request mode using
//        k concurrent requests (k starts at 'start').
//     2. Gathers the data reported from step 1.
//     3. Increases k by 'step' and repeats steps 1 and 2 until the latency
//        from the current iteration exceeds the latency threshold (see
//        --latency-threshold option) or the concurrency level reaches 'end'.
//        Note that by setting --latency-threshold or 'end' to 0, the effect
//        of either limit can be removed. However, both cannot be 0
//        simultaneously.
//   At each iteration, the data mentioned in fixed concurrent request mode
//   is reported. In addition, after the procedure above, a collection
//   of "throughput, latency, concurrent request count" tuples is
//   reported in increasing load level order.
//
// - Maintaining Target Request Rate:
//   This mode is enabled only when the --request-rate-range option is
//   specified. Unlike above, here the analyzer tries to maintain a target
//   rate of requests issued to the server while taking measurements. The rest
//   of the analyzer's behaviour is identical to the above. It is important to
//   note that even though over a sufficiently large interval the rate of
//   requests tends to the target request rate, the actual request rate for a
//   small time interval depends upon the selected request distribution
//   (--request-distribution). For the 'constant' request distribution, the
//   time interval between successive requests is kept constant, hence the
//   request rate is constant over time. The 'poisson' request distribution,
//   however, varies the time interval between successive requests such that
//   there are periods of bursts and lulls in request generation.
//   Additionally, the 'poisson' distribution mimics real-world traffic and
//   can be used to obtain measurements under a realistic load.
//   With each request rate, the analyzer also reports the 'Delayed Request
//   Count', which gives an idea of how many requests missed their schedule as
//   specified by the distribution. Users can use --max-threads to increase
//   the number of threads, which might help in dispatching requests as per
//   the schedule. Also note that a very large number of threads might be
//   counter-productive, with most of the time being spent on
//   context-switching between threads.
//
// - Following a User-Provided Request Delivery Schedule:
//   This mode is enabled only when the --request-intervals option is
//   specified. In this case, the analyzer tries to dispatch requests to the
//   server with the time intervals between successive requests specified in a
//   user-provided file. This file should contain one time interval in
//   microseconds per line. The analyzer loops over the values to produce a
//   consistent load for measurements. Once the readings have stabilized, the
//   final statistics are reported. The statistics include the
//   'Delayed Request Count' for the requests that missed their schedule. As
//   described before, users can tune --max-threads to help the analyzer keep
//   up with the schedule. This mode helps the user analyze the performance of
//   the server under custom settings that may be of interest.
//
// By default, perf_analyzer will maintain target concurrency while measuring
// the performance.
//
// Options:
// -b: batch size for each request sent.
// --concurrency-range: The range of concurrency levels perf_analyzer will use.
// A concurrency level indicates the number of concurrent requests in queue.
// --request-rate-range: The range of request rates perf_analyzer will use to
// load the server.
// --request-intervals: File containing time intervals (in microseconds) to use
// between successive requests.
// --latency-threshold: latency threshold in msec.
// --measurement-interval: time interval for each measurement window in msec.
// --async: Enables asynchronous inference calls.
// --binary-search: Enables binary search within the specified range.
// --request-distribution: Allows the user to specify the distribution used to
//     select the time intervals between request dispatches.
//
// For details of the options not listed here, please refer to the usage output.
//
class PerfAnalyzer {
public:
PerfAnalyzer(pa::PAParamsPtr params);
virtual ~PerfAnalyzer(){};
// Main runner function for Perf Analyzer.
void Run();
private:
pa::PAParamsPtr params_;
std::unique_ptr<pa::InferenceProfiler> profiler_;
std::unique_ptr<cb::ClientBackend> backend_;
std::shared_ptr<pa::ModelParser> parser_;
std::vector<pa::PerfStatus> perf_statuses_;
std::shared_ptr<pa::ProfileDataCollector> collector_;
std::shared_ptr<pa::ProfileDataExporter> exporter_;
  //
  // Helper methods
  //
void CreateAnalyzerObjects();
void PrerunReport();
void Profile();
void WriteReport();
void GenerateProfileExport();
void Finalize();
};
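// A minimal, hypothetical sketch (not part of the original header) of driving
// the class above: given a populated pa::PAParamsPtr (normally produced by the
// command-line parser declared in command_line_parser.h), the whole
// measurement pipeline is a construction followed by Run(). `RunAnalyzer` is
// an illustrative name; how the params object is filled in is out of scope.
#include <iostream>

#include "perf_analyzer.h"
#include "perf_analyzer_exception.h"

int
RunAnalyzer(pa::PAParamsPtr params)
{
  try {
    PerfAnalyzer analyzer(params);
    // Run() executes PrerunReport -> Profile -> WriteReport ->
    // GenerateProfileExport -> Finalize in order.
    analyzer.Run();
  }
  catch (const pa::PerfAnalyzerException& e) {
    std::cerr << e.what() << std::endl;
    return e.GetError();
  }
  return 0;
}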
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once
#include <exception>
#include <string>
namespace triton { namespace perfanalyzer {
// Perf Exception error class
//
class PerfAnalyzerException : public std::exception {
public:
PerfAnalyzerException(uint32_t error) : error_(error) {}
PerfAnalyzerException(const std::string& message, uint32_t error)
: message_(message), error_(error)
{
}
virtual const char* what() const throw() { return message_.c_str(); }
inline int GetError() const { return error_; }
private:
const std::string message_{""};
uint32_t error_;
};
}} // namespace triton::perfanalyzer
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// This file exists to hold a macro-expanded main function for the unit test
// runner executable.
//
// The old contents of main.cc are needed for the unit test runner to compile,
// but since two main functions cannot be compiled in the same executable, the
// contents of the old main.cc were moved to a new file/class, which are now
// included in the compilation of the unit test runner executable.
//
// The new contents of main.cc just include the new file/class mentioned above
// and run the primary function from there in a simplified main function, which
// runs Perf Analyzer.
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "doctest.h"
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "perf_utils.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include "client_backend/client_backend.h"
#include "doctest.h"
namespace triton { namespace perfanalyzer {
cb::ProtocolType
ParseProtocol(const std::string& str)
{
std::string protocol(str);
std::transform(protocol.begin(), protocol.end(), protocol.begin(), ::tolower);
if (protocol == "http") {
return cb::ProtocolType::HTTP;
} else if (protocol == "grpc") {
return cb::ProtocolType::GRPC;
}
return cb::ProtocolType::UNKNOWN;
}
cb::Error
ConvertDTypeFromTFS(const std::string& tf_dtype, std::string* datatype)
{
if (tf_dtype == "DT_HALF") {
*datatype = "FP16";
} else if (tf_dtype == "DT_BFLOAT16") {
*datatype = "BF16";
} else if (tf_dtype == "DT_FLOAT") {
*datatype = "FP32";
} else if (tf_dtype == "DT_DOUBLE") {
*datatype = "FP64";
} else if (tf_dtype == "DT_INT32") {
*datatype = "INT32";
} else if (tf_dtype == "DT_INT16") {
*datatype = "INT16";
} else if (tf_dtype == "DT_UINT16") {
*datatype = "UINT16";
} else if (tf_dtype == "DT_INT8") {
*datatype = "INT8";
} else if (tf_dtype == "DT_UINT8") {
*datatype = "UINT8";
} else if (tf_dtype == "DT_STRING") {
*datatype = "BYTES";
} else if (tf_dtype == "DT_INT64") {
*datatype = "INT64";
} else if (tf_dtype == "DT_BOOL") {
*datatype = "BOOL";
} else if (tf_dtype == "DT_UINT32") {
*datatype = "UINT32";
} else if (tf_dtype == "DT_UINT64") {
*datatype = "UINT64";
} else {
return cb::Error(
"unsupported datatype encountered " + tf_dtype, pa::GENERIC_ERROR);
}
return cb::Error::Success;
}
bool
IsDirectory(const std::string& path)
{
struct stat s;
if (stat(path.c_str(), &s) == 0 && (s.st_mode & S_IFDIR)) {
return true;
} else {
return false;
}
}
bool
IsFile(const std::string& complete_path)
{
struct stat s;
if (stat(complete_path.c_str(), &s) == 0 && (s.st_mode & S_IFREG)) {
return true;
} else {
return false;
}
}
int64_t
ByteSize(const std::vector<int64_t>& shape, const std::string& datatype)
{
int one_element_size;
if ((datatype.compare("BOOL") == 0) || (datatype.compare("INT8") == 0) ||
(datatype.compare("UINT8") == 0)) {
one_element_size = 1;
} else if (
(datatype.compare("INT16") == 0) || (datatype.compare("UINT16") == 0) ||
(datatype.compare("FP16") == 0) || (datatype.compare("BF16") == 0)) {
one_element_size = 2;
} else if (
(datatype.compare("INT32") == 0) || (datatype.compare("UINT32") == 0) ||
(datatype.compare("FP32") == 0)) {
one_element_size = 4;
} else if (
(datatype.compare("INT64") == 0) || (datatype.compare("UINT64") == 0) ||
(datatype.compare("FP64") == 0)) {
one_element_size = 8;
} else {
return -1;
}
int64_t count = ElementCount(shape);
if (count < 0) {
return count;
}
return (one_element_size * count);
}
int64_t
ElementCount(const std::vector<int64_t>& shape)
{
int64_t count = 1;
bool is_dynamic = false;
for (const auto dim : shape) {
if (dim == -1) {
is_dynamic = true;
} else {
count *= dim;
}
}
if (is_dynamic) {
count = -1;
}
return count;
}
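// A small, hypothetical usage sketch (not part of the original file) of the
// two helpers above: a fully specified FP32 shape yields element count * 4
// bytes, while any -1 (dynamic) dimension makes both helpers return -1.
// Headers already pulled in by this translation unit are assumed.
static void
ByteSizeExample()
{
  std::vector<int64_t> fixed{8, 3, 224, 224};
  int64_t elements = ElementCount(fixed);       // 8 * 3 * 224 * 224 = 1204224
  int64_t bytes = ByteSize(fixed, "FP32");      // 1204224 * 4 = 4816896
  std::vector<int64_t> dynamic{-1, 3, 224, 224};
  int64_t unknown = ByteSize(dynamic, "FP32");  // -1 (dynamic dimension)
  std::cout << elements << " elements, " << bytes << " bytes, dynamic: "
            << unknown << std::endl;
}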
void
SerializeStringTensor(
std::vector<std::string> string_tensor, std::vector<char>* serialized_data)
{
std::string serialized = "";
for (auto s : string_tensor) {
uint32_t len = s.size();
serialized.append(reinterpret_cast<const char*>(&len), sizeof(uint32_t));
serialized.append(s);
}
std::copy(
serialized.begin(), serialized.end(),
std::back_inserter(*serialized_data));
}
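// A small, hypothetical sketch (not part of the original file) of the
// length-prepended layout produced by SerializeStringTensor above: each
// element becomes a 4-byte uint32_t length in host byte order followed by the
// raw characters, with no terminator.
static void
StringSerializationExample()
{
  std::vector<char> buf;
  SerializeStringTensor({"ab", "cde"}, &buf);
  // On a little-endian host the layout is:
  //   02 00 00 00 'a' 'b' 03 00 00 00 'c' 'd' 'e'
  // for a total of 4 + 2 + 4 + 3 = 13 bytes.
  std::cout << "serialized " << buf.size() << " bytes" << std::endl;
}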
cb::Error
SerializeExplicitTensor(
const rapidjson::Value& tensor, const std::string& dt,
std::vector<char>* decoded_data)
{
if (dt.compare("BYTES") == 0) {
std::string serialized = "";
for (const auto& value : tensor.GetArray()) {
if (!value.IsString()) {
return cb::Error(
"unable to find string data in json", pa::GENERIC_ERROR);
}
std::string element(value.GetString());
uint32_t len = element.size();
serialized.append(reinterpret_cast<const char*>(&len), sizeof(uint32_t));
serialized.append(element);
}
std::copy(
serialized.begin(), serialized.end(),
std::back_inserter(*decoded_data));
} else {
for (const auto& value : tensor.GetArray()) {
if (dt.compare("BOOL") == 0) {
if (!value.IsBool()) {
return cb::Error(
"unable to find bool data in json", pa::GENERIC_ERROR);
}
bool element(value.GetBool());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(bool));
} else if (dt.compare("UINT8") == 0) {
if (!value.IsUint()) {
return cb::Error(
"unable to find uint8_t data in json", pa::GENERIC_ERROR);
}
uint8_t element(static_cast<uint8_t>(value.GetUint()));
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(uint8_t));
} else if (dt.compare("INT8") == 0) {
if (!value.IsInt()) {
return cb::Error(
"unable to find int8_t data in json", pa::GENERIC_ERROR);
}
int8_t element(static_cast<int8_t>(value.GetInt()));
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(int8_t));
} else if (dt.compare("UINT16") == 0) {
if (!value.IsUint()) {
return cb::Error(
"unable to find uint16_t data in json", pa::GENERIC_ERROR);
}
uint16_t element(static_cast<uint16_t>(value.GetUint()));
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(uint16_t));
} else if (dt.compare("INT16") == 0) {
if (!value.IsInt()) {
return cb::Error(
"unable to find int16_t data in json", pa::GENERIC_ERROR);
}
int16_t element(static_cast<int16_t>(value.GetInt()));
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(int16_t));
} else if (dt.compare("FP16") == 0) {
return cb::Error(
"Can not use explicit tensor description for fp16 datatype",
pa::GENERIC_ERROR);
} else if (dt.compare("BF16") == 0) {
return cb::Error(
"Can not use explicit tensor description for bf16 datatype",
pa::GENERIC_ERROR);
} else if (dt.compare("UINT32") == 0) {
if (!value.IsUint()) {
return cb::Error(
"unable to find uint32_t data in json", pa::GENERIC_ERROR);
}
uint32_t element(value.GetUint());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(uint32_t));
} else if (dt.compare("INT32") == 0) {
if (!value.IsInt()) {
return cb::Error(
"unable to find int32_t data in json", pa::GENERIC_ERROR);
}
int32_t element(value.GetInt());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(int32_t));
} else if (dt.compare("FP32") == 0) {
if (!value.IsDouble()) {
return cb::Error(
"unable to find float data in json", pa::GENERIC_ERROR);
}
float element(value.GetFloat());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(float));
} else if (dt.compare("UINT64") == 0) {
if (!value.IsUint64()) {
return cb::Error(
"unable to find uint64_t data in json", pa::GENERIC_ERROR);
}
uint64_t element(value.GetUint64());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(uint64_t));
} else if (dt.compare("INT64") == 0) {
if (!value.IsInt64()) {
return cb::Error(
"unable to find int64_t data in json", pa::GENERIC_ERROR);
}
int64_t element(value.GetInt64());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(int64_t));
} else if (dt.compare("FP64") == 0) {
if (!value.IsDouble()) {
return cb::Error(
"unable to find fp64 data in json", pa::GENERIC_ERROR);
}
double element(value.GetDouble());
const char* src = reinterpret_cast<const char*>(&element);
decoded_data->insert(decoded_data->end(), src, src + sizeof(double));
}
}
}
return cb::Error::Success;
}
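// A small, hypothetical sketch (not part of the original file) of feeding
// SerializeExplicitTensor above with a JSON array parsed by rapidjson: three
// INT32 values become 12 raw bytes (host byte order) in `bytes`.
static void
ExplicitTensorExample()
{
  rapidjson::Document doc;
  doc.Parse("[1, 2, 3]");
  std::vector<char> bytes;
  cb::Error err = SerializeExplicitTensor(doc, "INT32", &bytes);
  if (err.IsOk()) {
    std::cout << "serialized " << bytes.size() << " bytes" << std::endl;  // 12
  }
}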
std::string
GetRandomString(const int string_length)
{
std::mt19937_64 gen{std::random_device()()};
std::uniform_int_distribution<size_t> dist{0, character_set.length() - 1};
std::string random_string;
std::generate_n(std::back_inserter(random_string), string_length, [&] {
return character_set[dist(gen)];
});
return random_string;
}
std::string
ShapeVecToString(const std::vector<int64_t> shape_vec, bool skip_first)
{
bool first = true;
std::string str("[");
for (const auto& value : shape_vec) {
if (skip_first) {
skip_first = false;
continue;
}
if (!first) {
str += ",";
}
str += std::to_string(value);
first = false;
}
str += "]";
return str;
}
std::string
TensorToRegionName(std::string name)
{
// Remove slashes from the name, if any.
name.erase(
std::remove_if(
name.begin(), name.end(),
[](const char& c) { return ((c == '/') || (c == '\\')); }),
name.end());
return name;
}
template <>
std::function<std::chrono::nanoseconds(std::mt19937&)>
ScheduleDistribution<Distribution::POISSON>(const double request_rate)
{
std::exponential_distribution<> dist =
std::exponential_distribution<>(request_rate);
return [dist](std::mt19937& gen) mutable {
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<double>(dist(gen)));
};
}
template <>
std::function<std::chrono::nanoseconds(std::mt19937&)>
ScheduleDistribution<Distribution::CONSTANT>(const double request_rate)
{
std::chrono::nanoseconds period =
std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<double>(1.0 / request_rate));
return [period](std::mt19937& /*gen*/) { return period; };
}
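// A small, hypothetical sketch (not part of the original file) of using the
// two specializations above: at a target rate of 100 requests/second the
// CONSTANT schedule always yields a 10 ms gap, while the POISSON schedule
// draws exponentially distributed gaps whose mean is 10 ms.
static void
ScheduleDistributionExample()
{
  std::mt19937 gen{12345};
  auto constant_gap = ScheduleDistribution<Distribution::CONSTANT>(100.0);
  auto poisson_gap = ScheduleDistribution<Distribution::POISSON>(100.0);
  std::chrono::nanoseconds fixed = constant_gap(gen);   // always 10'000'000 ns
  std::chrono::nanoseconds random = poisson_gap(gen);   // varies per draw
  std::cout << fixed.count() << " ns, " << random.count() << " ns" << std::endl;
}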
cb::TensorFormat
ParseTensorFormat(const std::string& content_type_str)
{
std::string content_type_str_lowercase{content_type_str};
std::transform(
content_type_str.cbegin(), content_type_str.cend(),
content_type_str_lowercase.begin(),
[](unsigned char c) { return std::tolower(c); });
if (content_type_str_lowercase == "binary") {
return cb::TensorFormat::BINARY;
} else if (content_type_str_lowercase == "json") {
return cb::TensorFormat::JSON;
} else {
return cb::TensorFormat::UNKNOWN;
}
}
}} // namespace triton::perfanalyzer
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <rapidjson/document.h>
#include <rapidjson/rapidjson.h>
#include <sys/stat.h>
#include <time.h>
#include <chrono>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <memory>
#include <random>
#include "client_backend/client_backend.h"
namespace pa = triton::perfanalyzer;
namespace cb = triton::perfanalyzer::clientbackend;
namespace triton { namespace perfanalyzer {
constexpr uint64_t NANOS_PER_SECOND = 1000000000;
constexpr uint64_t NANOS_PER_MILLIS = 1000000;
#define CHRONO_TO_NANOS(TS) \
(std::chrono::duration_cast<std::chrono::nanoseconds>(TS.time_since_epoch()) \
.count())
#define CHRONO_TO_MILLIS(TS) (CHRONO_TO_NANOS(TS) / pa::NANOS_PER_MILLIS)
//==============================================================================
// Will use the characters specified here to construct random strings
std::string const character_set =
"abcdefghijklmnaoqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890 .?!";
// A boolean flag to mark an interrupt and commencement of early exit
extern volatile bool early_exit;
enum Distribution { POISSON = 0, CONSTANT = 1, CUSTOM = 2 };
enum SearchMode { LINEAR = 0, BINARY = 1, NONE = 2 };
enum SharedMemoryType {
SYSTEM_SHARED_MEMORY = 0,
CUDA_SHARED_MEMORY = 1,
NO_SHARED_MEMORY = 2
};
constexpr uint64_t NO_LIMIT = 0;
// Templated range class that tracks the start, end, and step for a range.
//
template <typename T>
class Range {
public:
Range(T start, T end, T step) : start(start), end(end), step(step) {}
T start;
T end;
T step;
};
// Converts a datatype from TensorFlow to perf analyzer space.
// \param tf_dtype The data type string returned from the model metadata.
// \param datatype Returns the datatype in perf_analyzer space.
// \return error status. Returns Non-Ok if the datatype is not supported.
cb::Error ConvertDTypeFromTFS(
const std::string& tf_dtype, std::string* datatype);
// Parse the communication protocol type
cb::ProtocolType ParseProtocol(const std::string& str);
// To check whether the path points to a valid system directory
bool IsDirectory(const std::string& path);
// To check whether the path points to a valid system file
bool IsFile(const std::string& complete_path);
// Calculates the byte size of a tensor for the given shape and datatype.
int64_t ByteSize(
const std::vector<int64_t>& shape, const std::string& datatype);
// Get the number of elements in the tensor for the given shape.
int64_t ElementCount(const std::vector<int64_t>& shape);
// Serializes the string tensor to length-prepended bytes.
void SerializeStringTensor(
std::vector<std::string> string_tensor, std::vector<char>* serialized_data);
// Serializes an explicit tensor read from the data file to the
// raw bytes.
cb::Error SerializeExplicitTensor(
const rapidjson::Value& tensor, const std::string& dt,
std::vector<char>* decoded_data);
// Generates a random string of specified length using characters specified in
// character_set.
std::string GetRandomString(const int string_length);
// Returns the shape string containing the values provided in the vector
std::string ShapeVecToString(
const std::vector<int64_t> shape_vec, bool skip_first = false);
// Remove slashes from tensor name, if any
std::string TensorToRegionName(std::string name);
// Returns the request schedule distribution generator with the specified
// request rate.
template <Distribution distribution>
std::function<std::chrono::nanoseconds(std::mt19937&)> ScheduleDistribution(
const double request_rate);
// Parse the HTTP tensor format
cb::TensorFormat ParseTensorFormat(const std::string& tensor_format_str);
}} // namespace triton::perfanalyzer