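// ============================================================
// gRPC model control client: fetches the model repository
// index, loads and unloads the "simple" model while checking
// its readiness, and verifies that loading a bogus model name
// fails.
// ============================================================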
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
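// A minimal usage sketch of the macro above (IsServerLive is one of the
// client calls returning tc::Error; shown here for illustration only):
//   bool live;
//   FAIL_IF_ERR(client->IsServerLive(&live), "unable to get server liveness");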
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
std::string model_name = "simple";
  // Create an InferenceServerGrpcClient instance to communicate with the
  // server using the gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
inference::RepositoryIndexResponse repository_index;
FAIL_IF_ERR(
client->ModelRepositoryIndex(&repository_index, http_headers),
"Failed to get repository index");
  if (repository_index.models().size() != 8) {
    std::cerr << "error: expected 8 models in the repository index, got "
              << repository_index.models().size() << std::endl;
    exit(1);
  }
FAIL_IF_ERR(
client->LoadModel(model_name, http_headers), "Failed to load model");
bool model_ready;
FAIL_IF_ERR(
client->IsModelReady(&model_ready, model_name),
"unable to get model readiness");
  if (!model_ready) {
    std::cerr << "error: model " << model_name << " is not ready" << std::endl;
    exit(1);
  }
FAIL_IF_ERR(
client->UnloadModel(model_name, http_headers), "Failed to unload model");
FAIL_IF_ERR(
client->IsModelReady(&model_ready, model_name),
"unable to get model readiness");
  if (model_ready) {
    std::cerr << "error: model " << model_name
              << " is still ready after unloading" << std::endl;
    exit(1);
  }
  // Loading a model that does not exist must fail.
  tc::Error err = client->LoadModel("wrong_model_name", http_headers);
  if (err.IsOk()) {
    std::cerr << "error: wrong model name was successfully loaded"
              << std::endl;
    exit(1);
  }
return 0;
}
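// ============================================================
// gRPC streaming sequence client: sends three inference
// sequences (two integer sequence IDs, one string sequence ID)
// over a single bidirectional stream and validates the
// accumulated results.
// ============================================================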
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <string>
#include <vector>
#include "grpc_client.h"
namespace tc = triton::client;
using ResultList = std::vector<std::shared_ptr<tc::InferResult>>;
// Global mutex to synchronize the threads
std::mutex mutex_;
std::condition_variable cv_;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service and its gRPC port>"
<< std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
std::cerr << "\t-t <stream timeout in microseconds>" << std::endl;
std::cerr << "\t-o <offset for sequence ID>" << std::endl;
std::cerr << std::endl;
std::cerr << "For -o, the client will use sequence ID <1 + 2 * offset> "
<< "and <2 + 2 * offset>. Default offset is 0." << std::endl;
exit(1);
}
void
StreamSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
tc::InferOptions& options, int32_t value, const int32_t index)
{
// Initialize the inputs with the data.
tc::InferInput* input;
std::vector<int64_t> shape{1, 1};
FAIL_IF_ERR(
tc::InferInput::Create(&input, "INPUT", shape, "INT32"),
"unable to create 'INPUT'");
std::shared_ptr<tc::InferInput> ivalue(input);
FAIL_IF_ERR(ivalue->Reset(), "unable to reset 'INPUT'");
FAIL_IF_ERR(
ivalue->AppendRaw(reinterpret_cast<uint8_t*>(&value), sizeof(int32_t)),
"unable to set data for 'INPUT'");
std::vector<tc::InferInput*> inputs = {ivalue.get()};
// Send inference request to the inference server.
FAIL_IF_ERR(client->AsyncStreamInfer(options, inputs), "unable to run model");
}
void
StreamSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
const std::string& model_name, int32_t value, const uint64_t sequence_id,
bool start_of_sequence, bool end_of_sequence, const int32_t index)
{
// Stream send for unsigned int sequence IDs
tc::InferOptions options(model_name);
options.sequence_id_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
options.request_id_ =
std::to_string(sequence_id) + "_" + std::to_string(index);
StreamSend(client, options, value, index);
}
void
StreamSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
const std::string& model_name, int32_t value,
const std::string& sequence_id, bool start_of_sequence,
bool end_of_sequence, const int32_t index)
{
// Stream send for string sequence IDs
tc::InferOptions options(model_name);
options.sequence_id_str_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
options.request_id_ = sequence_id + "_" + std::to_string(index);
StreamSend(client, options, value, index);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
bool dyna_sequence = false;
std::string url("localhost:8001");
tc::Headers http_headers;
int sequence_id_offset = 0;
uint32_t stream_timeout = 0;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vdu:H:t:o:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'd':
dyna_sequence = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case 't':
stream_timeout = std::stoi(optarg);
break;
case 'o':
sequence_id_offset = std::stoi(optarg);
break;
case '?':
Usage(argv);
break;
}
}
tc::Error err;
// We use the custom "sequence" model which takes 1 input value. The
// output is the accumulated value of the inputs. See
// src/custom/sequence.
std::string int_model_name =
dyna_sequence ? "simple_dyna_sequence" : "simple_sequence";
std::string string_model_name =
dyna_sequence ? "simple_string_dyna_sequence" : "simple_sequence";
const uint64_t int_sequence_id0 = 1 + sequence_id_offset * 2;
const uint64_t int_sequence_id1 = 2 + sequence_id_offset * 2;
  // For string sequence IDs, the dyna backend requires that the
  // sequence ID be decodable into an integer; otherwise we use a test
  // string sequence ID and a model that doesn't require correlation ID
  // (corrid) control.
const std::string string_sequence_id0 =
dyna_sequence ? std::to_string(3 + sequence_id_offset * 2) : "SEQ-3";
std::cout << "sequence ID " << int_sequence_id0 << " : "
<< "sequence ID " << int_sequence_id1 << " : "
<< "sequence ID " << string_sequence_id0 << std::endl;
  // Create an InferenceServerGrpcClient instance to communicate with the
  // server using the gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
  // Now send the inference sequences...
//
std::vector<int32_t> values{11, 7, 5, 3, 2, 0, 1};
ResultList result_list;
FAIL_IF_ERR(
client->StartStream(
[&](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr(result);
std::lock_guard<std::mutex> lk(mutex_);
result_list.push_back(result_ptr);
}
cv_.notify_all();
},
false /*ship_stats*/, stream_timeout, http_headers),
"unable to establish a streaming connection to server");
// Send requests, first reset accumulator for the sequence.
int32_t index = 0;
StreamSend(
client, int_model_name, 0, int_sequence_id0, true /* start-of-sequence */,
false /* end-of-sequence */, index++);
StreamSend(
client, int_model_name, 100, int_sequence_id1,
true /* start-of-sequence */, false /* end-of-sequence */, index++);
StreamSend(
client, string_model_name, 20, string_sequence_id0,
true /* start-of-sequence */, false /* end-of-sequence */, index++);
// Now send a sequence of values...
for (int32_t v : values) {
StreamSend(
client, int_model_name, v, int_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */, index++);
StreamSend(
client, int_model_name, -v, int_sequence_id1,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */, index++);
StreamSend(
client, string_model_name, -v, string_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */, index++);
}
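  // At this point 3 * (values.size() + 1) requests are in flight: one
  // accumulator-reset request plus values.size() data requests for each
  // of the three sequences. The final value (1) carries the
  // end-of-sequence flag.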
if (stream_timeout == 0) {
    // Wait until all callbacks are invoked
    {
      std::unique_lock<std::mutex> lk(mutex_);
      cv_.wait(
          lk, [&]() { return result_list.size() > (3 * values.size() + 2); });
    }
} else {
auto timeout = std::chrono::microseconds(stream_timeout);
// Wait until all callbacks are invoked or the timeout expires
{
std::unique_lock<std::mutex> lk(mutex_);
if (!cv_.wait_for(lk, timeout, [&]() {
return (result_list.size() > (3 * values.size() + 2));
})) {
std::cerr << "Stream has been closed" << std::endl;
exit(1);
}
}
}
// Extract data from the result
std::vector<int32_t> int_result0_data;
std::vector<int32_t> int_result1_data;
std::vector<int32_t> string_result0_data;
for (const auto& this_result : result_list) {
auto err = this_result->RequestStatus();
if (!err.IsOk()) {
std::cerr << "The inference failed: " << err << std::endl;
exit(1);
}
// Get pointers to the result returned...
int32_t* output_data;
size_t output_byte_size;
FAIL_IF_ERR(
this_result->RawData(
"OUTPUT", (const uint8_t**)&output_data, &output_byte_size),
"unable to get result data for 'OUTPUT'");
if (output_byte_size != 4) {
std::cerr << "error: received incorrect byte size for 'OUTPUT': "
<< output_byte_size << std::endl;
exit(1);
}
std::string request_id;
FAIL_IF_ERR(
this_result->Id(&request_id), "unable to get request id for response");
try {
std::string this_sequence_id =
std::string(request_id, 0, request_id.find("_"));
if (std::stoi(this_sequence_id) == int_sequence_id0) {
int_result0_data.push_back(*output_data);
} else if (std::stoi(this_sequence_id) == int_sequence_id1) {
int_result1_data.push_back(*output_data);
} else if (this_sequence_id == string_sequence_id0) {
string_result0_data.push_back(*output_data);
} else {
std::cerr << "error: received incorrect sequence id in response: "
<< this_sequence_id << std::endl;
exit(1);
}
}
catch (std::invalid_argument& e) {
      // std::stoi throws std::invalid_argument for the non-numeric test
      // sequence ID "SEQ-3".
string_result0_data.push_back(*output_data);
}
}
for (size_t i = 0; i < int_result0_data.size(); i++) {
int32_t int_seq0_expected = (i == 0) ? 1 : values[i - 1];
int32_t int_seq1_expected = (i == 0) ? 101 : values[i - 1] * -1;
int32_t string_seq0_expected;
// For string sequence ID case we are testing two different backends
if ((i == 0) && dyna_sequence) {
string_seq0_expected = 20;
} else if ((i == 0) && !dyna_sequence) {
string_seq0_expected = 21;
} else if ((i != 0) && dyna_sequence) {
string_seq0_expected = values[i - 1] * -1 + string_result0_data[i - 1];
} else {
string_seq0_expected = values[i - 1] * -1;
}
// The dyna_sequence custom backend adds the sequence ID to
// the last request in a sequence.
if (dyna_sequence && (i != 0) && (values[i - 1] == 1)) {
int_seq0_expected += int_sequence_id0;
int_seq1_expected += int_sequence_id1;
string_seq0_expected += std::stoi(string_sequence_id0);
}
std::cout << "[" << i << "] " << int_result0_data[i] << " : "
<< int_result1_data[i] << " : " << string_result0_data[i]
<< std::endl;
if ((int_seq0_expected != int_result0_data[i]) ||
(int_seq1_expected != int_result1_data[i]) ||
(string_seq0_expected != string_result0_data[i])) {
std::cout << "[ expected ] " << int_seq0_expected << " : "
<< int_seq1_expected << " : " << string_seq0_expected
<< std::endl;
return 1;
}
}
return 0;
}
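// ============================================================
// gRPC synchronous sequence client: sends the same three
// sequences as the streaming example, but each request uses a
// blocking Infer() call.
// ============================================================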
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <string>
#include <vector>
#include "grpc_client.h"
namespace tc = triton::client;
using ResultList = std::vector<std::shared_ptr<tc::InferResult>>;
// Global mutex to synchronize the threads
std::mutex mutex_;
std::condition_variable cv_;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service and its gRPC port>"
<< std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
std::cerr << "\t-o <offset for sequence ID>" << std::endl;
std::cerr << std::endl;
std::cerr << "For -o, the client will use sequence ID <1 + 2 * offset> "
<< "and <2 + 2 * offset>. Default offset is 0." << std::endl;
exit(1);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
tc::InferOptions& options, int32_t value, std::vector<int32_t>& result_data,
tc::Headers& http_headers)
{
// Initialize the inputs with the data.
tc::InferInput* input;
std::vector<int64_t> shape{1, 1};
FAIL_IF_ERR(
tc::InferInput::Create(&input, "INPUT", shape, "INT32"),
"unable to create 'INPUT'");
std::shared_ptr<tc::InferInput> ivalue(input);
FAIL_IF_ERR(ivalue->Reset(), "unable to reset 'INPUT'");
FAIL_IF_ERR(
ivalue->AppendRaw(reinterpret_cast<uint8_t*>(&value), sizeof(int32_t)),
"unable to set data for 'INPUT'");
std::vector<tc::InferInput*> inputs = {ivalue.get()};
tc::InferRequestedOutput* output;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output, "OUTPUT"),
"unable to get 'OUTPUT'");
std::shared_ptr<const tc::InferRequestedOutput> routput;
routput.reset(output);
std::vector<const tc::InferRequestedOutput*> outputs = {routput.get()};
tc::InferResult* result;
// Send inference request to the inference server.
FAIL_IF_ERR(
client->Infer(&result, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> this_result(result);
// Get pointers to the result returned...
int32_t* output_data;
size_t output_byte_size;
FAIL_IF_ERR(
this_result->RawData(
"OUTPUT", (const uint8_t**)&output_data, &output_byte_size),
"unable to get result data for 'OUTPUT'");
if (output_byte_size != 4) {
std::cerr << "error: received incorrect byte size for 'OUTPUT': "
<< output_byte_size << std::endl;
exit(1);
}
result_data.push_back(*output_data);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
const std::string& model_name, int32_t value, const uint64_t sequence_id,
bool start_of_sequence, bool end_of_sequence,
std::vector<int32_t>& result_data, tc::Headers& http_headers)
{
tc::InferOptions options(model_name);
options.sequence_id_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
SyncSend(client, options, value, result_data, http_headers);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerGrpcClient>& client,
const std::string& model_name, int32_t value,
const std::string& sequence_id, bool start_of_sequence,
bool end_of_sequence, std::vector<int32_t>& result_data,
tc::Headers& http_headers)
{
tc::InferOptions options(model_name);
options.sequence_id_str_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
SyncSend(client, options, value, result_data, http_headers);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
bool dyna_sequence = false;
std::string url("localhost:8001");
int sequence_id_offset = 0;
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vdu:H:o:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'd':
dyna_sequence = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case 'o':
sequence_id_offset = std::stoi(optarg);
break;
case '?':
Usage(argv);
break;
}
}
tc::Error err;
// We use the custom "sequence" model which takes 1 input value. The
// output is the accumulated value of the inputs. See
// src/custom/sequence.
std::string int_model_name =
dyna_sequence ? "simple_dyna_sequence" : "simple_sequence";
std::string string_model_name =
dyna_sequence ? "simple_string_dyna_sequence" : "simple_sequence";
const uint64_t int_sequence_id0 = 1 + sequence_id_offset * 2;
const uint64_t int_sequence_id1 = 2 + sequence_id_offset * 2;
  // For string sequence IDs, the dyna backend requires that the
  // sequence ID be decodable into an integer; otherwise we use a test
  // string sequence ID and a model that doesn't require correlation ID
  // (corrid) control.
const std::string string_sequence_id0 =
dyna_sequence ? std::to_string(3 + sequence_id_offset * 2) : "SEQ-3";
std::cout << "sequence ID " << int_sequence_id0 << " : "
<< "sequence ID " << int_sequence_id1 << " : "
<< "sequence ID " << string_sequence_id0 << std::endl;
  // Create an InferenceServerGrpcClient instance to communicate with the
  // server using the gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
  // Now send the inference sequences...
//
std::vector<int32_t> values{11, 7, 5, 3, 2, 0, 1};
std::vector<int32_t> int_result0_data;
std::vector<int32_t> int_result1_data;
std::vector<int32_t> string_result0_data;
// Send requests, first reset accumulator for the sequence.
SyncSend(
client, int_model_name, 0, int_sequence_id0, true /* start-of-sequence */,
false /* end-of-sequence */, int_result0_data, http_headers);
SyncSend(
client, int_model_name, 100, int_sequence_id1,
true /* start-of-sequence */, false /* end-of-sequence */,
int_result1_data, http_headers);
SyncSend(
client, string_model_name, 20, string_sequence_id0,
true /* start-of-sequence */, false /* end-of-sequence */,
string_result0_data, http_headers);
// Now send a sequence of values...
for (int32_t v : values) {
SyncSend(
client, int_model_name, v, int_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
int_result0_data, http_headers);
SyncSend(
client, int_model_name, -v, int_sequence_id1,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
int_result1_data, http_headers);
SyncSend(
client, string_model_name, -v, string_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
string_result0_data, http_headers);
}
for (size_t i = 0; i < int_result0_data.size(); i++) {
int32_t int_seq0_expected = (i == 0) ? 1 : values[i - 1];
int32_t int_seq1_expected = (i == 0) ? 101 : values[i - 1] * -1;
int32_t string_seq0_expected;
// For string sequence ID case we are testing two different backends
if ((i == 0) && dyna_sequence) {
string_seq0_expected = 20;
} else if ((i == 0) && !dyna_sequence) {
string_seq0_expected = 21;
} else if ((i != 0) && dyna_sequence) {
string_seq0_expected = values[i - 1] * -1 + string_result0_data[i - 1];
} else {
string_seq0_expected = values[i - 1] * -1;
}
// The dyna_sequence custom backend adds the sequence ID to
// the last request in a sequence.
if (dyna_sequence && (i != 0) && (values[i - 1] == 1)) {
int_seq0_expected += int_sequence_id0;
int_seq1_expected += int_sequence_id1;
string_seq0_expected += std::stoi(string_sequence_id0);
}
std::cout << "[" << i << "] " << int_result0_data[i] << " : "
<< int_result1_data[i] << " : " << string_result0_data[i]
<< std::endl;
if ((int_seq0_expected != int_result0_data[i]) ||
(int_seq1_expected != int_result1_data[i]) ||
(string_seq0_expected != string_result0_data[i])) {
std::cout << "[ expected ] " << int_seq0_expected << " : "
<< int_seq1_expected << " : " << string_seq0_expected
<< std::endl;
return 1;
}
}
return 0;
}
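// ============================================================
// gRPC system shared memory client: places the "simple"
// model's input and output tensors in POSIX shared memory
// regions registered with the server.
// ============================================================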
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
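  // For example, with INPUT0 = {0, 1, ..., 15} and INPUT1 all ones (as
  // initialized below), the expected results are OUTPUT0 = {1, 2, ..., 16}
  // and OUTPUT1 = {-1, 0, ..., 14}.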
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerGrpcClient instance to communicate with the
  // server using the gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
// Unregistering all shared memory regions for a clean
// start.
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in Shared Memory. Initialize Input0 to unique
// integers and Input1 to all ones.
std::string shm_key = "/input_simple";
int shm_fd_ip, *input0_shm;
  FAIL_IF_ERR(
      tc::CreateSharedMemoryRegion(shm_key, input_byte_size * 2, &shm_fd_ip),
      "unable to create input shared memory region");
  FAIL_IF_ERR(
      tc::MapSharedMemory(
          shm_fd_ip, 0, input_byte_size * 2, (void**)&input0_shm),
      "unable to map input shared memory region");
  FAIL_IF_ERR(
      tc::CloseSharedMemory(shm_fd_ip),
      "unable to close input shared memory descriptor");
int* input1_shm = (int*)(input0_shm + 16);
for (size_t i = 0; i < 16; ++i) {
*(input0_shm + i) = i;
*(input1_shm + i) = 1;
}
FAIL_IF_ERR(
client->RegisterSystemSharedMemory(
"input_data", "/input_simple", input_byte_size * 2),
"failed to register input shared memory region");
FAIL_IF_ERR(
input0_ptr->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
input1_ptr->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in Shared Memory
shm_key = "/output_simple";
int shm_fd_op;
int* output0_shm;
  FAIL_IF_ERR(
      tc::CreateSharedMemoryRegion(shm_key, output_byte_size * 2, &shm_fd_op),
      "unable to create output shared memory region");
  FAIL_IF_ERR(
      tc::MapSharedMemory(
          shm_fd_op, 0, output_byte_size * 2, (void**)&output0_shm),
      "unable to map output shared memory region");
  FAIL_IF_ERR(
      tc::CloseSharedMemory(shm_fd_op),
      "unable to close output shared memory descriptor");
int* output1_shm = (int*)(output0_shm + 16);
FAIL_IF_ERR(
client->RegisterSystemSharedMemory(
"output_data", "/output_simple", output_byte_size * 2),
"failed to register output shared memory region");
FAIL_IF_ERR(
output0_ptr->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
output1_ptr->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_shm[i] << " + " << input1_shm[i] << " = "
<< output0_shm[i] << std::endl;
std::cout << input0_shm[i] << " - " << input1_shm[i] << " = "
<< output1_shm[i] << std::endl;
if ((input0_shm[i] + input1_shm[i]) != output0_shm[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_shm[i] - input1_shm[i]) != output1_shm[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
  // Get the shared memory regions active/registered with Triton
inference::SystemSharedMemoryStatusResponse status;
FAIL_IF_ERR(
client->SystemSharedMemoryStatus(&status),
"failed to get shared memory status");
std::cout << "Shared Memory Status:\n" << status.DebugString() << "\n";
// Unregister shared memory
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory("output_data"),
"unable to unregister shared memory output region");
// Cleanup shared memory
  FAIL_IF_ERR(
      tc::UnmapSharedMemory(input0_shm, input_byte_size * 2),
      "unable to unmap input shared memory region");
  FAIL_IF_ERR(
      tc::UnlinkSharedMemoryRegion("/input_simple"),
      "unable to unlink input shared memory region");
  FAIL_IF_ERR(
      tc::UnmapSharedMemory(output0_shm, output_byte_size * 2),
      "unable to unmap output shared memory region");
  FAIL_IF_ERR(
      tc::UnlinkSharedMemoryRegion("/output_simple"),
      "unable to unlink output shared memory region");
std::cout << "PASS : System Shared Memory " << std::endl;
return 0;
}
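// ============================================================
// gRPC string (BYTES) client: runs the "simple_string" model,
// which sums and differences integers passed as string
// tensors.
// ============================================================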
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "grpc_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(result->Shape(name, &shape), "unable to get shape for " + name);
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for " << name << std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype), "unable to get datatype for " + name);
// Validate datatype
if (datatype.compare("BYTES") != 0) {
std::cerr << "error: received incorrect datatype for " << name << ": "
<< datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8001");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 strings
// each and returns 2 output tensors of 16 strings each. The input
// strings must represent integers. One output tensor is the
// element-wise sum of the inputs and one output is the element-wise
// difference.
std::string model_name = "simple_string";
std::string model_version = "";
  // Create an InferenceServerGrpcClient instance to communicate with the
  // server using the gRPC protocol.
std::unique_ptr<tc::InferenceServerGrpcClient> client;
FAIL_IF_ERR(
tc::InferenceServerGrpcClient::Create(&client, url, verbose),
"unable to create grpc client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones. The input tensors
// are the string representation of these values.
std::vector<std::string> input0_data(16);
std::vector<std::string> input1_data(16);
std::vector<int32_t> expected_sum(16);
std::vector<int32_t> expected_diff(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = std::to_string(i);
input1_data[i] = std::to_string(1);
expected_sum[i] = i + 1;
expected_diff[i] = i - 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "BYTES"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "BYTES"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendFromString(input0_data),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendFromString(input1_data),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get OUTPUT0");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get OUTPUT1");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Get the result data
std::vector<std::string> result0_data;
std::vector<std::string> result1_data;
FAIL_IF_ERR(
results_ptr->StringData("OUTPUT0", &result0_data),
"unable to get data for OUTPUT0");
if (result0_data.size() != 16) {
std::cerr << "error: received incorrect number of strings for OUTPUT0: "
<< result0_data.size() << std::endl;
exit(1);
}
FAIL_IF_ERR(
results_ptr->StringData("OUTPUT1", &result1_data),
"unable to get data for OUTPUT1");
if (result1_data.size() != 16) {
std::cerr << "error: received incorrect number of strings for OUTPUT1: "
<< result1_data.size() << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input0_data[i] << " = "
<< result0_data[i] << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< result1_data[i] << std::endl;
if (expected_sum[i] != std::stoi(result0_data[i])) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if (expected_diff[i] != std::stoi(result1_data[i])) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << results_ptr->DebugString() << std::endl;
std::cout << "PASS : String Infer" << std::endl;
return 0;
}
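// ============================================================
// HTTP asynchronous client: issues AsyncInfer requests against
// the "simple" model and synchronizes with the callbacks via a
// mutex and condition variable.
// ============================================================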
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <string>
#include "http_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
ValidateResult(
const std::shared_ptr<tc::InferResult> result,
std::vector<int32_t>& input0_data, std::vector<int32_t>& input1_data)
{
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", result);
ValidateShapeAndDatatype("OUTPUT1", result);
// Get pointers to the result returned...
int32_t* output0_data;
size_t output0_byte_size;
FAIL_IF_ERR(
result->RawData(
"OUTPUT0", (const uint8_t**)&output0_data, &output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (output0_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< output0_byte_size << std::endl;
exit(1);
}
int32_t* output1_data;
size_t output1_byte_size;
FAIL_IF_ERR(
result->RawData(
"OUTPUT1", (const uint8_t**)&output1_data, &output1_byte_size),
"unable to get result data for 'OUTPUT1'");
  if (output1_byte_size != 64) {
    std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
              << output1_byte_size << std::endl;
    exit(1);
  }
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input1_data[i] << " = "
<< *(output0_data + i) << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< *(output1_data + i) << std::endl;
if ((input0_data[i] + input1_data[i]) != *(output0_data + i)) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data[i] - input1_data[i]) != *(output1_data + i)) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << result->DebugString() << std::endl;
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
uint32_t client_timeout = 0;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:t:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 't':
client_timeout = std::stoi(optarg);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones.
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
options.client_timeout_ = client_timeout;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
// Send inference request to the inference server.
std::mutex mtx;
std::condition_variable cv;
size_t repeat_cnt = 2;
size_t done_cnt = 0;
for (size_t i = 0; i < repeat_cnt; i++) {
FAIL_IF_ERR(
client->AsyncInfer(
[&, i](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr;
result_ptr.reset(result);
std::lock_guard<std::mutex> lk(mtx);
std::cout << "Callback no." << i << " is called" << std::endl;
done_cnt++;
if (result_ptr->RequestStatus().IsOk()) {
ValidateResult(result_ptr, input0_data, input1_data);
} else {
std::cerr << "error: Inference failed: "
<< result_ptr->RequestStatus() << std::endl;
exit(1);
}
}
cv.notify_all();
},
options, inputs, outputs, http_headers),
"unable to run model");
}
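  // Note: AsyncInfer transfers ownership of the raw InferResult* to the
  // callback, hence the shared_ptr wrapper above that releases each
  // result when the callback finishes with it.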
// Wait until all callbacks are invoked
{
std::unique_lock<std::mutex> lk(mtx);
    cv.wait(lk, [&]() { return done_cnt >= repeat_cnt; });
}
if (done_cnt == repeat_cnt) {
std::cout << "All done" << std::endl;
} else {
std::cerr << "Done cnt: " << done_cnt
<< " does not match repeat cnt: " << repeat_cnt << std::endl;
exit(1);
}
// Send another AsyncInfer whose callback defers the completed request
// to another thread (main thread) to handle
bool callback_invoked = false;
std::shared_ptr<tc::InferResult> result_placeholder;
FAIL_IF_ERR(
client->AsyncInfer(
[&](tc::InferResult* result) {
{
std::shared_ptr<tc::InferResult> result_ptr;
result_ptr.reset(result);
// Defer the response retrieval to main thread
std::lock_guard<std::mutex> lk(mtx);
callback_invoked = true;
result_placeholder = std::move(result_ptr);
}
cv.notify_all();
},
options, inputs, outputs, http_headers),
"unable to run model");
// Ensure callback is completed
{
std::unique_lock<std::mutex> lk(mtx);
cv.wait(lk, [&]() { return callback_invoked; });
}
// Get deferred response
std::cout << "Getting results from deferred response" << std::endl;
if (result_placeholder->RequestStatus().IsOk()) {
ValidateResult(result_placeholder, input0_data, input1_data);
} else {
std::cerr << "error: Inference failed: "
<< result_placeholder->RequestStatus() << std::endl;
exit(1);
}
tc::InferStat infer_stat;
client->ClientInferStat(&infer_stat);
std::cout << "completed_request_count " << infer_stat.completed_request_count
<< std::endl;
std::cout << "cumulative_total_request_time_ns "
<< infer_stat.cumulative_total_request_time_ns << std::endl;
std::cout << "cumulative_send_time_ns " << infer_stat.cumulative_send_time_ns
<< std::endl;
std::cout << "cumulative_receive_time_ns "
<< infer_stat.cumulative_receive_time_ns << std::endl;
std::cout << "PASS : Async Infer" << std::endl;
return 0;
}
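// ============================================================
// HTTP CUDA shared memory client: registers GPU memory with
// the server through CUDA IPC handles so tensor data stays on
// the device.
// ============================================================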
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cuda_runtime_api.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
#define FAIL_IF_CUDA_ERR(FUNC) \
{ \
const cudaError_t result = FUNC; \
if (result != cudaSuccess) { \
std::cerr << "CUDA exception (line " << __LINE__ \
<< "): " << cudaGetErrorName(result) << " (" \
<< cudaGetErrorString(result) << ")" << std::endl; \
exit(1); \
} \
}
void
CreateCUDAIPCHandle(
cudaIpcMemHandle_t* cuda_handle, void* input_d_ptr, int device_id = 0)
{
// Set the GPU device to the desired GPU
FAIL_IF_CUDA_ERR(cudaSetDevice(device_id));
// Create IPC handle for data on the gpu
FAIL_IF_CUDA_ERR(cudaIpcGetMemHandle(cuda_handle, input_d_ptr));
}
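// cudaIpcGetMemHandle produces a handle that another process (here the
// Triton server) can pass to cudaIpcOpenMemHandle to map the same device
// allocation, so tensor data never leaves the GPU.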
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
// Unregistering all shared memory regions for a clean
// start.
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in CUDA Shared Memory. Initialize Input0 to
// unique integers and Input1 to all ones.
int input_data[32];
for (size_t i = 0; i < 16; ++i) {
input_data[i] = i;
input_data[16 + i] = 1;
}
  // Copy INPUT0 and INPUT1 data into the GPU memory backing the shared
  // memory region.
  int* input_d_ptr;
  FAIL_IF_CUDA_ERR(cudaMalloc((void**)&input_d_ptr, input_byte_size * 2));
  FAIL_IF_CUDA_ERR(cudaMemcpy(
      (void*)input_d_ptr, (void*)input_data, input_byte_size * 2,
      cudaMemcpyHostToDevice));
cudaIpcMemHandle_t input_cuda_handle;
CreateCUDAIPCHandle(&input_cuda_handle, (void*)input_d_ptr);
FAIL_IF_ERR(
client->RegisterCudaSharedMemory(
"input_data", input_cuda_handle, 0 /* device_id */,
input_byte_size * 2),
"failed to register input shared memory region");
FAIL_IF_ERR(
input0_ptr->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
input1_ptr->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in CUDA Shared Memory
  int *output0_d_ptr, *output1_d_ptr;
  FAIL_IF_CUDA_ERR(cudaMalloc((void**)&output0_d_ptr, output_byte_size * 2));
  output1_d_ptr = output0_d_ptr + 16;
cudaIpcMemHandle_t output_cuda_handle;
CreateCUDAIPCHandle(&output_cuda_handle, (void*)output0_d_ptr);
FAIL_IF_ERR(
client->RegisterCudaSharedMemory(
"output_data", output_cuda_handle, 0 /* device_id */,
output_byte_size * 2),
"failed to register output shared memory region");
FAIL_IF_ERR(
output0_ptr->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
output1_ptr->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
  // Copy the output data back to the CPU.
  int output0_data[16], output1_data[16];
  FAIL_IF_CUDA_ERR(cudaMemcpy(
      output0_data, output0_d_ptr, output_byte_size, cudaMemcpyDeviceToHost));
  FAIL_IF_CUDA_ERR(cudaMemcpy(
      output1_data, output1_d_ptr, output_byte_size, cudaMemcpyDeviceToHost));
for (size_t i = 0; i < 16; ++i) {
std::cout << input_data[i] << " + " << input_data[16 + i] << " = "
<< output0_data[i] << std::endl;
std::cout << input_data[i] << " + " << input_data[16 + i] << " = "
<< output1_data[i] << std::endl;
if ((input_data[i] + input_data[16 + i]) != output0_data[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input_data[i] - input_data[16 + i]) != output1_data[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get shared memory regions active/registered within triton
std::string shm_status;
FAIL_IF_ERR(
client->CudaSharedMemoryStatus(&shm_status),
"failed to get shared memory status");
std::cout << "Shared Memory Status:\n" << shm_status << "\n";
// Unregister shared memory
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory("output_data"),
"unable to unregister shared memory output region");
// Free GPU memory
FAIL_IF_CUDA_ERR(cudaFree(input_d_ptr));
FAIL_IF_CUDA_ERR(cudaFree(output0_d_ptr));
std::cout << "PASS : Cuda Shared Memory " << std::endl;
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
#include "json_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
bool live;
FAIL_IF_ERR(
client->IsServerLive(&live, http_headers),
"unable to get server liveness");
if (!live) {
std::cerr << "error: server is not live" << std::endl;
exit(1);
}
  bool ready;
  FAIL_IF_ERR(
      client->IsServerReady(&ready, http_headers),
      "unable to get server readiness");
  if (!ready) {
    std::cerr << "error: server is not ready" << std::endl;
    exit(1);
  }
bool model_ready;
FAIL_IF_ERR(
client->IsModelReady(
&model_ready, model_name, model_version, http_headers),
"unable to get model readiness");
if (!model_ready) {
std::cerr << "error: model " << model_name << " is not live" << std::endl;
exit(1);
}
{
std::string server_metadata;
FAIL_IF_ERR(
client->ServerMetadata(&server_metadata, http_headers),
"unable to get server metadata");
rapidjson::Document server_metadata_json;
FAIL_IF_ERR(
tc::ParseJson(&server_metadata_json, server_metadata),
"failed to parse server metadata");
if ((std::string(server_metadata_json["name"].GetString()))
.compare("triton") != 0) {
std::cerr << "error: unexpected server metadata: " << server_metadata
<< std::endl;
exit(1);
}
}
{
std::string model_metadata;
FAIL_IF_ERR(
client->ModelMetadata(
&model_metadata, model_name, model_version, http_headers),
"unable to get model metadata");
rapidjson::Document model_metadata_json;
FAIL_IF_ERR(
tc::ParseJson(&model_metadata_json, model_metadata),
"failed to parse model metadata");
if ((std::string(model_metadata_json["name"].GetString()))
.compare(model_name) != 0) {
std::cerr << "error: unexpected model metadata: " << model_metadata
<< std::endl;
exit(1);
}
}
{
std::string model_config;
FAIL_IF_ERR(
client->ModelConfig(
&model_config, model_name, model_version, http_headers),
"unable to get model config");
rapidjson::Document model_config_json;
FAIL_IF_ERR(
tc::ParseJson(&model_config_json, model_config),
"failed to parse model config");
if ((std::string(model_config_json["name"].GetString()))
.compare(model_name) != 0) {
std::cerr << "error: unexpected model config: " << model_config
<< std::endl;
exit(1);
}
}
{
std::string model_metadata;
tc::Error err = client->ModelMetadata(
&model_metadata, "wrong_model_name", model_version, http_headers);
if (err.IsOk()) {
std::cerr << "error: expected an error but got: " << err << std::endl;
exit(1);
}
}
std::cout << "SUCCESS" << std::endl;
return 0;
}
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <getopt.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-t <client timeout in microseconds>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << "\t-i <none|gzip|deflate>" << std::endl;
std::cerr << "\t-o <none|gzip|deflate>" << std::endl;
std::cerr << std::endl;
std::cerr << "\t--verify-peer" << std::endl;
std::cerr << "\t--verify-host" << std::endl;
std::cerr << "\t--ca-certs" << std::endl;
std::cerr << "\t--cert-file" << std::endl;
std::cerr << "\t--key-file" << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl
<< "For -i, it sets the compression algorithm used for sending request "
"body."
<< "For -o, it sets the compression algorithm used for receiving "
"response body."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
uint32_t client_timeout = 0;
auto request_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::NONE;
auto response_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::NONE;
long verify_peer = 1;
long verify_host = 2;
std::string cacerts;
std::string certfile;
std::string keyfile;
// {name, has_arg, *flag, val}
static struct option long_options[] = {
{"verify-peer", 1, 0, 0}, {"verify-host", 1, 0, 1}, {"ca-certs", 1, 0, 2},
{"cert-file", 1, 0, 3}, {"key-file", 1, 0, 4}, {0, 0, 0, 0}};
// Parse commandline...
int opt;
while ((opt = getopt_long(argc, argv, "vu:t:H:i:o:", long_options, NULL)) !=
-1) {
switch (opt) {
case 0:
verify_peer = std::atoi(optarg);
break;
case 1:
verify_host = std::atoi(optarg);
break;
case 2:
cacerts = optarg;
break;
case 3:
certfile = optarg;
break;
case 4:
keyfile = optarg;
break;
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 't':
client_timeout = std::stoi(optarg);
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case 'i': {
std::string arg = optarg;
if (arg == "gzip") {
request_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::GZIP;
} else if (arg == "deflate") {
request_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::DEFLATE;
}
break;
}
case 'o': {
std::string arg = optarg;
if (arg == "gzip") {
response_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::GZIP;
} else if (arg == "deflate") {
response_compression_algorithm =
tc::InferenceServerHttpClient::CompressionType::DEFLATE;
}
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
tc::HttpSslOptions ssl_options;
ssl_options.verify_peer = verify_peer;
ssl_options.verify_host = verify_host;
ssl_options.ca_info = cacerts;
ssl_options.cert = certfile;
ssl_options.key = keyfile;
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose, ssl_options),
"unable to create http client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones.
std::vector<int32_t> input0_data(16);
std::vector<int32_t> input1_data(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = i;
input1_data[i] = 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
FAIL_IF_ERR(
input0_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input0_data[0]),
input0_data.size() * sizeof(int32_t)),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendRaw(
reinterpret_cast<uint8_t*>(&input1_data[0]),
input1_data.size() * sizeof(int32_t)),
"unable to set data for INPUT1");
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
options.client_timeout_ = client_timeout;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
// Empty output vector will request data for all the output tensors from
// the server.
std::vector<const tc::InferRequestedOutput*> outputs = {};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(
&results, options, inputs, outputs, http_headers, tc::Parameters(),
request_compression_algorithm, response_compression_algorithm),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Get pointers to the result returned...
int32_t* output0_data;
size_t output0_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT0", (const uint8_t**)&output0_data, &output0_byte_size),
"unable to get result data for 'OUTPUT0'");
if (output0_byte_size != 64) {
std::cerr << "error: received incorrect byte size for 'OUTPUT0': "
<< output0_byte_size << std::endl;
exit(1);
}
int32_t* output1_data;
size_t output1_byte_size;
FAIL_IF_ERR(
results_ptr->RawData(
"OUTPUT1", (const uint8_t**)&output1_data, &output1_byte_size),
"unable to get result data for 'OUTPUT1'");
  if (output1_byte_size != 64) {
    std::cerr << "error: received incorrect byte size for 'OUTPUT1': "
              << output1_byte_size << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input1_data[i] << " = "
<< *(output0_data + i) << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< *(output1_data + i) << std::endl;
if ((input0_data[i] + input1_data[i]) != *(output0_data + i)) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_data[i] - input1_data[i]) != *(output1_data + i)) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << results_ptr->DebugString() << std::endl;
tc::InferStat infer_stat;
  FAIL_IF_ERR(
      client->ClientInferStat(&infer_stat),
      "unable to get client inference statistics");
std::cout << "======Client Statistics======" << std::endl;
std::cout << "completed_request_count " << infer_stat.completed_request_count
<< std::endl;
std::cout << "cumulative_total_request_time_ns "
<< infer_stat.cumulative_total_request_time_ns << std::endl;
std::cout << "cumulative_send_time_ns " << infer_stat.cumulative_send_time_ns
<< std::endl;
std::cout << "cumulative_receive_time_ns "
<< infer_stat.cumulative_receive_time_ns << std::endl;
std::string model_stat;
FAIL_IF_ERR(
client->ModelInferenceStatistics(&model_stat, model_name),
"unable to get model statistics");
std::cout << "======Model Statistics======" << std::endl;
std::cout << model_stat << std::endl;
std::cout << "PASS : Infer" << std::endl;
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
#include "json_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
std::string model_name = "simple";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
{
std::string repository_index;
FAIL_IF_ERR(
client->ModelRepositoryIndex(&repository_index, http_headers),
"Failed to get repository index");
rapidjson::Document repository_index_json;
FAIL_IF_ERR(
tc::ParseJson(&repository_index_json, repository_index),
"failed to parse model config");
if (repository_index_json.Size() != 7) {
std::cerr << "expected number of models 7, got "
<< repository_index_json.Size() << std::endl;
exit(1);
}
}
FAIL_IF_ERR(
client->LoadModel(model_name, http_headers), "Failed to load model");
bool model_ready;
FAIL_IF_ERR(
client->IsModelReady(&model_ready, model_name),
"unable to get model readiness");
if (!model_ready) {
std::cerr << "error: model " << model_name << " is not live" << std::endl;
exit(1);
}
FAIL_IF_ERR(
client->UnloadModel(model_name, http_headers), "Failed to unload model");
FAIL_IF_ERR(
client->IsModelReady(&model_ready, model_name),
"unable to get model readiness");
if (model_ready) {
std::cerr << "error: model " << model_name << " is live after unloading"
<< std::endl;
exit(1);
}
tc::Error err = client->LoadModel("wrong_model_name", http_headers);
  if (err.IsOk()) {
    std::cerr << "error: wrong model name was successfully loaded" << std::endl;
    exit(1);
  }
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <condition_variable>
#include <iostream>
#include <string>
#include <vector>
#include "http_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service and its http port>"
<< std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
std::cerr << "\t-o <offset for sequence ID>" << std::endl;
std::cerr << std::endl;
std::cerr << "For -o, the client will use sequence ID <1 + 2 * offset> "
<< "and <2 + 2 * offset>. Default offset is 0." << std::endl;
exit(1);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerHttpClient>& client,
tc::InferOptions& options, int32_t value, std::vector<int32_t>& result_data,
tc::Headers& http_headers)
{
// Initialize the inputs with the data.
tc::InferInput* input;
std::vector<int64_t> shape{1, 1};
FAIL_IF_ERR(
tc::InferInput::Create(&input, "INPUT", shape, "INT32"),
"unable to create 'INPUT'");
std::shared_ptr<tc::InferInput> ivalue(input);
FAIL_IF_ERR(ivalue->Reset(), "unable to reset 'INPUT'");
FAIL_IF_ERR(
ivalue->AppendRaw(reinterpret_cast<uint8_t*>(&value), sizeof(int32_t)),
"unable to set data for 'INPUT'");
std::vector<tc::InferInput*> inputs = {ivalue.get()};
tc::InferRequestedOutput* output;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output, "OUTPUT"),
"unable to get 'OUTPUT'");
std::shared_ptr<const tc::InferRequestedOutput> routput;
routput.reset(output);
std::vector<const tc::InferRequestedOutput*> outputs = {routput.get()};
tc::InferResult* result;
// Send inference request to the inference server.
FAIL_IF_ERR(
client->Infer(&result, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> this_result(result);
// Get pointers to the result returned...
int32_t* output_data;
size_t output_byte_size;
FAIL_IF_ERR(
this_result->RawData(
"OUTPUT", (const uint8_t**)&output_data, &output_byte_size),
"unable to get result data for 'OUTPUT'");
if (output_byte_size != 4) {
std::cerr << "error: received incorrect byte size for 'OUTPUT': "
<< output_byte_size << std::endl;
exit(1);
}
result_data.push_back(*output_data);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerHttpClient>& client,
const std::string& model_name, int32_t value, const uint64_t sequence_id,
bool start_of_sequence, bool end_of_sequence,
std::vector<int32_t>& result_data, tc::Headers& http_headers)
{
tc::InferOptions options(model_name);
options.sequence_id_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
SyncSend(client, options, value, result_data, http_headers);
}
void
SyncSend(
const std::unique_ptr<tc::InferenceServerHttpClient>& client,
const std::string& model_name, int32_t value,
const std::string& sequence_id, bool start_of_sequence,
bool end_of_sequence, std::vector<int32_t>& result_data,
tc::Headers& http_headers)
{
tc::InferOptions options(model_name);
options.sequence_id_str_ = sequence_id;
options.sequence_start_ = start_of_sequence;
options.sequence_end_ = end_of_sequence;
SyncSend(client, options, value, result_data, http_headers);
}
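// A minimal sketch of how the overloads above drive one sequence,
// assuming an accumulating sequence model (values are illustrative):
//
//   SyncSend(client, model, 0,  seq_id, true,  false, data, hdrs); // start
//   SyncSend(client, model, 11, seq_id, false, false, data, hdrs); // middle
//   SyncSend(client, model, 1,  seq_id, false, true,  data, hdrs); // end
//
// sequence_start_ opens the sequence slot on the server, sequence_end_
// releases it, and every request in between must carry the same ID.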
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
bool dyna_sequence = false;
std::string url("localhost:8000");
int sequence_id_offset = 0;
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vdu:H:o:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case 'd':
dyna_sequence = true;
break;
case 'u':
url = optarg;
break;
case 'o':
sequence_id_offset = std::stoi(optarg);
break;
case '?':
Usage(argv);
break;
}
}
tc::Error err;
// We use the custom "sequence" model which takes 1 input value. The
// output is the accumulated value of the inputs. See
// src/custom/sequence.
std::string int_model_name =
dyna_sequence ? "simple_dyna_sequence" : "simple_sequence";
std::string string_model_name =
dyna_sequence ? "simple_string_dyna_sequence" : "simple_sequence";
const uint64_t int_sequence_id0 = 1 + sequence_id_offset * 2;
const uint64_t int_sequence_id1 = 2 + sequence_id_offset * 2;
// For string sequence IDs, the dyna backend requires that the
// sequence id be decodable into an integer, otherwise we'll use
// a test string sequence id and a model that doesn't require corrid
// control.
const std::string string_sequence_id0 =
dyna_sequence ? std::to_string(3 + sequence_id_offset * 2) : "SEQ-3";
std::cout << "sequence ID " << int_sequence_id0 << " : "
<< "sequence ID " << int_sequence_id1 << " : "
<< "sequence ID " << string_sequence_id0 << std::endl;
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
  // Now send the inference sequences...
//
std::vector<int32_t> values{11, 7, 5, 3, 2, 0, 1};
std::vector<int32_t> int_result0_data;
std::vector<int32_t> int_result1_data;
std::vector<int32_t> string_result0_data;
// Send requests, first reset accumulator for the sequence.
SyncSend(
client, int_model_name, 0, int_sequence_id0, true /* start-of-sequence */,
false /* end-of-sequence */, int_result0_data, http_headers);
SyncSend(
client, int_model_name, 100, int_sequence_id1,
true /* start-of-sequence */, false /* end-of-sequence */,
int_result1_data, http_headers);
SyncSend(
client, string_model_name, 20, string_sequence_id0,
true /* start-of-sequence */, false /* end-of-sequence */,
string_result0_data, http_headers);
// Now send a sequence of values...
for (int32_t v : values) {
SyncSend(
client, int_model_name, v, int_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
int_result0_data, http_headers);
SyncSend(
client, int_model_name, -v, int_sequence_id1,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
int_result1_data, http_headers);
SyncSend(
client, string_model_name, -v, string_sequence_id0,
false /* start-of-sequence */, (v == 1) /* end-of-sequence */,
string_result0_data, http_headers);
}
for (size_t i = 0; i < int_result0_data.size(); i++) {
int32_t int_seq0_expected = (i == 0) ? 1 : values[i - 1];
int32_t int_seq1_expected = (i == 0) ? 101 : values[i - 1] * -1;
int32_t string_seq0_expected;
    // For the string sequence ID case we are testing two different backends.
if ((i == 0) && dyna_sequence) {
string_seq0_expected = 20;
} else if ((i == 0) && !dyna_sequence) {
string_seq0_expected = 21;
} else if ((i != 0) && dyna_sequence) {
string_seq0_expected = values[i - 1] * -1 + string_result0_data[i - 1];
} else {
string_seq0_expected = values[i - 1] * -1;
}
// The dyna_sequence custom backend adds the sequence ID to
// the last request in a sequence.
if (dyna_sequence && (i != 0) && (values[i - 1] == 1)) {
int_seq0_expected += int_sequence_id0;
int_seq1_expected += int_sequence_id1;
string_seq0_expected += std::stoi(string_sequence_id0);
}
std::cout << "[" << i << "] " << int_result0_data[i] << " : "
<< int_result1_data[i] << " : " << string_result0_data[i]
<< std::endl;
if ((int_seq0_expected != int_result0_data[i]) ||
(int_seq1_expected != int_result1_data[i]) ||
(string_seq0_expected != string_result0_data[i])) {
std::cout << "[ expected ] " << int_seq0_expected << " : "
<< int_seq1_expected << " : " << string_seq0_expected
<< std::endl;
return 1;
}
}
return 0;
}
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
#include "shm_utils.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(
result->Shape(name, &shape), "unable to get shape for '" + name + "'");
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for '" << name << "'"
<< std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype),
"unable to get datatype for '" + name + "'");
// Validate datatype
if (datatype.compare("INT32") != 0) {
std::cerr << "error: received incorrect datatype for '" << name
<< "': " << datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vu:H:")) != -1) {
switch (opt) {
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 integers
// each and returns 2 output tensors of 16 integers each. One output
// tensor is the element-wise sum of the inputs and one output is
// the element-wise difference.
std::string model_name = "simple";
std::string model_version = "";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
// Unregistering all shared memory regions for a clean
// start.
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory(),
"unable to unregister all system shared memory regions");
FAIL_IF_ERR(
client->UnregisterCudaSharedMemory(),
"unable to unregister all cuda shared memory regions");
std::vector<int64_t> shape{1, 16};
size_t input_byte_size = 64;
size_t output_byte_size = 64;
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "INT32"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "INT32"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
// Create Input0 and Input1 in Shared Memory. Initialize Input0 to unique
// integers and Input1 to all ones.
std::string shm_key = "/input_simple";
int shm_fd_ip, *input0_shm;
  FAIL_IF_ERR(
      tc::CreateSharedMemoryRegion(shm_key, input_byte_size * 2, &shm_fd_ip),
      "unable to create input shared memory region");
  FAIL_IF_ERR(
      tc::MapSharedMemory(
          shm_fd_ip, 0, input_byte_size * 2, (void**)&input0_shm),
      "unable to map input shared memory region");
  FAIL_IF_ERR(
      tc::CloseSharedMemory(shm_fd_ip),
      "unable to close input shared memory descriptor");
int* input1_shm = (int*)(input0_shm + 16);
for (size_t i = 0; i < 16; ++i) {
*(input0_shm + i) = i;
*(input1_shm + i) = 1;
}
FAIL_IF_ERR(
client->RegisterSystemSharedMemory(
"input_data", "/input_simple", input_byte_size * 2),
"failed to register input shared memory region");
FAIL_IF_ERR(
input0_ptr->SetSharedMemory(
"input_data", input_byte_size, 0 /* offset */),
"unable to set shared memory for INPUT0");
FAIL_IF_ERR(
input1_ptr->SetSharedMemory(
"input_data", input_byte_size, input_byte_size /* offset */),
"unable to set shared memory for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get 'OUTPUT0'");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get 'OUTPUT1'");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
// Create Output0 and Output1 in Shared Memory
shm_key = "/output_simple";
int shm_fd_op;
int* output0_shm;
  FAIL_IF_ERR(
      tc::CreateSharedMemoryRegion(shm_key, output_byte_size * 2, &shm_fd_op),
      "unable to create output shared memory region");
  FAIL_IF_ERR(
      tc::MapSharedMemory(
          shm_fd_op, 0, output_byte_size * 2, (void**)&output0_shm),
      "unable to map output shared memory region");
  FAIL_IF_ERR(
      tc::CloseSharedMemory(shm_fd_op),
      "unable to close output shared memory descriptor");
int* output1_shm = (int*)(output0_shm + 16);
FAIL_IF_ERR(
client->RegisterSystemSharedMemory(
"output_data", "/output_simple", output_byte_size * 2),
"failed to register output shared memory region");
FAIL_IF_ERR(
output0_ptr->SetSharedMemory(
"output_data", output_byte_size, 0 /* offset */),
"unable to set shared memory for 'OUTPUT0'");
FAIL_IF_ERR(
output1_ptr->SetSharedMemory(
"output_data", output_byte_size, output_byte_size /* offset */),
"unable to set shared memory for 'OUTPUT1'");
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_shm[i] << " + " << input1_shm[i] << " = "
<< output0_shm[i] << std::endl;
std::cout << input0_shm[i] << " - " << input1_shm[i] << " = "
<< output1_shm[i] << std::endl;
if ((input0_shm[i] + input1_shm[i]) != output0_shm[i]) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if ((input0_shm[i] - input1_shm[i]) != output1_shm[i]) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get shared memory regions active/registered within triton
std::string shm_status;
FAIL_IF_ERR(
client->SystemSharedMemoryStatus(&shm_status),
"failed to get shared memory status");
std::cout << "Shared Memory Status:\n" << shm_status << "\n";
// Unregister shared memory
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory("input_data"),
"unable to unregister shared memory input region");
FAIL_IF_ERR(
client->UnregisterSystemSharedMemory("output_data"),
"unable to unregister shared memory output region");
// Cleanup shared memory
FAIL_IF_ERR(tc::UnmapSharedMemory(input0_shm, input_byte_size * 2), "");
FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/input_simple"), "");
FAIL_IF_ERR(tc::UnmapSharedMemory(output0_shm, output_byte_size * 2), "");
FAIL_IF_ERR(tc::UnlinkSharedMemoryRegion("/output_simple"), "");
std::cout << "PASS : System Shared Memory " << std::endl;
return 0;
}
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <getopt.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include "http_client.h"
namespace tc = triton::client;
#define FAIL_IF_ERR(X, MSG) \
{ \
tc::Error err = (X); \
if (!err.IsOk()) { \
std::cerr << "error: " << (MSG) << ": " << err << std::endl; \
exit(1); \
} \
}
namespace {
void
ValidateShapeAndDatatype(
const std::string& name, std::shared_ptr<tc::InferResult> result)
{
std::vector<int64_t> shape;
FAIL_IF_ERR(result->Shape(name, &shape), "unable to get shape for " + name);
// Validate shape
if ((shape.size() != 2) || (shape[0] != 1) || (shape[1] != 16)) {
std::cerr << "error: received incorrect shapes for " << name << std::endl;
exit(1);
}
std::string datatype;
FAIL_IF_ERR(
result->Datatype(name, &datatype), "unable to get datatype for " + name);
// Validate datatype
if (datatype.compare("BYTES") != 0) {
std::cerr << "error: received incorrect datatype for " << name << ": "
<< datatype << std::endl;
exit(1);
}
}
void
Usage(char** argv, const std::string& msg = std::string())
{
if (!msg.empty()) {
std::cerr << "error: " << msg << std::endl;
}
std::cerr << "Usage: " << argv[0] << " [options]" << std::endl;
std::cerr << "\t-v" << std::endl;
std::cerr << "\t-u <URL for inference service>" << std::endl;
std::cerr << "\t-H <HTTP header>" << std::endl;
std::cerr << "\t--json-input-data" << std::endl;
std::cerr << "\t--json-output-data" << std::endl;
std::cerr << std::endl;
std::cerr
<< "For -H, header must be 'Header:Value'. May be given multiple times."
<< std::endl;
exit(1);
}
} // namespace
int
main(int argc, char** argv)
{
bool verbose = false;
std::string url("localhost:8000");
tc::Headers http_headers;
bool json_input_data{false};
bool json_output_data{false};
// {name, has_arg, *flag, val}
static struct option long_options[] = {
{"json-input-data", 0, 0, 0},
{"json-output-data", 0, 0, 1},
{0, 0, 0, 0}};
// Parse commandline...
int opt;
while ((opt = getopt_long(argc, argv, "vu:H:", long_options, NULL)) != -1) {
switch (opt) {
case 0:
json_input_data = true;
break;
case 1:
json_output_data = true;
break;
case 'v':
verbose = true;
break;
case 'u':
url = optarg;
break;
case 'H': {
std::string arg = optarg;
std::string header = arg.substr(0, arg.find(":"));
http_headers[header] = arg.substr(header.size() + 1);
break;
}
case '?':
Usage(argv);
break;
}
}
// We use a simple model that takes 2 input tensors of 16 strings
// each and returns 2 output tensors of 16 strings each. The input
// strings must represent integers. One output tensor is the
// element-wise sum of the inputs and one output is the element-wise
// difference.
std::string model_name = "simple_string";
std::string model_version = "";
  // Create an InferenceServerHttpClient instance to communicate with the
  // server using the HTTP protocol.
std::unique_ptr<tc::InferenceServerHttpClient> client;
FAIL_IF_ERR(
tc::InferenceServerHttpClient::Create(&client, url, verbose),
"unable to create http client");
// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones. The input tensors
// are the string representation of these values.
std::vector<std::string> input0_data(16);
std::vector<std::string> input1_data(16);
std::vector<int32_t> expected_sum(16);
std::vector<int32_t> expected_diff(16);
for (size_t i = 0; i < 16; ++i) {
input0_data[i] = std::to_string(i);
input1_data[i] = std::to_string(1);
expected_sum[i] = i + 1;
expected_diff[i] = i - 1;
}
std::vector<int64_t> shape{1, 16};
// Initialize the inputs with the data.
tc::InferInput* input0;
tc::InferInput* input1;
FAIL_IF_ERR(
tc::InferInput::Create(&input0, "INPUT0", shape, "BYTES"),
"unable to get INPUT0");
std::shared_ptr<tc::InferInput> input0_ptr;
input0_ptr.reset(input0);
input0_ptr->SetBinaryData(!json_input_data);
FAIL_IF_ERR(
tc::InferInput::Create(&input1, "INPUT1", shape, "BYTES"),
"unable to get INPUT1");
std::shared_ptr<tc::InferInput> input1_ptr;
input1_ptr.reset(input1);
input1_ptr->SetBinaryData(!json_input_data);
FAIL_IF_ERR(
input0_ptr->AppendFromString(input0_data),
"unable to set data for INPUT0");
FAIL_IF_ERR(
input1_ptr->AppendFromString(input1_data),
"unable to set data for INPUT1");
// Generate the outputs to be requested.
tc::InferRequestedOutput* output0;
tc::InferRequestedOutput* output1;
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output0, "OUTPUT0"),
"unable to get OUTPUT0");
std::shared_ptr<tc::InferRequestedOutput> output0_ptr;
output0_ptr.reset(output0);
output0_ptr->SetBinaryData(!json_output_data);
FAIL_IF_ERR(
tc::InferRequestedOutput::Create(&output1, "OUTPUT1"),
"unable to get OUTPUT1");
std::shared_ptr<tc::InferRequestedOutput> output1_ptr;
output1_ptr.reset(output1);
output1_ptr->SetBinaryData(!json_output_data);
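  // With binary data enabled (the default here) the tensor contents travel
  // in the binary-data extension of the HTTP/REST protocol; passing
  // --json-input-data / --json-output-data instead carries the elements
  // inside the JSON request or response body.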
  // The inference settings. Using the defaults for now.
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<tc::InferInput*> inputs = {input0_ptr.get(), input1_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {
output0_ptr.get(), output1_ptr.get()};
tc::InferResult* results;
FAIL_IF_ERR(
client->Infer(&results, options, inputs, outputs, http_headers),
"unable to run model");
std::shared_ptr<tc::InferResult> results_ptr;
results_ptr.reset(results);
// Validate the results...
ValidateShapeAndDatatype("OUTPUT0", results_ptr);
ValidateShapeAndDatatype("OUTPUT1", results_ptr);
// Get the result data
std::vector<std::string> result0_data;
std::vector<std::string> result1_data;
FAIL_IF_ERR(
results_ptr->StringData("OUTPUT0", &result0_data),
"unable to get data for OUTPUT0");
if (result0_data.size() != 16) {
std::cerr << "error: received incorrect number of strings for OUTPUT0: "
<< result0_data.size() << std::endl;
exit(1);
}
FAIL_IF_ERR(
results_ptr->StringData("OUTPUT1", &result1_data),
"unable to get data for OUTPUT1");
if (result1_data.size() != 16) {
std::cerr << "error: received incorrect number of strings for OUTPUT1: "
<< result1_data.size() << std::endl;
exit(1);
}
for (size_t i = 0; i < 16; ++i) {
std::cout << input0_data[i] << " + " << input0_data[i] << " = "
<< result0_data[i] << std::endl;
std::cout << input0_data[i] << " - " << input1_data[i] << " = "
<< result1_data[i] << std::endl;
if (expected_sum[i] != std::stoi(result0_data[i])) {
std::cerr << "error: incorrect sum" << std::endl;
exit(1);
}
if (expected_diff[i] != std::stoi(result1_data[i])) {
std::cerr << "error: incorrect difference" << std::endl;
exit(1);
}
}
// Get full response
std::cout << results_ptr->DebugString() << std::endl;
std::cout << "PASS : String Infer" << std::endl;
return 0;
}
#include <dirent.h>
#include <getopt.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <algorithm>
#include <condition_variable>
#include <fstream>
#include <iostream>
#include <iterator>
#include <mutex>
#include <queue>
#include <string>
#include "grpc_client.h"
#include "http_client.h"
#include "json_utils.h"
#include <opencv2/opencv.hpp>
#include <opencv2/core/version.hpp>
#if CV_MAJOR_VERSION == 2
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#elif CV_MAJOR_VERSION >= 3
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#endif
#if CV_MAJOR_VERSION == 4
#define GET_TRANSFORMATION_CODE(x) cv::COLOR_##x
#else
#define GET_TRANSFORMATION_CODE(x) CV_##x
#endif
using namespace cv;
namespace tc = triton::client;
namespace {
enum ProtocolType { HTTP = 0, GRPC = 1 };
struct ModelInfo {
std::string output_name_;
std::string input_name_;
std::string input_datatype_;
int input_c_;
int input_h_;
int input_w_;
std::string input_format_;
int type1_;
int type3_;
int max_batch_size_;
};
struct ResultOfDetection {
  cv::Rect boundingBox;
  float confidence;
  int classID;
  std::string className;
  bool exist;
  ResultOfDetection() : confidence(0.0f), classID(0), exist(true) {}
};
// Simplified non-maximum suppression: boxes are visited in index order
// (not sorted by score). Each kept box suppresses later boxes whose
// intersection-over-union with it exceeds nms_threshold by zeroing
// their scores.
std::vector<int> NMSBoxes(
    const std::vector<cv::Rect>& boxes, std::vector<float>& scores,
    float score_threshold, float nms_threshold)
{
  std::vector<int> indices;
  std::vector<float> areas(boxes.size());
  for (size_t i = 0; i < boxes.size(); i++) {
    areas[i] = boxes[i].width * boxes[i].height;
  }
  for (size_t i = 0; i < boxes.size(); i++) {
    if (scores[i] > score_threshold) {
      indices.push_back(static_cast<int>(i));
      for (size_t j = i + 1; j < boxes.size(); j++) {
        if (scores[j] > score_threshold) {
          cv::Rect intersection = boxes[i] & boxes[j];
          float overlap = static_cast<float>(intersection.area()) /
                          (areas[i] + areas[j] - intersection.area());
          if (overlap > nms_threshold) {
            scores[j] = 0.0f;  // suppress box j
          }
        }
      }
    }
  }
  return indices;
}
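// Illustrative call (boxes and scores are made up): two heavily
// overlapping boxes, the later one suppressed, so only index 0 is kept:
//   std::vector<cv::Rect> b{{0, 0, 10, 10}, {1, 1, 10, 10}};
//   std::vector<float> s{0.9f, 0.8f};
//   std::vector<int> keep = NMSBoxes(b, s, 0.5f, 0.4f);  // keep == {0}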
void Preprocess(
const std::string& filename, int img_type1, int img_type3, size_t img_channels,
const cv::Size& img_size, std::vector<uint8_t>* input_data)
{
cv::Mat img = cv::imread(filename, 1);
if (img.empty()) {
std::cerr << "error: unable to decode image " << filename << std::endl;
exit(1);
}
cv::Mat sample;
if ((img.channels() == 3) && (img_channels == 3)) {
cv::cvtColor(img, sample, GET_TRANSFORMATION_CODE(BGR2RGB));
} else {
std::cerr << "unexpected number of channels " << img.channels()
<< " in input image, model expects " << img_channels << "."
<< std::endl;
exit(1);
}
cv::Mat sample_resized;
cv::resize(sample, sample_resized, img_size);
cv::Mat sample_type;
  sample_resized.convertTo(
      sample_type, (img_channels == 3) ? img_type3 : img_type1);
  cv::Mat sample_final;
  sample_final =
      sample_type.mul(cv::Scalar(1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0));
size_t img_byte_size = sample_final.total() * sample_final.elemSize();
size_t pos = 0;
input_data->resize(img_byte_size);
  // Scatter the channels into planar (CHW) order directly into the
  // caller-provided buffer.
  std::vector<cv::Mat> input_bgr_channels;
  for (size_t i = 0; i < img_channels; ++i) {
    input_bgr_channels.emplace_back(
        img_size.height, img_size.width, img_type1, &((*input_data)[pos]));
    pos += input_bgr_channels.back().total() *
           input_bgr_channels.back().elemSize();
  }
cv::split(sample_final, input_bgr_channels);
if (pos != img_byte_size) {
std::cerr << "unexpected total size of channels " << pos << ", expecting "
<< img_byte_size << std::endl;
exit(1);
}
}
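// Preprocess() emits planar (CHW) data: all of channel 0, then channel 1,
// then channel 2, each value scaled by 1/255. For a hypothetical 2x2 RGB
// image the buffer would read:
//   [R00 R01 R10 R11 | G00 G01 G10 G11 | B00 B01 B10 B11]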
void Postprocess(
const std::unique_ptr<tc::InferResult> result,
const std::vector<std::string>& filenames, const size_t batch_size,
const std::string& output_name, const bool batching)
{
if (!result->RequestStatus().IsOk()) {
std::cerr << "inference failed with error: " << result->RequestStatus()
<< std::endl;
exit(1);
}
if (filenames.size() != batch_size) {
std::cerr << "expected " << batch_size << " filenames, got "
<< filenames.size() << std::endl;
exit(1);
}
std::vector<int64_t> shape;
tc::Error err = result->Shape(output_name, &shape);
if (!err.IsOk()) {
std::cerr << "unable to get shape for " << output_name << std::endl;
exit(1);
}
std::string datatype;
err = result->Datatype(output_name, &datatype);
if (!err.IsOk()) {
std::cerr << "unable to get datatype for " << output_name << std::endl;
exit(1);
}
  const uint8_t* result_data;
  size_t result_byte_size = 0;
  err = result->RawData(output_name, &result_data, &result_byte_size);
  if (!err.IsOk()) {
    std::cerr << "unable to get data for " << output_name << std::endl;
    exit(1);
  }
  std::vector<cv::Mat> outs;
  cv::Mat srcImage = cv::imread(filenames[0], 1);
  // cv::Mat dimensions are plain ints, so narrow the int64_t shape values.
  int out_shape[] = {
      static_cast<int>(shape[0]), static_cast<int>(shape[1]),
      static_cast<int>(shape[2])};
  cv::Mat out(3, out_shape, CV_32F);
  memcpy(out.data, result_data, result_byte_size);
  outs.push_back(out);
int numProposal = outs[0].size[1];
int numOut = outs[0].size[2];
outs[0] = outs[0].reshape(0, numProposal);
std::vector<float> confidences;
std::vector<cv::Rect> boxes;
std::vector<int> classIds;
  // Scale factors from the assumed 640x640 network input back to the
  // original image size.
  float ratioh = (float)srcImage.rows / 640, ratiow = (float)srcImage.cols / 640;
int n = 0, rowInd = 0;
float* pdata = (float*)outs[0].data;
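  // Each proposal row is decoded below assuming a YOLOv5-style layout
  // (this is an assumption inferred from the code, not stated metadata):
  //   pdata[0..3] = cx, cy, w, h in network-input (640x640) coordinates
  //   pdata[4]    = objectness score
  //   pdata[5..]  = per-class scores (numOut - 5 classes)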
for (n = 0; n < numProposal; n++)
{
float boxScores = pdata[4];
if (boxScores > 0.5)
{
cv::Mat scores = outs[0].row(rowInd).colRange(5, numOut);
cv::Point classIdPoint;
double maxClassScore;
cv::minMaxLoc(scores, 0, &maxClassScore, 0, &classIdPoint);
maxClassScore *= boxScores;
if (maxClassScore > 0.25)
{
const int classIdx = classIdPoint.x;
float cx = pdata[0] * ratiow;
float cy = pdata[1] * ratioh;
float w = pdata[2] * ratiow;
float h = pdata[3] * ratioh;
int left = int(cx - 0.5 * w);
int top = int(cy - 0.5 * h);
confidences.push_back((float)maxClassScore);
boxes.push_back(cv::Rect(left, top, (int)(w), (int)(h)));
classIds.push_back(classIdx);
}
}
rowInd++;
pdata += numOut;
}
std::vector<int> indices;
indices = NMSBoxes(boxes, confidences, 0.25, 0.5);
std::vector<ResultOfDetection> resultsOfDetection;
  for (size_t i = 0; i < indices.size(); ++i)
  {
    int idx = indices[i];
    int classID = classIds[idx];
    float confidence = confidences[idx];
    cv::Rect box = boxes[idx];
    ResultOfDetection result;
    result.boundingBox = box;
    result.confidence = confidence;
    result.classID = classID;
    resultsOfDetection.push_back(result);
  }
fprintf(stdout,"//////////////Detection Results//////////////\n");
for( size_t i = 0; i < resultsOfDetection.size(); ++i)
{
ResultOfDetection result = resultsOfDetection[i];
cv::rectangle(srcImage, result.boundingBox, cv::Scalar(0,255,255),2);
fprintf(stdout,"box:%d %d %d %d,label:%d,confidence:%.3f\n",result.boundingBox.x,
result.boundingBox.y,result.boundingBox.width,result.boundingBox.height,result.classID,result.confidence);
}
cv::imwrite("result.jpg", srcImage);
}
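// ParseType maps a Triton datatype string (e.g. "FP32") to the matching
// 1-channel and 3-channel OpenCV types; returns false for unsupported types.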
bool ParseType(const std::string& dtype, int* type1, int* type3)
{
if (dtype.compare("UINT8") == 0) {
*type1 = CV_8UC1;
*type3 = CV_8UC3;
} else if (dtype.compare("INT8") == 0) {
*type1 = CV_8SC1;
*type3 = CV_8SC3;
} else if (dtype.compare("UINT16") == 0) {
*type1 = CV_16UC1;
*type3 = CV_16UC3;
} else if (dtype.compare("INT16") == 0) {
*type1 = CV_16SC1;
*type3 = CV_16SC3;
} else if (dtype.compare("INT32") == 0) {
*type1 = CV_32SC1;
*type3 = CV_32SC3;
} else if (dtype.compare("FP32") == 0) {
*type1 = CV_32FC1;
*type3 = CV_32FC3;
} else if (dtype.compare("FP64") == 0) {
*type1 = CV_64FC1;
*type3 = CV_64FC3;
} else {
return false;
}
return true;
}
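// ParseModelHttp validates the model metadata and configuration fetched over
// HTTP (exactly one input and one output, FP32 output, batch size within
// max_batch_size) and fills 'model_info'. The input shape is assumed to be
// NCHW with an explicit leading batch dimension, so C/H/W are read from
// dims 1-3.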
void ParseModelHttp(
const rapidjson::Document& model_metadata,
const rapidjson::Document& model_config, const size_t batch_size,
ModelInfo* model_info)
{
const auto& input_itr = model_metadata.FindMember("inputs");
size_t input_count = 0;
if (input_itr != model_metadata.MemberEnd()) {
input_count = input_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input, got " << input_count << std::endl;
exit(1);
}
const auto& output_itr = model_metadata.FindMember("outputs");
size_t output_count = 0;
if (output_itr != model_metadata.MemberEnd()) {
output_count = output_itr->value.Size();
}
if (output_count != 1) {
std::cerr << "expecting 1 output, got " << output_count << std::endl;
exit(1);
}
const auto& input_config_itr = model_config.FindMember("input");
input_count = 0;
if (input_config_itr != model_config.MemberEnd()) {
input_count = input_config_itr->value.Size();
}
if (input_count != 1) {
std::cerr << "expecting 1 input in model configuration, got " << input_count
<< std::endl;
exit(1);
}
const auto& input_metadata = *input_itr->value.Begin();
const auto& input_config = *input_config_itr->value.Begin();
const auto& output_metadata = *output_itr->value.Begin();
const auto& output_dtype_itr = output_metadata.FindMember("datatype");
if (output_dtype_itr == output_metadata.MemberEnd()) {
std::cerr << "output missing datatype in the metadata for model'"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
auto datatype = std::string(output_dtype_itr->value.GetString(),
output_dtype_itr->value.GetStringLength());
if (datatype.compare("FP32") != 0) {
std::cerr << "expecting output datatype to be FP32, model '"
<< model_metadata["name"].GetString() << "' output type is '"
<< datatype << "'" << std::endl;
exit(1);
}
int max_batch_size = 0;
const auto bs_itr = model_config.FindMember("max_batch_size");
if (bs_itr != model_config.MemberEnd()) {
max_batch_size = bs_itr->value.GetUint();
}
model_info->max_batch_size_ = max_batch_size;
if (max_batch_size == 0) {
if (batch_size != 1) {
std::cerr << "batching not supported for model '"
<< model_metadata["name"].GetString() << "'" << std::endl;
exit(1);
}
} else {
if (batch_size > (size_t)max_batch_size) {
std::cerr << "expecting batch size <= " << max_batch_size
<< " for model '" << model_metadata["name"].GetString() << "'"
<< std::endl;
exit(1);
}
}
const bool input_batch_dim = (max_batch_size == 0);
const size_t expected_input_dims = 3 + (input_batch_dim ? 1 : 0);
const auto input_shape_itr = input_metadata.FindMember("shape");
  if (input_shape_itr != input_metadata.MemberEnd()) {
    if (input_shape_itr->value.Size() != expected_input_dims) {
      std::cerr << "expecting input to have " << expected_input_dims
                << " dimensions, model '" << model_metadata["name"].GetString()
                << "' input has " << input_shape_itr->value.Size() << std::endl;
      exit(1);
    }
  } else {
    std::cerr << "input missing shape in the metadata for model '"
              << model_metadata["name"].GetString() << "'" << std::endl;
    exit(1);
  }
model_info->input_format_ = std::string(input_config["format"].GetString(), input_config["format"].GetStringLength());
model_info->output_name_ = std::string(output_metadata["name"].GetString(), output_metadata["name"].GetStringLength());
model_info->input_name_ = std::string(input_metadata["name"].GetString(), input_metadata["name"].GetStringLength());
model_info->input_datatype_ = std::string(input_metadata["datatype"].GetString(), input_metadata["datatype"].GetStringLength());
model_info->input_c_ = input_shape_itr->value[1].GetInt();
model_info->input_h_ = input_shape_itr->value[2].GetInt();
model_info->input_w_ = input_shape_itr->value[3].GetInt();
if (!ParseType(model_info->input_datatype_, &(model_info->type1_), &(model_info->type3_))) {
std::cerr << "unexpected input datatype '" << model_info->input_datatype_
<< "' for model \"" << model_metadata["name"].GetString()
<< std::endl;
exit(1);
}
}
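// Only one of the two clients is ever constructed, so a union avoids carrying
// both. The placement-new in the constructor initializes the HTTP member, and
// the empty destructor deliberately leaves cleanup to process exit.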
union TritonClient {
TritonClient()
{
new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
}
~TritonClient() {}
std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};
} // namespace
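// The client flow: parse arguments, fetch and validate model metadata and
// configuration, preprocess the input image(s), send synchronous inference
// requests (one batch per request), and postprocess each result.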
int
main(int argc, char** argv)
{
bool verbose = false;
bool async = false;
int batch_size = 1;
if (argc < 3 || argc > 3)
{
fprintf(stdout, "Two args are required: ./a yolov7-tiny image_path\n");
return -1;
}
std::string model_name = argv[1];
std::string fileName = argv[2];
std::string preprocess_output_filename;
std::string model_version = "";
std::string url("localhost:8000");
ProtocolType protocol = ProtocolType::HTTP;
tc::Headers http_headers;
TritonClient triton_client;
tc::Error err;
err = tc::InferenceServerHttpClient::Create(
&triton_client.http_client_, url, verbose);
if (!err.IsOk()) {
std::cerr << "error: unable to create client for inference: " << err << std::endl;
exit(1);
}
ModelInfo model_info;
std::string model_metadata;
err = triton_client.http_client_->ModelMetadata(&model_metadata, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model metadata: " << err << std::endl;
}
rapidjson::Document model_metadata_json;
err = tc::ParseJson(&model_metadata_json, model_metadata);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model metadata: " << err
<< std::endl;
}
std::string model_config;
err = triton_client.http_client_->ModelConfig(&model_config, model_name, model_version, http_headers);
if (!err.IsOk()) {
std::cerr << "error: failed to get model config: " << err << std::endl;
}
rapidjson::Document model_config_json;
err = tc::ParseJson(&model_config_json, model_config);
if (!err.IsOk()) {
std::cerr << "error: failed to parse model config: " << err << std::endl;
}
ParseModelHttp( model_metadata_json, model_config_json, batch_size, &model_info);
std::vector<std::string> image_filenames;
struct stat name_stat;
if (stat(fileName.c_str(), &name_stat) != 0) {
std::cerr << "Failed to find '" << fileName << "': " << strerror(errno) << std::endl;
exit(1);
}
  if (S_ISDIR(name_stat.st_mode)) {
const std::string dirname = fileName;
DIR* dir_ptr = opendir(dirname.c_str());
struct dirent* d_ptr;
while ((d_ptr = readdir(dir_ptr)) != NULL) {
const std::string filename = d_ptr->d_name;
if ((filename != ".") && (filename != "..")) {
image_filenames.push_back(dirname + "/" + filename);
}
}
closedir(dir_ptr);
} else {
image_filenames.push_back(fileName);
}
std::sort(image_filenames.begin(), image_filenames.end());
std::vector<std::vector<uint8_t>> image_data;
for (const auto& fn : image_filenames) {
image_data.emplace_back();
Preprocess(fn, model_info.type1_, model_info.type3_, model_info.input_c_,
cv::Size(model_info.input_w_, model_info.input_h_), &(image_data.back()));
if ((image_data.size() == 1) && !preprocess_output_filename.empty()) {
std::ofstream output_file(preprocess_output_filename);
std::ostream_iterator<uint8_t> output_iterator(output_file);
std::copy(image_data[0].begin(), image_data[0].end(), output_iterator);
}
}
std::vector<int64_t> shape;
shape.push_back(batch_size);
shape.push_back(model_info.input_c_);
shape.push_back(model_info.input_h_);
shape.push_back(model_info.input_w_);
tc::InferInput* input;
err = tc::InferInput::Create(&input, model_info.input_name_, shape, model_info.input_datatype_);
if (!err.IsOk()) {
std::cerr << "unable to get input: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferInput> input_ptr(input);
tc::InferRequestedOutput* output;
err = tc::InferRequestedOutput::Create(&output, model_info.output_name_);
if (!err.IsOk()) {
std::cerr << "unable to get output: " << err << std::endl;
exit(1);
}
std::shared_ptr<tc::InferRequestedOutput> output_ptr(output);
std::vector<tc::InferInput*> inputs = {input_ptr.get()};
std::vector<const tc::InferRequestedOutput*> outputs = {output_ptr.get()};
tc::InferOptions options(model_name);
options.model_version_ = model_version;
std::vector<std::unique_ptr<tc::InferResult>> results;
std::vector<std::vector<std::string>> result_filenames;
size_t image_idx = 0;
size_t done_cnt = 0;
size_t sent_count = 0;
bool last_request = false;
std::mutex mtx;
std::condition_variable cv;
auto callback_func = [&](tc::InferResult* result)
{
{
std::lock_guard<std::mutex> lk(mtx);
results.emplace_back(result);
done_cnt++;
}
cv.notify_all();
};
while (!last_request) {
err = input_ptr->Reset();
if (!err.IsOk()) {
std::cerr << "failed resetting input: " << err << std::endl;
exit(1);
}
std::vector<std::string> input_filenames;
for (int idx = 0; idx < batch_size; ++idx) {
input_filenames.push_back(image_filenames[image_idx]);
err = input_ptr->AppendRaw(image_data[image_idx]);
if (!err.IsOk()) {
std::cerr << "failed setting input: " << err << std::endl;
exit(1);
}
image_idx = (image_idx + 1) % image_data.size();
if (image_idx == 0) {
last_request = true;
}
}
result_filenames.emplace_back(std::move(input_filenames));
options.request_id_ = std::to_string(sent_count);
    double time1 = cv::getTickCount();
tc::InferResult* result;
if (protocol == ProtocolType::HTTP) {
err = triton_client.http_client_->Infer(
&result, options, inputs, outputs, http_headers);
} else {
err = triton_client.grpc_client_->Infer(
&result, options, inputs, outputs, http_headers);
}
if (!err.IsOk()) {
std::cerr << "failed sending synchronous infer request: " << err
<< std::endl;
exit(1);
}
results.emplace_back(result);
    double time2 = cv::getTickCount();
    double elapsedTime = (time2 - time1) * 1000 / cv::getTickFrequency();
    fprintf(stdout, "inference time: %f ms\n", elapsedTime);
sent_count++;
}
for (size_t idx = 0; idx < results.size(); idx++) {
std::cout << "Request " << idx << ", batch size " << batch_size << std::endl;
Postprocess(
std::move(results[idx]), result_filenames[idx], batch_size,
model_info.output_name_, model_info.max_batch_size_ != 0);
}
return 0;
}
# Copyright (c) 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required (VERSION 3.18)
find_package(Threads REQUIRED)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
#
# common
#
add_library(
client-common-library INTERFACE
)
target_include_directories(
client-common-library
INTERFACE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
)
#
# json_utils
#
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER OR TRITON_ENABLE_EXAMPLES)
find_package(RapidJSON CONFIG REQUIRED)
add_library(
json-utils-library EXCLUDE_FROM_ALL OBJECT
json_utils.h json_utils.cc
)
target_include_directories(
json-utils-library
PUBLIC ${RapidJSON_INCLUDE_DIRS}
)
target_link_libraries(
json-utils-library
PRIVATE
client-common-library
)
endif()
#
# shm_utils
#
add_library(
shm-utils-library EXCLUDE_FROM_ALL OBJECT
shm_utils.h shm_utils.cc
)
target_link_libraries(
shm-utils-library
PRIVATE
client-common-library
)
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
#
# libgrpcclient.so and libgrpcclient_static.a
#
configure_file(libgrpcclient.ldscript libgrpcclient.ldscript COPYONLY)
# libgrpcclient object build
set(
REQUEST_SRCS
grpc_client.cc common.cc
)
set(
REQUEST_HDRS
grpc_client.h common.h ipc.h
)
add_library(
grpc-client-library EXCLUDE_FROM_ALL OBJECT
${REQUEST_SRCS} ${REQUEST_HDRS}
)
add_dependencies(
grpc-client-library
grpc-service-library proto-library
)
# libgrpcclient_static.a
add_library(
grpcclient_static STATIC
$<TARGET_OBJECTS:grpc-client-library>
$<TARGET_OBJECTS:grpc-service-library>
$<TARGET_OBJECTS:proto-library>
)
add_library(
TritonClient::grpcclient_static ALIAS grpcclient_static
)
target_include_directories(
grpcclient_static
PUBLIC
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
)
target_link_libraries(
grpcclient_static
PRIVATE gRPC::grpc++
PRIVATE gRPC::grpc
PUBLIC protobuf::libprotobuf
PUBLIC Threads::Threads
)
# libgrpcclient.so
add_library(
grpcclient SHARED
$<TARGET_OBJECTS:grpc-service-library>
$<TARGET_OBJECTS:proto-library>
$<TARGET_OBJECTS:grpc-client-library>
)
add_library(
TritonClient::grpcclient ALIAS grpcclient
)
if (NOT WIN32 AND NOT TRITON_KEEP_TYPEINFO)
set_target_properties(
grpcclient
PROPERTIES
LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libgrpcclient.ldscript
LINK_FLAGS "-Wl,--version-script=libgrpcclient.ldscript"
)
endif() # NOT WIN32 AND NOT TRITON_KEEP_TYPEINFO
target_link_libraries(
grpcclient
PRIVATE gRPC::grpc++
PRIVATE gRPC::grpc
PUBLIC protobuf::libprotobuf
PUBLIC Threads::Threads
)
foreach(_client_target grpc-client-library grpcclient_static grpcclient)
target_compile_features(${_client_target} PRIVATE cxx_std_11)
target_compile_options(
${_client_target} PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra -Wno-unused-parameter -Werror>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
)
set_target_properties(
${_client_target}
PROPERTIES
POSITION_INDEPENDENT_CODE ON
)
target_include_directories(
${_client_target}
PUBLIC
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<TARGET_PROPERTY:proto-library,INCLUDE_DIRECTORIES>
$<TARGET_PROPERTY:grpc-service-library,INCLUDE_DIRECTORIES>
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
)
if(TRITON_ENABLE_GPU)
target_compile_definitions(
${_client_target}
PUBLIC TRITON_ENABLE_GPU=1
)
endif() # TRITON_ENABLE_GPU
if(TRITON_ENABLE_GPU)
target_link_libraries(
${_client_target}
PUBLIC CUDA::cudart
)
endif() # TRITON_ENABLE_GPU
endforeach()
if (TRITON_USE_THIRD_PARTY)
if (NOT WIN32)
install(
DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/curl/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/grpc/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/protobuf/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/c-ares/lib/
DESTINATION ${CMAKE_INSTALL_LIBDIR}
FILES_MATCHING
PATTERN "*\.a"
PATTERN "CMakeFiles" EXCLUDE
PATTERN "cmake" EXCLUDE
PATTERN "gens" EXCLUDE
PATTERN "libs" EXCLUDE
PATTERN "third_party" EXCLUDE
)
else()
install(
DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/curl/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/grpc/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/protobuf/lib/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/c-ares/lib/
DESTINATION ${CMAKE_INSTALL_LIBDIR}
FILES_MATCHING
PATTERN "*\.lib"
PATTERN "CMakeFiles" EXCLUDE
PATTERN "cmake" EXCLUDE
PATTERN "gens" EXCLUDE
PATTERN "libs" EXCLUDE
PATTERN "third_party" EXCLUDE
)
endif() # NOT WIN32
install(
DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/grpc/include/
${CMAKE_CURRENT_BINARY_DIR}/../../third-party/protobuf/include/
DESTINATION include
)
endif()
install(
FILES
${CMAKE_CURRENT_SOURCE_DIR}/grpc_client.h
DESTINATION include
)
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
if(${TRITON_ENABLE_ZLIB})
find_package(ZLIB REQUIRED)
endif() # TRITON_ENABLE_ZLIB
#
# libhttpclient.so and libhttpclient_static.a
#
configure_file(libhttpclient.ldscript libhttpclient.ldscript COPYONLY)
# libhttpclient object build
set(
REQUEST_SRCS
http_client.cc common.cc cencode.c
)
set(
REQUEST_HDRS
http_client.h common.h ipc.h cencode.h
)
add_library(
http-client-library EXCLUDE_FROM_ALL OBJECT
${REQUEST_SRCS} ${REQUEST_HDRS}
)
if (NOT WIN32)
set_property(
SOURCE cencode.c
PROPERTY COMPILE_FLAGS -Wno-implicit-fallthrough
)
endif() # NOT WIN32
target_link_libraries(
http-client-library
PUBLIC
triton-common-json # from repo-common
)
# libhttpclient_static.a
add_library(
httpclient_static STATIC
$<TARGET_OBJECTS:http-client-library>
)
add_library(
TritonClient::httpclient_static ALIAS httpclient_static
)
target_link_libraries(
httpclient_static
PRIVATE triton-common-json
PUBLIC CURL::libcurl
PUBLIC Threads::Threads
)
if(${TRITON_ENABLE_ZLIB})
target_link_libraries(
httpclient_static
PRIVATE ZLIB::ZLIB
)
endif() # TRITON_ENABLE_ZLIB
# libhttpclient.so
add_library(
httpclient SHARED
$<TARGET_OBJECTS:http-client-library>
)
add_library(
TritonClient::httpclient ALIAS httpclient
)
if (NOT WIN32)
set_target_properties(
httpclient
PROPERTIES
LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libhttpclient.ldscript
LINK_FLAGS "-Wl,--version-script=libhttpclient.ldscript"
)
endif() # NOT WIN32
target_link_libraries(
httpclient
PRIVATE triton-common-json
PUBLIC CURL::libcurl
PUBLIC Threads::Threads
)
foreach(_client_target http-client-library httpclient_static httpclient)
target_compile_features(${_client_target} PRIVATE cxx_std_11)
target_compile_options(
${_client_target} PRIVATE
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Wextra -Wno-unused-parameter -Werror>
$<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
)
set_target_properties(
${_client_target}
PROPERTIES
POSITION_INDEPENDENT_CODE ON
)
target_include_directories(
${_client_target}
PUBLIC
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<TARGET_PROPERTY:CURL::libcurl,INTERFACE_INCLUDE_DIRECTORIES>
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
)
target_compile_definitions(
${_client_target}
PRIVATE CURL_STATICLIB=1
)
if(TRITON_ENABLE_GPU)
target_compile_definitions(
${_client_target}
PUBLIC TRITON_ENABLE_GPU=1
)
endif() # TRITON_ENABLE_GPU
if(TRITON_ENABLE_GPU)
target_link_libraries(
${_client_target}
PUBLIC CUDA::cudart
)
endif() # TRITON_ENABLE_GPU
if(${TRITON_ENABLE_ZLIB})
target_compile_definitions(
${_client_target}
PUBLIC TRITON_ENABLE_ZLIB=1
)
endif() # TRITON_ENABLE_ZLIB
endforeach()
install(
FILES
${CMAKE_CURRENT_SOURCE_DIR}/http_client.h
DESTINATION include
)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
install(
FILES
${CMAKE_CURRENT_SOURCE_DIR}/common.h
${CMAKE_CURRENT_SOURCE_DIR}/ipc.h
DESTINATION include
)
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonClient)
if(TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER)
install(
TARGETS
grpcclient
grpcclient_static
EXPORT
triton-client-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
endif() # TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER)
install(
TARGETS
httpclient
httpclient_static
EXPORT
triton-client-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_PERF_ANALYZER
install(
EXPORT
triton-client-targets
FILE
TritonClientTargets.cmake
NAMESPACE
TritonClient::
DESTINATION
${INSTALL_CONFIGDIR}
)
include(CMakePackageConfigHelpers)
configure_package_config_file(
${CMAKE_CURRENT_LIST_DIR}/cmake/TritonClientConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/TritonClientConfig.cmake
INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)
install(
FILES
${CMAKE_CURRENT_BINARY_DIR}/TritonClientConfig.cmake
DESTINATION
${INSTALL_CONFIGDIR}
)
#
# Export from build tree
#
export(
EXPORT
triton-client-targets
FILE
${CMAKE_CURRENT_BINARY_DIR}/TritonClientTargets.cmake
NAMESPACE
TritonClient::
)
export(PACKAGE TritonClient)
endif() # TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
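#
# A downstream project would typically consume the installed package along
# these lines (a sketch; the executable name and source file are placeholders):
#
#   find_package(TritonClient REQUIRED)
#   add_executable(my_client my_client.cc)
#   target_link_libraries(my_client PRIVATE TritonClient::httpclient)
#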
/*
cencoder.c - c source to a base64 encoding algorithm implementation
This is part of the libb64 project, and has been placed in the public domain.
For details, see http://sourceforge.net/projects/libb64
*/
#include "cencode.h"
const int CHARS_PER_LINE = 72;
void
base64_init_encodestate(base64_encodestate* state_in)
{
state_in->step = step_A;
state_in->result = 0;
state_in->stepcount = 0;
}
char
base64_encode_value(char value_in)
{
static const char* encoding =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
if (value_in > 63)
return '=';
return encoding[(int)value_in];
}
int
base64_encode_block(
const char* plaintext_in, int length_in, char* code_out,
base64_encodestate* state_in)
{
const char* plainchar = plaintext_in;
const char* const plaintextend = plaintext_in + length_in;
char* codechar = code_out;
char result;
char fragment;
result = state_in->result;
switch (state_in->step) {
while (1) {
case step_A:
if (plainchar == plaintextend) {
state_in->result = result;
state_in->step = step_A;
return codechar - code_out;
}
fragment = *plainchar++;
result = (fragment & 0x0fc) >> 2;
*codechar++ = base64_encode_value(result);
result = (fragment & 0x003) << 4;
case step_B:
if (plainchar == plaintextend) {
state_in->result = result;
state_in->step = step_B;
return codechar - code_out;
}
fragment = *plainchar++;
result |= (fragment & 0x0f0) >> 4;
*codechar++ = base64_encode_value(result);
result = (fragment & 0x00f) << 2;
case step_C:
if (plainchar == plaintextend) {
state_in->result = result;
state_in->step = step_C;
return codechar - code_out;
}
fragment = *plainchar++;
result |= (fragment & 0x0c0) >> 6;
*codechar++ = base64_encode_value(result);
result = (fragment & 0x03f) >> 0;
*codechar++ = base64_encode_value(result);
++(state_in->stepcount);
if (state_in->stepcount == CHARS_PER_LINE / 4) {
*codechar++ = '\n';
state_in->stepcount = 0;
}
}
}
/* control should not reach here */
return codechar - code_out;
}
int
base64_encode_blockend(char* code_out, base64_encodestate* state_in)
{
char* codechar = code_out;
switch (state_in->step) {
case step_B:
*codechar++ = base64_encode_value(state_in->result);
*codechar++ = '=';
*codechar++ = '=';
break;
case step_C:
*codechar++ = base64_encode_value(state_in->result);
*codechar++ = '=';
break;
case step_A:
break;
}
*codechar++ = '\n';
return codechar - code_out;
}
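/*
Typical streaming use of this encoder (a sketch; buffer sizing is the
caller's responsibility -- base64 output is roughly 4/3 of the input plus
newlines and padding):

  base64_encodestate state;
  char out[128];
  base64_init_encodestate(&state);
  int n = base64_encode_block("hello", 5, out, &state);
  n += base64_encode_blockend(out + n, &state);
  // 'out' now holds n characters of base64 text ("aGVsbG8=\n").
*/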
/*
cencode.h - c header for a base64 encoding algorithm
This is part of the libb64 project, and has been placed in the public domain.
For details, see http://sourceforge.net/projects/libb64
*/
#ifndef BASE64_CENCODE_H
#define BASE64_CENCODE_H
typedef enum { step_A, step_B, step_C } base64_encodestep;
typedef struct {
base64_encodestep step;
char result;
int stepcount;
} base64_encodestate;
void base64_init_encodestate(base64_encodestate* state_in);
char base64_encode_value(char value_in);
int base64_encode_block(
const char* plaintext_in, int length_in, char* code_out,
base64_encodestate* state_in);
int base64_encode_blockend(char* code_out, base64_encodestate* state_in);
#endif /* BASE64_CENCODE_H */
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# specific version required for protobuf
if(NOT PROTOBUF_FOUND AND NOT Protobuf_FOUND)
set(protobuf_MODULE_COMPATIBLE TRUE CACHE BOOL "protobuf_MODULE_COMPATIBLE" FORCE)
find_package(Protobuf @Protobuf_VERSION@ CONFIG REQUIRED)
endif()
# specific version required for grpc
if(NOT GRPC_FOUND AND NOT gRPC_FOUND)
find_package(gRPC @gRPC_VERSION@ CONFIG REQUIRED)
endif()
if(NOT CURL_FOUND)
if(${TRITON_CURL_WITHOUT_CONFIG})
find_package(CURL REQUIRED)
else()
find_package(CURL CONFIG REQUIRED)
endif() # TRITON_CURL_WITHOUT_CONFIG
endif()
if(NOT Threads_FOUND)
find_package(Threads REQUIRED)
endif()
if (NOT (TARGET TRITON::grpcclient OR TARGET TRITON::httpclient))
include ("${CMAKE_CURRENT_LIST_DIR}/TRITONTargets.cmake")
endif ()
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(CMakeFindDependencyMacro)
get_filename_component(
TRITONCLIENT_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH
)
list(APPEND CMAKE_MODULE_PATH ${TRITONCLIENT_CMAKE_DIR})
if(NOT TARGET TritonClient::triton-client)
include("${TRITONCLIENT_CMAKE_DIR}/TritonClientTargets.cmake")
endif()
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
namespace triton { namespace client {
//==============================================================================
const Error Error::Success("");
Error::Error(const std::string& msg) : msg_(msg) {}
std::ostream&
operator<<(std::ostream& out, const Error& err)
{
if (!err.msg_.empty()) {
out << err.msg_;
}
return out;
}
//==============================================================================
Error
InferenceServerClient::ClientInferStat(InferStat* infer_stat) const
{
*infer_stat = infer_stat_;
return Error::Success;
}
Error
InferenceServerClient::UpdateInferStat(const RequestTimers& timer)
{
const uint64_t request_time_ns = timer.Duration(
RequestTimers::Kind::REQUEST_START, RequestTimers::Kind::REQUEST_END);
const uint64_t send_time_ns = timer.Duration(
RequestTimers::Kind::SEND_START, RequestTimers::Kind::SEND_END);
const uint64_t recv_time_ns = timer.Duration(
RequestTimers::Kind::RECV_START, RequestTimers::Kind::RECV_END);
if ((request_time_ns == std::numeric_limits<uint64_t>::max()) ||
(send_time_ns == std::numeric_limits<uint64_t>::max()) ||
(recv_time_ns == std::numeric_limits<uint64_t>::max())) {
return Error(
"Timer not set correctly." +
((timer.Timestamp(RequestTimers::Kind::REQUEST_START) >
timer.Timestamp(RequestTimers::Kind::REQUEST_END))
? (" Request time from " +
std::to_string(
timer.Timestamp(RequestTimers::Kind::REQUEST_START)) +
" to " +
std::to_string(
timer.Timestamp(RequestTimers::Kind::REQUEST_END)) +
".")
: "") +
((timer.Timestamp(RequestTimers::Kind::SEND_START) >
timer.Timestamp(RequestTimers::Kind::SEND_END))
? (" Send time from " +
std::to_string(
timer.Timestamp(RequestTimers::Kind::SEND_START)) +
" to " +
std::to_string(timer.Timestamp(RequestTimers::Kind::SEND_END)) +
".")
: "") +
((timer.Timestamp(RequestTimers::Kind::RECV_START) >
timer.Timestamp(RequestTimers::Kind::RECV_END))
? (" Receive time from " +
std::to_string(
timer.Timestamp(RequestTimers::Kind::RECV_START)) +
" to " +
std::to_string(timer.Timestamp(RequestTimers::Kind::RECV_END)) +
".")
: ""));
}
infer_stat_.completed_request_count++;
infer_stat_.cumulative_total_request_time_ns += request_time_ns;
infer_stat_.cumulative_send_time_ns += send_time_ns;
infer_stat_.cumulative_receive_time_ns += recv_time_ns;
return Error::Success;
}
//==============================================================================
Error
InferInput::Create(
InferInput** infer_input, const std::string& name,
const std::vector<int64_t>& dims, const std::string& datatype)
{
*infer_input = new InferInput(name, dims, datatype);
return Error::Success;
}
Error
InferInput::SetShape(const std::vector<int64_t>& shape)
{
shape_ = shape;
return Error::Success;
}
Error
InferInput::Reset()
{
bufs_.clear();
buf_byte_sizes_.clear();
str_bufs_.clear();
bufs_idx_ = 0;
byte_size_ = 0;
io_type_ = NONE;
return Error::Success;
}
Error
InferInput::AppendRaw(const std::vector<uint8_t>& input)
{
return AppendRaw(&input[0], input.size());
}
Error
InferInput::AppendRaw(const uint8_t* input, size_t input_byte_size)
{
byte_size_ += input_byte_size;
bufs_.push_back(input);
buf_byte_sizes_.push_back(input_byte_size);
io_type_ = RAW;
return Error::Success;
}
Error
InferInput::SetSharedMemory(
const std::string& name, size_t byte_size, size_t offset)
{
shm_name_ = name;
shm_offset_ = offset;
byte_size_ = byte_size;
io_type_ = SHARED_MEMORY;
return Error::Success;
}
Error
InferInput::AppendFromString(const std::vector<std::string>& input)
{
// Serialize the strings into a "raw" buffer. The first 4-bytes are
// the length of the string length. Next are the actual string
// characters. There is *not* a null-terminator on the string.
str_bufs_.emplace_back();
std::string& sbuf = str_bufs_.back();
for (const auto& str : input) {
uint32_t len = str.size();
sbuf.append(reinterpret_cast<const char*>(&len), sizeof(uint32_t));
sbuf.append(str);
}
return AppendRaw(reinterpret_cast<const uint8_t*>(&sbuf[0]), sbuf.size());
}
Error
InferInput::ByteSize(size_t* byte_size) const
{
*byte_size = byte_size_;
return Error::Success;
}
InferInput::InferInput(
const std::string& name, const std::vector<int64_t>& shape,
const std::string& datatype)
: name_(name), shape_(shape), datatype_(datatype), byte_size_(0),
bufs_idx_(0), buf_pos_(0), io_type_(NONE), shm_name_(""), shm_offset_(0)
{
}
Error
InferInput::SharedMemoryInfo(
std::string* name, size_t* byte_size, size_t* offset) const
{
if (io_type_ != SHARED_MEMORY) {
return Error("The input has not been set with the shared memory.");
}
*name = shm_name_;
*offset = shm_offset_;
*byte_size = byte_size_;
return Error::Success;
}
Error
InferInput::SetBinaryData(const bool binary_data)
{
binary_data_ = binary_data;
return Error::Success;
}
Error
InferInput::PrepareForRequest()
{
// Reset position so request sends entire input.
bufs_idx_ = 0;
buf_pos_ = 0;
return Error::Success;
}
Error
InferInput::GetNext(
uint8_t* buf, size_t size, size_t* input_bytes, bool* end_of_input)
{
size_t total_size = 0;
while ((bufs_idx_ < bufs_.size()) && (size > 0)) {
const size_t buf_byte_size = buf_byte_sizes_[bufs_idx_];
const size_t csz = (std::min)(buf_byte_size - buf_pos_, size);
if (csz > 0) {
const uint8_t* input_ptr = bufs_[bufs_idx_] + buf_pos_;
std::copy(input_ptr, input_ptr + csz, buf);
buf_pos_ += csz;
buf += csz;
size -= csz;
total_size += csz;
}
if (buf_pos_ == buf_byte_size) {
bufs_idx_++;
buf_pos_ = 0;
}
}
*input_bytes = total_size;
*end_of_input = (bufs_idx_ >= bufs_.size());
return Error::Success;
}
Error
InferInput::GetNext(
const uint8_t** buf, size_t* input_bytes, bool* end_of_input)
{
if (bufs_idx_ < bufs_.size()) {
*buf = bufs_[bufs_idx_];
*input_bytes = buf_byte_sizes_[bufs_idx_];
bufs_idx_++;
} else {
*buf = nullptr;
*input_bytes = 0;
}
*end_of_input = (bufs_idx_ >= bufs_.size());
return Error::Success;
}
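// The two GetNext() overloads serve different transports: the first copies
// input data into a caller-provided buffer (possibly spanning several
// appended buffers), while the second hands back a pointer to one internal
// buffer at a time without copying.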
//==============================================================================
Error
InferRequestedOutput::Create(
InferRequestedOutput** infer_output, const std::string& name,
const size_t class_count)
{
*infer_output = new InferRequestedOutput(name, class_count);
return Error::Success;
}
Error
InferRequestedOutput::SetSharedMemory(
const std::string& region_name, const size_t byte_size, const size_t offset)
{
shm_name_ = region_name;
shm_byte_size_ = byte_size;
shm_offset_ = offset;
io_type_ = SHARED_MEMORY;
return Error::Success;
}
Error
InferRequestedOutput::UnsetSharedMemory()
{
shm_name_ = "";
shm_byte_size_ = 0;
shm_offset_ = 0;
io_type_ = NONE;
return Error::Success;
}
InferRequestedOutput::InferRequestedOutput(
const std::string& name, const size_t class_count)
: name_(name), class_count_(class_count), io_type_(NONE)
{
}
Error
InferRequestedOutput::SharedMemoryInfo(
std::string* name, size_t* byte_size, size_t* offset) const
{
if (io_type_ != SHARED_MEMORY) {
return Error("The input has not been set with the shared memory.");
}
*name = shm_name_;
*byte_size = shm_byte_size_;
*offset = shm_offset_;
return Error::Success;
}
Error
InferRequestedOutput::SetBinaryData(const bool binary_data)
{
binary_data_ = binary_data;
return Error::Success;
}
//==============================================================================
}} // namespace triton::client