Commit 0a21fff9 authored by xiabo

Adapt to 0.1.0

parent 9484fd1c
// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#ifdef TRITON_ENABLE_NVTX
#include <nvtx3/nvToolsExt.h>
namespace triton { namespace common {
// Creates an NVTX range whose duration is measured by a C++ scope.
class NvtxRange {
public:
explicit NvtxRange(const char* label) { nvtxRangePushA(label); }
explicit NvtxRange(const std::string& label) : NvtxRange(label.c_str()) {}
~NvtxRange() { nvtxRangePop(); }
};
}} // namespace triton::common
#endif // TRITON_ENABLE_NVTX
//
// Macros to access NVTX functionality
//
#ifdef TRITON_ENABLE_NVTX
#define NVTX_INITIALIZE nvtxInitialize(nullptr)
#define NVTX_RANGE(V, L) triton::common::NvtxRange V(L)
#define NVTX_MARKER(L) nvtxMarkA(L)
#else
#define NVTX_INITIALIZE
#define NVTX_RANGE(V, L)
#define NVTX_MARKER(L)
#endif // TRITON_ENABLE_NVTX
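A minimal usage sketch for the macros above, assuming TRITON_ENABLE_NVTX is defined and this header is available (the header name nvtx.h used below is an assumption); when NVTX is disabled the macros expand to nothing and the annotations compile away:
#include "nvtx.h"  // assumed header name
void ProcessBatch()
{
  NVTX_RANGE(nvtx_range, "ProcessBatch");  // range pushed here, popped at scope exit
  NVTX_MARKER("batch received");           // instantaneous marker
  // ... work covered by the range ...
}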
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <condition_variable>
#include <deque>
#include <mutex>
namespace triton { namespace common {
//
// C++11 doesn't have a sync queue so we implement a simple one.
//
template <typename Item>
class SyncQueue {
public:
SyncQueue() {}
bool Empty()
{
std::lock_guard<std::mutex> lk(mu_);
return queue_.empty();
}
Item Get()
{
std::unique_lock<std::mutex> lk(mu_);
if (queue_.empty()) {
cv_.wait(lk, [this] { return !queue_.empty(); });
}
auto res = std::move(queue_.front());
queue_.pop_front();
return res;
}
void Put(const Item& value)
{
{
std::lock_guard<std::mutex> lk(mu_);
queue_.push_back(value);
}
cv_.notify_all();
}
void Put(Item&& value)
{
{
std::lock_guard<std::mutex> lk(mu_);
queue_.push_back(std::move(value));
}
cv_.notify_all();
}
private:
std::mutex mu_;
std::condition_variable cv_;
std::deque<Item> queue_;
};
}} // namespace triton::common
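A minimal producer/consumer sketch for the SyncQueue above (the header name sync_queue.h and the surrounding program are assumptions for illustration):
#include <string>
#include <thread>
#include "sync_queue.h"  // assumed header name
int main()
{
  triton::common::SyncQueue<std::string> queue;
  std::thread consumer([&queue] {
    // Get() blocks until an item is available.
    for (;;) {
      std::string item = queue.Get();
      if (item == "stop") break;
    }
  });
  queue.Put(std::string("work"));
  queue.Put(std::string("stop"));
  consumer.join();
  return 0;
}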
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <memory>
#include <sstream>
#include <string>
#include <vector>
namespace triton { namespace common {
//
// An ASCII table printer.
//
class TablePrinter {
public:
// Insert a row at the end of the table
void InsertRow(const std::vector<std::string>& row);
// Print the table
std::string PrintTable();
// TablePrinter takes ownership of `headers`.
TablePrinter(const std::vector<std::string>& headers);
private:
// Update `shares_` so that any excess space not used by a
// column is fairly allocated to the other columns
void FairShare();
// Append a row to `table`. This function handles the cases where a wrapping
// occurs.
void AddRow(std::stringstream& table, size_t row_index);
// Add a row divider
void AddRowDivider(std::stringstream& table);
// Max row width
std::vector<size_t> max_widths_;
// Max row height
std::vector<size_t> max_heights_;
// A vector of vectors of vectors containing data items for every column.
// Each record is stored as a vector of strings, where each item in the
// vector contains a single line of the record. For example, ["Item 1",
// "Item 2", "Item 3\n Item 3 line 2"] will be stored as
// [["Item 1"], ["Item 2"], ["Item 3", "Item 3 line 2"]]
std::vector<std::vector<std::vector<std::string>>> data_;
// Fair share of every column
std::vector<float> shares_;
};
}} // namespace triton::common
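A sketch of the intended call pattern for TablePrinter (the header name table_printer.h is an assumption); each inserted row supplies one string per header column:
#include <iostream>
#include "table_printer.h"  // assumed header name
int main()
{
  triton::common::TablePrinter printer({"Model", "Version", "Status"});
  printer.InsertRow({"resnet50", "1", "READY"});
  printer.InsertRow({"bert_base", "3", "UNAVAILABLE"});
  std::cout << printer.PrintTable();
  return 0;
}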
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <condition_variable>
#include <functional>
#include <queue>
#include <thread>
namespace triton { namespace common {
// Generic fixed-size Thread Pool to execute tasks asynchronously
class ThreadPool {
public:
explicit ThreadPool(std::size_t thread_count);
~ThreadPool();
ThreadPool(const ThreadPool&) = delete;
ThreadPool& operator=(const ThreadPool&) = delete;
using Task = std::function<void(void)>;
// Assigns "task" to the task queue for a worker thread to execute when
// available. This will not track the return value of the task.
void Enqueue(Task&& task);
// Returns the number of threads in thread pool
size_t Size() { return workers_.size(); }
private:
std::queue<Task> task_queue_;
std::mutex queue_mtx_;
std::condition_variable cv_;
std::vector<std::thread> workers_;
// If true, tells pool to stop accepting work and tells awake worker threads
// to exit when no tasks are left on the queue.
bool stop_ = false;
};
}} // namespace triton::common
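A fire-and-forget usage sketch for the ThreadPool above (the header name thread_pool.h is an assumption); Enqueue does not report task results, so completion is observed here with an atomic counter:
#include <atomic>
#include "thread_pool.h"  // assumed header name
void RunTasks()
{
  std::atomic<int> done{0};
  triton::common::ThreadPool pool(4 /* thread_count */);
  for (int i = 0; i < 8; ++i) {
    pool.Enqueue([&done] { done.fetch_add(1); });
  }
  // Per the 'stop_' comment above, workers exit only once the queue is
  // drained, so all eight tasks run before ~ThreadPool() completes
  // (assuming the destructor joins the worker threads).
}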
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#ifdef _WIN32
// Remove GetObject definition from windows.h, which prevents calls to
// RapidJSON's GetObject.
// https://github.com/Tencent/rapidjson/issues/1448
#undef GetObject
#include <rapidjson/document.h>
#else
// Disable class-memaccess warning to facilitate compilation with gcc>7
// https://github.com/Tencent/rapidjson/issues/1700
#pragma GCC diagnostic push
#if defined(__GNUC__) && __GNUC__ >= 8
#pragma GCC diagnostic ignored "-Wclass-memaccess"
#endif
#include <rapidjson/document.h>
#pragma GCC diagnostic pop
#endif // _WIN32
#include <rapidjson/allocators.h> // CrtAllocator (default) for Writer instantiation
#include <rapidjson/encodings.h> // UTF8 (default) for Writer instantiation
#include <rapidjson/error/en.h>
#include <rapidjson/prettywriter.h>
#include <rapidjson/rapidjson.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include <string>
#include <vector>
// This header can be used both within Triton server and externally
// (i.e. in source that interacts only via TRITONSERVER or
// TRITONBACKEND API). Status is handled differently in these cases so
// the following macros must be defined before including this
// header. As an example the defines are shown here as returned by the
// TRITONSERVER API.
//
// #define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
// #define TRITONJSON_STATUSRETURN(M)
// return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str())
// #define TRITONJSON_STATUSSUCCESS nullptr
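//
// For standalone use outside those APIs, one possible (hypothetical) set of
// defines is a plain std::string-based status:
//
// #define TRITONJSON_STATUSTYPE std::string
// #define TRITONJSON_STATUSRETURN(M) return (M)
// #define TRITONJSON_STATUSSUCCESS std::string()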
namespace triton { namespace common {
//
// A JSON parser/writer. Currently based on rapidjson but the intent
// is to provide an abstraction for JSON functions that make it easy
// to substitute a different JSON parser. Specifically for rapidjson
// the class is also designed to provide safe access and error
// reporting to avoid the cases where rapidjson would just abort the
// entire application (!).
//
class TritonJson {
public:
class Value;
enum class ValueType {
OBJECT = rapidjson::kObjectType,
ARRAY = rapidjson::kArrayType,
};
//
// Buffer used when writing JSON representation.
//
class WriteBuffer {
public:
// Get buffer base address.
const char* Base() const { return buffer_.c_str(); }
// Get a reference to the buffer itself. Useful to efficiently
// move the contents out of the buffer.
std::string& MutableContents() { return buffer_; }
// Immutable contents.
const std::string& Contents() const { return buffer_; }
// Interface required by rapidjson::Writer
typedef char Ch;
void Put(char c) { buffer_.push_back(c); }
void Clear() { buffer_.clear(); }
void Flush() { return; }
size_t Size() const { return buffer_.size(); }
private:
std::string buffer_;
};
//
// Value representing the entire document or an element within a
// document.
//
class Value {
public:
// Empty value. Will become a top-level Document value if
// initialized by parsing or a non-top-level value if initialized
// any other way.
explicit Value() : value_(nullptr), allocator_(nullptr) {}
// Construct a top-level JSON document.
explicit Value(const ValueType type)
: document_(static_cast<rapidjson::Type>(type)), value_(nullptr),
allocator_(&document_.GetAllocator())
{
}
// Construct a non-top-level JSON value in a 'document'.
explicit Value(TritonJson::Value& document, const ValueType type)
{
allocator_ = &document.document_.GetAllocator();
value_ = new (allocator_->Malloc(sizeof(rapidjson::Value)))
rapidjson::Value(static_cast<rapidjson::Type>(type));
}
// Move constructor.
explicit Value(Value&& other) { *this = std::move(other); }
// Move assignment operator.
Value& operator=(Value&& other)
{
document_ = std::move(other.document_);
value_ = other.value_;
allocator_ = other.allocator_;
other.value_ = nullptr;
other.allocator_ = nullptr;
return *this;
}
// Parse JSON into document. Can only be called on top-level
// document value, otherwise error is returned.
TRITONJSON_STATUSTYPE Parse(const char* base, const size_t size)
{
if (value_ != nullptr) {
TRITONJSON_STATUSRETURN(
std::string("JSON parsing only available for top-level document"));
}
const unsigned int parseFlags = rapidjson::kParseNanAndInfFlag;
document_.Parse<parseFlags>(base, size);
if (document_.HasParseError()) {
TRITONJSON_STATUSRETURN(std::string(
"failed to parse the request JSON buffer: " +
std::string(GetParseError_En(document_.GetParseError())) + " at " +
std::to_string(document_.GetErrorOffset())));
}
allocator_ = &document_.GetAllocator();
return TRITONJSON_STATUSSUCCESS;
}
// \see Parse(const char* base, const size_t size)
TRITONJSON_STATUSTYPE Parse(const std::string& json)
{
return Parse(json.data(), json.size());
}
// Write JSON representation into a 'buffer' in a compact
// format. Can only be called for a top-level document value,
// otherwise error is returned.
TRITONJSON_STATUSTYPE Write(WriteBuffer* buffer) const
{
if (value_ != nullptr) {
TRITONJSON_STATUSRETURN(
std::string("JSON writing only available for top-level document"));
}
const unsigned int writeFlags = rapidjson::kWriteNanAndInfFlag;
// Provide default template arguments to pass writeFlags
rapidjson::Writer<
WriteBuffer, rapidjson::UTF8<>, rapidjson::UTF8<>,
rapidjson::CrtAllocator, writeFlags>
writer(*buffer);
if (!document_.Accept(writer)) {
TRITONJSON_STATUSRETURN(
std::string("Failed to accept document, invalid JSON."));
}
return TRITONJSON_STATUSSUCCESS;
}
// Write JSON representation into a 'buffer' in an easy-to-read
// format. Can only be called for a top-level document value,
// otherwise error is returned.
TRITONJSON_STATUSTYPE PrettyWrite(WriteBuffer* buffer) const
{
if (value_ != nullptr) {
TRITONJSON_STATUSRETURN(
std::string("JSON writing only available for top-level document"));
}
// Can't pass writeFlags with latest release v1.1.0 of rapidjson-dev.
// We would need to build rapidjson from source to capture latest fixes.
// See this issue:
// https://github.com/Tencent/rapidjson/issues/905#issuecomment-370981353
// PrettyWrite is only used for displaying model configs currently, so
// this should not be an issue.
rapidjson::PrettyWriter<WriteBuffer> writer(*buffer);
if (!document_.Accept(writer)) {
TRITONJSON_STATUSRETURN(
std::string("Failed to accept document, invalid JSON."));
}
return TRITONJSON_STATUSSUCCESS;
}
// Swap a value with another.
TRITONJSON_STATUSTYPE Swap(TritonJson::Value& other)
{
rapidjson::Value& value = AsMutableValue();
value.Swap(other.AsMutableValue());
return TRITONJSON_STATUSSUCCESS;
}
// FIXME Should have Set* for all types.
// Set/overwrite a signed integer in a value. This changes the
// type of the value to signed int.
TRITONJSON_STATUSTYPE SetInt(const int64_t value)
{
rapidjson::Value& v = AsMutableValue();
v.SetInt64(value);
return TRITONJSON_STATUSSUCCESS;
}
// Set/overwrite a string in a value. This changes the
// type of the value to string
TRITONJSON_STATUSTYPE SetString(const std::string& value)
{
rapidjson::Value& v = AsMutableValue();
v.SetString(value.c_str(), value.length(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Set/overwrite a string member with provided name and value in this object
TRITONJSON_STATUSTYPE SetStringObject(
const char* name, const std::string& value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add/replace JSON member '") + name +
"' to non-object");
}
auto itr = object.FindMember(name);
if (itr == object.MemberEnd()) {
AddString(name, value);
} else {
object.RemoveMember(itr);
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value.c_str(), value.size(), *allocator_),
*allocator_);
}
return TRITONJSON_STATUSSUCCESS;
}
// Add an array or object as a new member to this value. 'value'
// is moved into this value and so on return 'value' should not be
// used. It is assumed that 'name' can be used by reference, it is
// the caller's responsibility to make sure the lifetime of 'name'
// extends at least as long as the object.
TRITONJSON_STATUSTYPE Add(const char* name, TritonJson::Value&& value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
if (value.value_ == nullptr) {
rapidjson::Value v2;
v2.CopyFrom(value.document_, *allocator_);
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(), v2.Move(),
*allocator_);
} else {
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
value.value_->Move(), *allocator_);
}
value.Release();
return TRITONJSON_STATUSSUCCESS;
}
// Add a copy of a string as a new member to this value. It is
// assumed that 'name' can be used by reference, it is the
// caller's responsibility to make sure the lifetime of 'name'
// extends at least as long as the object.
TRITONJSON_STATUSTYPE AddString(const char* name, const std::string& value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value.c_str(), value.size(), *allocator_).Move(),
*allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a copy of an explicit-length string as a new member to this
// value. It is assumed that 'name' can be used by reference, it
// is the caller's responsibility to make sure the lifetime of
// 'name' extends at least as long as the object.
TRITONJSON_STATUSTYPE AddString(
const char* name, const char* value, const size_t len)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value, len, *allocator_).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a reference to a string as a new member to this value. It
// is assumed that 'name' and 'value' can be used by reference, it
// is the caller's responsibility to make sure the lifetime of
// 'name' and 'value' extend at least as long as the object.
TRITONJSON_STATUSTYPE AddStringRef(const char* name, const char* value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::StringRef(value), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a reference to an explicit-length string as a new member to
// this value. It is assumed that 'name' and 'value' can be used
// by reference, it is the caller's responsibility to make sure
// the lifetime of 'name' and 'value' extend at least as long as
// the object.
TRITONJSON_STATUSTYPE AddStringRef(
const char* name, const char* value, const size_t len)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::StringRef(value, len), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a boolean new member to this value. It is assumed that
// 'name' can be used by reference, it is the caller's
// responsibility to make sure the lifetime of 'name' extends at
// least as long as the object.
TRITONJSON_STATUSTYPE AddBool(const char* name, const bool value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a signed integer as a new member to this value. It is
// assumed that 'name' can be used by reference, it is the
// caller's responsibility to make sure the lifetime of 'name'
// extends at least as long as the object.
TRITONJSON_STATUSTYPE AddInt(const char* name, const int64_t value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add an unsigned integer as a new member to this value. It is
// assumed that 'name' can be used by reference, it is the
// caller's responsibility to make sure the lifetime of 'name'
// extends at least as long as the object.
TRITONJSON_STATUSTYPE AddUInt(const char* name, const uint64_t value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Add a double as a new member to this value. It is assumed that
// 'name' can be used by reference, it is the caller's
// responsibility to make sure the lifetime of 'name' extends at
// least as long as the object.
TRITONJSON_STATUSTYPE AddDouble(const char* name, const double value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to add JSON member '") + name +
"' to non-object");
}
object.AddMember(
rapidjson::Value(rapidjson::StringRef(name)).Move(),
rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append an array or object to this value, which must be an
// array. 'value' is moved into this value and so on return
// 'value' should not be used.
TRITONJSON_STATUSTYPE Append(TritonJson::Value&& value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
if (value.value_ == nullptr) {
rapidjson::Value v2;
v2.CopyFrom(value.document_, *allocator_);
array.PushBack(v2.Move(), *allocator_);
} else {
array.PushBack(value.value_->Move(), *allocator_);
}
value.Release();
return TRITONJSON_STATUSSUCCESS;
}
// Append a copy of a string to this value, which must be an
// array.
TRITONJSON_STATUSTYPE AppendString(const std::string& value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(
rapidjson::Value(value.c_str(), value.size(), *allocator_).Move(),
*allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a copy of an explicit-length string to this value, which
// must be an array.
TRITONJSON_STATUSTYPE AppendString(const char* value, const size_t len)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(
rapidjson::Value(value, len, *allocator_).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a reference to a string to this value, which must be an
// array. It is assumed that 'value' can be used by reference, it
// is the caller's responsibility to make sure the lifetime of
// 'value' extends at least as long as the object.
TRITONJSON_STATUSTYPE AppendStringRef(const char* value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::StringRef(value), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a reference to an explicit-length string to this value,
// which must be an array. It is assumed that 'value' can be used
// by reference, it is the caller's responsibility to make sure
// the lifetime of 'value' extends at least as long as the object.
TRITONJSON_STATUSTYPE AppendStringRef(const char* value, const size_t len)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::StringRef(value, len), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a boolean to this value, which must be an array.
TRITONJSON_STATUSTYPE AppendBool(const bool value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a signed integer to this value, which must be an array.
TRITONJSON_STATUSTYPE AppendInt(const int64_t value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append an unsigned integer to this value, which must be an
// array.
TRITONJSON_STATUSTYPE AppendUInt(const uint64_t value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Append a double to this value, which must be an array.
TRITONJSON_STATUSTYPE AppendDouble(const double value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to append JSON member to non-array"));
}
array.PushBack(rapidjson::Value(value).Move(), *allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Remove member from this object
TRITONJSON_STATUSTYPE Remove(const char* name)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to remove JSON member '") + name +
"' to non-object");
}
auto itr = object.FindMember(name);
if (itr != object.MemberEnd()) {
object.RemoveMember(itr);
} // else report success
return TRITONJSON_STATUSSUCCESS;
}
// Check if this value is of the specified type. Return appropriate
// error if not.
TRITONJSON_STATUSTYPE AssertType(TritonJson::ValueType type) const
{
if (static_cast<rapidjson::Type>(type) != AsValue().GetType()) {
TRITONJSON_STATUSRETURN(std::string("unexpected type"));
}
return TRITONJSON_STATUSSUCCESS;
}
// Get the size of an array. If called on non-array returns zero.
size_t ArraySize() const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray()) {
return 0;
}
return array.GetArray().Size();
}
// Return the specified index contained in this array.
TRITONJSON_STATUSTYPE At(
const size_t idx, TritonJson::Value* value = nullptr)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
*value = TritonJson::Value(array[idx], allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Get the names of all members in an object. Error if value is
// not an object.
TRITONJSON_STATUSTYPE Members(std::vector<std::string>* names) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to get members for non-object"));
}
for (const auto& m : object.GetObject()) {
names->push_back(m.name.GetString());
}
return TRITONJSON_STATUSSUCCESS;
}
// Return true if this value is an object and the named member is
// contained in this object.
bool Find(const char* name) const
{
const rapidjson::Value& object = AsValue();
return object.IsObject() && object.HasMember(name);
}
// Return true if this value is an object and the named member is
// contained in this object. Return the member in 'value'.
bool Find(const char* name, TritonJson::Value* value)
{
rapidjson::Value& object = AsMutableValue();
if (object.IsObject() && object.HasMember(name)) {
if (value != nullptr) {
*value = TritonJson::Value(object[name], allocator_);
}
return true;
}
return false;
}
// Whether the object is a null value. Note that false will also be returned
// if the object is not a JSON value.
bool IsNull() const { return ((value_ != nullptr) && value_->IsNull()); }
// Return true if the object is an object and it has no members;
// false otherwise.
bool IsEmpty() const
{
const rapidjson::Value& object = AsValue();
if (object.IsObject() && object.MemberCount() == 0) {
return true;
}
return false;
}
// Get value as a string. The string may contain null or other
// special characters and so 'len' must be used to determine length.
// Error if value is not a string.
TRITONJSON_STATUSTYPE AsString(const char** value, size_t* len) const
{
if ((value_ == nullptr) || !value_->IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
*value = value_->GetString();
*len = value_->GetStringLength();
return TRITONJSON_STATUSSUCCESS;
}
// Get value as a string. The string may contain null or other
// special characters. Error if value is not a string.
TRITONJSON_STATUSTYPE AsString(std::string* str) const
{
if ((value_ == nullptr) || !value_->IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
str->assign(value_->GetString(), value_->GetStringLength());
return TRITONJSON_STATUSSUCCESS;
}
// Get value as a boolean. Error if value is not a boolean.
TRITONJSON_STATUSTYPE AsBool(bool* value) const
{
if ((value_ == nullptr) || !value_->IsBool()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-boolean as boolean"));
}
*value = value_->GetBool();
return TRITONJSON_STATUSSUCCESS;
}
// Get value as a signed integer. Error if value is not a signed
// integer.
TRITONJSON_STATUSTYPE AsInt(int64_t* value) const
{
if ((value_ == nullptr) || !value_->IsInt64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-signed-integer as signed-integer"));
}
*value = value_->GetInt64();
return TRITONJSON_STATUSSUCCESS;
}
// Get value as an unsigned integer. Error if value is not an
// unsigned integer.
TRITONJSON_STATUSTYPE AsUInt(uint64_t* value) const
{
if ((value_ == nullptr) || !value_->IsUint64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-unsigned-integer as unsigned-integer"));
}
*value = value_->GetUint64();
return TRITONJSON_STATUSSUCCESS;
}
// Get value as a double. Error if value is not a double.
TRITONJSON_STATUSTYPE AsDouble(double* value) const
{
if ((value_ == nullptr) || !value_->IsNumber()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-number as double"));
}
*value = value_->GetDouble();
return TRITONJSON_STATUSSUCCESS;
}
// Get named array member contained in this object.
TRITONJSON_STATUSTYPE MemberAsArray(
const char* name, TritonJson::Value* value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
auto& v = object[name];
if (!v.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-array as array"));
}
*value = TritonJson::Value(v, allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Get named object member contained in this object.
TRITONJSON_STATUSTYPE MemberAsObject(
const char* name, TritonJson::Value* value)
{
rapidjson::Value& object = AsMutableValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
auto& v = object[name];
if (!v.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-object as object"));
}
*value = TritonJson::Value(v, allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as a string. The string may contain null or other
// special characters and so 'len' must be used to determine length.
// Error if this is not an object or if the member is not a string.
TRITONJSON_STATUSTYPE MemberAsString(
const char* name, const char** value, size_t* len) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
*value = v.GetString();
*len = v.GetStringLength();
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as a string. The string may contain null or
// other special characters. Error if this is not an object or if
// the member is not a string.
TRITONJSON_STATUSTYPE MemberAsString(
const char* name, std::string* str) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
str->assign(v.GetString(), v.GetStringLength());
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as a boolean. Error if this is not an object
// or if the member is not a boolean.
TRITONJSON_STATUSTYPE MemberAsBool(const char* name, bool* value) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsBool()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-boolean as boolean"));
}
*value = v.GetBool();
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as a signed integer. Error if this is not an object
// or if the member is not a signed integer.
TRITONJSON_STATUSTYPE MemberAsInt(const char* name, int64_t* value) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsInt64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-signed-integer as signed-integer"));
}
*value = v.GetInt64();
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as an unsigned integer. Error if this is not an object
// or if the member is not an unsigned integer.
TRITONJSON_STATUSTYPE MemberAsUInt(const char* name, uint64_t* value) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsUint64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-unsigned-integer as unsigned-integer"));
}
*value = v.GetUint64();
return TRITONJSON_STATUSSUCCESS;
}
// Get object member as a double. Error if this is not an object
// or if the member is not a double.
TRITONJSON_STATUSTYPE MemberAsDouble(const char* name, double* value) const
{
const rapidjson::Value& object = AsValue();
if (!object.IsObject() || !object.HasMember(name)) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing object member '") +
name + "'");
}
const auto& v = object[name];
if (!v.IsNumber()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-number as double"));
}
*value = v.GetDouble();
return TRITONJSON_STATUSSUCCESS;
}
// Get array element at a given index within this array.
TRITONJSON_STATUSTYPE IndexAsArray(
const size_t idx, TritonJson::Value* value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
auto& v = array[idx];
if (!v.IsArray()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-array as array"));
}
*value = TritonJson::Value(v, allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Get object element at a given index within this array.
TRITONJSON_STATUSTYPE IndexAsObject(
const size_t idx, TritonJson::Value* value)
{
rapidjson::Value& array = AsMutableValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
auto& v = array[idx];
if (!v.IsObject()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-object as object"));
}
*value = TritonJson::Value(v, allocator_);
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as a string. The string may contain null or
// other special characters and so 'len' must be used to determine
// length. Error if this is not an array or if the index element
// is not a string.
TRITONJSON_STATUSTYPE IndexAsString(
const size_t idx, const char** value, size_t* len) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
*value = v.GetString();
*len = v.GetStringLength();
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as a string. The string may contain null or
// other special characters. Error if this is not an array or if
// the index element is not a string.
TRITONJSON_STATUSTYPE IndexAsString(
const size_t idx, std::string* str) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsString()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-string as string"));
}
str->assign(v.GetString(), v.GetStringLength());
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as a boolean. Error if this is not an array or
// if the index element is not a boolean.
TRITONJSON_STATUSTYPE IndexAsBool(const size_t idx, bool* value) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsBool()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-boolean as boolean"));
}
*value = v.GetBool();
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as a signed integer. Error if this is not an array or
// if the index element is not a signed integer.
TRITONJSON_STATUSTYPE IndexAsInt(const size_t idx, int64_t* value) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsInt64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-signed-integer as signed-integer"));
}
*value = v.GetInt64();
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as an unsigned integer. Error if this is not an array or
// if the index element is not an unsigned integer.
TRITONJSON_STATUSTYPE IndexAsUInt(const size_t idx, uint64_t* value) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsUint64()) {
TRITONJSON_STATUSRETURN(std::string(
"attempt to access JSON non-unsigned-integer as unsigned-integer"));
}
*value = v.GetUint64();
return TRITONJSON_STATUSSUCCESS;
}
// Get array index as a double. Error if this is not an array or
// if the index element is not a double.
TRITONJSON_STATUSTYPE IndexAsDouble(const size_t idx, double* value) const
{
const rapidjson::Value& array = AsValue();
if (!array.IsArray() || (idx >= array.GetArray().Size())) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access non-existing array index '") +
std::to_string(idx) + "'");
}
const auto& v = array[idx];
if (!v.IsNumber()) {
TRITONJSON_STATUSRETURN(
std::string("attempt to access JSON non-number as double"));
}
*value = v.GetDouble();
return TRITONJSON_STATUSSUCCESS;
}
// Release/clear a value.
void Release()
{
if (value_ != nullptr) {
allocator_->Free(value_);
}
}
private:
// Construct a non-top-level JSON value that references an
// existing element in a document.
explicit Value(
rapidjson::Value& v, rapidjson::Document::AllocatorType* allocator)
: value_(&v), allocator_(allocator)
{
}
// Return a value object that can be used for both a top-level
// document as well as an element within a document.
const rapidjson::Value& AsValue() const
{
if (value_ == nullptr) {
return document_;
}
return *value_;
}
rapidjson::Value& AsMutableValue()
{
if (value_ == nullptr) {
return document_;
}
return *value_;
}
// Whether this object is a document or a value. Based on this, only one
// of document_ or value_ is valid.
rapidjson::Document document_;
rapidjson::Value* value_;
rapidjson::Document::AllocatorType* allocator_;
};
};
}} // namespace triton::common
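A usage sketch for TritonJson, assuming the hypothetical std::string-based status defines shown earlier and a header name of triton_json.h; the returned statuses are ignored here for brevity:
#define TRITONJSON_STATUSTYPE std::string
#define TRITONJSON_STATUSRETURN(M) return (M)
#define TRITONJSON_STATUSSUCCESS std::string()
#include "triton_json.h"  // assumed header name

#include <iostream>
#include <string>

int main()
{
  namespace tc = triton::common;

  // Parse a top-level document and read a member.
  tc::TritonJson::Value doc;
  doc.Parse(std::string(R"({"name": "resnet50", "max_batch_size": 8})"));
  std::string name;
  doc.MemberAsString("name", &name);

  // Build a document and write it out in compact form.
  tc::TritonJson::Value out(tc::TritonJson::ValueType::OBJECT);
  out.AddString("model", name);
  out.AddInt("batch", 8);
  tc::TritonJson::WriteBuffer buffer;
  out.Write(&buffer);
  std::cout << buffer.Contents() << std::endl;
  return 0;
}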
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package inference;
//@@.. cpp:namespace:: inference
import "model_config.proto";
//@@
//@@.. cpp:var:: service InferenceService
//@@
//@@ Inference Server GRPC endpoints.
//@@
service GRPCInferenceService
{
//@@ .. cpp:var:: rpc ServerLive(ServerLiveRequest) returns
//@@ (ServerLiveResponse)
//@@
//@@ Check liveness of the inference server.
//@@
rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {}
//@@ .. cpp:var:: rpc ServerReady(ServerReadyRequest) returns
//@@ (ServerReadyResponse)
//@@
//@@ Check readiness of the inference server.
//@@
rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {}
//@@ .. cpp:var:: rpc ModelReady(ModelReadyRequest) returns
//@@ (ModelReadyResponse)
//@@
//@@ Check readiness of a model in the inference server.
//@@
rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {}
//@@ .. cpp:var:: rpc ServerMetadata(ServerMetadataRequest) returns
//@@ (ServerMetadataResponse)
//@@
//@@ Get server metadata.
//@@
rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {}
//@@ .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns
//@@ (ModelMetadataResponse)
//@@
//@@ Get model metadata.
//@@
rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {}
//@@ .. cpp:var:: rpc ModelInfer(ModelInferRequest) returns
//@@ (ModelInferResponse)
//@@
//@@ Perform inference using a specific model.
//@@
rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}
//@@ .. cpp:var:: rpc ModelStreamInfer(stream ModelInferRequest) returns
//@@ (stream ModelStreamInferResponse)
//@@
//@@ Perform streaming inference.
//@@
rpc ModelStreamInfer(stream ModelInferRequest)
returns (stream ModelStreamInferResponse)
{
}
//@@ .. cpp:var:: rpc ModelConfig(ModelConfigRequest) returns
//@@ (ModelConfigResponse)
//@@
//@@ Get model configuration.
//@@
rpc ModelConfig(ModelConfigRequest) returns (ModelConfigResponse) {}
//@@ .. cpp:var:: rpc ModelStatistics(
//@@ ModelStatisticsRequest)
//@@ returns (ModelStatisticsResponse)
//@@
//@@ Get the cumulative inference statistics for a model.
//@@
rpc ModelStatistics(ModelStatisticsRequest) returns (ModelStatisticsResponse)
{
}
//@@ .. cpp:var:: rpc RepositoryIndex(RepositoryIndexRequest) returns
//@@ (RepositoryIndexResponse)
//@@
//@@ Get the index of model repository contents.
//@@
rpc RepositoryIndex(RepositoryIndexRequest) returns (RepositoryIndexResponse)
{
}
//@@ .. cpp:var:: rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns
//@@ (RepositoryModelLoadResponse)
//@@
//@@ Load or reload a model from a repository.
//@@
rpc RepositoryModelLoad(RepositoryModelLoadRequest)
returns (RepositoryModelLoadResponse)
{
}
//@@ .. cpp:var:: rpc RepositoryModelUnload(RepositoryModelUnloadRequest)
//@@ returns (RepositoryModelUnloadResponse)
//@@
//@@ Unload a model.
//@@
rpc RepositoryModelUnload(RepositoryModelUnloadRequest)
returns (RepositoryModelUnloadResponse)
{
}
//@@ .. cpp:var:: rpc SystemSharedMemoryStatus(
//@@ SystemSharedMemoryStatusRequest)
//@@ returns (SystemSharedMemoryStatusResponse)
//@@
//@@ Get the status of all registered system-shared-memory regions.
//@@
rpc SystemSharedMemoryStatus(SystemSharedMemoryStatusRequest)
returns (SystemSharedMemoryStatusResponse)
{
}
//@@ .. cpp:var:: rpc SystemSharedMemoryRegister(
//@@ SystemSharedMemoryRegisterRequest)
//@@ returns (SystemSharedMemoryRegisterResponse)
//@@
//@@ Register a system-shared-memory region.
//@@
rpc SystemSharedMemoryRegister(SystemSharedMemoryRegisterRequest)
returns (SystemSharedMemoryRegisterResponse)
{
}
//@@ .. cpp:var:: rpc SystemSharedMemoryUnregister(
//@@ SystemSharedMemoryUnregisterRequest)
//@@ returns (SystemSharedMemoryUnregisterResponse)
//@@
//@@ Unregister a system-shared-memory region.
//@@
rpc SystemSharedMemoryUnregister(SystemSharedMemoryUnregisterRequest)
returns (SystemSharedMemoryUnregisterResponse)
{
}
//@@ .. cpp:var:: rpc CudaSharedMemoryStatus(
//@@ CudaSharedMemoryStatusRequest)
//@@ returns (CudaSharedMemoryStatusResponse)
//@@
//@@ Get the status of all registered CUDA-shared-memory regions.
//@@
rpc CudaSharedMemoryStatus(CudaSharedMemoryStatusRequest)
returns (CudaSharedMemoryStatusResponse)
{
}
//@@ .. cpp:var:: rpc CudaSharedMemoryRegister(
//@@ CudaSharedMemoryRegisterRequest)
//@@ returns (CudaSharedMemoryRegisterResponse)
//@@
//@@ Register a CUDA-shared-memory region.
//@@
rpc CudaSharedMemoryRegister(CudaSharedMemoryRegisterRequest)
returns (CudaSharedMemoryRegisterResponse)
{
}
//@@ .. cpp:var:: rpc CudaSharedMemoryUnregister(
//@@ CudaSharedMemoryUnregisterRequest)
//@@ returns (CudaSharedMemoryUnregisterResponse)
//@@
//@@ Unregister a CUDA-shared-memory region.
//@@
rpc CudaSharedMemoryUnregister(CudaSharedMemoryUnregisterRequest)
returns (CudaSharedMemoryUnregisterResponse)
{
}
//@@ .. cpp:var:: rpc TraceSetting(TraceSettingRequest)
//@@ returns (TraceSettingResponse)
//@@
//@@ Update and get the trace setting of the Triton server.
//@@
rpc TraceSetting(TraceSettingRequest) returns (TraceSettingResponse)
{
}
//@@ .. cpp:var:: rpc LogSettings(LogSettingsRequest)
//@@ returns (LogSettingsResponse)
//@@
//@@ Update and get the log settings of the Triton server.
//@@
rpc LogSettings(LogSettingsRequest) returns (LogSettingsResponse)
{
}
}
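// A hedged client-side sketch: calling ServerLive from C++ using the stubs
// that protoc and the gRPC plugin generate from this file. The generated
// header name (grpc_service.grpc.pb.h) and the use of Triton's default gRPC
// port 8001 are assumptions for illustration:
//
//   #include <grpcpp/grpcpp.h>
//   #include "grpc_service.grpc.pb.h"
//
//   auto channel = grpc::CreateChannel(
//       "localhost:8001", grpc::InsecureChannelCredentials());
//   auto stub = inference::GRPCInferenceService::NewStub(channel);
//
//   inference::ServerLiveRequest request;
//   inference::ServerLiveResponse response;
//   grpc::ClientContext context;
//   grpc::Status status = stub->ServerLive(&context, request, &response);
//   // On success, response.live() reports server liveness.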
//@@
//@@.. cpp:var:: message ServerLiveRequest
//@@
//@@ Request message for ServerLive.
//@@
message ServerLiveRequest {}
//@@
//@@.. cpp:var:: message ServerLiveResponse
//@@
//@@ Response message for ServerLive.
//@@
message ServerLiveResponse
{
//@@
//@@ .. cpp:var:: bool live
//@@
//@@ True if the inference server is live, false if not live.
//@@
bool live = 1;
}
//@@
//@@.. cpp:var:: message ServerReadyRequest
//@@
//@@ Request message for ServerReady.
//@@
message ServerReadyRequest {}
//@@
//@@.. cpp:var:: message ServerReadyResponse
//@@
//@@ Response message for ServerReady.
//@@
message ServerReadyResponse
{
//@@
//@@ .. cpp:var:: bool ready
//@@
//@@ True if the inference server is ready, false if not ready.
//@@
bool ready = 1;
}
//@@
//@@.. cpp:var:: message ModelReadyRequest
//@@
//@@ Request message for ModelReady.
//@@
message ModelReadyRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model to check for readiness.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model to check for readiness. If not given the
//@@ server will choose a version based on the model and internal policy.
//@@
string version = 2;
}
//@@
//@@.. cpp:var:: message ModelReadyResponse
//@@
//@@ Response message for ModelReady.
//@@
message ModelReadyResponse
{
//@@
//@@ .. cpp:var:: bool ready
//@@
//@@ True if the model is ready, false if not ready.
//@@
bool ready = 1;
}
//@@
//@@.. cpp:var:: message ServerMetadataRequest
//@@
//@@ Request message for ServerMetadata.
//@@
message ServerMetadataRequest {}
//@@
//@@.. cpp:var:: message ServerMetadataResponse
//@@
//@@ Response message for ServerMetadata.
//@@
message ServerMetadataResponse
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The server name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string version
//@@
//@@ The server version.
//@@
string version = 2;
//@@
//@@ .. cpp:var:: string extensions (repeated)
//@@
//@@ The extensions supported by the server.
//@@
repeated string extensions = 3;
}
//@@
//@@.. cpp:var:: message ModelMetadataRequest
//@@
//@@ Request message for ModelMetadata.
//@@
message ModelMetadataRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model to get metadata for. If not
//@@ given the server will choose a version based on the
//@@ model and internal policy.
//@@
string version = 2;
}
//@@
//@@.. cpp:var:: message ModelMetadataResponse
//@@
//@@ Response message for ModelMetadata.
//@@
message ModelMetadataResponse
{
//@@
//@@ .. cpp:var:: message TensorMetadata
//@@
//@@ Metadata for a tensor.
//@@
message TensorMetadata
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string datatype = 2;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape. A variable-size dimension is represented
//@@ by a -1 value.
//@@
repeated int64 shape = 3;
}
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The model name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string versions (repeated)
//@@
//@@ The versions of the model.
//@@
repeated string versions = 2;
//@@
//@@ .. cpp:var:: string platform
//@@
//@@ The model's platform.
//@@
string platform = 3;
//@@
//@@ .. cpp:var:: TensorMetadata inputs (repeated)
//@@
//@@ The model's inputs.
//@@
repeated TensorMetadata inputs = 4;
//@@
//@@ .. cpp:var:: TensorMetadata outputs (repeated)
//@@
//@@ The model's outputs.
//@@
repeated TensorMetadata outputs = 5;
}
//@@
//@@.. cpp:var:: message InferParameter
//@@
//@@ An inference parameter value.
//@@
message InferParameter
{
//@@ .. cpp:var:: oneof parameter_choice
//@@
//@@ The parameter value can be a string, an int64 or
//@@ a boolean
//@@
oneof parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool bool_param = 1;
//@@ .. cpp:var:: int64 int64_param
//@@
//@@ An int64 parameter value.
//@@
int64 int64_param = 2;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string string_param = 3;
}
}
//@@
//@@.. cpp:var:: message InferTensorContents
//@@
//@@ The data contained in a tensor represented by the repeated type
//@@ that matches the tensor's data type. Protobuf oneof is not used
//@@ because oneofs cannot contain repeated fields.
//@@
message InferTensorContents
{
//@@
//@@ .. cpp:var:: bool bool_contents (repeated)
//@@
//@@ Representation for BOOL data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated bool bool_contents = 1;
//@@
//@@ .. cpp:var:: int32 int_contents (repeated)
//@@
//@@ Representation for INT8, INT16, and INT32 data types. The size
//@@ must match what is expected by the tensor's shape. The contents
//@@ must be the flattened, one-dimensional, row-major order of the
//@@ tensor elements.
//@@
repeated int32 int_contents = 2;
//@@
//@@ .. cpp:var:: int64 int64_contents (repeated)
//@@
//@@ Representation for INT64 data types. The size must match what
//@@ is expected by the tensor's shape. The contents must be the
//@@ flattened, one-dimensional, row-major order of the tensor elements.
//@@
repeated int64 int64_contents = 3;
//@@
//@@ .. cpp:var:: uint32 uint_contents (repeated)
//@@
//@@ Representation for UINT8, UINT16, and UINT32 data types. The size
//@@ must match what is expected by the tensor's shape. The contents
//@@ must be the flattened, one-dimensional, row-major order of the
//@@ tensor elements.
//@@
repeated uint32 uint_contents = 4;
//@@
//@@ .. cpp:var:: uint64 uint64_contents (repeated)
//@@
//@@ Representation for UINT64 data types. The size must match what
//@@ is expected by the tensor's shape. The contents must be the
//@@ flattened, one-dimensional, row-major order of the tensor elements.
//@@
repeated uint64 uint64_contents = 5;
//@@
//@@ .. cpp:var:: float fp32_contents (repeated)
//@@
//@@ Representation for FP32 data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated float fp32_contents = 6;
//@@
//@@ .. cpp:var:: double fp64_contents (repeated)
//@@
//@@ Representation for FP64 data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated double fp64_contents = 7;
//@@
//@@ .. cpp:var:: bytes bytes_contents (repeated)
//@@
//@@ Representation for BYTES data type. The size must match what is
//@@ expected by the tensor's shape. The contents must be the flattened,
//@@ one-dimensional, row-major order of the tensor elements.
//@@
repeated bytes bytes_contents = 8;
}
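//
// Illustrative sketch (not part of the service definition): a 2x3 FP32
// tensor with rows [1, 2, 3] and [4, 5, 6] would be carried in
// 'fp32_contents' in flattened, one-dimensional, row-major order, e.g. in
// protobuf text format:
//
//   fp32_contents: [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 ]
//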
//@@
//@@.. cpp:var:: message ModelInferRequest
//@@
//@@ Request message for ModelInfer.
//@@
message ModelInferRequest
{
//@@
//@@ .. cpp:var:: message InferInputTensor
//@@
//@@ An input tensor for an inference request.
//@@
message InferInputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string datatype = 2;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape.
//@@
repeated int64 shape = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference input tensor parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@ .. cpp:var:: InferTensorContents contents
//@@
//@@ The tensor contents using a data-type format. This field
//@@ must not be specified if tensor contents are being specified
//@@ in ModelInferRequest.raw_input_contents.
//@@
InferTensorContents contents = 5;
}
//@@
//@@ .. cpp:var:: message InferRequestedOutputTensor
//@@
//@@ An output tensor requested for an inference request.
//@@
message InferRequestedOutputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional requested output tensor parameters.
//@@
map<string, InferParameter> parameters = 2;
}
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to use for inferencing.
//@@
string model_name = 1;
//@@ .. cpp:var:: string model_version
//@@
//@@ The version of the model to use for inference. If not
//@@ given the latest/most-recent version of the model is used.
//@@
string model_version = 2;
//@@ .. cpp:var:: string id
//@@
//@@ Optional identifier for the request. If specified will be
//@@ returned in the response.
//@@
string id = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@
//@@ .. cpp:var:: InferInputTensor inputs (repeated)
//@@
//@@ The input tensors for the inference.
//@@
repeated InferInputTensor inputs = 5;
//@@
//@@ .. cpp:var:: InferRequestedOutputTensor outputs (repeated)
//@@
//@@ The requested output tensors for the inference. Optional, if not
//@@ specified all outputs specified in the model config will be
//@@ returned.
//@@
repeated InferRequestedOutputTensor outputs = 6;
//@@
//@@ .. cpp:var:: bytes raw_input_contents
//@@
//@@ The data contained in an input tensor can be represented in
//@@ "raw" bytes form or in the repeated type that matches the
//@@ tensor's data type. Using the "raw" bytes form will
//@@ typically allow higher performance due to the way protobuf
//@@ allocation and reuse interacts with GRPC. For example, see
//@@ https://github.com/grpc/grpc/issues/23231.
//@@
//@@ To use the raw representation 'raw_input_contents' must be
//@@ initialized with data for each tensor in the same order as
//@@ 'inputs'. For each tensor, the size of this content must
//@@ match what is expected by the tensor's shape and data
//@@ type. The raw data must be the flattened, one-dimensional,
//@@ row-major order of the tensor elements without any stride
//@@ or padding between the elements. Note that the FP16 and BF16 data
//@@ types must be represented as raw content as there is no
//@@ specific data type for a 16-bit float type.
//@@
//@@ If this field is specified then InferInputTensor::contents
//@@ must not be specified for any input tensor.
//@@
repeated bytes raw_input_contents = 7;
}
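//
// Illustrative sketch (not part of the service definition): a minimal
// ModelInferRequest in protobuf text format. The model and tensor names
// ("simple", "INPUT0", "OUTPUT0") are hypothetical. Because 'contents' is
// used here, 'raw_input_contents' must not also be set.
//
//   model_name: "simple"
//   model_version: "1"
//   id: "request-0"
//   inputs [
//     {
//       name: "INPUT0"
//       datatype: "FP32"
//       shape: [ 1, 4 ]
//       contents { fp32_contents: [ 0.5, 1.5, 2.5, 3.5 ] }
//     }
//   ]
//   outputs [ { name: "OUTPUT0" } ]
//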
//@@
//@@.. cpp:var:: message ModelInferResponse
//@@
//@@ Response message for ModelInfer.
//@@
message ModelInferResponse
{
//@@
//@@ .. cpp:var:: message InferOutputTensor
//@@
//@@ An output tensor returned for an inference request.
//@@
message InferOutputTensor
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The tensor name.
//@@
string name = 1;
//@@
//@@ .. cpp:var:: string datatype
//@@
//@@ The tensor data type.
//@@
string datatype = 2;
//@@
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The tensor shape.
//@@
repeated int64 shape = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional output tensor parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@ .. cpp:var:: InferTensorContents contents
//@@
//@@ The tensor contents using a data-type format. This field
//@@ must not be specified if tensor contents are being specified
//@@ in ModelInferResponse.raw_output_contents.
//@@
InferTensorContents contents = 5;
}
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model used for inference.
//@@
string model_name = 1;
//@@ .. cpp:var:: string model_version
//@@
//@@ The version of the model used for inference.
//@@
string model_version = 2;
//@@ .. cpp:var:: string id
//@@
//@@ The id of the inference request if one was specified.
//@@
string id = 3;
//@@ .. cpp:var:: map<string,InferParameter> parameters
//@@
//@@ Optional inference response parameters.
//@@
map<string, InferParameter> parameters = 4;
//@@
//@@ .. cpp:var:: InferOutputTensor outputs (repeated)
//@@
//@@ The output tensors holding inference results.
//@@
repeated InferOutputTensor outputs = 5;
//@@
//@@ .. cpp:var:: bytes raw_output_contents
//@@
//@@ The data contained in an output tensor can be represented in
//@@ "raw" bytes form or in the repeated type that matches the
//@@ tensor's data type. Using the "raw" bytes form will
//@@ typically allow higher performance due to the way protobuf
//@@ allocation and reuse interacts with GRPC. For example, see
//@@ https://github.com/grpc/grpc/issues/23231.
//@@
//@@ To use the raw representation 'raw_output_contents' must be
//@@ initialized with data for each tensor in the same order as
//@@ 'outputs'. For each tensor, the size of this content must
//@@ match what is expected by the tensor's shape and data
//@@ type. The raw data must be the flattened, one-dimensional,
//@@ row-major order of the tensor elements without any stride
//@@ or padding between the elements. Note that the FP16 and BF16 data
//@@ types must be represented as raw content as there is no
//@@ specific data type for a 16-bit float type.
//@@
//@@ If this field is specified then InferOutputTensor::contents
//@@ must not be specified for any output tensor.
//@@
repeated bytes raw_output_contents = 6;
}
//@@
//@@.. cpp:var:: message ModelStreamInferResponse
//@@
//@@ Response message for ModelStreamInfer.
//@@
message ModelStreamInferResponse
{
//@@
//@@ .. cpp:var:: string error_message
//@@
//@@ The message describing the error. The empty message
//@@ indicates the inference was successful without errors.
//@@
string error_message = 1;
//@@
//@@ .. cpp:var:: ModelInferResponse infer_response
//@@
//@@ Holds the results of the request.
//@@
ModelInferResponse infer_response = 2;
}
//@@
//@@.. cpp:var:: message ModelConfigRequest
//@@
//@@ Request message for ModelConfig.
//@@
message ModelConfigRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model. If not given the model version
//@@ is selected automatically based on the version policy.
//@@
string version = 2;
}
//@@
//@@.. cpp:var:: message ModelConfigResponse
//@@
//@@ Response message for ModelConfig.
//@@
message ModelConfigResponse
{
//@@
//@@ .. cpp:var:: ModelConfig config
//@@
//@@ The model configuration.
//@@
ModelConfig config = 1;
}
//@@
//@@.. cpp:var:: message ModelStatisticsRequest
//@@
//@@ Request message for ModelStatistics.
//@@
message ModelStatisticsRequest
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model. If not given returns statistics for
//@@ all models.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model. If not given returns statistics for
//@@ all model versions.
//@@
string version = 2;
}
//@@
//@@.. cpp:var:: message StatisticDuration
//@@
//@@ Statistic recording a cumulative duration metric.
//@@
message StatisticDuration
{
//@@ .. cpp:var:: uint64 count
//@@
//@@ Cumulative number of times this metric occurred.
//@@
uint64 count = 1;
//@@ .. cpp:var:: uint64 ns
//@@
//@@ Total collected duration of this metric in nanoseconds.
//@@
uint64 ns = 2;
}
//@@
//@@.. cpp:var:: message InferStatistics
//@@
//@@ Inference statistics.
//@@
message InferStatistics
{
//@@ .. cpp:var:: StatisticDuration success
//@@
//@@ Cumulative count and duration for successful inference
//@@ requests. The "success" count and cumulative duration include
//@@ cache hits.
//@@
StatisticDuration success = 1;
//@@ .. cpp:var:: StatisticDuration fail
//@@
//@@ Cumulative count and duration for failed inference
//@@ requests.
//@@
StatisticDuration fail = 2;
//@@ .. cpp:var:: StatisticDuration queue
//@@
//@@ The count and cumulative duration that inference requests wait in
//@@ scheduling or other queues. The "queue" count and cumulative
//@@ duration include cache hits.
//@@
StatisticDuration queue = 3;
//@@ .. cpp:var:: StatisticDuration compute_input
//@@
//@@ The count and cumulative duration to prepare input tensor data as
//@@ required by the model framework / backend. For example, this duration
//@@ should include the time to copy input tensor data to the GPU.
//@@ The "compute_input" count and cumulative duration do not account for
//@@ requests that were a cache hit. See the "cache_hit" field for more
//@@ info.
//@@
StatisticDuration compute_input = 4;
//@@ .. cpp:var:: StatisticDuration compute_infer
//@@
//@@ The count and cumulative duration to execute the model.
//@@ The "compute_infer" count and cumulative duration do not account for
//@@ requests that were a cache hit. See the "cache_hit" field for more
//@@ info.
//@@
StatisticDuration compute_infer = 5;
//@@ .. cpp:var:: StatisticDuration compute_output
//@@
//@@ The count and cumulative duration to extract output tensor data
//@@ produced by the model framework / backend. For example, this duration
//@@ should include the time to copy output tensor data from the GPU.
//@@ The "compute_output" count and cumulative duration do not account for
//@@ requests that were a cache hit. See the "cache_hit" field for more
//@@ info.
//@@
StatisticDuration compute_output = 6;
//@@ .. cpp:var:: StatisticDuration cache_hit
//@@
//@@ The count of response cache hits and cumulative duration to lookup
//@@ and extract output tensor data from the Response Cache on a cache
//@@ hit. For example, this duration should include the time to copy
//@@ output tensor data from the Response Cache to the response object.
//@@ On cache hits, triton does not need to go to the model/backend
//@@ for the output tensor data, so the "compute_input", "compute_infer",
//@@ and "compute_output" fields are not updated. Assuming the response
//@@ cache is enabled for a given model, a cache hit occurs for a
//@@ request to that model when the request metadata (model name,
//@@ model version, model inputs) hashes to an existing entry in the
//@@ cache. On a cache miss, the request hash and response output tensor
//@@ data is added to the cache. See response cache docs for more info:
//@@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
//@@
StatisticDuration cache_hit = 7;
//@@ .. cpp:var:: StatisticDuration cache_miss
//@@
//@@ The count of response cache misses and cumulative duration to lookup
//@@ and insert output tensor data from the computed response to the cache.
//@@ For example, this duration should include the time to copy
//@@ output tensor data from the response object to the Response Cache.
//@@ Assuming the response cache is enabled for a given model, a cache
//@@ miss occurs for a request to that model when the request metadata
//@@ does NOT hash to an existing entry in the cache. See the response
//@@ cache docs for more info:
//@@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
//@@
StatisticDuration cache_miss = 8;
}
//@@
//@@.. cpp:var:: message InferBatchStatistics
//@@
//@@ Inference batch statistics.
//@@
message InferBatchStatistics
{
//@@ .. cpp:var:: uint64 batch_size
//@@
//@@ The size of the batch.
//@@
uint64 batch_size = 1;
//@@ .. cpp:var:: StatisticDuration compute_input
//@@
//@@ The count and cumulative duration to prepare input tensor data as
//@@ required by the model framework / backend with the given batch size.
//@@ For example, this duration should include the time to copy input
//@@ tensor data to the GPU.
//@@
StatisticDuration compute_input = 2;
//@@ .. cpp:var:: StatisticDuration compute_infer
//@@
//@@ The count and cumulative duration to execute the model with the given
//@@ batch size.
//@@
StatisticDuration compute_infer = 3;
//@@ .. cpp:var:: StatisticDuration compute_output
//@@
//@@ The count and cumulative duration to extract output tensor data
//@@ produced by the model framework / backend with the given batch size.
//@@ For example, this duration should include the time to copy output
//@@ tensor data from the GPU.
//@@
StatisticDuration compute_output = 4;
}
//@@
//@@.. cpp:var:: message ModelStatistics
//@@
//@@ Statistics for a specific model and version.
//@@
message ModelStatistics
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model.
//@@
string version = 2;
//@@ .. cpp:var:: uint64 last_inference
//@@
//@@ The timestamp of the last inference request made for this model,
//@@ as milliseconds since the epoch.
//@@
uint64 last_inference = 3;
//@@ .. cpp:var:: uint64 inference_count
//@@
//@@ The cumulative count of successful inference requests made for this
//@@ model. Each inference in a batched request is counted as an
//@@ individual inference. For example, if a client sends a single
//@@ inference request with batch size 64, "inference_count" will be
//@@ incremented by 64. Similarly, if a client sends 64 individual
//@@ requests each with batch size 1, "inference_count" will be
//@@ incremented by 64. The "inference_count" value DOES NOT include
//@@ cache hits.
//@@
uint64 inference_count = 4;
//@@ .. cpp:var:: uint64 execution_count
//@@
//@@ The cumulative count of the number of successful inference executions
//@@ performed for the model. When dynamic batching is enabled, a single
//@@ model execution can perform inferencing for more than one inference
//@@ request. For example, if a client sends 64 individual requests each
//@@ with batch size 1 and the dynamic batcher batches them into a single
//@@ large batch for model execution then "execution_count" will be
//@@ incremented by 1. If, on the other hand, the dynamic batcher is
//@@ not enabled for that model and each of the 64 individual requests
//@@ is executed independently, then "execution_count" will be
//@@ incremented by 64.
//@@ The "execution_count" value DOES NOT include cache hits.
//@@
uint64 execution_count = 5;
//@@ .. cpp:var:: InferStatistics inference_stats
//@@
//@@ The aggregate statistics for the model/version.
//@@
InferStatistics inference_stats = 6;
//@@ .. cpp:var:: InferBatchStatistics batch_stats (repeated)
//@@
//@@ The aggregate statistics for each different batch size that is
//@@ executed in the model. The batch statistics indicate how many actual
//@@ model executions were performed and show differences due to different
//@@ batch size (for example, larger batches typically take longer to
//@@ compute).
//@@
repeated InferBatchStatistics batch_stats = 7;
}
//@@
//@@.. cpp:var:: message ModelStatisticsResponse
//@@
//@@ Response message for ModelStatistics.
//@@
message ModelStatisticsResponse
{
//@@ .. cpp:var:: ModelStatistics model_stats (repeated)
//@@
//@@ Statistics for each requested model.
//@@
repeated ModelStatistics model_stats = 1;
}
//@@
//@@.. cpp:var:: message ModelRepositoryParameter
//@@
//@@ A model repository parameter value.
//@@
message ModelRepositoryParameter
{
//@@ .. cpp:var:: oneof parameter_choice
//@@
//@@ The parameter value can be a string, an int64 or
//@@ a boolean
//@@
oneof parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool bool_param = 1;
//@@ .. cpp:var:: int64 int64_param
//@@
//@@ An int64 parameter value.
//@@
int64 int64_param = 2;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string string_param = 3;
//@@ .. cpp:var:: bytes bytes_param
//@@
//@@ A bytes parameter value.
//@@
bytes bytes_param = 4;
}
}
//@@
//@@.. cpp:var:: message RepositoryIndexRequest
//@@
//@@ Request message for RepositoryIndex.
//@@
message RepositoryIndexRequest
{
//@@ .. cpp:var:: string repository_name
//@@
//@@ The name of the repository. If empty the index is returned
//@@ for all repositories.
//@@
string repository_name = 1;
//@@ .. cpp:var:: bool ready
//@@
//@@ If true, only models currently ready for inferencing are returned.
//@@
bool ready = 2;
}
//@@
//@@.. cpp:var:: message RepositoryIndexResponse
//@@
//@@ Response message for RepositoryIndex.
//@@
message RepositoryIndexResponse
{
//@@
//@@ .. cpp:var:: message ModelIndex
//@@
//@@ Index entry for a model.
//@@
message ModelIndex
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string version
//@@
//@@ The version of the model.
//@@
string version = 2;
//@@
//@@ .. cpp:var:: string state
//@@
//@@ The state of the model.
//@@
string state = 3;
//@@
//@@ .. cpp:var:: string reason
//@@
//@@ The reason, if any, that the model is in the given state.
//@@
string reason = 4;
}
//@@
//@@ .. cpp:var:: ModelIndex models (repeated)
//@@
//@@ An index entry for each model.
//@@
repeated ModelIndex models = 1;
}
//@@
//@@.. cpp:var:: message RepositoryModelLoadRequest
//@@
//@@ Request message for RepositoryModelLoad.
//@@
message RepositoryModelLoadRequest
{
//@@ .. cpp:var:: string repository_name
//@@
//@@ The name of the repository to load from. If empty the model
//@@ is loaded from any repository.
//@@
string repository_name = 1;
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to load, or reload.
//@@
string model_name = 2;
//@@ .. cpp:var:: map<string,ModelRepositoryParameter> parameters
//@@
//@@ Optional model repository request parameters.
//@@
map<string, ModelRepositoryParameter> parameters = 3;
}
//@@
//@@.. cpp:var:: message RepositoryModelLoadResponse
//@@
//@@ Response message for RepositoryModelLoad.
//@@
message RepositoryModelLoadResponse {}
//@@
//@@.. cpp:var:: message RepositoryModelUnloadRequest
//@@
//@@ Request message for RepositoryModelUnload.
//@@
message RepositoryModelUnloadRequest
{
//@@ .. cpp:var:: string repository_name
//@@
//@@ The name of the repository from which the model was originally
//@@ loaded. If empty the repository is not considered.
//@@
string repository_name = 1;
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to unload.
//@@
string model_name = 2;
//@@ .. cpp:var:: map<string,ModelRepositoryParameter> parameters
//@@
//@@ Optional model repository request parameters.
//@@
map<string, ModelRepositoryParameter> parameters = 3;
}
//@@
//@@.. cpp:var:: message RepositoryModelUnloadResponse
//@@
//@@ Response message for RepositoryModelUnload.
//@@
message RepositoryModelUnloadResponse {}
//@@
//@@.. cpp:var:: message SystemSharedMemoryStatusRequest
//@@
//@@ Request message for SystemSharedMemoryStatus.
//@@
message SystemSharedMemoryStatusRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the region to get status for. If empty the
//@@ status is returned for all registered regions.
//@@
string name = 1;
}
//@@
//@@.. cpp:var:: message SystemSharedMemoryStatusResponse
//@@
//@@ Response message for SystemSharedMemoryStatus.
//@@
message SystemSharedMemoryStatusResponse
{
//@@
//@@ .. cpp:var:: message RegionStatus
//@@
//@@ Status for a shared memory region.
//@@
message RegionStatus
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name for the shared memory region.
//@@
string name = 1;
//@@ .. cpp:var:: string key
//@@
//@@ The key of the underlying memory object that contains the
//@@ shared memory region.
//@@
string key = 2;
//@@ .. cpp:var:: uint64 offset
//@@
//@@ Offset, in bytes, within the underlying memory object to
//@@ the start of the shared memory region.
//@@
uint64 offset = 3;
//@@ .. cpp:var:: uint64 byte_size
//@@
//@@ Size of the shared memory region, in bytes.
//@@
uint64 byte_size = 4;
}
//@@
//@@ .. cpp:var:: map<string,RegionStatus> regions
//@@
//@@ Status for each of the registered regions, indexed by
//@@ region name.
//@@
map<string, RegionStatus> regions = 1;
}
//@@
//@@.. cpp:var:: message SystemSharedMemoryRegisterRequest
//@@
//@@ Request message for SystemSharedMemoryRegister.
//@@
message SystemSharedMemoryRegisterRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the region to register.
//@@
string name = 1;
//@@ .. cpp:var:: string key
//@@
//@@ The key of the underlying memory object that contains the
//@@ shared memory region.
//@@
string key = 2;
//@@ .. cpp:var:: uint64 offset
//@@
//@@ Offset, in bytes, within the underlying memory object to
//@@ the start of the shared memory region.
//@@
uint64 offset = 3;
//@@ .. cpp:var:: uint64 byte_size
//@@
//@@ Size of the shared memory region, in bytes.
//@@
uint64 byte_size = 4;
}
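//
// Illustrative sketch (not part of the service definition): registering a
// 16KB region starting at the beginning of a hypothetical shared-memory
// object "/triton_input_shm", in protobuf text format. The region and key
// names are assumptions for the example.
//
//   name: "input_region"
//   key: "/triton_input_shm"
//   offset: 0
//   byte_size: 16384
//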
//@@
//@@.. cpp:var:: message SystemSharedMemoryRegisterResponse
//@@
//@@ Response message for SystemSharedMemoryRegister.
//@@
message SystemSharedMemoryRegisterResponse {}
//@@
//@@.. cpp:var:: message SystemSharedMemoryUnregisterRequest
//@@
//@@ Request message for SystemSharedMemoryUnregister.
//@@
message SystemSharedMemoryUnregisterRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the system region to unregister. If empty
//@@ all system shared-memory regions are unregistered.
//@@
string name = 1;
}
//@@
//@@.. cpp:var:: message SystemSharedMemoryUnregisterResponse
//@@
//@@ Response message for SystemSharedMemoryUnregister.
//@@
message SystemSharedMemoryUnregisterResponse {}
//@@
//@@.. cpp:var:: message CudaSharedMemoryStatusRequest
//@@
//@@ Request message for CudaSharedMemoryStatus.
//@@
message CudaSharedMemoryStatusRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the region to get status for. If empty the
//@@ status is returned for all registered regions.
//@@
string name = 1;
}
//@@
//@@.. cpp:var:: message CudaSharedMemoryStatusResponse
//@@
//@@ Response message for CudaSharedMemoryStatus.
//@@
message CudaSharedMemoryStatusResponse
{
//@@
//@@ .. cpp:var:: message RegionStatus
//@@
//@@ Status for a shared memory region.
//@@
message RegionStatus
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name for the shared memory region.
//@@
string name = 1;
//@@ .. cpp:var:: uint64 device_id
//@@
//@@ The GPU device ID where the cudaIPC handle was created.
//@@
uint64 device_id = 2;
//@@ .. cpp:var:: uint64 byte_size
//@@
//@@ Size of the shared memory region, in bytes.
//@@
uint64 byte_size = 3;
}
//@@
//@@ .. cpp:var:: map<string,RegionStatus> regions
//@@
//@@ Status for each of the registered regions, indexed by
//@@ region name.
//@@
map<string, RegionStatus> regions = 1;
}
//@@
//@@.. cpp:var:: message CudaSharedMemoryRegisterRequest
//@@
//@@ Request message for CudaSharedMemoryRegister.
//@@
message CudaSharedMemoryRegisterRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the region to register.
//@@
string name = 1;
//@@ .. cpp:var:: bytes raw_handle
//@@
//@@ The raw serialized cudaIPC handle.
//@@
bytes raw_handle = 2;
//@@ .. cpp:var:: int64 device_id
//@@
//@@ The GPU device ID on which the cudaIPC handle was created.
//@@
int64 device_id = 3;
//@@ .. cpp:var:: uint64 byte_size
//@@
//@@ Size of the shared memory block, in bytes.
//@@
uint64 byte_size = 4;
}
//@@
//@@.. cpp:var:: message CudaSharedMemoryRegisterResponse
//@@
//@@ Response message for CudaSharedMemoryRegister.
//@@
message CudaSharedMemoryRegisterResponse {}
//@@
//@@.. cpp:var:: message CudaSharedMemoryUnregisterRequest
//@@
//@@ Request message for CudaSharedMemoryUnregister.
//@@
message CudaSharedMemoryUnregisterRequest
{
//@@
//@@ .. cpp:var:: string name
//@@
//@@ The name of the cuda region to unregister. If empty
//@@ all cuda shared-memory regions are unregistered.
//@@
string name = 1;
}
//@@
//@@.. cpp:var:: message CudaSharedMemoryUnregisterResponse
//@@
//@@ Response message for CudaSharedMemoryUnregister.
//@@
message CudaSharedMemoryUnregisterResponse {}
//@@
//@@.. cpp:var:: message TraceSettingRequest
//@@
//@@ Request message for TraceSetting.
//@@
message TraceSettingRequest
{
//@@
//@@ .. cpp:var:: message SettingValue
//@@
//@@ The values to be associated with a trace setting.
//@@ If no value is provided, the setting will be cleared and
//@@ the global setting value will be used.
//@@
message SettingValue
{
//@@
//@@ .. cpp:var:: string value (repeated)
//@@
//@@ The value.
//@@
repeated string value = 1;
}
//@@ .. cpp:var:: map<string,SettingValue> settings
//@@
//@@ The new setting values to be updated; settings that are
//@@ not specified will remain unchanged.
//@@
map<string, SettingValue> settings = 1;
//@@
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to apply the new trace settings to.
//@@ If not given, the new settings will be applied globally.
//@@
string model_name = 2;
}
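//
// Illustrative sketch (not part of the service definition): updating a
// single trace setting for one model, in protobuf text format. The setting
// key "trace_rate" and the model name "simple" are assumptions for the
// example.
//
//   settings {
//     key: "trace_rate"
//     value { value: "1000" }
//   }
//   model_name: "simple"
//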
//@@
//@@.. cpp:var:: message TraceSettingResponse
//@@
//@@ Response message for TraceSetting.
//@@
message TraceSettingResponse
{
//@@
//@@ .. cpp:var:: message SettingValue
//@@
//@@ The values to be associated with a trace setting.
//@@
message SettingValue
{
//@@
//@@ .. cpp:var:: string value (repeated)
//@@
//@@ The value.
//@@
repeated string value = 1;
}
//@@ .. cpp:var:: map<string,SettingValue> settings
//@@
//@@ The current trace settings, including any changes specified
//@@ by TraceSettingRequest.
//@@
map<string, SettingValue> settings = 1;
}
//@@
//@@.. cpp:var:: message LogSettingsRequest
//@@
//@@ Request message for LogSettings.
//@@
message LogSettingsRequest
{
message SettingValue
{
oneof parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool bool_param = 1;
//@@ .. cpp:var:: uint32 uint32_param
//@@
//@@ A uint32 parameter value.
//@@
uint32 uint32_param = 2;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string string_param = 3;
}
}
//@@ .. cpp:var:: map<string,SettingValue> settings
//@@
//@@ The new log setting values to be updated; settings that are
//@@ not specified will remain unchanged.
//@@
map<string, SettingValue> settings = 1;
}
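//
// Illustrative sketch (not part of the service definition): raising the
// verbose logging level, in protobuf text format. The setting key
// "log_verbose_level" is an assumption for the example.
//
//   settings {
//     key: "log_verbose_level"
//     value { uint32_param: 1 }
//   }
//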
//@@
//@@.. cpp:var:: message LogSettingsResponse
//@@
//@@ Response message for LogSettings.
//@@
message LogSettingsResponse
{
message SettingValue
{
oneof parameter_choice
{
//@@ .. cpp:var:: bool bool_param
//@@
//@@ A boolean parameter value.
//@@
bool bool_param = 1;
//@@ .. cpp:var:: uint32 uint32_param
//@@
//@@ A uint32 parameter value.
//@@
uint32 uint32_param = 2;
//@@ .. cpp:var:: string string_param
//@@
//@@ A string parameter value.
//@@
string string_param = 3;
}
}
//@@ .. cpp:var:: map<string,SettingValue> settings
//@@
//@@ The current log settings.
//@@
map<string, SettingValue> settings = 1;
}
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2018, TensorFlow Authors. All rights reserved.
syntax = "proto3";
package inference;
//@@.. cpp:namespace:: inference
//@@
//@@.. cpp:enum:: DataType
//@@
//@@ Data types supported for input and output tensors.
//@@
enum DataType {
//@@ .. cpp:enumerator:: DataType::INVALID = 0
TYPE_INVALID = 0;
//@@ .. cpp:enumerator:: DataType::BOOL = 1
TYPE_BOOL = 1;
//@@ .. cpp:enumerator:: DataType::UINT8 = 2
TYPE_UINT8 = 2;
//@@ .. cpp:enumerator:: DataType::UINT16 = 3
TYPE_UINT16 = 3;
//@@ .. cpp:enumerator:: DataType::UINT32 = 4
TYPE_UINT32 = 4;
//@@ .. cpp:enumerator:: DataType::UINT64 = 5
TYPE_UINT64 = 5;
//@@ .. cpp:enumerator:: DataType::INT8 = 6
TYPE_INT8 = 6;
//@@ .. cpp:enumerator:: DataType::INT16 = 7
TYPE_INT16 = 7;
//@@ .. cpp:enumerator:: DataType::INT32 = 8
TYPE_INT32 = 8;
//@@ .. cpp:enumerator:: DataType::INT64 = 9
TYPE_INT64 = 9;
//@@ .. cpp:enumerator:: DataType::FP16 = 10
TYPE_FP16 = 10;
//@@ .. cpp:enumerator:: DataType::FP32 = 11
TYPE_FP32 = 11;
//@@ .. cpp:enumerator:: DataType::FP64 = 12
TYPE_FP64 = 12;
//@@ .. cpp:enumerator:: DataType::STRING = 13
TYPE_STRING = 13;
//@@ .. cpp:enumerator:: DataType::BF16 = 14
TYPE_BF16 = 14;
}
//@@
//@@ .. cpp:var:: message ModelRateLimiter
//@@
//@@ The specifications required by the rate limiter to properly
//@@ schedule the inference requests across the different models
//@@ and their instances.
//@@
message ModelRateLimiter
{
//@@ .. cpp:var:: message Resource
//@@
//@@ The resource property.
//@@
message Resource
{
//@@ .. cpp:var:: string name
//@@
//@@ The name associated with the resource.
//@@
string name = 1;
//@@ .. cpp:var:: bool global
//@@
//@@ Whether or not the resource is global. If true then the resource
//@@ is assumed to be shared among the devices; otherwise the specified
//@@ count of the resource is assumed for each device associated
//@@ with the instance.
//@@
bool global = 2;
//@@ .. cpp:var:: uint32 count
//@@
//@@ The number of resources required for the execution of the model
//@@ instance.
//@@
uint32 count = 3;
}
//@@ .. cpp:var:: Resource resources (repeated)
//@@
//@@ The resources required to execute the request on a model instance.
//@@ Resources are just names with a corresponding count. The execution
//@@ of the instance will be blocked until the specified resources are
//@@ available. By default an instance uses no rate-limiter resources.
//@@
repeated Resource resources = 1;
//@@ .. cpp:var:: uint32 priority
//@@
//@@ The optional weighting value to be used for prioritizing across
//@@ instances. An instance with priority 2 will be given 1/2 the
//@@ number of scheduling chances as an instance_group with priority
//@@ 1. The default priority is 1. The priority of value 0 will be
//@@ treated as priority 1.
//@@
uint32 priority = 2;
}
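//
// Illustrative sketch (not part of the model configuration schema): a
// ModelRateLimiter as it might appear inside an instance group entry, in
// protobuf text format. The resource names "R1" and "R2" are hypothetical.
// An instance of this group executes only when 4 units of "R1" on its
// device and 2 units of the global resource "R2" are available, and it
// receives half the scheduling chances of a priority-1 group.
//
//   rate_limiter {
//     resources [
//       { name: "R1" count: 4 },
//       { name: "R2" global: true count: 2 }
//     ]
//     priority: 2
//   }
//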
//@@
//@@.. cpp:var:: message ModelInstanceGroup
//@@
//@@ A group of one or more instances of a model and resources made
//@@ available for those instances.
//@@
message ModelInstanceGroup
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ Kind of this instance group.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::KIND_AUTO = 0
//@@
//@@ This instance group represents instances that can run on either
//@@ CPU or GPU. If all GPUs listed in 'gpus' are available then
//@@ instances will be created on GPU(s), otherwise instances will
//@@ be created on CPU.
//@@
KIND_AUTO = 0;
//@@ .. cpp:enumerator:: Kind::KIND_GPU = 1
//@@
//@@ This instance group represents instances that must run on the
//@@ GPU.
//@@
KIND_GPU = 1;
//@@ .. cpp:enumerator:: Kind::KIND_CPU = 2
//@@
//@@ This instance group represents instances that must run on the
//@@ CPU.
//@@
KIND_CPU = 2;
//@@ .. cpp:enumerator:: Kind::KIND_MODEL = 3
//@@
//@@ This instance group represents instances that should run on the
//@@ CPU and/or GPU(s) as specified by the model or backend itself.
//@@ The inference server will not override the model/backend
//@@ settings.
//@@
KIND_MODEL = 3;
}
//@@
//@@ .. cpp:var:: message SecondaryDevice
//@@
//@@ A secondary device required for a model instance.
//@@
message SecondaryDevice
{
//@@
//@@ .. cpp:enum:: SecondaryDeviceKind
//@@
//@@ The kind of the secondary device.
//@@
enum SecondaryDeviceKind {
//@@ .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0
//@@
//@@ An NVDLA core. http://nvdla.org
//@@ Currently KIND_NVDLA is only supported by the TensorRT backend.
//@@
KIND_NVDLA = 0;
}
//@@ .. cpp:var:: SecondaryDeviceKind kind
//@@
//@@ The secondary device kind.
//@@
SecondaryDeviceKind kind = 1;
//@@ .. cpp:var:: int64 device_id
//@@
//@@ Identifier for the secondary device.
//@@
int64 device_id = 2;
}
//@@ .. cpp:var:: string name
//@@
//@@ Optional name of this group of instances. If not specified the
//@@ name will be formed as <model name>_<group number>. The name of
//@@ individual instances will be further formed by a unique instance
//@@ number and GPU index.
//@@
string name = 1;
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this instance group. Default is KIND_AUTO. If
//@@ KIND_AUTO or KIND_GPU then both 'count' and 'gpus' are valid and
//@@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid
//@@ and 'gpus' cannot be specified.
//@@
Kind kind = 4;
//@@ .. cpp:var:: int32 count
//@@
//@@ For a group assigned to GPU, the number of instances created for
//@@ each GPU listed in 'gpus'. For a group assigned to CPU the number
//@@ of instances created. Default is 1.
//@@
int32 count = 2;
//@@ .. cpp:var:: ModelRateLimiter rate_limiter
//@@
//@@ The rate limiter specific settings to be associated with this
//@@ instance group. Optional, if not specified no rate limiting
//@@ will be applied to this instance group.
//@@
ModelRateLimiter rate_limiter = 6;
//@@ .. cpp:var:: int32 gpus (repeated)
//@@
//@@ GPU(s) where instances should be available. For each GPU listed,
//@@ 'count' instances of the model will be available. Setting 'gpus'
//@@ to empty (or not specifying at all) is equivalent to listing all
//@@ available GPUs.
//@@
repeated int32 gpus = 3;
//@@ .. cpp:var:: SecondaryDevice secondary_devices (repeated)
//@@
//@@ Secondary devices that are required by instances specified by this
//@@ instance group. Optional.
//@@
repeated SecondaryDevice secondary_devices = 8;
//@@ .. cpp:var:: string profile (repeated)
//@@
//@@ For TensorRT models containing multiple optimization profiles, this
//@@ parameter specifies a set of optimization profiles available to this
//@@ instance group. The inference server will choose the optimal profile
//@@ based on the shapes of the input tensors. This field should lie
//@@ between 0 and <TotalNumberOfOptimizationProfilesInPlanModel> - 1
//@@ and be specified only for TensorRT backend, otherwise an error will
//@@ be generated. If not specified, the server will select the first
//@@ optimization profile by default.
//@@
repeated string profile = 5;
//@@ .. cpp:var:: bool passive
//@@
//@@ Whether the instances within this instance group will be accepting
//@@ inference requests from the scheduler. If true, the instances will
//@@ not be added to the scheduler. Default value is false.
//@@
bool passive = 7;
//@@ .. cpp:var:: string host_policy
//@@
//@@ The host policy name that the instance is to be associated with.
//@@ The default value is set to reflect the device kind of the instance,
//@@ for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and
//@@ KIND_GPU is "gpu_<gpu_id>".
//@@
string host_policy = 9;
}
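//
// Illustrative sketch (not part of the model configuration schema): an
// instance group as it might appear in a model's config.pbtxt, assuming the
// enclosing ModelConfig field is named 'instance_group'. Two instances are
// created on each of GPUs 0 and 1.
//
//   instance_group [
//     {
//       name: "my_model_gpu"
//       kind: KIND_GPU
//       count: 2
//       gpus: [ 0, 1 ]
//     }
//   ]
//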
//@@
//@@.. cpp:var:: message ModelTensorReshape
//@@
//@@ Reshape specification for input and output tensors.
//@@
message ModelTensorReshape
{
//@@ .. cpp:var:: int64 shape (repeated)
//@@
//@@ The shape to use for reshaping.
//@@
repeated int64 shape = 1;
}
//@@
//@@.. cpp:var:: message ModelInput
//@@
//@@ An input required by the model.
//@@
message ModelInput
{
//@@
//@@ .. cpp:enum:: Format
//@@
//@@ The format for the input.
//@@
enum Format {
//@@ .. cpp:enumerator:: Format::FORMAT_NONE = 0
//@@
//@@ The input has no specific format. This is the default.
//@@
FORMAT_NONE = 0;
//@@ .. cpp:enumerator:: Format::FORMAT_NHWC = 1
//@@
//@@ HWC image format. Tensors with this format require 3 dimensions
//@@ if the model does not support batching (max_batch_size = 0) or 4
//@@ dimensions if the model does support batching (max_batch_size
//@@ >= 1). In either case the 'dims' below should only specify the
//@@ 3 non-batch dimensions (i.e. HWC or CHW).
//@@
FORMAT_NHWC = 1;
//@@ .. cpp:enumerator:: Format::FORMAT_NCHW = 2
//@@
//@@ CHW image format. Tensors with this format require 3 dimensions
//@@ if the model does not support batching (max_batch_size = 0) or 4
//@@ dimensions if the model does support batching (max_batch_size
//@@ >= 1). In either case the 'dims' below should only specify the
//@@ 3 non-batch dimensions (i.e. HWC or CHW).
//@@
FORMAT_NCHW = 2;
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the input.
//@@
string name = 1;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the input.
//@@
DataType data_type = 2;
//@@ .. cpp:var:: Format format
//@@
//@@ The format of the input. Optional.
//@@
Format format = 3;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions/shape of the input tensor that must be provided
//@@ when invoking the inference API for this model.
//@@
repeated int64 dims = 4;
//@@ .. cpp:var:: ModelTensorReshape reshape
//@@
//@@ The shape expected for this input by the backend. The input will
//@@ be reshaped to this before being presented to the backend. The
//@@ reshape must have the same number of elements as the input shape
//@@ specified by 'dims'. Optional.
//@@
ModelTensorReshape reshape = 5;
//@@ .. cpp:var:: bool is_shape_tensor
//@@
//@@ Whether or not the input is a shape tensor to the model. This field
//@@ is currently supported only for the TensorRT model. An error will be
//@@ generated if this specification does not comply with underlying
//@@ model.
//@@
bool is_shape_tensor = 6;
//@@ .. cpp:var:: bool allow_ragged_batch
//@@
//@@ Whether or not the input is allowed to be "ragged" in a dynamically
//@@ created batch. Default is false indicating that two requests will
//@@ only be batched if this tensor has the same shape in both requests.
//@@ True indicates that two requests can be batched even if this tensor
//@@ has a different shape in each request.
//@@
bool allow_ragged_batch = 7;
//@@ .. cpp:var:: bool optional
//@@
//@@ Whether or not the input is optional for the model execution.
//@@ If true, the input is not required in the inference request.
//@@ Default value is false.
//@@
bool optional = 8;
}
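//
// Illustrative sketch (not part of the model configuration schema): an
// input declared with API dims [ 4 ] but reshaped to the [ 1, 4 ] shape
// expected by the backend, assuming the enclosing ModelConfig field is
// named 'input'. Both shapes hold the same number of elements, as required
// by 'reshape'. The tensor name is hypothetical.
//
//   input [
//     {
//       name: "INPUT0"
//       data_type: TYPE_FP32
//       dims: [ 4 ]
//       reshape: { shape: [ 1, 4 ] }
//     }
//   ]
//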
//@@
//@@.. cpp:var:: message ModelOutput
//@@
//@@ An output produced by the model.
//@@
message ModelOutput
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the output.
//@@
string name = 1;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the output.
//@@
DataType data_type = 2;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The dimensions/shape of the output tensor.
//@@
repeated int64 dims = 3;
//@@ .. cpp:var:: ModelTensorReshape reshape
//@@
//@@ The shape produced for this output by the backend. The output will
//@@ be reshaped from this to the shape specified in 'dims' before being
//@@ returned in the inference response. The reshape must have the same
//@@ number of elements as the output shape specified by 'dims'. Optional.
//@@
ModelTensorReshape reshape = 5;
//@@ .. cpp:var:: string label_filename
//@@
//@@ The label file associated with this output. Should be specified only
//@@ for outputs that represent classifications. Optional.
//@@
string label_filename = 4;
//@@ .. cpp:var:: bool is_shape_tensor
//@@
//@@ Whether or not the output is a shape tensor to the model. This field
//@@ is currently supported only for the TensorRT model. An error will be
//@@ generated if this specification does not comply with underlying
//@@ model.
//@@
bool is_shape_tensor = 6;
}
//@@ .. cpp:var:: message BatchInput
//@@
//@@ A batch input is an additional input that must be added by
//@@ the backend based on all the requests in a batch.
//@@
message BatchInput
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the batch input.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0
//@@
//@@ The element count of the 'source_input' will be added as
//@@ input with shape [1].
//@@
BATCH_ELEMENT_COUNT = 0;
//@@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1
//@@
//@@ The accumulated element count of the 'source_input' will be
//@@ added as input with shape [1]. For example, if there is a
//@@ batch of two requests, each with 2 elements, an input of value
//@@ 2 will be added to the first request, and an input of value
//@@ 4 will be added to the second request.
//@@
BATCH_ACCUMULATED_ELEMENT_COUNT = 1;
//@@ .. cpp:enumerator::
//@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2
//@@
//@@ The accumulated element count of the 'source_input' will be
//@@ added as input with shape [1], except for the first request
//@@ in the batch. For the first request in the batch, the input
//@@ will have shape [2] where the first element is value 0.
//@@
BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2;
//@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3
//@@
//@@ Among the requests in the batch, the max element count of the
//@@ 'source_input' will be added as input with shape
//@@ [max_element_count] for the first request in the batch.
//@@ For other requests, such input will be with shape [0].
//@@ The data of the tensor will be uninitialized.
//@@
BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3;
//@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4
//@@
//@@ Among the requests in the batch, the shape of the
//@@ 'source_input' will be added as input with shape
//@@ [batch_size, len(input_dim)]. For example, if one
//@@ batch-2 input with shape [3, 1] and batch-1 input
//@@ with shape [2, 2] are batched, the batch input will
//@@ have shape [3, 2] and value [ [3, 1], [3, 1], [2, 2]].
//@@
BATCH_ITEM_SHAPE = 4;
//@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5
//@@
//@@ Among the requests in the batch, the shape of the
//@@ 'source_input' will be added as input with single dimensional
//@@ shape [batch_size * len(input_dim)]. For example, if one
//@@ batch-2 input with shape [3, 1] and batch-1 input
//@@ with shape [2, 2] are batched, the batch input will
//@@ have shape [6] and value [3, 1, 3, 1, 2, 2].
//@@
BATCH_ITEM_SHAPE_FLATTEN = 5;
}
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this batch input.
//@@
Kind kind = 1;
//@@ .. cpp:var:: string target_name (repeated)
//@@
//@@ The name of the model inputs that the backend will create
//@@ for this batch input.
//@@
repeated string target_name = 2;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The input's datatype. The data type can be TYPE_INT32 or
//@@ TYPE_FP32.
//@@
DataType data_type = 3;
//@@ .. cpp:var:: string source_input (repeated)
//@@
//@@ The backend derives the value for each batch input from one or
//@@ more other inputs. 'source_input' gives the names of those
//@@ inputs.
//@@
repeated string source_input = 4;
}
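//
// Illustrative sketch (not part of the model configuration schema): a batch
// input that provides the backend with the accumulated element count of a
// hypothetical ragged input "INPUT0", assuming the enclosing ModelConfig
// field is named 'batch_input'.
//
//   batch_input [
//     {
//       kind: BATCH_ACCUMULATED_ELEMENT_COUNT
//       target_name: "INPUT0_ACC_COUNT"
//       data_type: TYPE_FP32
//       source_input: "INPUT0"
//     }
//   ]
//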
//@@.. cpp:var:: message BatchOutput
//@@
//@@ A batch output is an output produced by the model that must be handled
//@@ differently by the backend based on all the requests in a batch.
//@@
message BatchOutput
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the batch output.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0
//@@
//@@ The output should be scattered according to the shape of
//@@ 'source_input'. The dynamic dimension of the output will
//@@ be set to the value of the same dimension in the input.
//@@
BATCH_SCATTER_WITH_INPUT_SHAPE = 0;
}
//@@ .. cpp:var:: string target_name (repeated)
//@@
//@@ The name of the outputs to be produced by this batch output
//@@ specification.
//@@
repeated string target_name = 1;
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this batch output.
//@@
Kind kind = 2;
//@@ .. cpp:var:: string source_input (repeated)
//@@
//@@ The backend derives each batch output from one or more inputs.
//@@ 'source_input' gives the names of those inputs.
//@@
repeated string source_input = 3;
}
//@@
//@@.. cpp:var:: message ModelVersionPolicy
//@@
//@@ Policy indicating which versions of a model should be made
//@@ available by the inference server.
//@@
message ModelVersionPolicy
{
//@@ .. cpp:var:: message Latest
//@@
//@@ Serve only the latest version(s) of a model. This is
//@@ the default policy.
//@@
message Latest
{
//@@ .. cpp:var:: uint32 num_versions
//@@
//@@ Serve only the 'num_versions' highest-numbered versions. The
//@@ default value of 'num_versions' is 1, indicating that by
//@@ default only the single highest-numbered version of a
//@@ model will be served.
//@@
uint32 num_versions = 1;
}
//@@ .. cpp:var:: message All
//@@
//@@ Serve all versions of the model.
//@@
message All {}
//@@ .. cpp:var:: message Specific
//@@
//@@ Serve only specific versions of the model.
//@@
message Specific
{
//@@ .. cpp:var:: int64 versions (repeated)
//@@
//@@ The specific versions of the model that will be served.
//@@
repeated int64 versions = 1;
}
//@@ .. cpp:var:: oneof policy_choice
//@@
//@@ Each model must implement only a single version policy. The
//@@ default policy is 'Latest'.
//@@
oneof policy_choice
{
//@@ .. cpp:var:: Latest latest
//@@
//@@ Serve only latest version(s) of the model.
//@@
Latest latest = 1;
//@@ .. cpp:var:: All all
//@@
//@@ Serve all versions of the model.
//@@
All all = 2;
//@@ .. cpp:var:: Specific specific
//@@
//@@ Serve only specific version(s) of the model.
//@@
Specific specific = 3;
}
}
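//
// Illustrative sketch (not part of the model configuration schema): serving
// only versions 1 and 3 of a model, assuming the enclosing ModelConfig
// field is named 'version_policy'.
//
//   version_policy { specific { versions: [ 1, 3 ] } }
//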
//@@
//@@.. cpp:var:: message ModelOptimizationPolicy
//@@
//@@ Optimization settings for a model. These settings control if/how a
//@@ model is optimized and prioritized by the backend framework when
//@@ it is loaded.
//@@
message ModelOptimizationPolicy
{
//@@
//@@ .. cpp:var:: message Graph
//@@
//@@ Enable generic graph optimization of the model. If not specified
//@@ the framework's default level of optimization is used. Supported
//@@ for TensorFlow graphdef and savedmodel models and for ONNX models.
//@@ For TensorFlow this causes XLA to be enabled/disabled for the model.
//@@ For ONNX the default enables all optimizations, -1 enables only
//@@ basic optimizations, and +1 enables only basic and extended
//@@ optimizations.
//@@
message Graph
{
//@@ .. cpp:var:: int32 level
//@@
//@@ The optimization level. Defaults to 0 (zero) if not specified.
//@@
//@@ - -1: Disabled
//@@ - 0: Framework default
//@@ - 1+: Enable optimization level (greater values indicate
//@@ higher optimization levels)
//@@
int32 level = 1;
}
//@@
//@@ .. cpp:enum:: ModelPriority
//@@
//@@ Model priorities. A model will be given scheduling and execution
//@@ preference over models at lower priorities. Current model
//@@ priorities only work for TensorRT models.
//@@
enum ModelPriority {
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0
//@@
//@@ The default model priority.
//@@
PRIORITY_DEFAULT = 0;
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1
//@@
//@@ The maximum model priority.
//@@
PRIORITY_MAX = 1;
//@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2
//@@
//@@ The minimum model priority.
//@@
PRIORITY_MIN = 2;
}
//@@
//@@ .. cpp:var:: message Cuda
//@@
//@@ CUDA-specific optimization settings.
//@@
message Cuda
{
//@@ .. cpp:var:: message GraphSpec
//@@
//@@ Specification of the CUDA graph to be captured.
//@@
message GraphSpec
{
//@@ .. cpp:var:: message Shape
//@@
//@@ Specification of tensor dimensions.
//@@
message Shape
{
//@@ .. cpp:var:: int64 dim (repeated)
//@@
//@@ The dimension.
//@@
repeated int64 dim = 1;
}
message LowerBound
{
//@@ .. cpp:var:: int32 batch_size
//@@
//@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
//@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
//@@ be set to a value between 1 and 'max_batch_size'.
//@@
int32 batch_size = 1;
//@@ .. cpp:var:: map<string, Shape> input
//@@
//@@ The specification of the inputs. 'Shape' is the shape of
//@@ the input without batching dimension.
//@@
map<string, Shape> input = 2;
}
//@@ .. cpp:var:: int32 batch_size
//@@
//@@ The batch size of the CUDA graph. If 'max_batch_size' is 0,
//@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must
//@@ be set to a value between 1 and 'max_batch_size'.
//@@
int32 batch_size = 1;
//@@ .. cpp:var:: map<string, Shape> input
//@@
//@@ The specification of the inputs. 'Shape' is the shape of the
//@@ input without batching dimension.
//@@
map<string, Shape> input = 2;
//@@ .. cpp:var:: LowerBound graph_lower_bound
//@@
//@@ Specify the lower bound of the CUDA graph. Optional.
//@@ If specified, the graph can be used for input shapes and
//@@ batch sizes that are in closed interval between the lower
//@@ bound specification and graph specification. For dynamic
//@@ shape model, this allows CUDA graphs to be launched
//@@ frequently without capturing all possible shape combinations.
//@@ However, using graph for shape combinations different from
//@@ the one used for capturing introduces uninitialized data for
//@@ execution and it may distort the inference result if
//@@ the model is sensitive to uninitialized data.
//@@
LowerBound graph_lower_bound = 3;
}
//@@ .. cpp:var:: bool graphs
//@@
//@@ Use CUDA graphs API to capture model operations and execute
//@@ them more efficiently. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool graphs = 1;
//@@ .. cpp:var:: bool busy_wait_events
//@@
//@@ Use busy-waiting to synchronize CUDA events to achieve minimum
//@@ latency from event complete to host thread to be notified, with
//@@ the cost of high CPU load. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool busy_wait_events = 2;
//@@ .. cpp:var:: GraphSpec graph_spec (repeated)
//@@
//@@ Specification of the CUDA graph to be captured. If not specified
//@@ and 'graphs' is true, the default CUDA graphs will be captured
//@@ based on model settings.
//@@ Currently only recognized by TensorRT backend.
//@@
repeated GraphSpec graph_spec = 3;
//@@ .. cpp:var:: bool output_copy_stream
//@@
//@@ Uses a CUDA stream separate from the inference stream to copy the
//@@ output to host. However, be aware that setting this option to
//@@ true will lead to an increase in the memory consumption of the
//@@ model as Triton will allocate twice as much GPU memory for its
//@@ I/O tensor buffers. Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool output_copy_stream = 4;
}
//@@
//@@ .. cpp:var:: message ExecutionAccelerators
//@@
//@@ Specify the preferred execution accelerators to be used to execute
//@@ the model. Currently only recognized by ONNX Runtime backend and
//@@ TensorFlow backend.
//@@
//@@ For ONNX Runtime backend, it will deploy the model with the execution
//@@ accelerators by priority, the priority is determined based on the
//@@ order that they are set, i.e. the provider at the front has the highest
//@@ priority. Overall, the priority will be in the following order:
//@@ <gpu_execution_accelerator> (if instance is on GPU)
//@@ CUDA Execution Provider (if instance is on GPU)
//@@ <cpu_execution_accelerator>
//@@ Default CPU Execution Provider
//@@
message ExecutionAccelerators
{
//@@
//@@ .. cpp:var:: message Accelerator
//@@
//@@ Specify the accelerator to be used to execute the model.
//@@ Accelerator with the same name may accept different parameters
//@@ depending on the backends.
//@@
message Accelerator
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the execution accelerator.
//@@
string name = 1;
//@@ .. cpp:var:: map<string, string> parameters
//@@
//@@ Additional parameters used to configure the accelerator.
//@@
map<string, string> parameters = 2;
}
//@@ .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
//@@
//@@ The preferred execution provider to be used if the model instance
//@@ is deployed on GPU.
//@@
//@@ For ONNX Runtime backend, possible value is "tensorrt" as name,
//@@ and no parameters are required.
//@@
//@@ For TensorFlow backend, possible values are "tensorrt",
//@@ "auto_mixed_precision", "gpu_io".
//@@
//@@ For "tensorrt", the following parameters can be specified:
//@@ "precision_mode": The precision used for optimization.
//@@ Allowed values are "FP32" and "FP16". Default value is "FP32".
//@@
//@@ "max_cached_engines": The maximum number of cached TensorRT
//@@ engines in dynamic TensorRT ops. Default value is 100.
//@@
//@@ "minimum_segment_size": The smallest model subgraph that will
//@@ be considered for optimization by TensorRT. Default value is 3.
//@@
//@@ "max_workspace_size_bytes": The maximum GPU memory the model
//@@ can use temporarily during execution. Default value is 1GB.
//@@
//@@ For "auto_mixed_precision", no parameters are required. If set,
//@@ the model will try to use FP16 for better performance.
//@@ This optimization cannot be set with "tensorrt".
//@@
//@@ For "gpu_io", no parameters are required. If set, the model will
//@@ be executed using TensorFlow Callable API to set input and output
//@@ tensors in GPU memory if possible, which can reduce data transfer
//@@ overhead if the model is used in ensemble. However, the Callable
//@@ object will be created on model creation and it will request all
//@@ outputs for every model execution, which may impact the
//@@ performance if a request does not require all outputs. This
//@@ optimization will only take effect if the model instance is
//@@ created with KIND_GPU.
//@@
repeated Accelerator gpu_execution_accelerator = 1;
//@@ .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
//@@
//@@ The preferred execution provider to be used if the model instance
//@@ is deployed on CPU.
//@@
//@@ For ONNX Runtime backend, possible value is "openvino" as name,
//@@ and no parameters are required.
//@@
repeated Accelerator cpu_execution_accelerator = 2;
}
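//
// For illustration, a config.pbtxt sketch (parameter values are examples
// only) requesting the TensorRT execution accelerator with FP16 precision
// for GPU instances, as described above:
//
//   optimization {
//     execution_accelerators {
//       gpu_execution_accelerator [
//         {
//           name: "tensorrt"
//           parameters { key: "precision_mode" value: "FP16" }
//         }
//       ]
//     }
//   }
//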
//@@
//@@ .. cpp:var:: message PinnedMemoryBuffer
//@@
//@@ Specify whether to use a pinned memory buffer when transferring data
//@@  between non-pinned system memory and GPU memory. Using a pinned
//@@  memory buffer for these system/GPU transfers typically provides
//@@  increased performance. For example, in the common use case where the
//@@ request provides inputs and delivers outputs via non-pinned system
//@@ memory, if the model instance accepts GPU IOs, the inputs will be
//@@ processed by two copies: from non-pinned system memory to pinned
//@@ memory, and from pinned memory to GPU memory. Similarly, pinned
//@@ memory will be used for delivering the outputs.
//@@
message PinnedMemoryBuffer
{
//@@ .. cpp:var:: bool enable
//@@
//@@ Use pinned memory buffer. Default is true.
//@@
bool enable = 1;
}
//@@ .. cpp:var:: Graph graph
//@@
//@@ The graph optimization setting for the model. Optional.
//@@
Graph graph = 1;
//@@ .. cpp:var:: ModelPriority priority
//@@
//@@ The priority setting for the model. Optional.
//@@
ModelPriority priority = 2;
//@@ .. cpp:var:: Cuda cuda
//@@
//@@ CUDA-specific optimization settings. Optional.
//@@
Cuda cuda = 3;
//@@ .. cpp:var:: ExecutionAccelerators execution_accelerators
//@@
//@@ The accelerators used for the model. Optional.
//@@
ExecutionAccelerators execution_accelerators = 4;
//@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory
//@@
//@@ Use pinned memory buffer when the data transfer for inputs
//@@ is between GPU memory and non-pinned system memory.
//@@ Default is true.
//@@
PinnedMemoryBuffer input_pinned_memory = 5;
//@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory
//@@
//@@ Use pinned memory buffer when the data transfer for outputs
//@@ is between GPU memory and non-pinned system memory.
//@@ Default is true.
//@@
PinnedMemoryBuffer output_pinned_memory = 6;
//@@ .. cpp:var:: uint32 gather_kernel_buffer_threshold
//@@
//@@ The backend may use a gather kernel to gather input data if the
//@@ device has direct access to the source buffer and the destination
//@@  buffer. In such a case, the gather kernel will be used only if the
//@@  number of buffers to be gathered is greater than or equal to
//@@  the specified value. If 0, the gather kernel will be disabled.
//@@ Default value is 0.
//@@ Currently only recognized by TensorRT backend.
//@@
uint32 gather_kernel_buffer_threshold = 7;
//@@ .. cpp:var:: bool eager_batching
//@@
//@@ Start preparing the next batch before the model instance is ready
//@@ for the next inference. This option can be used to overlap the
//@@ batch preparation with model execution, with the trade-off that
//@@ the next batch might be smaller than what it could have been.
//@@ Default value is false.
//@@ Currently only recognized by TensorRT backend.
//@@
bool eager_batching = 8;
}
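//
// For illustration, a sketch of other ModelOptimizationPolicy fields in
// config.pbtxt (values are examples only) that disables the input pinned
// memory buffer and enables the gather kernel for 8 or more buffers:
//
//   optimization {
//     input_pinned_memory { enable: false }
//     gather_kernel_buffer_threshold: 8
//   }
//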
//@@
//@@.. cpp:var:: message ModelQueuePolicy
//@@
//@@ Queue policy for inference requests.
//@@
message ModelQueuePolicy
{
//@@
//@@ .. cpp:enum:: TimeoutAction
//@@
//@@ The action applied to timed-out requests.
//@@
enum TimeoutAction {
//@@ .. cpp:enumerator:: Action::REJECT = 0
//@@
//@@ Reject the request and return error message accordingly.
//@@
REJECT = 0;
//@@ .. cpp:enumerator:: Action::DELAY = 1
//@@
//@@ Delay the request until all other requests at the same
//@@ (or higher) priority levels that have not reached their timeouts
//@@ are processed. A delayed request will eventually be processed,
//@@ but may be delayed indefinitely due to newly arriving requests.
//@@
DELAY = 1;
}
//@@
//@@ .. cpp:var:: TimeoutAction timeout_action
//@@
//@@  The action applied to a timed-out request.
//@@ The default action is REJECT.
//@@
TimeoutAction timeout_action = 1;
//@@
//@@ .. cpp:var:: uint64 default_timeout_microseconds
//@@
//@@ The default timeout for every request, in microseconds.
//@@ The default value is 0 which indicates that no timeout is set.
//@@
uint64 default_timeout_microseconds = 2;
//@@
//@@ .. cpp:var:: bool allow_timeout_override
//@@
//@@  Whether an individual request can override the default timeout value.
//@@ When true, individual requests can set a timeout that is less than
//@@ the default timeout value but may not increase the timeout.
//@@ The default value is false.
//@@
bool allow_timeout_override = 3;
//@@
//@@ .. cpp:var:: uint32 max_queue_size
//@@
//@@ The maximum queue size for holding requests. A request will be
//@@ rejected immediately if it can't be enqueued because the queue is
//@@ full. The default value is 0 which indicates that no maximum
//@@ queue size is enforced.
//@@
uint32 max_queue_size = 4;
}
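//
// For illustration, a queue policy sketch (values are examples only), shown
// here as the 'default_queue_policy' of the dynamic batcher defined below:
//
//   dynamic_batching {
//     default_queue_policy {
//       timeout_action: DELAY
//       default_timeout_microseconds: 100000
//       allow_timeout_override: true
//       max_queue_size: 16
//     }
//   }
//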
//@@
//@@.. cpp:var:: message ModelDynamicBatching
//@@
//@@ Dynamic batching configuration. These settings control how dynamic
//@@ batching operates for the model.
//@@
message ModelDynamicBatching
{
//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
//@@
//@@ Preferred batch sizes for dynamic batching. If a batch of one of
//@@  these sizes can be formed, it will be executed immediately. If
//@@  not specified, a preferred batch size will be chosen automatically
//@@ based on model and GPU characteristics.
//@@
repeated int32 preferred_batch_size = 1;
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a request will be delayed in
//@@ the scheduling queue to wait for additional requests for
//@@ batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 2;
//@@ .. cpp:var:: bool preserve_ordering
//@@
//@@ Should the dynamic batcher preserve the ordering of responses to
//@@ match the order of requests received by the scheduler. Default is
//@@ false. If true, the responses will be returned in the same order as
//@@ the order of requests sent to the scheduler. If false, the responses
//@@ may be returned in arbitrary order. This option is specifically
//@@ needed when a sequence of related inference requests (i.e. inference
//@@  requests with the same correlation ID) is sent to the dynamic
//@@ batcher to ensure that the sequence responses are in the correct
//@@ order.
//@@
bool preserve_ordering = 3;
//@@ .. cpp:var:: uint32 priority_levels
//@@
//@@  The number of priority levels to be enabled for the model. Priority
//@@  levels start from 1, with 1 being the highest priority.
//@@ Requests are handled in priority order with all priority 1 requests
//@@ processed before priority 2, all priority 2 requests processed before
//@@ priority 3, etc. Requests with the same priority level will be
//@@ handled in the order that they are received.
//@@
uint32 priority_levels = 4;
//@@ .. cpp:var:: uint32 default_priority_level
//@@
//@@ The priority level used for requests that don't specify their
//@@ priority. The value must be in the range [ 1, 'priority_levels' ].
//@@
uint32 default_priority_level = 5;
//@@ .. cpp:var:: ModelQueuePolicy default_queue_policy
//@@
//@@ The default queue policy used for requests that don't require
//@@ priority handling and requests that specify priority levels where
//@@ there is no specific policy given. If not specified, a policy with
//@@ default field values will be used.
//@@
ModelQueuePolicy default_queue_policy = 6;
//@@ .. cpp:var:: map<uint32, ModelQueuePolicy> priority_queue_policy
//@@
//@@ Specify the queue policy for the priority level. The default queue
//@@ policy will be used if a priority level doesn't specify a queue
//@@ policy.
//@@
map<uint32, ModelQueuePolicy> priority_queue_policy = 7;
}
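//
// For illustration, a minimal dynamic batching sketch in config.pbtxt
// (sizes and delay are examples only):
//
//   dynamic_batching {
//     preferred_batch_size: [ 4, 8 ]
//     max_queue_delay_microseconds: 100
//     preserve_ordering: true
//   }
//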
//@@
//@@.. cpp:var:: message ModelSequenceBatching
//@@
//@@ Sequence batching configuration. These settings control how sequence
//@@ batching operates for the model.
//@@
message ModelSequenceBatching
{
//@@ .. cpp:var:: message Control
//@@
//@@ A control is a signal that the sequence batcher uses to
//@@ communicate with a backend.
//@@
message Control
{
//@@
//@@ .. cpp:enum:: Kind
//@@
//@@ The kind of the control.
//@@
enum Kind {
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0
//@@
//@@ A new sequence is/is-not starting. If true a sequence is
//@@ starting, if false a sequence is continuing. Must
//@@ specify either int32_false_true, fp32_false_true or
//@@ bool_false_true for this control. This control is optional.
//@@
CONTROL_SEQUENCE_START = 0;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1
//@@
//@@ A sequence is/is-not ready for inference. If true the
//@@ input tensor data is valid and should be used. If false
//@@ the input tensor data is invalid and inferencing should
//@@ be "skipped". Must specify either int32_false_true,
//@@ fp32_false_true or bool_false_true for this control. This
//@@ control is optional.
//@@
CONTROL_SEQUENCE_READY = 1;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2
//@@
//@@ A sequence is/is-not ending. If true a sequence is
//@@ ending, if false a sequence is continuing. Must specify
//@@ either int32_false_true, fp32_false_true or bool_false_true
//@@ for this control. This control is optional.
//@@
CONTROL_SEQUENCE_END = 2;
//@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3
//@@
//@@ The correlation ID of the sequence. The correlation ID
//@@ is an uint64_t value that is communicated in whole or
//@@ in part by the tensor. The tensor's datatype must be
//@@ specified by data_type and must be TYPE_UINT64, TYPE_INT64,
//@@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified
//@@ the correlation ID will be truncated to the low-order 32
//@@ bits. This control is optional.
//@@
CONTROL_SEQUENCE_CORRID = 3;
}
//@@ .. cpp:var:: Kind kind
//@@
//@@ The kind of this control.
//@@
Kind kind = 1;
//@@ .. cpp:var:: int32 int32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in an int32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'int32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated int32 int32_false_true = 2;
//@@ .. cpp:var:: float fp32_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a fp32 tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'fp32_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated float fp32_false_true = 3;
//@@ .. cpp:var:: bool bool_false_true (repeated)
//@@
//@@ The control's true and false setting is indicated by setting
//@@ a value in a bool tensor. The tensor must be a
//@@ 1-dimensional tensor with size equal to the batch size of
//@@ the request. 'bool_false_true' must have two entries: the
//@@ first the false value and the second the true value.
//@@
repeated bool bool_false_true = 5;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The control's datatype.
//@@
DataType data_type = 4;
}
//@@ .. cpp:var:: message ControlInput
//@@
//@@ The sequence control values to communicate by a model input.
//@@
message ControlInput
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model input.
//@@
string name = 1;
//@@ .. cpp:var:: Control control (repeated)
//@@
//@@ The control value(s) that should be communicated to the
//@@ model using this model input.
//@@
repeated Control control = 2;
}
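//
// For illustration, a control_input sketch (the tensor names "START" and
// "READY" are examples; the model must declare matching inputs), used
// inside the sequence_batching section of the model configuration:
//
//   sequence_batching {
//     control_input [
//       {
//         name: "START"
//         control [ { kind: CONTROL_SEQUENCE_START fp32_false_true: [ 0, 1 ] } ]
//       },
//       {
//         name: "READY"
//         control [ { kind: CONTROL_SEQUENCE_READY fp32_false_true: [ 0, 1 ] } ]
//       }
//     ]
//   }
//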
//@@
//@@ .. cpp:var:: message InitialState
//@@
//@@ Settings used to initialize data for implicit state.
//@@
message InitialState
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType data_type = 1;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the state tensor, not including the batch dimension.
//@@
repeated int64 dims = 2;
//@@ .. cpp:var:: oneof state_data
//@@
//@@ Specify how the initial state data is generated.
//@@
oneof state_data
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@ The identifier for using zeros as initial state data.
//@@    Note that the value of 'zero_data' will not be checked;
//@@ instead, zero data will be used as long as the field is set.
//@@
bool zero_data = 3;
//@@ .. cpp:var:: string data_file
//@@
//@@ The file whose content will be used as the initial data for
//@@ the state in row-major order. The file must be provided in
//@@ sub-directory 'initial_state' under the model directory.
//@@
string data_file = 4;
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the state initialization.
//@@
string name = 5;
}
//@@ .. cpp:var:: message State
//@@
//@@ An input / output pair of tensors that carry state for the sequence.
//@@
message State
{
//@@ .. cpp:var:: string input_name
//@@
//@@ The name of the model state input.
//@@
string input_name = 1;
//@@ .. cpp:var:: string output_name
//@@
//@@ The name of the model state output.
//@@
string output_name = 2;
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the state.
//@@
DataType data_type = 3;
//@@  .. cpp:var:: int64 dims (repeated)
//@@
//@@     The shape of the state tensor, not including the batch dimension.
//@@
repeated int64 dims = 4;
//@@ .. cpp:var:: InitialState initial_state (repeated)
//@@
//@@ The optional field to specify the initial state for the model.
//@@
repeated InitialState initial_state = 5;
}
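//
// For illustration, an implicit state sketch (tensor names, type and shape
// are examples only) that feeds the previous request's "OUTPUT_STATE" back
// as "INPUT_STATE" and zero-initializes the first request of a sequence:
//
//   sequence_batching {
//     state [
//       {
//         input_name: "INPUT_STATE"
//         output_name: "OUTPUT_STATE"
//         data_type: TYPE_FP32
//         dims: [ 128 ]
//         initial_state {
//           name: "zeros"
//           data_type: TYPE_FP32
//           dims: [ 128 ]
//           zero_data: true
//         }
//       }
//     ]
//   }
//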
//@@ .. cpp:var:: message StrategyDirect
//@@
//@@ The sequence batcher uses a specific, unique batch
//@@ slot for each sequence. All inference requests in a
//@@ sequence are directed to the same batch slot in the same
//@@ model instance over the lifetime of the sequence. This
//@@ is the default strategy.
//@@
message StrategyDirect
{
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the sequence batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 1;
//@@ .. cpp:var:: float minimum_slot_utilization
//@@
//@@ The minimum slot utilization that must be satisfied to
//@@ execute the batch before 'max_queue_delay_microseconds' expires.
//@@ For example, a value of 0.5 indicates that the batch should be
//@@ executed as soon as 50% or more of the slots are ready even if
//@@ the 'max_queue_delay_microseconds' timeout has not expired.
//@@ The default is 0.0, indicating that a batch will be executed
//@@ before 'max_queue_delay_microseconds' timeout expires if at least
//@@ one batch slot is ready. 'max_queue_delay_microseconds' will be
//@@ ignored unless minimum_slot_utilization is set to a non-zero
//@@ value.
//@@
float minimum_slot_utilization = 2;
}
//@@ .. cpp:var:: message StrategyOldest
//@@
//@@ The sequence batcher maintains up to 'max_candidate_sequences'
//@@ candidate sequences. 'max_candidate_sequences' can be greater
//@@ than the model's 'max_batch_size'. For inferencing the batcher
//@@ chooses from the candidate sequences up to 'max_batch_size'
//@@ inference requests. Requests are chosen in an oldest-first
//@@ manner across all candidate sequences. A given sequence is
//@@ not guaranteed to be assigned to the same batch slot for
//@@ all inference requests of that sequence.
//@@
message StrategyOldest
{
//@@ .. cpp:var:: int32 max_candidate_sequences
//@@
//@@ Maximum number of candidate sequences that the batcher
//@@    maintains. Excess sequences are kept in an ordered backlog
//@@ and become candidates when existing candidate sequences
//@@ complete.
//@@
int32 max_candidate_sequences = 1;
//@@ .. cpp:var:: int32 preferred_batch_size (repeated)
//@@
//@@ Preferred batch sizes for dynamic batching of candidate
//@@    sequences. If a batch of one of these sizes can be formed,
//@@    it will be executed immediately. If not specified, a
//@@ preferred batch size will be chosen automatically
//@@ based on model and GPU characteristics.
//@@
repeated int32 preferred_batch_size = 2;
//@@ .. cpp:var:: uint64 max_queue_delay_microseconds
//@@
//@@ The maximum time, in microseconds, a candidate request
//@@ will be delayed in the dynamic batch scheduling queue to
//@@ wait for additional requests for batching. Default is 0.
//@@
uint64 max_queue_delay_microseconds = 3;
}
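//
// For illustration, an 'oldest' strategy sketch (values are examples only):
//
//   sequence_batching {
//     oldest {
//       max_candidate_sequences: 4
//       preferred_batch_size: [ 4 ]
//       max_queue_delay_microseconds: 100
//     }
//   }
//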
//@@ .. cpp:var:: oneof strategy_choice
//@@
//@@ The strategy used by the sequence batcher. Default strategy
//@@ is 'direct'.
//@@
oneof strategy_choice
{
//@@ .. cpp:var:: StrategyDirect direct
//@@
//@@ StrategyDirect scheduling strategy.
//@@
StrategyDirect direct = 3;
//@@ .. cpp:var:: StrategyOldest oldest
//@@
//@@ StrategyOldest scheduling strategy.
//@@
StrategyOldest oldest = 4;
}
//@@ .. cpp:var:: uint64 max_sequence_idle_microseconds
//@@
//@@ The maximum time, in microseconds, that a sequence is allowed to
//@@ be idle before it is aborted. The inference server considers a
//@@ sequence idle when it does not have any inference request queued
//@@ for the sequence. If this limit is exceeded, the inference server
//@@ will free the sequence slot allocated by the sequence and make it
//@@ available for another sequence. If not specified (or specified as
//@@ zero) a default value of 1000000 (1 second) is used.
//@@
uint64 max_sequence_idle_microseconds = 1;
//@@ .. cpp:var:: ControlInput control_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@  sequence start, end, ready and similar control values to the
//@@ model.
//@@
repeated ControlInput control_input = 2;
//@@ .. cpp:var:: State state (repeated)
//@@
//@@ The optional state that can be stored in Triton for performing
//@@ inference requests on a sequence. Each sequence holds an implicit
//@@ state local to itself. The output state tensor provided by the
//@@ model in 'output_name' field of the current inference request will
//@@ be transferred as an input tensor named 'input_name' in the next
//@@ request of the same sequence. The input state of the first request
//@@ in the sequence contains garbage data.
//@@
repeated State state = 5;
}
//@@
//@@.. cpp:var:: message ModelEnsembling
//@@
//@@ Model ensembling configuration. These settings specify the models that
//@@ compose the ensemble and how data flows between the models.
//@@
message ModelEnsembling
{
//@@ .. cpp:var:: message Step
//@@
//@@ Each step specifies a model included in the ensemble,
//@@ maps ensemble tensor names to the model input tensors,
//@@ and maps model output tensors to ensemble tensor names
//@@
message Step
{
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model to execute for this step of the ensemble.
//@@
string model_name = 1;
//@@ .. cpp:var:: int64 model_version
//@@
//@@ The version of the model to use for inference. If -1
//@@ the latest/most-recent version of the model is used.
//@@
int64 model_version = 2;
//@@ .. cpp:var:: map<string,string> input_map
//@@
//@@ Map from name of an input tensor on this step's model to ensemble
//@@ tensor name. The ensemble tensor must have the same data type and
//@@ shape as the model input. Each model input must be assigned to
//@@ one ensemble tensor, but the same ensemble tensor can be assigned
//@@ to multiple model inputs.
//@@
map<string, string> input_map = 3;
//@@ .. cpp:var:: map<string,string> output_map
//@@
//@@ Map from name of an output tensor on this step's model to ensemble
//@@ tensor name. The data type and shape of the ensemble tensor will
//@@ be inferred from the model output. It is optional to assign all
//@@ model outputs to ensemble tensors. One ensemble tensor name
//@@ can appear in an output map only once.
//@@
map<string, string> output_map = 4;
}
//@@ .. cpp:var:: Step step (repeated)
//@@
//@@ The models and the input / output mappings used within the ensemble.
//@@
repeated Step step = 1;
}
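//
// For illustration, an ensemble sketch (model and tensor names are examples
// only) that routes an ensemble input through a preprocessing model and
// then a classifier:
//
//   ensemble_scheduling {
//     step [
//       {
//         model_name: "preprocess"
//         model_version: -1
//         input_map { key: "RAW" value: "ENSEMBLE_INPUT" }
//         output_map { key: "PREPROCESSED" value: "preprocessed_tensor" }
//       },
//       {
//         model_name: "classifier"
//         model_version: -1
//         input_map { key: "INPUT" value: "preprocessed_tensor" }
//         output_map { key: "PROB" value: "ENSEMBLE_OUTPUT" }
//       }
//     ]
//   }
//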
//@@
//@@.. cpp:var:: message ModelParameter
//@@
//@@ A model parameter.
//@@
message ModelParameter
{
//@@ .. cpp:var:: string string_value
//@@
//@@ The string value of the parameter.
//@@
string string_value = 1;
}
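//
// For illustration, a parameters entry sketch (key and value are examples
// only), as used from the 'parameters' map of ModelConfig below:
//
//   parameters { key: "custom_key" value: { string_value: "custom_value" } }
//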
//@@
//@@.. cpp:var:: message ModelWarmup
//@@
//@@ Settings used to construct the request sample for model warmup.
//@@
message ModelWarmup
{
//@@
//@@ .. cpp:var:: message Input
//@@
//@@ Meta data associated with an input.
//@@
message Input
{
//@@ .. cpp:var:: DataType data_type
//@@
//@@ The data-type of the input.
//@@
DataType data_type = 1;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the input tensor, not including the batch dimension.
//@@
repeated int64 dims = 2;
//@@ .. cpp:var:: oneof input_data_type
//@@
//@@ Specify how the input data is generated. If the input has STRING
//@@ data type and 'random_data' is set, the data generation will fall
//@@ back to 'zero_data'.
//@@
oneof input_data_type
{
//@@
//@@ .. cpp:var:: bool zero_data
//@@
//@@ The identifier for using zeros as input data. Note that the
//@@    value of 'zero_data' will not be checked; instead, zero data
//@@ will be used as long as the field is set.
//@@
bool zero_data = 3;
//@@
//@@ .. cpp:var:: bool random_data
//@@
//@@ The identifier for using random data as input data. Note that
//@@    the value of 'random_data' will not be checked; instead,
//@@ random data will be used as long as the field is set.
//@@
bool random_data = 4;
//@@ .. cpp:var:: string input_data_file
//@@
//@@ The file whose content will be used as raw input data in
//@@ row-major order. The file must be provided in a sub-directory
//@@ 'warmup' under the model directory. The file contents should be
//@@ in binary format. For TYPE_STRING data-type, an element is
//@@ represented by a 4-byte unsigned integer giving the length
//@@ followed by the actual bytes.
//@@
string input_data_file = 5;
}
}
//@@ .. cpp:var:: string name
//@@
//@@ The name of the request sample.
//@@
string name = 1;
//@@ .. cpp:var:: uint32 batch_size
//@@
//@@ The batch size of the inference request. This must be >= 1. For
//@@ models that don't support batching, batch_size must be 1. If
//@@ batch_size > 1, the 'inputs' specified below will be duplicated to
//@@ match the batch size requested.
//@@
uint32 batch_size = 2;
//@@ .. cpp:var:: map<string, Input> inputs
//@@
//@@ The warmup meta data associated with every model input, including
//@@ control tensors.
//@@
map<string, Input> inputs = 3;
//@@ .. cpp:var:: uint32 count
//@@
//@@  The number of times this warmup sample will be executed.
//@@  For example, if this field is set to 2, 2 model executions using this
//@@  sample will be scheduled for warmup. Default value is 0 which
//@@  indicates that this sample will be used only once.
//@@  Note that for sequence models, 'count' may not work well
//@@  because the model often expects a valid sequence of requests which
//@@  should be represented by a series of warmup samples. 'count > 1'
//@@  essentially "resends" one of the samples, which may invalidate the
//@@  sequence and result in unexpected warmup failures.
//@@
uint32 count = 4;
}
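//
// For illustration, a warmup sample sketch (input name, type and shape are
// examples only) that schedules one batch-1 request with zero-filled data:
//
//   model_warmup [
//     {
//       name: "zero_warmup"
//       batch_size: 1
//       inputs {
//         key: "INPUT0"
//         value: { data_type: TYPE_FP32 dims: [ 16 ] zero_data: true }
//       }
//     }
//   ]
//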
//@@
//@@ .. cpp:var:: message ModelOperations
//@@
//@@ The metadata of libraries providing custom operations for this model.
//@@
message ModelOperations
{
//@@ .. cpp:var:: string op_library_filename (repeated)
//@@
//@@ Optional paths of the libraries providing custom operations for
//@@ this model. Valid only for ONNX models.
//@@
repeated string op_library_filename = 1;
}
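//
// For illustration, a model_operations sketch (the library path is an
// example only):
//
//   model_operations { op_library_filename: [ "custom_ops/libmyops.so" ] }
//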
//@@
//@@ .. cpp:var:: message ModelTransactionPolicy
//@@
//@@ The specification that describes the nature of transactions
//@@ to be expected from the model.
//@@
message ModelTransactionPolicy
{
//@@ .. cpp:var:: bool decoupled
//@@
//@@  Indicates whether responses generated by the model are decoupled from
//@@  the requests issued to it, which means the number of responses
//@@  generated by the model may differ from the number of requests issued, and
//@@ that the responses may be out of order relative to the order of
//@@ requests. The default is false, which means the model will generate
//@@ exactly one response for each request.
//@@
bool decoupled = 1;
}
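//
// For illustration, the transaction policy for a model that may return any
// number of responses per request:
//
//   model_transaction_policy { decoupled: true }
//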
//@@
//@@.. cpp:var:: message ModelRepositoryAgents
//@@
//@@ The repository agents for the model.
//@@
message ModelRepositoryAgents
{
//@@
//@@ .. cpp:var:: message Agent
//@@
//@@ A repository agent that should be invoked for the specified
//@@ repository actions for this model.
//@@
message Agent
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the agent.
//@@
string name = 1;
//@@ .. cpp:var:: map<string, string> parameters
//@@
//@@ The parameters for the agent.
//@@
map<string, string> parameters = 2;
}
//@@
//@@ .. cpp:var:: Agent agents (repeated)
//@@
//@@ The ordered list of agents for the model. These agents will be
//@@  invoked in order to respond to repository actions occurring for the
//@@ model.
//@@
repeated Agent agents = 1;
}
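//
// For illustration, a repository agents sketch (the agent name and
// parameter are examples only):
//
//   model_repository_agents {
//     agents [
//       { name: "my_agent" parameters { key: "option" value: "on" } }
//     ]
//   }
//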
//@@
//@@.. cpp:var:: message ModelResponseCache
//@@
//@@ The response cache setting for the model.
//@@
message ModelResponseCache
{
//@@
//@@  .. cpp:var:: bool enable
//@@
//@@  Whether or not to use the response cache for the model. If true, the
//@@  responses from the model are cached, and when an identical request
//@@  is encountered the cached response is returned instead of running
//@@  the model execution. By default, the response cache is disabled
//@@  for all models.
//@@
bool enable = 1;
}
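//
// For illustration, enabling the response cache for a model:
//
//   response_cache { enable: true }
//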
//@@
//@@.. cpp:var:: message ModelConfig
//@@
//@@ A model configuration.
//@@
message ModelConfig
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the model.
//@@
string name = 1;
//@@ .. cpp:var:: string platform
//@@
//@@ The framework for the model. Possible values are
//@@ "tensorrt_plan", "tensorflow_graphdef",
//@@ "tensorflow_savedmodel", "onnxruntime_onnx",
//@@ "pytorch_libtorch".
//@@
string platform = 2;
//@@ .. cpp:var:: string backend
//@@
//@@ The backend used by the model.
//@@
string backend = 17;
//@@ .. cpp:var:: ModelVersionPolicy version_policy
//@@
//@@ Policy indicating which version(s) of the model will be served.
//@@
ModelVersionPolicy version_policy = 3;
//@@ .. cpp:var:: int32 max_batch_size
//@@
//@@ Maximum batch size allowed for inference. This can only decrease
//@@ what is allowed by the model itself. A max_batch_size value of 0
//@@ indicates that batching is not allowed for the model and the
//@@ dimension/shape of the input and output tensors must exactly
//@@ match what is specified in the input and output configuration. A
//@@ max_batch_size value > 0 indicates that batching is allowed and
//@@ so the model expects the input tensors to have an additional
//@@ initial dimension for the batching that is not specified in the
//@@ input (for example, if the model supports batched inputs of
//@@ 2-dimensional tensors then the model configuration will specify
//@@ the input shape as [ X, Y ] but the model will expect the actual
//@@ input tensors to have shape [ N, X, Y ]). For max_batch_size > 0
//@@ returned outputs will also have an additional initial dimension
//@@ for the batch.
//@@
int32 max_batch_size = 4;
//@@ .. cpp:var:: ModelInput input (repeated)
//@@
//@@  The inputs requested by the model.
//@@
repeated ModelInput input = 5;
//@@ .. cpp:var:: ModelOutput output (repeated)
//@@
//@@ The outputs produced by the model.
//@@
repeated ModelOutput output = 6;
//@@ .. cpp:var:: BatchInput batch_input (repeated)
//@@
//@@ The model input(s) that the server should use to communicate
//@@ batch related values to the model.
//@@
repeated BatchInput batch_input = 20;
//@@ .. cpp:var:: BatchOutput batch_output (repeated)
//@@
//@@  The outputs produced by the model that require special handling
//@@ by the model backend.
//@@
repeated BatchOutput batch_output = 21;
//@@ .. cpp:var:: ModelOptimizationPolicy optimization
//@@
//@@ Optimization configuration for the model. If not specified
//@@ then default optimization policy is used.
//@@
ModelOptimizationPolicy optimization = 12;
//@@ .. cpp:var:: oneof scheduling_choice
//@@
//@@ The scheduling policy for the model. If not specified the
//@@ default scheduling policy is used for the model. The default
//@@ policy is to execute each inference request independently.
//@@
oneof scheduling_choice
{
//@@ .. cpp:var:: ModelDynamicBatching dynamic_batching
//@@
//@@ If specified, enables the dynamic-batching scheduling
//@@ policy. With dynamic-batching the scheduler may group
//@@ together independent requests into a single batch to
//@@ improve inference throughput.
//@@
ModelDynamicBatching dynamic_batching = 11;
//@@ .. cpp:var:: ModelSequenceBatching sequence_batching
//@@
//@@ If specified, enables the sequence-batching scheduling
//@@ policy. With sequence-batching, inference requests
//@@ with the same correlation ID are routed to the same
//@@ model instance. Multiple sequences of inference requests
//@@ may be batched together into a single batch to
//@@ improve inference throughput.
//@@
ModelSequenceBatching sequence_batching = 13;
//@@ .. cpp:var:: ModelEnsembling ensemble_scheduling
//@@
//@@ If specified, enables the model-ensembling scheduling
//@@ policy. With model-ensembling, inference requests
//@@ will be processed according to the specification, such as an
//@@ execution sequence of models. The input specified in this model
//@@ config will be the input for the ensemble, and the output
//@@ specified will be the output of the ensemble.
//@@
ModelEnsembling ensemble_scheduling = 15;
}
//@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated)
//@@
//@@ Instances of this model. If not specified, one instance
//@@ of the model will be instantiated on each available GPU.
//@@
repeated ModelInstanceGroup instance_group = 7;
//@@ .. cpp:var:: string default_model_filename
//@@
//@@ Optional filename of the model file to use if a
//@@ compute-capability specific model is not specified in
//@@ :cpp:var:`cc_model_filenames`. If not specified the default name
//@@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or
//@@ 'model.pt' depending on the model type.
//@@
string default_model_filename = 8;
//@@ .. cpp:var:: map<string,string> cc_model_filenames
//@@
//@@ Optional map from CUDA compute capability to the filename of
//@@ the model that supports that compute capability. The filename
//@@ refers to a file within the model version directory.
//@@
map<string, string> cc_model_filenames = 9;
//@@ .. cpp:var:: map<string,string> metric_tags
//@@
//@@ Optional metric tags. User-specific key-value pairs for metrics
//@@ reported for this model. These tags are applied to the metrics
//@@ reported on the HTTP metrics port.
//@@
map<string, string> metric_tags = 10;
//@@ .. cpp:var:: map<string,ModelParameter> parameters
//@@
//@@ Optional model parameters. User-specified parameter values.
//@@
map<string, ModelParameter> parameters = 14;
//@@ .. cpp:var:: ModelWarmup model_warmup (repeated)
//@@
//@@ Warmup setting of this model. If specified, all instances
//@@ will be run with the request samples in sequence before
//@@ serving the model.
//@@ This field can only be specified if the model is not an ensemble
//@@ model.
//@@
repeated ModelWarmup model_warmup = 16;
//@@ .. cpp:var:: ModelOperations model_operations
//@@
//@@ Optional metadata of the libraries providing custom operations for
//@@ this model.
//@@
ModelOperations model_operations = 18;
//@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy
//@@
//@@ Optional specification that describes the nature of transactions
//@@ to be expected from the model.
//@@
ModelTransactionPolicy model_transaction_policy = 19;
//@@ .. cpp:var:: ModelRepositoryAgents model_repository_agents
//@@
//@@ Optional specification of the agent(s) that should be invoked
//@@  when repository actions are performed for this model.
//@@
ModelRepositoryAgents model_repository_agents = 23;
//@@ .. cpp:var:: ModelResponseCache response_cache
//@@
//@@ Optional setting for utilizing the response cache for this
//@@ model.
//@@
ModelResponseCache response_cache = 24;
}
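//
// For illustration, a minimal complete config.pbtxt sketch (all names,
// shapes and counts are examples only) tying the fields above together:
//
//   name: "my_model"
//   platform: "tensorrt_plan"
//   max_batch_size: 8
//   input [ { name: "INPUT0" data_type: TYPE_FP32 dims: [ 16 ] } ]
//   output [ { name: "OUTPUT0" data_type: TYPE_FP32 dims: [ 16 ] } ]
//   instance_group [ { count: 2 kind: KIND_GPU } ]
//   dynamic_batching { preferred_batch_size: [ 4, 8 ] }
//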
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/async_work_queue.h"
namespace triton { namespace common {
AsyncWorkQueue::~AsyncWorkQueue()
{
GetSingleton()->thread_pool_.reset();
}
AsyncWorkQueue*
AsyncWorkQueue::GetSingleton()
{
static AsyncWorkQueue singleton;
return &singleton;
}
Error
AsyncWorkQueue::Initialize(size_t worker_count)
{
if (worker_count < 1) {
return Error(
Error::Code::INVALID_ARG,
"Async work queue must be initialized with positive 'worker_count'");
}
static std::mutex init_mtx;
std::lock_guard<std::mutex> lk(init_mtx);
if (GetSingleton()->thread_pool_) {
return Error(
Error::Code::ALREADY_EXISTS,
"Async work queue has been initialized with " +
std::to_string(GetSingleton()->thread_pool_->Size()) +
" 'worker_count'");
}
GetSingleton()->thread_pool_.reset(new ThreadPool(worker_count));
return Error::Success;
}
size_t
AsyncWorkQueue::WorkerCount()
{
if (!GetSingleton()->thread_pool_) {
return 0;
}
return GetSingleton()->thread_pool_->Size();
}
Error
AsyncWorkQueue::AddTask(std::function<void(void)>&& task)
{
if (!GetSingleton()->thread_pool_) {
return Error(
Error::Code::UNAVAILABLE,
"Async work queue must be initialized before adding task");
}
GetSingleton()->thread_pool_->Enqueue(std::move(task));
return Error::Success;
}
void
AsyncWorkQueue::Reset()
{
// Reconstruct the singleton to reset it
GetSingleton()->~AsyncWorkQueue();
new (GetSingleton()) AsyncWorkQueue();
}
}} // namespace triton::common
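// Usage sketch (illustrative only): initialize the shared queue once at
// startup, then enqueue work from anywhere in the process:
//
//   triton::common::AsyncWorkQueue::Initialize(4 /* worker_count */);
//   triton::common::AsyncWorkQueue::AddTask([] { /* do some work */ });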
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/error.h"
namespace triton { namespace common {
const Error Error::Success(Error::Code::SUCCESS);
std::string
Error::AsString() const
{
std::string str(CodeString(code_));
str += ": " + msg_;
return str;
}
const char*
Error::CodeString(const Code code)
{
switch (code) {
case Error::Code::SUCCESS:
return "OK";
case Error::Code::UNKNOWN:
return "Unknown";
case Error::Code::INTERNAL:
return "Internal";
case Error::Code::NOT_FOUND:
return "Not found";
case Error::Code::INVALID_ARG:
return "Invalid argument";
case Error::Code::UNAVAILABLE:
return "Unavailable";
case Error::Code::UNSUPPORTED:
return "Unsupported";
case Error::Code::ALREADY_EXISTS:
return "Already exists";
default:
break;
}
return "<invalid code>";
}
}} // namespace triton::common
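// Usage sketch (illustrative only): construct an error and render it as a
// string, as done by callers such as AsyncWorkQueue above:
//
//   triton::common::Error err(
//       triton::common::Error::Code::INVALID_ARG, "bad dims");
//   std::cerr << err.AsString() << std::endl;  // "Invalid argument: bad dims"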
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/logging.h"
#ifdef _WIN32
// suppress the min and max definitions in Windef.h.
#define NOMINMAX
#include <Windows.h>
#else
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#endif
#include <algorithm>
#include <iomanip>
#include <iostream>
namespace triton { namespace common {
Logger gLogger_;
Logger::Logger()
: enables_{true, true, true}, vlevel_(0), format_(Format::kDEFAULT)
{
}
void
Logger::Log(const std::string& msg)
{
const std::lock_guard<std::mutex> lock(mutex_);
if (file_stream_.is_open()) {
file_stream_ << msg << std::endl;
} else {
std::cerr << msg << std::endl;
}
}
void
Logger::Flush()
{
std::cerr << std::flush;
}
const std::vector<char> LogMessage::level_name_{'E', 'W', 'I'};
LogMessage::LogMessage(const char* file, int line, uint32_t level)
{
std::string path(file);
size_t pos = path.rfind('/');
if (pos != std::string::npos) {
path = path.substr(pos + 1, std::string::npos);
}
// 'L' below is a placeholder for the log level
switch (gLogger_.LogFormat()) {
case Logger::Format::kDEFAULT: {
// LMMDD hh:mm:ss.ssssss
#ifdef _WIN32
SYSTEMTIME system_time;
GetSystemTime(&system_time);
stream_ << level_name_[std::min(level, (uint32_t)Level::kINFO)]
<< std::setfill('0') << std::setw(2) << system_time.wMonth
<< std::setw(2) << system_time.wDay << ' ' << std::setw(2)
<< system_time.wHour << ':' << std::setw(2) << system_time.wMinute
<< ':' << std::setw(2) << system_time.wSecond << '.'
<< std::setw(6) << system_time.wMilliseconds * 1000 << ' '
<< static_cast<uint32_t>(GetCurrentProcessId()) << ' ' << path
<< ':' << line << "] ";
#else
struct timeval tv;
gettimeofday(&tv, NULL);
struct tm tm_time;
gmtime_r(((time_t*)&(tv.tv_sec)), &tm_time);
stream_ << level_name_[std::min(level, (uint32_t)Level::kINFO)]
<< std::setfill('0') << std::setw(2) << (tm_time.tm_mon + 1)
<< std::setw(2) << tm_time.tm_mday << ' ' << std::setw(2)
<< tm_time.tm_hour << ':' << std::setw(2) << tm_time.tm_min << ':'
<< std::setw(2) << tm_time.tm_sec << '.' << std::setw(6)
<< tv.tv_usec << ' ' << static_cast<uint32_t>(getpid()) << ' '
<< path << ':' << line << "] ";
#endif
break;
}
case Logger::Format::kISO8601: {
// YYYY-MM-DDThh:mm:ssZ L
#ifdef _WIN32
SYSTEMTIME system_time;
GetSystemTime(&system_time);
stream_ << system_time.wYear << '-' << std::setfill('0') << std::setw(2)
<< system_time.wMonth << '-' << std::setw(2) << system_time.wDay
<< 'T' << std::setw(2) << system_time.wHour << ':' << std::setw(2)
<< system_time.wMinute << ':' << std::setw(2)
<< system_time.wSecond << "Z "
<< level_name_[std::min(level, (uint32_t)Level::kINFO)] << ' '
<< static_cast<uint32_t>(GetCurrentProcessId()) << ' ' << path
<< ':' << line << "] ";
#else
struct timeval tv;
gettimeofday(&tv, NULL);
struct tm tm_time;
gmtime_r(((time_t*)&(tv.tv_sec)), &tm_time);
stream_ << (tm_time.tm_year + 1900) << '-' << std::setfill('0')
<< std::setw(2) << (tm_time.tm_mon + 1) << '-' << std::setw(2)
<< tm_time.tm_mday << 'T' << std::setw(2) << tm_time.tm_hour
<< ':' << std::setw(2) << tm_time.tm_min << ':' << std::setw(2)
<< tm_time.tm_sec << "Z "
<< level_name_[std::min(level, (uint32_t)Level::kINFO)] << ' '
<< static_cast<uint32_t>(getpid()) << ' ' << path << ':' << line
<< "] ";
#endif
break;
}
}
}
LogMessage::~LogMessage()
{
gLogger_.Log(stream_.str());
}
}} // namespace triton::common
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/model_config.h"
namespace triton { namespace common {
bool
IsFixedSizeDataType(const inference::DataType dtype)
{
return dtype != inference::DataType::TYPE_STRING;
}
size_t
GetDataTypeByteSize(const inference::DataType dtype)
{
switch (dtype) {
case inference::DataType::TYPE_BOOL:
return 1;
case inference::DataType::TYPE_UINT8:
return 1;
case inference::DataType::TYPE_UINT16:
return 2;
case inference::DataType::TYPE_UINT32:
return 4;
case inference::DataType::TYPE_UINT64:
return 8;
case inference::DataType::TYPE_INT8:
return 1;
case inference::DataType::TYPE_INT16:
return 2;
case inference::DataType::TYPE_INT32:
return 4;
case inference::DataType::TYPE_INT64:
return 8;
case inference::DataType::TYPE_FP16:
return 2;
case inference::DataType::TYPE_FP32:
return 4;
case inference::DataType::TYPE_FP64:
return 8;
case inference::DataType::TYPE_STRING:
return 0;
case inference::DataType::TYPE_BF16:
return 2;
default:
break;
}
return 0;
}
int64_t
GetElementCount(const DimsList& dims)
{
bool first = true;
int64_t cnt = 0;
for (auto dim : dims) {
if (dim == WILDCARD_DIM) {
return -1;
}
if (first) {
cnt = dim;
first = false;
} else {
cnt *= dim;
}
}
return cnt;
}
int64_t
GetElementCount(const std::vector<int64_t>& dims)
{
bool first = true;
int64_t cnt = 0;
for (auto dim : dims) {
if (dim == WILDCARD_DIM) {
return -1;
}
if (first) {
cnt = dim;
first = false;
} else {
cnt *= dim;
}
}
return cnt;
}
int64_t
GetElementCount(const inference::ModelInput& mio)
{
return GetElementCount(mio.dims());
}
int64_t
GetElementCount(const inference::ModelOutput& mio)
{
return GetElementCount(mio.dims());
}
int64_t
GetByteSize(const inference::DataType& dtype, const DimsList& dims)
{
size_t dt_size = GetDataTypeByteSize(dtype);
if (dt_size == 0) {
return -1;
}
int64_t cnt = GetElementCount(dims);
if (cnt == -1) {
return -1;
}
return cnt * dt_size;
}
int64_t
GetByteSize(const inference::DataType& dtype, const std::vector<int64_t>& dims)
{
size_t dt_size = GetDataTypeByteSize(dtype);
if (dt_size == 0) {
return -1;
}
int64_t cnt = GetElementCount(dims);
if (cnt == -1) {
return -1;
}
return cnt * dt_size;
}
int64_t
GetByteSize(
const int batch_size, const inference::DataType& dtype,
const DimsList& dims)
{
if (dims.size() == 0) {
return batch_size * GetDataTypeByteSize(dtype);
}
int64_t bs = GetByteSize(dtype, dims);
if (bs == -1) {
return -1;
}
return std::max(1, batch_size) * bs;
}
int64_t
GetByteSize(
const int batch_size, const inference::DataType& dtype,
const std::vector<int64_t>& dims)
{
if (dims.size() == 0) {
return batch_size * GetDataTypeByteSize(dtype);
}
int64_t bs = GetByteSize(dtype, dims);
if (bs == -1) {
return -1;
}
return std::max(1, batch_size) * bs;
}
int64_t
GetByteSize(const inference::ModelInput& mio)
{
return GetByteSize(mio.data_type(), mio.dims());
}
int64_t
GetByteSize(const inference::ModelOutput& mio)
{
return GetByteSize(mio.data_type(), mio.dims());
}
int
GetCpuNiceLevel(const inference::ModelConfig& config)
{
int nice = SCHEDULER_DEFAULT_NICE;
if (config.has_optimization()) {
switch (config.optimization().priority()) {
case inference::ModelOptimizationPolicy::PRIORITY_MAX:
nice = 0;
break;
case inference::ModelOptimizationPolicy::PRIORITY_MIN:
nice = 19;
break;
default:
nice = SCHEDULER_DEFAULT_NICE;
break;
}
}
return nice;
}
bool
CompareDims(const DimsList& dims0, const DimsList& dims1)
{
if (dims0.size() != dims1.size()) {
return false;
}
for (int i = 0; i < dims0.size(); ++i) {
if (dims0[i] != dims1[i]) {
return false;
}
}
return true;
}
bool
CompareDims(
const std::vector<int64_t>& dims0, const std::vector<int64_t>& dims1)
{
if (dims0.size() != dims1.size()) {
return false;
}
for (size_t i = 0; i < dims0.size(); ++i) {
if (dims0[i] != dims1[i]) {
return false;
}
}
return true;
}
bool
CompareDimsWithWildcard(const DimsList& dims0, const DimsList& dims1)
{
if (dims0.size() != dims1.size()) {
return false;
}
for (int i = 0; i < dims0.size(); ++i) {
if ((dims0[i] != WILDCARD_DIM) && (dims1[i] != WILDCARD_DIM) &&
(dims0[i] != dims1[i])) {
return false;
}
}
return true;
}
bool
CompareDimsWithWildcard(
const DimsList& dims0, const std::vector<int64_t>& dims1)
{
if (dims0.size() != (int64_t)dims1.size()) {
return false;
}
for (int i = 0; i < dims0.size(); ++i) {
if ((dims0[i] != WILDCARD_DIM) && (dims1[i] != WILDCARD_DIM) &&
(dims0[i] != dims1[i])) {
return false;
}
}
return true;
}
std::string
DimsListToString(const DimsList& dims)
{
bool first = true;
std::string str("[");
for (const auto& dim : dims) {
if (!first) {
str += ",";
}
str += std::to_string(dim);
first = false;
}
str += "]";
return str;
}
std::string
DimsListToString(const std::vector<int64_t>& dims, const int start_idx)
{
int idx = 0;
std::string str("[");
for (const auto& dim : dims) {
if (idx >= start_idx) {
if (idx > start_idx) {
str += ",";
}
str += std::to_string(dim);
}
idx++;
}
str += "]";
return str;
}
const char*
DataTypeToProtocolString(const inference::DataType dtype)
{
switch (dtype) {
case inference::DataType::TYPE_BOOL:
return "BOOL";
case inference::DataType::TYPE_UINT8:
return "UINT8";
case inference::DataType::TYPE_UINT16:
return "UINT16";
case inference::DataType::TYPE_UINT32:
return "UINT32";
case inference::DataType::TYPE_UINT64:
return "UINT64";
case inference::DataType::TYPE_INT8:
return "INT8";
case inference::DataType::TYPE_INT16:
return "INT16";
case inference::DataType::TYPE_INT32:
return "INT32";
case inference::DataType::TYPE_INT64:
return "INT64";
case inference::DataType::TYPE_FP16:
return "FP16";
case inference::DataType::TYPE_FP32:
return "FP32";
case inference::DataType::TYPE_FP64:
return "FP64";
case inference::DataType::TYPE_STRING:
return "BYTES";
case inference::DataType::TYPE_BF16:
return "BF16";
default:
break;
}
return "<invalid>";
}
inference::DataType
ProtocolStringToDataType(const std::string& dtype)
{
return ProtocolStringToDataType(dtype.c_str(), dtype.size());
}
inference::DataType
ProtocolStringToDataType(const char* dtype, size_t len)
{
if (len < 4 || len > 6) {
return inference::DataType::TYPE_INVALID;
}
if ((*dtype == 'I') && (len != 6)) {
if ((dtype[1] == 'N') && (dtype[2] == 'T')) {
if ((dtype[3] == '8') && (len == 4)) {
return inference::DataType::TYPE_INT8;
} else if ((dtype[3] == '1') && (dtype[4] == '6')) {
return inference::DataType::TYPE_INT16;
} else if ((dtype[3] == '3') && (dtype[4] == '2')) {
return inference::DataType::TYPE_INT32;
} else if ((dtype[3] == '6') && (dtype[4] == '4')) {
return inference::DataType::TYPE_INT64;
}
}
} else if ((*dtype == 'U') && (len != 4)) {
if ((dtype[1] == 'I') && (dtype[2] == 'N') && (dtype[3] == 'T')) {
if ((dtype[4] == '8') && (len == 5)) {
return inference::DataType::TYPE_UINT8;
} else if ((dtype[4] == '1') && (dtype[5] == '6')) {
return inference::DataType::TYPE_UINT16;
} else if ((dtype[4] == '3') && (dtype[5] == '2')) {
return inference::DataType::TYPE_UINT32;
} else if ((dtype[4] == '6') && (dtype[5] == '4')) {
return inference::DataType::TYPE_UINT64;
}
}
} else if ((*dtype == 'F') && (dtype[1] == 'P') && (len == 4)) {
if ((dtype[2] == '1') && (dtype[3] == '6')) {
return inference::DataType::TYPE_FP16;
} else if ((dtype[2] == '3') && (dtype[3] == '2')) {
return inference::DataType::TYPE_FP32;
} else if ((dtype[2] == '6') && (dtype[3] == '4')) {
return inference::DataType::TYPE_FP64;
}
} else if (*dtype == 'B') {
switch (dtype[1]) {
case 'Y':
if (!strcmp(dtype + 2, "TES")) {
return inference::DataType::TYPE_STRING;
}
break;
case 'O':
if (!strcmp(dtype + 2, "OL")) {
return inference::DataType::TYPE_BOOL;
}
break;
case 'F':
if (!strcmp(dtype + 2, "16")) {
return inference::DataType::TYPE_BF16;
}
break;
}
}
return inference::DataType::TYPE_INVALID;
}
}} // namespace triton::common
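// Usage sketch (illustrative only): byte size of a batch of FP32 tensors.
// With dims {3, 224, 224} the element count is 150528, a single tensor is
// 602112 bytes, and a batch of 4 is 2408448 bytes:
//
//   std::vector<int64_t> dims{3, 224, 224};
//   int64_t bytes = triton::common::GetByteSize(
//       4 /* batch_size */, inference::DataType::TYPE_FP32, dims);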
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/table_printer.h"
#ifdef _WIN32
// suppress the min and max definitions in Windef.h.
#define NOMINMAX
#include <Windows.h>
#else
#include <sys/ioctl.h>
#include <unistd.h>
#endif
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <memory>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>
namespace triton { namespace common {
//
// ASCII table printer.
//
void
TablePrinter::InsertRow(const std::vector<std::string>& row)
{
std::vector<std::vector<std::string>> table_row;
// Number of lines in each field in the record
size_t max_height = 0;
// Update max length of data items in each row
for (size_t i = 0; i < row.size(); ++i) {
table_row.push_back(std::vector<std::string>{});
std::stringstream ss(row[i]);
std::string line;
size_t max_width = 0;
while (std::getline(ss, line, '\n')) {
table_row[i].push_back(line);
if (line.size() > max_width)
max_width = line.size();
}
if (max_width > max_widths_[i])
max_widths_[i] = max_width;
size_t number_of_lines = table_row[i].size();
if (max_height < number_of_lines)
max_height = number_of_lines;
}
max_heights_.push_back(max_height);
data_.emplace_back(table_row);
}
void
TablePrinter::FairShare()
{
// initialize original index locations
size_t array_size = max_widths_.size();
std::vector<size_t> idx(array_size);
iota(idx.begin(), idx.end(), 0);
stable_sort(idx.begin(), idx.end(), [this](size_t i1, size_t i2) {
return this->max_widths_[i1] < this->max_widths_[i2];
});
size_t loop_index = 1;
for (auto itr = idx.begin(); itr != idx.end(); ++itr) {
// If a column is not using all the space allocated to it
if (max_widths_[*itr] < shares_[*itr]) {
float excess = shares_[*itr] - max_widths_[*itr];
shares_[*itr] -= excess;
if (itr == idx.end() - 1)
break;
auto update_itr = idx.begin() + (itr - idx.begin() + 1);
// excess amount of unused space that must be distributed evenly to the
// next columns
float excess_per_column = excess / (array_size - loop_index);
for (; update_itr != idx.end(); ++update_itr) {
shares_[*update_itr] += excess_per_column;
excess -= excess_per_column;
}
}
++loop_index;
}
// Remove any decimal shares
for (auto itr = idx.begin(); itr != idx.end(); ++itr) {
shares_[*itr] = (size_t)shares_[*itr];
}
// For each record
for (size_t i = 0; i < data_.size(); i++) {
auto current_row = data_[i];
// For each field in the record
for (size_t j = 0; j < current_row.size(); j++) {
// For each line in the record
for (size_t line_index = 0; line_index < current_row[j].size();
line_index++) {
std::string line = current_row[j][line_index];
size_t num_rows = (line.size() + shares_[j] - 1) / shares_[j];
// If the number of rows required for this record is larger than 1, we
// will break that line and put it in multiple lines
if (num_rows > 1) {
// Remove the multi-line field; it will be replaced by lines
// that fit within the column size
data_[i][j].erase(data_[i][j].begin() + line_index);
for (size_t k = 0; k < num_rows; k++) {
size_t start_index =
std::min((size_t)(k * shares_[j]), line.size());
size_t end_index =
std::min((size_t)((k + 1) * shares_[j]), line.size());
data_[i][j].insert(
data_[i][j].begin() + line_index + k,
line.substr(start_index, end_index - start_index));
}
// We need to advance the index past the split lines.
line_index += num_rows - 1;
}
if (max_heights_[i] < (num_rows - 1 + current_row[j].size()))
max_heights_[i] += num_rows - 1;
}
}
}
}
void
TablePrinter::AddRow(std::stringstream& table, size_t row_index)
{
auto row = data_[row_index];
size_t max_height = max_heights_[row_index];
for (size_t j = 0; j < max_height; j++) {
table << "|" << std::left;
for (size_t i = 0; i < row.size(); i++) {
if (j < row[i].size())
table << " " << std::setw(shares_[i]) << row[i][j] << " |";
else
table << " " << std::setw(shares_[i]) << " "
<< " |";
}
// Do not add new line if this is the last row of this record
if (j != max_height - 1)
table << "\n";
}
table << "\n";
}
void
TablePrinter::AddRowDivider(std::stringstream& table)
{
table << "+";
for (const auto& share : shares_) {
for (size_t i = 0; i < share + 2; i++) table << "-";
table << "+";
}
table << "\n";
}
std::string
TablePrinter::PrintTable()
{
std::stringstream table;
table << "\n";
FairShare();
AddRowDivider(table);
// Add table headers
AddRow(table, 0);
AddRowDivider(table);
for (size_t j = 1; j < data_.size(); j++) {
AddRow(table, j);
}
AddRowDivider(table);
return table.str();
}
// TablePrinter copies `headers` into the first row of the table.
TablePrinter::TablePrinter(const std::vector<std::string>& headers)
{
// Terminal width in columns; fall back to 500 if it cannot be determined
size_t column_size = 500;
#ifdef _WIN32
CONSOLE_SCREEN_BUFFER_INFO csbi;
int ret = GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi);
if (ret && (csbi.dwSize.X != 0)) {
column_size = csbi.dwSize.X;
}
#else
struct winsize terminal_size;
int status = ioctl(STDOUT_FILENO, TIOCGWINSZ, &terminal_size);
if ((status == 0) && (terminal_size.ws_col != 0)) {
column_size = terminal_size.ws_col;
}
#endif
for (size_t i = 0; i < headers.size(); ++i) {
max_widths_.emplace_back(0);
}
// Calculate fair share of every column
size_t number_of_columns = headers.size();
// Usable width is the terminal width minus the two spaces of padding
// around each column and the (number_of_columns + 1) '|' separators
// between and around the columns
size_t terminal_width =
column_size - (2 * number_of_columns) - (number_of_columns + 1);
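// For example (illustrative numbers): with 3 columns on an 80-column
// terminal, the usable width is 80 - (2 * 3) - (3 + 1) = 70 characters.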
int equal_share = terminal_width / headers.size();
for (size_t i = 0; i < headers.size(); ++i) {
shares_.emplace_back(equal_share);
terminal_width -= equal_share;
}
InsertRow(headers);
}
}} // namespace triton::common
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/common/thread_pool.h"
#include <stdexcept>
namespace triton { namespace common {
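// Usage sketch (Task is assumed to be a std::function<void()>-style callable
// declared in thread_pool.h):
//   ThreadPool pool(4);                      // start 4 worker threads
//   pool.Enqueue([] { /* do some work */ }); // run a task on a worker
//   // ~ThreadPool() drains the remaining tasks and joins all workers.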
ThreadPool::ThreadPool(size_t thread_count)
{
if (!thread_count) {
throw std::invalid_argument("Thread count must be greater than zero.");
}
// Worker loop: each thread waits for a task to become available and runs it
const auto worker_loop = [this]() {
while (true) {
Task task;
{
std::unique_lock<std::mutex> lk(queue_mtx_);
// Wake if there's a task to do, or the pool has been stopped.
cv_.wait(lk, [&]() { return !task_queue_.empty() || stop_; });
// Exit condition
if (stop_ && task_queue_.empty()) {
break;
}
task = std::move(task_queue_.front());
task_queue_.pop();
}
// Execute task - ensure function has a valid target
if (task) {
task();
}
}
};
workers_.reserve(thread_count);
for (size_t i = 0; i < thread_count; ++i) {
workers_.emplace_back(worker_loop);
}
}
ThreadPool::~ThreadPool()
{
{
std::lock_guard<std::mutex> lk(queue_mtx_);
// Signal to each worker that it should exit its loop once remaining tasks are finished
stop_ = true;
}
// Wake all threads to clean up
cv_.notify_all();
for (auto& t : workers_) {
t.join();
}
}
void
ThreadPool::Enqueue(Task&& task)
{
{
std::lock_guard<std::mutex> lk(queue_mtx_);
// Don't accept more work if pool is shutting down
if (stop_) {
return;
}
task_queue_.push(std::move(task));
}
// Only wake one thread per task
// Todo: DLIS-3859 if ThreadPool gets used more.
cv_.notify_one();
}
}} // namespace triton::common
#!/usr/bin/python
# Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import os
import subprocess
import yapf
FLAGS = None
FORMAT_EXTS = ('proto', 'cc', 'cu', 'h')
SKIP_PATHS = ('tools',)
def visit(path):
if FLAGS.verbose:
print("visiting " + path)
valid_ext = False
python_file = False
for ext in FORMAT_EXTS:
if path.endswith('.' + ext):
valid_ext = True
break
if path.endswith('.py'):
valid_ext = True
python_file = True
if not valid_ext:
if FLAGS.verbose:
print("skipping due to extension: " + path)
return True
for skip in SKIP_PATHS:
if path.startswith(skip):
if FLAGS.verbose:
print("skipping due to path prefix: " + path)
return True
if python_file:
yapf.yapflib.yapf_api.FormatFile(path,
in_place=True,
style_config='google')
return True
else:
args = ['clang-format-6.0', '--style=file', '-i']
if FLAGS.verbose:
args.append('-verbose')
args.append(path)
ret = subprocess.call(args)
if ret != 0:
print("format failed for " + path)
return False
return True
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-v',
'--verbose',
action="store_true",
required=False,
default=False,
help='Enable verbose output')
parser.add_argument('paths',
type=str,
nargs='*',
default=None,
help='Directories or files to format')
FLAGS = parser.parse_args()
# Check the version of yapf. A consistent version
# of yapf is needed to prevent unnecessary changes in the code.
if (yapf.__version__ != '0.30.0'):
print("Needs yapf 0.30.0, but got yapf {}".format(yapf.__version__))
if (FLAGS.paths is None) or (len(FLAGS.paths) == 0):
parser.print_help()
exit(1)
ret = True
for path in FLAGS.paths:
if not os.path.isdir(path):
if not visit(path):
ret = False
else:
for root, dirs, files in os.walk(path):
for name in files:
if not visit(os.path.join(root, name)):
ret = False
exit(0 if ret else 1)
#!/bin/bash
# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
###############################################################################
#
# Git pre-commit hook for Triton related projects
#
# To install this hook for a project, copy "pre-commit" and "format.py" into
# ".git/hooks/" directory of the project
#
###############################################################################
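# For example, from the root of the project being set up (the paths to the
# hook files below are illustrative):
#   cp pre-commit format.py <project>/.git/hooks/
#   chmod +x <project>/.git/hooks/pre-commit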
###############################################################################
#
# Run formatter script
#
###############################################################################
# Repo root
GIT_REPO_ROOT=$(git rev-parse --show-toplevel)
PYTHON_CMD=python3
FORMATTER_PY=${GIT_REPO_ROOT}/.git/hooks/format.py
CHANGED_FILES="$(git --no-pager diff --name-status --no-color --cached | awk '{ if (match($1, /R[0-9]+/)) { print $3 } else if ($1 != "D") { print $2 } }')"
echo "Running Python auto-format..."
for CHANGED_FILE in $CHANGED_FILES;
do
${PYTHON_CMD} ${FORMATTER_PY} ${GIT_REPO_ROOT}/${CHANGED_FILE}
git add ${GIT_REPO_ROOT}/${CHANGED_FILE}
done
---
BasedOnStyle: Google
IndentWidth: 2
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2
SortIncludes: true
CompactNamespaces: true
ReflowComments: true
DerivePointerAlignment: false
PointerAlignment: Left
AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AlwaysBreakAfterReturnType: TopLevelDefinitions
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Custom
BraceWrapping:
AfterClass: false
AfterControlStatement: false
AfterEnum: false
AfterFunction: true
AfterNamespace: false
AfterStruct: false
AfterUnion: false
BeforeCatch: true
BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false
IndentCaseLabels: true
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#cmake_minimum_required(VERSION 3.18)
cmake_minimum_required(VERSION 3.16)
project(tritoncore LANGUAGES C CXX)
# Control building of the shared library vs. only the headers and stub. By
# default only the headers and the library stub are built. Set
# TRITON_CORE_HEADERS_ONLY=OFF to also build libtritonserver.so.
option(TRITON_CORE_HEADERS_ONLY "Build only headers and stub" ON)
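# For example, to also build libtritonserver.so (matching the build command
# shown in the README):
#   cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_CORE_HEADERS_ONLY=OFF ..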
#
# Triton Server API
#
add_library(
triton-core-serverapi INTERFACE
)
add_library(
TritonCore::triton-core-serverapi ALIAS triton-core-serverapi
)
target_include_directories(
triton-core-serverapi
INTERFACE
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)
#
# Triton Backend API
#
add_library(
triton-core-backendapi INTERFACE
)
add_library(
TritonCore::triton-core-backendapi ALIAS triton-core-backendapi
)
target_include_directories(
triton-core-backendapi
INTERFACE
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)
#
# Triton RepoAgent API
#
add_library(
triton-core-repoagentapi INTERFACE
)
add_library(
TritonCore::triton-core-repoagentapi ALIAS triton-core-repoagentapi
)
target_include_directories(
triton-core-repoagentapi
INTERFACE
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)
#
# Stub library for libtritonserver.so that stubs Triton Server API and
# Triton Backend API
#
add_library(
triton-core-serverstub SHARED
${CMAKE_CURRENT_SOURCE_DIR}/src/tritonserver_stub.cc
)
add_library(
TritonCore::triton-core-serverstub ALIAS triton-core-serverstub
)
target_compile_features(triton-core-serverstub PRIVATE cxx_std_11)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
message("Using MSVC as compiler, default target on Windows 10. "
"If the target system is not Windows 10, please update _WIN32_WINNT "
"to corresponding value.")
target_compile_options(
triton-core-serverstub
PRIVATE
/Wall /D_WIN32_WINNT=0x0A00 /EHsc
)
else()
target_compile_options(
triton-core-serverstub
PRIVATE
-Wall -Wextra -Wno-unused-parameter -Werror
)
endif()
set_target_properties(
triton-core-serverstub
PROPERTIES
POSITION_INDEPENDENT_CODE ON
OUTPUT_NAME tritonserver
)
#
# Shared library implementing Triton Server API
#
if(NOT TRITON_CORE_HEADERS_ONLY)
include(CMakeDependentOption)
set(TRITON_VERSION "0.0.0" CACHE STRING "The version of the Triton shared library" )
option(TRITON_ENABLE_LOGGING "Include logging support in server" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in server" ON)
option(TRITON_ENABLE_TRACING "Include tracing support in server" OFF)
option(TRITON_ENABLE_NVTX "Include NVTX support in server" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in server" ON)
option(TRITON_ENABLE_MALI_GPU "Enable Arm Mali GPU support in server" OFF)
set(TRITON_MIN_COMPUTE_CAPABILITY "6.0" CACHE STRING
"The minimum CUDA compute capability supported by Triton" )
set(TRITON_EXTRA_LIB_PATHS "" CACHE PATH "Extra library paths for Triton Server build")
# Ensemble
option(TRITON_ENABLE_ENSEMBLE "Include ensemble support in server" OFF)
# Metrics
option(TRITON_ENABLE_METRICS "Include metrics support in server" ON)
option(TRITON_ENABLE_METRICS_GPU "Include GPU metrics support in server" ON)
option(TRITON_ENABLE_METRICS_CPU "Include CPU metrics support in server" ON)
# Cloud storage
option(TRITON_ENABLE_GCS "Include GCS Filesystem support in server" OFF)
option(TRITON_ENABLE_S3 "Include S3 Filesystem support in server" OFF)
option(TRITON_ENABLE_AZURE_STORAGE "Include Azure Storage Filesystem support in server" OFF)
# Repo tags
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_THIRD_PARTY_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/third_party repo")
# Third-party location
set(TRITON_THIRD_PARTY_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party" CACHE STRING "Location of third-party build")
set(TRITON_THIRD_PARTY_SRC_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party-src" CACHE STRING "Location of third-party source")
if(TRITON_ENABLE_METRICS AND NOT TRITON_ENABLE_STATS)
message(FATAL_ERROR "TRITON_ENABLE_METRICS=ON requires TRITON_ENABLE_STATS=ON")
endif()
if(TRITON_ENABLE_TRACING AND NOT TRITON_ENABLE_STATS)
message(FATAL_ERROR "TRITON_ENABLE_TRACING=ON requires TRITON_ENABLE_STATS=ON")
endif()
if (TRITON_ENABLE_METRICS_CPU AND NOT TRITON_ENABLE_METRICS)
message(FATAL_ERROR "TRITON_ENABLE_METRICS_CPU=ON requires TRITON_ENABLE_METRICS=ON")
endif()
if (TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_METRICS)
message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_METRICS=ON")
endif()
if (TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_GPU)
message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_GPU=ON")
endif()
include(FetchContent)
FetchContent_Declare(
repo-third-party
GIT_REPOSITORY https://github.com/triton-inference-server/third_party.git
GIT_TAG ${TRITON_THIRD_PARTY_REPO_TAG}
)
FetchContent_MakeAvailable(repo-third-party)
# Need to use ExternalProject for our builds so that we can get the
# correct dependencies between Triton shared library components and
# the ExternalProject dependencies (found in the third_party repo)
include(ExternalProject)
# If CMAKE_TOOLCHAIN_FILE is set, propagate that hint path to the external
# projects.
set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "")
if (CMAKE_TOOLCHAIN_FILE)
set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "-DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_TOOLCHAIN_FILE}")
endif()
# If VCPKG_TARGET_TRIPLET is set, propagate that hint path to the external
# projects.
set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "")
if (VCPKG_TARGET_TRIPLET)
set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "-DVCPKG_TARGET_TRIPLET:STRING=${VCPKG_TARGET_TRIPLET}")
endif()
# If OPENSSL_ROOT_DIR is set, propagate that hint path to the external
# projects with OpenSSL dependency.
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "")
if (OPENSSL_ROOT_DIR)
set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}")
endif()
# Location where protobuf-config.cmake will be installed varies by
# platform
if (WIN32)
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/cmake")
else()
set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/lib/cmake/protobuf")
endif()
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(TRITON_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install)
else()
set(TRITON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
endif()
set(TRITON_DEPENDS googletest protobuf)
if(${TRITON_ENABLE_GCS})
set(TRITON_DEPENDS ${TRITON_DEPENDS} google-cloud-cpp)
endif() # TRITON_ENABLE_GCS
if(${TRITON_ENABLE_S3})
set(TRITON_DEPENDS ${TRITON_DEPENDS} aws-sdk-cpp)
endif() # TRITON_ENABLE_S3
if(${TRITON_ENABLE_AZURE_STORAGE})
set(TRITON_DEPENDS ${TRITON_DEPENDS} azure-storage-cpplite)
endif() # TRITON_ENABLE_AZURE_STORAGE
if(${TRITON_ENABLE_METRICS})
set(TRITON_DEPENDS ${TRITON_DEPENDS} prometheus-cpp)
endif() # TRITON_ENABLE_METRICS
if(${TRITON_ENABLE_GPU})
set(TRITON_DEPENDS ${TRITON_DEPENDS} cnmem)
endif() # TRITON_ENABLE_GPU
ExternalProject_Add(triton-core
PREFIX triton-core
SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src"
BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/triton-core"
CMAKE_CACHE_ARGS
-DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR}
${_CMAKE_ARGS_OPENSSL_ROOT_DIR}
${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE}
${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET}
-DGTEST_ROOT:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/googletest
-DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc
-Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/lib/cmake/c-ares
-Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/lib/cmake/absl
-Dnlohmann_json_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/nlohmann_json/lib/cmake/nlohmann_json
-Dprometheus-cpp_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/prometheus-cpp/lib/cmake/prometheus-cpp
-Dgoogle_cloud_cpp_storage_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/lib/cmake/google_cloud_cpp_storage
-Dgoogle_cloud_cpp_rest_internal_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/lib/cmake/google_cloud_cpp_rest_internal
-Dazure-storage-cpplite_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/azure-storage-cpplite
-Dgoogle_cloud_cpp_common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/lib/cmake/google_cloud_cpp_common
-DCrc32c_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/crc32c/lib/cmake/Crc32c
-DAWSSDK_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/cmake/AWSSDK
-Daws-cpp-sdk-core_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/cmake/aws-cpp-sdk-core
-Daws-cpp-sdk-s3_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/cmake/aws-cpp-sdk-s3
-Daws-c-event-stream_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/aws-c-event-stream/cmake
-Daws-c-common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/aws-c-common/cmake
-Daws-checksums_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/aws-checksums/cmake
-DCNMEM_PATH:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/cnmem
-DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG}
-DTRITON_EXTRA_LIB_PATHS:PATH=${TRITON_EXTRA_LIB_PATHS}
-DTRITON_ENABLE_NVTX:BOOL=${TRITON_ENABLE_NVTX}
-DTRITON_ENABLE_TRACING:BOOL=${TRITON_ENABLE_TRACING}
-DTRITON_ENABLE_LOGGING:BOOL=${TRITON_ENABLE_LOGGING}
-DTRITON_ENABLE_STATS:BOOL=${TRITON_ENABLE_STATS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
-DTRITON_ENABLE_MALI_GPU:BOOL=${TRITON_ENABLE_MALI_GPU}
-DTRITON_MIN_COMPUTE_CAPABILITY:STRING=${TRITON_MIN_COMPUTE_CAPABILITY}
-DTRITON_ENABLE_METRICS:BOOL=${TRITON_ENABLE_METRICS}
-DTRITON_ENABLE_METRICS_GPU:BOOL=${TRITON_ENABLE_METRICS_GPU}
-DTRITON_ENABLE_METRICS_CPU:BOOL=${TRITON_ENABLE_METRICS_CPU}
-DTRITON_ENABLE_GCS:BOOL=${TRITON_ENABLE_GCS}
-DTRITON_ENABLE_AZURE_STORAGE:BOOL=${TRITON_ENABLE_AZURE_STORAGE}
-DTRITON_ENABLE_S3:BOOL=${TRITON_ENABLE_S3}
-DTRITON_ENABLE_ENSEMBLE:BOOL=${TRITON_ENABLE_ENSEMBLE}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX}
-DTRITON_VERSION:STRING=${TRITON_VERSION}
DEPENDS ${TRITON_DEPENDS}
)
endif() # NOT TRITON_CORE_HEADERS_ONLY
#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonCore)
install(
TARGETS
triton-core-backendapi
triton-core-repoagentapi
triton-core-serverapi
EXPORT
triton-core-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
install(
TARGETS
triton-core-serverstub
EXPORT
triton-core-targets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/stubs
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/stubs
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/stubs
)
install(
DIRECTORY include/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)
install(
EXPORT
triton-core-targets
FILE
TritonCoreTargets.cmake
NAMESPACE
TritonCore::
DESTINATION
${INSTALL_CONFIGDIR}
)
include(CMakePackageConfigHelpers)
configure_package_config_file(
${CMAKE_CURRENT_LIST_DIR}/cmake/TritonCoreConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/TritonCoreConfig.cmake
INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)
install(
FILES
${CMAKE_CURRENT_BINARY_DIR}/TritonCoreConfig.cmake
DESTINATION
${INSTALL_CONFIGDIR}
)
#
# Export from build tree
#
export(
EXPORT
triton-core-targets
FILE
${CMAKE_CURRENT_BINARY_DIR}/TritonCoreTargets.cmake
NAMESPACE
TritonCore::
)
export(PACKAGE TritonCore)
Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of NVIDIA CORPORATION nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
<!--
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
# Triton Inference Server Core
This repository holds the source code and headers for the library that
implements the core functionality of Triton. The *core* library can be
built as described below and used directly via its [C
API](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#in-process-triton-server-api). To
be useful, the core library must be paired with one or more backends.
You can learn more about backends in the [backend
repo](https://github.com/triton-inference-server/backend).
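As a rough illustration of the in-process C API, the sketch below creates and
then shuts down a server instance. Error handling is omitted, the model
repository path is just a placeholder, and the exact signatures should be
taken from the tritonserver.h header in this repository rather than from this
snippet.
```
#include "triton/core/tritonserver.h"

int
main()
{
  // Create server options and point them at a (placeholder) model repository.
  TRITONSERVER_ServerOptions* options = nullptr;
  TRITONSERVER_ServerOptionsNew(&options);
  TRITONSERVER_ServerOptionsSetModelRepositoryPath(
      options, "/path/to/model/repository");

  // Create the in-process server and release the options.
  TRITONSERVER_Server* server = nullptr;
  TRITONSERVER_ServerNew(&server, options);
  TRITONSERVER_ServerOptionsDelete(options);

  // ... issue inference requests against `server` here ...

  // Shut down and release the server.
  TRITONSERVER_ServerStop(server);
  TRITONSERVER_ServerDelete(server);
  return 0;
}
```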
Typically you do not build or use the core library on its own, but as
part of the *tritonserver* executable. The *tritonserver* executable
is built in the [server
repo](https://github.com/triton-inference-server/server) as described
in the [server build
documentation](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/build.md).
Ask questions or report problems in the main Triton [issues
page](https://github.com/triton-inference-server/server/issues).
## Build the Triton Core Library
Before building the Triton core library, your build system must have
the required dependencies installed as described in the [build
documentation](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/build.md). For
example, if you are building the core library with GPU support
(-DTRITON_ENABLE_GPU=ON), then you must install the CUDA, cuDNN, and
TensorRT dependencies required for the version of Triton you are
building.
To build, first clone the release branch matching the Triton release
you are interested in (*rxx.yy*), or the *main* branch to build the
top-of-tree. The Triton core library is built with CMake.
```
$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_CORE_HEADERS_ONLY=OFF ..
$ make install
```
When the build completes, the install directory will contain the
Triton core shared library (install/lib/libtritonserver.so on Linux,
install/bin/tritonserver.dll on Windows), and the core library header
files in install/include/triton/core.
### Build a Release Branch
The following required Triton repositories will be pulled and used in
the build. By default, the "main" branch/tag will be used for each
repo, but each can be overridden with the CMake argument listed below.
* triton-inference-server/third_party: -DTRITON_THIRD_PARTY_REPO_TAG=[tag]
* triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag]
You will need to override these tags if you are building from a release
branch. For example, if you are building the r22.03 version of Triton,
you would clone the r22.03 branch of the core repo and use the
following cmake command.
```
$ cmake -DTRITON_THIRD_PARTY_REPO_TAG=r22.03 -DTRITON_COMMON_REPO_TAG=r22.03 -DTRITON_CORE_HEADERS_ONLY=OFF ..
```
### Build Options
The [CMakeLists.txt](CMakeLists.txt) file contains the options
available when building the core library. For example, to build the
core library with the default settings plus S3 cloud storage and
ensembling support, use the following command.
```
$ cmake -DTRITON_CORE_HEADERS_ONLY=OFF -DTRITON_ENABLE_S3=ON -DTRITON_ENABLE_ENSEMBLE=ON ..
```