// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <string>
#include <vector>

#include "buffer_attributes.h"
#include "cuda_utils.h"
#include "infer_parameter.h"
#include "infer_request.h"
#include "infer_response.h"
#include "infer_stats.h"
#include "metric_family.h"
#include "metrics.h"
#include "model.h"
#include "model_config_utils.h"
#include "model_repository_manager.h"
#include "rate_limiter.h"
#include "response_allocator.h"
#include "server.h"
#include "server_message.h"
#include "status.h"
#include "triton/common/logging.h"
#include "triton/common/model_config.h"
#include "triton/common/nvtx.h"
#include "triton/common/table_printer.h"
#include "triton/common/triton_json.h"
#include "tritonserver_apis.h"

// For an unknown reason, Windows will not export some functions declared
// with dllexport in tritonrepoagent.h and tritonbackend.h. To get those
// functions exported it is (also?) necessary to mark the definitions in
// this file with dllexport as well. The TRITONSERVER_* functions are
// getting exported, but for consistency the declspec is added to these
// definitions as well.
#if defined(_MSC_VER)
#define TRITONAPI_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONAPI_DECLSPEC
#endif

namespace tc = triton::core;

namespace {

std::string
ResourceString(const std::string& name, const int count, const int device_id)
{
  return std::string(
      "{\"name\":\"" + name + "\", \"count\":" + std::to_string(count) +
      ", \"device\":" + std::to_string(device_id) + "}");
}

std::string
RateLimitModeToString(const tc::RateLimitMode rate_limit_mode)
{
  std::string rl_mode_str("");
  switch (rate_limit_mode) {
    case tc::RateLimitMode::RL_EXEC_COUNT: {
      rl_mode_str = "EXEC_COUNT";
      break;
    }
    case tc::RateLimitMode::RL_OFF: {
      rl_mode_str = "OFF";
      break;
    }
  }
  return rl_mode_str;
}

//
// TritonServerError
//
// Implementation for TRITONSERVER_Error.
//
class TritonServerError {
 public:
  static TRITONSERVER_Error* Create(
      TRITONSERVER_Error_Code code, const char* msg);
  static TRITONSERVER_Error* Create(
      TRITONSERVER_Error_Code code, const std::string& msg);
  static TRITONSERVER_Error* Create(const tc::Status& status);

  TRITONSERVER_Error_Code Code() const { return code_; }
  const std::string& Message() const { return msg_; }

 private:
  TritonServerError(TRITONSERVER_Error_Code code, const std::string& msg)
      : code_(code), msg_(msg)
  {
  }
  TritonServerError(TRITONSERVER_Error_Code code, const char* msg)
      : code_(code), msg_(msg)
  {
  }

  TRITONSERVER_Error_Code code_;
  const std::string msg_;
};

TRITONSERVER_Error*
TritonServerError::Create(TRITONSERVER_Error_Code code, const char* msg)
{
  return reinterpret_cast<TRITONSERVER_Error*>(
      new TritonServerError(code, msg));
}

TRITONSERVER_Error*
TritonServerError::Create(TRITONSERVER_Error_Code code, const std::string& msg)
{
  return reinterpret_cast<TRITONSERVER_Error*>(
      new TritonServerError(code, msg));
}

TRITONSERVER_Error*
TritonServerError::Create(const tc::Status& status)
{
  // If 'status' is success then return nullptr as that indicates
  // success
  if (status.IsOk()) {
    return nullptr;
  }

  return Create(
      tc::StatusCodeToTritonCode(status.StatusCode()), status.Message());
}

#define RETURN_IF_STATUS_ERROR(S)                 \
  do {                                            \
    const tc::Status& status__ = (S);             \
    if (!status__.IsOk()) {                       \
      return TritonServerError::Create(status__); \
    }                                             \
  } while (false)

//
// TritonServerMetrics
//
// Implementation for TRITONSERVER_Metrics.
//
class TritonServerMetrics {
 public:
  TritonServerMetrics() = default;
  TRITONSERVER_Error* Serialize(const char** base, size_t* byte_size);

 private:
  std::string serialized_;
};

TRITONSERVER_Error*
TritonServerMetrics::Serialize(const char** base, size_t* byte_size)
{
#ifdef TRITON_ENABLE_METRICS
  serialized_ = tc::Metrics::SerializedMetrics();
  *base = serialized_.c_str();
  *byte_size = serialized_.size();
  return nullptr;  // Success
#else
  *base = nullptr;
  *byte_size = 0;
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
#endif  // TRITON_ENABLE_METRICS
}

//
// TritonServerOptions
//
// Implementation for TRITONSERVER_ServerOptions.
//
class TritonServerOptions {
 public:
  TritonServerOptions();

  const std::string& ServerId() const { return server_id_; }
  void SetServerId(const char* id) { server_id_ = id; }

  const std::set<std::string>& ModelRepositoryPaths() const
  {
    return repo_paths_;
  }
  void SetModelRepositoryPath(const char* p) { repo_paths_.insert(p); }

  tc::ModelControlMode ModelControlMode() const { return model_control_mode_; }
  void SetModelControlMode(tc::ModelControlMode m) { model_control_mode_ = m; }

  const std::set<std::string>& StartupModels() const { return models_; }
  void SetStartupModel(const char* m) { models_.insert(m); }

  bool ExitOnError() const { return exit_on_error_; }
  void SetExitOnError(bool b) { exit_on_error_ = b; }

  bool StrictModelConfig() const { return strict_model_config_; }
  void SetStrictModelConfig(bool b) { strict_model_config_ = b; }

  tc::RateLimitMode RateLimiterMode() const { return rate_limit_mode_; }
  void SetRateLimiterMode(tc::RateLimitMode m) { rate_limit_mode_ = m; }

  TRITONSERVER_Error* AddRateLimiterResource(
      const std::string& resource, const size_t count, const int device);

  // The resource map is the map from device id to the map of
  // resources with their respective counts for that device.
  const tc::RateLimiter::ResourceMap& RateLimiterResources() const
  {
    return rate_limit_resource_map_;
  }

  uint64_t PinnedMemoryPoolByteSize() const { return pinned_memory_pool_size_; }
  void SetPinnedMemoryPoolByteSize(uint64_t s) { pinned_memory_pool_size_ = s; }

  uint64_t ResponseCacheByteSize() const { return response_cache_byte_size_; }
  void SetResponseCacheByteSize(uint64_t s) { response_cache_byte_size_ = s; }

  const std::map<int, uint64_t>& CudaMemoryPoolByteSize() const
  {
    return cuda_memory_pool_size_;
  }
  void SetCudaMemoryPoolByteSize(int id, uint64_t s)
  {
    cuda_memory_pool_size_[id] = s;
  }

  double MinSupportedComputeCapability() const
  {
    return min_compute_capability_;
  }
  void SetMinSupportedComputeCapability(double c)
  {
    min_compute_capability_ = c;
  }

  bool StrictReadiness() const { return strict_readiness_; }
  void SetStrictReadiness(bool b) { strict_readiness_ = b; }

  unsigned int ExitTimeout() const { return exit_timeout_; }
  void SetExitTimeout(unsigned int t) { exit_timeout_ = t; }

  unsigned int BufferManagerThreadCount() const
  {
    return buffer_manager_thread_count_;
  }
  void SetBufferManagerThreadCount(unsigned int c)
  {
    buffer_manager_thread_count_ = c;
  }

  unsigned int ModelLoadThreadCount() const { return model_load_thread_count_; }
  void SetModelLoadThreadCount(unsigned int c) { model_load_thread_count_ = c; }

  bool Metrics() const { return metrics_; }
  void SetMetrics(bool b) { metrics_ = b; }

  bool GpuMetrics() const { return gpu_metrics_; }
  void SetGpuMetrics(bool b) { gpu_metrics_ = b; }

  bool CpuMetrics() const { return cpu_metrics_; }
  void SetCpuMetrics(bool b) { cpu_metrics_ = b; }

  uint64_t MetricsInterval() const { return metrics_interval_; }
  void SetMetricsInterval(uint64_t m) { metrics_interval_ = m; }

  const std::string& BackendDir() const { return backend_dir_; }
  void SetBackendDir(const std::string& bd) { backend_dir_ = bd; }

  const std::string& RepoAgentDir() const { return repoagent_dir_; }
  void SetRepoAgentDir(const std::string& rad) { repoagent_dir_ = rad; }

  // The backend config map is a map from backend name to the
  // setting=value pairs for that backend. The empty backend name ("")
  // is used to communicate configuration information that is used
  // internally.
  const triton::common::BackendCmdlineConfigMap& BackendCmdlineConfigMap() const
  {
    return backend_cmdline_config_map_;
  }
  TRITONSERVER_Error* AddBackendConfig(
      const std::string& backend_name, const std::string& setting,
      const std::string& value);

  TRITONSERVER_Error* SetHostPolicy(
      const std::string& policy_name, const std::string& setting,
      const std::string& value);
  const triton::common::HostPolicyCmdlineConfigMap& HostPolicyCmdlineConfigMap()
      const
  {
    return host_policy_map_;
  }

 private:
  std::string server_id_;
  std::set<std::string> repo_paths_;
  tc::ModelControlMode model_control_mode_;
  std::set<std::string> models_;
  bool exit_on_error_;
  bool strict_model_config_;
  bool strict_readiness_;
  tc::RateLimitMode rate_limit_mode_;
  tc::RateLimiter::ResourceMap rate_limit_resource_map_;
  bool metrics_;
  bool gpu_metrics_;
  bool cpu_metrics_;
  uint64_t metrics_interval_;
  unsigned int exit_timeout_;
  uint64_t pinned_memory_pool_size_;
  uint64_t response_cache_byte_size_;
  unsigned int buffer_manager_thread_count_;
  unsigned int model_load_thread_count_;
  std::map<int, uint64_t> cuda_memory_pool_size_;
  double min_compute_capability_;
  std::string backend_dir_;
  std::string repoagent_dir_;
  triton::common::BackendCmdlineConfigMap backend_cmdline_config_map_;
  triton::common::HostPolicyCmdlineConfigMap host_policy_map_;
};

TritonServerOptions::TritonServerOptions()
    : server_id_("triton"),
      model_control_mode_(tc::ModelControlMode::MODE_POLL),
      exit_on_error_(true), strict_model_config_(true),
      strict_readiness_(true), rate_limit_mode_(tc::RateLimitMode::RL_OFF),
      metrics_(true), gpu_metrics_(true), cpu_metrics_(true),
      metrics_interval_(2000), exit_timeout_(30),
      pinned_memory_pool_size_(1 << 28), response_cache_byte_size_(0),
      buffer_manager_thread_count_(0),
      model_load_thread_count_(
          std::max(2u, 2 * std::thread::hardware_concurrency())),
#ifdef TRITON_ENABLE_GPU
      min_compute_capability_(TRITON_MIN_COMPUTE_CAPABILITY),
#else
      min_compute_capability_(0),
#endif  // TRITON_ENABLE_GPU
      backend_dir_("/opt/tritonserver/backends"),
      repoagent_dir_("/opt/tritonserver/repoagents")
{
#ifndef TRITON_ENABLE_METRICS
  metrics_ = false;
  gpu_metrics_ = false;
  cpu_metrics_ = false;
#endif  // TRITON_ENABLE_METRICS

#ifndef TRITON_ENABLE_METRICS_GPU
  gpu_metrics_ = false;
#endif  // TRITON_ENABLE_METRICS_GPU

#ifndef TRITON_ENABLE_METRICS_CPU
  cpu_metrics_ = false;
#endif  // TRITON_ENABLE_METRICS_CPU
}
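
// Usage sketch (illustrative only, excluded from compilation): how an
// embedding application, via TRITONSERVER_ServerOptionsAddRateLimiterResource
// implemented later in this file, populates the resource map managed by
// AddRateLimiterResource below. The resource name "R1" is a hypothetical
// example; note that registering the same resource twice keeps the smaller
// count, as the implementation below shows.
#if 0
TRITONSERVER_ServerOptions* opts = nullptr;
TRITONSERVER_ServerOptionsNew(&opts);
// Rate limiting must be switched on for the resources to matter.
TRITONSERVER_ServerOptionsSetRateLimiterMode(
    opts, TRITONSERVER_RATE_LIMIT_EXEC_COUNT);
// Two copies of hypothetical resource "R1" on device 0.
TRITONSERVER_ServerOptionsAddRateLimiterResource(
    opts, "R1", 2 /* count */, 0 /* device */);
#endif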
TRITONSERVER_Error*
TritonServerOptions::AddRateLimiterResource(
    const std::string& name, const size_t count, const int device)
{
  auto ditr = rate_limit_resource_map_.find(device);
  if (ditr == rate_limit_resource_map_.end()) {
    ditr = rate_limit_resource_map_
               .emplace(device, std::map<std::string, size_t>())
               .first;
  }
  auto ritr = ditr->second.find(name);
  if (ritr == ditr->second.end()) {
    ditr->second.emplace(name, count);
  } else {
    // If already present then store the minimum of the two.
    if (ritr->second > count) {
      ritr->second = count;
    }
  }
  return nullptr;  // success
}

TRITONSERVER_Error*
TritonServerOptions::AddBackendConfig(
    const std::string& backend_name, const std::string& setting,
    const std::string& value)
{
  triton::common::BackendCmdlineConfig& cc =
      backend_cmdline_config_map_[backend_name];
  cc.push_back(std::make_pair(setting, value));
  return nullptr;  // success
}

TRITONSERVER_Error*
TritonServerOptions::SetHostPolicy(
    const std::string& policy_name, const std::string& setting,
    const std::string& value)
{
  // Check if supported setting is passed
  if ((setting != "numa-node") && (setting != "cpu-cores")) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        std::string(
            "Unsupported host policy setting '" + setting +
            "' is specified, supported settings are 'numa-node', 'cpu-cores'")
            .c_str());
  }

  triton::common::HostPolicyCmdlineConfig& hp = host_policy_map_[policy_name];
  hp[setting] = value;
  return nullptr;  // success
}

#define SetDurationStat(DOC, PARENT, STAT_NAME, COUNT, NS)   \
  do {                                                       \
    triton::common::TritonJson::Value dstat(                 \
        DOC, triton::common::TritonJson::ValueType::OBJECT); \
    dstat.AddUInt("count", (COUNT));                         \
    dstat.AddUInt("ns", (NS));                               \
    PARENT.Add(STAT_NAME, std::move(dstat));                 \
  } while (false)

}  // namespace

extern "C" {

//
// TRITONSERVER API Version
//
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ApiVersion(uint32_t* major, uint32_t* minor)
{
  *major = TRITONSERVER_API_VERSION_MAJOR;
  *minor = TRITONSERVER_API_VERSION_MINOR;
  return nullptr;  // success
}

//
// TRITONSERVER_DataType
//
TRITONAPI_DECLSPEC const char*
TRITONSERVER_DataTypeString(TRITONSERVER_DataType datatype)
{
  switch (datatype) {
    case TRITONSERVER_TYPE_BOOL:
      return "BOOL";
    case TRITONSERVER_TYPE_UINT8:
      return "UINT8";
    case TRITONSERVER_TYPE_UINT16:
      return "UINT16";
    case TRITONSERVER_TYPE_UINT32:
      return "UINT32";
    case TRITONSERVER_TYPE_UINT64:
      return "UINT64";
    case TRITONSERVER_TYPE_INT8:
      return "INT8";
    case TRITONSERVER_TYPE_INT16:
      return "INT16";
    case TRITONSERVER_TYPE_INT32:
      return "INT32";
    case TRITONSERVER_TYPE_INT64:
      return "INT64";
    case TRITONSERVER_TYPE_FP16:
      return "FP16";
    case TRITONSERVER_TYPE_FP32:
      return "FP32";
    case TRITONSERVER_TYPE_FP64:
      return "FP64";
    case TRITONSERVER_TYPE_BYTES:
      return "BYTES";
    case TRITONSERVER_TYPE_BF16:
      return "BF16";
    default:
      break;
  }

  return "";
}

TRITONAPI_DECLSPEC TRITONSERVER_DataType
TRITONSERVER_StringToDataType(const char* dtype)
{
  const size_t len = strlen(dtype);
  return tc::DataTypeToTriton(
      triton::common::ProtocolStringToDataType(dtype, len));
}

TRITONAPI_DECLSPEC uint32_t
TRITONSERVER_DataTypeByteSize(TRITONSERVER_DataType datatype)
{
  switch (datatype) {
    case TRITONSERVER_TYPE_BOOL:
    case TRITONSERVER_TYPE_INT8:
    case TRITONSERVER_TYPE_UINT8:
      return 1;
    case TRITONSERVER_TYPE_INT16:
    case TRITONSERVER_TYPE_UINT16:
    case TRITONSERVER_TYPE_FP16:
    case TRITONSERVER_TYPE_BF16:
      return 2;
    case TRITONSERVER_TYPE_INT32:
    case TRITONSERVER_TYPE_UINT32:
    case TRITONSERVER_TYPE_FP32:
      return 4;
    case TRITONSERVER_TYPE_INT64:
    case TRITONSERVER_TYPE_UINT64:
    case TRITONSERVER_TYPE_FP64:
      return 8;
    case TRITONSERVER_TYPE_BYTES:
      return 0;
    default:
      break;
  }

  return 0;
}

//
// TRITONSERVER_MemoryType
//
TRITONAPI_DECLSPEC const char*
TRITONSERVER_MemoryTypeString(TRITONSERVER_MemoryType memtype)
{
  switch (memtype) {
    case TRITONSERVER_MEMORY_CPU:
      return "CPU";
    case TRITONSERVER_MEMORY_CPU_PINNED:
      return "CPU_PINNED";
    case TRITONSERVER_MEMORY_GPU:
      return "GPU";
    default:
      break;
  }

  return "";
}
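
// Usage sketch (illustrative only, excluded from compilation): a typical
// embedding application first checks that the linked library implements a
// compatible API version via TRITONSERVER_ApiVersion above, then builds
// options and creates the server with the functions implemented later in
// this file. The repository path is a placeholder.
#if 0
TRITONSERVER_Error*
CreateServer(TRITONSERVER_Server** server)
{
  uint32_t api_major = 0, api_minor = 0;
  TRITONSERVER_Error* err = TRITONSERVER_ApiVersion(&api_major, &api_minor);
  if (err != nullptr) {
    return err;
  }
  // Convention used by Triton clients: the major version must match exactly
  // and the runtime minor must be at least the compile-time minor.
  if ((TRITONSERVER_API_VERSION_MAJOR != api_major) ||
      (TRITONSERVER_API_VERSION_MINOR > api_minor)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED, "triton server API version mismatch");
  }

  TRITONSERVER_ServerOptions* options = nullptr;
  err = TRITONSERVER_ServerOptionsNew(&options);
  if (err == nullptr) {
    err = TRITONSERVER_ServerOptionsSetModelRepositoryPath(
        options, "/path/to/model_repository" /* placeholder */);
  }
  if (err == nullptr) {
    // TRITONSERVER_ServerNew copies what it needs out of 'options', so the
    // options object can be deleted immediately afterwards.
    err = TRITONSERVER_ServerNew(server, options);
  }
  TRITONSERVER_ServerOptionsDelete(options);
  return err;
}
#endif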
//
// TRITONSERVER_Parameter
//
TRITONAPI_DECLSPEC const char*
TRITONSERVER_ParameterTypeString(TRITONSERVER_ParameterType paramtype)
{
  switch (paramtype) {
    case TRITONSERVER_PARAMETER_STRING:
      return "STRING";
    case TRITONSERVER_PARAMETER_INT:
      return "INT";
    case TRITONSERVER_PARAMETER_BOOL:
      return "BOOL";
    default:
      break;
  }

  return "";
}

TRITONAPI_DECLSPEC TRITONSERVER_Parameter*
TRITONSERVER_ParameterNew(
    const char* name, const TRITONSERVER_ParameterType type, const void* value)
{
  std::unique_ptr<tc::InferenceParameter> lparam;
  switch (type) {
    case TRITONSERVER_PARAMETER_STRING:
      lparam.reset(new tc::InferenceParameter(
          name, reinterpret_cast<const char*>(value)));
      break;
    case TRITONSERVER_PARAMETER_INT:
      lparam.reset(new tc::InferenceParameter(
          name, *reinterpret_cast<const int64_t*>(value)));
      break;
    case TRITONSERVER_PARAMETER_BOOL:
      lparam.reset(new tc::InferenceParameter(
          name, *reinterpret_cast<const bool*>(value)));
      break;
    default:
      break;
  }
  return reinterpret_cast<TRITONSERVER_Parameter*>(lparam.release());
}

TRITONAPI_DECLSPEC TRITONSERVER_Parameter*
TRITONSERVER_ParameterBytesNew(
    const char* name, const void* byte_ptr, const uint64_t size)
{
  std::unique_ptr<tc::InferenceParameter> lparam(
      new tc::InferenceParameter(name, byte_ptr, size));
  return reinterpret_cast<TRITONSERVER_Parameter*>(lparam.release());
}

TRITONAPI_DECLSPEC void
TRITONSERVER_ParameterDelete(TRITONSERVER_Parameter* parameter)
{
  delete reinterpret_cast<tc::InferenceParameter*>(parameter);
}

//
// TRITONSERVER_InstanceGroupKind
//
TRITONAPI_DECLSPEC const char*
TRITONSERVER_InstanceGroupKindString(TRITONSERVER_InstanceGroupKind kind)
{
  switch (kind) {
    case TRITONSERVER_INSTANCEGROUPKIND_AUTO:
      return "AUTO";
    case TRITONSERVER_INSTANCEGROUPKIND_CPU:
      return "CPU";
    case TRITONSERVER_INSTANCEGROUPKIND_GPU:
      return "GPU";
    case TRITONSERVER_INSTANCEGROUPKIND_MODEL:
      return "MODEL";
    default:
      break;
  }

  return "";
}

//
// TRITONSERVER_Log
//
TRITONAPI_DECLSPEC bool
TRITONSERVER_LogIsEnabled(TRITONSERVER_LogLevel level)
{
  switch (level) {
    case TRITONSERVER_LOG_INFO:
      return LOG_INFO_IS_ON;
    case TRITONSERVER_LOG_WARN:
      return LOG_WARNING_IS_ON;
    case TRITONSERVER_LOG_ERROR:
      return LOG_ERROR_IS_ON;
    case TRITONSERVER_LOG_VERBOSE:
      return LOG_VERBOSE_IS_ON(1);
  }

  return false;
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_LogMessage(
    TRITONSERVER_LogLevel level, const char* filename, const int line,
    const char* msg)
{
  switch (level) {
    case TRITONSERVER_LOG_INFO:
      LOG_INFO_FL(filename, line) << msg;
      return nullptr;
    case TRITONSERVER_LOG_WARN:
      LOG_WARNING_FL(filename, line) << msg;
      return nullptr;
    case TRITONSERVER_LOG_ERROR:
      LOG_ERROR_FL(filename, line) << msg;
      return nullptr;
    case TRITONSERVER_LOG_VERBOSE:
      LOG_VERBOSE_FL(1, filename, line) << msg;
      return nullptr;
    default:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          std::string("unknown logging level '" + std::to_string(level) + "'")
              .c_str());
  }
}

//
// TRITONSERVER_Error
//
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ErrorNew(TRITONSERVER_Error_Code code, const char* msg)
{
  return reinterpret_cast<TRITONSERVER_Error*>(
      TritonServerError::Create(code, msg));
}

TRITONAPI_DECLSPEC void
TRITONSERVER_ErrorDelete(TRITONSERVER_Error* error)
{
  TritonServerError* lerror = reinterpret_cast<TritonServerError*>(error);
  delete lerror;
}

TRITONSERVER_Error_Code
TRITONSERVER_ErrorCode(TRITONSERVER_Error* error)
{
  TritonServerError* lerror = reinterpret_cast<TritonServerError*>(error);
  return lerror->Code();
}

TRITONAPI_DECLSPEC const char*
TRITONSERVER_ErrorCodeString(TRITONSERVER_Error* error)
{
  TritonServerError* lerror = reinterpret_cast<TritonServerError*>(error);
  return tc::Status::CodeString(tc::TritonCodeToStatusCode(lerror->Code()));
}

TRITONAPI_DECLSPEC const char*
TRITONSERVER_ErrorMessage(TRITONSERVER_Error* error)
{
  TritonServerError* lerror = reinterpret_cast<TritonServerError*>(error);
  return lerror->Message().c_str();
}
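
// Usage sketch (illustrative only, excluded from compilation): errors
// returned by this API are heap-allocated objects owned by the caller. A
// nullptr return value means success; anything else should be inspected
// with the accessors above and then freed with TRITONSERVER_ErrorDelete.
#if 0
#include <cstdio>

void
LogAndFreeError(TRITONSERVER_Error* err)
{
  if (err != nullptr) {
    std::fprintf(
        stderr, "error: %s - %s\n", TRITONSERVER_ErrorCodeString(err),
        TRITONSERVER_ErrorMessage(err));
    TRITONSERVER_ErrorDelete(err);
  }
}
#endif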
//
// TRITONSERVER_ResponseAllocator
//
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ResponseAllocatorNew(
    TRITONSERVER_ResponseAllocator** allocator,
    TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn,
    TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn,
    TRITONSERVER_ResponseAllocatorStartFn_t start_fn)
{
  *allocator = reinterpret_cast<TRITONSERVER_ResponseAllocator*>(
      new tc::ResponseAllocator(alloc_fn, release_fn, start_fn));
  return nullptr;  // Success
}

TRITONSERVER_Error*
TRITONSERVER_ResponseAllocatorSetQueryFunction(
    TRITONSERVER_ResponseAllocator* allocator,
    TRITONSERVER_ResponseAllocatorQueryFn_t query_fn)
{
  reinterpret_cast<tc::ResponseAllocator*>(allocator)->SetQueryFunction(
      query_fn);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction(
    TRITONSERVER_ResponseAllocator* allocator,
    TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn)
{
  reinterpret_cast<tc::ResponseAllocator*>(allocator)
      ->SetBufferAttributesFunction(buffer_attributes_fn);
  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ResponseAllocatorDelete(TRITONSERVER_ResponseAllocator* allocator)
{
  tc::ResponseAllocator* lalloc =
      reinterpret_cast<tc::ResponseAllocator*>(allocator);
  delete lalloc;
  return nullptr;  // Success
}
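
// Usage sketch (illustrative only, excluded from compilation): a minimal
// CPU-only allocate/release pair that could be registered with
// TRITONSERVER_ResponseAllocatorNew above. The parameter lists follow the
// TRITONSERVER_ResponseAllocator*Fn_t typedefs in tritonserver.h; the
// start_fn argument is optional and may be nullptr.
#if 0
#include <cstdlib>

static TRITONSERVER_Error*
CpuAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  // Satisfy every request from plain CPU memory, whatever was preferred.
  // A zero-byte request is legal and yields a null buffer.
  *buffer = (byte_size == 0) ? nullptr : std::malloc(byte_size);
  *buffer_userp = nullptr;
  *actual_memory_type = TRITONSERVER_MEMORY_CPU;
  *actual_memory_type_id = 0;
  if ((byte_size != 0) && (*buffer == nullptr)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL, "failed to allocate response buffer");
  }
  return nullptr;  // success
}

static TRITONSERVER_Error*
CpuRelease(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer,
    void* buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  std::free(buffer);
  return nullptr;  // success
}

// TRITONSERVER_ResponseAllocator* allocator = nullptr;
// TRITONSERVER_ResponseAllocatorNew(
//     &allocator, CpuAlloc, CpuRelease, nullptr /* start_fn */);
#endif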
//
// TRITONSERVER_Message
//
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MessageNewFromSerializedJson(
    TRITONSERVER_Message** message, const char* base, size_t byte_size)
{
  *message = reinterpret_cast<TRITONSERVER_Message*>(
      new tc::TritonServerMessage({base, byte_size}));
  return nullptr;
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MessageDelete(TRITONSERVER_Message* message)
{
  tc::TritonServerMessage* lmessage =
      reinterpret_cast<tc::TritonServerMessage*>(message);
  delete lmessage;
  return nullptr;  // Success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MessageSerializeToJson(
    TRITONSERVER_Message* message, const char** base, size_t* byte_size)
{
  tc::TritonServerMessage* lmessage =
      reinterpret_cast<tc::TritonServerMessage*>(message);
  lmessage->Serialize(base, byte_size);
  return nullptr;  // Success
}

//
// TRITONSERVER_Metrics
//
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricsDelete(TRITONSERVER_Metrics* metrics)
{
  TritonServerMetrics* lmetrics =
      reinterpret_cast<TritonServerMetrics*>(metrics);
  delete lmetrics;
  return nullptr;  // Success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_MetricsFormatted(
    TRITONSERVER_Metrics* metrics, TRITONSERVER_MetricFormat format,
    const char** base, size_t* byte_size)
{
  TritonServerMetrics* lmetrics =
      reinterpret_cast<TritonServerMetrics*>(metrics);

  switch (format) {
    case TRITONSERVER_METRIC_PROMETHEUS: {
      return lmetrics->Serialize(base, byte_size);
    }

    default:
      break;
  }

  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_INVALID_ARG,
      std::string("unknown metrics format '" + std::to_string(format) + "'")
          .c_str());
}

//
// TRITONSERVER_InferenceTrace
//
TRITONAPI_DECLSPEC const char*
TRITONSERVER_InferenceTraceLevelString(TRITONSERVER_InferenceTraceLevel level)
{
  switch (level) {
    case TRITONSERVER_TRACE_LEVEL_DISABLED:
      return "DISABLED";
    case TRITONSERVER_TRACE_LEVEL_MIN:
      return "MIN";
    case TRITONSERVER_TRACE_LEVEL_MAX:
      return "MAX";
    case TRITONSERVER_TRACE_LEVEL_TIMESTAMPS:
      return "TIMESTAMPS";
    case TRITONSERVER_TRACE_LEVEL_TENSORS:
      return "TENSORS";
  }

  return "";
}

TRITONAPI_DECLSPEC const char*
TRITONSERVER_InferenceTraceActivityString(
    TRITONSERVER_InferenceTraceActivity activity)
{
  switch (activity) {
    case TRITONSERVER_TRACE_REQUEST_START:
      return "REQUEST_START";
    case TRITONSERVER_TRACE_QUEUE_START:
      return "QUEUE_START";
    case TRITONSERVER_TRACE_COMPUTE_START:
      return "COMPUTE_START";
    case TRITONSERVER_TRACE_COMPUTE_INPUT_END:
      return "COMPUTE_INPUT_END";
    case TRITONSERVER_TRACE_COMPUTE_OUTPUT_START:
      return "COMPUTE_OUTPUT_START";
    case TRITONSERVER_TRACE_COMPUTE_END:
      return "COMPUTE_END";
    case TRITONSERVER_TRACE_REQUEST_END:
      return "REQUEST_END";
    case TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT:
      return "TENSOR_QUEUE_INPUT";
    case TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT:
      return "TENSOR_BACKEND_INPUT";
    case TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT:
      return "TENSOR_BACKEND_OUTPUT";
  }

  return "";
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceTraceNew(
    TRITONSERVER_InferenceTrace** trace, TRITONSERVER_InferenceTraceLevel level,
    uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn,
    TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp)
{
#ifdef TRITON_ENABLE_TRACING
  if ((level & TRITONSERVER_TRACE_LEVEL_MIN) > 0) {
    level = static_cast<TRITONSERVER_InferenceTraceLevel>(
        (level ^ TRITONSERVER_TRACE_LEVEL_MIN) |
        TRITONSERVER_TRACE_LEVEL_TIMESTAMPS);
  }
  if ((level & TRITONSERVER_TRACE_LEVEL_MAX) > 0) {
    level = static_cast<TRITONSERVER_InferenceTraceLevel>(
        (level ^ TRITONSERVER_TRACE_LEVEL_MAX) |
        TRITONSERVER_TRACE_LEVEL_TIMESTAMPS);
  }
  tc::InferenceTrace* ltrace = new tc::InferenceTrace(
      level, parent_id, activity_fn, nullptr, release_fn, trace_userp);
  *trace = reinterpret_cast<TRITONSERVER_InferenceTrace*>(ltrace);
  return nullptr;  // Success
#else
  *trace = nullptr;
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported");
#endif  // TRITON_ENABLE_TRACING
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceTraceTensorNew(
    TRITONSERVER_InferenceTrace** trace, TRITONSERVER_InferenceTraceLevel level,
    uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn,
    TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn,
    TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp)
{
#ifdef TRITON_ENABLE_TRACING
  if ((level & TRITONSERVER_TRACE_LEVEL_MIN) > 0) {
    level = static_cast<TRITONSERVER_InferenceTraceLevel>(
        (level ^ TRITONSERVER_TRACE_LEVEL_MIN) |
        TRITONSERVER_TRACE_LEVEL_TIMESTAMPS);
  }
  if ((level & TRITONSERVER_TRACE_LEVEL_MAX) > 0) {
    level = static_cast<TRITONSERVER_InferenceTraceLevel>(
        (level ^ TRITONSERVER_TRACE_LEVEL_MAX) |
        TRITONSERVER_TRACE_LEVEL_TIMESTAMPS);
  }
  tc::InferenceTrace* ltrace = new tc::InferenceTrace(
      level, parent_id, activity_fn, tensor_activity_fn, release_fn,
      trace_userp);
  *trace = reinterpret_cast<TRITONSERVER_InferenceTrace*>(ltrace);
  return nullptr;  // Success
#else
  *trace = nullptr;
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported");
#endif  // TRITON_ENABLE_TRACING
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceTraceDelete(TRITONSERVER_InferenceTrace* trace)
{
#ifdef TRITON_ENABLE_TRACING
  tc::InferenceTrace* ltrace = reinterpret_cast<tc::InferenceTrace*>(trace);
  delete ltrace;
  return nullptr;  // Success
#else
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported");
#endif  // TRITON_ENABLE_TRACING
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceTraceId(TRITONSERVER_InferenceTrace* trace, uint64_t* id)
{
#ifdef TRITON_ENABLE_TRACING
  tc::InferenceTrace* ltrace = reinterpret_cast<tc::InferenceTrace*>(trace);
  *id = ltrace->Id();
  return nullptr;  // Success
#else
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported");
#endif  // TRITON_ENABLE_TRACING
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_InferenceTraceParentId(
    TRITONSERVER_InferenceTrace* trace, uint64_t* parent_id)
{
#ifdef TRITON_ENABLE_TRACING
  tc::InferenceTrace* ltrace =
reinterpret_cast(trace); *parent_id = ltrace->ParentId(); return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); #endif // TRITON_ENABLE_TRACING } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceModelName( TRITONSERVER_InferenceTrace* trace, const char** model_name) { #ifdef TRITON_ENABLE_TRACING tc::InferenceTrace* ltrace = reinterpret_cast(trace); *model_name = ltrace->ModelName().c_str(); return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); #endif // TRITON_ENABLE_TRACING } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceModelVersion( TRITONSERVER_InferenceTrace* trace, int64_t* model_version) { #ifdef TRITON_ENABLE_TRACING tc::InferenceTrace* ltrace = reinterpret_cast(trace); *model_version = ltrace->ModelVersion(); return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); #endif // TRITON_ENABLE_TRACING } // // TRITONSERVER_ServerOptions // TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsNew(TRITONSERVER_ServerOptions** options) { *options = reinterpret_cast(new TritonServerOptions()); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsDelete(TRITONSERVER_ServerOptions* options) { TritonServerOptions* loptions = reinterpret_cast(options); delete loptions; return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetServerId( TRITONSERVER_ServerOptions* options, const char* server_id) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetServerId(server_id); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelRepositoryPath( TRITONSERVER_ServerOptions* options, const char* model_repository_path) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetModelRepositoryPath(model_repository_path); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelControlMode( TRITONSERVER_ServerOptions* options, TRITONSERVER_ModelControlMode mode) { TritonServerOptions* loptions = reinterpret_cast(options); // convert mode from TRITONSERVER_ to triton::core switch (mode) { case TRITONSERVER_MODEL_CONTROL_NONE: { loptions->SetModelControlMode(tc::ModelControlMode::MODE_NONE); break; } case TRITONSERVER_MODEL_CONTROL_POLL: { loptions->SetModelControlMode(tc::ModelControlMode::MODE_POLL); break; } case TRITONSERVER_MODEL_CONTROL_EXPLICIT: { loptions->SetModelControlMode(tc::ModelControlMode::MODE_EXPLICIT); break; } default: { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, std::string("unknown control mode '" + std::to_string(mode) + "'") .c_str()); } } return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetStartupModel( TRITONSERVER_ServerOptions* options, const char* model_name) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetStartupModel(model_name); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetExitOnError( TRITONSERVER_ServerOptions* options, bool exit) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetExitOnError(exit); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetStrictModelConfig( 
TRITONSERVER_ServerOptions* options, bool strict) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetStrictModelConfig(strict); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetRateLimiterMode( TRITONSERVER_ServerOptions* options, TRITONSERVER_RateLimitMode mode) { TritonServerOptions* loptions = reinterpret_cast(options); // convert mode from TRITONSERVER_ to triton::core switch (mode) { case TRITONSERVER_RATE_LIMIT_EXEC_COUNT: { loptions->SetRateLimiterMode(tc::RateLimitMode::RL_EXEC_COUNT); break; } case TRITONSERVER_RATE_LIMIT_OFF: { loptions->SetRateLimiterMode(tc::RateLimitMode::RL_OFF); break; } default: { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, std::string("unknown rate limit mode '" + std::to_string(mode) + "'") .c_str()); } } return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsAddRateLimiterResource( TRITONSERVER_ServerOptions* options, const char* name, const size_t count, const int device) { TritonServerOptions* loptions = reinterpret_cast(options); return loptions->AddRateLimiterResource(name, count, device); } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize( TRITONSERVER_ServerOptions* options, uint64_t size) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetPinnedMemoryPoolByteSize(size); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize( TRITONSERVER_ServerOptions* options, int gpu_device, uint64_t size) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetCudaMemoryPoolByteSize(gpu_device, size); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetResponseCacheByteSize( TRITONSERVER_ServerOptions* options, uint64_t size) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetResponseCacheByteSize(size); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( TRITONSERVER_ServerOptions* options, double cc) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetMinSupportedComputeCapability(cc); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetStrictReadiness( TRITONSERVER_ServerOptions* options, bool strict) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetStrictReadiness(strict); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetExitTimeout( TRITONSERVER_ServerOptions* options, unsigned int timeout) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetExitTimeout(timeout); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetBufferManagerThreadCount( TRITONSERVER_ServerOptions* options, unsigned int thread_count) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetBufferManagerThreadCount(thread_count); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelLoadThreadCount( TRITONSERVER_ServerOptions* options, unsigned int thread_count) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetModelLoadThreadCount(thread_count); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogFile( TRITONSERVER_ServerOptions* 
options, const char* file) { #ifdef TRITON_ENABLE_LOGGING std::string out_file; if (file != nullptr) { out_file = std::string(file); } const std::string& error = LOG_SET_OUT_FILE(out_file); if (!error.empty()) { return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (error).c_str()); } return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); #endif // TRITON_ENABLE_LOGGING } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogInfo( TRITONSERVER_ServerOptions* options, bool log) { #ifdef TRITON_ENABLE_LOGGING // Logging is global for now... LOG_ENABLE_INFO(log); return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); #endif // TRITON_ENABLE_LOGGING } // Enable or disable warning level logging. TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogWarn( TRITONSERVER_ServerOptions* options, bool log) { #ifdef TRITON_ENABLE_LOGGING // Logging is global for now... LOG_ENABLE_WARNING(log); return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); #endif // TRITON_ENABLE_LOGGING } // Enable or disable error level logging. TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogError( TRITONSERVER_ServerOptions* options, bool log) { #ifdef TRITON_ENABLE_LOGGING // Logging is global for now... LOG_ENABLE_ERROR(log); return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); #endif // TRITON_ENABLE_LOGGING } // Set verbose logging level. Level zero disables verbose logging. TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogVerbose( TRITONSERVER_ServerOptions* options, int level) { #ifdef TRITON_ENABLE_LOGGING // Logging is global for now... LOG_SET_VERBOSE(level); #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); #endif // TRITON_ENABLE_LOGGING return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogFormat( TRITONSERVER_ServerOptions* options, const TRITONSERVER_LogFormat format) { #ifdef TRITON_ENABLE_LOGGING // Logging is global for now... 
switch (format) { case TRITONSERVER_LOG_DEFAULT: LOG_SET_FORMAT(triton::common::Logger::Format::kDEFAULT); break; case TRITONSERVER_LOG_ISO8601: LOG_SET_FORMAT(triton::common::Logger::Format::kISO8601); break; } #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); #endif // TRITON_ENABLE_LOGGING return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetMetrics( TRITONSERVER_ServerOptions* options, bool metrics) { #ifdef TRITON_ENABLE_METRICS TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetMetrics(metrics); return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); #endif // TRITON_ENABLE_METRICS } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetGpuMetrics( TRITONSERVER_ServerOptions* options, bool gpu_metrics) { #ifdef TRITON_ENABLE_METRICS TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetGpuMetrics(gpu_metrics); return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); #endif // TRITON_ENABLE_METRICS } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetCpuMetrics( TRITONSERVER_ServerOptions* options, bool cpu_metrics) { #ifdef TRITON_ENABLE_METRICS TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetCpuMetrics(cpu_metrics); return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); #endif // TRITON_ENABLE_METRICS } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetMetricsInterval( TRITONSERVER_ServerOptions* options, uint64_t metrics_interval_ms) { #ifdef TRITON_ENABLE_METRICS TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetMetricsInterval(metrics_interval_ms); return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); #endif // TRITON_ENABLE_METRICS } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetBackendDirectory( TRITONSERVER_ServerOptions* options, const char* backend_dir) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetBackendDir(backend_dir); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetRepoAgentDirectory( TRITONSERVER_ServerOptions* options, const char* repoagent_dir) { TritonServerOptions* loptions = reinterpret_cast(options); loptions->SetRepoAgentDir(repoagent_dir); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit( TRITONSERVER_ServerOptions* options, const TRITONSERVER_InstanceGroupKind kind, const int device_id, const double fraction) { if (device_id < 0) { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, (std::string("expects device ID >= 0, got ") + std::to_string(device_id)) .c_str()); } else if ((fraction < 0.0) || (fraction > 1.0)) { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, (std::string("expects limit fraction to be in range [0.0, 1.0], got ") + std::to_string(fraction)) .c_str()); } TritonServerOptions* loptions = reinterpret_cast(options); switch (kind) { case TRITONSERVER_INSTANCEGROUPKIND_GPU: { static std::string key_prefix = "model-load-gpu-limit-device-"; return loptions->AddBackendConfig( "", key_prefix + std::to_string(device_id), std::to_string(fraction)); } default: return TRITONSERVER_ErrorNew( 
TRITONSERVER_ERROR_INVALID_ARG, (std::string("given device kind is not supported, got: ") + TRITONSERVER_InstanceGroupKindString(kind)) .c_str()); } } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetBackendConfig( TRITONSERVER_ServerOptions* options, const char* backend_name, const char* setting, const char* value) { TritonServerOptions* loptions = reinterpret_cast(options); return loptions->AddBackendConfig(backend_name, setting, value); } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetHostPolicy( TRITONSERVER_ServerOptions* options, const char* policy_name, const char* setting, const char* value) { TritonServerOptions* loptions = reinterpret_cast(options); return loptions->SetHostPolicy(policy_name, setting, value); } // // TRITONSERVER_InferenceRequest // TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestNew( TRITONSERVER_InferenceRequest** inference_request, TRITONSERVER_Server* server, const char* model_name, const int64_t model_version) { tc::InferenceServer* lserver = reinterpret_cast(server); std::shared_ptr model; RETURN_IF_STATUS_ERROR(lserver->GetModel(model_name, model_version, &model)); *inference_request = reinterpret_cast( new tc::InferenceRequest(model, model_version)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestDelete( TRITONSERVER_InferenceRequest* inference_request) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); delete lrequest; return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestId( TRITONSERVER_InferenceRequest* inference_request, const char** id) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); *id = lrequest->Id().c_str(); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetId( TRITONSERVER_InferenceRequest* inference_request, const char* id) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); lrequest->SetId(id); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestFlags( TRITONSERVER_InferenceRequest* inference_request, uint32_t* flags) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); *flags = lrequest->Flags(); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetFlags( TRITONSERVER_InferenceRequest* inference_request, uint32_t flags) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); lrequest->SetFlags(flags); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestCorrelationId( TRITONSERVER_InferenceRequest* inference_request, uint64_t* correlation_id) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); const tc::InferenceRequest::SequenceId& corr_id = lrequest->CorrelationId(); if (corr_id.Type() != tc::InferenceRequest::SequenceId::DataType::UINT64) { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, std::string("given request's correlation id is not an unsigned int") .c_str()); } *correlation_id = corr_id.UnsignedIntValue(); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestCorrelationIdString( TRITONSERVER_InferenceRequest* inference_request, const char** correlation_id) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); const tc::InferenceRequest::SequenceId& corr_id = lrequest->CorrelationId(); if (corr_id.Type() != 
tc::InferenceRequest::SequenceId::DataType::STRING) { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INVALID_ARG, std::string("given request's correlation id is not a string").c_str()); } *correlation_id = corr_id.StringValue().c_str(); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetCorrelationId( TRITONSERVER_InferenceRequest* inference_request, uint64_t correlation_id) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); lrequest->SetCorrelationId(tc::InferenceRequest::SequenceId(correlation_id)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetCorrelationIdString( TRITONSERVER_InferenceRequest* inference_request, const char* correlation_id) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); if (std::string(correlation_id).length() > 128) { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, std::string( "string correlation ID cannot be longer than 128 characters") .c_str()); } lrequest->SetCorrelationId(tc::InferenceRequest::SequenceId(correlation_id)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestPriority( TRITONSERVER_InferenceRequest* inference_request, uint32_t* priority) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); *priority = lrequest->Priority(); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetPriority( TRITONSERVER_InferenceRequest* inference_request, uint32_t priority) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); lrequest->SetPriority(priority); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestTimeoutMicroseconds( TRITONSERVER_InferenceRequest* inference_request, uint64_t* timeout_us) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); *timeout_us = lrequest->TimeoutMicroseconds(); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetTimeoutMicroseconds( TRITONSERVER_InferenceRequest* inference_request, uint64_t timeout_us) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); lrequest->SetTimeoutMicroseconds(timeout_us); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddInput( TRITONSERVER_InferenceRequest* inference_request, const char* name, const TRITONSERVER_DataType datatype, const int64_t* shape, uint64_t dim_count) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); RETURN_IF_STATUS_ERROR(lrequest->AddOriginalInput( name, tc::TritonToDataType(datatype), shape, dim_count)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddRawInput( TRITONSERVER_InferenceRequest* inference_request, const char* name) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); RETURN_IF_STATUS_ERROR(lrequest->AddRawInput(name)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveInput( TRITONSERVER_InferenceRequest* inference_request, const char* name) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); RETURN_IF_STATUS_ERROR(lrequest->RemoveOriginalInput(name)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveAllInputs( TRITONSERVER_InferenceRequest* inference_request) { tc::InferenceRequest* 
lrequest = reinterpret_cast(inference_request); RETURN_IF_STATUS_ERROR(lrequest->RemoveAllOriginalInputs()); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAppendInputData( TRITONSERVER_InferenceRequest* inference_request, const char* name, const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); tc::InferenceRequest::Input* input; RETURN_IF_STATUS_ERROR(lrequest->MutableOriginalInput(name, &input)); RETURN_IF_STATUS_ERROR( input->AppendData(base, byte_size, memory_type, memory_type_id)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy( TRITONSERVER_InferenceRequest* inference_request, const char* name, const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, const char* host_policy_name) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); tc::InferenceRequest::Input* input; RETURN_IF_STATUS_ERROR(lrequest->MutableOriginalInput(name, &input)); RETURN_IF_STATUS_ERROR(input->AppendDataWithHostPolicy( base, byte_size, memory_type, memory_type_id, host_policy_name)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes( TRITONSERVER_InferenceRequest* inference_request, const char* name, const void* base, TRITONSERVER_BufferAttributes* buffer_attributes) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); tc::BufferAttributes* lbuffer_attributes = reinterpret_cast(buffer_attributes); tc::InferenceRequest::Input* input; RETURN_IF_STATUS_ERROR(lrequest->MutableOriginalInput(name, &input)); RETURN_IF_STATUS_ERROR( input->AppendDataWithBufferAttributes(base, lbuffer_attributes)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveAllInputData( TRITONSERVER_InferenceRequest* inference_request, const char* name) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); tc::InferenceRequest::Input* input; RETURN_IF_STATUS_ERROR(lrequest->MutableOriginalInput(name, &input)); RETURN_IF_STATUS_ERROR(input->RemoveAllData()); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddRequestedOutput( TRITONSERVER_InferenceRequest* inference_request, const char* name) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); RETURN_IF_STATUS_ERROR(lrequest->AddOriginalRequestedOutput(name)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveRequestedOutput( TRITONSERVER_InferenceRequest* inference_request, const char* name) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); RETURN_IF_STATUS_ERROR(lrequest->RemoveOriginalRequestedOutput(name)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs( TRITONSERVER_InferenceRequest* inference_request) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); RETURN_IF_STATUS_ERROR(lrequest->RemoveAllOriginalRequestedOutputs()); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetReleaseCallback( TRITONSERVER_InferenceRequest* inference_request, TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, void* request_release_userp) { 
tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); RETURN_IF_STATUS_ERROR( lrequest->SetReleaseCallback(request_release_fn, request_release_userp)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetResponseCallback( TRITONSERVER_InferenceRequest* inference_request, TRITONSERVER_ResponseAllocator* response_allocator, void* response_allocator_userp, TRITONSERVER_InferenceResponseCompleteFn_t response_fn, void* response_userp) { tc::InferenceRequest* lrequest = reinterpret_cast(inference_request); tc::ResponseAllocator* lallocator = reinterpret_cast(response_allocator); RETURN_IF_STATUS_ERROR(lrequest->SetResponseCallback( lallocator, response_allocator_userp, response_fn, response_userp)); return nullptr; // Success } // // TRITONSERVER_InferenceResponse // TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseDelete( TRITONSERVER_InferenceResponse* inference_response) { tc::InferenceResponse* lresponse = reinterpret_cast(inference_response); delete lresponse; return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseError( TRITONSERVER_InferenceResponse* inference_response) { tc::InferenceResponse* lresponse = reinterpret_cast(inference_response); RETURN_IF_STATUS_ERROR(lresponse->ResponseStatus()); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseModel( TRITONSERVER_InferenceResponse* inference_response, const char** model_name, int64_t* model_version) { tc::InferenceResponse* lresponse = reinterpret_cast(inference_response); *model_name = lresponse->ModelName().c_str(); *model_version = lresponse->ActualModelVersion(); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseId( TRITONSERVER_InferenceResponse* inference_response, const char** request_id) { tc::InferenceResponse* lresponse = reinterpret_cast(inference_response); *request_id = lresponse->Id().c_str(); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseParameterCount( TRITONSERVER_InferenceResponse* inference_response, uint32_t* count) { tc::InferenceResponse* lresponse = reinterpret_cast(inference_response); const auto& parameters = lresponse->Parameters(); *count = parameters.size(); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseParameter( TRITONSERVER_InferenceResponse* inference_response, const uint32_t index, const char** name, TRITONSERVER_ParameterType* type, const void** vvalue) { tc::InferenceResponse* lresponse = reinterpret_cast(inference_response); const auto& parameters = lresponse->Parameters(); if (index >= parameters.size()) { return TritonServerError::Create( TRITONSERVER_ERROR_INVALID_ARG, "out of bounds index " + std::to_string(index) + std::string(": response has ") + std::to_string(parameters.size()) + " parameters"); } const tc::InferenceParameter& param = parameters[index]; *name = param.Name().c_str(); *type = param.Type(); *vvalue = param.ValuePointer(); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutputCount( TRITONSERVER_InferenceResponse* inference_response, uint32_t* count) { tc::InferenceResponse* lresponse = reinterpret_cast(inference_response); const auto& outputs = lresponse->Outputs(); *count = outputs.size(); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutput( 
TRITONSERVER_InferenceResponse* inference_response, const uint32_t index, const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape, uint64_t* dim_count, const void** base, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, void** userp) { tc::InferenceResponse* lresponse = reinterpret_cast(inference_response); const auto& outputs = lresponse->Outputs(); if (index >= outputs.size()) { return TritonServerError::Create( TRITONSERVER_ERROR_INVALID_ARG, "out of bounds index " + std::to_string(index) + std::string(": response has ") + std::to_string(outputs.size()) + " outputs"); } const tc::InferenceResponse::Output& output = outputs[index]; *name = output.Name().c_str(); *datatype = tc::DataTypeToTriton(output.DType()); const std::vector& oshape = output.Shape(); *shape = &oshape[0]; *dim_count = oshape.size(); RETURN_IF_STATUS_ERROR( output.DataBuffer(base, byte_size, memory_type, memory_type_id, userp)); return nullptr; // Success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutputClassificationLabel( TRITONSERVER_InferenceResponse* inference_response, const uint32_t index, const size_t class_index, const char** label) { tc::InferenceResponse* lresponse = reinterpret_cast(inference_response); const auto& outputs = lresponse->Outputs(); if (index >= outputs.size()) { return TritonServerError::Create( TRITONSERVER_ERROR_INVALID_ARG, "out of bounds index " + std::to_string(index) + std::string(": response has ") + std::to_string(outputs.size()) + " outputs"); } const tc::InferenceResponse::Output& output = outputs[index]; RETURN_IF_STATUS_ERROR( lresponse->ClassificationLabel(output, class_index, label)); return nullptr; // Success } // // TRITONSERVER_BufferAttributes // TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesNew( TRITONSERVER_BufferAttributes** buffer_attributes) { tc::BufferAttributes* lbuffer_attributes = new tc::BufferAttributes(); *buffer_attributes = reinterpret_cast(lbuffer_attributes); return nullptr; // success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesDelete( TRITONSERVER_BufferAttributes* buffer_attributes) { tc::BufferAttributes* lbuffer_attributes = reinterpret_cast(buffer_attributes); delete lbuffer_attributes; return nullptr; // success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetMemoryTypeId( TRITONSERVER_BufferAttributes* buffer_attributes, int64_t memory_type_id) { tc::BufferAttributes* lbuffer_attributes = reinterpret_cast(buffer_attributes); lbuffer_attributes->SetMemoryTypeId(memory_type_id); return nullptr; // success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetMemoryType( TRITONSERVER_BufferAttributes* buffer_attributes, TRITONSERVER_MemoryType memory_type) { tc::BufferAttributes* lbuffer_attributes = reinterpret_cast(buffer_attributes); lbuffer_attributes->SetMemoryType(memory_type); return nullptr; // success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetCudaIpcHandle( TRITONSERVER_BufferAttributes* buffer_attributes, void* cuda_ipc_handle) { tc::BufferAttributes* lbuffer_attributes = reinterpret_cast(buffer_attributes); lbuffer_attributes->SetCudaIpcHandle(cuda_ipc_handle); return nullptr; // success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetByteSize( TRITONSERVER_BufferAttributes* buffer_attributes, size_t byte_size) { tc::BufferAttributes* lbuffer_attributes = reinterpret_cast(buffer_attributes); 
lbuffer_attributes->SetByteSize(byte_size); return nullptr; // success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesMemoryTypeId( TRITONSERVER_BufferAttributes* buffer_attributes, int64_t* memory_type_id) { tc::BufferAttributes* lbuffer_attributes = reinterpret_cast(buffer_attributes); *memory_type_id = lbuffer_attributes->MemoryTypeId(); return nullptr; // success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesMemoryType( TRITONSERVER_BufferAttributes* buffer_attributes, TRITONSERVER_MemoryType* memory_type) { tc::BufferAttributes* lbuffer_attributes = reinterpret_cast(buffer_attributes); *memory_type = lbuffer_attributes->MemoryType(); return nullptr; // success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesCudaIpcHandle( TRITONSERVER_BufferAttributes* buffer_attributes, void** cuda_ipc_handle) { tc::BufferAttributes* lbuffer_attributes = reinterpret_cast(buffer_attributes); *cuda_ipc_handle = lbuffer_attributes->CudaIpcHandle(); return nullptr; // success } TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesByteSize( TRITONSERVER_BufferAttributes* buffer_attributes, size_t* byte_size) { tc::BufferAttributes* lbuffer_attributes = reinterpret_cast(buffer_attributes); *byte_size = lbuffer_attributes->ByteSize(); return nullptr; // success } // // TRITONSERVER_Server // TRITONAPI_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerNew( TRITONSERVER_Server** server, TRITONSERVER_ServerOptions* options) { tc::InferenceServer* lserver = new tc::InferenceServer(); TritonServerOptions* loptions = reinterpret_cast(options); NVTX_INITIALIZE; #ifdef TRITON_ENABLE_METRICS // NOTE: Metrics must be enabled before backends are setup if (loptions->Metrics()) { tc::Metrics::EnableMetrics(); tc::Metrics::SetMetricsInterval(loptions->MetricsInterval()); } #endif // TRITON_ENABLE_METRICS lserver->SetId(loptions->ServerId()); lserver->SetModelRepositoryPaths(loptions->ModelRepositoryPaths()); lserver->SetModelControlMode(loptions->ModelControlMode()); lserver->SetStartupModels(loptions->StartupModels()); bool strict_model_config = loptions->StrictModelConfig(); lserver->SetStrictModelConfigEnabled(strict_model_config); lserver->SetRateLimiterMode(loptions->RateLimiterMode()); lserver->SetRateLimiterResources(loptions->RateLimiterResources()); lserver->SetPinnedMemoryPoolByteSize(loptions->PinnedMemoryPoolByteSize()); lserver->SetResponseCacheByteSize(loptions->ResponseCacheByteSize()); lserver->SetCudaMemoryPoolByteSize(loptions->CudaMemoryPoolByteSize()); double min_compute_capability = loptions->MinSupportedComputeCapability(); lserver->SetMinSupportedComputeCapability(min_compute_capability); lserver->SetStrictReadinessEnabled(loptions->StrictReadiness()); lserver->SetExitTimeoutSeconds(loptions->ExitTimeout()); lserver->SetHostPolicyCmdlineConfig(loptions->HostPolicyCmdlineConfigMap()); lserver->SetRepoAgentDir(loptions->RepoAgentDir()); lserver->SetBufferManagerThreadCount(loptions->BufferManagerThreadCount()); lserver->SetModelLoadThreadCount(loptions->ModelLoadThreadCount()); // SetBackendCmdlineConfig must be called after all AddBackendConfig calls // have completed. // Note that the auto complete config condition is reverted // due to setting name being different loptions->AddBackendConfig( std::string(), "auto-complete-config", strict_model_config ? 
"false" : "true"); loptions->AddBackendConfig( std::string(), "min-compute-capability", std::to_string(min_compute_capability)); loptions->AddBackendConfig( std::string(), "backend-directory", loptions->BackendDir()); lserver->SetBackendCmdlineConfig(loptions->BackendCmdlineConfigMap()); // Initialize server tc::Status status = lserver->Init(); #ifdef TRITON_ENABLE_METRICS if (loptions->Metrics() && lserver->ResponseCacheEnabled()) { // NOTE: Cache metrics must be enabled after cache initialized in // server->Init() tc::Metrics::EnableCacheMetrics(lserver->GetResponseCache()); } #ifdef TRITON_ENABLE_METRICS_GPU if (loptions->Metrics() && loptions->GpuMetrics()) { tc::Metrics::EnableGPUMetrics(); } #endif // TRITON_ENABLE_METRICS_GPU #ifdef TRITON_ENABLE_METRICS_CPU if (loptions->Metrics() && loptions->CpuMetrics()) { tc::Metrics::EnableCpuMetrics(); } #endif // TRITON_ENABLE_METRICS_CPU const bool poll_metrics = (lserver->ResponseCacheEnabled() || loptions->GpuMetrics() || loptions->CpuMetrics()); if (loptions->Metrics() && poll_metrics) { // Start thread to poll enabled metrics periodically tc::Metrics::StartPollingThreadSingleton(lserver->GetResponseCache()); } #endif // TRITON_ENABLE_METRICS // Setup tritonserver options table std::vector options_headers; options_headers.emplace_back("Option"); options_headers.emplace_back("Value"); triton::common::TablePrinter options_table(options_headers); options_table.InsertRow(std::vector{"server_id", lserver->Id()}); options_table.InsertRow( std::vector{"server_version", lserver->Version()}); auto extensions = lserver->Extensions(); std::string exts; for (const auto& ext : extensions) { exts.append(ext); exts.append(" "); } // Remove the trailing space if (exts.size() > 0) exts.pop_back(); options_table.InsertRow(std::vector{"server_extensions", exts}); size_t i = 0; for (const auto& model_repository_path : lserver->ModelRepositoryPaths()) { options_table.InsertRow(std::vector{ "model_repository_path[" + std::to_string(i) + "]", model_repository_path}); ++i; } std::string model_control_mode; auto control_mode = lserver->GetModelControlMode(); switch (control_mode) { case tc::ModelControlMode::MODE_NONE: { model_control_mode = "MODE_NONE"; break; } case tc::ModelControlMode::MODE_POLL: { model_control_mode = "MODE_POLL"; break; } case tc::ModelControlMode::MODE_EXPLICIT: { model_control_mode = "MODE_EXPLICIT"; break; } default: { model_control_mode = ""; } } options_table.InsertRow( std::vector{"model_control_mode", model_control_mode}); i = 0; for (const auto& startup_model : lserver->StartupModels()) { options_table.InsertRow(std::vector{ "startup_models_" + std::to_string(i), startup_model}); ++i; } options_table.InsertRow(std::vector{ "strict_model_config", std::to_string(lserver->StrictModelConfigEnabled())}); std::string rate_limit = RateLimitModeToString(lserver->RateLimiterMode()); options_table.InsertRow(std::vector{"rate_limit", rate_limit}); i = 0; for (const auto& device_resources : lserver->RateLimiterResources()) { for (const auto& resource : device_resources.second) { options_table.InsertRow(std::vector{ "rate_limit_resource[" + std::to_string(i) + "]", ResourceString( resource.first, resource.second, device_resources.first)}); ++i; } } options_table.InsertRow(std::vector{ "pinned_memory_pool_byte_size", std::to_string(lserver->PinnedMemoryPoolByteSize())}); for (const auto& cuda_memory_pool : lserver->CudaMemoryPoolByteSize()) { options_table.InsertRow(std::vector{ "cuda_memory_pool_byte_size{" + 
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerDelete(TRITONSERVER_Server* server)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);
  if (lserver != nullptr) {
    RETURN_IF_STATUS_ERROR(lserver->Stop());
  }
  delete lserver;
  return nullptr;  // Success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerStop(TRITONSERVER_Server* server)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);
  if (lserver != nullptr) {
    RETURN_IF_STATUS_ERROR(lserver->Stop());
  }
  return nullptr;  // Success
}

TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerRegisterModelRepository(
    TRITONSERVER_Server* server, const char* repository_path,
    const TRITONSERVER_Parameter** name_mapping, const uint32_t mapping_count)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);
  if ((name_mapping == nullptr) && (mapping_count != 0)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "model mappings are not provided while mapping count is non-zero");
  }
  std::unordered_map<std::string, std::string> model_mapping;
  for (size_t i = 0; i < mapping_count; ++i) {
    auto mapping =
        reinterpret_cast<const tc::InferenceParameter*>(name_mapping[i]);
    auto subdir = mapping->Name();

    if (mapping->Type() != TRITONSERVER_PARAMETER_STRING) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          std::string(
              "Mapped model name must be a string, found "
              "another type for " +
              subdir)
              .c_str());
    }

    auto model_name =
        std::string(reinterpret_cast<const char*>(mapping->ValuePointer()));

    if (!(model_mapping.emplace(model_name, subdir).second)) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          (std::string("failed to register '") + repository_path +
           "', there is a conflicting mapping for '" +
           std::string(model_name) + "'")
              .c_str());
    }
  }
  RETURN_IF_STATUS_ERROR(
      lserver->RegisterModelRepository(repository_path, model_mapping));
  return nullptr;  // Success
}

TRITONSERVER_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerUnregisterModelRepository(
    TRITONSERVER_Server* server, const char* repository_path)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);
  RETURN_IF_STATUS_ERROR(lserver->UnregisterModelRepository(repository_path));

  return nullptr;  // Success
}
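// Illustrative usage sketch (not compiled into this file): registering a
// repository with a name mapping. As implemented above, each mapping
// parameter is named after the repository subdirectory and its string
// value is the model name that subdirectory is served under. Paths and
// names are placeholders; error returns are ignored for brevity.
//
//   TRITONSERVER_Parameter* mapping = TRITONSERVER_ParameterNew(
//       "model_subdir", TRITONSERVER_PARAMETER_STRING, "served_model_name");
//   const TRITONSERVER_Parameter* mappings[1] = {mapping};
//   TRITONSERVER_ServerRegisterModelRepository(
//       server, "/extra_repo", mappings, 1 /* mapping_count */);
//   TRITONSERVER_ParameterDelete(mapping);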
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerPollModelRepository(TRITONSERVER_Server* server)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);
  RETURN_IF_STATUS_ERROR(lserver->PollModelRepository());
  return nullptr;  // Success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerIsLive(TRITONSERVER_Server* server, bool* live)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  RETURN_IF_STATUS_ERROR(lserver->IsLive(live));
  return nullptr;  // Success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerIsReady(TRITONSERVER_Server* server, bool* ready)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  RETURN_IF_STATUS_ERROR(lserver->IsReady(ready));
  return nullptr;  // Success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelIsReady(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, bool* ready)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  RETURN_IF_STATUS_ERROR(
      lserver->ModelIsReady(model_name, model_version, ready));
  return nullptr;  // Success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelBatchProperties(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, uint32_t* flags, void** voidp)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  if (voidp != nullptr) {
    *voidp = nullptr;
  }

  std::shared_ptr<tc::Model> model;
  RETURN_IF_STATUS_ERROR(lserver->GetModel(model_name, model_version, &model));

  if (model->Config().max_batch_size() > 0) {
    *flags = TRITONSERVER_BATCH_FIRST_DIM;
  } else {
    *flags = TRITONSERVER_BATCH_UNKNOWN;
  }

  return nullptr;  // Success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelTransactionProperties(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, uint32_t* txn_flags, void** voidp)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  if (voidp != nullptr) {
    *voidp = nullptr;
  }

  *txn_flags = 0;

  std::shared_ptr<tc::Model> model;
  RETURN_IF_STATUS_ERROR(lserver->GetModel(model_name, model_version, &model));

  if (model->Config().model_transaction_policy().decoupled()) {
    *txn_flags = TRITONSERVER_TXN_DECOUPLED;
  } else {
    *txn_flags = TRITONSERVER_TXN_ONE_TO_ONE;
  }

  return nullptr;  // Success
}
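// Illustrative usage sketch (not compiled into this file): a frontend can
// use the flags reported above to decide how many responses to expect per
// request. The model name is a placeholder; error returns are ignored for
// brevity.
//
//   uint32_t txn_flags = 0;
//   TRITONSERVER_ServerModelTransactionProperties(
//       server, "mymodel", -1 /* latest version */, &txn_flags, nullptr);
//   if ((txn_flags & TRITONSERVER_TXN_DECOUPLED) != 0) {
//     // Zero or more responses may be produced for each request.
//   }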
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerMetadata(
    TRITONSERVER_Server* server, TRITONSERVER_Message** server_metadata)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  triton::common::TritonJson::Value metadata(
      triton::common::TritonJson::ValueType::OBJECT);
  // Just store string reference in JSON object since it will be
  // serialized to another buffer before lserver->Id() or
  // lserver->Version() lifetime ends.
  RETURN_IF_STATUS_ERROR(metadata.AddStringRef("name", lserver->Id().c_str()));
  RETURN_IF_STATUS_ERROR(
      metadata.AddStringRef("version", lserver->Version().c_str()));

  triton::common::TritonJson::Value extensions(
      metadata, triton::common::TritonJson::ValueType::ARRAY);
  const std::vector<const char*>& exts = lserver->Extensions();
  for (const auto ext : exts) {
    RETURN_IF_STATUS_ERROR(extensions.AppendStringRef(ext));
  }

  RETURN_IF_STATUS_ERROR(metadata.Add("extensions", std::move(extensions)));

  *server_metadata = reinterpret_cast<TRITONSERVER_Message*>(
      new tc::TritonServerMessage(metadata));
  return nullptr;  // Success
}
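// Illustrative usage sketch (not compiled into this file): the returned
// message owns its serialized JSON, so the caller only borrows the buffer
// until the message is deleted. Error returns are ignored for brevity.
//
//   TRITONSERVER_Message* server_metadata = nullptr;
//   TRITONSERVER_ServerMetadata(server, &server_metadata);
//   const char* base = nullptr;
//   size_t byte_size = 0;
//   TRITONSERVER_MessageSerializeToJson(server_metadata, &base, &byte_size);
//   // ... use base/byte_size ...
//   TRITONSERVER_MessageDelete(server_metadata);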
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelMetadata(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, TRITONSERVER_Message** model_metadata)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  std::shared_ptr<tc::Model> model;
  RETURN_IF_STATUS_ERROR(lserver->GetModel(model_name, model_version, &model));

  std::vector<int64_t> ready_versions;
  RETURN_IF_STATUS_ERROR(
      lserver->ModelReadyVersions(model_name, &ready_versions));

  triton::common::TritonJson::Value metadata(
      triton::common::TritonJson::ValueType::OBJECT);

  // Can use string ref in this function even though the model can be
  // unloaded and the config become invalid, because TritonServerMessage
  // serializes the JSON when it is constructed below.
  RETURN_IF_STATUS_ERROR(metadata.AddStringRef("name", model_name));

  triton::common::TritonJson::Value versions(
      metadata, triton::common::TritonJson::ValueType::ARRAY);
  if (model_version != -1) {
    RETURN_IF_STATUS_ERROR(
        versions.AppendString(std::move(std::to_string(model_version))));
  } else {
    for (const auto v : ready_versions) {
      RETURN_IF_STATUS_ERROR(
          versions.AppendString(std::move(std::to_string(v))));
    }
  }

  RETURN_IF_STATUS_ERROR(metadata.Add("versions", std::move(versions)));

  const auto& model_config = model->Config();
  if (!model_config.platform().empty()) {
    RETURN_IF_STATUS_ERROR(
        metadata.AddStringRef("platform", model_config.platform().c_str()));
  } else {
    RETURN_IF_STATUS_ERROR(
        metadata.AddStringRef("platform", model_config.backend().c_str()));
  }

  triton::common::TritonJson::Value inputs(
      metadata, triton::common::TritonJson::ValueType::ARRAY);
  for (const auto& io : model_config.input()) {
    triton::common::TritonJson::Value io_metadata(
        metadata, triton::common::TritonJson::ValueType::OBJECT);
    RETURN_IF_STATUS_ERROR(
        io_metadata.AddStringRef("name", io.name().c_str()));
    RETURN_IF_STATUS_ERROR(io_metadata.AddStringRef(
        "datatype",
        triton::common::DataTypeToProtocolString(io.data_type())));

    // Input shape. If the model supports batching then must include
    // '-1' for the batch dimension.
    triton::common::TritonJson::Value io_metadata_shape(
        metadata, triton::common::TritonJson::ValueType::ARRAY);
    if (model_config.max_batch_size() >= 1) {
      RETURN_IF_STATUS_ERROR(io_metadata_shape.AppendInt(-1));
    }
    for (const auto d : io.dims()) {
      RETURN_IF_STATUS_ERROR(io_metadata_shape.AppendInt(d));
    }
    RETURN_IF_STATUS_ERROR(
        io_metadata.Add("shape", std::move(io_metadata_shape)));

    RETURN_IF_STATUS_ERROR(inputs.Append(std::move(io_metadata)));
  }
  RETURN_IF_STATUS_ERROR(metadata.Add("inputs", std::move(inputs)));

  triton::common::TritonJson::Value outputs(
      metadata, triton::common::TritonJson::ValueType::ARRAY);
  for (const auto& io : model_config.output()) {
    triton::common::TritonJson::Value io_metadata(
        metadata, triton::common::TritonJson::ValueType::OBJECT);
    RETURN_IF_STATUS_ERROR(
        io_metadata.AddStringRef("name", io.name().c_str()));
    RETURN_IF_STATUS_ERROR(io_metadata.AddStringRef(
        "datatype",
        triton::common::DataTypeToProtocolString(io.data_type())));

    // Output shape. If the model supports batching then must include
    // '-1' for the batch dimension.
    triton::common::TritonJson::Value io_metadata_shape(
        metadata, triton::common::TritonJson::ValueType::ARRAY);
    if (model_config.max_batch_size() >= 1) {
      RETURN_IF_STATUS_ERROR(io_metadata_shape.AppendInt(-1));
    }
    for (const auto d : io.dims()) {
      RETURN_IF_STATUS_ERROR(io_metadata_shape.AppendInt(d));
    }
    RETURN_IF_STATUS_ERROR(
        io_metadata.Add("shape", std::move(io_metadata_shape)));

    RETURN_IF_STATUS_ERROR(outputs.Append(std::move(io_metadata)));
  }
  RETURN_IF_STATUS_ERROR(metadata.Add("outputs", std::move(outputs)));

  *model_metadata = reinterpret_cast<TRITONSERVER_Message*>(
      new tc::TritonServerMessage(metadata));
  return nullptr;  // success
}
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelStatistics(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, TRITONSERVER_Message** model_stats)
{
#ifndef TRITON_ENABLE_STATS
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED, "statistics not supported");
#else

  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  auto model_name_string = std::string(model_name);
  std::map<std::string, std::vector<int64_t>> ready_model_versions;
  if (model_name_string.empty()) {
    RETURN_IF_STATUS_ERROR(lserver->ModelReadyVersions(&ready_model_versions));
  } else {
    std::vector<int64_t> ready_versions;
    RETURN_IF_STATUS_ERROR(
        lserver->ModelReadyVersions(model_name_string, &ready_versions));
    if (ready_versions.empty()) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          std::string(
              "requested model '" + model_name_string + "' is not available")
              .c_str());
    }

    if (model_version == -1) {
      ready_model_versions.emplace(
          model_name_string, std::move(ready_versions));
    } else {
      bool found = false;
      for (const auto v : ready_versions) {
        if (v == model_version) {
          found = true;
          break;
        }
      }
      if (found) {
        ready_model_versions.emplace(
            model_name_string, std::vector<int64_t>{model_version});
      } else {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            std::string(
                "requested model version is not available for model '" +
                model_name_string + "'")
                .c_str());
      }
    }
  }

  // Can use string ref in this function because TritonServerMessage
  // serializes the JSON when it is constructed below.
  triton::common::TritonJson::Value metadata(
      triton::common::TritonJson::ValueType::OBJECT);

  triton::common::TritonJson::Value model_stats_json(
      metadata, triton::common::TritonJson::ValueType::ARRAY);
  for (const auto& mv_pair : ready_model_versions) {
    for (const auto& version : mv_pair.second) {
      std::shared_ptr<tc::Model> model;
      RETURN_IF_STATUS_ERROR(
          lserver->GetModel(mv_pair.first, version, &model));
      const auto& infer_stats =
          model->StatsAggregator().ImmutableInferStats();
      const auto& infer_batch_stats =
          model->StatsAggregator().ImmutableInferBatchStats();

      triton::common::TritonJson::Value inference_stats(
          metadata, triton::common::TritonJson::ValueType::OBJECT);
      // Compute statistics are recorded only for requests that do not go
      // through the cache, so subtract the cache-hit count from the
      // success count. The cache-hit count is simply 0 when the cache is
      // disabled.
      uint64_t compute_count =
          infer_stats.success_count_ - infer_stats.cache_hit_count_;
      SetDurationStat(
          metadata, inference_stats, "success", infer_stats.success_count_,
          infer_stats.request_duration_ns_);
      SetDurationStat(
          metadata, inference_stats, "fail", infer_stats.failure_count_,
          infer_stats.failure_duration_ns_);
      SetDurationStat(
          metadata, inference_stats, "queue", infer_stats.success_count_,
          infer_stats.queue_duration_ns_);
      SetDurationStat(
          metadata, inference_stats, "compute_input", compute_count,
          infer_stats.compute_input_duration_ns_);
      SetDurationStat(
          metadata, inference_stats, "compute_infer", compute_count,
          infer_stats.compute_infer_duration_ns_);
      SetDurationStat(
          metadata, inference_stats, "compute_output", compute_count,
          infer_stats.compute_output_duration_ns_);
      SetDurationStat(
          metadata, inference_stats, "cache_hit",
          infer_stats.cache_hit_count_,
          infer_stats.cache_hit_lookup_duration_ns_);
      // NOTE: cache_miss_count_ should equal compute_count if non-zero
      SetDurationStat(
          metadata, inference_stats, "cache_miss",
          infer_stats.cache_miss_count_,
          infer_stats.cache_miss_lookup_duration_ns_ +
              infer_stats.cache_miss_insertion_duration_ns_);

      triton::common::TritonJson::Value batch_stats(
          metadata, triton::common::TritonJson::ValueType::ARRAY);
      for (const auto& batch : infer_batch_stats) {
        triton::common::TritonJson::Value batch_stat(
            metadata, triton::common::TritonJson::ValueType::OBJECT);
        RETURN_IF_STATUS_ERROR(batch_stat.AddUInt("batch_size", batch.first));
        SetDurationStat(
            metadata, batch_stat, "compute_input", batch.second.count_,
            batch.second.compute_input_duration_ns_);
        SetDurationStat(
            metadata, batch_stat, "compute_infer", batch.second.count_,
            batch.second.compute_infer_duration_ns_);
        SetDurationStat(
            metadata, batch_stat, "compute_output", batch.second.count_,
            batch.second.compute_output_duration_ns_);
        RETURN_IF_STATUS_ERROR(batch_stats.Append(std::move(batch_stat)));
      }

      triton::common::TritonJson::Value model_stat(
          metadata, triton::common::TritonJson::ValueType::OBJECT);
      RETURN_IF_STATUS_ERROR(
          model_stat.AddStringRef("name", mv_pair.first.c_str()));
      RETURN_IF_STATUS_ERROR(
          model_stat.AddString("version", std::move(std::to_string(version))));

      RETURN_IF_STATUS_ERROR(model_stat.AddUInt(
          "last_inference", model->StatsAggregator().LastInferenceMs()));
      RETURN_IF_STATUS_ERROR(model_stat.AddUInt(
          "inference_count", model->StatsAggregator().InferenceCount()));
      RETURN_IF_STATUS_ERROR(model_stat.AddUInt(
          "execution_count", model->StatsAggregator().ExecutionCount()));

      RETURN_IF_STATUS_ERROR(
          model_stat.Add("inference_stats", std::move(inference_stats)));
      RETURN_IF_STATUS_ERROR(
          model_stat.Add("batch_stats", std::move(batch_stats)));
      RETURN_IF_STATUS_ERROR(model_stats_json.Append(std::move(model_stat)));
    }
  }

  RETURN_IF_STATUS_ERROR(
      metadata.Add("model_stats", std::move(model_stats_json)));
  *model_stats = reinterpret_cast<TRITONSERVER_Message*>(
      new tc::TritonServerMessage(metadata));

  return nullptr;  // success

#endif  // TRITON_ENABLE_STATS
}
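// Illustrative usage sketch (not compiled into this file): per the handling
// above, an empty model name selects every model and version -1 selects all
// ready versions. Error returns are ignored for brevity.
//
//   TRITONSERVER_Message* model_stats = nullptr;
//   TRITONSERVER_ServerModelStatistics(
//       server, "" /* all models */, -1 /* all versions */, &model_stats);
//   // ... serialize with TRITONSERVER_MessageSerializeToJson ...
//   TRITONSERVER_MessageDelete(model_stats);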
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelConfig(
    TRITONSERVER_Server* server, const char* model_name,
    const int64_t model_version, const uint32_t config_version,
    TRITONSERVER_Message** model_config)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  std::shared_ptr<tc::Model> model;
  RETURN_IF_STATUS_ERROR(lserver->GetModel(model_name, model_version, &model));

  std::string model_config_json;
  RETURN_IF_STATUS_ERROR(tc::ModelConfigToJson(
      model->Config(), config_version, &model_config_json));

  *model_config = reinterpret_cast<TRITONSERVER_Message*>(
      new tc::TritonServerMessage(std::move(model_config_json)));

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerModelIndex(
    TRITONSERVER_Server* server, uint32_t flags,
    TRITONSERVER_Message** repository_index)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  const bool ready_only = ((flags & TRITONSERVER_INDEX_FLAG_READY) != 0);

  std::vector<tc::ModelRepositoryManager::ModelIndex> index;
  RETURN_IF_STATUS_ERROR(lserver->RepositoryIndex(ready_only, &index));

  // Can use string ref in this function because TritonServerMessage
  // serializes the JSON when it is constructed below.
  triton::common::TritonJson::Value repository_index_json(
      triton::common::TritonJson::ValueType::ARRAY);

  for (const auto& in : index) {
    triton::common::TritonJson::Value model_index(
        repository_index_json, triton::common::TritonJson::ValueType::OBJECT);
    RETURN_IF_STATUS_ERROR(model_index.AddStringRef("name", in.name_.c_str()));
    if (!in.name_only_) {
      if (in.version_ >= 0) {
        RETURN_IF_STATUS_ERROR(model_index.AddString(
            "version", std::move(std::to_string(in.version_))));
      }
      RETURN_IF_STATUS_ERROR(model_index.AddStringRef(
          "state", tc::ModelReadyStateString(in.state_).c_str()));
      if (!in.reason_.empty()) {
        RETURN_IF_STATUS_ERROR(
            model_index.AddStringRef("reason", in.reason_.c_str()));
      }
    }

    RETURN_IF_STATUS_ERROR(
        repository_index_json.Append(std::move(model_index)));
  }

  *repository_index = reinterpret_cast<TRITONSERVER_Message*>(
      new tc::TritonServerMessage(repository_index_json));

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerLoadModel(
    TRITONSERVER_Server* server, const char* model_name)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  RETURN_IF_STATUS_ERROR(lserver->LoadModel({{std::string(model_name), {}}}));

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerLoadModelWithParameters(
    TRITONSERVER_Server* server, const char* model_name,
    const TRITONSERVER_Parameter** parameters, const uint64_t parameter_count)
{
  if ((parameters == nullptr) && (parameter_count != 0)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "load parameters are not provided while parameter count is non-zero");
  }

  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  std::unordered_map<std::string, std::vector<const tc::InferenceParameter*>>
      models;
  std::vector<const tc::InferenceParameter*> mp;
  for (size_t i = 0; i < parameter_count; ++i) {
    mp.emplace_back(
        reinterpret_cast<const tc::InferenceParameter*>(parameters[i]));
  }
  models[model_name] = std::move(mp);
  RETURN_IF_STATUS_ERROR(lserver->LoadModel(models));

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerUnloadModel(
    TRITONSERVER_Server* server, const char* model_name)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  RETURN_IF_STATUS_ERROR(lserver->UnloadModel(
      std::string(model_name), false /* unload_dependents */));

  return nullptr;  // success
}
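// Illustrative usage sketch (not compiled into this file): loading a model
// with an override parameter. The "config" parameter name follows the
// convention documented for TRITONSERVER_ServerLoadModelWithParameters in
// tritonserver.h; treat the names and JSON here as placeholders.
//
//   const char* config_json = "{\"backend\": \"mybackend\"}";
//   TRITONSERVER_Parameter* param = TRITONSERVER_ParameterNew(
//       "config", TRITONSERVER_PARAMETER_STRING, config_json);
//   const TRITONSERVER_Parameter* params[1] = {param};
//   TRITONSERVER_ServerLoadModelWithParameters(
//       server, "mymodel", params, 1 /* parameter_count */);
//   TRITONSERVER_ParameterDelete(param);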
TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerUnloadModelAndDependents(
    TRITONSERVER_Server* server, const char* model_name)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);

  RETURN_IF_STATUS_ERROR(lserver->UnloadModel(
      std::string(model_name), true /* unload_dependents */));

  return nullptr;  // success
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerMetrics(
    TRITONSERVER_Server* server, TRITONSERVER_Metrics** metrics)
{
#ifdef TRITON_ENABLE_METRICS
  TritonServerMetrics* lmetrics = new TritonServerMetrics();
  *metrics = reinterpret_cast<TRITONSERVER_Metrics*>(lmetrics);
  return nullptr;  // Success
#else
  *metrics = nullptr;
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
#endif  // TRITON_ENABLE_METRICS
}

TRITONAPI_DECLSPEC TRITONSERVER_Error*
TRITONSERVER_ServerInferAsync(
    TRITONSERVER_Server* server,
    TRITONSERVER_InferenceRequest* inference_request,
    TRITONSERVER_InferenceTrace* trace)
{
  tc::InferenceServer* lserver =
      reinterpret_cast<tc::InferenceServer*>(server);
  tc::InferenceRequest* lrequest =
      reinterpret_cast<tc::InferenceRequest*>(inference_request);

  RETURN_IF_STATUS_ERROR(lrequest->PrepareForInference());

  // Set the trace object in the request so that activity associated
  // with the request can be recorded as the request flows through
  // Triton.
  if (trace != nullptr) {
#ifdef TRITON_ENABLE_TRACING
    tc::InferenceTrace* ltrace = reinterpret_cast<tc::InferenceTrace*>(trace);
    ltrace->SetModelName(lrequest->ModelName());
    ltrace->SetModelVersion(lrequest->ActualModelVersion());

    lrequest->SetTrace(std::make_shared<tc::InferenceTraceProxy>(ltrace));
#else
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported");
#endif  // TRITON_ENABLE_TRACING
  }

  // We wrap the request in a unique pointer to ensure that it flows
  // through inferencing with clear ownership.
  std::unique_ptr<tc::InferenceRequest> ureq(lrequest);

  // Run inference...
  tc::Status status = lserver->InferAsync(ureq);

  // If there is an error then we must explicitly release any trace
  // object associated with the inference request above.
#ifdef TRITON_ENABLE_TRACING
  if (!status.IsOk()) {
    ureq->ReleaseTrace();
  }
#endif  // TRITON_ENABLE_TRACING

  // If there is an error then 'ureq' will still hold 'lrequest' and we
  // must release it from the unique_ptr since the caller retains
  // ownership on error. If there is no error then ureq == nullptr and
  // the release is a no-op.
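// (A usage sketch for TRITONSERVER_ServerMetrics follows TRITONSERVER_ServerInferAsync below.)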
  ureq.release();

  RETURN_IF_STATUS_ERROR(status);
  return nullptr;  // Success
}

// Illustrative usage sketch (not compiled into this file): pulling the
// Prometheus text exposition through the metrics object created by
// TRITONSERVER_ServerMetrics above. Error returns are ignored for brevity.
//
//   TRITONSERVER_Metrics* metrics = nullptr;
//   TRITONSERVER_ServerMetrics(server, &metrics);
//   const char* base = nullptr;
//   size_t byte_size = 0;
//   TRITONSERVER_MetricsFormatted(
//       metrics, TRITONSERVER_METRIC_PROMETHEUS, &base, &byte_size);
//   // ... scrape base/byte_size ...
//   TRITONSERVER_MetricsDelete(metrics);

//
// TRITONSERVER_MetricFamily
//
TRITONSERVER_Error*
TRITONSERVER_MetricFamilyNew(
    TRITONSERVER_MetricFamily** family, TRITONSERVER_MetricKind kind,
    const char* name, const char* description)
{
#ifdef TRITON_ENABLE_METRICS
  try {
    *family = reinterpret_cast<TRITONSERVER_MetricFamily*>(
        new tc::MetricFamily(kind, name, description));
  }
  catch (std::invalid_argument const& ex) {
    // Catch invalid kinds passed to constructor
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, ex.what());
  }
  return nullptr;  // Success
#else
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
#endif  // TRITON_ENABLE_METRICS
}

TRITONSERVER_Error*
TRITONSERVER_MetricFamilyDelete(TRITONSERVER_MetricFamily* family)
{
#ifdef TRITON_ENABLE_METRICS
  auto lfamily = reinterpret_cast<tc::MetricFamily*>(family);
  if (lfamily->NumMetrics() > 0) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Must call MetricDelete on all dependent metrics before calling "
        "MetricFamilyDelete.");
  }

  delete lfamily;
  return nullptr;  // Success
#else
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
#endif  // TRITON_ENABLE_METRICS
}

//
// TRITONSERVER_Metric
//
TRITONSERVER_Error*
TRITONSERVER_MetricNew(
    TRITONSERVER_Metric** metric, TRITONSERVER_MetricFamily* family,
    const TRITONSERVER_Parameter** labels, const uint64_t label_count)
{
#ifdef TRITON_ENABLE_METRICS
  std::vector<const tc::InferenceParameter*> labels_vec;
  for (size_t i = 0; i < label_count; i++) {
    labels_vec.emplace_back(
        reinterpret_cast<const tc::InferenceParameter*>(labels[i]));
  }

  try {
    *metric = reinterpret_cast<TRITONSERVER_Metric*>(
        new tc::Metric(family, labels_vec));
  }
  catch (std::invalid_argument const& ex) {
    // Catch invalid kinds passed to constructor
    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, ex.what());
  }

  return nullptr;  // Success
#else
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
#endif  // TRITON_ENABLE_METRICS
}

TRITONSERVER_Error*
TRITONSERVER_MetricDelete(TRITONSERVER_Metric* metric)
{
#ifdef TRITON_ENABLE_METRICS
  auto lmetric = reinterpret_cast<tc::Metric*>(metric);
  if (lmetric->Family() == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "MetricFamily reference was invalidated before Metric was deleted. "
        "Must call MetricDelete on all dependent metrics before calling "
        "MetricFamilyDelete.");
  }

  delete lmetric;
  return nullptr;  // success
#else
  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
#endif  // TRITON_ENABLE_METRICS
}
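// Illustrative usage sketch (not compiled into this file): the deletion
// order enforced above is metrics first, then their family. Names and
// labels are placeholders; error returns are ignored for brevity.
//
//   TRITONSERVER_MetricFamily* family = nullptr;
//   TRITONSERVER_MetricFamilyNew(
//       &family, TRITONSERVER_METRIC_KIND_COUNTER, "example_counter",
//       "an example counter metric");
//
//   TRITONSERVER_Parameter* label = TRITONSERVER_ParameterNew(
//       "example_key", TRITONSERVER_PARAMETER_STRING, "example_value");
//   const TRITONSERVER_Parameter* labels[1] = {label};
//   TRITONSERVER_Metric* metric = nullptr;
//   TRITONSERVER_MetricNew(&metric, family, labels, 1 /* label_count */);
//   TRITONSERVER_ParameterDelete(label);
//
//   TRITONSERVER_MetricIncrement(metric, 1.0);
//   TRITONSERVER_MetricDelete(metric);
//   TRITONSERVER_MetricFamilyDelete(family);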
" "Must call MetricDelete on all dependent metrics before calling " "MetricFamilyDelete."); } delete lmetric; return nullptr; // success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); #endif // TRITON_ENABLE_METRICS } TRITONSERVER_Error* TRITONSERVER_MetricValue(TRITONSERVER_Metric* metric, double* value) { #ifdef TRITON_ENABLE_METRICS return reinterpret_cast(metric)->Value(value); #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); #endif // TRITON_ENABLE_METRICS } TRITONSERVER_Error* TRITONSERVER_MetricIncrement(TRITONSERVER_Metric* metric, double value) { #ifdef TRITON_ENABLE_METRICS return reinterpret_cast(metric)->Increment(value); #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); #endif // TRITON_ENABLE_METRICS } TRITONSERVER_Error* TRITONSERVER_MetricSet(TRITONSERVER_Metric* metric, double value) { #ifdef TRITON_ENABLE_METRICS return reinterpret_cast(metric)->Set(value); #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); #endif // TRITON_ENABLE_METRICS } TRITONSERVER_Error* TRITONSERVER_GetMetricKind( TRITONSERVER_Metric* metric, TRITONSERVER_MetricKind* kind) { #ifdef TRITON_ENABLE_METRICS *kind = reinterpret_cast(metric)->Kind(); return nullptr; // Success #else return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); #endif // TRITON_ENABLE_METRICS } } // extern C