// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION &
// AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package triton.distributed.icp;

//@@
//@@.. cpp:var:: message ModelInferRequest
//@@
//@@   Request message for ModelInfer.
//@@
message ModelInferRequest {
  //@@
  //@@  .. cpp:var:: message InferInputTensor
  //@@
  //@@     An input tensor for an inference request.
  //@@
  message InferInputTensor {
    //@@
    //@@    .. cpp:var:: string name
    //@@
    //@@       The tensor name.
    //@@
    string name = 1;

    //@@
    //@@    .. cpp:var:: string datatype
    //@@
    //@@       The tensor data type.
    //@@
    string datatype = 2;

    //@@
    //@@    .. cpp:var:: int64 shape (repeated)
    //@@
    //@@       The tensor shape.
    //@@
    repeated int64 shape = 3;

    //@@    .. cpp:var:: map<string,InferParameter> parameters
    //@@
    //@@       Optional inference input tensor parameters.
    //@@
    map<string, InferParameter> parameters = 4;

    //@@    .. cpp:var:: InferTensorContents contents
    //@@
    //@@       The tensor contents using a data-type format. This field
    //@@       must not be specified if tensor contents are being specified
    //@@       in ModelInferRequest.raw_input_contents.
    //@@
    // NOTE(review): ModelInferRequest in this file declares no
    // raw_input_contents field; the sentence above looks inherited from an
    // upstream schema -- confirm and update the generated docs if so.
    InferTensorContents contents = 5;
  }

  //@@
  //@@  .. cpp:var:: message InferRequestedOutputTensor
  //@@
  //@@     An output tensor requested for an inference request.
  //@@
  message InferRequestedOutputTensor {
    //@@
    //@@    .. cpp:var:: string name
    //@@
    //@@       The tensor name.
    //@@
    string name = 1;

    //@@    .. cpp:var:: map<string,InferParameter> parameters
    //@@
    //@@       Optional requested output tensor parameters.
    //@@
    map<string, InferParameter> parameters = 2;
  }

  //@@  .. cpp:var:: string model_name
  //@@
  //@@     The name of the model to use for inferencing.
  //@@
  string model_name = 1;

  //@@  .. cpp:var:: string model_version
  //@@
  //@@     The version of the model to use for inference. If not
  //@@     given the latest/most-recent version of the model is used.
  //@@
  string model_version = 2;

  //@@  .. cpp:var:: string id
  //@@
  //@@     Optional identifier for the request. If specified will be
  //@@     returned in the response.
  //@@
  string id = 3;

  //@@  .. cpp:var:: map<string,InferParameter> parameters
  //@@
  //@@     Optional inference parameters.
  //@@
  map<string, InferParameter> parameters = 4;

  //@@
  //@@  .. cpp:var:: InferInputTensor inputs (repeated)
  //@@
  //@@     The input tensors for the inference.
  //@@
  repeated InferInputTensor inputs = 5;

  //@@
  //@@  .. cpp:var:: InferRequestedOutputTensor outputs (repeated)
  //@@
  //@@     The requested output tensors for the inference. Optional, if not
  //@@     specified all outputs specified in the model config will be
  //@@     returned.
  //@@
  repeated InferRequestedOutputTensor outputs = 6;
}

//@@
//@@.. cpp:var:: message ModelInferResponse
//@@
//@@   Response message for ModelInfer.
//@@
message ModelInferResponse {
  //@@
  //@@  .. cpp:var:: message InferOutputTensor
  //@@
  //@@     An output tensor returned for an inference request.
  //@@
  message InferOutputTensor {
    //@@
    //@@    .. cpp:var:: string name
    //@@
    //@@       The tensor name.
    //@@
    string name = 1;

    //@@
    //@@    .. cpp:var:: string datatype
    //@@
    //@@       The tensor data type.
    //@@
    string datatype = 2;

    //@@
    //@@    .. cpp:var:: int64 shape (repeated)
    //@@
    //@@       The tensor shape.
    //@@
    repeated int64 shape = 3;

    //@@    .. cpp:var:: map<string,InferParameter> parameters
    //@@
    //@@       Optional output tensor parameters.
    //@@
    map<string, InferParameter> parameters = 4;

    //@@    .. cpp:var:: InferTensorContents contents
    //@@
    //@@       The tensor contents using a data-type format. This field
    //@@       must not be specified if tensor contents are being specified
    //@@       in ModelInferResponse.raw_output_contents.
    //@@
    // NOTE(review): ModelInferResponse in this file declares no
    // raw_output_contents field; the sentence above looks inherited from an
    // upstream schema -- confirm and update the generated docs if so.
    InferTensorContents contents = 5;
  }

  //@@  .. cpp:var:: string model_name
  //@@
  //@@     The name of the model used for inference.
  //@@
  string model_name = 1;

  //@@  .. cpp:var:: string model_version
  //@@
  //@@     The version of the model used for inference.
  //@@
  string model_version = 2;

  //@@  .. cpp:var:: string id
  //@@
  //@@     The id of the inference request if one was specified.
  //@@
  string id = 3;

  //@@  .. cpp:var:: map<string,InferParameter> parameters
  //@@
  //@@     Optional inference response parameters.
  //@@
  map<string, InferParameter> parameters = 4;

  //@@
  //@@  .. cpp:var:: InferOutputTensor outputs (repeated)
  //@@
  //@@     The output tensors holding inference results.
  //@@
  repeated InferOutputTensor outputs = 5;
}

//@@
//@@.. cpp:var:: message InferParameter
//@@
//@@   An inference parameter value.
//@@
message InferParameter {
  //@@  .. cpp:var:: oneof parameter_choice
  //@@
  //@@     The parameter value can be a string, an int64,
  //@@     a uint64, a double, or a boolean
  //@@
  //@@     Note: double and uint64 are currently
  //@@           placeholders for future use and
  //@@           are not supported for custom parameters
  //@@
  oneof parameter_choice {
    //@@    .. cpp:var:: bool bool_param
    //@@
    //@@       A boolean parameter value.
    //@@
    bool bool_param = 1;

    //@@    .. cpp:var:: int64 int64_param
    //@@
    //@@       An int64 parameter value.
    //@@
    int64 int64_param = 2;

    //@@    .. cpp:var:: string string_param
    //@@
    //@@       A string parameter value.
    //@@
    string string_param = 3;

    //@@    .. cpp:var:: double double_param
    //@@
    //@@       A double parameter value.
    //@@
    double double_param = 4;

    //@@    .. cpp:var:: uint64 uint64_param
    //@@
    //@@       A uint64 parameter value.
    //@@
    //@@       Not supported for custom parameters
    //@@
    uint64 uint64_param = 5;
  }
}

//@@
//@@.. cpp:var:: message InferTensorContents
//@@
//@@   The data contained in a tensor represented by the repeated type
//@@   that matches the tensor's data type. Protobuf oneof is not used
//@@   because oneofs cannot contain repeated fields.
//@@
message InferTensorContents {
  //@@
  //@@  .. cpp:var:: bytes bytes_contents (repeated)
  //@@
  //@@     The size must match what is expected by the tensor's shape.
  //@@     The contents must be the flattened, one-dimensional,
  //@@     row-major order of the tensor elements.
  //@@
  // NOTE(review): this is the only field declared here yet it uses field
  // number 8; presumably the number is kept to stay wire-compatible with an
  // upstream InferTensorContents layout -- confirm before renumbering.
  repeated bytes bytes_contents = 8;
}