OpenDAS / Lmdeploy — Commits

Commit b30f3cdb, authored Nov 14, 2023 by xiabo
Commit message: Add the downloaded code
Parent: e38ee081
Changes: 157 files, +2686 additions, -0 deletions. The contents of the first 20 changed files are shown below.
3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc (+750, -0)
3rdparty/backend-r22.12/examples/clients/bls_client (+86, -0)
3rdparty/backend-r22.12/examples/clients/minimal_client (+92, -0)
3rdparty/backend-r22.12/examples/clients/recommended_client (+91, -0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py (+74, -0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt (+58, -0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb (+0, -0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt (+28, -0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt (+63, -0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep (+0, -0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt (+24, -0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep (+0, -0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt (+21, -0)
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep (+0, -0)
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt (+24, -0)
3rdparty/backend-r22.12/include/triton/backend/backend_common.h (+672, -0)
3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h (+301, -0)
3rdparty/backend-r22.12/include/triton/backend/backend_memory.h (+138, -0)
3rdparty/backend-r22.12/include/triton/backend/backend_model.h (+146, -0)
3rdparty/backend-r22.12/include/triton/backend/backend_model_instance.h (+118, -0)
3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc (new file, mode 100644)
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/backend/backend_common.h"
#include "triton/backend/backend_input_collector.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"
#include "triton/backend/backend_output_responder.h"
#include "triton/core/tritonbackend.h"
namespace triton { namespace backend { namespace recommended {

//
// Backend that demonstrates the TRITONBACKEND API. This backend works
// for any model that has 1 input with any datatype and any shape and
// 1 output with the same shape and datatype as the input. The backend
// supports both batching and non-batching models.
//
// For each batch of requests, the backend returns the input tensor
// value in the output tensor.
//

/////////////

extern "C" {

// Triton calls TRITONBACKEND_Initialize when a backend is loaded into
// Triton to allow the backend to create and initialize any state that
// is intended to be shared across all models and model instances that
// use the backend. The backend should also verify version
// compatibility with Triton in this function.
//
TRITONSERVER_Error*
TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
{
  const char* cname;
  RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &cname));
  std::string name(cname);

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("TRITONBACKEND_Initialize: ") + name).c_str());

  // Check the backend API version that Triton supports vs. what this
  // backend was compiled against. Make sure that the Triton major
  // version is the same and the minor version is >= what this backend
  // uses.
  uint32_t api_version_major, api_version_minor;
  RETURN_IF_ERROR(
      TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("Triton TRITONBACKEND API version: ") +
       std::to_string(api_version_major) + "." +
       std::to_string(api_version_minor))
          .c_str());
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("'") + name + "' TRITONBACKEND API version: " +
       std::to_string(TRITONBACKEND_API_VERSION_MAJOR) + "." +
       std::to_string(TRITONBACKEND_API_VERSION_MINOR))
          .c_str());

  if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) ||
      (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        "triton backend API version does not support this backend");
  }

  // The backend configuration may contain information needed by the
  // backend, such as tritonserver command-line arguments. This
  // backend doesn't use any such configuration but for this example
  // print whatever is available.
  TRITONSERVER_Message* backend_config_message;
  RETURN_IF_ERROR(
      TRITONBACKEND_BackendConfig(backend, &backend_config_message));

  const char* buffer;
  size_t byte_size;
  RETURN_IF_ERROR(TRITONSERVER_MessageSerializeToJson(
      backend_config_message, &buffer, &byte_size));
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("backend configuration:\n") + buffer).c_str());

  // This backend does not require any "global" state but as an
  // example create a string to demonstrate.
  std::string* state = new std::string("backend state");
  RETURN_IF_ERROR(
      TRITONBACKEND_BackendSetState(backend, reinterpret_cast<void*>(state)));

  return nullptr;  // success
}

// Triton calls TRITONBACKEND_Finalize when a backend is no longer
// needed.
//
TRITONSERVER_Error*
TRITONBACKEND_Finalize(TRITONBACKEND_Backend* backend)
{
  // Delete the "global" state associated with the backend.
  void* vstate;
  RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &vstate));
  std::string* state = reinterpret_cast<std::string*>(vstate);

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("TRITONBACKEND_Finalize: state is '") + *state + "'")
          .c_str());

  delete state;

  return nullptr;  // success
}

}  // extern "C"

/////////////

//
// ModelState
//
// State associated with a model that is using this backend. An object
// of this class is created and associated with each
// TRITONBACKEND_Model. ModelState is derived from BackendModel class
// provided in the backend utilities that provides many common
// functions.
//
class ModelState : public BackendModel {
 public:
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_Model* triton_model, ModelState** state);
  virtual ~ModelState() = default;

  // Name of the input and output tensor
  const std::string& InputTensorName() const { return input_name_; }
  const std::string& OutputTensorName() const { return output_name_; }

  // Datatype of the input and output tensor
  TRITONSERVER_DataType TensorDataType() const { return datatype_; }

  // Shape of the input and output tensor as given in the model
  // configuration file. This shape will not include the batch
  // dimension (if the model has one).
  const std::vector<int64_t>& TensorNonBatchShape() const { return nb_shape_; }

  // Shape of the input and output tensor, including the batch
  // dimension (if the model has one). This method cannot be called
  // until the model is completely loaded and initialized, including
  // all instances of the model. In practice, this means that backend
  // should only call it in TRITONBACKEND_ModelInstanceExecute.
  TRITONSERVER_Error* TensorShape(std::vector<int64_t>& shape);

  // Validate that this model is supported by this backend.
  TRITONSERVER_Error* ValidateModelConfig();

 private:
  ModelState(TRITONBACKEND_Model* triton_model);

  std::string input_name_;
  std::string output_name_;
  TRITONSERVER_DataType datatype_;
  bool shape_initialized_;
  std::vector<int64_t> nb_shape_;
  std::vector<int64_t> shape_;
};

ModelState::ModelState(TRITONBACKEND_Model* triton_model)
    : BackendModel(triton_model), shape_initialized_(false)
{
  // Validate that the model's configuration matches what is supported
  // by this backend.
  THROW_IF_BACKEND_MODEL_ERROR(ValidateModelConfig());
}

TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
  try {
    *state = new ModelState(triton_model);
  }
  catch (const BackendModelException& ex) {
    RETURN_ERROR_IF_TRUE(
        ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelException"));
    RETURN_IF_ERROR(ex.err_);
  }

  return nullptr;  // success
}

TRITONSERVER_Error*
ModelState::TensorShape(std::vector<int64_t>& shape)
{
  // This backend supports models that batch along the first dimension
  // and those that don't batch. For non-batch models the output shape
  // will be the shape from the model configuration. For batch models
  // the output shape will be the shape from the model configuration
  // prepended with [ -1 ] to represent the batch dimension. The
  // backend "responder" utility used below will set the appropriate
  // batch dimension value for each response. The shape needs to be
  // initialized lazily because the SupportsFirstDimBatching function
  // cannot be used until the model is completely loaded.
  if (!shape_initialized_) {
    bool supports_first_dim_batching;
    RETURN_IF_ERROR(SupportsFirstDimBatching(&supports_first_dim_batching));
    if (supports_first_dim_batching) {
      shape_.push_back(-1);
    }

    shape_.insert(shape_.end(), nb_shape_.begin(), nb_shape_.end());
    shape_initialized_ = true;
  }

  shape = shape_;

  return nullptr;  // success
}

TRITONSERVER_Error*
ModelState::ValidateModelConfig()
{
  // If verbose logging is enabled, dump the model's configuration as
  // JSON into the console output.
  if (TRITONSERVER_LogIsEnabled(TRITONSERVER_LOG_VERBOSE)) {
    common::TritonJson::WriteBuffer buffer;
    RETURN_IF_ERROR(ModelConfig().PrettyWrite(&buffer));
    LOG_MESSAGE(
        TRITONSERVER_LOG_VERBOSE,
        (std::string("model configuration:\n") + buffer.Contents()).c_str());
  }

  // ModelConfig is the model configuration as a TritonJson
  // object. Use the TritonJson utilities to parse the JSON and
  // determine if the configuration is supported by this backend.
  common::TritonJson::Value inputs, outputs;
  RETURN_IF_ERROR(ModelConfig().MemberAsArray("input", &inputs));
  RETURN_IF_ERROR(ModelConfig().MemberAsArray("output", &outputs));

  // The model must have exactly 1 input and 1 output.
  RETURN_ERROR_IF_FALSE(
      inputs.ArraySize() == 1, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model configuration must have 1 input"));
  RETURN_ERROR_IF_FALSE(
      outputs.ArraySize() == 1, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model configuration must have 1 output"));

  common::TritonJson::Value input, output;
  RETURN_IF_ERROR(inputs.IndexAsObject(0, &input));
  RETURN_IF_ERROR(outputs.IndexAsObject(0, &output));

  // Record the input and output name in the model state.
  const char* input_name;
  size_t input_name_len;
  RETURN_IF_ERROR(
      input.MemberAsString("name", &input_name, &input_name_len));
  input_name_ = std::string(input_name);

  const char* output_name;
  size_t output_name_len;
  RETURN_IF_ERROR(
      output.MemberAsString("name", &output_name, &output_name_len));
  output_name_ = std::string(output_name);

  // Input and output must have same datatype
  std::string input_dtype, output_dtype;
  RETURN_IF_ERROR(input.MemberAsString("data_type", &input_dtype));
  RETURN_IF_ERROR(output.MemberAsString("data_type", &output_dtype));

  RETURN_ERROR_IF_FALSE(
      input_dtype == output_dtype, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("expected input and output datatype to match, got ") +
          input_dtype + " and " + output_dtype);

  datatype_ = ModelConfigDataTypeToTritonServerDataType(input_dtype);

  // Input and output must have same shape. Reshape is not supported
  // on either input or output so flag an error if the model
  // configuration uses it.
  triton::common::TritonJson::Value reshape;
  RETURN_ERROR_IF_TRUE(
      input.Find("reshape", &reshape), TRITONSERVER_ERROR_UNSUPPORTED,
      std::string("reshape not supported for input tensor"));
  RETURN_ERROR_IF_TRUE(
      output.Find("reshape", &reshape), TRITONSERVER_ERROR_UNSUPPORTED,
      std::string("reshape not supported for output tensor"));

  std::vector<int64_t> input_shape, output_shape;
  RETURN_IF_ERROR(backend::ParseShape(input, "dims", &input_shape));
  RETURN_IF_ERROR(backend::ParseShape(output, "dims", &output_shape));

  RETURN_ERROR_IF_FALSE(
      input_shape == output_shape, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("expected input and output shape to match, got ") +
          backend::ShapeToString(input_shape) + " and " +
          backend::ShapeToString(output_shape));

  nb_shape_ = input_shape;

  return nullptr;  // success
}

extern "C" {

// Triton calls TRITONBACKEND_ModelInitialize when a model is loaded
// to allow the backend to create any state associated with the model,
// and to also examine the model configuration to determine if the
// configuration is suitable for the backend. Any errors reported by
// this function will prevent the model from loading.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  // Create a ModelState object and associate it with the
  // TRITONBACKEND_Model. If anything goes wrong with initialization
  // of the model state then an error is returned and Triton will fail
  // to load the model.
  ModelState* model_state;
  RETURN_IF_ERROR(ModelState::Create(model, &model_state));
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelSetState(model, reinterpret_cast<void*>(model_state)));

  return nullptr;  // success
}

// Triton calls TRITONBACKEND_ModelFinalize when a model is no longer
// needed. The backend should cleanup any state associated with the
// model. This function will not be called until all model instances
// of the model have been finalized.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
  void* vstate;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vstate));
  ModelState* model_state = reinterpret_cast<ModelState*>(vstate);
  delete model_state;

  return nullptr;  // success
}

}  // extern "C"

/////////////

//
// ModelInstanceState
//
// State associated with a model instance. An object of this class is
// created and associated with each
// TRITONBACKEND_ModelInstance. ModelInstanceState is derived from
// BackendModelInstance class provided in the backend utilities that
// provides many common functions.
//
class ModelInstanceState : public BackendModelInstance {
 public:
  static TRITONSERVER_Error* Create(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance,
      ModelInstanceState** state);
  virtual ~ModelInstanceState() = default;

  // Get the state of the model that corresponds to this instance.
  ModelState* StateForModel() const { return model_state_; }

 private:
  ModelInstanceState(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance)
      : BackendModelInstance(model_state, triton_model_instance),
        model_state_(model_state)
  {
  }

  ModelState* model_state_;
};

TRITONSERVER_Error*
ModelInstanceState::Create(
    ModelState* model_state,
    TRITONBACKEND_ModelInstance* triton_model_instance,
    ModelInstanceState** state)
{
  try {
    *state = new ModelInstanceState(model_state, triton_model_instance);
  }
  catch (const BackendModelInstanceException& ex) {
    RETURN_ERROR_IF_TRUE(
        ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelInstanceException"));
    RETURN_IF_ERROR(ex.err_);
  }

  return nullptr;  // success
}

extern "C" {

// Triton calls TRITONBACKEND_ModelInstanceInitialize when a model
// instance is created to allow the backend to initialize any state
// associated with the instance.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
  // Get the model state associated with this instance's model.
  TRITONBACKEND_Model* model;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &model));

  void* vmodelstate;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vmodelstate));
  ModelState* model_state = reinterpret_cast<ModelState*>(vmodelstate);

  // Create a ModelInstanceState object and associate it with the
  // TRITONBACKEND_ModelInstance.
  ModelInstanceState* instance_state;
  RETURN_IF_ERROR(
      ModelInstanceState::Create(model_state, instance, &instance_state));
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState(
      instance, reinterpret_cast<void*>(instance_state)));

  return nullptr;  // success
}

// Triton calls TRITONBACKEND_ModelInstanceFinalize when a model
// instance is no longer needed. The backend should cleanup any state
// associated with the model instance.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
  void* vstate;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate));
  ModelInstanceState* instance_state =
      reinterpret_cast<ModelInstanceState*>(vstate);
  delete instance_state;

  return nullptr;  // success
}

}  // extern "C"

/////////////

extern "C" {

// When Triton calls TRITONBACKEND_ModelInstanceExecute it is required
// that a backend create a response for each request in the batch. A
// response may be the output tensors required for that request or may
// be an error that is returned in the response.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  // Collect various timestamps during the execution of this batch or
  // requests. These values are reported below before returning from
  // the function.

  uint64_t exec_start_ns = 0;
  SET_TIMESTAMP(exec_start_ns);

  // Triton will not call this function simultaneously for the same
  // 'instance'. But since this backend could be used by multiple
  // instances from multiple models the implementation needs to handle
  // multiple calls to this function at the same time (with different
  // 'instance' objects). Best practice for a high-performance
  // implementation is to avoid introducing mutex/lock and instead use
  // only function-local and model-instance-specific state.
  ModelInstanceState* instance_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(
      instance, reinterpret_cast<void**>(&instance_state)));
  ModelState* model_state = instance_state->StateForModel();

  // 'responses' is initialized as a parallel array to 'requests',
  // with one TRITONBACKEND_Response object for each
  // TRITONBACKEND_Request object. If something goes wrong while
  // creating these response objects, the backend simply returns an
  // error from TRITONBACKEND_ModelInstanceExecute, indicating to
  // Triton that this backend did not create or send any responses and
  // so it is up to Triton to create and send an appropriate error
  // response for each request. RETURN_IF_ERROR is one of several
  // useful macros for error handling that can be found in
  // backend_common.h.
  std::vector<TRITONBACKEND_Response*> responses;
  responses.reserve(request_count);
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Request* request = requests[r];
    TRITONBACKEND_Response* response;
    RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, request));
    responses.push_back(response);
  }

  // At this point, the backend takes ownership of 'requests', which
  // means that it is responsible for sending a response for every
  // request. From here, even if something goes wrong in processing,
  // the backend must return 'nullptr' from this function to indicate
  // success. Any errors and failures must be communicated via the
  // response objects.
  //
  // To simplify error handling, the backend utilities manage
  // 'responses' in a specific way and it is recommended that backends
  // follow this same pattern. When an error is detected in the
  // processing of a request, an appropriate error response is sent
  // and the corresponding TRITONBACKEND_Response object within
  // 'responses' is set to nullptr to indicate that the
  // request/response has already been handled and no further processing
  // should be performed for that request. Even if all responses fail,
  // the backend still allows execution to flow to the end of the
  // function so that statistics are correctly reported by the calls
  // to TRITONBACKEND_ModelInstanceReportStatistics and
  // TRITONBACKEND_ModelInstanceReportBatchStatistics.
  // RESPOND_AND_SET_NULL_IF_ERROR, and
  // RESPOND_ALL_AND_SET_NULL_IF_ERROR are macros from
  // backend_common.h that assist in this management of response
  // objects.

  // The backend could iterate over the 'requests' and process each
  // one separately. But for performance reasons it is usually
  // preferred to create batched input tensors that are processed
  // simultaneously. This is especially true for devices like GPUs
  // that are capable of exploiting the large amount parallelism
  // exposed by larger data sets.
  //
  // The backend utilities provide a "collector" to facilitate this
  // batching process. The 'collector's ProcessTensor function will
  // combine a tensor's value from each request in the batch into a
  // single contiguous buffer. The buffer can be provided by the
  // backend or 'collector' can create and manage it. In this backend,
  // there is not a specific buffer into which the batch should be
  // created, so use ProcessTensor arguments that cause collector to
  // manage it. ProcessTensor does NOT support TRITONSERVER_TYPE_BYTES
  // data type.
  BackendInputCollector collector(
      requests, request_count, &responses, model_state->TritonMemoryManager(),
      false /* pinned_enabled */, nullptr /* stream*/);

  // To instruct ProcessTensor to "gather" the entire batch of input
  // tensors into a single contiguous buffer in CPU memory, set the
  // "allowed input types" to be the CPU ones (see tritonserver.h in
  // the triton-inference-server/core repo for allowed memory types).
  std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>>
      allowed_input_types = {
          {TRITONSERVER_MEMORY_CPU_PINNED, 0}, {TRITONSERVER_MEMORY_CPU, 0}};

  const char* input_buffer;
  size_t input_buffer_byte_size;
  TRITONSERVER_MemoryType input_buffer_memory_type;
  int64_t input_buffer_memory_type_id;

  RESPOND_ALL_AND_SET_NULL_IF_ERROR(
      responses, request_count,
      collector.ProcessTensor(
          model_state->InputTensorName().c_str(),
          nullptr /* existing_buffer */, 0 /* existing_buffer_byte_size */,
          allowed_input_types, &input_buffer, &input_buffer_byte_size,
          &input_buffer_memory_type, &input_buffer_memory_type_id));

  // Finalize the collector. If 'true' is returned, 'input_buffer'
  // will not be valid until the backend synchronizes the CUDA
  // stream or event that was used when creating the collector. For
  // this backend, GPU is not supported and so no CUDA sync should
  // be needed; so if 'true' is returned simply log an error.
  const bool need_cuda_input_sync = collector.Finalize();
  if (need_cuda_input_sync) {
    LOG_MESSAGE(
        TRITONSERVER_LOG_ERROR,
        "'recommended' backend: unexpected CUDA sync required by collector");
  }

  // 'input_buffer' contains the batched input tensor. The backend can
  // implement whatever logic is necessary to produce the output
  // tensor. This backend simply logs the input tensor value and then
  // returns the input tensor value in the output tensor so no actual
  // computation is needed.

  uint64_t compute_start_ns = 0;
  SET_TIMESTAMP(compute_start_ns);

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("model ") + model_state->Name() + ": requests in batch " +
       std::to_string(request_count))
          .c_str());
  std::string tstr;
  IGNORE_ERROR(BufferAsTypedString(
      tstr, input_buffer, input_buffer_byte_size,
      model_state->TensorDataType()));
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("batched " + model_state->InputTensorName() + " value: ") +
       tstr)
          .c_str());

  const char* output_buffer = input_buffer;
  TRITONSERVER_MemoryType output_buffer_memory_type = input_buffer_memory_type;
  int64_t output_buffer_memory_type_id = input_buffer_memory_type_id;

  uint64_t compute_end_ns = 0;
  SET_TIMESTAMP(compute_end_ns);

  bool supports_first_dim_batching;
  RESPOND_ALL_AND_SET_NULL_IF_ERROR(
      responses, request_count,
      model_state->SupportsFirstDimBatching(&supports_first_dim_batching));

  std::vector<int64_t> tensor_shape;
  RESPOND_ALL_AND_SET_NULL_IF_ERROR(
      responses, request_count, model_state->TensorShape(tensor_shape));

  // Because the output tensor values are concatenated into a single
  // contiguous 'output_buffer', the backend must "scatter" them out
  // to the individual response output tensors. The backend utilities
  // provide a "responder" to facilitate this scattering process.
  // BackendOutputResponder does NOT support TRITONSERVER_TYPE_BYTES
  // data type.

  // The 'responders's ProcessTensor function will copy the portion of
  // 'output_buffer' corresonding to each request's output into the
  // response for that request.
  BackendOutputResponder responder(
      requests, request_count, &responses, model_state->TritonMemoryManager(),
      supports_first_dim_batching, false /* pinned_enabled */,
      nullptr /* stream*/);

  responder.ProcessTensor(
      model_state->OutputTensorName().c_str(), model_state->TensorDataType(),
      tensor_shape, output_buffer, output_buffer_memory_type,
      output_buffer_memory_type_id);

  // Finalize the responder. If 'true' is returned, the output
  // tensors' data will not be valid until the backend synchronizes
  // the CUDA stream or event that was used when creating the
  // responder. For this backend, GPU is not supported and so no CUDA
  // sync should be needed; so if 'true' is returned simply log an
  // error.
  const bool need_cuda_output_sync = responder.Finalize();
  if (need_cuda_output_sync) {
    LOG_MESSAGE(
        TRITONSERVER_LOG_ERROR,
        "'recommended' backend: unexpected CUDA sync required by responder");
  }

  // Send all the responses that haven't already been sent because of
  // an earlier error.
  for (auto& response : responses) {
    if (response != nullptr) {
      LOG_IF_ERROR(
          TRITONBACKEND_ResponseSend(
              response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, nullptr),
          "failed to send response");
    }
  }

  uint64_t exec_end_ns = 0;
  SET_TIMESTAMP(exec_end_ns);

#ifdef TRITON_ENABLE_STATS
  // For batch statistics need to know the total batch size of the
  // requests. This is not necessarily just the number of requests,
  // because if the model supports batching then any request can be a
  // batched request itself.
  size_t total_batch_size = 0;
  if (!supports_first_dim_batching) {
    total_batch_size = request_count;
  } else {
    for (uint32_t r = 0; r < request_count; ++r) {
      auto& request = requests[r];
      TRITONBACKEND_Input* input = nullptr;
      LOG_IF_ERROR(
          TRITONBACKEND_RequestInputByIndex(request, 0 /* index */, &input),
          "failed getting request input");
      if (input != nullptr) {
        const int64_t* shape = nullptr;
        LOG_IF_ERROR(
            TRITONBACKEND_InputProperties(
                input, nullptr, nullptr, &shape, nullptr, nullptr, nullptr),
            "failed getting input properties");
        if (shape != nullptr) {
          total_batch_size += shape[0];
        }
      }
    }
  }
#else
  (void)exec_start_ns;
  (void)exec_end_ns;
  (void)compute_start_ns;
  (void)compute_end_ns;
#endif  // TRITON_ENABLE_STATS

  // Report statistics for each request, and then release the request.
  for (uint32_t r = 0; r < request_count; ++r) {
    auto& request = requests[r];

#ifdef TRITON_ENABLE_STATS
    LOG_IF_ERROR(
        TRITONBACKEND_ModelInstanceReportStatistics(
            instance_state->TritonModelInstance(), request,
            (responses[r] != nullptr) /* success */, exec_start_ns,
            compute_start_ns, compute_end_ns, exec_end_ns),
        "failed reporting request statistics");
#endif  // TRITON_ENABLE_STATS

    LOG_IF_ERROR(
        TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL),
        "failed releasing request");
  }

#ifdef TRITON_ENABLE_STATS
  // Report batch statistics.
  LOG_IF_ERROR(
      TRITONBACKEND_ModelInstanceReportBatchStatistics(
          instance_state->TritonModelInstance(), total_batch_size,
          exec_start_ns, compute_start_ns, compute_end_ns, exec_end_ns),
      "failed reporting batch request statistics");
#endif  // TRITON_ENABLE_STATS

  return nullptr;  // success
}

}  // extern "C"

}}}  // namespace triton::backend::recommended
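For orientation, the backend above simply echoes its single input tensor into its single output. Against the recommended_models/batching configuration added later in this commit (tensor names INPUT/OUTPUT, FP32, dims [ 4, 4 ], max_batch_size 8), a minimal synchronous check could look roughly like the sketch below; the server URL is an assumption, and the committed examples/clients/recommended_client shows the fuller asynchronous variant.

# Sketch only (not part of the commit): one synchronous request to the
# "batching" model served by the "recommended" backend; the output should
# equal the input because the backend copies the input tensor to the output.
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url='localhost:8000')  # assumed URL

data = np.random.rand(1, 4, 4).astype(np.float32)
inp = httpclient.InferInput('INPUT', [1, 4, 4], "FP32")
inp.set_data_from_numpy(data)

result = client.infer('batching', [inp])
assert np.allclose(result.as_numpy('OUTPUT'), data)  # echo behavior
print('PASS')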
3rdparty/backend-r22.12/examples/clients/bls_client (new file, mode 100644)
#!/usr/bin/python
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import argparse
import numpy as np
import tritonhttpclient as httpclient
from tritonclientutils import np_to_triton_dtype

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')

    FLAGS = parser.parse_args()

    model_name = "bls_fp32"
    shape = [16]

    with httpclient.InferenceServerClient(url=FLAGS.url) as client:
        input0_data = np.random.rand(*shape).astype(np.float32)
        input1_data = np.random.rand(*shape).astype(np.float32)
        inputs = [
            httpclient.InferInput("INPUT0", input0_data.shape,
                                  np_to_triton_dtype(input0_data.dtype)),
            httpclient.InferInput("INPUT1", input1_data.shape,
                                  np_to_triton_dtype(input1_data.dtype)),
        ]

        inputs[0].set_data_from_numpy(input0_data)
        inputs[1].set_data_from_numpy(input1_data)

        outputs = [
            httpclient.InferRequestedOutput("OUTPUT0"),
            httpclient.InferRequestedOutput("OUTPUT1"),
        ]

        response = client.infer(model_name,
                                inputs,
                                request_id=str(1),
                                outputs=outputs)

        result = response.get_response()
        output0_data = response.as_numpy("OUTPUT0")
        output1_data = response.as_numpy("OUTPUT1")

        print("INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
            input0_data, input1_data, output0_data))
        print("INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
            input0_data, input1_data, output1_data))

        if not np.allclose(input0_data + input1_data, output0_data):
            print("error: incorrect sum")
            sys.exit(1)

        if not np.allclose(input0_data - input1_data, output1_data):
            print("error: incorrect difference")
            sys.exit(1)

        print('\nPASS')
        sys.exit(0)
3rdparty/backend-r22.12/examples/clients/minimal_client (new file, mode 100644)
#!/usr/bin/env python
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import sys

import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')

    FLAGS = parser.parse_args()

    # For the HTTP client, need to specify large enough concurrency to
    # issue all the inference requests to the server in parallel. For
    # this example we want to be able to send 2 requests concurrently.
    try:
        concurrent_request_count = 2
        triton_client = httpclient.InferenceServerClient(
            url=FLAGS.url, concurrency=concurrent_request_count)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # First send a single request to the nonbatching model.
    print('=========')
    input0_data = np.array([1, 2, 3, 4], dtype=np.int32)
    print('Sending request to nonbatching model: IN0 = {}'.format(input0_data))

    inputs = [httpclient.InferInput('IN0', [4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)

    result = triton_client.infer('nonbatching', inputs)

    print('Response: {}'.format(result.get_response()))
    print('OUT0 = {}'.format(result.as_numpy('OUT0')))

    # Send 2 requests to the batching model. Because these are sent
    # asynchronously and Triton's dynamic batcher is configured to
    # delay up to 5 seconds when forming a batch for this model, we
    # expect these 2 requests to be batched within Triton and sent to
    # the minimal backend as a single batch.
    print('\n=========')
    async_requests = []

    input0_data = np.array([[10, 11, 12, 13]], dtype=np.int32)
    print('Sending request to batching model: IN0 = {}'.format(input0_data))

    inputs = [httpclient.InferInput('IN0', [1, 4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    input0_data = np.array([[20, 21, 22, 23]], dtype=np.int32)
    print('Sending request to batching model: IN0 = {}'.format(input0_data))

    inputs = [httpclient.InferInput('IN0', [1, 4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    for async_request in async_requests:
        # Get the result from the initiated asynchronous inference
        # request. This call will block till the server responds.
        result = async_request.get_result()

        print('Response: {}'.format(result.get_response()))
        print('OUT0 = {}'.format(result.as_numpy('OUT0')))
3rdparty/backend-r22.12/examples/clients/recommended_client (new file, mode 100644)
#!/usr/bin/env python
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import sys

import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')

    FLAGS = parser.parse_args()

    # For the HTTP client, need to specify large enough concurrency to
    # issue all the inference requests to the server in parallel. For
    # this example we want to be able to send 2 requests concurrently.
    try:
        concurrent_request_count = 2
        triton_client = httpclient.InferenceServerClient(
            url=FLAGS.url, concurrency=concurrent_request_count)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # Send 2 requests to the batching model. Because these are sent
    # asynchronously and Triton's dynamic batcher is configured to
    # delay up to 5 seconds when forming a batch for this model, we
    # expect these 2 requests to be batched within Triton and sent to
    # the backend as a single batch.
    #
    # The recommended backend can handle any model with 1 input and 1
    # output as long as the input and output datatype and shape are
    # the same. The batching model uses datatype FP32 and shape
    # [ 4, 4 ].
    print('\n=========')
    async_requests = []

    input0_data = np.array([[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3],
                             [3.0, 3.1, 3.2, 3.3], [4.0, 4.1, 4.2, 4.3]]],
                           dtype=np.float32)
    print('Sending request to batching model: input = {}'.format(input0_data))

    inputs = [httpclient.InferInput('INPUT', [1, 4, 4], "FP32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    input0_data = np.array([[[10.0, 10.1, 10.2, 10.3], [20.0, 20.1, 20.2, 20.3],
                             [30.0, 30.1, 30.2, 30.3], [40.0, 40.1, 40.2, 40.3]]],
                           dtype=np.float32)
    print('Sending request to batching model: input = {}'.format(input0_data))

    inputs = [httpclient.InferInput('INPUT', [1, 4, 4], "FP32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    for async_request in async_requests:
        # Get the result from the initiated asynchronous inference
        # request. This call will block till the server responds.
        result = async_request.get_result()

        print('Response: {}'.format(result.get_response()))
        print('OUTPUT = {}'.format(result.as_numpy('OUTPUT')))
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import json

import triton_python_backend_utils as pb_utils


# This model calculates the sum and difference of the INPUT0 and INPUT1 and put
# the results in OUTPUT0 and OUTPUT1 respectively. For more information
# regarding how this model.py was written, please refer to Python Backend.
class TritonPythonModel:

    def initialize(self, args):
        self.model_config = model_config = json.loads(args['model_config'])

        output0_config = pb_utils.get_output_config_by_name(
            model_config, "OUTPUT0")
        output1_config = pb_utils.get_output_config_by_name(
            model_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config['data_type'])
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output1_config['data_type'])

    def execute(self, requests):
        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(),
                            in_0.as_numpy() - in_1.as_numpy())

            out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                           out_0.astype(output0_dtype))
            out_tensor_1 = pb_utils.Tensor("OUTPUT1",
                                           out_1.astype(output1_dtype))

            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor_0, out_tensor_1])
            responses.append(inference_response)

        return responses

    def finalize(self):
        print('Cleaning up...')
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "addsub_python"
backend: "python"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
input [
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
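Taken together with the model.py above, this configuration means addsub_python accepts two FP32 tensors of shape [ 16 ] and returns their element-wise sum and difference. A direct call to it, bypassing the bls_fp32 model that the committed bls_client exercises, could look roughly like the following sketch; the server URL is an assumption.

# Sketch only (not part of the commit): call addsub_python directly over HTTP.
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url='localhost:8000')  # assumed URL

a = np.random.rand(16).astype(np.float32)
b = np.random.rand(16).astype(np.float32)

inputs = [httpclient.InferInput('INPUT0', [16], "FP32"),
          httpclient.InferInput('INPUT1', [16], "FP32")]
inputs[0].set_data_from_numpy(a)
inputs[1].set_data_from_numpy(b)

result = client.infer('addsub_python', inputs)
assert np.allclose(result.as_numpy('OUTPUT0'), a + b)  # element-wise sum
assert np.allclose(result.as_numpy('OUTPUT1'), a - b)  # element-wise difference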
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb (new file, mode 100644; binary file added)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt (new file, mode 100644)
name: "addsub_tf"
platform: "tensorflow_savedmodel"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "bls_fp32"
backend: "bls"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
input [
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep (new empty file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt (new file, mode 100644)
backend: "minimal"
max_batch_size: 8
dynamic_batching {
max_queue_delay_microseconds: 5000000
}
input [
{
name: "IN0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
output [
{
name: "OUT0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep (new empty file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt (new file, mode 100644)
backend: "minimal"
max_batch_size: 0
input [
{
name: "IN0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
output [
{
name: "OUT0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep (new empty file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt (new file, mode 100644)
backend: "recommended"
max_batch_size: 8
dynamic_batching {
max_queue_delay_microseconds: 5000000
}
input [
{
name: "INPUT"
data_type: TYPE_FP32
dims: [ 4, 4 ]
}
]
output [
{
name: "OUTPUT"
data_type: TYPE_FP32
dims: [ 4, 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/include/triton/backend/backend_common.h (new file, mode 100644)
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <chrono>
#include <condition_variable>
#include <deque>
#include <iostream>
#include <mutex>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
#include "triton/common/error.h"
#include "triton/core/tritonbackend.h"
#define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
#define TRITONJSON_STATUSRETURN(M) \
return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str())
#define TRITONJSON_STATUSSUCCESS nullptr
#include "triton/common/triton_json.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace backend {
#define IGNORE_ERROR(X) \
do { \
TRITONSERVER_Error* ie_err__ = (X); \
if (ie_err__ != nullptr) { \
TRITONSERVER_ErrorDelete(ie_err__); \
} \
} while (false)
#define LOG_IF_ERROR(X, MSG) \
do { \
TRITONSERVER_Error* lie_err__ = (X); \
if (lie_err__ != nullptr) { \
IGNORE_ERROR(TRITONSERVER_LogMessage( \
TRITONSERVER_LOG_INFO, __FILE__, __LINE__, \
(std::string(MSG) + ": " + TRITONSERVER_ErrorCodeString(lie_err__) + \
" - " + TRITONSERVER_ErrorMessage(lie_err__)) \
.c_str())); \
TRITONSERVER_ErrorDelete(lie_err__); \
} \
} while (false)
#define LOG_MESSAGE(LEVEL, MSG) \
do { \
LOG_IF_ERROR( \
TRITONSERVER_LogMessage(LEVEL, __FILE__, __LINE__, MSG), \
("failed to log message: ")); \
} while (false)
#define RETURN_ERROR_IF_FALSE(P, C, MSG) \
do { \
if (!(P)) { \
return TRITONSERVER_ErrorNew(C, (MSG).c_str()); \
} \
} while (false)
#define RETURN_ERROR_IF_TRUE(P, C, MSG) \
do { \
if ((P)) { \
return TRITONSERVER_ErrorNew(C, (MSG).c_str()); \
} \
} while (false)
#define RETURN_IF_ERROR(X) \
do { \
TRITONSERVER_Error* rie_err__ = (X); \
if (rie_err__ != nullptr) { \
return rie_err__; \
} \
} while (false)
#ifdef TRITON_ENABLE_GPU
#define LOG_IF_CUDA_ERROR(X, MSG) \
do { \
cudaError_t lice_err__ = (X); \
if (lice_err__ != cudaSuccess) { \
IGNORE_ERROR(TRITONSERVER_LogMessage( \
TRITONSERVER_LOG_INFO, __FILE__, __LINE__, \
(std::string(MSG) + ": " + cudaGetErrorString(lice_err__)) \
.c_str())); \
} \
} while (false)
#define RETURN_IF_CUDA_ERROR(X, C, MSG) \
do { \
cudaError_t rice_err__ = (X); \
if (rice_err__ != cudaSuccess) { \
return TRITONSERVER_ErrorNew( \
C, ((MSG) + ": " + cudaGetErrorString(rice_err__)).c_str()); \
} \
} while (false)
#endif // TRITON_ENABLE_GPU
#define RESPOND_AND_SET_NULL_IF_ERROR(RESPONSE_PTR, X) \
do { \
TRITONSERVER_Error* rarie_err__ = (X); \
if (rarie_err__ != nullptr) { \
if (*RESPONSE_PTR != nullptr) { \
LOG_IF_ERROR( \
TRITONBACKEND_ResponseSend( \
*RESPONSE_PTR, TRITONSERVER_RESPONSE_COMPLETE_FINAL, \
rarie_err__), \
"failed to send error response"); \
*RESPONSE_PTR = nullptr; \
} \
TRITONSERVER_ErrorDelete(rarie_err__); \
} \
} while (false)
#define RESPOND_ALL_AND_SET_NULL_IF_ERROR(RESPONSES, RESPONSES_COUNT, X) \
do { \
TRITONSERVER_Error* raasnie_err__ = (X); \
if (raasnie_err__ != nullptr) { \
for (size_t ridx = 0; ridx < RESPONSES_COUNT; ++ridx) { \
if (RESPONSES[ridx] != nullptr) { \
LOG_IF_ERROR( \
TRITONBACKEND_ResponseSend( \
RESPONSES[ridx], TRITONSERVER_RESPONSE_COMPLETE_FINAL, \
raasnie_err__), \
"failed to send error response"); \
RESPONSES[ridx] = nullptr; \
} \
} \
TRITONSERVER_ErrorDelete(raasnie_err__); \
} \
} while (false)
#define RESPOND_ALL_AND_SET_TRUE_IF_ERROR(RESPONSES, RESPONSES_COUNT, BOOL, X) \
do { \
TRITONSERVER_Error* raasnie_err__ = (X); \
if (raasnie_err__ != nullptr) { \
BOOL = true; \
for (size_t ridx = 0; ridx < RESPONSES_COUNT; ++ridx) { \
if (RESPONSES[ridx] != nullptr) { \
LOG_IF_ERROR( \
TRITONBACKEND_ResponseSend( \
RESPONSES[ridx], TRITONSERVER_RESPONSE_COMPLETE_FINAL, \
raasnie_err__), \
"failed to send error response"); \
RESPONSES[ridx] = nullptr; \
} \
} \
TRITONSERVER_ErrorDelete(raasnie_err__); \
} \
} while (false)
#ifdef TRITON_ENABLE_STATS
#define TIMESPEC_TO_NANOS(TS) ((TS).tv_sec * 1000000000 + (TS).tv_nsec)
#define SET_TIMESTAMP(TS_NS) \
{ \
TS_NS = std::chrono::duration_cast<std::chrono::nanoseconds>( \
std::chrono::steady_clock::now().time_since_epoch()) \
.count(); \
}
#define DECL_TIMESTAMP(TS_NS) \
uint64_t TS_NS; \
SET_TIMESTAMP(TS_NS);
#else
#define DECL_TIMESTAMP(TS_NS)
#define SET_TIMESTAMP(TS_NS)
#endif // TRITON_ENABLE_STATS
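// Illustrative usage sketch (an assumption, not part of the original header):
// the timestamp macros are commonly used to bracket execution so the elapsed
// time can be reported through the TRITONBACKEND statistics APIs, e.g.
//
//   DECL_TIMESTAMP(exec_start_ns);
//   // ... run inference for the batch ...
//   DECL_TIMESTAMP(exec_end_ns);
//   // exec_end_ns - exec_start_ns can then be reported via
//   // TRITONBACKEND_ModelInstanceReportStatistics.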
#ifndef TRITON_ENABLE_GPU
using cudaStream_t = void*;
#endif // !TRITON_ENABLE_GPU
/// Convenience deleter for TRITONBACKEND_ResponseFactory.
struct ResponseFactoryDeleter {
  void operator()(TRITONBACKEND_ResponseFactory* f)
  {
    LOG_IF_ERROR(
        TRITONBACKEND_ResponseFactoryDelete(f),
        "failed deleting response factory");
  }
};
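// Illustrative usage sketch (an assumption, not part of the original header):
// the deleter is intended for use with smart pointers so the response factory
// is always released, e.g.
//
//   TRITONBACKEND_ResponseFactory* factory_ptr = nullptr;
//   RETURN_IF_ERROR(TRITONBACKEND_ResponseFactoryNew(&factory_ptr, request));
//   std::unique_ptr<TRITONBACKEND_ResponseFactory, ResponseFactoryDeleter>
//       factory(factory_ptr);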
// A representation of the BatchInput message in model config
class BatchInput {
 public:
  enum class Kind {
    BATCH_ELEMENT_COUNT,
    BATCH_ACCUMULATED_ELEMENT_COUNT,
    BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO,
    BATCH_MAX_ELEMENT_COUNT_AS_SHAPE,
    BATCH_ITEM_SHAPE,
    BATCH_ITEM_SHAPE_FLATTEN
  };

  static TRITONSERVER_Error* ParseFromModelConfig(
      triton::common::TritonJson::Value& config,
      std::vector<BatchInput>* batch_inputs);

  const std::vector<std::string>& TargetNames() const { return target_names_; }
  TRITONSERVER_DataType DataType() const { return data_type_; }
  Kind BatchInputKind() const { return kind_; }
  std::string BatchInputKindString() const { return kind_str_; }
  const std::vector<std::string>& SourceInputs() const
  {
    return source_inputs_;
  }

 private:
  TRITONSERVER_Error* Init(triton::common::TritonJson::Value& bi_config);

  Kind kind_;
  std::string kind_str_;
  std::vector<std::string> target_names_;
  TRITONSERVER_DataType data_type_;
  std::vector<std::string> source_inputs_;
};
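// Illustrative usage sketch (an assumption, not part of the original header):
// batch inputs declared in config.pbtxt are typically parsed once at model
// load time from the parsed model configuration ('model_config' is assumed to
// be a triton::common::TritonJson::Value), e.g.
//
//   std::vector<BatchInput> batch_inputs;
//   RETURN_IF_ERROR(
//       BatchInput::ParseFromModelConfig(model_config, &batch_inputs));
//   for (const auto& bi : batch_inputs) {
//     LOG_MESSAGE(
//         TRITONSERVER_LOG_INFO,
//         (std::string("batch input kind: ") + bi.BatchInputKindString())
//             .c_str());
//   }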
// A representation of the BatchOutput message in model config
class BatchOutput {
 public:
  enum class Kind { BATCH_SCATTER_WITH_INPUT_SHAPE };

  static TRITONSERVER_Error* ParseFromModelConfig(
      triton::common::TritonJson::Value& config,
      std::vector<BatchOutput>* batch_outputs);

  const std::vector<std::string>& TargetNames() const { return target_names_; }
  TRITONSERVER_DataType DataType() const { return data_type_; }
  const std::vector<int64_t>& OutputShape() const { return shape_; }
  Kind BatchOutputKind() const { return kind_; }
  const std::vector<std::string>& SourceInputs() const
  {
    return source_inputs_;
  }

 private:
  Kind kind_;
  std::vector<std::string> target_names_;
  TRITONSERVER_DataType data_type_;
  std::vector<int64_t> shape_;
  std::vector<std::string> source_inputs_;
};
struct CopyParams {
  CopyParams(void* dst, const void* src, const size_t byte_size)
      : dst_(dst), src_(src), byte_size_(byte_size)
  {
  }

  void* dst_;
  const void* src_;
  const size_t byte_size_;
};
/// The value for a dimension in a shape that indicates that that
/// dimension can take on any size.
constexpr int WILDCARD_DIM = -1;

constexpr char kTensorRTExecutionAccelerator[] = "tensorrt";
constexpr char kOpenVINOExecutionAccelerator[] = "openvino";
constexpr char kGPUIOExecutionAccelerator[] = "gpu_io";
constexpr char kAutoMixedPrecisionExecutionAccelerator[] =
    "auto_mixed_precision";

TRITONSERVER_MemoryType GetUsePinnedMemoryType(
    TRITONSERVER_MemoryType ref_buffer_type);

TRITONSERVER_Error* CommonErrorToTritonError(triton::common::Error error);

TRITONSERVER_Error_Code StatusCodeToTritonCode(
    triton::common::Error::Code error_code);
/// Parse an array in a JSON object into the corresponding shape. The
/// array must be composed of integers.
///
/// \param io The JSON object containing the member array.
/// \param name The name of the array member in the JSON object.
/// \param shape Returns the shape.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseShape(
    common::TritonJson::Value& io, const std::string& name,
    std::vector<int64_t>* shape);
/// Return the string representation of a shape.
///
/// \param dims The shape dimensions.
/// \param dims_count The number of dimensions.
/// \return The string representation.
std::string ShapeToString(const int64_t* dims, const size_t dims_count);
/// Return the string representation of a shape.
///
/// \param shape The shape as a vector of dimensions.
/// \return The string representation.
std::string ShapeToString(const std::vector<int64_t>& shape);
/// Return the number of elements of a shape.
///
/// \param dims The shape dimensions.
/// \param dims_count The number of dimensions.
/// \return The number of elements.
int64_t GetElementCount(const int64_t* dims, const size_t dims_count);
/// Return the number of elements of a shape.
///
/// \param shape The shape as a vector of dimensions.
/// \return The number of elements.
int64_t GetElementCount(const std::vector<int64_t>& shape);
/// Get the size, in bytes, of a tensor based on datatype and
/// shape.
/// \param dtype The data-type.
/// \param dims The shape.
/// \return The size, in bytes, of the corresponding tensor, or -1 if
/// unable to determine the size.
int64_t GetByteSize(
    const TRITONSERVER_DataType& dtype, const std::vector<int64_t>& dims);
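// Illustrative usage sketch (an assumption, not part of the original header):
// element count and byte size are commonly combined to size output buffers,
// e.g. for an FP32 tensor of shape [8, 128]:
//
//   std::vector<int64_t> shape{8, 128};
//   int64_t element_count = GetElementCount(shape);                  // 1024
//   int64_t byte_size = GetByteSize(TRITONSERVER_TYPE_FP32, shape);  // 4096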
/// Get an input tensor's contents into a buffer. This overload expects
/// both 'buffer' and buffers of the input to be in CPU.
///
/// \param request The inference request.
/// \param input_name The name of the input buffer.
/// \param buffer The buffer where the input tensor content is copied into.
/// \param buffer_byte_size Acts as both input and output. On input
/// gives the size of 'buffer', in bytes. The function will fail if
/// the buffer is not large enough to hold the input tensor
/// contents. Returns the size of the input tensor data returned in
/// 'buffer'.
/// \param host_policy_name The host policy name to look up the input buffer.
/// Default input buffer will be used if nullptr is provided.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ReadInputTensor(
    TRITONBACKEND_Request* request, const std::string& input_name,
    char* buffer, size_t* buffer_byte_size,
    const char* host_policy_name = nullptr);
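// Illustrative usage sketch (an assumption, not part of the original header):
// reading a small scalar control input (here a hypothetical tensor named
// "START") into local CPU memory, e.g.
//
//   int32_t start = 0;
//   size_t start_byte_size = sizeof(start);
//   RETURN_IF_ERROR(ReadInputTensor(
//       request, "START", reinterpret_cast<char*>(&start), &start_byte_size));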
/// Get an input tensor's contents into a buffer. This overload of
/// 'ReadInputTensor' supports input buffers that can be in any memory.
///
/// \param request The inference request.
/// \param input_name The name of the input buffer.
/// \param buffer The buffer where the input tensor content is copied into.
/// \param buffer_byte_size Acts as both input and output. On input
/// gives the size of 'buffer', in bytes. The function will fail if
/// the buffer is not large enough to hold the input tensor
/// contents. Returns the size of the input tensor data returned in
/// 'buffer'.
/// \param host_policy_name The host policy name to look up the input buffer.
/// Default input buffer will be used if nullptr is provided.
/// \param memory_type The memory type of the buffer provided.
/// \param memory_type_id The memory type id of the buffer provided.
/// \param cuda_stream specifies the stream to be associated with, and 0 can be
/// passed for default stream.
/// \param cuda_used returns whether a CUDA memory copy is initiated. If true,
/// the caller should synchronize on the given 'cuda_stream' to ensure data copy
/// is completed.
/// \param copy_on_stream whether the memory copies should be performed in cuda
/// host functions on the 'cuda_stream'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ReadInputTensor(
    TRITONBACKEND_Request* request, const std::string& input_name,
    char* buffer, size_t* buffer_byte_size,
    TRITONSERVER_MemoryType memory_type, int64_t memory_type_id,
    cudaStream_t cuda_stream, bool* cuda_used,
    const char* host_policy_name = nullptr, const bool copy_on_stream = false);
/// Validate that an input matches one of the allowed input names.
/// \param io The model input.
/// \param allowed The set of allowed input names.
/// \return The error status. A non-OK status indicates the input
/// is not valid.
TRITONSERVER_Error* CheckAllowedModelInput(
    common::TritonJson::Value& io, const std::set<std::string>& allowed);
/// Validate that an output matches one of the allowed output names.
/// \param io The model output.
/// \param allowed The set of allowed output names.
/// \return The error status. A non-OK status indicates the output
/// is not valid.
TRITONSERVER_Error* CheckAllowedModelOutput(
    common::TritonJson::Value& io, const std::set<std::string>& allowed);
/// Get the tensor name, false value, and true value for a boolean
/// sequence batcher control kind. If 'required' is true then must
/// find a tensor for the control. If 'required' is false, return
/// 'tensor_name' as empty-string if the control is not mapped to any
/// tensor.
///
/// \param batcher The JSON object of the sequence batcher.
/// \param model_name The name of the model.
/// \param control_kind The kind of control tensor to look for.
/// \param required Whether the tensor must be specified.
/// \param tensor_name Returns the name of the tensor.
/// \param tensor_datatype Returns the data type of the tensor.
/// \param fp32_false_value Returns the float value for false if
/// the tensor type is FP32.
/// \param fp32_true_value Returns the float value for true if
/// the tensor type is FP32.
/// \param int32_false_value Returns the int value for false if
/// the tensor type is INT32.
/// \param int32_true_value Returns the int value for true if
/// the tensor type is INT32.
/// \param bool_false_value Returns the bool value for false if
/// the tensor type is BOOL.
/// \param bool_true_value Returns the bool value for true if
/// the tensor type is BOOL.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* GetBooleanSequenceControlProperties(
    common::TritonJson::Value& batcher, const std::string& model_name,
    const std::string& control_kind, const bool required,
    std::string* tensor_name, std::string* tensor_datatype,
    float* fp32_false_value, float* fp32_true_value,
    int32_t* int32_false_value, int32_t* int32_true_value,
    bool* bool_false_value, bool* bool_true_value);
/// Get the tensor name and datatype for a non-boolean sequence
/// batcher control kind. If 'required' is true then must find a
/// tensor for the control. If 'required' is false, return
/// 'tensor_name' as empty-string if the control is not mapped to any
/// tensor. 'tensor_datatype' returns the required datatype for the
/// control.
///
/// \param batcher The JSON object of the sequence batcher.
/// \param model_name The name of the model.
/// \param control_kind The kind of control tensor to look for.
/// \param required Whether the tensor must be specified.
/// \param tensor_name Returns the name of the tensor.
/// \param tensor_datatype Returns the data type of the tensor.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* GetTypedSequenceControlProperties(
    common::TritonJson::Value& batcher, const std::string& model_name,
    const std::string& control_kind, const bool required,
    std::string* tensor_name, std::string* tensor_datatype);
/// Create and send an error response for a set of requests. This
/// function takes ownership of 'response_err' and so the caller must
/// not access or delete it after this call returns.
///
/// \param requests The requests.
/// \param request_count The number of 'requests'.
/// \param response_err The error to send to each request.
/// \param release_request If true, the requests will be released after
/// sending the error responses and the request pointers are set to
/// nullptr.
void RequestsRespondWithError(
    TRITONBACKEND_Request** requests, const uint32_t request_count,
    TRITONSERVER_Error* response_err, const bool release_request = true);
/// Send an error response for a set of responses. This function takes
/// ownership of 'response_err' and so the caller must not access or
/// delete it after this call returns.
///
/// \param responses The responses.
/// \param response_count The number of 'responses'.
/// \param response_err The error to send.
void SendErrorForResponses(
    std::vector<TRITONBACKEND_Response*>* responses,
    const uint32_t response_count, TRITONSERVER_Error* response_err);
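// Illustrative usage sketch (an assumption, not part of the original header):
// a backend commonly aborts a whole batch by sending the same error to every
// remaining response, e.g.
//
//   SendErrorForResponses(
//       &responses, request_count,
//       TRITONSERVER_ErrorNew(
//           TRITONSERVER_ERROR_INTERNAL, "failed to run inference"));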
/// Copy buffer from 'src' to 'dst' for given 'byte_size'. The buffer location
/// is identified by the memory type and id, and the corresponding copy will be
/// initiated.
/// \param msg The message to be prepended in error message.
/// \param src_memory_type The memory type of the source buffer.
/// \param src_memory_type_id The memory type id of the source buffer.
/// \param dst_memory_type The memory type of the destination buffer.
/// \param dst_memory_type_id The memory type id of the destination buffer.
/// \param byte_size The byte size of the source buffer.
/// \param src The pointer to the source buffer.
/// \param dst The pointer to the destination buffer.
/// \param cuda_stream specifies the stream to be associated with, and 0 can be
/// passed for default stream.
/// \param cuda_used returns whether a CUDA memory copy is initiated. If true,
/// the caller should synchronize on the given 'cuda_stream' to ensure data copy
/// is completed.
/// \param copy_on_stream whether the memory copies should be performed in cuda
/// host functions on the 'cuda_stream'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* CopyBuffer(
    const std::string& msg, const TRITONSERVER_MemoryType src_memory_type,
    const int64_t src_memory_type_id,
    const TRITONSERVER_MemoryType dst_memory_type,
    const int64_t dst_memory_type_id, const size_t byte_size, const void* src,
    void* dst, cudaStream_t cuda_stream, bool* cuda_used,
    const bool copy_on_stream = false);
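// Illustrative usage sketch (an assumption, not part of the original header):
// copying an output tensor from a CPU staging buffer into the destination
// buffer provided by Triton (which may be GPU memory); the variable names are
// hypothetical, e.g.
//
//   bool cuda_used = false;
//   RETURN_IF_ERROR(CopyBuffer(
//       "OUTPUT0", TRITONSERVER_MEMORY_CPU, 0 /* src_memory_type_id */,
//       dst_memory_type, dst_memory_type_id, byte_size, cpu_buffer,
//       dst_buffer, stream, &cuda_used));
//   if (cuda_used) {
//     cudaStreamSynchronize(stream);
//   }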
/// Does a file or directory exist?
/// \param path The path to check for existence.
/// \param exists Returns true if file/dir exists
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* FileExists(const std::string& path, bool* exists);
/// Read a text file into a string.
/// \param path The path of the file.
/// \param contents Returns the contents of the file.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ReadTextFile(
    const std::string& path, std::string* contents);
/// Is a path a directory?
/// \param path The path to check.
/// \param is_dir Returns true if path represents a directory
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* IsDirectory(const std::string& path, bool* is_dir);
/// Join path segments into a longer path
/// \param segments The path segments.
/// \return the path formed by joining the segments.
std::string JoinPath(std::initializer_list<std::string> segments);
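// Illustrative usage sketch (an assumption, not part of the original header):
// locating a model artifact under the version directory; 'repository_path',
// 'version', and the artifact name "model.plan" are hypothetical, e.g.
//
//   std::string artifact = JoinPath(
//       {repository_path, std::to_string(version), "model.plan"});
//   bool exists = false;
//   RETURN_IF_ERROR(FileExists(artifact, &exists));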
/// Returns the content in the model version path and the path to the content as
/// key-value pairs.
/// \param model_repository_path The path to the model repository.
/// \param version The version of the model.
/// \param ignore_directories Whether the directories will be ignored.
/// \param ignore_files Whether the files will be ignored.
/// \param model_paths Returns the content in the model version path and
/// the path to the content.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ModelPaths(
    const std::string& model_repository_path, uint64_t version,
    const bool ignore_directories, const bool ignore_files,
    std::unordered_map<std::string, std::string>* model_paths);
/// Create a CUDA stream appropriate for GPU<->CPU data transfer
/// operations for a given GPU device. The caller takes ownership of
/// the stream. 'stream' returns nullptr if GPU support is disabled.
///
/// \param device_id The ID of the GPU.
/// \param cuda_stream_priority The stream priority. Use 0 for normal priority.
/// \param stream Returns the created stream.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* CreateCudaStream(
    const int device_id, const int cuda_stream_priority,
    cudaStream_t* stream);
/// Parse the string as long long integer.
///
/// \param value The string.
/// \param parse_value The long long integral value of the string.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseLongLongValue(
    const std::string& value, int64_t* parsed_value);
/// Parse the string as unsigned long long integer.
///
/// \param value The string.
/// \param parse_value The unsigned long long integral value of the string.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseUnsignedLongLongValue(
    const std::string& value, uint64_t* parsed_value);
/// Parse the string as boolean.
///
/// \param value The string.
/// \param parse_value The boolean value of the string.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseBoolValue(
    const std::string& value, bool* parsed_value);
/// Parse the string as integer.
///
/// \param value The string.
/// \param parse_value The integral value of the string.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseIntValue(const std::string& value, int* parsed_value);
/// Parse the string as double.
///
/// \param value The string.
/// \param parse_value The double value of the string.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseDoubleValue(
    const std::string& value, double* parsed_value);
/// Return the value of the specified key in a JSON object.
///
/// \param params The JSON object containing the key-value mapping.
/// \param key The key to look up the value in the JSON object.
/// \param value Returns the value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* GetParameterValue(
    triton::common::TritonJson::Value& params, const std::string& key,
    std::string* value);
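// Illustrative usage sketch (an assumption, not part of the original header):
// these helpers convert string-valued parameters from the model config; the
// parameter key "intra_op_thread_count" is hypothetical, e.g.
//
//   std::string value_str;
//   RETURN_IF_ERROR(
//       GetParameterValue(params, "intra_op_thread_count", &value_str));
//   int thread_count = 0;
//   RETURN_IF_ERROR(ParseIntValue(value_str, &thread_count));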
/// Return the Triton server data type of the data type string specified
/// in model config JSON.
///
/// \param data_type_str The string representation of the data type.
/// \return the Triton server data type.
TRITONSERVER_DataType ModelConfigDataTypeToTritonServerDataType(
    const std::string& data_type_str);
/// Try to parse the requested parameter.
///
/// \param params The param in model config
/// \param mkey Key in the model config.
/// \param value The parsed string value.
/// \param default_value Default value to use when key is not found.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    std::string* value, const std::string& default_value);
/// Try to parse the requested parameter.
///
/// \param params The param in model config
/// \param mkey Key in the model config.
/// \param value The parsed int value.
/// \param default_value Default value to use when key is not found.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    int* value, const int& default_value);
/// Try to parse the requested parameter.
///
/// \param params The param in model config
/// \param mkey Key in the model config.
/// \param value The parsed bool value.
/// \param default_value Default value to use when key is not found.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    bool* value, const bool& default_value);
/// Try to parse the requested parameter.
///
/// \param params The param in model config
/// \param mkey Key in the model config.
/// \param value The parsed uint64 value.
/// \param default_value Default value to use when key is not found.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    uint64_t* value, const uint64_t& default_value);
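// Illustrative usage sketch (an assumption, not part of the original header):
// reading an optional model-config parameter with a fallback default; the
// key "max_queue_size" is hypothetical, e.g.
//
//   triton::common::TritonJson::Value params;
//   model_config.Find("parameters", &params);
//   int max_queue_size = 0;
//   RETURN_IF_ERROR(TryParseModelStringParameter(
//       params, "max_queue_size", &max_queue_size, 4 /* default */));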
/// Get a string representation of a tensor buffer.
///
/// \param str Returns the string.
/// \param buffer The base pointer to the tensor buffer.
/// \param buffer_byte_size The size of the buffer in bytes.
/// \param datatype The type of the tensor
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* BufferAsTypedString(
    std::string& str, const char* buffer, size_t buffer_byte_size,
    TRITONSERVER_DataType datatype);
/// Get the ID of the request as a string formatted for logging.
///
/// \param request Request of which to get the ID.
/// \return a formatted string for logging the request ID.
std::string GetRequestId(TRITONBACKEND_Request* request);

}}  // namespace triton::backend
3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h
0 → 100644
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <list>
#include <memory>
#include <string>
#include <vector>
#include "triton/backend/backend_common.h"
#include "triton/backend/backend_memory.h"
#include "triton/common/async_work_queue.h"
#include "triton/common/sync_queue.h"
#include "triton/core/tritonbackend.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace backend {
#ifndef TRITON_ENABLE_GPU
using cudaStream_t = void*;
using cudaEvent_t = void*;
#endif // !TRITON_ENABLE_GPU
//
// BackendInputCollector
//
class BackendInputCollector {
 public:
// The caller can optionally provide 'event' for internal synchronization
// instead of using 'stream'. If 'host_policy_name' is provided, it must be
// valid for the lifetime of the collector
  explicit BackendInputCollector(
      TRITONBACKEND_Request** requests, const uint32_t request_count,
      std::vector<TRITONBACKEND_Response*>* responses,
      TRITONBACKEND_MemoryManager* memory_manager, const bool pinned_enabled,
      cudaStream_t stream, cudaEvent_t event = nullptr,
      cudaEvent_t buffer_ready_event = nullptr,
      const size_t kernel_buffer_threshold = 0,
      const char* host_policy_name = nullptr,
      const bool copy_on_stream = false,
      const bool coalesce_request_input = false)
      : need_sync_(false), requests_(requests),
        request_count_(request_count), responses_(responses),
        memory_manager_(memory_manager), pinned_enabled_(pinned_enabled),
        use_async_cpu_copy_(triton::common::AsyncWorkQueue::WorkerCount() > 1),
        stream_(stream), event_(event),
        buffer_ready_event_(buffer_ready_event),
        kernel_buffer_threshold_(kernel_buffer_threshold),
        pending_pinned_byte_size_(0), pending_pinned_offset_(0),
        pending_copy_kernel_buffer_byte_size_(0),
        pending_copy_kernel_buffer_offset_(0),
        pending_copy_kernel_input_buffer_counts_(0), async_task_count_(0),
        host_policy_cstr_(host_policy_name), copy_on_stream_(copy_on_stream),
        coalesce_request_input_(coalesce_request_input)
  {
  }

  ~BackendInputCollector() = default;
// Process all requests for a named input tensor and return the
// concatenated values of those requests in a single contiguous
// buffer. This overload of the function can avoid data copy if the
// tensor values are already contiguous and the caller doesn't
// provide a destination 'buffer'.
//
// 'buffer' is used to determine whether the input should be placed at the
// 'buffer' provided by the caller. If 'buffer' == nullptr, the returned
// buffer will be managed by the BackendInputCollector object and
// has the same lifecycle as the BackendInputCollector object.
// 'buffer_byte_size' is the byte size of 'buffer' if it is not nullptr.
// 'allowed_input_types' is the ordered list of the memory type and id pairs
// that the returned buffer can be. It must only contain the memory type
// and id of 'buffer' if 'buffer' is not nullptr.
// 'dst_buffer' returns the contiguous buffer of the input tensor.
// 'dst_buffer_byte_size' the byte size of 'dst_buffer'.
// 'dst_memory_type' returns the memory type of 'dst_buffer'.
// 'dst_memory_type_id' returns the memory type id of 'dst_buffer'.
  TRITONSERVER_Error* ProcessTensor(
      const char* input_name, char* buffer, const size_t buffer_byte_size,
      const std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>>&
          allowed_input_types,
      const char** dst_buffer, size_t* dst_buffer_byte_size,
      TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id);
// Process all requests for a named input tensor and return the
// concatenated values of those requests in a single contiguous
// 'buffer'.
//
  // 'buffer' The buffer to hold the concatenated tensor values. Must
  // be large enough to hold all tensor values.
// 'buffer_byte_size' is the byte size of 'buffer'.
// 'dst_memory_type' The memory type of 'buffer'.
// 'dst_memory_type_id' The memory type id of 'buffer'.
  void ProcessTensor(
      const char* input_name, char* buffer, const size_t buffer_byte_size,
      const TRITONSERVER_MemoryType memory_type,
      const int64_t memory_type_id);
// Process the batch input and return its shape. Returning error indicates
// that the batch input can't be formed properly and the caller should abort
// the whole batch.
  TRITONSERVER_Error* BatchInputShape(
      const BatchInput& batch_input, std::vector<int64_t>* shape);
// Process the batch input and derive its value into 'buffer'. Returning
// error indicates that the batch input can't be formed properly and
// the caller should abort the whole batch.
// 'buffer' is used to determine whether the input should be placed at the
// 'buffer' provided by the caller. If 'buffer' == nullptr, the returned
// buffer will be managed by the BackendInputCollector object and
// has the same lifecycle as the BackendInputCollector object.
// 'buffer_byte_size' is the byte size of 'buffer' if it is not nullptr.
// 'allowed_input_types' is the ordered list of the memory type and id pairs
// that the returned buffer can be. It must only contain the memory type
// and id of 'buffer' if it is not nullptr.
// 'dst_buffer' returns the contiguous buffer of the input tensor.
// 'dst_memory_type' returns the memory type of 'dst_buffer'.
// 'dst_memory_type_id' returns the memory type id of 'dst_buffer'.
  TRITONSERVER_Error* ProcessBatchInput(
      const BatchInput& batch_input, char* buffer,
      const size_t buffer_byte_size,
      const std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>>&
          allowed_input_types,
      const char** dst_buffer, size_t* dst_buffer_byte_size,
      TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id);
// Finalize processing of all requests for all input tensors. Return
// true if cudaMemcpyAsync is called, and the caller should call
// cudaStreamSynchronize (or cudaEventSynchronize on 'event') before
// using the data.
  bool Finalize();
 private:
  struct ContiguousBuffer {
    ContiguousBuffer() : start_request_idx_(0), end_request_idx_(0) {}

    MemoryDesc memory_desc_;
    size_t start_request_idx_;
    size_t end_request_idx_;
  };
  class InputIterator {
   public:
    InputIterator(
        TRITONBACKEND_Request** requests, const uint32_t request_count,
        std::vector<TRITONBACKEND_Response*>* responses,
        const char* input_name, const char* host_policy_name,
        const bool coalesce_request_input);
// Return false if iterator reaches the end of inputs, 'input' is not set.
    bool GetNextContiguousInput(ContiguousBuffer* input);

   private:
    TRITONBACKEND_Request** requests_;
    const uint32_t request_count_;
    std::vector<TRITONBACKEND_Response*>* responses_;
    const char* input_name_;
    const char* host_policy_;
    const bool coalesce_request_input_;
    TRITONBACKEND_Input* curr_input_;
    size_t curr_request_idx_;
    size_t curr_buffer_idx_;
    uint32_t curr_buffer_cnt_;
    bool reach_end_;
  };
// Return whether the entire input is in a contiguous buffer. If returns true,
// the properties of the contiguous input buffer will also be returned.
// Otherwise, only 'buffer_byte_size' will be set and return the total byte
// size of the input.
  bool GetInputBufferIfContiguous(
      const char* input_name, const char** buffer, size_t* buffer_byte_size,
      TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
  bool FlushPendingPinned(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);
  bool FlushPendingCopyKernel(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);
  TRITONSERVER_Error* LaunchCopyKernel(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);
  bool SetInputTensor(
      const char* input_name, const ContiguousBuffer& input,
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id, const size_t tensor_buffer_offset,
      const TRITONSERVER_MemoryType use_pinned_memory_type,
      const bool use_kernel, const bool wait_buffer);
  template <typename T>
  TRITONSERVER_Error* SetElementCount(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);
  template <typename T>
  TRITONSERVER_Error* SetAccumulatedElementCount(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);
  template <typename T>
  TRITONSERVER_Error* SetBatchItemShape(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);
  bool need_sync_;
  TRITONBACKEND_Request** requests_;
  const uint32_t request_count_;
  std::vector<TRITONBACKEND_Response*>* responses_;
  TRITONBACKEND_MemoryManager* memory_manager_;
  const bool pinned_enabled_;
  const bool use_async_cpu_copy_;
  cudaStream_t stream_;
  cudaEvent_t event_;
  cudaEvent_t buffer_ready_event_;
  const size_t kernel_buffer_threshold_;

  size_t pending_pinned_byte_size_;
  size_t pending_pinned_offset_;
  std::list<ContiguousBuffer> pending_pinned_input_buffers_;
// managed memories that need to live over the lifetime of this
// BackendInputCollector object.
  std::list<std::unique_ptr<BackendMemory>> in_use_memories_;

  size_t pending_copy_kernel_buffer_byte_size_;
  size_t pending_copy_kernel_buffer_offset_;
  size_t pending_copy_kernel_input_buffer_counts_;
  std::list<ContiguousBuffer> pending_copy_kernel_input_buffers_;
  std::vector<std::unique_ptr<std::vector<int8_t*>>> input_ptr_buffer_host_;
  std::vector<std::unique_ptr<std::vector<size_t>>> byte_size_buffer_host_;
  std::vector<std::unique_ptr<std::vector<size_t>>>
      byte_size_offset_buffer_host_;
// Pinned memory buffers and the corresponding request_inputs where
// the final copy to the tensor is deferred until Finalize() after
// waiting for all in-flight copies.
  struct DeferredPinned {
    DeferredPinned(
        char* pinned_memory, const size_t pinned_memory_size,
        char* tensor_buffer, const size_t tensor_buffer_offset,
        const TRITONSERVER_MemoryType tensor_memory_type,
        const int64_t tensor_memory_id,
        std::list<ContiguousBuffer>&& request_buffers,
        std::vector<TRITONBACKEND_Response*>* responses)
        : finalized_(false), pinned_memory_(pinned_memory),
          pinned_memory_size_(pinned_memory_size),
          tensor_buffer_(tensor_buffer),
          tensor_buffer_offset_(tensor_buffer_offset),
          tensor_memory_type_(tensor_memory_type),
          tensor_memory_id_(tensor_memory_id),
          requests_(std::move(request_buffers)), responses_(responses)
    {
    }

    bool Finalize(cudaStream_t stream);

    bool finalized_;
    // Holding reference to the pinned memory buffer, which is managed
    // by BackendInputCollector as 'pinned_memory'
    char* pinned_memory_;
    const size_t pinned_memory_size_;
    char* tensor_buffer_;
    const size_t tensor_buffer_offset_;
    const TRITONSERVER_MemoryType tensor_memory_type_;
    const int64_t tensor_memory_id_;
    std::list<ContiguousBuffer> requests_;
    std::vector<TRITONBACKEND_Response*>* responses_;
  };
  std::list<DeferredPinned> deferred_pinned_;
// FIXME use future to maintain an issue-order queue to drop task count
  triton::common::SyncQueue<bool> completion_queue_;
  size_t async_task_count_;
  const char* host_policy_cstr_;
  const bool copy_on_stream_;
  const bool coalesce_request_input_;
};
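// Illustrative usage sketch (an assumption, not part of the original header):
// a typical execute path collects a named input across the batched requests
// into one contiguous CPU buffer and then finalizes; 'model_state' is a
// hypothetical BackendModel-derived state object, e.g.
//
//   BackendInputCollector collector(
//       requests, request_count, &responses,
//       model_state->TritonMemoryManager(), model_state->EnablePinnedInput(),
//       stream);
//   const char* input_buffer = nullptr;
//   size_t input_byte_size = 0;
//   TRITONSERVER_MemoryType mem_type = TRITONSERVER_MEMORY_CPU;
//   int64_t mem_type_id = 0;
//   RESPOND_ALL_AND_SET_NULL_IF_ERROR(
//       responses, request_count,
//       collector.ProcessTensor(
//           "INPUT0", nullptr /* buffer */, 0 /* buffer_byte_size */,
//           {{TRITONSERVER_MEMORY_CPU, 0}}, &input_buffer, &input_byte_size,
//           &mem_type, &mem_type_id));
//   if (collector.Finalize()) {
//     cudaStreamSynchronize(stream);
//   }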
}}  // namespace triton::backend
3rdparty/backend-r22.12/include/triton/backend/backend_memory.h
0 → 100644
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <string>
#include <vector>
#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"
namespace triton { namespace backend {
// Collection of common properties that describe a buffer in Triton
struct MemoryDesc {
  MemoryDesc()
      : buffer_(nullptr), byte_size_(0),
        memory_type_(TRITONSERVER_MEMORY_CPU), memory_type_id_(0)
  {
  }

  MemoryDesc(
      const char* buffer, size_t byte_size,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
      : buffer_(buffer), byte_size_(byte_size), memory_type_(memory_type),
        memory_type_id_(memory_type_id)
  {
  }

  const char* buffer_;
  size_t byte_size_;
  TRITONSERVER_MemoryType memory_type_;
  int64_t memory_type_id_;
};
//
// BackendMemory
//
// Utility class for allocating and deallocating memory using both
// TRITONBACKEND_MemoryManager and direct GPU and CPU malloc/free.
//
class BackendMemory {
 public:
  enum class AllocationType {
    CPU,
    CPU_PINNED,
    GPU,
    CPU_PINNED_POOL,
    GPU_POOL
  };
// Allocate a contiguous block of 'alloc_type' memory. 'mem'
// returns the pointer to the allocated memory.
//
// CPU, CPU_PINNED_POOL and GPU_POOL are allocated using
// TRITONBACKEND_MemoryManagerAllocate. Note that CPU_PINNED and GPU
// allocations can be much slower than the POOL variants.
//
// Two error codes have specific interpretations for this function:
//
// TRITONSERVER_ERROR_UNSUPPORTED: Indicates that function is
// incapable of allocating the requested memory type and memory
// type ID. Requests for the memory type and ID will always fail
// no matter 'byte_size' of the request.
//
// TRITONSERVER_ERROR_UNAVAILABLE: Indicates that function can
// allocate the memory type and ID but that currently it cannot
// allocate a contiguous block of memory of the requested
// 'byte_size'.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type,
      const int64_t memory_type_id, const size_t byte_size,
      BackendMemory** mem);
// Allocate a contiguous block of memory by attempting the
// allocation using 'alloc_types' in order until one is successful.
// See BackendMemory::Create() above for details.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager,
      const std::vector<AllocationType>& alloc_types,
      const int64_t memory_type_id, const size_t byte_size,
      BackendMemory** mem);
// Creates a BackendMemory object from a pre-allocated buffer. The buffer
// is not owned by the object created with this function. Hence, for
  // proper operation, the lifetime of the buffer should extend at least
  // until the corresponding BackendMemory object is destroyed.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type,
      const int64_t memory_type_id, void* buffer, const size_t byte_size,
      BackendMemory** mem);
  ~BackendMemory();

  AllocationType AllocType() const { return alloctype_; }
  int64_t MemoryTypeId() const { return memtype_id_; }
  char* MemoryPtr() { return buffer_; }
  size_t ByteSize() const { return byte_size_; }
  TRITONSERVER_MemoryType MemoryType() const
  {
    return AllocTypeToMemoryType(alloctype_);
  }

  static TRITONSERVER_MemoryType AllocTypeToMemoryType(
      const AllocationType a);
  static const char* AllocTypeString(const AllocationType a);

 private:
  BackendMemory(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloctype,
      const int64_t memtype_id, char* buffer, const size_t byte_size,
      const bool owns_buffer = true)
      : manager_(manager), alloctype_(alloctype), memtype_id_(memtype_id),
        buffer_(buffer), byte_size_(byte_size), owns_buffer_(owns_buffer)
  {
  }

  TRITONBACKEND_MemoryManager* manager_;
  AllocationType alloctype_;
  int64_t memtype_id_;
  char* buffer_;
  size_t byte_size_;
  bool owns_buffer_;
};
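// Illustrative usage sketch (an assumption, not part of the original header):
// allocating a scratch buffer, preferring pooled pinned memory and falling
// back to plain CPU memory; 'memory_manager' and 'byte_size' are assumed
// local variables, e.g.
//
//   BackendMemory* scratch = nullptr;
//   RETURN_IF_ERROR(BackendMemory::Create(
//       memory_manager,
//       {BackendMemory::AllocationType::CPU_PINNED_POOL,
//        BackendMemory::AllocationType::CPU},
//       0 /* memory_type_id */, byte_size, &scratch));
//   std::unique_ptr<BackendMemory> scratch_owner(scratch);
//   char* base = scratch->MemoryPtr();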
}}  // namespace triton::backend
3rdparty/backend-r22.12/include/triton/backend/backend_model.h
0 → 100644
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <map>
#include <set>
#include <string>
#include "triton/backend/backend_common.h"
#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"
namespace triton { namespace backend {
//
// BackendModel
//
// Common functionality for a backend model. This class is provided as
// a convenience; backends are not required to use this class.
//
class BackendModel {
 public:
  BackendModel(
      TRITONBACKEND_Model* triton_model, const bool allow_optional = false);
  virtual ~BackendModel() = default;
// Get the handle to the TRITONBACKEND server hosting this model.
  TRITONSERVER_Server* TritonServer() { return triton_server_; }
// Get the handle to the memory manager for this model.
  TRITONBACKEND_MemoryManager* TritonMemoryManager()
  {
    return triton_memory_manager_;
  }
// Get the handle to the TRITONBACKEND model.
  TRITONBACKEND_Model* TritonModel() { return triton_model_; }
// Get the name and version of the model.
  const std::string& Name() const { return name_; }
  uint64_t Version() const { return version_; }
  const std::string& RepositoryPath() const { return repository_path_; }
// The model configuration.
  common::TritonJson::Value& ModelConfig() { return model_config_; }
// Sets the updated model configuration to the core.
  TRITONSERVER_Error* SetModelConfig();
// Parses information out of the model configuration.
  TRITONSERVER_Error* ParseModelConfig();
// Maximum batch size supported by the model. A value of 0
// indicates that the model does not support batching.
  int MaxBatchSize() const { return max_batch_size_; }
// Set the max batch size for the model. When a backend
// auto-completes a configuration it may set or change the maximum
// batch size.
  void SetMaxBatchSize(const int b) { max_batch_size_ = b; }
// Does this model support batching in the first dimension?
  TRITONSERVER_Error* SupportsFirstDimBatching(bool* supports);
// Use indirect pinned memory buffer when copying an input or output
// tensor to/from the model.
  bool EnablePinnedInput() const { return enable_pinned_input_; }
  bool EnablePinnedOutput() const { return enable_pinned_output_; }

  const std::vector<BatchInput>& BatchInputs() const { return batch_inputs_; }
  const std::vector<BatchOutput>& BatchOutputs() const
  {
    return batch_outputs_;
  }
  const BatchOutput* FindBatchOutput(const std::string& output_name) const;

  bool IsInputRagged(const std::string& input_name) const
  {
    return (ragged_inputs_.find(input_name) != ragged_inputs_.end());
  }

  bool IsInputOptional(const std::string& input_name) const
  {
    return (optional_inputs_.find(input_name) != optional_inputs_.end());
  }
 protected:
  TRITONSERVER_Server* triton_server_;
  TRITONBACKEND_MemoryManager* triton_memory_manager_;
  TRITONBACKEND_Model* triton_model_;
  std::string name_;
  uint64_t version_;
  std::string repository_path_;
  bool allow_optional_;

  common::TritonJson::Value model_config_;
  int max_batch_size_;
  bool enable_pinned_input_;
  bool enable_pinned_output_;

  std::vector<BatchInput> batch_inputs_;
  std::vector<BatchOutput> batch_outputs_;
  std::map<std::string, const BatchOutput*> batch_output_map_;
  std::set<std::string> ragged_inputs_;
  std::set<std::string> optional_inputs_;
};
//
// BackendModelException
//
// Exception thrown if an error occurs while constructing a
// BackendModel.
//
struct BackendModelException {
  BackendModelException(TRITONSERVER_Error* err) : err_(err) {}
  TRITONSERVER_Error* err_;
};
#define THROW_IF_BACKEND_MODEL_ERROR(X) \
do { \
TRITONSERVER_Error* tie_err__ = (X); \
if (tie_err__ != nullptr) { \
throw triton::backend::BackendModelException(tie_err__); \
} \
} while (false)
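// Illustrative usage sketch (an assumption, not part of the original header):
// backends typically derive a model state from BackendModel and create it in
// TRITONBACKEND_ModelInitialize; 'ModelState' is a hypothetical name, e.g.
//
//   class ModelState : public BackendModel {
//    public:
//     ModelState(TRITONBACKEND_Model* model) : BackendModel(model) {}
//   };
//
//   extern "C" TRITONSERVER_Error* TRITONBACKEND_ModelInitialize(
//       TRITONBACKEND_Model* model)
//   {
//     ModelState* state = nullptr;
//     try {
//       state = new ModelState(model);
//     }
//     catch (const BackendModelException& ex) {
//       return ex.err_;
//     }
//     RETURN_IF_ERROR(
//         TRITONBACKEND_ModelSetState(model, reinterpret_cast<void*>(state)));
//     return nullptr;  // success
//   }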
}}  // namespace triton::backend
3rdparty/backend-r22.12/include/triton/backend/backend_model_instance.h
0 → 100644
// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <string>
#include "triton/core/tritonbackend.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace backend {
#ifndef TRITON_ENABLE_GPU
using cudaStream_t = void*;
#endif // !TRITON_ENABLE_GPU
class BackendModel;
//
// BackendModelInstance
//
// Common functionality for a backend model instance. This class is
// provided as a convenience; backends are not required to use this
// class.
//
class BackendModelInstance {
 public:
  BackendModelInstance(
      BackendModel* backend_model,
      TRITONBACKEND_ModelInstance* triton_model_instance);
  virtual ~BackendModelInstance();
// Get the name, kind and device ID of the instance.
  const std::string& Name() const { return name_; }
  TRITONSERVER_InstanceGroupKind Kind() const { return kind_; }
  int32_t DeviceId() const { return device_id_; }
// Get the handle to the TRITONBACKEND model instance.
  TRITONBACKEND_ModelInstance* TritonModelInstance()
  {
    return triton_model_instance_;
  }
// Get the BackendModel representing the model that corresponds to
// this instance.
  BackendModel* Model() const { return backend_model_; }
// The model configuration 'default_model_filename' value, or the
// value in model configuration 'cc_model_filenames' for the GPU
// targeted by this instance. If neither are specified in the model
  // configuration, the empty string is returned.
  const std::string& ArtifactFilename() const { return artifact_filename_; }
// Returns the stream associated with this instance that can be used
// for GPU<->CPU memory transfers. Returns nullptr if GPU support is
// disabled or if this instance is not executing on a GPU.
  cudaStream_t CudaStream() { return stream_; }

  const std::string& HostPolicyName() const { return host_policy_name_; }
 protected:
  BackendModel* backend_model_;
  TRITONBACKEND_ModelInstance* triton_model_instance_;
  std::string name_;
  TRITONSERVER_InstanceGroupKind kind_;
  int32_t device_id_;
  std::string artifact_filename_;
  cudaStream_t stream_;
  std::string host_policy_name_;
};
//
// BackendModelInstanceException
//
// Exception thrown if an error occurs while constructing a
// BackendModelInstance.
//
struct BackendModelInstanceException {
  BackendModelInstanceException(TRITONSERVER_Error* err) : err_(err) {}
  TRITONSERVER_Error* err_;
};
#define THROW_IF_BACKEND_INSTANCE_ERROR(X) \
do { \
TRITONSERVER_Error* tie_err__ = (X); \
if (tie_err__ != nullptr) { \
throw triton::backend::BackendModelInstanceException(tie_err__); \
} \
} while (false)
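// Illustrative usage sketch (an assumption, not part of the original header):
// an instance state derived from BackendModelInstance is typically created in
// TRITONBACKEND_ModelInstanceInitialize, and its CudaStream() is reused for
// input/output copies; 'ModelInstanceState' is a hypothetical name, e.g.
//
//   class ModelInstanceState : public BackendModelInstance {
//    public:
//     ModelInstanceState(
//         BackendModel* model_state, TRITONBACKEND_ModelInstance* instance)
//         : BackendModelInstance(model_state, instance)
//     {
//     }
//   };
//
//   // In TRITONBACKEND_ModelInstanceInitialize, construction is guarded with
//   // THROW_IF_BACKEND_INSTANCE_ERROR / BackendModelInstanceException in the
//   // same way as the model-level equivalents above.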
}}  // namespace triton::backend