OpenDAS / Lmdeploy · Commits · 0a21fff9

Commit 0a21fff9, authored Dec 20, 2023 by xiabo
Parent: 9484fd1c

    Adapt to 0.1.0

Showing 20 of 158 changed files, with 2598 additions and 0 deletions (+2598 −0).
Changed files (+additions / −deletions):

  +30    −0   3rdparty/backend-r22.12/examples/backends/recommended/src/libtriton_recommended.ldscript
  +750   −0   3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc
  +86    −0   3rdparty/backend-r22.12/examples/clients/bls_client
  +92    −0   3rdparty/backend-r22.12/examples/clients/minimal_client
  +91    −0   3rdparty/backend-r22.12/examples/clients/recommended_client
  +74    −0   3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py
  +58    −0   3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt
  +0     −0   3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb
  +28    −0   3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt
  +63    −0   3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt
  +0     −0   3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep
  +24    −0   3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt
  +0     −0   3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep
  +21    −0   3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt
  +0     −0   3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep
  +24    −0   3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt
  +672   −0   3rdparty/backend-r22.12/include/triton/backend/backend_common.h
  +301   −0   3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h
  +138   −0   3rdparty/backend-r22.12/include/triton/backend/backend_memory.h
  +146   −0   3rdparty/backend-r22.12/include/triton/backend/backend_model.h

Too many changes to show: only 158 of 158+ changed files are listed, and only the 20 file diffs above are rendered below.
3rdparty/backend-r22.12/examples/backends/recommended/src/libtriton_recommended.ldscript   (new file, mode 100644)
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
TRITONBACKEND_*;
local: *;
};
3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc   (new file, mode 100644)
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/backend/backend_common.h"
#include "triton/backend/backend_input_collector.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"
#include "triton/backend/backend_output_responder.h"
#include "triton/core/tritonbackend.h"
namespace triton { namespace backend { namespace recommended {
//
// Backend that demonstrates the TRITONBACKEND API. This backend works
// for any model that has 1 input with any datatype and any shape and
// 1 output with the same shape and datatype as the input. The backend
// supports both batching and non-batching models.
//
// For each batch of requests, the backend returns the input tensor
// value in the output tensor.
//
/////////////
extern "C" {
// Triton calls TRITONBACKEND_Initialize when a backend is loaded into
// Triton to allow the backend to create and initialize any state that
// is intended to be shared across all models and model instances that
// use the backend. The backend should also verify version
// compatibility with Triton in this function.
//
TRITONSERVER_Error*
TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
{
  const char* cname;
  RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &cname));
  std::string name(cname);

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("TRITONBACKEND_Initialize: ") + name).c_str());

  // Check the backend API version that Triton supports vs. what this
  // backend was compiled against. Make sure that the Triton major
  // version is the same and the minor version is >= what this backend
  // uses.
  uint32_t api_version_major, api_version_minor;
  RETURN_IF_ERROR(
      TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("Triton TRITONBACKEND API version: ") +
       std::to_string(api_version_major) + "." +
       std::to_string(api_version_minor))
          .c_str());
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("'") + name + "' TRITONBACKEND API version: " +
       std::to_string(TRITONBACKEND_API_VERSION_MAJOR) + "." +
       std::to_string(TRITONBACKEND_API_VERSION_MINOR))
          .c_str());

  if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) ||
      (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        "triton backend API version does not support this backend");
  }

  // The backend configuration may contain information needed by the
  // backend, such as tritonserver command-line arguments. This
  // backend doesn't use any such configuration but for this example
  // print whatever is available.
  TRITONSERVER_Message* backend_config_message;
  RETURN_IF_ERROR(
      TRITONBACKEND_BackendConfig(backend, &backend_config_message));

  const char* buffer;
  size_t byte_size;
  RETURN_IF_ERROR(TRITONSERVER_MessageSerializeToJson(
      backend_config_message, &buffer, &byte_size));
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("backend configuration:\n") + buffer).c_str());

  // This backend does not require any "global" state but as an
  // example create a string to demonstrate.
  std::string* state = new std::string("backend state");
  RETURN_IF_ERROR(
      TRITONBACKEND_BackendSetState(backend, reinterpret_cast<void*>(state)));

  return nullptr;  // success
}

// Triton calls TRITONBACKEND_Finalize when a backend is no longer
// needed.
//
TRITONSERVER_Error*
TRITONBACKEND_Finalize(TRITONBACKEND_Backend* backend)
{
  // Delete the "global" state associated with the backend.
  void* vstate;
  RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &vstate));
  std::string* state = reinterpret_cast<std::string*>(vstate);

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("TRITONBACKEND_Finalize: state is '") + *state + "'")
          .c_str());

  delete state;

  return nullptr;  // success
}

}  // extern "C"
/////////////
//
// ModelState
//
// State associated with a model that is using this backend. An object
// of this class is created and associated with each
// TRITONBACKEND_Model. ModelState is derived from BackendModel class
// provided in the backend utilities that provides many common
// functions.
//
class ModelState : public BackendModel {
 public:
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_Model* triton_model, ModelState** state);
  virtual ~ModelState() = default;

  // Name of the input and output tensor
  const std::string& InputTensorName() const { return input_name_; }
  const std::string& OutputTensorName() const { return output_name_; }

  // Datatype of the input and output tensor
  TRITONSERVER_DataType TensorDataType() const { return datatype_; }

  // Shape of the input and output tensor as given in the model
  // configuration file. This shape will not include the batch
  // dimension (if the model has one).
  const std::vector<int64_t>& TensorNonBatchShape() const { return nb_shape_; }

  // Shape of the input and output tensor, including the batch
  // dimension (if the model has one). This method cannot be called
  // until the model is completely loaded and initialized, including
  // all instances of the model. In practice, this means that backend
  // should only call it in TRITONBACKEND_ModelInstanceExecute.
  TRITONSERVER_Error* TensorShape(std::vector<int64_t>& shape);

  // Validate that this model is supported by this backend.
  TRITONSERVER_Error* ValidateModelConfig();

 private:
  ModelState(TRITONBACKEND_Model* triton_model);

  std::string input_name_;
  std::string output_name_;
  TRITONSERVER_DataType datatype_;

  bool shape_initialized_;
  std::vector<int64_t> nb_shape_;
  std::vector<int64_t> shape_;
};

ModelState::ModelState(TRITONBACKEND_Model* triton_model)
    : BackendModel(triton_model), shape_initialized_(false)
{
  // Validate that the model's configuration matches what is supported
  // by this backend.
  THROW_IF_BACKEND_MODEL_ERROR(ValidateModelConfig());
}

TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
  try {
    *state = new ModelState(triton_model);
  }
  catch (const BackendModelException& ex) {
    RETURN_ERROR_IF_TRUE(
        ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelException"));
    RETURN_IF_ERROR(ex.err_);
  }

  return nullptr;  // success
}

TRITONSERVER_Error*
ModelState::TensorShape(std::vector<int64_t>& shape)
{
  // This backend supports models that batch along the first dimension
  // and those that don't batch. For non-batch models the output shape
  // will be the shape from the model configuration. For batch models
  // the output shape will be the shape from the model configuration
  // prepended with [ -1 ] to represent the batch dimension. The
  // backend "responder" utility used below will set the appropriate
  // batch dimension value for each response. The shape needs to be
  // initialized lazily because the SupportsFirstDimBatching function
  // cannot be used until the model is completely loaded.
  if (!shape_initialized_) {
    bool supports_first_dim_batching;
    RETURN_IF_ERROR(SupportsFirstDimBatching(&supports_first_dim_batching));
    if (supports_first_dim_batching) {
      shape_.push_back(-1);
    }

    shape_.insert(shape_.end(), nb_shape_.begin(), nb_shape_.end());
    shape_initialized_ = true;
  }

  shape = shape_;

  return nullptr;  // success
}

TRITONSERVER_Error*
ModelState::ValidateModelConfig()
{
  // If verbose logging is enabled, dump the model's configuration as
  // JSON into the console output.
  if (TRITONSERVER_LogIsEnabled(TRITONSERVER_LOG_VERBOSE)) {
    common::TritonJson::WriteBuffer buffer;
    RETURN_IF_ERROR(ModelConfig().PrettyWrite(&buffer));
    LOG_MESSAGE(
        TRITONSERVER_LOG_VERBOSE,
        (std::string("model configuration:\n") + buffer.Contents()).c_str());
  }

  // ModelConfig is the model configuration as a TritonJson
  // object. Use the TritonJson utilities to parse the JSON and
  // determine if the configuration is supported by this backend.
  common::TritonJson::Value inputs, outputs;
  RETURN_IF_ERROR(ModelConfig().MemberAsArray("input", &inputs));
  RETURN_IF_ERROR(ModelConfig().MemberAsArray("output", &outputs));

  // The model must have exactly 1 input and 1 output.
  RETURN_ERROR_IF_FALSE(
      inputs.ArraySize() == 1, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model configuration must have 1 input"));
  RETURN_ERROR_IF_FALSE(
      outputs.ArraySize() == 1, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("model configuration must have 1 output"));

  common::TritonJson::Value input, output;
  RETURN_IF_ERROR(inputs.IndexAsObject(0, &input));
  RETURN_IF_ERROR(outputs.IndexAsObject(0, &output));

  // Record the input and output name in the model state.
  const char* input_name;
  size_t input_name_len;
  RETURN_IF_ERROR(input.MemberAsString("name", &input_name, &input_name_len));
  input_name_ = std::string(input_name);

  const char* output_name;
  size_t output_name_len;
  RETURN_IF_ERROR(
      output.MemberAsString("name", &output_name, &output_name_len));
  output_name_ = std::string(output_name);

  // Input and output must have same datatype
  std::string input_dtype, output_dtype;
  RETURN_IF_ERROR(input.MemberAsString("data_type", &input_dtype));
  RETURN_IF_ERROR(output.MemberAsString("data_type", &output_dtype));

  RETURN_ERROR_IF_FALSE(
      input_dtype == output_dtype, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("expected input and output datatype to match, got ") +
          input_dtype + " and " + output_dtype);
  datatype_ = ModelConfigDataTypeToTritonServerDataType(input_dtype);

  // Input and output must have same shape. Reshape is not supported
  // on either input or output so flag an error if the model
  // configuration uses it.
  triton::common::TritonJson::Value reshape;
  RETURN_ERROR_IF_TRUE(
      input.Find("reshape", &reshape), TRITONSERVER_ERROR_UNSUPPORTED,
      std::string("reshape not supported for input tensor"));
  RETURN_ERROR_IF_TRUE(
      output.Find("reshape", &reshape), TRITONSERVER_ERROR_UNSUPPORTED,
      std::string("reshape not supported for output tensor"));

  std::vector<int64_t> input_shape, output_shape;
  RETURN_IF_ERROR(backend::ParseShape(input, "dims", &input_shape));
  RETURN_IF_ERROR(backend::ParseShape(output, "dims", &output_shape));

  RETURN_ERROR_IF_FALSE(
      input_shape == output_shape, TRITONSERVER_ERROR_INVALID_ARG,
      std::string("expected input and output shape to match, got ") +
          backend::ShapeToString(input_shape) + " and " +
          backend::ShapeToString(output_shape));
  nb_shape_ = input_shape;

  return nullptr;  // success
}
extern "C" {
// Triton calls TRITONBACKEND_ModelInitialize when a model is loaded
// to allow the backend to create any state associated with the model,
// and to also examine the model configuration to determine if the
// configuration is suitable for the backend. Any errors reported by
// this function will prevent the model from loading.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  // Create a ModelState object and associate it with the
  // TRITONBACKEND_Model. If anything goes wrong with initialization
  // of the model state then an error is returned and Triton will fail
  // to load the model.
  ModelState* model_state;
  RETURN_IF_ERROR(ModelState::Create(model, &model_state));
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelSetState(model, reinterpret_cast<void*>(model_state)));

  return nullptr;  // success
}

// Triton calls TRITONBACKEND_ModelFinalize when a model is no longer
// needed. The backend should cleanup any state associated with the
// model. This function will not be called until all model instances
// of the model have been finalized.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
  void* vstate;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vstate));
  ModelState* model_state = reinterpret_cast<ModelState*>(vstate);
  delete model_state;

  return nullptr;  // success
}

}  // extern "C"
/////////////
//
// ModelInstanceState
//
// State associated with a model instance. An object of this class is
// created and associated with each
// TRITONBACKEND_ModelInstance. ModelInstanceState is derived from
// BackendModelInstance class provided in the backend utilities that
// provides many common functions.
//
class ModelInstanceState : public BackendModelInstance {
 public:
  static TRITONSERVER_Error* Create(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance,
      ModelInstanceState** state);
  virtual ~ModelInstanceState() = default;

  // Get the state of the model that corresponds to this instance.
  ModelState* StateForModel() const { return model_state_; }

 private:
  ModelInstanceState(
      ModelState* model_state,
      TRITONBACKEND_ModelInstance* triton_model_instance)
      : BackendModelInstance(model_state, triton_model_instance),
        model_state_(model_state)
  {
  }

  ModelState* model_state_;
};

TRITONSERVER_Error*
ModelInstanceState::Create(
    ModelState* model_state,
    TRITONBACKEND_ModelInstance* triton_model_instance,
    ModelInstanceState** state)
{
  try {
    *state = new ModelInstanceState(model_state, triton_model_instance);
  }
  catch (const BackendModelInstanceException& ex) {
    RETURN_ERROR_IF_TRUE(
        ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
        std::string("unexpected nullptr in BackendModelInstanceException"));
    RETURN_IF_ERROR(ex.err_);
  }

  return nullptr;  // success
}
extern "C" {
// Triton calls TRITONBACKEND_ModelInstanceInitialize when a model
// instance is created to allow the backend to initialize any state
// associated with the instance.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
  // Get the model state associated with this instance's model.
  TRITONBACKEND_Model* model;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &model));

  void* vmodelstate;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vmodelstate));
  ModelState* model_state = reinterpret_cast<ModelState*>(vmodelstate);

  // Create a ModelInstanceState object and associate it with the
  // TRITONBACKEND_ModelInstance.
  ModelInstanceState* instance_state;
  RETURN_IF_ERROR(
      ModelInstanceState::Create(model_state, instance, &instance_state));
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState(
      instance, reinterpret_cast<void*>(instance_state)));

  return nullptr;  // success
}

// Triton calls TRITONBACKEND_ModelInstanceFinalize when a model
// instance is no longer needed. The backend should cleanup any state
// associated with the model instance.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
  void* vstate;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate));
  ModelInstanceState* instance_state =
      reinterpret_cast<ModelInstanceState*>(vstate);
  delete instance_state;

  return nullptr;  // success
}

}  // extern "C"

/////////////
extern "C" {
// When Triton calls TRITONBACKEND_ModelInstanceExecute it is required
// that a backend create a response for each request in the batch. A
// response may be the output tensors required for that request or may
// be an error that is returned in the response.
//
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  // Collect various timestamps during the execution of this batch of
  // requests. These values are reported below before returning from
  // the function.
  uint64_t exec_start_ns = 0;
  SET_TIMESTAMP(exec_start_ns);

  // Triton will not call this function simultaneously for the same
  // 'instance'. But since this backend could be used by multiple
  // instances from multiple models the implementation needs to handle
  // multiple calls to this function at the same time (with different
  // 'instance' objects). Best practice for a high-performance
  // implementation is to avoid introducing mutex/lock and instead use
  // only function-local and model-instance-specific state.
  ModelInstanceState* instance_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(
      instance, reinterpret_cast<void**>(&instance_state)));
  ModelState* model_state = instance_state->StateForModel();

  // 'responses' is initialized as a parallel array to 'requests',
  // with one TRITONBACKEND_Response object for each
  // TRITONBACKEND_Request object. If something goes wrong while
  // creating these response objects, the backend simply returns an
  // error from TRITONBACKEND_ModelInstanceExecute, indicating to
  // Triton that this backend did not create or send any responses and
  // so it is up to Triton to create and send an appropriate error
  // response for each request. RETURN_IF_ERROR is one of several
  // useful macros for error handling that can be found in
  // backend_common.h.
  std::vector<TRITONBACKEND_Response*> responses;
  responses.reserve(request_count);
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Request* request = requests[r];
    TRITONBACKEND_Response* response;
    RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, request));
    responses.push_back(response);
  }

  // At this point, the backend takes ownership of 'requests', which
  // means that it is responsible for sending a response for every
  // request. From here, even if something goes wrong in processing,
  // the backend must return 'nullptr' from this function to indicate
  // success. Any errors and failures must be communicated via the
  // response objects.
  //
  // To simplify error handling, the backend utilities manage
  // 'responses' in a specific way and it is recommended that backends
  // follow this same pattern. When an error is detected in the
  // processing of a request, an appropriate error response is sent
  // and the corresponding TRITONBACKEND_Response object within
  // 'responses' is set to nullptr to indicate that the
  // request/response has already been handled and no further
  // processing should be performed for that request. Even if all
  // responses fail, the backend still allows execution to flow to the
  // end of the function so that statistics are correctly reported by
  // the calls to TRITONBACKEND_ModelInstanceReportStatistics and
  // TRITONBACKEND_ModelInstanceReportBatchStatistics.
  // RESPOND_AND_SET_NULL_IF_ERROR, and
  // RESPOND_ALL_AND_SET_NULL_IF_ERROR are macros from
  // backend_common.h that assist in this management of response
  // objects.

  // The backend could iterate over the 'requests' and process each
  // one separately. But for performance reasons it is usually
  // preferred to create batched input tensors that are processed
  // simultaneously. This is especially true for devices like GPUs
  // that are capable of exploiting the large amount of parallelism
  // exposed by larger data sets.
  //
  // The backend utilities provide a "collector" to facilitate this
  // batching process. The 'collector's ProcessTensor function will
  // combine a tensor's value from each request in the batch into a
  // single contiguous buffer. The buffer can be provided by the
  // backend or 'collector' can create and manage it. In this backend,
  // there is not a specific buffer into which the batch should be
  // created, so use ProcessTensor arguments that cause collector to
  // manage it. ProcessTensor does NOT support TRITONSERVER_TYPE_BYTES
  // data type.
  BackendInputCollector collector(
      requests, request_count, &responses, model_state->TritonMemoryManager(),
      false /* pinned_enabled */, nullptr /* stream*/);

  // To instruct ProcessTensor to "gather" the entire batch of input
  // tensors into a single contiguous buffer in CPU memory, set the
  // "allowed input types" to be the CPU ones (see tritonserver.h in
  // the triton-inference-server/core repo for allowed memory types).
  std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>>
      allowed_input_types = {
          {TRITONSERVER_MEMORY_CPU_PINNED, 0}, {TRITONSERVER_MEMORY_CPU, 0}};

  const char* input_buffer;
  size_t input_buffer_byte_size;
  TRITONSERVER_MemoryType input_buffer_memory_type;
  int64_t input_buffer_memory_type_id;

  RESPOND_ALL_AND_SET_NULL_IF_ERROR(
      responses, request_count,
      collector.ProcessTensor(
          model_state->InputTensorName().c_str(),
          nullptr /* existing_buffer */, 0 /* existing_buffer_byte_size */,
          allowed_input_types, &input_buffer, &input_buffer_byte_size,
          &input_buffer_memory_type, &input_buffer_memory_type_id));

  // Finalize the collector. If 'true' is returned, 'input_buffer'
  // will not be valid until the backend synchronizes the CUDA
  // stream or event that was used when creating the collector. For
  // this backend, GPU is not supported and so no CUDA sync should
  // be needed; so if 'true' is returned simply log an error.
  const bool need_cuda_input_sync = collector.Finalize();
  if (need_cuda_input_sync) {
    LOG_MESSAGE(
        TRITONSERVER_LOG_ERROR,
        "'recommended' backend: unexpected CUDA sync required by collector");
  }

  // 'input_buffer' contains the batched input tensor. The backend can
  // implement whatever logic is necessary to produce the output
  // tensor. This backend simply logs the input tensor value and then
  // returns the input tensor value in the output tensor so no actual
  // computation is needed.
  uint64_t compute_start_ns = 0;
  SET_TIMESTAMP(compute_start_ns);

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("model ") + model_state->Name() + ": requests in batch " +
       std::to_string(request_count))
          .c_str());
  std::string tstr;
  IGNORE_ERROR(BufferAsTypedString(
      tstr, input_buffer, input_buffer_byte_size,
      model_state->TensorDataType()));
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("batched " + model_state->InputTensorName() + " value: ") +
       tstr)
          .c_str());

  const char* output_buffer = input_buffer;
  TRITONSERVER_MemoryType output_buffer_memory_type = input_buffer_memory_type;
  int64_t output_buffer_memory_type_id = input_buffer_memory_type_id;

  uint64_t compute_end_ns = 0;
  SET_TIMESTAMP(compute_end_ns);

  bool supports_first_dim_batching;
  RESPOND_ALL_AND_SET_NULL_IF_ERROR(
      responses, request_count,
      model_state->SupportsFirstDimBatching(&supports_first_dim_batching));

  std::vector<int64_t> tensor_shape;
  RESPOND_ALL_AND_SET_NULL_IF_ERROR(
      responses, request_count, model_state->TensorShape(tensor_shape));

  // Because the output tensor values are concatenated into a single
  // contiguous 'output_buffer', the backend must "scatter" them out
  // to the individual response output tensors. The backend utilities
  // provide a "responder" to facilitate this scattering process.
  // BackendOutputResponder does NOT support TRITONSERVER_TYPE_BYTES
  // data type.
  //
  // The 'responder's ProcessTensor function will copy the portion of
  // 'output_buffer' corresponding to each request's output into the
  // response for that request.
  BackendOutputResponder responder(
      requests, request_count, &responses, model_state->TritonMemoryManager(),
      supports_first_dim_batching, false /* pinned_enabled */,
      nullptr /* stream*/);

  responder.ProcessTensor(
      model_state->OutputTensorName().c_str(), model_state->TensorDataType(),
      tensor_shape, output_buffer, output_buffer_memory_type,
      output_buffer_memory_type_id);

  // Finalize the responder. If 'true' is returned, the output
  // tensors' data will not be valid until the backend synchronizes
  // the CUDA stream or event that was used when creating the
  // responder. For this backend, GPU is not supported and so no CUDA
  // sync should be needed; so if 'true' is returned simply log an
  // error.
  const bool need_cuda_output_sync = responder.Finalize();
  if (need_cuda_output_sync) {
    LOG_MESSAGE(
        TRITONSERVER_LOG_ERROR,
        "'recommended' backend: unexpected CUDA sync required by responder");
  }

  // Send all the responses that haven't already been sent because of
  // an earlier error.
  for (auto& response : responses) {
    if (response != nullptr) {
      LOG_IF_ERROR(
          TRITONBACKEND_ResponseSend(
              response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, nullptr),
          "failed to send response");
    }
  }

  uint64_t exec_end_ns = 0;
  SET_TIMESTAMP(exec_end_ns);

#ifdef TRITON_ENABLE_STATS
  // For batch statistics need to know the total batch size of the
  // requests. This is not necessarily just the number of requests,
  // because if the model supports batching then any request can be a
  // batched request itself.
  size_t total_batch_size = 0;
  if (!supports_first_dim_batching) {
    total_batch_size = request_count;
  } else {
    for (uint32_t r = 0; r < request_count; ++r) {
      auto& request = requests[r];
      TRITONBACKEND_Input* input = nullptr;
      LOG_IF_ERROR(
          TRITONBACKEND_RequestInputByIndex(request, 0 /* index */, &input),
          "failed getting request input");
      if (input != nullptr) {
        const int64_t* shape = nullptr;
        LOG_IF_ERROR(
            TRITONBACKEND_InputProperties(
                input, nullptr, nullptr, &shape, nullptr, nullptr, nullptr),
            "failed getting input properties");
        if (shape != nullptr) {
          total_batch_size += shape[0];
        }
      }
    }
  }
#else
  (void)exec_start_ns;
  (void)exec_end_ns;
  (void)compute_start_ns;
  (void)compute_end_ns;
#endif  // TRITON_ENABLE_STATS

  // Report statistics for each request, and then release the request.
  for (uint32_t r = 0; r < request_count; ++r) {
    auto& request = requests[r];

#ifdef TRITON_ENABLE_STATS
    LOG_IF_ERROR(
        TRITONBACKEND_ModelInstanceReportStatistics(
            instance_state->TritonModelInstance(), request,
            (responses[r] != nullptr) /* success */, exec_start_ns,
            compute_start_ns, compute_end_ns, exec_end_ns),
        "failed reporting request statistics");
#endif  // TRITON_ENABLE_STATS

    LOG_IF_ERROR(
        TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL),
        "failed releasing request");
  }

#ifdef TRITON_ENABLE_STATS
  // Report batch statistics.
  LOG_IF_ERROR(
      TRITONBACKEND_ModelInstanceReportBatchStatistics(
          instance_state->TritonModelInstance(), total_batch_size,
          exec_start_ns, compute_start_ns, compute_end_ns, exec_end_ns),
      "failed reporting batch request statistics");
#endif  // TRITON_ENABLE_STATS

  return nullptr;  // success
}

}  // extern "C"

}}}  // namespace triton::backend::recommended
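Because recommended.cc copies the batched input buffer straight into the output responder, any model served by this backend behaves as an identity function. As a quick end-to-end check (not part of this commit), the following hedged sketch assumes a tritonserver instance reachable at localhost:8000, the `batching` model from examples/model_repos/recommended_models loaded, and the tritonclient Python package installed.

#!/usr/bin/env python
# Sketch only: verify that the 'recommended' backend echoes its input.
import sys

import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url='localhost:8000')

# The batching model declares INPUT/OUTPUT as FP32 with dims [ 4, 4 ], so a
# single request carries a [1, 4, 4] tensor (leading batch dimension of 1).
input_data = np.arange(16, dtype=np.float32).reshape(1, 4, 4)
inputs = [httpclient.InferInput('INPUT', [1, 4, 4], "FP32")]
inputs[0].set_data_from_numpy(input_data)

result = client.infer('batching', inputs)
output_data = result.as_numpy('OUTPUT')

# The backend returns the input tensor value in the output tensor.
if not np.allclose(input_data, output_data):
    print('error: OUTPUT does not match INPUT')
    sys.exit(1)
print('PASS')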
3rdparty/backend-r22.12/examples/clients/bls_client   (new file, mode 100644)
#!/usr/bin/python
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import argparse
import numpy as np
import tritonhttpclient as httpclient
from tritonclientutils import np_to_triton_dtype

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')

    FLAGS = parser.parse_args()

    model_name = "bls_fp32"
    shape = [16]

    with httpclient.InferenceServerClient(url=FLAGS.url) as client:
        input0_data = np.random.rand(*shape).astype(np.float32)
        input1_data = np.random.rand(*shape).astype(np.float32)
        inputs = [
            httpclient.InferInput("INPUT0", input0_data.shape,
                                  np_to_triton_dtype(input0_data.dtype)),
            httpclient.InferInput("INPUT1", input1_data.shape,
                                  np_to_triton_dtype(input1_data.dtype)),
        ]

        inputs[0].set_data_from_numpy(input0_data)
        inputs[1].set_data_from_numpy(input1_data)

        outputs = [
            httpclient.InferRequestedOutput("OUTPUT0"),
            httpclient.InferRequestedOutput("OUTPUT1"),
        ]

        response = client.infer(model_name,
                                inputs,
                                request_id=str(1),
                                outputs=outputs)

        result = response.get_response()
        output0_data = response.as_numpy("OUTPUT0")
        output1_data = response.as_numpy("OUTPUT1")

        print("INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
            input0_data, input1_data, output0_data))
        print("INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
            input0_data, input1_data, output1_data))

        if not np.allclose(input0_data + input1_data, output0_data):
            print("error: incorrect sum")
            sys.exit(1)

        if not np.allclose(input0_data - input1_data, output1_data):
            print("error: incorrect difference")
            sys.exit(1)

        print('\nPASS')
        sys.exit(0)
3rdparty/backend-r22.12/examples/clients/minimal_client   (new file, mode 100644)
#!/usr/bin/env python
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import sys

import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')

    FLAGS = parser.parse_args()

    # For the HTTP client, need to specify large enough concurrency to
    # issue all the inference requests to the server in parallel. For
    # this example we want to be able to send 2 requests concurrently.
    try:
        concurrent_request_count = 2
        triton_client = httpclient.InferenceServerClient(
            url=FLAGS.url, concurrency=concurrent_request_count)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # First send a single request to the nonbatching model.
    print('=========')
    input0_data = np.array([1, 2, 3, 4], dtype=np.int32)
    print('Sending request to nonbatching model: IN0 = {}'.format(input0_data))

    inputs = [httpclient.InferInput('IN0', [4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    result = triton_client.infer('nonbatching', inputs)

    print('Response: {}'.format(result.get_response()))
    print('OUT0 = {}'.format(result.as_numpy('OUT0')))

    # Send 2 requests to the batching model. Because these are sent
    # asynchronously and Triton's dynamic batcher is configured to
    # delay up to 5 seconds when forming a batch for this model, we
    # expect these 2 requests to be batched within Triton and sent to
    # the minimal backend as a single batch.
    print('\n=========')
    async_requests = []

    input0_data = np.array([[10, 11, 12, 13]], dtype=np.int32)
    print('Sending request to batching model: IN0 = {}'.format(input0_data))
    inputs = [httpclient.InferInput('IN0', [1, 4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    input0_data = np.array([[20, 21, 22, 23]], dtype=np.int32)
    print('Sending request to batching model: IN0 = {}'.format(input0_data))
    inputs = [httpclient.InferInput('IN0', [1, 4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    for async_request in async_requests:
        # Get the result from the initiated asynchronous inference
        # request. This call will block till the server responds.
        result = async_request.get_result()
        print('Response: {}'.format(result.get_response()))
        print('OUT0 = {}'.format(result.as_numpy('OUT0')))
3rdparty/backend-r22.12/examples/clients/recommended_client   (new file, mode 100644)
#!/usr/bin/env python
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import sys

import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')

    FLAGS = parser.parse_args()

    # For the HTTP client, need to specify large enough concurrency to
    # issue all the inference requests to the server in parallel. For
    # this example we want to be able to send 2 requests concurrently.
    try:
        concurrent_request_count = 2
        triton_client = httpclient.InferenceServerClient(
            url=FLAGS.url, concurrency=concurrent_request_count)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # Send 2 requests to the batching model. Because these are sent
    # asynchronously and Triton's dynamic batcher is configured to
    # delay up to 5 seconds when forming a batch for this model, we
    # expect these 2 requests to be batched within Triton and sent to
    # the backend as a single batch.
    #
    # The recommended backend can handle any model with 1 input and 1
    # output as long as the input and output datatype and shape are
    # the same. The batching model uses datatype FP32 and shape
    # [ 4, 4 ].
    print('\n=========')
    async_requests = []

    input0_data = np.array([[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3],
                             [3.0, 3.1, 3.2, 3.3], [4.0, 4.1, 4.2, 4.3]]],
                           dtype=np.float32)
    print('Sending request to batching model: input = {}'.format(input0_data))
    inputs = [httpclient.InferInput('INPUT', [1, 4, 4], "FP32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    input0_data = np.array(
        [[[10.0, 10.1, 10.2, 10.3], [20.0, 20.1, 20.2, 20.3],
          [30.0, 30.1, 30.2, 30.3], [40.0, 40.1, 40.2, 40.3]]],
        dtype=np.float32)
    print('Sending request to batching model: input = {}'.format(input0_data))
    inputs = [httpclient.InferInput('INPUT', [1, 4, 4], "FP32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    for async_request in async_requests:
        # Get the result from the initiated asynchronous inference
        # request. This call will block till the server responds.
        result = async_request.get_result()
        print('Response: {}'.format(result.get_response()))
        print('OUTPUT = {}'.format(result.as_numpy('OUTPUT')))
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py   (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import json

import triton_python_backend_utils as pb_utils


# This model calculates the sum and difference of the INPUT0 and INPUT1 and put
# the results in OUTPUT0 and OUTPUT1 respectively. For more information
# regarding how this model.py was written, please refer to Python Backend.
class TritonPythonModel:

    def initialize(self, args):
        self.model_config = model_config = json.loads(args['model_config'])

        output0_config = pb_utils.get_output_config_by_name(
            model_config, "OUTPUT0")
        output1_config = pb_utils.get_output_config_by_name(
            model_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config['data_type'])
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output1_config['data_type'])

    def execute(self, requests):
        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(),
                            in_0.as_numpy() - in_1.as_numpy())

            out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                           out_0.astype(output0_dtype))
            out_tensor_1 = pb_utils.Tensor("OUTPUT1",
                                           out_1.astype(output1_dtype))

            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor_0, out_tensor_1])
            responses.append(inference_response)

        return responses

    def finalize(self):
        print('Cleaning up...')
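The model above implements element-wise add/subtract through the Python backend's pb_utils API. As a usage illustration (not part of this commit), the hedged sketch below calls addsub_python directly over HTTP; it assumes a tritonserver instance at localhost:8000 with the bls_models repository loaded and the tritonclient Python package available. The bls_fp32 model exercised by the bls_client would normally reach this model through the BLS backend; calling it directly is just a quick way to sanity-check the Python model in isolation.

# Sketch only: direct inference against addsub_python (INPUT0/INPUT1 are
# FP32 tensors of shape [16]; OUTPUT0/OUTPUT1 are their sum and difference).
import sys

import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url='localhost:8000')

in0 = np.random.rand(16).astype(np.float32)
in1 = np.random.rand(16).astype(np.float32)
inputs = [
    httpclient.InferInput("INPUT0", [16], "FP32"),
    httpclient.InferInput("INPUT1", [16], "FP32"),
]
inputs[0].set_data_from_numpy(in0)
inputs[1].set_data_from_numpy(in1)

result = client.infer("addsub_python", inputs)

# execute() above returns the element-wise sum and difference.
if not np.allclose(in0 + in1, result.as_numpy("OUTPUT0")):
    sys.exit("error: incorrect sum")
if not np.allclose(in0 - in1, result.as_numpy("OUTPUT1")):
    sys.exit("error: incorrect difference")
print("PASS")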
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt   (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "addsub_python"
backend: "python"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
input [
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb   (new binary file added, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt   (new file, mode 100644)
name: "addsub_tf"
platform: "tensorflow_savedmodel"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt   (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "bls_fp32"
backend: "bls"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
input [
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep   (new empty file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt   (new file, mode 100644)
backend: "minimal"
max_batch_size: 8
dynamic_batching {
max_queue_delay_microseconds: 5000000
}
input [
{
name: "IN0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
output [
{
name: "OUT0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep   (new empty file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt   (new file, mode 100644)
backend: "minimal"
max_batch_size: 0
input [
{
name: "IN0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
output [
{
name: "OUT0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep   (new empty file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt   (new file, mode 100644)
backend: "recommended"
max_batch_size: 8
dynamic_batching {
max_queue_delay_microseconds: 5000000
}
input [
{
name: "INPUT"
data_type: TYPE_FP32
dims: [ 4, 4 ]
}
]
output [
{
name: "OUTPUT"
data_type: TYPE_FP32
dims: [ 4, 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/include/triton/backend/backend_common.h   (new file, mode 100644)
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <chrono>
#include <condition_variable>
#include <deque>
#include <iostream>
#include <mutex>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
#include "triton/common/error.h"
#include "triton/core/tritonbackend.h"
#define TRITONJSON_STATUSTYPE TRITONSERVER_Error*
#define TRITONJSON_STATUSRETURN(M) \
return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str())
#define TRITONJSON_STATUSSUCCESS nullptr
#include "triton/common/triton_json.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace backend {
#define IGNORE_ERROR(X) \
do { \
TRITONSERVER_Error* ie_err__ = (X); \
if (ie_err__ != nullptr) { \
TRITONSERVER_ErrorDelete(ie_err__); \
} \
} while (false)
#define LOG_IF_ERROR(X, MSG) \
do { \
TRITONSERVER_Error* lie_err__ = (X); \
if (lie_err__ != nullptr) { \
IGNORE_ERROR(TRITONSERVER_LogMessage( \
TRITONSERVER_LOG_INFO, __FILE__, __LINE__, \
(std::string(MSG) + ": " + TRITONSERVER_ErrorCodeString(lie_err__) + \
" - " + TRITONSERVER_ErrorMessage(lie_err__)) \
.c_str())); \
TRITONSERVER_ErrorDelete(lie_err__); \
} \
} while (false)
#define LOG_MESSAGE(LEVEL, MSG) \
do { \
LOG_IF_ERROR( \
TRITONSERVER_LogMessage(LEVEL, __FILE__, __LINE__, MSG), \
("failed to log message: ")); \
} while (false)
#define RETURN_ERROR_IF_FALSE(P, C, MSG) \
do { \
if (!(P)) { \
return TRITONSERVER_ErrorNew(C, (MSG).c_str()); \
} \
} while (false)
#define RETURN_ERROR_IF_TRUE(P, C, MSG) \
do { \
if ((P)) { \
return TRITONSERVER_ErrorNew(C, (MSG).c_str()); \
} \
} while (false)
#define RETURN_IF_ERROR(X) \
do { \
TRITONSERVER_Error* rie_err__ = (X); \
if (rie_err__ != nullptr) { \
return rie_err__; \
} \
} while (false)
#ifdef TRITON_ENABLE_GPU
#define LOG_IF_CUDA_ERROR(X, MSG) \
do { \
cudaError_t lice_err__ = (X); \
if (lice_err__ != cudaSuccess) { \
IGNORE_ERROR(TRITONSERVER_LogMessage( \
TRITONSERVER_LOG_INFO, __FILE__, __LINE__, \
(std::string(MSG) + ": " + cudaGetErrorString(lice_err__)) \
.c_str())); \
} \
} while (false)
#define RETURN_IF_CUDA_ERROR(X, C, MSG) \
do { \
cudaError_t rice_err__ = (X); \
if (rice_err__ != cudaSuccess) { \
return TRITONSERVER_ErrorNew( \
C, ((MSG) + ": " + cudaGetErrorString(rice_err__)).c_str()); \
} \
} while (false)
#endif // TRITON_ENABLE_GPU
#define RESPOND_AND_SET_NULL_IF_ERROR(RESPONSE_PTR, X) \
do { \
TRITONSERVER_Error* rarie_err__ = (X); \
if (rarie_err__ != nullptr) { \
if (*RESPONSE_PTR != nullptr) { \
LOG_IF_ERROR( \
TRITONBACKEND_ResponseSend( \
*RESPONSE_PTR, TRITONSERVER_RESPONSE_COMPLETE_FINAL, \
rarie_err__), \
"failed to send error response"); \
*RESPONSE_PTR = nullptr; \
} \
TRITONSERVER_ErrorDelete(rarie_err__); \
} \
} while (false)
#define RESPOND_ALL_AND_SET_NULL_IF_ERROR(RESPONSES, RESPONSES_COUNT, X) \
do { \
TRITONSERVER_Error* raasnie_err__ = (X); \
if (raasnie_err__ != nullptr) { \
for (size_t ridx = 0; ridx < RESPONSES_COUNT; ++ridx) { \
if (RESPONSES[ridx] != nullptr) { \
LOG_IF_ERROR( \
TRITONBACKEND_ResponseSend( \
RESPONSES[ridx], TRITONSERVER_RESPONSE_COMPLETE_FINAL, \
raasnie_err__), \
"failed to send error response"); \
RESPONSES[ridx] = nullptr; \
} \
} \
TRITONSERVER_ErrorDelete(raasnie_err__); \
} \
} while (false)
#define RESPOND_ALL_AND_SET_TRUE_IF_ERROR(RESPONSES, RESPONSES_COUNT, BOOL, X) \
do { \
TRITONSERVER_Error* raasnie_err__ = (X); \
if (raasnie_err__ != nullptr) { \
BOOL = true; \
for (size_t ridx = 0; ridx < RESPONSES_COUNT; ++ridx) { \
if (RESPONSES[ridx] != nullptr) { \
LOG_IF_ERROR( \
TRITONBACKEND_ResponseSend( \
RESPONSES[ridx], TRITONSERVER_RESPONSE_COMPLETE_FINAL, \
raasnie_err__), \
"failed to send error response"); \
RESPONSES[ridx] = nullptr; \
} \
} \
TRITONSERVER_ErrorDelete(raasnie_err__); \
} \
} while (false)
#ifdef TRITON_ENABLE_STATS
#define TIMESPEC_TO_NANOS(TS) ((TS).tv_sec * 1000000000 + (TS).tv_nsec)
#define SET_TIMESTAMP(TS_NS) \
{ \
TS_NS = std::chrono::duration_cast<std::chrono::nanoseconds>( \
std::chrono::steady_clock::now().time_since_epoch()) \
.count(); \
}
#define DECL_TIMESTAMP(TS_NS) \
uint64_t TS_NS; \
SET_TIMESTAMP(TS_NS);
#else
#define DECL_TIMESTAMP(TS_NS)
#define SET_TIMESTAMP(TS_NS)
#endif // TRITON_ENABLE_STATS
#ifndef TRITON_ENABLE_GPU
using cudaStream_t = void*;
#endif // !TRITON_ENABLE_GPU
/// Convenience deleter for TRITONBACKEND_ResponseFactory.
struct ResponseFactoryDeleter {
  void operator()(TRITONBACKEND_ResponseFactory* f)
  {
    LOG_IF_ERROR(
        TRITONBACKEND_ResponseFactoryDelete(f),
        "failed deleting response factory");
  }
};
// A representation of the BatchInput message in model config
class BatchInput {
 public:
  enum class Kind {
    BATCH_ELEMENT_COUNT,
    BATCH_ACCUMULATED_ELEMENT_COUNT,
    BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO,
    BATCH_MAX_ELEMENT_COUNT_AS_SHAPE,
    BATCH_ITEM_SHAPE,
    BATCH_ITEM_SHAPE_FLATTEN
  };
  static TRITONSERVER_Error* ParseFromModelConfig(
      triton::common::TritonJson::Value& config,
      std::vector<BatchInput>* batch_inputs);
  const std::vector<std::string>& TargetNames() const { return target_names_; }
  TRITONSERVER_DataType DataType() const { return data_type_; }
  Kind BatchInputKind() const { return kind_; }
  std::string BatchInputKindString() const { return kind_str_; }
  const std::vector<std::string>& SourceInputs() const
  {
    return source_inputs_;
  }

 private:
  TRITONSERVER_Error* Init(triton::common::TritonJson::Value& bi_config);
  Kind kind_;
  std::string kind_str_;
  std::vector<std::string> target_names_;
  TRITONSERVER_DataType data_type_;
  std::vector<std::string> source_inputs_;
};
// A representation of the BatchOutput message in model config
class BatchOutput {
 public:
  enum class Kind { BATCH_SCATTER_WITH_INPUT_SHAPE };
  static TRITONSERVER_Error* ParseFromModelConfig(
      triton::common::TritonJson::Value& config,
      std::vector<BatchOutput>* batch_outputs);
  const std::vector<std::string>& TargetNames() const { return target_names_; }
  TRITONSERVER_DataType DataType() const { return data_type_; }
  const std::vector<int64_t>& OutputShape() const { return shape_; }
  Kind BatchOutputKind() const { return kind_; }
  const std::vector<std::string>& SourceInputs() const
  {
    return source_inputs_;
  }

 private:
  Kind kind_;
  std::vector<std::string> target_names_;
  TRITONSERVER_DataType data_type_;
  std::vector<int64_t> shape_;
  std::vector<std::string> source_inputs_;
};
struct CopyParams {
  CopyParams(void* dst, const void* src, const size_t byte_size)
      : dst_(dst), src_(src), byte_size_(byte_size)
  {
  }

  void* dst_;
  const void* src_;
  const size_t byte_size_;
};

/// The value for a dimension in a shape that indicates that that
/// dimension can take on any size.
constexpr int WILDCARD_DIM = -1;

constexpr char kTensorRTExecutionAccelerator[] = "tensorrt";
constexpr char kOpenVINOExecutionAccelerator[] = "openvino";
constexpr char kGPUIOExecutionAccelerator[] = "gpu_io";
constexpr char kAutoMixedPrecisionExecutionAccelerator[] =
    "auto_mixed_precision";

TRITONSERVER_MemoryType GetUsePinnedMemoryType(
    TRITONSERVER_MemoryType ref_buffer_type);

TRITONSERVER_Error* CommonErrorToTritonError(triton::common::Error error);

TRITONSERVER_Error_Code StatusCodeToTritonCode(
    triton::common::Error::Code error_code);
/// Parse an array in a JSON object into the corresponding shape. The
/// array must be composed of integers.
///
/// \param io The JSON object containing the member array.
/// \param name The name of the array member in the JSON object.
/// \param shape Returns the shape.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseShape(
    common::TritonJson::Value& io, const std::string& name,
    std::vector<int64_t>* shape);
/// Return the string representation of a shape.
///
/// \param dims The shape dimensions.
/// \param dims_count The number of dimensions.
/// \return The string representation.
std::string ShapeToString(const int64_t* dims, const size_t dims_count);

/// Return the string representation of a shape.
///
/// \param shape The shape as a vector of dimensions.
/// \return The string representation.
std::string ShapeToString(const std::vector<int64_t>& shape);

/// Return the number of elements of a shape.
///
/// \param dims The shape dimensions.
/// \param dims_count The number of dimensions.
/// \return The number of elements.
int64_t GetElementCount(const int64_t* dims, const size_t dims_count);

/// Return the number of elements of a shape.
///
/// \param shape The shape as a vector of dimensions.
/// \return The number of elements.
int64_t GetElementCount(const std::vector<int64_t>& shape);

/// Get the size, in bytes, of a tensor based on datatype and
/// shape.
/// \param dtype The data-type.
/// \param dims The shape.
/// \return The size, in bytes, of the corresponding tensor, or -1 if
/// unable to determine the size.
int64_t GetByteSize(
    const TRITONSERVER_DataType& dtype, const std::vector<int64_t>& dims);
/// Get an input tensor's contents into a buffer. This overload expects
/// both 'buffer' and buffers of the input to be in CPU.
///
/// \param request The inference request.
/// \param input_name The name of the input buffer.
/// \param buffer The buffer where the input tensor content is copied into.
/// \param buffer_byte_size Acts as both input and output. On input
/// gives the size of 'buffer', in bytes. The function will fail if
/// the buffer is not large enough to hold the input tensor
/// contents. Returns the size of the input tensor data returned in
/// 'buffer'.
/// \param host_policy_name The host policy name to look up the input buffer.
/// Default input buffer will be used if nullptr is provided.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ReadInputTensor(
    TRITONBACKEND_Request* request, const std::string& input_name,
    char* buffer, size_t* buffer_byte_size,
    const char* host_policy_name = nullptr);
/// Get an input tensor's contents into a buffer. This overload of
/// 'ReadInputTensor' supports input buffers that can be in any memory.
///
/// \param request The inference request.
/// \param input_name The name of the input buffer.
/// \param buffer The buffer where the input tensor content is copied into.
/// \param buffer_byte_size Acts as both input and output. On input
/// gives the size of 'buffer', in bytes. The function will fail if
/// the buffer is not large enough to hold the input tensor
/// contents. Returns the size of the input tensor data returned in
/// 'buffer'.
/// \param host_policy_name The host policy name to look up the input buffer.
/// Default input buffer will be used if nullptr is provided.
/// \param memory_type The memory type of the buffer provided.
/// \param memory_type_id The memory type id of the buffer provided.
/// \param cuda_stream specifies the stream to be associated with, and 0 can be
/// passed for default stream.
/// \param cuda_used returns whether a CUDA memory copy is initiated. If true,
/// the caller should synchronize on the given 'cuda_stream' to ensure data copy
/// is completed.
/// \param copy_on_stream whether the memory copies should be performed in cuda
/// host functions on the 'cuda_stream'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ReadInputTensor(
    TRITONBACKEND_Request* request, const std::string& input_name,
    char* buffer, size_t* buffer_byte_size,
    TRITONSERVER_MemoryType memory_type, int64_t memory_type_id,
    cudaStream_t cuda_stream, bool* cuda_used,
    const char* host_policy_name = nullptr,
    const bool copy_on_stream = false);
/// Validate that an input matches one of the allowed input names.
/// \param io The model input.
/// \param allowed The set of allowed input names.
/// \return The error status. A non-OK status indicates the input
/// is not valid.
TRITONSERVER_Error* CheckAllowedModelInput(
    common::TritonJson::Value& io, const std::set<std::string>& allowed);

/// Validate that an output matches one of the allowed output names.
/// \param io The model output.
/// \param allowed The set of allowed output names.
/// \return The error status. A non-OK status indicates the output
/// is not valid.
TRITONSERVER_Error* CheckAllowedModelOutput(
    common::TritonJson::Value& io, const std::set<std::string>& allowed);
/// Get the tensor name, false value, and true value for a boolean
/// sequence batcher control kind. If 'required' is true then must
/// find a tensor for the control. If 'required' is false, return
/// 'tensor_name' as empty-string if the control is not mapped to any
/// tensor.
///
/// \param batcher The JSON object of the sequence batcher.
/// \param model_name The name of the model.
/// \param control_kind The kind of control tensor to look for.
/// \param required Whether the tensor must be specified.
/// \param tensor_name Returns the name of the tensor.
/// \param tensor_datatype Returns the data type of the tensor.
/// \param fp32_false_value Returns the float value for false if
/// the tensor type is FP32.
/// \param fp32_true_value Returns the float value for true if
/// the tensor type is FP32.
/// \param int32_false_value Returns the int value for false if
/// the tensor type is INT32.
/// \param int32_true_value Returns the int value for true if
/// the tensor type is INT32.
/// \param bool_false_value Returns the bool value for false if
/// the tensor type is BOOL.
/// \param bool_true_value Returns the bool value for true if
/// the tensor type is BOOL.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* GetBooleanSequenceControlProperties(
    common::TritonJson::Value& batcher, const std::string& model_name,
    const std::string& control_kind, const bool required,
    std::string* tensor_name, std::string* tensor_datatype,
    float* fp32_false_value, float* fp32_true_value,
    int32_t* int32_false_value, int32_t* int32_true_value,
    bool* bool_false_value, bool* bool_true_value);
/// Get the tensor name and datatype for a non-boolean sequence
/// batcher control kind. If 'required' is true then must find a
/// tensor for the control. If 'required' is false, return
/// 'tensor_name' as empty-string if the control is not mapped to any
/// tensor. 'tensor_datatype' returns the required datatype for the
/// control.
///
/// \param batcher The JSON object of the sequence batcher.
/// \param model_name The name of the model.
/// \param control_kind The kind of control tensor to look for.
/// \param required Whether the tensor must be specified.
/// \param tensor_name Returns the name of the tensor.
/// \param tensor_datatype Returns the data type of the tensor.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* GetTypedSequenceControlProperties(
    common::TritonJson::Value& batcher, const std::string& model_name,
    const std::string& control_kind, const bool required,
    std::string* tensor_name, std::string* tensor_datatype);
/// Create and send an error response for a set of requests. This
/// function takes ownership of 'response_err' and so the caller must
/// not access or delete it after this call returns.
///
/// \param requests The requests.
/// \param request_count The number of 'requests'.
/// \param response_err The error to send to each request.
/// \param release_request If true, the requests will be released after
/// sending the error responses and the request pointers are set to
/// nullptr.
void RequestsRespondWithError(
    TRITONBACKEND_Request** requests, const uint32_t request_count,
    TRITONSERVER_Error* response_err, const bool release_request = true);
/// Send an error response for a set of responses. This function takes
/// ownership of 'response_err' and so the caller must not access or
/// delete it after this call returns.
///
/// \param responses The responses.
/// \param response_count The number of 'responses'.
/// \param response_err The error to send.
void SendErrorForResponses(
    std::vector<TRITONBACKEND_Response*>* responses,
    const uint32_t response_count, TRITONSERVER_Error* response_err);
/// Copy buffer from 'src' to 'dst' for given 'byte_size'. The buffer location
/// is identified by the memory type and id, and the corresponding copy will be
/// initiated.
/// \param msg The message to be prepended in error message.
/// \param src_memory_type The memory type of the source buffer.
/// \param src_memory_type_id The memory type id of the source buffer.
/// \param dst_memory_type The memory type of the destination buffer.
/// \param dst_memory_type_id The memory type id of the destination buffer.
/// \param byte_size The byte size of the source buffer.
/// \param src The pointer to the source buffer.
/// \param dst The pointer to the destination buffer.
/// \param cuda_stream specifies the stream to be associated with, and 0 can be
/// passed for default stream.
/// \param cuda_used returns whether a CUDA memory copy is initiated. If true,
/// the caller should synchronize on the given 'cuda_stream' to ensure data copy
/// is completed.
/// \param copy_on_stream whether the memory copies should be performed in cuda
/// host functions on the 'cuda_stream'.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* CopyBuffer(
    const std::string& msg, const TRITONSERVER_MemoryType src_memory_type,
    const int64_t src_memory_type_id,
    const TRITONSERVER_MemoryType dst_memory_type,
    const int64_t dst_memory_type_id, const size_t byte_size, const void* src,
    void* dst, cudaStream_t cuda_stream, bool* cuda_used,
    const bool copy_on_stream = false);
/// Does a file or directory exist?
/// \param path The path to check for existence.
/// \param exists Returns true if file/dir exists
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* FileExists(const std::string& path, bool* exists);

/// Read a text file into a string.
/// \param path The path of the file.
/// \param contents Returns the contents of the file.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ReadTextFile(
    const std::string& path, std::string* contents);

/// Is a path a directory?
/// \param path The path to check.
/// \param is_dir Returns true if path represents a directory
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* IsDirectory(const std::string& path, bool* is_dir);

/// Join path segments into a longer path
/// \param segments The path segments.
/// \return the path formed by joining the segments.
std::string JoinPath(std::initializer_list<std::string> segments);
/// Returns the content in the model version path and the path to the content as
/// key-value pair.
/// \param model_repository_path The path to the model repository.
/// \param version The version of the model.
/// \param ignore_directories Whether the directories will be ignored.
/// \param ignore_files Whether the files will be ignored.
/// \param model_paths Returns the content in the model version path and
/// the path to the content.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ModelPaths(
    const std::string& model_repository_path, uint64_t version,
    const bool ignore_directories, const bool ignore_files,
    std::unordered_map<std::string, std::string>* model_paths);
/// Create a CUDA stream appropriate for GPU<->CPU data transfer
/// operations for a given GPU device. The caller takes ownership of
/// the stream. 'stream' returns nullptr if GPU support is disabled.
///
/// \param device_id The ID of the GPU.
/// \param priority The stream priority. Use 0 for normal priority.
/// \param stream Returns the created stream.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* CreateCudaStream(
    const int device_id, const int cuda_stream_priority,
    cudaStream_t* stream);
/// Parse the string as long long integer.
///
/// \param value The string.
/// \param parsed_value The long long integral value of the string.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseLongLongValue(
    const std::string& value, int64_t* parsed_value);

/// Parse the string as unsigned long long integer.
///
/// \param value The string.
/// \param parsed_value The unsigned long long integral value of the string.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseUnsignedLongLongValue(
    const std::string& value, uint64_t* parsed_value);

/// Parse the string as boolean.
///
/// \param value The string.
/// \param parsed_value The boolean value of the string.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseBoolValue(
    const std::string& value, bool* parsed_value);

/// Parse the string as integer.
///
/// \param value The string.
/// \param parsed_value The integral value of the string.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseIntValue(const std::string& value, int* parsed_value);

/// Parse the string as double.
///
/// \param value The string.
/// \param parsed_value The double value of the string.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* ParseDoubleValue(
    const std::string& value, double* parsed_value);
/// Return the value of the specified key in a JSON object.
///
/// \param params The JSON object containing the key-value mapping.
/// \param key The key to look up the value in the JSON object.
/// \param value Returns the value.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* GetParameterValue(
    triton::common::TritonJson::Value& params, const std::string& key,
    std::string* value);

/// Return the Triton server data type of the data type string specified
/// in model config JSON.
///
/// \param data_type_str The string representation of the data type.
/// \return the Triton server data type.
TRITONSERVER_DataType ModelConfigDataTypeToTritonServerDataType(
    const std::string& data_type_str);
/// Try to parse the requested parameter.
///
/// \param params The param in model config
/// \param mkey Key in the model config.
/// \param value The parsed string value.
/// \param default_value Default value to use when key is not found.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    std::string* value, const std::string& default_value);

/// Try to parse the requested parameter.
///
/// \param params The param in model config
/// \param mkey Key in the model config.
/// \param value The parsed int value.
/// \param default_value Default value to use when key is not found.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    int* value, const int& default_value);

/// Try to parse the requested parameter.
///
/// \param params The param in model config
/// \param mkey Key in the model config.
/// \param value The parsed bool value.
/// \param default_value Default value to use when key is not found.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    bool* value, const bool& default_value);

/// Try to parse the requested parameter.
///
/// \param params The param in model config
/// \param mkey Key in the model config.
/// \param value The parsed uint64 value.
/// \param default_value Default value to use when key is not found.
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    uint64_t* value, const uint64_t& default_value);
/// Get a string representation of a tensor buffer.
///
/// \param str Returns the string.
/// \param buffer The base pointer to the tensor buffer.
/// \param buffer_byte_size The size of the buffer in bytes.
/// \param datatype The type of the tensor
/// \return a TRITONSERVER_Error indicating success or failure.
TRITONSERVER_Error* BufferAsTypedString(
    std::string& str, const char* buffer, size_t buffer_byte_size,
    TRITONSERVER_DataType datatype);

/// Get the ID of the request as a string formatted for logging.
///
/// \param request Request of which to get the ID.
/// \return a formatted string for logging the request ID.
std::string GetRequestId(TRITONBACKEND_Request* request);

}}  // namespace triton::backend
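The utilities declared above are the building blocks most backends lean on for error propagation, logging, and input access. The following is a minimal sketch only, assuming a hypothetical helper function and the 4-element INT32 "IN0" tensor from the minimal example config earlier in this commit; it is not code contained in this change.

#include "triton/backend/backend_common.h"

namespace triton { namespace backend {

// Hypothetical helper: copy a small CPU input tensor named "IN0" out of a
// request and log its size. Errors propagate via RETURN_IF_ERROR.
TRITONSERVER_Error*
ExampleReadIn0(TRITONBACKEND_Request* request)
{
  // Assumed fixed shape: 4 INT32 elements, matching the example config.
  std::vector<int64_t> shape{4};
  const int64_t expected_bytes = GetByteSize(TRITONSERVER_TYPE_INT32, shape);

  std::vector<char> data(expected_bytes);
  size_t buffer_byte_size = data.size();
  RETURN_IF_ERROR(
      ReadInputTensor(request, "IN0", data.data(), &buffer_byte_size));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("read IN0: ") + std::to_string(buffer_byte_size) +
       " bytes, shape " + ShapeToString(shape))
          .c_str());
  return nullptr;  // success
}

}}  // namespace triton::backend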
3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h
0 → 100644
View file @
0a21fff9
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <list>
#include <memory>
#include <string>
#include <vector>
#include "triton/backend/backend_common.h"
#include "triton/backend/backend_memory.h"
#include "triton/common/async_work_queue.h"
#include "triton/common/sync_queue.h"
#include "triton/core/tritonbackend.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace backend {
#ifndef TRITON_ENABLE_GPU
using cudaStream_t = void*;
using cudaEvent_t = void*;
#endif // !TRITON_ENABLE_GPU
//
// BackendInputCollector
//
class BackendInputCollector {
 public:
  // The caller can optionally provide 'event' for internal synchronization
  // instead of using 'stream'. If 'host_policy_name' is provided, it must be
  // valid for the lifetime of the collector
  explicit BackendInputCollector(
      TRITONBACKEND_Request** requests, const uint32_t request_count,
      std::vector<TRITONBACKEND_Response*>* responses,
      TRITONBACKEND_MemoryManager* memory_manager, const bool pinned_enabled,
      cudaStream_t stream, cudaEvent_t event = nullptr,
      cudaEvent_t buffer_ready_event = nullptr,
      const size_t kernel_buffer_threshold = 0,
      const char* host_policy_name = nullptr,
      const bool copy_on_stream = false,
      const bool coalesce_request_input = false)
      : need_sync_(false), requests_(requests), request_count_(request_count),
        responses_(responses), memory_manager_(memory_manager),
        pinned_enabled_(pinned_enabled),
        use_async_cpu_copy_(triton::common::AsyncWorkQueue::WorkerCount() > 1),
        stream_(stream), event_(event),
        buffer_ready_event_(buffer_ready_event),
        kernel_buffer_threshold_(kernel_buffer_threshold),
        pending_pinned_byte_size_(0), pending_pinned_offset_(0),
        pending_copy_kernel_buffer_byte_size_(0),
        pending_copy_kernel_buffer_offset_(0),
        pending_copy_kernel_input_buffer_counts_(0), async_task_count_(0),
        host_policy_cstr_(host_policy_name), copy_on_stream_(copy_on_stream),
        coalesce_request_input_(coalesce_request_input)
  {
  }

  ~BackendInputCollector() = default;
// Process all requests for a named input tensor and return the
// concatenated values of those requests in a single contiguous
// buffer. This overload of the function can avoid data copy if the
// tensor values are already contiguous and the caller doesn't
// provide a destination 'buffer'.
//
// 'buffer' is used to determine whether the input should be placed at the
// 'buffer' provided by the caller. If 'buffer' == nullptr, the returned
// buffer will be managed by the BackendInputCollector object and
// has the same lifecycle as the BackendInputCollector object.
// 'buffer_byte_size' is the byte size of 'buffer' if it is not nullptr.
// 'allowed_input_types' is the ordered list of the memory type and id pairs
// that the returned buffer can be. It must only contain the memory type
// and id of 'buffer' if 'buffer' is not nullptr.
// 'dst_buffer' returns the contiguous buffer of the input tensor.
// 'dst_buffer_byte_size' the byte size of 'dst_buffer'.
// 'dst_memory_type' returns the memory type of 'dst_buffer'.
// 'dst_memory_type_id' returns the memory type id of 'dst_buffer'.
  TRITONSERVER_Error* ProcessTensor(
      const char* input_name, char* buffer, const size_t buffer_byte_size,
      const std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>>&
          allowed_input_types,
      const char** dst_buffer, size_t* dst_buffer_byte_size,
      TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id);
  // Process all requests for a named input tensor and return the
  // concatenated values of those requests in a single contiguous
  // 'buffer'.
  //
  // 'buffer' The buffer to hold the concatenated tensor values. Must
  // be large enough to hold all tensor values.
  // 'buffer_byte_size' is the byte size of 'buffer'.
  // 'memory_type' The memory type of 'buffer'.
  // 'memory_type_id' The memory type id of 'buffer'.
  void ProcessTensor(
      const char* input_name, char* buffer, const size_t buffer_byte_size,
      const TRITONSERVER_MemoryType memory_type,
      const int64_t memory_type_id);
  // Process the batch input and return its shape. Returning error indicates
  // that the batch input can't be formed properly and the caller should abort
  // the whole batch.
  TRITONSERVER_Error* BatchInputShape(
      const BatchInput& batch_input, std::vector<int64_t>* shape);
// Process the batch input and derive its value into 'buffer'. Returning
// error indicates that the batch input can't be formed properly and
// the caller should abort the whole batch.
// 'buffer' is used to determine whether the input should be placed at the
// 'buffer' provided by the caller. If 'buffer' == nullptr, the returned
// buffer will be managed by the BackendInputCollector object and
// has the same lifecycle as the BackendInputCollector object.
// 'buffer_byte_size' is the byte size of 'buffer' if it is not nullptr.
// 'allowed_input_types' is the ordered list of the memory type and id pairs
// that the returned buffer can be. It must only contain the memory type
// and id of 'buffer' if it is not nullptr.
// 'dst_buffer' returns the contiguous buffer of the input tensor.
// 'dst_memory_type' returns the memory type of 'dst_buffer'.
// 'dst_memory_type_id' returns the memory type id of 'dst_buffer'.
  TRITONSERVER_Error* ProcessBatchInput(
      const BatchInput& batch_input, char* buffer,
      const size_t buffer_byte_size,
      const std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>>&
          allowed_input_types,
      const char** dst_buffer, size_t* dst_buffer_byte_size,
      TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id);
  // Finalize processing of all requests for all input tensors. Return
  // true if cudaMemcpyAsync is called, and the caller should call
  // cudaStreamSynchronize (or cudaEventSynchronize on 'event') before
  // using the data.
  bool Finalize();

 private:
  struct ContiguousBuffer {
    ContiguousBuffer() : start_request_idx_(0), end_request_idx_(0) {}

    MemoryDesc memory_desc_;
    size_t start_request_idx_;
    size_t end_request_idx_;
  };
  class InputIterator {
   public:
    InputIterator(
        TRITONBACKEND_Request** requests, const uint32_t request_count,
        std::vector<TRITONBACKEND_Response*>* responses,
        const char* input_name, const char* host_policy_name,
        const bool coalesce_request_input);

    // Return false if iterator reaches the end of inputs, 'input' is not set.
    bool GetNextContiguousInput(ContiguousBuffer* input);

   private:
    TRITONBACKEND_Request** requests_;
    const uint32_t request_count_;
    std::vector<TRITONBACKEND_Response*>* responses_;
    const char* input_name_;
    const char* host_policy_;
    const bool coalesce_request_input_;

    TRITONBACKEND_Input* curr_input_;
    size_t curr_request_idx_;
    size_t curr_buffer_idx_;
    uint32_t curr_buffer_cnt_;
    bool reach_end_;
  };
  // Return whether the entire input is in a contiguous buffer. If returns true,
  // the properties of the contiguous input buffer will also be returned.
  // Otherwise, only 'buffer_byte_size' will be set and return the total byte
  // size of the input.
  bool GetInputBufferIfContiguous(
      const char* input_name, const char** buffer, size_t* buffer_byte_size,
      TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);

  bool FlushPendingPinned(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);

  bool FlushPendingCopyKernel(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);

  TRITONSERVER_Error* LaunchCopyKernel(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);

  bool SetInputTensor(
      const char* input_name, const ContiguousBuffer& input,
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id, const size_t tensor_buffer_offset,
      const TRITONSERVER_MemoryType use_pinned_memory_type,
      const bool use_kernel, const bool wait_buffer);

  template <typename T>
  TRITONSERVER_Error* SetElementCount(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);

  template <typename T>
  TRITONSERVER_Error* SetAccumulatedElementCount(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);

  template <typename T>
  TRITONSERVER_Error* SetBatchItemShape(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);
  bool need_sync_;
  TRITONBACKEND_Request** requests_;
  const uint32_t request_count_;
  std::vector<TRITONBACKEND_Response*>* responses_;
  TRITONBACKEND_MemoryManager* memory_manager_;
  const bool pinned_enabled_;
  const bool use_async_cpu_copy_;
  cudaStream_t stream_;
  cudaEvent_t event_;
  cudaEvent_t buffer_ready_event_;
  const size_t kernel_buffer_threshold_;

  size_t pending_pinned_byte_size_;
  size_t pending_pinned_offset_;
  std::list<ContiguousBuffer> pending_pinned_input_buffers_;

  // managed memories that need to live over the lifetime of this
  // BackendInputCollector object.
  std::list<std::unique_ptr<BackendMemory>> in_use_memories_;

  size_t pending_copy_kernel_buffer_byte_size_;
  size_t pending_copy_kernel_buffer_offset_;
  size_t pending_copy_kernel_input_buffer_counts_;
  std::list<ContiguousBuffer> pending_copy_kernel_input_buffers_;
  std::vector<std::unique_ptr<std::vector<int8_t*>>> input_ptr_buffer_host_;
  std::vector<std::unique_ptr<std::vector<size_t>>> byte_size_buffer_host_;
  std::vector<std::unique_ptr<std::vector<size_t>>>
      byte_size_offset_buffer_host_;
  // Pinned memory buffers and the corresponding request_inputs where
  // the final copy to the tensor is deferred until Finalize() after
  // waiting for all in-flight copies.
  struct DeferredPinned {
    DeferredPinned(
        char* pinned_memory, const size_t pinned_memory_size,
        char* tensor_buffer, const size_t tensor_buffer_offset,
        const TRITONSERVER_MemoryType tensor_memory_type,
        const int64_t tensor_memory_id,
        std::list<ContiguousBuffer>&& request_buffers,
        std::vector<TRITONBACKEND_Response*>* responses)
        : finalized_(false), pinned_memory_(pinned_memory),
          pinned_memory_size_(pinned_memory_size),
          tensor_buffer_(tensor_buffer),
          tensor_buffer_offset_(tensor_buffer_offset),
          tensor_memory_type_(tensor_memory_type),
          tensor_memory_id_(tensor_memory_id),
          requests_(std::move(request_buffers)), responses_(responses)
    {
    }

    bool Finalize(cudaStream_t stream);

    bool finalized_;
    // Holding reference to the pinned memory buffer, which is managed
    // by BackendInputCollector as 'pinned_memory'
    char* pinned_memory_;
    const size_t pinned_memory_size_;
    char* tensor_buffer_;
    const size_t tensor_buffer_offset_;
    const TRITONSERVER_MemoryType tensor_memory_type_;
    const int64_t tensor_memory_id_;
    std::list<ContiguousBuffer> requests_;
    std::vector<TRITONBACKEND_Response*>* responses_;
  };

  std::list<DeferredPinned> deferred_pinned_;
  // FIXME use future to maintain an issue-order queue to drop task count
  triton::common::SyncQueue<bool> completion_queue_;
  size_t async_task_count_;
  const char* host_policy_cstr_;
  const bool copy_on_stream_;
  const bool coalesce_request_input_;
};

}}  // namespace triton::backend
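To show how the collector is meant to be driven, here is a rough sketch under assumptions: the helper name and the "INPUT" tensor (as in the recommended example config) are illustrative and not part of this commit. A backend's execute path typically constructs a BackendInputCollector over the batch of requests, concatenates each named input into a contiguous buffer, then calls Finalize() and synchronizes if it reports in-flight CUDA copies.

#include "triton/backend/backend_input_collector.h"

namespace triton { namespace backend {

// Hypothetical helper: gather the "INPUT" tensor of a batch of requests into
// a caller-provided CPU buffer. Returns true if the caller must synchronize
// 'stream' before reading 'input_buffer'.
bool
ExampleCollectInput(
    TRITONBACKEND_Request** requests, const uint32_t request_count,
    std::vector<TRITONBACKEND_Response*>* responses,
    TRITONBACKEND_MemoryManager* memory_manager, cudaStream_t stream,
    char* input_buffer, const size_t input_buffer_byte_size)
{
  BackendInputCollector collector(
      requests, request_count, responses, memory_manager,
      true /* pinned_enabled */, stream);

  // Concatenate the per-request "INPUT" tensors into the contiguous buffer.
  collector.ProcessTensor(
      "INPUT", input_buffer, input_buffer_byte_size, TRITONSERVER_MEMORY_CPU,
      0 /* memory_type_id */);

  // Finalize() returns true when an asynchronous CUDA copy was issued; the
  // caller should then synchronize on 'stream' before using the data.
  return collector.Finalize();
}

}}  // namespace triton::backend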
3rdparty/backend-r22.12/include/triton/backend/backend_memory.h
0 → 100644
View file @
0a21fff9
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <string>
#include <vector>
#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"
namespace triton { namespace backend {
// Collection of common properties that describe a buffer in Triton
struct MemoryDesc {
  MemoryDesc()
      : buffer_(nullptr), byte_size_(0),
        memory_type_(TRITONSERVER_MEMORY_CPU), memory_type_id_(0)
  {
  }
  MemoryDesc(
      const char* buffer, size_t byte_size,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
      : buffer_(buffer), byte_size_(byte_size), memory_type_(memory_type),
        memory_type_id_(memory_type_id)
  {
  }

  const char* buffer_;
  size_t byte_size_;
  TRITONSERVER_MemoryType memory_type_;
  int64_t memory_type_id_;
};
//
// BackendMemory
//
// Utility class for allocating and deallocating memory using both
// TRITONBACKEND_MemoryManager and direct GPU and CPU malloc/free.
//
class BackendMemory {
 public:
  enum class AllocationType {
    CPU,
    CPU_PINNED,
    GPU,
    CPU_PINNED_POOL,
    GPU_POOL
  };
// Allocate a contiguous block of 'alloc_type' memory. 'mem'
// returns the pointer to the allocated memory.
//
// CPU, CPU_PINNED_POOL and GPU_POOL are allocated using
// TRITONBACKEND_MemoryManagerAllocate. Note that CPU_PINNED and GPU
// allocations can be much slower than the POOL variants.
//
// Two error codes have specific interpretations for this function:
//
// TRITONSERVER_ERROR_UNSUPPORTED: Indicates that function is
// incapable of allocating the requested memory type and memory
// type ID. Requests for the memory type and ID will always fail
// no matter 'byte_size' of the request.
//
// TRITONSERVER_ERROR_UNAVAILABLE: Indicates that function can
// allocate the memory type and ID but that currently it cannot
// allocate a contiguous block of memory of the requested
// 'byte_size'.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type,
      const int64_t memory_type_id, const size_t byte_size,
      BackendMemory** mem);
  // Allocate a contiguous block of memory by attempting the
  // allocation using 'alloc_types' in order until one is successful.
  // See BackendMemory::Create() above for details.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager,
      const std::vector<AllocationType>& alloc_types,
      const int64_t memory_type_id, const size_t byte_size,
      BackendMemory** mem);
  // Creates a BackendMemory object from a pre-allocated buffer. The buffer
  // is not owned by the object created with this function. Hence, for
  // proper operation, the lifetime of the buffer should extend at least
  // until the corresponding BackendMemory object is destroyed.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type,
      const int64_t memory_type_id, void* buffer, const size_t byte_size,
      BackendMemory** mem);
  ~BackendMemory();

  AllocationType AllocType() const { return alloctype_; }
  int64_t MemoryTypeId() const { return memtype_id_; }
  char* MemoryPtr() { return buffer_; }
  size_t ByteSize() const { return byte_size_; }
  TRITONSERVER_MemoryType MemoryType() const
  {
    return AllocTypeToMemoryType(alloctype_);
  }

  static TRITONSERVER_MemoryType AllocTypeToMemoryType(const AllocationType a);
  static const char* AllocTypeString(const AllocationType a);

 private:
  BackendMemory(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloctype,
      const int64_t memtype_id, char* buffer, const size_t byte_size,
      const bool owns_buffer = true)
      : manager_(manager), alloctype_(alloctype), memtype_id_(memtype_id),
        buffer_(buffer), byte_size_(byte_size), owns_buffer_(owns_buffer)
  {
  }

  TRITONBACKEND_MemoryManager* manager_;
  AllocationType alloctype_;
  int64_t memtype_id_;
  char* buffer_;
  size_t byte_size_;
  bool owns_buffer_;
};

}}  // namespace triton::backend
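A hedged sketch of the fallback-allocation pattern this class enables follows; the helper name is illustrative and not part of this commit. It requests a pooled GPU allocation on device 0 first and falls back to pinned and then plain CPU memory.

#include <memory>

#include "triton/backend/backend_common.h"
#include "triton/backend/backend_memory.h"

namespace triton { namespace backend {

// Hypothetical helper: allocate 'byte_size' bytes, preferring a pooled GPU
// allocation and falling back to pinned and then plain CPU memory.
TRITONSERVER_Error*
ExampleAllocate(
    TRITONBACKEND_MemoryManager* manager, const size_t byte_size,
    std::unique_ptr<BackendMemory>* out)
{
  BackendMemory* mem = nullptr;
  RETURN_IF_ERROR(BackendMemory::Create(
      manager,
      {BackendMemory::AllocationType::GPU_POOL,
       BackendMemory::AllocationType::CPU_PINNED_POOL,
       BackendMemory::AllocationType::CPU},
      0 /* memory_type_id */, byte_size, &mem));
  out->reset(mem);

  LOG_MESSAGE(
      TRITONSERVER_LOG_VERBOSE,
      (std::string("allocated ") + std::to_string(mem->ByteSize()) +
       " bytes as " + BackendMemory::AllocTypeString(mem->AllocType()))
          .c_str());
  return nullptr;  // success
}

}}  // namespace triton::backend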
3rdparty/backend-r22.12/include/triton/backend/backend_model.h
0 → 100644
View file @
0a21fff9
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <map>
#include <set>
#include <string>
#include "triton/backend/backend_common.h"
#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"
namespace triton { namespace backend {
//
// BackendModel
//
// Common functionality for a backend model. This class is provided as
// a convenience; backends are not required to use this class.
//
class BackendModel {
 public:
  BackendModel(
      TRITONBACKEND_Model* triton_model, const bool allow_optional = false);
  virtual ~BackendModel() = default;
  // Get the handle to the TRITONSERVER server hosting this model.
  TRITONSERVER_Server* TritonServer() { return triton_server_; }

  // Get the handle to the memory manager for this model.
  TRITONBACKEND_MemoryManager* TritonMemoryManager()
  {
    return triton_memory_manager_;
  }

  // Get the handle to the TRITONBACKEND model.
  TRITONBACKEND_Model* TritonModel() { return triton_model_; }

  // Get the name and version of the model.
  const std::string& Name() const { return name_; }
  uint64_t Version() const { return version_; }

  const std::string& RepositoryPath() const { return repository_path_; }

  // The model configuration.
  common::TritonJson::Value& ModelConfig() { return model_config_; }

  // Sets the updated model configuration to the core.
  TRITONSERVER_Error* SetModelConfig();

  // Parses information out of the model configuration.
  TRITONSERVER_Error* ParseModelConfig();
  // Maximum batch size supported by the model. A value of 0
  // indicates that the model does not support batching.
  int MaxBatchSize() const { return max_batch_size_; }

  // Set the max batch size for the model. When a backend
  // auto-completes a configuration it may set or change the maximum
  // batch size.
  void SetMaxBatchSize(const int b) { max_batch_size_ = b; }

  // Does this model support batching in the first dimension?
  TRITONSERVER_Error* SupportsFirstDimBatching(bool* supports);
  // Use indirect pinned memory buffer when copying an input or output
  // tensor to/from the model.
  bool EnablePinnedInput() const { return enable_pinned_input_; }
  bool EnablePinnedOutput() const { return enable_pinned_output_; }

  const std::vector<BatchInput>& BatchInputs() const { return batch_inputs_; }
  const std::vector<BatchOutput>& BatchOutputs() const
  {
    return batch_outputs_;
  }
  const BatchOutput* FindBatchOutput(const std::string& output_name) const;

  bool IsInputRagged(const std::string& input_name) const
  {
    return (ragged_inputs_.find(input_name) != ragged_inputs_.end());
  }

  bool IsInputOptional(const std::string& input_name) const
  {
    return (optional_inputs_.find(input_name) != optional_inputs_.end());
  }
 protected:
  TRITONSERVER_Server* triton_server_;
  TRITONBACKEND_MemoryManager* triton_memory_manager_;
  TRITONBACKEND_Model* triton_model_;
  std::string name_;
  uint64_t version_;
  std::string repository_path_;
  bool allow_optional_;

  common::TritonJson::Value model_config_;

  int max_batch_size_;
  bool enable_pinned_input_;
  bool enable_pinned_output_;
  std::vector<BatchInput> batch_inputs_;
  std::vector<BatchOutput> batch_outputs_;
  std::map<std::string, const BatchOutput*> batch_output_map_;
  std::set<std::string> ragged_inputs_;
  std::set<std::string> optional_inputs_;
};
//
// BackendModelException
//
// Exception thrown if an error occurs while constructing a
// BackendModel.
//
struct BackendModelException {
  BackendModelException(TRITONSERVER_Error* err) : err_(err) {}

  TRITONSERVER_Error* err_;
};
#define THROW_IF_BACKEND_MODEL_ERROR(X) \
do { \
TRITONSERVER_Error* tie_err__ = (X); \
if (tie_err__ != nullptr) { \
throw triton::backend::BackendModelException(tie_err__); \
} \
} while (false)
}}  // namespace triton::backend
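For orientation, backends typically wrap per-model state in a subclass of BackendModel and translate a BackendModelException from the base constructor back into a TRITONSERVER_Error, as the example backends in this commit do. The ModelState below is an illustrative sketch under those assumptions, not code contained in this change.

#include "triton/backend/backend_model.h"

namespace triton { namespace backend {

// Hypothetical per-model state for a backend, built on BackendModel.
class ModelState : public BackendModel {
 public:
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_Model* triton_model, ModelState** state)
  {
    try {
      *state = new ModelState(triton_model);
    }
    catch (const BackendModelException& ex) {
      RETURN_ERROR_IF_TRUE(
          ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL,
          std::string("unexpected nullptr in BackendModelException"));
      RETURN_IF_ERROR(ex.err_);
    }

    // Batching support is parsed from the model configuration by the base
    // class; expose it however the backend needs.
    bool supports_batching = false;
    RETURN_IF_ERROR((*state)->SupportsFirstDimBatching(&supports_batching));
    return nullptr;  // success
  }

 private:
  explicit ModelState(TRITONBACKEND_Model* triton_model)
      : BackendModel(triton_model)
  {
  }
};

}}  // namespace triton::backend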