OpenDAS / Lmdeploy / Commits

Commit b30f3cdb
Authored Nov 14, 2023 by xiabo
Parent: e38ee081

Commit message: Add the downloaded code
Changes: 418

Showing 20 changed files with 2686 additions and 0 deletions (+2686 -0):

3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc  +750  -0
3rdparty/backend-r22.12/examples/clients/bls_client  +86  -0
3rdparty/backend-r22.12/examples/clients/minimal_client  +92  -0
3rdparty/backend-r22.12/examples/clients/recommended_client  +91  -0
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py  +74  -0
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt  +58  -0
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb  +0  -0
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt  +28  -0
3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt  +63  -0
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep  +0  -0
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt  +24  -0
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep  +0  -0
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt  +21  -0
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep  +0  -0
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt  +24  -0
3rdparty/backend-r22.12/include/triton/backend/backend_common.h  +672  -0
3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h  +301  -0
3rdparty/backend-r22.12/include/triton/backend/backend_memory.h  +138  -0
3rdparty/backend-r22.12/include/triton/backend/backend_model.h  +146  -0
3rdparty/backend-r22.12/include/triton/backend/backend_model_instance.h  +118  -0
3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc  (new file, mode 100644)
[Diff collapsed; contents not shown.]
3rdparty/backend-r22.12/examples/clients/bls_client  (new file, mode 100644)
#!/usr/bin/python
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import argparse
import numpy as np
import tritonhttpclient as httpclient
from tritonclientutils import np_to_triton_dtype

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')
    FLAGS = parser.parse_args()

    model_name = "bls_fp32"
    shape = [16]

    with httpclient.InferenceServerClient(url=FLAGS.url) as client:
        input0_data = np.random.rand(*shape).astype(np.float32)
        input1_data = np.random.rand(*shape).astype(np.float32)
        inputs = [
            httpclient.InferInput("INPUT0", input0_data.shape,
                                  np_to_triton_dtype(input0_data.dtype)),
            httpclient.InferInput("INPUT1", input1_data.shape,
                                  np_to_triton_dtype(input1_data.dtype)),
        ]

        inputs[0].set_data_from_numpy(input0_data)
        inputs[1].set_data_from_numpy(input1_data)

        outputs = [
            httpclient.InferRequestedOutput("OUTPUT0"),
            httpclient.InferRequestedOutput("OUTPUT1"),
        ]

        response = client.infer(model_name,
                                inputs,
                                request_id=str(1),
                                outputs=outputs)

        result = response.get_response()
        output0_data = response.as_numpy("OUTPUT0")
        output1_data = response.as_numpy("OUTPUT1")

        print("INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
            input0_data, input1_data, output0_data))
        print("INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
            input0_data, input1_data, output1_data))

        if not np.allclose(input0_data + input1_data, output0_data):
            print("error: incorrect sum")
            sys.exit(1)

        if not np.allclose(input0_data - input1_data, output1_data):
            print("error: incorrect difference")
            sys.exit(1)

        print('\nPASS')
        sys.exit(0)
3rdparty/backend-r22.12/examples/clients/minimal_client  (new file, mode 100644)
#!/usr/bin/env python
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import argparse
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')
    FLAGS = parser.parse_args()

    # For the HTTP client, need to specify large enough concurrency to
    # issue all the inference requests to the server in parallel. For
    # this example we want to be able to send 2 requests concurrently.
    try:
        concurrent_request_count = 2
        triton_client = httpclient.InferenceServerClient(
            url=FLAGS.url, concurrency=concurrent_request_count)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # First send a single request to the nonbatching model.
    print('=========')
    input0_data = np.array([1, 2, 3, 4], dtype=np.int32)
    print('Sending request to nonbatching model: IN0 = {}'.format(input0_data))

    inputs = [httpclient.InferInput('IN0', [4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    result = triton_client.infer('nonbatching', inputs)

    print('Response: {}'.format(result.get_response()))
    print('OUT0 = {}'.format(result.as_numpy('OUT0')))

    # Send 2 requests to the batching model. Because these are sent
    # asynchronously and Triton's dynamic batcher is configured to
    # delay up to 5 seconds when forming a batch for this model, we
    # expect these 2 requests to be batched within Triton and sent to
    # the minimal backend as a single batch.
    print('\n=========')
    async_requests = []

    input0_data = np.array([[10, 11, 12, 13]], dtype=np.int32)
    print('Sending request to batching model: IN0 = {}'.format(input0_data))

    inputs = [httpclient.InferInput('IN0', [1, 4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    input0_data = np.array([[20, 21, 22, 23]], dtype=np.int32)
    print('Sending request to batching model: IN0 = {}'.format(input0_data))

    inputs = [httpclient.InferInput('IN0', [1, 4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    for async_request in async_requests:
        # Get the result from the initiated asynchronous inference
        # request. This call will block till the server responds.
        result = async_request.get_result()

        print('Response: {}'.format(result.get_response()))
        print('OUT0 = {}'.format(result.as_numpy('OUT0')))
3rdparty/backend-r22.12/examples/clients/recommended_client  (new file, mode 100644)
#!/usr/bin/env python
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import argparse
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')
    FLAGS = parser.parse_args()

    # For the HTTP client, need to specify large enough concurrency to
    # issue all the inference requests to the server in parallel. For
    # this example we want to be able to send 2 requests concurrently.
    try:
        concurrent_request_count = 2
        triton_client = httpclient.InferenceServerClient(
            url=FLAGS.url, concurrency=concurrent_request_count)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # Send 2 requests to the batching model. Because these are sent
    # asynchronously and Triton's dynamic batcher is configured to
    # delay up to 5 seconds when forming a batch for this model, we
    # expect these 2 requests to be batched within Triton and sent to
    # the backend as a single batch.
    #
    # The recommended backend can handle any model with 1 input and 1
    # output as long as the input and output datatype and shape are
    # the same. The batching model uses datatype FP32 and shape
    # [ 4, 4 ].
    print('\n=========')
    async_requests = []

    input0_data = np.array([[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3],
                             [3.0, 3.1, 3.2, 3.3], [4.0, 4.1, 4.2, 4.3]]],
                           dtype=np.float32)
    print('Sending request to batching model: input = {}'.format(input0_data))

    inputs = [httpclient.InferInput('INPUT', [1, 4, 4], "FP32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    input0_data = np.array([[[10.0, 10.1, 10.2, 10.3], [20.0, 20.1, 20.2, 20.3],
                             [30.0, 30.1, 30.2, 30.3], [40.0, 40.1, 40.2, 40.3]]],
                           dtype=np.float32)
    print('Sending request to batching model: input = {}'.format(input0_data))

    inputs = [httpclient.InferInput('INPUT', [1, 4, 4], "FP32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    for async_request in async_requests:
        # Get the result from the initiated asynchronous inference
        # request. This call will block till the server responds.
        result = async_request.get_result()

        print('Response: {}'.format(result.get_response()))
        print('OUTPUT = {}'.format(result.as_numpy('OUTPUT')))
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py  (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import json

import triton_python_backend_utils as pb_utils


# This model calculates the sum and difference of the INPUT0 and INPUT1 and put
# the results in OUTPUT0 and OUTPUT1 respectively. For more information
# regarding how this model.py was written, please refer to Python Backend.
class TritonPythonModel:

    def initialize(self, args):
        self.model_config = model_config = json.loads(args['model_config'])

        output0_config = pb_utils.get_output_config_by_name(
            model_config, "OUTPUT0")
        output1_config = pb_utils.get_output_config_by_name(
            model_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config['data_type'])
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output1_config['data_type'])

    def execute(self, requests):
        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(),
                            in_0.as_numpy() - in_1.as_numpy())

            out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                           out_0.astype(output0_dtype))
            out_tensor_1 = pb_utils.Tensor("OUTPUT1",
                                           out_1.astype(output1_dtype))

            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor_0, out_tensor_1])
            responses.append(inference_response)

        return responses

    def finalize(self):
        print('Cleaning up...')
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt  (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "addsub_python"
backend: "python"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
input [
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb  (new binary file added, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt  (new file, mode 100644)
name: "addsub_tf"
platform: "tensorflow_savedmodel"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt  (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "bls_fp32"
backend: "bls"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
input [
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep  (new empty file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt  (new file, mode 100644)
backend: "minimal"
max_batch_size: 8
dynamic_batching {
max_queue_delay_microseconds: 5000000
}
input [
{
name: "IN0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
output [
{
name: "OUT0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep  (new empty file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt  (new file, mode 100644)
backend: "minimal"
max_batch_size: 0
input [
{
name: "IN0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
output [
{
name: "OUT0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep  (new empty file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt  (new file, mode 100644)
backend: "recommended"
max_batch_size: 8
dynamic_batching {
max_queue_delay_microseconds: 5000000
}
input [
{
name: "INPUT"
data_type: TYPE_FP32
dims: [ 4, 4 ]
}
]
output [
{
name: "OUTPUT"
data_type: TYPE_FP32
dims: [ 4, 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/include/triton/backend/backend_common.h  (new file, mode 100644)
[Diff collapsed; contents not shown.]
3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h  (new file, mode 100644)
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <list>
#include <memory>
#include <string>
#include <vector>
#include "triton/backend/backend_common.h"
#include "triton/backend/backend_memory.h"
#include "triton/common/async_work_queue.h"
#include "triton/common/sync_queue.h"
#include "triton/core/tritonbackend.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace backend {

#ifndef TRITON_ENABLE_GPU
using cudaStream_t = void*;
using cudaEvent_t = void*;
#endif  // !TRITON_ENABLE_GPU

//
// BackendInputCollector
//
class BackendInputCollector {
 public:
  // The caller can optionally provide 'event' for internal synchronization
  // instead of using 'stream'. If 'host_policy_name' is provided, it must be
  // valid for the lifetime of the collector
  explicit BackendInputCollector(
      TRITONBACKEND_Request** requests, const uint32_t request_count,
      std::vector<TRITONBACKEND_Response*>* responses,
      TRITONBACKEND_MemoryManager* memory_manager, const bool pinned_enabled,
      cudaStream_t stream, cudaEvent_t event = nullptr,
      cudaEvent_t buffer_ready_event = nullptr,
      const size_t kernel_buffer_threshold = 0,
      const char* host_policy_name = nullptr,
      const bool copy_on_stream = false,
      const bool coalesce_request_input = false)
      : need_sync_(false), requests_(requests), request_count_(request_count),
        responses_(responses), memory_manager_(memory_manager),
        pinned_enabled_(pinned_enabled),
        use_async_cpu_copy_(triton::common::AsyncWorkQueue::WorkerCount() > 1),
        stream_(stream), event_(event),
        buffer_ready_event_(buffer_ready_event),
        kernel_buffer_threshold_(kernel_buffer_threshold),
        pending_pinned_byte_size_(0), pending_pinned_offset_(0),
        pending_copy_kernel_buffer_byte_size_(0),
        pending_copy_kernel_buffer_offset_(0),
        pending_copy_kernel_input_buffer_counts_(0), async_task_count_(0),
        host_policy_cstr_(host_policy_name), copy_on_stream_(copy_on_stream),
        coalesce_request_input_(coalesce_request_input)
  {
  }

  ~BackendInputCollector() = default;

  // Process all requests for a named input tensor and return the
  // concatenated values of those requests in a single contiguous
  // buffer. This overload of the function can avoid data copy if the
  // tensor values are already contiguous and the caller doesn't
  // provide a destination 'buffer'.
  //
  // 'buffer' is used to determine whether the input should be placed at the
  // 'buffer' provided by the caller. If 'buffer' == nullptr, the returned
  // buffer will be managed by the BackendInputCollector object and
  // has the same lifecycle as the BackendInputCollector object.
  // 'buffer_byte_size' is the byte size of 'buffer' if it is not nullptr.
  // 'allowed_input_types' is the ordered list of the memory type and id pairs
  // that the returned buffer can be. It must only contain the memory type
  // and id of 'buffer' if 'buffer' is not nullptr.
  // 'dst_buffer' returns the contiguous buffer of the input tensor.
  // 'dst_buffer_byte_size' the byte size of 'dst_buffer'.
  // 'dst_memory_type' returns the memory type of 'dst_buffer'.
  // 'dst_memory_type_id' returns the memory type id of 'dst_buffer'.
  TRITONSERVER_Error* ProcessTensor(
      const char* input_name, char* buffer, const size_t buffer_byte_size,
      const std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>>&
          allowed_input_types,
      const char** dst_buffer, size_t* dst_buffer_byte_size,
      TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id);

  // Process all requests for a named input tensor and return the
  // concatenated values of those requests in a single contiguous
  // 'buffer'.
  //
  // 'buffer' The buffer to hold the concatenates tensor value. Must
  // be large enough to hold all tensor value.
  // 'buffer_byte_size' is the byte size of 'buffer'.
  // 'dst_memory_type' The memory type of 'buffer'.
  // 'dst_memory_type_id' The memory type id of 'buffer'.
  void ProcessTensor(
      const char* input_name, char* buffer, const size_t buffer_byte_size,
      const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id);

  // Process the batch input and return its shape. Returning error indicates
  // that the batch input can't be formed properly and the caller should abort
  // the whole batch.
  TRITONSERVER_Error* BatchInputShape(
      const BatchInput& batch_input, std::vector<int64_t>* shape);

  // Process the batch input and derive its value into 'buffer'. Returning
  // error indicates that the batch input can't be formed properly and
  // the caller should abort the whole batch.
  // 'buffer' is used to determine whether the input should be placed at the
  // 'buffer' provided by the caller. If 'buffer' == nullptr, the returned
  // buffer will be managed by the BackendInputCollector object and
  // has the same lifecycle as the BackendInputCollector object.
  // 'buffer_byte_size' is the byte size of 'buffer' if it is not nullptr.
  // 'allowed_input_types' is the ordered list of the memory type and id pairs
  // that the returned buffer can be. It must only contain the memory type
  // and id of 'buffer' if it is not nullptr.
  // 'dst_buffer' returns the contiguous buffer of the input tensor.
  // 'dst_memory_type' returns the memory type of 'dst_buffer'.
  // 'dst_memory_type_id' returns the memory type id of 'dst_buffer'.
  TRITONSERVER_Error* ProcessBatchInput(
      const BatchInput& batch_input, char* buffer,
      const size_t buffer_byte_size,
      const std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>>&
          allowed_input_types,
      const char** dst_buffer, size_t* dst_buffer_byte_size,
      TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id);

  // Finalize processing of all requests for all input tensors. Return
  // true if cudaMemcpyAsync is called, and the caller should call
  // cudaStreamSynchronize (or cudaEventSynchronize on 'event') before
  // using the data.
  bool Finalize();

 private:
  struct ContiguousBuffer {
    ContiguousBuffer() : start_request_idx_(0), end_request_idx_(0) {}

    MemoryDesc memory_desc_;
    size_t start_request_idx_;
    size_t end_request_idx_;
  };

  class InputIterator {
   public:
    InputIterator(
        TRITONBACKEND_Request** requests, const uint32_t request_count,
        std::vector<TRITONBACKEND_Response*>* responses,
        const char* input_name, const char* host_policy_name,
        const bool coalesce_request_input);

    // Return false if iterator reaches the end of inputs, 'input' is not set.
    bool GetNextContiguousInput(ContiguousBuffer* input);

   private:
    TRITONBACKEND_Request** requests_;
    const uint32_t request_count_;
    std::vector<TRITONBACKEND_Response*>* responses_;
    const char* input_name_;
    const char* host_policy_;
    const bool coalesce_request_input_;

    TRITONBACKEND_Input* curr_input_;
    size_t curr_request_idx_;
    size_t curr_buffer_idx_;
    uint32_t curr_buffer_cnt_;
    bool reach_end_;
  };

  // Return whether the entire input is in a contiguous buffer. If returns true,
  // the properties of the contiguous input buffer will also be returned.
  // Otherwise, only 'buffer_byte_size' will be set and return the total byte
  // size of the input.
  bool GetInputBufferIfContiguous(
      const char* input_name, const char** buffer, size_t* buffer_byte_size,
      TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);

  bool FlushPendingPinned(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);
  bool FlushPendingCopyKernel(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);
  TRITONSERVER_Error* LaunchCopyKernel(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);
  bool SetInputTensor(
      const char* input_name, const ContiguousBuffer& input,
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id, const size_t tensor_buffer_offset,
      const TRITONSERVER_MemoryType use_pinned_memory_type,
      const bool use_kernel, const bool wait_buffer);

  template <typename T>
  TRITONSERVER_Error* SetElementCount(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);
  template <typename T>
  TRITONSERVER_Error* SetAccumulatedElementCount(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);
  template <typename T>
  TRITONSERVER_Error* SetBatchItemShape(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);

  bool need_sync_;
  TRITONBACKEND_Request** requests_;
  const uint32_t request_count_;
  std::vector<TRITONBACKEND_Response*>* responses_;
  TRITONBACKEND_MemoryManager* memory_manager_;
  const bool pinned_enabled_;
  const bool use_async_cpu_copy_;
  cudaStream_t stream_;
  cudaEvent_t event_;
  cudaEvent_t buffer_ready_event_;
  const size_t kernel_buffer_threshold_;

  size_t pending_pinned_byte_size_;
  size_t pending_pinned_offset_;
  std::list<ContiguousBuffer> pending_pinned_input_buffers_;

  // managed memories that need to live over the lifetime of this
  // BackendInputCollector object.
  std::list<std::unique_ptr<BackendMemory>> in_use_memories_;

  size_t pending_copy_kernel_buffer_byte_size_;
  size_t pending_copy_kernel_buffer_offset_;
  size_t pending_copy_kernel_input_buffer_counts_;
  std::list<ContiguousBuffer> pending_copy_kernel_input_buffers_;
  std::vector<std::unique_ptr<std::vector<int8_t*>>> input_ptr_buffer_host_;
  std::vector<std::unique_ptr<std::vector<size_t>>> byte_size_buffer_host_;
  std::vector<std::unique_ptr<std::vector<size_t>>>
      byte_size_offset_buffer_host_;

  // Pinned memory buffers and the corresponding request_inputs where
  // the final copy to the tensor is deferred until Finalize() after
  // waiting for all in-flight copies.
  struct DeferredPinned {
    DeferredPinned(
        char* pinned_memory, const size_t pinned_memory_size,
        char* tensor_buffer, const size_t tensor_buffer_offset,
        const TRITONSERVER_MemoryType tensor_memory_type,
        const int64_t tensor_memory_id,
        std::list<ContiguousBuffer>&& request_buffers,
        std::vector<TRITONBACKEND_Response*>* responses)
        : finalized_(false), pinned_memory_(pinned_memory),
          pinned_memory_size_(pinned_memory_size),
          tensor_buffer_(tensor_buffer),
          tensor_buffer_offset_(tensor_buffer_offset),
          tensor_memory_type_(tensor_memory_type),
          tensor_memory_id_(tensor_memory_id),
          requests_(std::move(request_buffers)), responses_(responses)
    {
    }

    bool Finalize(cudaStream_t stream);

    bool finalized_;
    // Holding reference to the pinned memory buffer, which is managed
    // by BackendInputCollector as 'pinned_memory'
    char* pinned_memory_;
    const size_t pinned_memory_size_;
    char* tensor_buffer_;
    const size_t tensor_buffer_offset_;
    const TRITONSERVER_MemoryType tensor_memory_type_;
    const int64_t tensor_memory_id_;
    std::list<ContiguousBuffer> requests_;
    std::vector<TRITONBACKEND_Response*>* responses_;
  };

  std::list<DeferredPinned> deferred_pinned_;
  // FIXME use future to maintain an issue-order queue to drop task count
  triton::common::SyncQueue<bool> completion_queue_;
  size_t async_task_count_;
  const char* host_policy_cstr_;
  const bool copy_on_stream_;
  const bool coalesce_request_input_;
};

}}  // namespace triton::backend
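
Note: the collector above is normally driven from a backend's execute path. The fragment below is a minimal, illustrative sketch that is not part of this commit; it assumes the variables 'requests', 'request_count', 'responses', and 'memory_manager' are provided by the enclosing TRITONBACKEND_ModelInstanceExecute implementation, and shows how the "INPUT" tensor of a batch could be gathered into one contiguous CPU buffer.

// Illustrative sketch only (not part of the committed header). Assumes
// 'requests', 'request_count', 'responses', and 'memory_manager' come from
// the enclosing TRITONBACKEND_ModelInstanceExecute implementation.
triton::backend::BackendInputCollector collector(
    requests, request_count, &responses, memory_manager,
    false /* pinned_enabled */, nullptr /* stream */);

const char* input_buffer;
size_t input_buffer_byte_size;
TRITONSERVER_MemoryType input_memory_type;
int64_t input_memory_type_id;

// Concatenate the "INPUT" tensor of all requests into a single contiguous
// CPU buffer; no copy is made if the data is already contiguous.
TRITONSERVER_Error* err = collector.ProcessTensor(
    "INPUT", nullptr /* buffer */, 0 /* buffer_byte_size */,
    {{TRITONSERVER_MEMORY_CPU, 0}}, &input_buffer, &input_buffer_byte_size,
    &input_memory_type, &input_memory_type_id);
// A real backend would check 'err' and send error responses here.

// Finalize() reports whether an asynchronous CUDA copy was issued; if so,
// synchronize the stream (or event) before reading 'input_buffer'.
const bool need_sync = collector.Finalize();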
3rdparty/backend-r22.12/include/triton/backend/backend_memory.h  (new file, mode 100644)
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <string>
#include <vector>
#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"
namespace triton { namespace backend {

// Collection of common properties that describes a buffer in Triton
struct MemoryDesc {
  MemoryDesc()
      : buffer_(nullptr), byte_size_(0),
        memory_type_(TRITONSERVER_MEMORY_CPU), memory_type_id_(0)
  {
  }

  MemoryDesc(
      const char* buffer, size_t byte_size,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
      : buffer_(buffer), byte_size_(byte_size), memory_type_(memory_type),
        memory_type_id_(memory_type_id)
  {
  }

  const char* buffer_;
  size_t byte_size_;
  TRITONSERVER_MemoryType memory_type_;
  int64_t memory_type_id_;
};

//
// BackendMemory
//
// Utility class for allocating and deallocating memory using both
// TRITONBACKEND_MemoryManager and direct GPU and CPU malloc/free.
//
class BackendMemory {
 public:
  enum class AllocationType { CPU, CPU_PINNED, GPU, CPU_PINNED_POOL, GPU_POOL };

  // Allocate a contiguous block of 'alloc_type' memory. 'mem'
  // returns the pointer to the allocated memory.
  //
  // CPU, CPU_PINNED_POOL and GPU_POOL are allocated using
  // TRITONBACKEND_MemoryManagerAllocate. Note that CPU_PINNED and GPU
  // allocations can be much slower than the POOL variants.
  //
  // Two error codes have specific interpretations for this function:
  //
  //   TRITONSERVER_ERROR_UNSUPPORTED: Indicates that function is
  //     incapable of allocating the requested memory type and memory
  //     type ID. Requests for the memory type and ID will always fail
  //     no matter 'byte_size' of the request.
  //
  //   TRITONSERVER_ERROR_UNAVAILABLE: Indicates that function can
  //     allocate the memory type and ID but that currently it cannot
  //     allocate a contiguous block of memory of the requested
  //     'byte_size'.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type,
      const int64_t memory_type_id, const size_t byte_size,
      BackendMemory** mem);

  // Allocate a contiguous block of memory by attempting the
  // allocation using 'alloc_types' in order until one is successful.
  // See BackendMemory::Create() above for details.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager,
      const std::vector<AllocationType>& alloc_types,
      const int64_t memory_type_id, const size_t byte_size,
      BackendMemory** mem);

  // Creates a BackendMemory object from a pre-allocated buffer. The buffer
  // is not owned by the object created with this function. Hence, for
  // proper operation, the lifetime of the buffer should atleast extend till
  // the corresponding BackendMemory.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type,
      const int64_t memory_type_id, void* buffer, const size_t byte_size,
      BackendMemory** mem);

  ~BackendMemory();

  AllocationType AllocType() const { return alloctype_; }
  int64_t MemoryTypeId() const { return memtype_id_; }
  char* MemoryPtr() { return buffer_; }
  size_t ByteSize() const { return byte_size_; }
  TRITONSERVER_MemoryType MemoryType() const
  {
    return AllocTypeToMemoryType(alloctype_);
  }

  static TRITONSERVER_MemoryType AllocTypeToMemoryType(const AllocationType a);
  static const char* AllocTypeString(const AllocationType a);

 private:
  BackendMemory(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloctype,
      const int64_t memtype_id, char* buffer, const size_t byte_size,
      const bool owns_buffer = true)
      : manager_(manager), alloctype_(alloctype), memtype_id_(memtype_id),
        buffer_(buffer), byte_size_(byte_size), owns_buffer_(owns_buffer)
  {
  }

  TRITONBACKEND_MemoryManager* manager_;
  AllocationType alloctype_;
  int64_t memtype_id_;
  char* buffer_;
  size_t byte_size_;
  bool owns_buffer_;
};

}}  // namespace triton::backend
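
Note: the fragment below is a hedged usage sketch, not part of this commit. It assumes 'memory_manager' and 'byte_size' are available in the enclosing backend code, and shows the fallback-ordered overload of BackendMemory::Create declared above.

// Illustrative sketch only (not part of the committed header). Assumes
// 'memory_manager' and 'byte_size' come from the enclosing backend code.
using triton::backend::BackendMemory;

BackendMemory* output_memory = nullptr;
// Try the GPU pool first, then fall back to pageable CPU memory.
TRITONSERVER_Error* err = BackendMemory::Create(
    memory_manager,
    {BackendMemory::AllocationType::GPU_POOL,
     BackendMemory::AllocationType::CPU},
    0 /* memory_type_id */, byte_size, &output_memory);
if (err == nullptr) {
  char* output_buffer = output_memory->MemoryPtr();
  // ... write the output tensor into 'output_buffer' ...
  delete output_memory;  // destructor releases the owned allocation
}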
3rdparty/backend-r22.12/include/triton/backend/backend_model.h  (new file, mode 100644)
[Diff collapsed; contents not shown.]
3rdparty/backend-r22.12/include/triton/backend/backend_model_instance.h  (new file, mode 100644)
[Diff collapsed; contents not shown.]