ModelZoo / Qwen_lmdeploy · Commits

Commit b30f3cdb, authored Nov 14, 2023 by xiabo
Add the downloaded code (添加下载的代码)
Parent: e38ee081
Changes: 418 changed files in this commit. This page shows 20 changed files with 2686 additions and 0 deletions (+2686, −0).
3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc (+750, −0)
3rdparty/backend-r22.12/examples/clients/bls_client (+86, −0)
3rdparty/backend-r22.12/examples/clients/minimal_client (+92, −0)
3rdparty/backend-r22.12/examples/clients/recommended_client (+91, −0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py (+74, −0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt (+58, −0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb (+0, −0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt (+28, −0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt (+63, −0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep (+0, −0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt (+24, −0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep (+0, −0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt (+21, −0)
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep (+0, −0)
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt (+24, −0)
3rdparty/backend-r22.12/include/triton/backend/backend_common.h (+672, −0)
3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h (+301, −0)
3rdparty/backend-r22.12/include/triton/backend/backend_memory.h (+138, −0)
3rdparty/backend-r22.12/include/triton/backend/backend_model.h (+146, −0)
3rdparty/backend-r22.12/include/triton/backend/backend_model_instance.h (+118, −0)
3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc (new file, mode 100644)
[diff collapsed; content not shown]
3rdparty/backend-r22.12/examples/clients/bls_client (new file, mode 100644)
#!/usr/bin/python
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import argparse
import numpy as np
import tritonhttpclient as httpclient
from tritonclientutils import np_to_triton_dtype

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')
    FLAGS = parser.parse_args()

    model_name = "bls_fp32"
    shape = [16]

    with httpclient.InferenceServerClient(url=FLAGS.url) as client:
        input0_data = np.random.rand(*shape).astype(np.float32)
        input1_data = np.random.rand(*shape).astype(np.float32)
        inputs = [
            httpclient.InferInput("INPUT0", input0_data.shape,
                                  np_to_triton_dtype(input0_data.dtype)),
            httpclient.InferInput("INPUT1", input1_data.shape,
                                  np_to_triton_dtype(input1_data.dtype)),
        ]

        inputs[0].set_data_from_numpy(input0_data)
        inputs[1].set_data_from_numpy(input1_data)

        outputs = [
            httpclient.InferRequestedOutput("OUTPUT0"),
            httpclient.InferRequestedOutput("OUTPUT1"),
        ]

        response = client.infer(model_name,
                                inputs,
                                request_id=str(1),
                                outputs=outputs)

        result = response.get_response()
        output0_data = response.as_numpy("OUTPUT0")
        output1_data = response.as_numpy("OUTPUT1")

        print("INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
            input0_data, input1_data, output0_data))
        print("INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
            input0_data, input1_data, output1_data))

        if not np.allclose(input0_data + input1_data, output0_data):
            print("error: incorrect sum")
            sys.exit(1)

        if not np.allclose(input0_data - input1_data, output1_data):
            print("error: incorrect difference")
            sys.exit(1)

        print('\nPASS')
        sys.exit(0)
3rdparty/backend-r22.12/examples/clients/minimal_client (new file, mode 100644)
#!/usr/bin/env python
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import sys
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')
    FLAGS = parser.parse_args()

    # For the HTTP client, need to specify large enough concurrency to
    # issue all the inference requests to the server in parallel. For
    # this example we want to be able to send 2 requests concurrently.
    try:
        concurrent_request_count = 2
        triton_client = httpclient.InferenceServerClient(
            url=FLAGS.url, concurrency=concurrent_request_count)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # First send a single request to the nonbatching model.
    print('=========')
    input0_data = np.array([1, 2, 3, 4], dtype=np.int32)
    print('Sending request to nonbatching model: IN0 = {}'.format(input0_data))

    inputs = [httpclient.InferInput('IN0', [4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    result = triton_client.infer('nonbatching', inputs)

    print('Response: {}'.format(result.get_response()))
    print('OUT0 = {}'.format(result.as_numpy('OUT0')))

    # Send 2 requests to the batching model. Because these are sent
    # asynchronously and Triton's dynamic batcher is configured to
    # delay up to 5 seconds when forming a batch for this model, we
    # expect these 2 requests to be batched within Triton and sent to
    # the minimal backend as a single batch.
    print('\n=========')
    async_requests = []

    input0_data = np.array([[10, 11, 12, 13]], dtype=np.int32)
    print('Sending request to batching model: IN0 = {}'.format(input0_data))
    inputs = [httpclient.InferInput('IN0', [1, 4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    input0_data = np.array([[20, 21, 22, 23]], dtype=np.int32)
    print('Sending request to batching model: IN0 = {}'.format(input0_data))
    inputs = [httpclient.InferInput('IN0', [1, 4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    for async_request in async_requests:
        # Get the result from the initiated asynchronous inference
        # request. This call will block till the server responds.
        result = async_request.get_result()

        print('Response: {}'.format(result.get_response()))
        print('OUT0 = {}'.format(result.as_numpy('OUT0')))
3rdparty/backend-r22.12/examples/clients/recommended_client (new file, mode 100644)
#!/usr/bin/env python
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import sys
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')
    FLAGS = parser.parse_args()

    # For the HTTP client, need to specify large enough concurrency to
    # issue all the inference requests to the server in parallel. For
    # this example we want to be able to send 2 requests concurrently.
    try:
        concurrent_request_count = 2
        triton_client = httpclient.InferenceServerClient(
            url=FLAGS.url, concurrency=concurrent_request_count)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # Send 2 requests to the batching model. Because these are sent
    # asynchronously and Triton's dynamic batcher is configured to
    # delay up to 5 seconds when forming a batch for this model, we
    # expect these 2 requests to be batched within Triton and sent to
    # the backend as a single batch.
    #
    # The recommended backend can handle any model with 1 input and 1
    # output as long as the input and output datatype and shape are
    # the same. The batching model uses datatype FP32 and shape
    # [ 4, 4 ].
    print('\n=========')
    async_requests = []

    input0_data = np.array([[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3],
                             [3.0, 3.1, 3.2, 3.3], [4.0, 4.1, 4.2, 4.3]]],
                           dtype=np.float32)
    print('Sending request to batching model: input = {}'.format(input0_data))
    inputs = [httpclient.InferInput('INPUT', [1, 4, 4], "FP32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    input0_data = np.array([[[10.0, 10.1, 10.2, 10.3], [20.0, 20.1, 20.2, 20.3],
                             [30.0, 30.1, 30.2, 30.3], [40.0, 40.1, 40.2, 40.3]]],
                           dtype=np.float32)
    print('Sending request to batching model: input = {}'.format(input0_data))
    inputs = [httpclient.InferInput('INPUT', [1, 4, 4], "FP32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    for async_request in async_requests:
        # Get the result from the initiated asynchronous inference
        # request. This call will block till the server responds.
        result = async_request.get_result()

        print('Response: {}'.format(result.get_response()))
        print('OUTPUT = {}'.format(result.as_numpy('OUTPUT')))
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import json

import triton_python_backend_utils as pb_utils


# This model calculates the sum and difference of the INPUT0 and INPUT1 and put
# the results in OUTPUT0 and OUTPUT1 respectively. For more information
# regarding how this model.py was written, please refer to Python Backend.
class TritonPythonModel:

    def initialize(self, args):
        self.model_config = model_config = json.loads(args['model_config'])

        output0_config = pb_utils.get_output_config_by_name(
            model_config, "OUTPUT0")
        output1_config = pb_utils.get_output_config_by_name(
            model_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config['data_type'])
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output1_config['data_type'])

    def execute(self, requests):
        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(),
                            in_0.as_numpy() - in_1.as_numpy())

            out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                           out_0.astype(output0_dtype))
            out_tensor_1 = pb_utils.Tensor("OUTPUT1",
                                           out_1.astype(output1_dtype))

            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor_0, out_tensor_1])
            responses.append(inference_response)

        return responses

    def finalize(self):
        print('Cleaning up...')
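
For reference, here is a minimal, hypothetical sketch (not part of this commit) of how a client could call this addsub_python model directly over HTTP. The tensor names, FP32 datatype, and [16] shape come from the config.pbtxt below; the tritonclient package and a Triton server at localhost:8000 are assumed.

# Hypothetical sketch, not part of this commit: query addsub_python directly.
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

with httpclient.InferenceServerClient(url='localhost:8000') as client:
    in0 = np.random.rand(16).astype(np.float32)
    in1 = np.random.rand(16).astype(np.float32)
    inputs = [
        httpclient.InferInput("INPUT0", in0.shape, np_to_triton_dtype(in0.dtype)),
        httpclient.InferInput("INPUT1", in1.shape, np_to_triton_dtype(in1.dtype)),
    ]
    inputs[0].set_data_from_numpy(in0)
    inputs[1].set_data_from_numpy(in1)
    outputs = [
        httpclient.InferRequestedOutput("OUTPUT0"),
        httpclient.InferRequestedOutput("OUTPUT1"),
    ]
    response = client.infer("addsub_python", inputs, outputs=outputs)
    # execute() above computes OUTPUT0 = INPUT0 + INPUT1 and OUTPUT1 = INPUT0 - INPUT1.
    assert np.allclose(response.as_numpy("OUTPUT0"), in0 + in1)
    assert np.allclose(response.as_numpy("OUTPUT1"), in0 - in1)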
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "addsub_python"
backend: "python"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
input [
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb (new file, mode 100644; binary file added, diff not shown)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt (new file, mode 100644)
name: "addsub_tf"
platform: "tensorflow_savedmodel"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
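
The saved_model.pb above is binary and not shown in this diff. As a rough, hypothetical illustration only (not the committed file), a SavedModel with the interface declared in this config, and presumably the same sum/difference behavior as addsub_python, could be built roughly like this, assuming TensorFlow 2.x:

# Hypothetical sketch, not the committed saved_model.pb: an addsub SavedModel
# with two FP32 [16] inputs and sum/difference outputs.
import tensorflow as tf

class AddSub(tf.Module):
    @tf.function(input_signature=[
        tf.TensorSpec(shape=[16], dtype=tf.float32, name="INPUT0"),
        tf.TensorSpec(shape=[16], dtype=tf.float32, name="INPUT1"),
    ])
    def __call__(self, INPUT0, INPUT1):
        return {"OUTPUT0": INPUT0 + INPUT1, "OUTPUT1": INPUT0 - INPUT1}

module = AddSub()
tf.saved_model.save(module, "addsub_tf/1/model.savedmodel",
                    signatures={"serving_default": module.__call__})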
3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt (new file, mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "bls_fp32"
backend: "bls"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
input [
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep (new file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt (new file, mode 100644)
backend: "minimal"
max_batch_size: 8
dynamic_batching {
max_queue_delay_microseconds: 5000000
}
input [
{
name: "IN0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
output [
{
name: "OUT0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
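
This config enables dynamic batching with a queue delay of 5,000,000 microseconds (5 seconds), which is what the minimal_client above relies on when it sends its two asynchronous [1, 4] requests. A small, hypothetical numpy-only sketch (not part of this commit) of the batch the minimal backend would then receive:

# Hypothetical illustration: Triton's dynamic batcher concatenates the two
# [1, 4] INT32 requests from minimal_client along the batch dimension, so the
# backend sees a single [2, 4] input (bounded by max_batch_size: 8).
import numpy as np

request_a = np.array([[10, 11, 12, 13]], dtype=np.int32)  # shape [1, 4]
request_b = np.array([[20, 21, 22, 23]], dtype=np.int32)  # shape [1, 4]

batched = np.concatenate([request_a, request_b], axis=0)
print(batched.shape)  # (2, 4)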
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep (new file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt (new file, mode 100644)
backend: "minimal"
max_batch_size: 0
input [
{
name: "IN0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
output [
{
name: "OUT0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep (new file, mode 100644)
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt (new file, mode 100644)
backend: "recommended"
max_batch_size: 8
dynamic_batching {
max_queue_delay_microseconds: 5000000
}
input [
{
name: "INPUT"
data_type: TYPE_FP32
dims: [ 4, 4 ]
}
]
output [
{
name: "OUTPUT"
data_type: TYPE_FP32
dims: [ 4, 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/include/triton/backend/backend_common.h (new file, mode 100644)
[diff collapsed; content not shown]
3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h (new file, mode 100644)
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <list>
#include <memory>
#include <string>
#include <vector>
#include "triton/backend/backend_common.h"
#include "triton/backend/backend_memory.h"
#include "triton/common/async_work_queue.h"
#include "triton/common/sync_queue.h"
#include "triton/core/tritonbackend.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace backend {

#ifndef TRITON_ENABLE_GPU
using cudaStream_t = void*;
using cudaEvent_t = void*;
#endif  // !TRITON_ENABLE_GPU

//
// BackendInputCollector
//
class BackendInputCollector {
 public:
  // The caller can optionally provide 'event' for internal synchronization
  // instead of using 'stream'. If 'host_policy_name' is provided, it must be
  // valid for the lifetime of the collector
  explicit BackendInputCollector(
      TRITONBACKEND_Request** requests, const uint32_t request_count,
      std::vector<TRITONBACKEND_Response*>* responses,
      TRITONBACKEND_MemoryManager* memory_manager, const bool pinned_enabled,
      cudaStream_t stream, cudaEvent_t event = nullptr,
      cudaEvent_t buffer_ready_event = nullptr,
      const size_t kernel_buffer_threshold = 0,
      const char* host_policy_name = nullptr,
      const bool copy_on_stream = false,
      const bool coalesce_request_input = false)
      : need_sync_(false), requests_(requests),
        request_count_(request_count), responses_(responses),
        memory_manager_(memory_manager), pinned_enabled_(pinned_enabled),
        use_async_cpu_copy_(triton::common::AsyncWorkQueue::WorkerCount() > 1),
        stream_(stream), event_(event),
        buffer_ready_event_(buffer_ready_event),
        kernel_buffer_threshold_(kernel_buffer_threshold),
        pending_pinned_byte_size_(0), pending_pinned_offset_(0),
        pending_copy_kernel_buffer_byte_size_(0),
        pending_copy_kernel_buffer_offset_(0),
        pending_copy_kernel_input_buffer_counts_(0), async_task_count_(0),
        host_policy_cstr_(host_policy_name), copy_on_stream_(copy_on_stream),
        coalesce_request_input_(coalesce_request_input)
  {
  }

  ~BackendInputCollector() = default;

  // Process all requests for a named input tensor and return the
  // concatenated values of those requests in a single contiguous
  // buffer. This overload of the function can avoid data copy if the
  // tensor values are already contiguous and the caller doesn't
  // provide a destination 'buffer'.
  //
  // 'buffer' is used to determine whether the input should be placed at the
  // 'buffer' provided by the caller. If 'buffer' == nullptr, the returned
  // buffer will be managed by the BackendInputCollector object and
  // has the same lifecycle as the BackendInputCollector object.
  // 'buffer_byte_size' is the byte size of 'buffer' if it is not nullptr.
  // 'allowed_input_types' is the ordered list of the memory type and id pairs
  // that the returned buffer can be. It must only contain the memory type
  // and id of 'buffer' if 'buffer' is not nullptr.
  // 'dst_buffer' returns the contiguous buffer of the input tensor.
  // 'dst_buffer_byte_size' the byte size of 'dst_buffer'.
  // 'dst_memory_type' returns the memory type of 'dst_buffer'.
  // 'dst_memory_type_id' returns the memory type id of 'dst_buffer'.
  TRITONSERVER_Error* ProcessTensor(
      const char* input_name, char* buffer, const size_t buffer_byte_size,
      const std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>>&
          allowed_input_types,
      const char** dst_buffer, size_t* dst_buffer_byte_size,
      TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id);

  // Process all requests for a named input tensor and return the
  // concatenated values of those requests in a single contiguous
  // 'buffer'.
  //
  // 'buffer' The buffer to hold the concatenates tensor value. Must
  // be large enough to hold all tensor value.
  // 'buffer_byte_size' is the byte size of 'buffer'.
  // 'dst_memory_type' The memory type of 'buffer'.
  // 'dst_memory_type_id' The memory type id of 'buffer'.
  void ProcessTensor(
      const char* input_name, char* buffer, const size_t buffer_byte_size,
      const TRITONSERVER_MemoryType memory_type,
      const int64_t memory_type_id);

  // Process the batch input and return its shape. Returning error indicates
  // that the batch input can't be formed properly and the caller should abort
  // the whole batch.
  TRITONSERVER_Error* BatchInputShape(
      const BatchInput& batch_input, std::vector<int64_t>* shape);

  // Process the batch input and derive its value into 'buffer'. Returning
  // error indicates that the batch input can't be formed properly and
  // the caller should abort the whole batch.
  // 'buffer' is used to determine whether the input should be placed at the
  // 'buffer' provided by the caller. If 'buffer' == nullptr, the returned
  // buffer will be managed by the BackendInputCollector object and
  // has the same lifecycle as the BackendInputCollector object.
  // 'buffer_byte_size' is the byte size of 'buffer' if it is not nullptr.
  // 'allowed_input_types' is the ordered list of the memory type and id pairs
  // that the returned buffer can be. It must only contain the memory type
  // and id of 'buffer' if it is not nullptr.
  // 'dst_buffer' returns the contiguous buffer of the input tensor.
  // 'dst_memory_type' returns the memory type of 'dst_buffer'.
  // 'dst_memory_type_id' returns the memory type id of 'dst_buffer'.
  TRITONSERVER_Error* ProcessBatchInput(
      const BatchInput& batch_input, char* buffer,
      const size_t buffer_byte_size,
      const std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>>&
          allowed_input_types,
      const char** dst_buffer, size_t* dst_buffer_byte_size,
      TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id);

  // Finalize processing of all requests for all input tensors. Return
  // true if cudaMemcpyAsync is called, and the caller should call
  // cudaStreamSynchronize (or cudaEventSynchronize on 'event') before
  // using the data.
  bool Finalize();

 private:
  struct ContiguousBuffer {
    ContiguousBuffer() : start_request_idx_(0), end_request_idx_(0) {}
    MemoryDesc memory_desc_;
    size_t start_request_idx_;
    size_t end_request_idx_;
  };

  class InputIterator {
   public:
    InputIterator(
        TRITONBACKEND_Request** requests, const uint32_t request_count,
        std::vector<TRITONBACKEND_Response*>* responses,
        const char* input_name, const char* host_policy_name,
        const bool coalesce_request_input);

    // Return false if iterator reaches the end of inputs, 'input' is not set.
    bool GetNextContiguousInput(ContiguousBuffer* input);

   private:
    TRITONBACKEND_Request** requests_;
    const uint32_t request_count_;
    std::vector<TRITONBACKEND_Response*>* responses_;
    const char* input_name_;
    const char* host_policy_;
    const bool coalesce_request_input_;
    TRITONBACKEND_Input* curr_input_;
    size_t curr_request_idx_;
    size_t curr_buffer_idx_;
    uint32_t curr_buffer_cnt_;
    bool reach_end_;
  };

  // Return whether the entire input is in a contiguous buffer. If returns true,
  // the properties of the contiguous input buffer will also be returned.
  // Otherwise, only 'buffer_byte_size' will be set and return the total byte
  // size of the input.
  bool GetInputBufferIfContiguous(
      const char* input_name, const char** buffer, size_t* buffer_byte_size,
      TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);

  bool FlushPendingPinned(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);

  bool FlushPendingCopyKernel(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);

  TRITONSERVER_Error* LaunchCopyKernel(
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);

  bool SetInputTensor(
      const char* input_name, const ContiguousBuffer& input,
      char* tensor_buffer, const size_t tensor_buffer_byte_size,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id, const size_t tensor_buffer_offset,
      const TRITONSERVER_MemoryType use_pinned_memory_type,
      const bool use_kernel, const bool wait_buffer);

  template <typename T>
  TRITONSERVER_Error* SetElementCount(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);

  template <typename T>
  TRITONSERVER_Error* SetAccumulatedElementCount(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);

  template <typename T>
  TRITONSERVER_Error* SetBatchItemShape(
      const std::string& source_input, char* buffer,
      const size_t buffer_byte_size);

  bool need_sync_;
  TRITONBACKEND_Request** requests_;
  const uint32_t request_count_;
  std::vector<TRITONBACKEND_Response*>* responses_;
  TRITONBACKEND_MemoryManager* memory_manager_;
  const bool pinned_enabled_;
  const bool use_async_cpu_copy_;
  cudaStream_t stream_;
  cudaEvent_t event_;
  cudaEvent_t buffer_ready_event_;
  const size_t kernel_buffer_threshold_;

  size_t pending_pinned_byte_size_;
  size_t pending_pinned_offset_;
  std::list<ContiguousBuffer> pending_pinned_input_buffers_;

  // managed memories that need to live over the lifetime of this
  // BackendInputCollector object.
  std::list<std::unique_ptr<BackendMemory>> in_use_memories_;

  size_t pending_copy_kernel_buffer_byte_size_;
  size_t pending_copy_kernel_buffer_offset_;
  size_t pending_copy_kernel_input_buffer_counts_;
  std::list<ContiguousBuffer> pending_copy_kernel_input_buffers_;
  std::vector<std::unique_ptr<std::vector<int8_t*>>> input_ptr_buffer_host_;
  std::vector<std::unique_ptr<std::vector<size_t>>> byte_size_buffer_host_;
  std::vector<std::unique_ptr<std::vector<size_t>>>
      byte_size_offset_buffer_host_;

  // Pinned memory buffers and the corresponding request_inputs where
  // the final copy to the tensor is deferred until Finalize() after
  // waiting for all in-flight copies.
  struct DeferredPinned {
    DeferredPinned(
        char* pinned_memory, const size_t pinned_memory_size,
        char* tensor_buffer, const size_t tensor_buffer_offset,
        const TRITONSERVER_MemoryType tensor_memory_type,
        const int64_t tensor_memory_id,
        std::list<ContiguousBuffer>&& request_buffers,
        std::vector<TRITONBACKEND_Response*>* responses)
        : finalized_(false), pinned_memory_(pinned_memory),
          pinned_memory_size_(pinned_memory_size),
          tensor_buffer_(tensor_buffer),
          tensor_buffer_offset_(tensor_buffer_offset),
          tensor_memory_type_(tensor_memory_type),
          tensor_memory_id_(tensor_memory_id),
          requests_(std::move(request_buffers)), responses_(responses)
    {
    }

    bool Finalize(cudaStream_t stream);

    bool finalized_;
    // Holding reference to the pinned memory buffer, which is managed
    // by BackendInputCollector as 'pinned_memory'
    char* pinned_memory_;
    const size_t pinned_memory_size_;
    char* tensor_buffer_;
    const size_t tensor_buffer_offset_;
    const TRITONSERVER_MemoryType tensor_memory_type_;
    const int64_t tensor_memory_id_;
    std::list<ContiguousBuffer> requests_;
    std::vector<TRITONBACKEND_Response*>* responses_;
  };

  std::list<DeferredPinned> deferred_pinned_;

  // FIXME use future to maintain an issue-order queue to drop task count
  triton::common::SyncQueue<bool> completion_queue_;
  size_t async_task_count_;
  const char* host_policy_cstr_;
  const bool copy_on_stream_;
  const bool coalesce_request_input_;
};

}}  // namespace triton::backend
3rdparty/backend-r22.12/include/triton/backend/backend_memory.h (new file, mode 100644)
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <string>
#include <vector>
#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"
namespace triton { namespace backend {

// Collection of common properties that describes a buffer in Triton
struct MemoryDesc {
  MemoryDesc()
      : buffer_(nullptr), byte_size_(0),
        memory_type_(TRITONSERVER_MEMORY_CPU), memory_type_id_(0)
  {
  }

  MemoryDesc(
      const char* buffer, size_t byte_size,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
      : buffer_(buffer), byte_size_(byte_size), memory_type_(memory_type),
        memory_type_id_(memory_type_id)
  {
  }

  const char* buffer_;
  size_t byte_size_;
  TRITONSERVER_MemoryType memory_type_;
  int64_t memory_type_id_;
};

//
// BackendMemory
//
// Utility class for allocating and deallocating memory using both
// TRITONBACKEND_MemoryManager and direct GPU and CPU malloc/free.
//
class BackendMemory {
 public:
  enum class AllocationType {
    CPU,
    CPU_PINNED,
    GPU,
    CPU_PINNED_POOL,
    GPU_POOL
  };

  // Allocate a contiguous block of 'alloc_type' memory. 'mem'
  // returns the pointer to the allocated memory.
  //
  // CPU, CPU_PINNED_POOL and GPU_POOL are allocated using
  // TRITONBACKEND_MemoryManagerAllocate. Note that CPU_PINNED and GPU
  // allocations can be much slower than the POOL variants.
  //
  // Two error codes have specific interpretations for this function:
  //
  //   TRITONSERVER_ERROR_UNSUPPORTED: Indicates that function is
  //     incapable of allocating the requested memory type and memory
  //     type ID. Requests for the memory type and ID will always fail
  //     no matter 'byte_size' of the request.
  //
  //   TRITONSERVER_ERROR_UNAVAILABLE: Indicates that function can
  //     allocate the memory type and ID but that currently it cannot
  //     allocate a contiguous block of memory of the requested
  //     'byte_size'.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type,
      const int64_t memory_type_id, const size_t byte_size,
      BackendMemory** mem);

  // Allocate a contiguous block of memory by attempting the
  // allocation using 'alloc_types' in order until one is successful.
  // See BackendMemory::Create() above for details.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager,
      const std::vector<AllocationType>& alloc_types,
      const int64_t memory_type_id, const size_t byte_size,
      BackendMemory** mem);

  // Creates a BackendMemory object from a pre-allocated buffer. The buffer
  // is not owned by the object created with this function. Hence, for
  // proper operation, the lifetime of the buffer should at least extend till
  // the corresponding BackendMemory.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type,
      const int64_t memory_type_id, void* buffer, const size_t byte_size,
      BackendMemory** mem);

  ~BackendMemory();

  AllocationType AllocType() const { return alloctype_; }
  int64_t MemoryTypeId() const { return memtype_id_; }
  char* MemoryPtr() { return buffer_; }
  size_t ByteSize() const { return byte_size_; }
  TRITONSERVER_MemoryType MemoryType() const
  {
    return AllocTypeToMemoryType(alloctype_);
  }

  static TRITONSERVER_MemoryType AllocTypeToMemoryType(
      const AllocationType a);
  static const char* AllocTypeString(const AllocationType a);

 private:
  BackendMemory(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloctype,
      const int64_t memtype_id, char* buffer, const size_t byte_size,
      const bool owns_buffer = true)
      : manager_(manager), alloctype_(alloctype), memtype_id_(memtype_id),
        buffer_(buffer), byte_size_(byte_size), owns_buffer_(owns_buffer)
  {
  }

  TRITONBACKEND_MemoryManager* manager_;
  AllocationType alloctype_;
  int64_t memtype_id_;
  char* buffer_;
  size_t byte_size_;
  bool owns_buffer_;
};

}}  // namespace triton::backend
3rdparty/backend-r22.12/include/triton/backend/backend_model.h (new file, mode 100644)
[diff collapsed; content not shown]
3rdparty/backend-r22.12/include/triton/backend/backend_model_instance.h (new file, mode 100644)
[diff collapsed; content not shown]