OpenDAS / Lmdeploy · Commit 0a21fff9

Adapt to 0.1.0

Authored Dec 20, 2023 by xiabo
Parent: 9484fd1c
Changes: 422 · showing 20 changed files with 2598 additions and 0 deletions (+2598, −0)
3rdparty/backend-r22.12/examples/backends/recommended/src/libtriton_recommended.ldscript (+30, −0)
3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc (+750, −0)
3rdparty/backend-r22.12/examples/clients/bls_client (+86, −0)
3rdparty/backend-r22.12/examples/clients/minimal_client (+92, −0)
3rdparty/backend-r22.12/examples/clients/recommended_client (+91, −0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py (+74, −0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt (+58, −0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb (+0, −0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt (+28, −0)
3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt (+63, −0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep (+0, −0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt (+24, −0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep (+0, −0)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt (+21, −0)
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep (+0, −0)
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt (+24, −0)
3rdparty/backend-r22.12/include/triton/backend/backend_common.h (+672, −0)
3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h (+301, −0)
3rdparty/backend-r22.12/include/triton/backend/backend_memory.h (+138, −0)
3rdparty/backend-r22.12/include/triton/backend/backend_model.h (+146, −0)
3rdparty/backend-r22.12/examples/backends/recommended/src/libtriton_recommended.ldscript · new file (mode 100644)
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
TRITONBACKEND_*;
local: *;
};
3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc · new file (mode 100644)
(diff collapsed on this page; +750 lines not shown)
3rdparty/backend-r22.12/examples/clients/bls_client · new file (mode 100644)
#!/usr/bin/python
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import sys
import argparse
import numpy as np
import tritonhttpclient as httpclient
from tritonclientutils import np_to_triton_dtype

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')
    FLAGS = parser.parse_args()

    model_name = "bls_fp32"
    shape = [16]

    with httpclient.InferenceServerClient(url=FLAGS.url) as client:
        input0_data = np.random.rand(*shape).astype(np.float32)
        input1_data = np.random.rand(*shape).astype(np.float32)
        inputs = [
            httpclient.InferInput("INPUT0", input0_data.shape,
                                  np_to_triton_dtype(input0_data.dtype)),
            httpclient.InferInput("INPUT1", input1_data.shape,
                                  np_to_triton_dtype(input1_data.dtype)),
        ]

        inputs[0].set_data_from_numpy(input0_data)
        inputs[1].set_data_from_numpy(input1_data)

        outputs = [
            httpclient.InferRequestedOutput("OUTPUT0"),
            httpclient.InferRequestedOutput("OUTPUT1"),
        ]

        response = client.infer(model_name,
                                inputs,
                                request_id=str(1),
                                outputs=outputs)

        result = response.get_response()
        output0_data = response.as_numpy("OUTPUT0")
        output1_data = response.as_numpy("OUTPUT1")

        print("INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
            input0_data, input1_data, output0_data))
        print("INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
            input0_data, input1_data, output1_data))

        if not np.allclose(input0_data + input1_data, output0_data):
            print("error: incorrect sum")
            sys.exit(1)

        if not np.allclose(input0_data - input1_data, output1_data):
            print("error: incorrect difference")
            sys.exit(1)

        print('\nPASS')
        sys.exit(0)
3rdparty/backend-r22.12/examples/clients/minimal_client · new file (mode 100644)
#!/usr/bin/env python
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import sys  # used by sys.exit() below; missing from the original imports
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')
    FLAGS = parser.parse_args()

    # For the HTTP client, we need to specify a large enough concurrency
    # to issue all the inference requests to the server in parallel. For
    # this example we want to be able to send 2 requests concurrently.
    try:
        concurrent_request_count = 2
        triton_client = httpclient.InferenceServerClient(
            url=FLAGS.url, concurrency=concurrent_request_count)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # First send a single request to the nonbatching model.
    print('=========')
    input0_data = np.array([1, 2, 3, 4], dtype=np.int32)
    print('Sending request to nonbatching model: IN0 = {}'.format(
        input0_data))

    inputs = [httpclient.InferInput('IN0', [4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    result = triton_client.infer('nonbatching', inputs)

    print('Response: {}'.format(result.get_response()))
    print('OUT0 = {}'.format(result.as_numpy('OUT0')))

    # Send 2 requests to the batching model. Because these are sent
    # asynchronously and Triton's dynamic batcher is configured to
    # delay up to 5 seconds when forming a batch for this model, we
    # expect these 2 requests to be batched within Triton and sent to
    # the minimal backend as a single batch.
    print('\n=========')
    async_requests = []

    input0_data = np.array([[10, 11, 12, 13]], dtype=np.int32)
    print('Sending request to batching model: IN0 = {}'.format(input0_data))

    inputs = [httpclient.InferInput('IN0', [1, 4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    input0_data = np.array([[20, 21, 22, 23]], dtype=np.int32)
    print('Sending request to batching model: IN0 = {}'.format(input0_data))

    inputs = [httpclient.InferInput('IN0', [1, 4], "INT32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    for async_request in async_requests:
        # Get the result from the initiated asynchronous inference
        # request. This call will block until the server responds.
        result = async_request.get_result()
        print('Response: {}'.format(result.get_response()))
        print('OUT0 = {}'.format(result.as_numpy('OUT0')))
3rdparty/backend-r22.12/examples/clients/recommended_client · new file (mode 100644)
#!/usr/bin/env python
# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
import sys  # used by sys.exit() below; missing from the original imports
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-u',
                        '--url',
                        type=str,
                        required=False,
                        default='localhost:8000',
                        help='Inference server URL. Default is localhost:8000.')
    FLAGS = parser.parse_args()

    # For the HTTP client, we need to specify a large enough concurrency
    # to issue all the inference requests to the server in parallel. For
    # this example we want to be able to send 2 requests concurrently.
    try:
        concurrent_request_count = 2
        triton_client = httpclient.InferenceServerClient(
            url=FLAGS.url, concurrency=concurrent_request_count)
    except Exception as e:
        print("channel creation failed: " + str(e))
        sys.exit(1)

    # Send 2 requests to the batching model. Because these are sent
    # asynchronously and Triton's dynamic batcher is configured to
    # delay up to 5 seconds when forming a batch for this model, we
    # expect these 2 requests to be batched within Triton and sent to
    # the backend as a single batch.
    #
    # The recommended backend can handle any model with 1 input and 1
    # output as long as the input and output datatype and shape are
    # the same. The batching model uses datatype FP32 and shape
    # [ 4, 4 ].
    print('\n=========')
    async_requests = []

    input0_data = np.array([[[1.0, 1.1, 1.2, 1.3], [2.0, 2.1, 2.2, 2.3],
                             [3.0, 3.1, 3.2, 3.3], [4.0, 4.1, 4.2, 4.3]]],
                           dtype=np.float32)
    print('Sending request to batching model: input = {}'.format(input0_data))

    inputs = [httpclient.InferInput('INPUT', [1, 4, 4], "FP32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    input0_data = np.array(
        [[[10.0, 10.1, 10.2, 10.3], [20.0, 20.1, 20.2, 20.3],
          [30.0, 30.1, 30.2, 30.3], [40.0, 40.1, 40.2, 40.3]]],
        dtype=np.float32)
    print('Sending request to batching model: input = {}'.format(input0_data))

    inputs = [httpclient.InferInput('INPUT', [1, 4, 4], "FP32")]
    inputs[0].set_data_from_numpy(input0_data)
    async_requests.append(triton_client.async_infer('batching', inputs))

    for async_request in async_requests:
        # Get the result from the initiated asynchronous inference
        # request. This call will block until the server responds.
        result = async_request.get_result()
        print('Response: {}'.format(result.get_response()))
        print('OUTPUT = {}'.format(result.as_numpy('OUTPUT')))
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py · new file (mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import json

import triton_python_backend_utils as pb_utils


# This model calculates the sum and difference of INPUT0 and INPUT1 and
# puts the results in OUTPUT0 and OUTPUT1 respectively. For more
# information regarding how this model.py was written, please refer to
# Python Backend.
class TritonPythonModel:

    def initialize(self, args):
        self.model_config = model_config = json.loads(args['model_config'])

        output0_config = pb_utils.get_output_config_by_name(
            model_config, "OUTPUT0")
        output1_config = pb_utils.get_output_config_by_name(
            model_config, "OUTPUT1")

        self.output0_dtype = pb_utils.triton_string_to_numpy(
            output0_config['data_type'])
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            output1_config['data_type'])

    def execute(self, requests):
        output0_dtype = self.output0_dtype
        output1_dtype = self.output1_dtype

        responses = []
        for request in requests:
            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")

            out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(),
                            in_0.as_numpy() - in_1.as_numpy())

            out_tensor_0 = pb_utils.Tensor("OUTPUT0",
                                           out_0.astype(output0_dtype))
            out_tensor_1 = pb_utils.Tensor("OUTPUT1",
                                           out_1.astype(output1_dtype))

            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out_tensor_0, out_tensor_1])
            responses.append(inference_response)

        return responses

    def finalize(self):
        print('Cleaning up...')
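The arithmetic inside execute() is plain numpy, so it can be sanity-checked without a running Triton server. Below is a minimal offline sketch (not part of this commit, and only an assumption about how one might test it): the _Tensor class is a hypothetical stand-in that mimics nothing more than the as_numpy() accessor of pb_utils.Tensor used above.

# Hypothetical offline check, not part of this commit: mirrors the
# add/sub math from TritonPythonModel.execute() with a minimal stub
# for the pb_utils tensor interface.
import numpy as np

class _Tensor:
    """Stand-in mimicking pb_utils.Tensor's as_numpy() accessor."""

    def __init__(self, data):
        self._data = data

    def as_numpy(self):
        return self._data

in_0 = _Tensor(np.arange(16, dtype=np.float32))
in_1 = _Tensor(np.ones(16, dtype=np.float32))

# Same expression as in execute() above.
out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(),
                in_0.as_numpy() - in_1.as_numpy())

assert np.allclose(out_0, np.arange(16, dtype=np.float32) + 1.0)
assert np.allclose(out_1, np.arange(16, dtype=np.float32) - 1.0)
print('addsub math OK')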
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt · new file (mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "addsub_python"
backend: "python"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
input [
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
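Because this config exposes addsub_python with two FP32 [16] inputs and max_batch_size 0 (no batch dimension), the model can also be called directly rather than through bls_fp32. The sketch below is not part of this commit; it assumes a Triton server is already serving this model repository at localhost:8000 and reuses the tritonclient.http API shown in the clients above.

#!/usr/bin/env python
# Hypothetical sketch, not part of this commit: one direct request to
# the addsub_python model described by the config above.
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import np_to_triton_dtype

input0 = np.random.rand(16).astype(np.float32)  # matches dims: [ 16 ]
input1 = np.random.rand(16).astype(np.float32)

with httpclient.InferenceServerClient(url='localhost:8000') as client:
    inputs = [
        httpclient.InferInput("INPUT0", input0.shape,
                              np_to_triton_dtype(input0.dtype)),
        httpclient.InferInput("INPUT1", input1.shape,
                              np_to_triton_dtype(input1.dtype)),
    ]
    inputs[0].set_data_from_numpy(input0)
    inputs[1].set_data_from_numpy(input1)

    # model.py computes OUTPUT0 = INPUT0 + INPUT1 and
    # OUTPUT1 = INPUT0 - INPUT1.
    response = client.infer("addsub_python", inputs)
    print("sum:       ", response.as_numpy("OUTPUT0"))
    print("difference:", response.as_numpy("OUTPUT1"))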
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb · new file (mode 100644)
(binary file added)
3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt · new file (mode 100644)
name: "addsub_tf"
platform: "tensorflow_savedmodel"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
},
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt · new file (mode 100644)
# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "bls_fp32"
backend: "bls"
max_batch_size: 0
input [
{
name: "INPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
input [
{
name: "INPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
output [
{
name: "OUTPUT1"
data_type: TYPE_FP32
dims: [ 16 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep · new empty file (mode 100644)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt · new file (mode 100644)
backend: "minimal"
max_batch_size: 8
dynamic_batching {
max_queue_delay_microseconds: 5000000
}
input [
{
name: "IN0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
output [
{
name: "OUT0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep · new empty file (mode 100644)
3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt · new file (mode 100644)
backend: "minimal"
max_batch_size: 0
input [
{
name: "IN0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
output [
{
name: "OUT0"
data_type: TYPE_INT32
dims: [ 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep · new empty file (mode 100644)
3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt · new file (mode 100644)
backend: "recommended"
max_batch_size: 8
dynamic_batching {
max_queue_delay_microseconds: 5000000
}
input [
{
name: "INPUT"
data_type: TYPE_FP32
dims: [ 4, 4 ]
}
]
output [
{
name: "OUTPUT"
data_type: TYPE_FP32
dims: [ 4, 4 ]
}
]
instance_group [
{
kind: KIND_CPU
}
]
3rdparty/backend-r22.12/include/triton/backend/backend_common.h · new file (mode 100644)
(diff collapsed on this page; +672 lines not shown)
3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h · new file (mode 100644)
(diff collapsed on this page; +301 lines not shown)
3rdparty/backend-r22.12/include/triton/backend/backend_memory.h · new file (mode 100644)
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <string>
#include <vector>

#include "triton/core/tritonbackend.h"
#include "triton/core/tritonserver.h"

namespace triton { namespace backend {

// Collection of common properties that describes a buffer in Triton
struct MemoryDesc {
  MemoryDesc()
      : buffer_(nullptr), byte_size_(0),
        memory_type_(TRITONSERVER_MEMORY_CPU), memory_type_id_(0)
  {
  }

  MemoryDesc(
      const char* buffer, size_t byte_size,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
      : buffer_(buffer), byte_size_(byte_size), memory_type_(memory_type),
        memory_type_id_(memory_type_id)
  {
  }

  const char* buffer_;
  size_t byte_size_;
  TRITONSERVER_MemoryType memory_type_;
  int64_t memory_type_id_;
};

//
// BackendMemory
//
// Utility class for allocating and deallocating memory using both
// TRITONBACKEND_MemoryManager and direct GPU and CPU malloc/free.
//
class BackendMemory {
 public:
  enum class AllocationType {
    CPU,
    CPU_PINNED,
    GPU,
    CPU_PINNED_POOL,
    GPU_POOL
  };

  // Allocate a contiguous block of 'alloc_type' memory. 'mem'
  // returns the pointer to the allocated memory.
  //
  // CPU, CPU_PINNED_POOL and GPU_POOL are allocated using
  // TRITONBACKEND_MemoryManagerAllocate. Note that CPU_PINNED and GPU
  // allocations can be much slower than the POOL variants.
  //
  // Two error codes have specific interpretations for this function:
  //
  //   TRITONSERVER_ERROR_UNSUPPORTED: Indicates that the function is
  //     incapable of allocating the requested memory type and memory
  //     type ID. Requests for the memory type and ID will always fail
  //     no matter the 'byte_size' of the request.
  //
  //   TRITONSERVER_ERROR_UNAVAILABLE: Indicates that the function can
  //     allocate the memory type and ID but that currently it cannot
  //     allocate a contiguous block of memory of the requested
  //     'byte_size'.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type,
      const int64_t memory_type_id, const size_t byte_size,
      BackendMemory** mem);

  // Allocate a contiguous block of memory by attempting the
  // allocation using 'alloc_types' in order until one is successful.
  // See BackendMemory::Create() above for details.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager,
      const std::vector<AllocationType>& alloc_types,
      const int64_t memory_type_id, const size_t byte_size,
      BackendMemory** mem);

  // Creates a BackendMemory object from a pre-allocated buffer. The
  // buffer is not owned by the object created with this function.
  // Hence, for proper operation, the lifetime of the buffer should at
  // least extend till the corresponding BackendMemory.
  static TRITONSERVER_Error* Create(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type,
      const int64_t memory_type_id, void* buffer, const size_t byte_size,
      BackendMemory** mem);

  ~BackendMemory();

  AllocationType AllocType() const { return alloctype_; }
  int64_t MemoryTypeId() const { return memtype_id_; }
  char* MemoryPtr() { return buffer_; }
  size_t ByteSize() const { return byte_size_; }
  TRITONSERVER_MemoryType MemoryType() const
  {
    return AllocTypeToMemoryType(alloctype_);
  }

  static TRITONSERVER_MemoryType AllocTypeToMemoryType(
      const AllocationType a);
  static const char* AllocTypeString(const AllocationType a);

 private:
  BackendMemory(
      TRITONBACKEND_MemoryManager* manager, const AllocationType alloctype,
      const int64_t memtype_id, char* buffer, const size_t byte_size,
      const bool owns_buffer = true)
      : manager_(manager), alloctype_(alloctype), memtype_id_(memtype_id),
        buffer_(buffer), byte_size_(byte_size), owns_buffer_(owns_buffer)
  {
  }

  TRITONBACKEND_MemoryManager* manager_;
  AllocationType alloctype_;
  int64_t memtype_id_;
  char* buffer_;
  size_t byte_size_;
  bool owns_buffer_;
};

}}  // namespace triton::backend
3rdparty/backend-r22.12/include/triton/backend/backend_model.h · new file (mode 100644)
(diff collapsed on this page; +146 lines not shown)
(Page 1 of 22 of this commit's diff.)