OpenDAS / Lmdeploy · Commits · 0a21fff9

Commit 0a21fff9, authored Dec 20, 2023 by xiabo
Adapt to 0.1.0
Parent: 9484fd1c
Changes: 158

Showing 20 changed files with 5528 additions and 0 deletions (+5528, -0)
- 3rdparty/backend-r22.12/include/triton/backend/backend_model_instance.h (+118, -0)
- 3rdparty/backend-r22.12/include/triton/backend/backend_output_responder.h (+195, -0)
- 3rdparty/backend-r22.12/src/backend_common.cc (+1374, -0)
- 3rdparty/backend-r22.12/src/backend_input_collector.cc (+1310, -0)
- 3rdparty/backend-r22.12/src/backend_memory.cc (+231, -0)
- 3rdparty/backend-r22.12/src/backend_model.cc (+192, -0)
- 3rdparty/backend-r22.12/src/backend_model_instance.cc (+171, -0)
- 3rdparty/backend-r22.12/src/backend_output_responder.cc (+607, -0)
- 3rdparty/backend-r22.12/src/kernel.cu (+81, -0)
- 3rdparty/backend-r22.12/src/kernel.h (+42, -0)
- 3rdparty/common-r22.12/.clang-format (+37, -0)
- 3rdparty/common-r22.12/.gitignore (+3, -0)
- 3rdparty/common-r22.12/CMakeLists.txt (+431, -0)
- 3rdparty/common-r22.12/LICENSE (+25, -0)
- 3rdparty/common-r22.12/README.md (+51, -0)
- 3rdparty/common-r22.12/cmake/TritonCommonConfig.cmake.in (+51, -0)
- 3rdparty/common-r22.12/include/triton/common/async_work_queue.h (+59, -0)
- 3rdparty/common-r22.12/include/triton/common/error.h (+78, -0)
- 3rdparty/common-r22.12/include/triton/common/logging.h (+229, -0)
- 3rdparty/common-r22.12/include/triton/common/model_config.h (+243, -0)
Note: too many changes to show; only 158 of 158+ changed files are displayed.
3rdparty/backend-r22.12/include/triton/backend/backend_model_instance.h (new file, mode 100644)
// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <string>
#include "triton/core/tritonbackend.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace backend {

#ifndef TRITON_ENABLE_GPU
using cudaStream_t = void*;
#endif  // !TRITON_ENABLE_GPU

class BackendModel;

//
// BackendModelInstance
//
// Common functionality for a backend model instance. This class is
// provided as a convenience; backends are not required to use this
// class.
//
class BackendModelInstance {
 public:
  BackendModelInstance(
      BackendModel* backend_model,
      TRITONBACKEND_ModelInstance* triton_model_instance);
  virtual ~BackendModelInstance();

  // Get the name, kind and device ID of the instance.
  const std::string& Name() const { return name_; }
  TRITONSERVER_InstanceGroupKind Kind() const { return kind_; }
  int32_t DeviceId() const { return device_id_; }

  // Get the handle to the TRITONBACKEND model instance.
  TRITONBACKEND_ModelInstance* TritonModelInstance()
  {
    return triton_model_instance_;
  }

  // Get the BackendModel representing the model that corresponds to
  // this instance.
  BackendModel* Model() const { return backend_model_; }

  // The model configuration 'default_model_filename' value, or the
  // value in model configuration 'cc_model_filenames' for the GPU
  // targeted by this instance. If neither is specified in the model
  // configuration, the empty string is returned.
  const std::string& ArtifactFilename() const { return artifact_filename_; }

  // Returns the stream associated with this instance that can be used
  // for GPU<->CPU memory transfers. Returns nullptr if GPU support is
  // disabled or if this instance is not executing on a GPU.
  cudaStream_t CudaStream() { return stream_; }

  const std::string& HostPolicyName() const { return host_policy_name_; }

 protected:
  BackendModel* backend_model_;
  TRITONBACKEND_ModelInstance* triton_model_instance_;

  std::string name_;
  TRITONSERVER_InstanceGroupKind kind_;
  int32_t device_id_;
  std::string artifact_filename_;
  cudaStream_t stream_;
  std::string host_policy_name_;
};

//
// BackendModelInstanceException
//
// Exception thrown if an error occurs while constructing a
// BackendModelInstance.
//
struct BackendModelInstanceException {
  BackendModelInstanceException(TRITONSERVER_Error* err) : err_(err) {}
  TRITONSERVER_Error* err_;
};

#define THROW_IF_BACKEND_INSTANCE_ERROR(X)                              \
  do {                                                                  \
    TRITONSERVER_Error* tie_err__ = (X);                                \
    if (tie_err__ != nullptr) {                                         \
      throw triton::backend::BackendModelInstanceException(tie_err__);  \
    }                                                                   \
  } while (false)

}}  // namespace triton::backend
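The header above only declares the instance wrapper; as a rough illustration, a backend-specific subclass might be wired up as in the following sketch. This is a minimal example under assumptions: the class MyModelInstance and its Create helper are hypothetical and are not part of this commit.

// Illustrative only: a hypothetical subclass showing how a backend might use
// BackendModelInstance and THROW_IF_BACKEND_INSTANCE_ERROR. 'MyModelInstance'
// and 'Create' are assumptions, not part of the original sources.
class MyModelInstance : public triton::backend::BackendModelInstance {
 public:
  static TRITONSERVER_Error* Create(
      triton::backend::BackendModel* model,
      TRITONBACKEND_ModelInstance* instance, MyModelInstance** result)
  {
    try {
      *result = new MyModelInstance(model, instance);
    }
    catch (const triton::backend::BackendModelInstanceException& ex) {
      return ex.err_;  // the base-class constructor reported an error
    }
    return nullptr;  // success
  }

 private:
  MyModelInstance(
      triton::backend::BackendModel* model,
      TRITONBACKEND_ModelInstance* instance)
      : BackendModelInstance(model, instance)
  {
  }
};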
3rdparty/backend-r22.12/include/triton/backend/backend_output_responder.h (new file, mode 100644)
// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <list>
#include <string>
#include <vector>
#include "triton/backend/backend_common.h"
#include "triton/common/async_work_queue.h"
#include "triton/core/tritonbackend.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace backend {

#ifndef TRITON_ENABLE_GPU
using cudaStream_t = void*;
using cudaEvent_t = void*;
#endif  // !TRITON_ENABLE_GPU

//
// BackendOutputResponder
//
class BackendOutputResponder {
 public:
  // The caller can optionally provide 'event' for internal synchronization
  // instead of using 'stream'.
  explicit BackendOutputResponder(
      TRITONBACKEND_Request** requests, const uint32_t request_count,
      std::vector<TRITONBACKEND_Response*>* responses,
      TRITONBACKEND_MemoryManager* memory_manager,
      const bool first_dim_batching, const bool pinned_enabled,
      cudaStream_t stream, cudaEvent_t event = nullptr,
      bool copy_on_stream = false)
      : need_sync_(false), requests_(requests), request_count_(request_count),
        responses_(responses), memory_manager_(memory_manager),
        first_dim_batching_(first_dim_batching),
        pinned_enabled_(pinned_enabled),
        use_async_cpu_copy_(triton::common::AsyncWorkQueue::WorkerCount() > 1),
        stream_(stream), event_(event), pending_pinned_byte_size_(0),
        copy_on_stream_(copy_on_stream)
  {
  }

  // Legacy constructor for backwards compatibility. The above
  // constructor should be used for all new cases. The responder needs
  // to know if the model is batching along the first dimension. With
  // this constructor we derive that information from the
  // max_batch_size value instead of having it provided directly as in
  // the above constructor.
  explicit BackendOutputResponder(
      TRITONBACKEND_Request** requests, const uint32_t request_count,
      std::vector<TRITONBACKEND_Response*>* responses,
      const int max_batch_size, TRITONBACKEND_MemoryManager* memory_manager,
      const bool pinned_enabled, cudaStream_t stream,
      cudaEvent_t event = nullptr, bool copy_on_stream = false)
      : need_sync_(false), requests_(requests), request_count_(request_count),
        responses_(responses), memory_manager_(memory_manager),
        first_dim_batching_(max_batch_size >= 1),
        pinned_enabled_(pinned_enabled),
        use_async_cpu_copy_(triton::common::AsyncWorkQueue::WorkerCount() > 1),
        stream_(stream), event_(event), pending_pinned_byte_size_(0),
        copy_on_stream_(copy_on_stream)
  {
  }

  ~BackendOutputResponder();

  // Process all responses for a named output tensor.
  // 'batchn_shape' may be modified by the call.
  void ProcessTensor(
      const std::string& name, const TRITONSERVER_DataType datatype,
      std::vector<int64_t>& batchn_shape, const char* buffer,
      const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id);

  // Process all responses for a named state tensor. Returns a vector of
  // TRITONBACKEND_State objects that the backend can use to update the state.
  // If TRITONBACKEND_StateUpdate is not called on the vector elements, the
  // state will not be updated.
  // 'batchn_shape' may be modified by the call.
  std::vector<TRITONBACKEND_State*> ProcessStateTensor(
      const std::string& name, const TRITONSERVER_DataType datatype,
      std::vector<int64_t>& batchn_shape, const char* buffer,
      const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id);

  // Process all responses for a batch output and derive its value from
  // 'buffer'.
  void ProcessBatchOutput(
      const std::string& name, const BatchOutput& batch_output,
      const char* buffer, const TRITONSERVER_MemoryType memory_type,
      const int64_t memory_type_id);

  // Finalize processing of all responses for all output
  // tensors. Return true if cudaMemcpyAsync is called, and the caller
  // should call cudaStreamSynchronize (or cudaEventSynchronize on 'event')
  // before using the data.
  bool Finalize();

 private:
  bool FlushPendingPinned(
      const char* tensor_buffer,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id);
  bool SetFixedSizeBuffer(
      TRITONBACKEND_Response** response, void* response_state_or_output,
      const std::string& output_name, const size_t tensor_byte_size,
      const size_t tensor_offset, const char* tensor_buffer,
      const TRITONSERVER_MemoryType tensor_memory_type,
      const int64_t tensor_memory_type_id,
      const TRITONSERVER_MemoryType use_pinned_memory_type, bool state);

  struct OutputData {
    OutputData(
        const std::string& name, void* buffer, const size_t buffer_byte_size,
        const TRITONSERVER_MemoryType memory_type,
        const int64_t memory_type_id)
        : name_(name), buffer_(buffer), buffer_byte_size_(buffer_byte_size),
          memory_type_(memory_type), memory_type_id_(memory_type_id)
    {
    }
    const std::string name_;
    void* buffer_;
    const size_t buffer_byte_size_;
    const TRITONSERVER_MemoryType memory_type_;
    const int64_t memory_type_id_;
  };

  bool need_sync_;
  TRITONBACKEND_Request** requests_;
  const uint32_t request_count_;
  std::vector<TRITONBACKEND_Response*>* responses_;
  TRITONBACKEND_MemoryManager* memory_manager_;
  const bool first_dim_batching_;
  const bool pinned_enabled_;
  const bool use_async_cpu_copy_;
  cudaStream_t stream_;
  cudaEvent_t event_;

  using ResponsesList =
      std::list<std::pair<TRITONBACKEND_Response**, OutputData>>;

  size_t pending_pinned_byte_size_;
  size_t pending_pinned_offset_;
  ResponsesList pending_pinned_outputs_;

  const bool copy_on_stream_;

  // Pinned memories that need to live over the lifetime of this
  // BackendOutputResponder object.
  std::list<char*> pinned_memories_;

  // Pinned memory buffers and the corresponding response outputs
  // where the final copy to the response is deferred until Finalize()
  // after waiting for all in-flight copies.
  struct DeferredPinned {
    DeferredPinned(
        char* pinned_memory, const size_t pinned_memory_size,
        ResponsesList&& responses)
        : pinned_memory_(pinned_memory),
          pinned_memory_size_(pinned_memory_size),
          responses_(std::move(responses))
    {
    }
    char* pinned_memory_;
    const size_t pinned_memory_size_;
    ResponsesList responses_;
  };
  std::list<DeferredPinned> deferred_pinned_;
};

}}  // namespace triton::backend
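For orientation, the responder declared above is typically driven from a backend's execute path roughly as sketched below. This is a hedged example based only on the declarations in this header: the function name, output name, shape, and buffers are placeholders, and the responses vector is assumed to have been created by the caller.

// Illustrative only: typical BackendOutputResponder call pattern. Names,
// shapes and 'output_buffer' are placeholder assumptions.
void RespondWithOutput(
    TRITONBACKEND_Request** requests, uint32_t request_count,
    std::vector<TRITONBACKEND_Response*>* responses,
    TRITONBACKEND_MemoryManager* memory_manager, cudaStream_t stream,
    const char* output_buffer)
{
  triton::backend::BackendOutputResponder responder(
      requests, request_count, responses, memory_manager,
      true /* first_dim_batching */, true /* pinned_enabled */, stream);

  std::vector<int64_t> batchn_shape{4, 8};  // placeholder shape
  responder.ProcessTensor(
      "OUTPUT0", TRITONSERVER_TYPE_FP32, batchn_shape, output_buffer,
      TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */);

  // Finalize() returns true if cudaMemcpyAsync was issued; the caller must
  // then synchronize before reusing 'output_buffer'.
  if (responder.Finalize()) {
#ifdef TRITON_ENABLE_GPU
    cudaStreamSynchronize(stream);
#endif
  }
}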
3rdparty/backend-r22.12/src/backend_common.cc (new file, mode 100644)
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/backend/backend_common.h"
#ifdef _WIN32
// suppress the min and max definitions in Windef.h.
#define NOMINMAX
#include <Windows.h>
// _CRT_INTERNAL_NONSTDC_NAMES 1 before including Microsoft provided C Runtime
// library to expose declarations without "_" prefix to match POSIX style.
#define _CRT_INTERNAL_NONSTDC_NAMES 1
#include <direct.h>
#include <io.h>
#else
#include <dirent.h>
#include <unistd.h>
#endif
#include <sys/stat.h>
#include <algorithm>
#include <cerrno>
#include <fstream>
#include <functional>
#include <memory>
#ifdef _WIN32
// <sys/stat.h> in Windows doesn't define S_ISDIR macro
#if !defined(S_ISDIR) && defined(S_IFMT) && defined(S_IFDIR)
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
#endif
#define F_OK 0
#endif
namespace triton { namespace backend {

#ifdef TRITON_ENABLE_GPU
void CUDART_CB
MemcpyHost(void* args)
{
  auto* copy_params = reinterpret_cast<CopyParams*>(args);
  memcpy(copy_params->dst_, copy_params->src_, copy_params->byte_size_);
  delete copy_params;
}
#endif  // TRITON_ENABLE_GPU

TRITONSERVER_MemoryType
GetUsePinnedMemoryType(TRITONSERVER_MemoryType ref_buffer_type)
{
  // The following matrix is used for both input and output.
  // src \ dest | non-pinned    | pinned     | device
  // non-pinned | memcpy        | memcpy     | buffer needed
  // pinned     | memcpy        | memcpy     | cudaMemcpy
  // device     | buffer needed | cudaMemcpy | cudaMemcpy
  if (ref_buffer_type == TRITONSERVER_MEMORY_CPU_PINNED) {
    return TRITONSERVER_MEMORY_CPU_PINNED;
  }

  return (ref_buffer_type == TRITONSERVER_MEMORY_CPU) ? TRITONSERVER_MEMORY_GPU
                                                      : TRITONSERVER_MEMORY_CPU;
}

TRITONSERVER_Error_Code
StatusCodeToTritonCode(triton::common::Error::Code error_code)
{
  switch (error_code) {
    case triton::common::Error::Code::UNKNOWN:
      return TRITONSERVER_ERROR_UNKNOWN;
    case triton::common::Error::Code::INTERNAL:
      return TRITONSERVER_ERROR_INTERNAL;
    case triton::common::Error::Code::NOT_FOUND:
      return TRITONSERVER_ERROR_NOT_FOUND;
    case triton::common::Error::Code::INVALID_ARG:
      return TRITONSERVER_ERROR_INVALID_ARG;
    case triton::common::Error::Code::UNAVAILABLE:
      return TRITONSERVER_ERROR_UNAVAILABLE;
    case triton::common::Error::Code::UNSUPPORTED:
      return TRITONSERVER_ERROR_UNSUPPORTED;
    case triton::common::Error::Code::ALREADY_EXISTS:
      return TRITONSERVER_ERROR_ALREADY_EXISTS;
    default:
      break;
  }
  return TRITONSERVER_ERROR_UNKNOWN;
}

TRITONSERVER_Error*
CommonErrorToTritonError(triton::common::Error error)
{
  return TRITONSERVER_ErrorNew(
      StatusCodeToTritonCode(error.ErrorCode()), error.Message().c_str());
}

TRITONSERVER_Error*
ParseShape(
    common::TritonJson::Value& io, const std::string& name,
    std::vector<int64_t>* shape)
{
  common::TritonJson::Value shape_array;
  RETURN_IF_ERROR(io.MemberAsArray(name.c_str(), &shape_array));
  for (size_t i = 0; i < shape_array.ArraySize(); ++i) {
    int64_t d = 0;
    RETURN_IF_ERROR(shape_array.IndexAsInt(i, &d));
    shape->push_back(d);
  }

  return nullptr;  // success
}

std::string
ShapeToString(const int64_t* dims, const size_t dims_count)
{
  bool first = true;

  std::string str("[");
  for (size_t i = 0; i < dims_count; ++i) {
    const int64_t dim = dims[i];
    if (!first) {
      str += ",";
    }
    str += std::to_string(dim);
    first = false;
  }

  str += "]";
  return str;
}

std::string
ShapeToString(const std::vector<int64_t>& shape)
{
  return ShapeToString(shape.data(), shape.size());
}

int64_t
GetElementCount(const int64_t* dims, const size_t dims_count)
{
  bool first = true;
  int64_t cnt = 0;
  for (size_t i = 0; i < dims_count; i++) {
    if (dims[i] == WILDCARD_DIM) {
      return -1;
    }

    if (first) {
      cnt = dims[i];
      first = false;
    } else {
      cnt *= dims[i];
    }
  }

  return cnt;
}

int64_t
GetElementCount(const std::vector<int64_t>& shape)
{
  return GetElementCount(shape.data(), shape.size());
}

int64_t
GetByteSize(
    const TRITONSERVER_DataType& dtype, const std::vector<int64_t>& dims)
{
  size_t dt_size = TRITONSERVER_DataTypeByteSize(dtype);
  if (dt_size == 0) {
    return -1;
  }

  int64_t cnt = GetElementCount(dims);
  if (cnt == -1) {
    return -1;
  }

  return cnt * dt_size;
}

TRITONSERVER_Error*
ReadInputTensor(
    TRITONBACKEND_Request* request, const std::string& input_name,
    char* buffer, size_t* buffer_byte_size,
    TRITONSERVER_MemoryType memory_type, int64_t memory_type_id,
    cudaStream_t cuda_stream, bool* cuda_used, const char* host_policy_name,
    const bool copy_on_stream)
{
  TRITONBACKEND_Input* input;
  RETURN_IF_ERROR(
      TRITONBACKEND_RequestInput(request, input_name.c_str(), &input));

  uint64_t input_byte_size;
  uint32_t input_buffer_count;
  RETURN_IF_ERROR(TRITONBACKEND_InputPropertiesForHostPolicy(
      input, host_policy_name, nullptr, nullptr, nullptr, nullptr,
      &input_byte_size, &input_buffer_count));

  RETURN_ERROR_IF_FALSE(
      input_byte_size <= *buffer_byte_size, TRITONSERVER_ERROR_INVALID_ARG,
      std::string(
          GetRequestId(request) + "buffer too small for input tensor '" +
          input_name + "', " + std::to_string(*buffer_byte_size) + " < " +
          std::to_string(input_byte_size)));

  size_t output_buffer_offset = 0;
  for (uint32_t b = 0; b < input_buffer_count; ++b) {
    const void* input_buffer = nullptr;
    uint64_t input_buffer_byte_size = 0;
    TRITONSERVER_MemoryType input_memory_type = TRITONSERVER_MEMORY_CPU;
    int64_t input_memory_type_id = 0;
    RETURN_IF_ERROR(TRITONBACKEND_InputBufferForHostPolicy(
        input, host_policy_name, b, &input_buffer, &input_buffer_byte_size,
        &input_memory_type, &input_memory_type_id));

    RETURN_IF_ERROR(CopyBuffer(
        "Failed to copy buffer", input_memory_type, input_memory_type_id,
        memory_type, memory_type_id, input_buffer_byte_size, input_buffer,
        buffer + output_buffer_offset, cuda_stream, cuda_used,
        copy_on_stream));
    output_buffer_offset += input_buffer_byte_size;
  }

  *buffer_byte_size = input_byte_size;

  return nullptr;  // success
}

TRITONSERVER_Error*
ReadInputTensor(
    TRITONBACKEND_Request* request, const std::string& input_name,
    char* buffer, size_t* buffer_byte_size, const char* host_policy_name)
{
  bool cuda_used;
  return ReadInputTensor(
      request, input_name, buffer, buffer_byte_size,
      TRITONSERVER_MEMORY_CPU /* memory_type */, 0 /* memory_type_id */,
      0 /* cuda_stream */, &cuda_used);
}

TRITONSERVER_Error*
CheckAllowedModelInput(
    common::TritonJson::Value& io, const std::set<std::string>& allowed)
{
  std::string io_name;
  RETURN_IF_ERROR(io.MemberAsString("name", &io_name));
  if (allowed.find(io_name) == allowed.end()) {
    std::string astr;
    for (const auto& a : allowed) {
      if (!astr.empty()) {
        astr.append(", ");
      }
      astr.append(a);
    }

    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string(
            "unexpected inference input '" + io_name +
            "', allowed inputs are: " + astr)
            .c_str());
  }
  return nullptr;  // success
}

TRITONSERVER_Error*
CheckAllowedModelOutput(
    common::TritonJson::Value& io, const std::set<std::string>& allowed)
{
  std::string io_name;
  RETURN_IF_ERROR(io.MemberAsString("name", &io_name));
  if (allowed.find(io_name) == allowed.end()) {
    std::string astr;
    for (const auto& a : allowed) {
      if (!astr.empty()) {
        astr.append(", ");
      }
      astr.append(a);
    }

    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string(
            "unexpected inference output '" + io_name +
            "', allowed outputs are: " + astr)
            .c_str());
  }
  return nullptr;  // success
}

TRITONSERVER_Error*
GetBooleanSequenceControlProperties(
    common::TritonJson::Value& batcher, const std::string& model_name,
    const std::string& control_kind, const bool required,
    std::string* tensor_name, std::string* tensor_datatype,
    float* fp32_false_value, float* fp32_true_value,
    int32_t* int32_false_value, int32_t* int32_true_value,
    bool* bool_false_value, bool* bool_true_value)
{
  // Make sure same tensor is not configured for multiple controls
  std::set<std::string> seen_tensors;

  // Make sure the control kind is not mentioned multiple times.
  bool seen_control = false;

  common::TritonJson::Value control_inputs;
  if (batcher.Find("control_input", &control_inputs)) {
    for (size_t ci_idx = 0; ci_idx < control_inputs.ArraySize(); ci_idx++) {
      common::TritonJson::Value control_input;
      RETURN_IF_ERROR(control_inputs.IndexAsObject(ci_idx, &control_input));
      std::string input_name;
      RETURN_IF_ERROR(control_input.MemberAsString("name", &input_name));
      if (input_name.empty()) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string(
                 "sequence batching control tensor must have a name for ") +
             model_name)
                .c_str());
      }
      if (seen_tensors.find(input_name) != seen_tensors.end()) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string("sequence batching control tensor '") + input_name +
             "' is specified for multiple control kinds for " + model_name)
                .c_str());
      }

      seen_tensors.insert(input_name);

      common::TritonJson::Value controls;
      if (control_input.Find("control", &controls)) {
        for (size_t c_idx = 0; c_idx < controls.ArraySize(); c_idx++) {
          common::TritonJson::Value c;
          RETURN_IF_ERROR(controls.IndexAsObject(c_idx, &c));
          std::string kind_str;
          RETURN_IF_ERROR(c.MemberAsString("kind", &kind_str));
          if (kind_str == control_kind) {
            if (seen_control) {
              return TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string(
                       "sequence batching specifies multiple " +
                       control_kind + " tensors for " + model_name)
                       .c_str()));
            }

            *tensor_name = input_name;
            seen_control = true;

            common::TritonJson::Value int32_false_true, fp32_false_true,
                bool_false_true;
            bool found_int32 =
                (c.Find("int32_false_true", &int32_false_true) &&
                 (int32_false_true.ArraySize() > 0));
            bool found_fp32 =
                (c.Find("fp32_false_true", &fp32_false_true) &&
                 (fp32_false_true.ArraySize() > 0));
            bool found_bool =
                (c.Find("bool_false_true", &bool_false_true) &&
                 (bool_false_true.ArraySize() > 0));

            // Make sure only one of int, float, or bool type is specified.
            if (!(found_int32 || found_fp32 || found_bool)) {
              return TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string(
                       "sequence batching must specify either "
                       "'int32_false_true', 'fp32_false_true' or "
                       "'bool_false_true' for " +
                       control_kind + " for " + model_name))
                      .c_str());
            } else if (
                (found_fp32 && found_int32) || (found_fp32 && found_bool) ||
                (found_int32 && found_bool)) {
              return TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string(
                       "sequence batching specifies more than one from "
                       "'int32_false_true', 'fp32_false_true' and "
                       "'bool_false_true' for " +
                       control_kind + " for " + model_name))
                      .c_str());
            }

            if (found_int32) {
              if (int32_false_true.ArraySize() != 2) {
                return TRITONSERVER_ErrorNew(
                    TRITONSERVER_ERROR_INVALID_ARG,
                    (std::string(
                         "sequence batching control 'int32_false_true' must "
                         "have exactly 2 entries for " +
                         control_kind + " for " + model_name))
                        .c_str());
              }

              if (tensor_datatype != nullptr) {
                *tensor_datatype = "TYPE_INT32";
              }
              if (int32_false_value != nullptr) {
                int64_t value;
                RETURN_IF_ERROR(int32_false_true.IndexAsInt(0, &value));
                *int32_false_value = value;
              }
              if (int32_true_value != nullptr) {
                int64_t value;
                RETURN_IF_ERROR(int32_false_true.IndexAsInt(1, &value));
                *int32_true_value = value;
              }
            } else if (found_fp32) {
              if (fp32_false_true.ArraySize() != 2) {
                return TRITONSERVER_ErrorNew(
                    TRITONSERVER_ERROR_INVALID_ARG,
                    (std::string(
                         "sequence batching control 'fp32_false_true' must "
                         "have exactly 2 entries for " +
                         control_kind + " for " + model_name))
                        .c_str());
              }

              if (tensor_datatype != nullptr) {
                *tensor_datatype = "TYPE_FP32";
              }
              if (fp32_false_value != nullptr) {
                double value = 0.0;
                RETURN_IF_ERROR(fp32_false_true.IndexAsDouble(0, &value));
                *fp32_false_value = value;
              }
              if (fp32_true_value != nullptr) {
                double value = 0.0;
                RETURN_IF_ERROR(fp32_false_true.IndexAsDouble(1, &value));
                *fp32_true_value = value;
              }
            } else {
              if (bool_false_true.ArraySize() != 2) {
                return TRITONSERVER_ErrorNew(
                    TRITONSERVER_ERROR_INVALID_ARG,
                    (std::string(
                         "sequence batching control 'bool_false_true' must "
                         "have exactly 2 entries for " +
                         control_kind + " for " + model_name))
                        .c_str());
              }

              if (tensor_datatype != nullptr) {
                *tensor_datatype = "TYPE_BOOL";
              }
              if (bool_false_value != nullptr) {
                bool value;
                RETURN_IF_ERROR(bool_false_true.IndexAsBool(0, &value));
                *bool_false_value = value;
              }
              if (bool_true_value != nullptr) {
                bool value;
                RETURN_IF_ERROR(bool_false_true.IndexAsBool(1, &value));
                *bool_true_value = value;
              }
            }
          }
        }
      }
    }
  }

  if (!seen_control) {
    if (required) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          (std::string(
               "sequence batching control tensor must specify a " +
               control_kind + " value for " + model_name))
              .c_str());
    }

    tensor_name->clear();
  }

  return nullptr;  // success
}

TRITONSERVER_Error*
GetTypedSequenceControlProperties(
    common::TritonJson::Value& batcher, const std::string& model_name,
    const std::string& control_kind, const bool required,
    std::string* tensor_name, std::string* tensor_datatype)
{
  // Make sure same tensor is not configured for multiple controls
  std::set<std::string> seen_tensors;

  // Make sure the control kind is not mentioned multiple times.
  bool seen_control = false;

  common::TritonJson::Value control_inputs;
  if (batcher.Find("control_input", &control_inputs)) {
    for (size_t ci_idx = 0; ci_idx < control_inputs.ArraySize(); ci_idx++) {
      common::TritonJson::Value control_input;
      RETURN_IF_ERROR(control_inputs.IndexAsObject(ci_idx, &control_input));
      std::string input_name;
      RETURN_IF_ERROR(control_input.MemberAsString("name", &input_name));
      if (input_name.empty()) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string(
                 "sequence batching control tensor must have a name for ") +
             model_name)
                .c_str());
      }
      if (seen_tensors.find(input_name) != seen_tensors.end()) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string("sequence batching control tensor '") + input_name +
             "' is specified for multiple control kinds for " + model_name)
                .c_str());
      }

      seen_tensors.insert(input_name);

      common::TritonJson::Value controls;
      if (control_input.Find("control", &controls)) {
        for (size_t c_idx = 0; c_idx < controls.ArraySize(); c_idx++) {
          common::TritonJson::Value c;
          RETURN_IF_ERROR(controls.IndexAsObject(c_idx, &c));
          std::string kind_str;
          RETURN_IF_ERROR(c.MemberAsString("kind", &kind_str));
          if (kind_str == control_kind) {
            if (seen_control) {
              return TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string(
                       "sequence batching specifies multiple " +
                       control_kind + " tensors for " + model_name)
                       .c_str()));
            }

            *tensor_name = input_name;
            if (tensor_datatype != nullptr) {
              RETURN_IF_ERROR(c.MemberAsString("data_type", tensor_datatype));
            }

            seen_control = true;

            common::TritonJson::Value int32_false_true, fp32_false_true,
                bool_false_true;
            bool found_int32 =
                (c.Find("int32_false_true", &int32_false_true) &&
                 (int32_false_true.ArraySize() > 0));
            bool found_fp32 =
                (c.Find("fp32_false_true", &fp32_false_true) &&
                 (fp32_false_true.ArraySize() > 0));
            bool found_bool =
                (c.Find("bool_false_true", &bool_false_true) &&
                 (bool_false_true.ArraySize() > 0));
            if (found_fp32 || found_int32 || found_bool) {
              return TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_INVALID_ARG,
                  (std::string(
                       "sequence batching must not specify either "
                       "'int32_false_true', 'fp32_false_true' or "
                       "'bool_false_true' for " +
                       control_kind + " for " + model_name))
                      .c_str());
            }
          }
        }
      }
    }
  }

  if (!seen_control) {
    if (required) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          (std::string(
               "sequence batching control tensor must specify a " +
               control_kind + " value for " + model_name))
              .c_str());
    }

    tensor_name->clear();
  }

  return nullptr;  // success
}

void
RequestsRespondWithError(
    TRITONBACKEND_Request** requests, const uint32_t request_count,
    TRITONSERVER_Error* response_err, const bool release_request)
{
  for (size_t i = 0; i < request_count; i++) {
    TRITONBACKEND_Response* response;
    auto err = TRITONBACKEND_ResponseNew(&response, requests[i]);
    if (err != nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (GetRequestId(requests[i]) + "fail to create response").c_str());
      TRITONSERVER_ErrorDelete(err);
    } else {
      LOG_IF_ERROR(
          TRITONBACKEND_ResponseSend(
              response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, response_err),
          (GetRequestId(requests[i]) + "fail to send error response")
              .c_str());
    }

    if (release_request) {
      LOG_IF_ERROR(
          TRITONBACKEND_RequestRelease(
              requests[i], TRITONSERVER_REQUEST_RELEASE_ALL),
          "fail to release request");
      requests[i] = nullptr;
    }
  }

  TRITONSERVER_ErrorDelete(response_err);
}

void
SendErrorForResponses(
    std::vector<TRITONBACKEND_Response*>* responses,
    const uint32_t response_count, TRITONSERVER_Error* response_err)
{
  for (size_t i = 0; i < response_count; i++) {
    TRITONBACKEND_Response* response = (*responses)[i];
    if (response != nullptr) {
      LOG_IF_ERROR(
          TRITONBACKEND_ResponseSend(
              response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, response_err),
          "fail to send error response");
      (*responses)[i] = nullptr;
    }
  }

  TRITONSERVER_ErrorDelete(response_err);
}

TRITONSERVER_Error*
CopyBuffer(
    const std::string& msg, const TRITONSERVER_MemoryType src_memory_type,
    const int64_t src_memory_type_id,
    const TRITONSERVER_MemoryType dst_memory_type,
    const int64_t dst_memory_type_id, const size_t byte_size, const void* src,
    void* dst, cudaStream_t cuda_stream, bool* cuda_used,
    const bool copy_on_stream)
{
  *cuda_used = false;

  if (byte_size > 0) {
    if (src == nullptr) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          std::string(
              msg + ": attempted a copy of " + std::to_string(byte_size) +
              " Bytes from an uninitialized memory")
              .c_str());
    }
    if (dst == nullptr) {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          std::string(
              msg + ": attempted a copy of " + std::to_string(byte_size) +
              " Bytes to an uninitialized memory")
              .c_str());
    }
  }

  // For CUDA memcpy, if copy_on_stream is false, all host to host copy will be
  // blocked in respect to the host, so use memcpy() directly. In this case,
  // need to be careful on whether the src buffer is valid.
  if ((src_memory_type != TRITONSERVER_MEMORY_GPU) &&
      (dst_memory_type != TRITONSERVER_MEMORY_GPU)) {
#ifdef TRITON_ENABLE_GPU
    if (copy_on_stream) {
      auto params = new CopyParams(dst, src, byte_size);
      cudaLaunchHostFunc(
          cuda_stream, MemcpyHost, reinterpret_cast<void*>(params));
      *cuda_used = true;
    } else {
      memcpy(dst, src, byte_size);
    }
#else
    memcpy(dst, src, byte_size);
#endif  // TRITON_ENABLE_GPU
  } else {
#ifdef TRITON_ENABLE_GPU
    // [TODO] use cudaMemcpyDefault if UVM is supported for the device
    auto copy_kind = cudaMemcpyDeviceToDevice;
    if (src_memory_type != TRITONSERVER_MEMORY_GPU) {
      copy_kind = cudaMemcpyHostToDevice;
    } else if (dst_memory_type != TRITONSERVER_MEMORY_GPU) {
      copy_kind = cudaMemcpyDeviceToHost;
    }

    if ((src_memory_type_id != dst_memory_type_id) &&
        (copy_kind == cudaMemcpyDeviceToDevice)) {
      RETURN_IF_CUDA_ERROR(
          cudaMemcpyPeerAsync(
              dst, dst_memory_type_id, src, src_memory_type_id, byte_size,
              cuda_stream),
          TRITONSERVER_ERROR_INTERNAL, msg + ": failed to perform CUDA copy");
    } else {
      RETURN_IF_CUDA_ERROR(
          cudaMemcpyAsync(dst, src, byte_size, copy_kind, cuda_stream),
          TRITONSERVER_ERROR_INTERNAL, msg + ": failed to perform CUDA copy");
    }

    *cuda_used = true;
#else
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        std::string(msg + ": try to use CUDA copy while GPU is not supported")
            .c_str());
#endif  // TRITON_ENABLE_GPU
  }

  return nullptr;  // success
}
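// ---------------------------------------------------------------------------
// Illustrative aside (not part of the original backend_common.cc): a minimal
// sketch of how a backend might drive CopyBuffer above. The helper name
// 'CopyInputToDevice', the buffers and the stream are placeholder assumptions.
// ---------------------------------------------------------------------------
// TRITONSERVER_Error*
// CopyInputToDevice(
//     const void* src, void* dst, size_t byte_size, cudaStream_t stream)
// {
//   bool cuda_used = false;
//   RETURN_IF_ERROR(triton::backend::CopyBuffer(
//       "input copy", TRITONSERVER_MEMORY_CPU, 0 /* src_memory_type_id */,
//       TRITONSERVER_MEMORY_GPU, 0 /* dst_memory_type_id */, byte_size, src,
//       dst, stream, &cuda_used, false /* copy_on_stream */));
//   if (cuda_used) {
// #ifdef TRITON_ENABLE_GPU
//     // A CUDA async copy was issued; synchronize before reusing 'src'.
//     cudaStreamSynchronize(stream);
// #endif
//   }
//   return nullptr;  // success
// }
// ---------------------------------------------------------------------------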
TRITONSERVER_Error*
GetDirectoryContents(const std::string& path, std::set<std::string>* contents)
{
#ifdef _WIN32
  WIN32_FIND_DATA entry;
  HANDLE dir = FindFirstFile(path.c_str(), &entry);
  if (dir == INVALID_HANDLE_VALUE) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        (std::string("failed to open directory: ") + path).c_str());
  }
  if ((entry.cFileName != ".") && (entry.cFileName != "..")) {
    contents->insert(entry.cFileName);
  }
  while (FindNextFileA(dir, &entry)) {
    if ((entry.cFileName != ".") && (entry.cFileName != "..")) {
      contents->insert(entry.cFileName);
    }
  }

  FindClose(dir);
#else
  DIR* dir = opendir(path.c_str());
  if (dir == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        (std::string("failed to open directory: ") + path).c_str());
  }

  struct dirent* entry;
  while ((entry = readdir(dir)) != nullptr) {
    std::string entryname = entry->d_name;
    if ((entryname != ".") && (entryname != "..")) {
      contents->insert(entryname);
    }
  }

  closedir(dir);
#endif
  return nullptr;  // success
}

TRITONSERVER_Error*
FileExists(const std::string& path, bool* exists)
{
  *exists = (access(path.c_str(), F_OK) == 0);
  return nullptr;  // success
}

TRITONSERVER_Error*
ReadTextFile(const std::string& path, std::string* contents)
{
  std::ifstream in(path, std::ios::in | std::ios::binary);
  if (!in) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        ("failed to open/read file '" + path + "': " + strerror(errno))
            .c_str());
  }

  in.seekg(0, std::ios::end);
  contents->resize(in.tellg());

  in.seekg(0, std::ios::beg);
  in.read(&(*contents)[0], contents->size());
  in.close();

  return nullptr;  // success
}

TRITONSERVER_Error*
IsDirectory(const std::string& path, bool* is_dir)
{
  *is_dir = false;

  struct stat st;
  if (stat(path.c_str(), &st) != 0) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        (std::string("failed to stat file ") + path).c_str());
  }

  *is_dir = S_ISDIR(st.st_mode);
  return nullptr;  // success
}

std::string
JoinPath(std::initializer_list<std::string> segments)
{
  std::string joined;

  for (const auto& seg : segments) {
    if (joined.empty()) {
      joined = seg;
    } else if (!seg.empty() && (seg[0] == '/')) {  // IsAbsolutePath(seg)
      if (joined[joined.size() - 1] == '/') {
        joined.append(seg.substr(1));
      } else {
        joined.append(seg);
      }
    } else {  // !IsAbsolutePath(seg)
      if (joined[joined.size() - 1] != '/') {
        joined.append("/");
      }
      joined.append(seg);
    }
  }

  return joined;
}

TRITONSERVER_Error*
ModelPaths(
    const std::string& model_repository_path, uint64_t version,
    const bool ignore_directories, const bool ignore_files,
    std::unordered_map<std::string, std::string>* model_paths)
{
  std::set<std::string> model_files;
  // Read all the files in 'path' and filter by type for different requirements
  auto path = JoinPath({model_repository_path, std::to_string(version)});
  RETURN_IF_ERROR(GetDirectoryContents(path, &model_files));
  if (ignore_directories) {
    // Erase directory entries...
    for (auto iter = model_files.begin(); iter != model_files.end();) {
      bool is_dir;
      RETURN_IF_ERROR(IsDirectory(JoinPath({path, *iter}), &is_dir));
      if (is_dir) {
        iter = model_files.erase(iter);
      } else {
        ++iter;
      }
    }
  }
  if (ignore_files) {
    // Erase non-directory entries...
    for (auto iter = model_files.begin(); iter != model_files.end();) {
      bool is_dir;
      RETURN_IF_ERROR(IsDirectory(JoinPath({path, *iter}), &is_dir));
      if (!is_dir) {
        iter = model_files.erase(iter);
      } else {
        ++iter;
      }
    }
  }

  for (const auto& filename : model_files) {
    const auto model_path = JoinPath({path, filename});
    model_paths->emplace(
        std::piecewise_construct, std::make_tuple(filename),
        std::make_tuple(model_path));
  }

  return nullptr;  // success
}

TRITONSERVER_Error*
CreateCudaStream(
    const int device_id, const int cuda_stream_priority, cudaStream_t* stream)
{
  *stream = nullptr;

#ifdef TRITON_ENABLE_GPU
  // Make sure that correct device is set before creating stream and
  // then restore the device to what was set by the caller.
  int current_device;
  auto cuerr = cudaGetDevice(&current_device);
  bool overridden = false;
  if (cuerr == cudaSuccess) {
    overridden = (current_device != device_id);
    if (overridden) {
      cuerr = cudaSetDevice(device_id);
    }
  }

  if (cuerr == cudaSuccess) {
    cuerr = cudaStreamCreateWithPriority(
        stream, cudaStreamDefault, cuda_stream_priority);
  }

  if (overridden) {
    cudaSetDevice(current_device);
  }

  if (cuerr != cudaSuccess) {
    *stream = nullptr;
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        (std::string("unable to create stream: ") +
         cudaGetErrorString(cuerr))
            .c_str());
  }
#endif  // TRITON_ENABLE_GPU

  return nullptr;  // success
}

TRITONSERVER_Error*
ParseLongLongValue(const std::string& value, int64_t* parsed_value)
{
  try {
    *parsed_value = std::stoll(value);
  }
  catch (const std::invalid_argument& ia) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("failed to convert '") + value +
         "' to long long integral number")
            .c_str());
  }

  return nullptr;  // success
}

TRITONSERVER_Error*
ParseUnsignedLongLongValue(const std::string& value, uint64_t* parsed_value)
{
  try {
    *parsed_value = std::stoull(value);
  }
  catch (const std::invalid_argument& ia) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("failed to convert '") + value +
         "' to unsigned long long integral number")
            .c_str());
  }

  return nullptr;  // success
}

TRITONSERVER_Error*
ParseBoolValue(const std::string& value, bool* parsed_value)
{
  std::string lvalue = value;
  std::transform(
      lvalue.begin(), lvalue.end(), lvalue.begin(),
      [](unsigned char c) { return std::tolower(c); });

  if ((lvalue == "true") || (lvalue == "on") || (lvalue == "1")) {
    *parsed_value = true;
    return nullptr;  // success
  }
  if ((lvalue == "false") || (lvalue == "off") || (lvalue == "0")) {
    *parsed_value = false;
    return nullptr;  // success
  }

  return TRITONSERVER_ErrorNew(
      TRITONSERVER_ERROR_INVALID_ARG,
      (std::string("failed to convert '") + value + "' to boolean").c_str());
}

TRITONSERVER_Error*
ParseIntValue(const std::string& value, int* parsed_value)
{
  try {
    *parsed_value = std::stoi(value);
  }
  catch (const std::invalid_argument& ia) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("failed to convert '") + value + "' to integral number")
            .c_str());
  }

  return nullptr;  // success
}

TRITONSERVER_Error*
ParseDoubleValue(const std::string& value, double* parsed_value)
{
  try {
    *parsed_value = std::stod(value);
  }
  catch (const std::invalid_argument& ia) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        (std::string("failed to convert '") + value + "' to double number")
            .c_str());
  }

  return nullptr;  // success
}

TRITONSERVER_Error*
GetParameterValue(
    triton::common::TritonJson::Value& params, const std::string& key,
    std::string* value)
{
  triton::common::TritonJson::Value json_value;
  RETURN_ERROR_IF_FALSE(
      params.Find(key.c_str(), &json_value), TRITONSERVER_ERROR_NOT_FOUND,
      std::string("model configuration is missing the parameter ") + key);
  RETURN_IF_ERROR(json_value.MemberAsString("string_value", value));
  return nullptr;  // success
}

TRITONSERVER_Error*
BatchInput::ParseFromModelConfig(
    triton::common::TritonJson::Value& config,
    std::vector<BatchInput>* batch_inputs)
{
  batch_inputs->clear();
  triton::common::TritonJson::Value bis;
  RETURN_IF_ERROR(config.MemberAsArray("batch_input", &bis));
  for (size_t i = 0; i < bis.ArraySize(); ++i) {
    triton::common::TritonJson::Value bi;
    RETURN_IF_ERROR(bis.IndexAsObject(i, &bi));
    batch_inputs->emplace_back();
    RETURN_IF_ERROR(batch_inputs->back().Init(bi));
  }
  return nullptr;  // success
}

TRITONSERVER_Error*
BatchInput::Init(triton::common::TritonJson::Value& bi_config)
{
  {
    triton::common::TritonJson::Value bi_target_names;
    RETURN_IF_ERROR(bi_config.MemberAsArray("target_name", &bi_target_names));
    for (size_t i = 0; i < bi_target_names.ArraySize(); ++i) {
      std::string tn;
      RETURN_IF_ERROR(bi_target_names.IndexAsString(i, &tn));
      target_names_.emplace_back(std::move(tn));
    }
  }
  {
    RETURN_IF_ERROR(bi_config.MemberAsString("kind", &kind_str_));
    if (kind_str_ == "BATCH_ELEMENT_COUNT") {
      kind_ = Kind::BATCH_ELEMENT_COUNT;
    } else if (kind_str_ == "BATCH_ACCUMULATED_ELEMENT_COUNT") {
      kind_ = Kind::BATCH_ACCUMULATED_ELEMENT_COUNT;
    } else if (kind_str_ == "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO") {
      kind_ = Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO;
    } else if (kind_str_ == "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE") {
      kind_ = Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE;
    } else if (kind_str_ == "BATCH_ITEM_SHAPE") {
      kind_ = Kind::BATCH_ITEM_SHAPE;
    } else if (kind_str_ == "BATCH_ITEM_SHAPE_FLATTEN") {
      kind_ = Kind::BATCH_ITEM_SHAPE_FLATTEN;
    } else {
      RETURN_ERROR_IF_FALSE(
          false, TRITONSERVER_ERROR_INVALID_ARG,
          std::string("unexpected batch input kind '" + kind_str_ + "'"));
    }
  }
  {
    std::string bi_dtype;
    RETURN_IF_ERROR(bi_config.MemberAsString("data_type", &bi_dtype));
    data_type_ = ModelConfigDataTypeToTritonServerDataType(bi_dtype);
    RETURN_ERROR_IF_TRUE(
        data_type_ == TRITONSERVER_TYPE_INVALID,
        TRITONSERVER_ERROR_INVALID_ARG,
        std::string("unexpected batch input data type '" + bi_dtype + "'"));
  }
  {
    triton::common::TritonJson::Value bi_source_inputs;
    RETURN_IF_ERROR(
        bi_config.MemberAsArray("source_input", &bi_source_inputs));
    for (size_t i = 0; i < bi_source_inputs.ArraySize(); ++i) {
      std::string si;
      RETURN_IF_ERROR(bi_source_inputs.IndexAsString(i, &si));
      source_inputs_.emplace_back(std::move(si));
    }
  }
  return nullptr;  // success
}

TRITONSERVER_DataType
ModelConfigDataTypeToTritonServerDataType(const std::string& data_type_str)
{
  // Must start with "TYPE_".
  if (data_type_str.rfind("TYPE_", 0) != 0) {
    return TRITONSERVER_TYPE_INVALID;
  }

  const std::string dtype = data_type_str.substr(strlen("TYPE_"));

  if (dtype == "BOOL") {
    return TRITONSERVER_TYPE_BOOL;
  } else if (dtype == "UINT8") {
    return TRITONSERVER_TYPE_UINT8;
  } else if (dtype == "UINT16") {
    return TRITONSERVER_TYPE_UINT16;
  } else if (dtype == "UINT32") {
    return TRITONSERVER_TYPE_UINT32;
  } else if (dtype == "UINT64") {
    return TRITONSERVER_TYPE_UINT64;
  } else if (dtype == "INT8") {
    return TRITONSERVER_TYPE_INT8;
  } else if (dtype == "INT16") {
    return TRITONSERVER_TYPE_INT16;
  } else if (dtype == "INT32") {
    return TRITONSERVER_TYPE_INT32;
  } else if (dtype == "INT64") {
    return TRITONSERVER_TYPE_INT64;
  } else if (dtype == "FP16") {
    return TRITONSERVER_TYPE_FP16;
  } else if (dtype == "FP32") {
    return TRITONSERVER_TYPE_FP32;
  } else if (dtype == "FP64") {
    return TRITONSERVER_TYPE_FP64;
  } else if (dtype == "STRING") {
    return TRITONSERVER_TYPE_BYTES;
  } else if (dtype == "BF16") {
    return TRITONSERVER_TYPE_BF16;
  }

  return TRITONSERVER_TYPE_INVALID;
}

TRITONSERVER_Error*
BatchOutput::ParseFromModelConfig(
    triton::common::TritonJson::Value& config,
    std::vector<BatchOutput>* batch_outputs)
{
  batch_outputs->clear();
  triton::common::TritonJson::Value bos;
  RETURN_IF_ERROR(config.MemberAsArray("batch_output", &bos));
  for (size_t i = 0; i < bos.ArraySize(); ++i) {
    batch_outputs->emplace_back();
    auto& batch_output = batch_outputs->back();
    triton::common::TritonJson::Value bo;
    RETURN_IF_ERROR(bos.IndexAsObject(i, &bo));
    {
      triton::common::TritonJson::Value bo_target_names;
      RETURN_IF_ERROR(bo.MemberAsArray("target_name", &bo_target_names));
      for (size_t i = 0; i < bo_target_names.ArraySize(); ++i) {
        std::string tn;
        RETURN_IF_ERROR(bo_target_names.IndexAsString(i, &tn));
        batch_output.target_names_.emplace_back(std::move(tn));
      }
    }
    {
      std::string bo_kind;
      RETURN_IF_ERROR(bo.MemberAsString("kind", &bo_kind));
      if (bo_kind == "BATCH_SCATTER_WITH_INPUT_SHAPE") {
        batch_output.kind_ = Kind::BATCH_SCATTER_WITH_INPUT_SHAPE;
        // Keep track of the output info for later cross reference with input
        int64_t mbs = 0;
        RETURN_IF_ERROR(config.MemberAsInt("max_batch_size", &mbs));
        if (mbs != 0) {
          batch_output.shape_.push_back(-1);
        }
        triton::common::TritonJson::Value ios;
        RETURN_IF_ERROR(config.MemberAsArray("output", &ios));
        for (size_t i = 0; i < ios.ArraySize(); i++) {
          triton::common::TritonJson::Value io;
          RETURN_IF_ERROR(ios.IndexAsObject(i, &io));
          std::string io_name;
          RETURN_IF_ERROR(io.MemberAsString("name", &io_name));
          if (io_name == batch_output.target_names_[0]) {
            std::string io_dtype;
            RETURN_IF_ERROR(io.MemberAsString("data_type", &io_dtype));
            batch_output.data_type_ =
                ModelConfigDataTypeToTritonServerDataType(io_dtype);
            // If a reshape is provided for the input then use that when
            // validating that the model matches what is expected.
            triton::common::TritonJson::Value reshape;
            if (io.Find("reshape", &reshape)) {
              RETURN_IF_ERROR(
                  ParseShape(reshape, "shape", &batch_output.shape_));
            } else {
              RETURN_IF_ERROR(ParseShape(io, "dims", &batch_output.shape_));
            }
            break;
          }
        }
      } else {
        RETURN_ERROR_IF_FALSE(
            false, TRITONSERVER_ERROR_INVALID_ARG,
            std::string("unexpected batch output kind '" + bo_kind + "'"));
      }
    }
    {
      triton::common::TritonJson::Value bo_source_inputs;
      RETURN_IF_ERROR(bo.MemberAsArray("source_input", &bo_source_inputs));
      for (size_t i = 0; i < bo_source_inputs.ArraySize(); ++i) {
        std::string si;
        RETURN_IF_ERROR(bo_source_inputs.IndexAsString(i, &si));
        batch_output.source_inputs_.emplace_back(std::move(si));
      }
    }
  }
  return nullptr;  // success
}

TRITONSERVER_Error*
TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    std::string* value, const std::string& default_value)
{
  triton::common::TritonJson::Value json_value;
  if (params.Find(mkey.c_str(), &json_value)) {
    RETURN_IF_ERROR(json_value.MemberAsString("string_value", value));
  } else {
    *value = default_value;
  }
  return nullptr;  // success
}

TRITONSERVER_Error*
TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    int* value, const int& default_value)
{
  triton::common::TritonJson::Value json_value;
  if (params.Find(mkey.c_str(), &json_value)) {
    std::string string_value;
    RETURN_IF_ERROR(
        json_value.MemberAsString("string_value", &string_value));
    return ParseIntValue(string_value, value);
  } else {
    *value = default_value;
    return nullptr;  // success
  }
}

TRITONSERVER_Error*
TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    bool* value, const bool& default_value)
{
  triton::common::TritonJson::Value json_value;
  if (params.Find(mkey.c_str(), &json_value)) {
    std::string string_value;
    RETURN_IF_ERROR(
        json_value.MemberAsString("string_value", &string_value));
    return ParseBoolValue(string_value, value);
  } else {
    *value = default_value;
    return nullptr;  // success
  }
}

TRITONSERVER_Error*
TryParseModelStringParameter(
    triton::common::TritonJson::Value& params, const std::string& mkey,
    uint64_t* value, const uint64_t& default_value)
{
  triton::common::TritonJson::Value json_value;
  if (params.Find(mkey.c_str(), &json_value)) {
    std::string string_value;
    RETURN_IF_ERROR(
        json_value.MemberAsString("string_value", &string_value));
    return ParseUnsignedLongLongValue(string_value, value);
  } else {
    *value = default_value;
    return nullptr;  // success
  }
}

namespace {

template <typename T>
TRITONSERVER_Error*
BufferAsTypedString(
    std::string& str, const char* buffer, const size_t element_cnt)
{
  const T* vals = reinterpret_cast<const T*>(buffer);

  str += "[ ";
  for (size_t i = 0; i < element_cnt; ++i) {
    const T& v = vals[i];
    if (i != 0) {
      str += ", ";
    }
    str += std::to_string(v);
  }
  str += " ]";

  return nullptr;  // success
}

}  // namespace

TRITONSERVER_Error*
BufferAsTypedString(
    std::string& str, const char* buffer, size_t buffer_byte_size,
    TRITONSERVER_DataType datatype)
{
  const size_t element_cnt =
      buffer_byte_size / TRITONSERVER_DataTypeByteSize(datatype);
  switch (datatype) {
    case TRITONSERVER_TYPE_UINT8:
      return BufferAsTypedString<uint8_t>(str, buffer, element_cnt);
    case TRITONSERVER_TYPE_UINT16:
      return BufferAsTypedString<uint16_t>(str, buffer, element_cnt);
    case TRITONSERVER_TYPE_UINT32:
      return BufferAsTypedString<uint32_t>(str, buffer, element_cnt);
    case TRITONSERVER_TYPE_UINT64:
      return BufferAsTypedString<uint64_t>(str, buffer, element_cnt);
    case TRITONSERVER_TYPE_INT8:
      return BufferAsTypedString<int8_t>(str, buffer, element_cnt);
    case TRITONSERVER_TYPE_INT16:
      return BufferAsTypedString<int16_t>(str, buffer, element_cnt);
    case TRITONSERVER_TYPE_INT32:
      return BufferAsTypedString<int32_t>(str, buffer, element_cnt);
    case TRITONSERVER_TYPE_INT64:
      return BufferAsTypedString<int64_t>(str, buffer, element_cnt);
    case TRITONSERVER_TYPE_FP32:
      return BufferAsTypedString<float>(str, buffer, element_cnt);
    case TRITONSERVER_TYPE_FP64:
      return BufferAsTypedString<double>(str, buffer, element_cnt);
    default:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INVALID_ARG,
          std::string(
              std::string(
                  "class result not available for output due to "
                  "unsupported type '") +
              std::string(TRITONSERVER_DataTypeString(datatype)) + "'")
              .c_str());
  }

  return nullptr;  // success
}

std::string
GetRequestId(TRITONBACKEND_Request* request)
{
  const char* request_id = nullptr;
  LOG_IF_ERROR(
      TRITONBACKEND_RequestId(request, &request_id),
      "unable to retrieve request ID string");
  if ((request_id == nullptr) || (request_id[0] == '\0')) {
    request_id = "<id_unknown>";
  }
  return std::string("[request id: ") + request_id + "] ";
}

}}  // namespace triton::backend
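As a rough usage note for the parsing helpers defined above (GetParameterValue, TryParseModelStringParameter, ParseBoolValue and friends), a backend might read its config.pbtxt parameters as in the following sketch. The wrapper ReadExampleParameters and the parameter names are assumptions for illustration only; only the helper signatures come from the file above.

// Illustrative only: reading backend-specific parameters from a parsed model
// configuration. 'model_config' is assumed to be the TritonJson document for
// config.pbtxt; the parameter names are placeholders.
TRITONSERVER_Error*
ReadExampleParameters(triton::common::TritonJson::Value& model_config)
{
  triton::common::TritonJson::Value params;
  if (model_config.Find("parameters", &params)) {
    // Required parameter: error if missing.
    std::string data_path;
    RETURN_IF_ERROR(
        triton::backend::GetParameterValue(params, "data_path", &data_path));

    // Optional parameters: fall back to defaults when absent.
    int instance_count = 1;
    RETURN_IF_ERROR(triton::backend::TryParseModelStringParameter(
        params, "instance_count", &instance_count, 1));

    bool enable_debug = false;
    RETURN_IF_ERROR(triton::backend::TryParseModelStringParameter(
        params, "enable_debug", &enable_debug, false));
  }
  return nullptr;  // success
}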
3rdparty/backend-r22.12/src/backend_input_collector.cc (new file, mode 100644)
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/backend/backend_input_collector.h"
#include <atomic>
#include "triton/backend/backend_common.h"
#ifdef TRITON_ENABLE_GPU
#include "kernel.h"
#endif // TRITON_ENABLE_GPU
namespace
triton
{
namespace
backend
{
//
// BackendInputCollector::InputIterator
//
BackendInputCollector
::
InputIterator
::
InputIterator
(
TRITONBACKEND_Request
**
requests
,
const
uint32_t
request_count
,
std
::
vector
<
TRITONBACKEND_Response
*>*
responses
,
const
char
*
input_name
,
const
char
*
host_policy_name
,
const
bool
coalesce_request_input
)
:
requests_
(
requests
),
request_count_
(
request_count
),
responses_
(
responses
),
input_name_
(
input_name
),
host_policy_
(
host_policy_name
),
coalesce_request_input_
(
coalesce_request_input
),
curr_request_idx_
(
0
),
curr_buffer_idx_
(
0
),
reach_end_
(
false
)
{
auto
&
response
=
(
*
responses_
)[
curr_request_idx_
];
RESPOND_AND_SET_NULL_IF_ERROR
(
&
response
,
TRITONBACKEND_RequestInput
(
requests_
[
curr_request_idx_
],
input_name_
,
&
curr_input_
));
RESPOND_AND_SET_NULL_IF_ERROR
(
&
response
,
TRITONBACKEND_InputPropertiesForHostPolicy
(
curr_input_
,
host_policy_
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
&
curr_buffer_cnt_
));
}
bool
BackendInputCollector
::
InputIterator
::
GetNextContiguousInput
(
ContiguousBuffer
*
input
)
{
if
(
reach_end_
||
(
curr_buffer_idx_
>=
curr_buffer_cnt_
))
{
return
false
;
}
// Get the first buffer
TRITONBACKEND_InputBufferForHostPolicy
(
curr_input_
,
host_policy_
,
curr_buffer_idx_
,
reinterpret_cast
<
const
void
**>
(
&
input
->
memory_desc_
.
buffer_
),
&
input
->
memory_desc_
.
byte_size_
,
&
input
->
memory_desc_
.
memory_type_
,
&
input
->
memory_desc_
.
memory_type_id_
);
++
curr_buffer_idx_
;
input
->
start_request_idx_
=
curr_request_idx_
;
input
->
end_request_idx_
=
curr_request_idx_
;
if
(
!
coalesce_request_input_
)
{
if
(
curr_buffer_idx_
>=
curr_buffer_cnt_
)
{
++
curr_request_idx_
;
if
(
curr_request_idx_
<
request_count_
)
{
auto
&
response
=
(
*
responses_
)[
curr_request_idx_
];
RESPOND_AND_SET_NULL_IF_ERROR
(
&
response
,
TRITONBACKEND_RequestInput
(
requests_
[
curr_request_idx_
],
input_name_
,
&
curr_input_
));
RESPOND_AND_SET_NULL_IF_ERROR
(
&
response
,
TRITONBACKEND_InputPropertiesForHostPolicy
(
curr_input_
,
host_policy_
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
&
curr_buffer_cnt_
));
// reset buffer idx
curr_buffer_idx_
=
0
;
}
else
{
reach_end_
=
true
;
}
}
return
true
;
}
do
{
for
(;
curr_buffer_idx_
<
curr_buffer_cnt_
;
++
curr_buffer_idx_
)
{
const
void
*
next_buffer
;
size_t
next_buffer_byte_size
;
TRITONSERVER_MemoryType
next_memory_type
;
int64_t
next_memory_type_id
;
TRITONBACKEND_InputBufferForHostPolicy
(
curr_input_
,
host_policy_
,
curr_buffer_idx_
,
&
next_buffer
,
&
next_buffer_byte_size
,
&
next_memory_type
,
&
next_memory_type_id
);
if
(((
input
->
memory_desc_
.
buffer_
+
input
->
memory_desc_
.
byte_size_
)
!=
next_buffer
)
||
(
input
->
memory_desc_
.
memory_type_
!=
next_memory_type
)
||
(
input
->
memory_desc_
.
memory_type_id_
!=
next_memory_type_id
))
{
return
true
;
}
input
->
memory_desc_
.
byte_size_
+=
next_buffer_byte_size
;
input
->
end_request_idx_
=
curr_request_idx_
;
}
// Iterated all buffers for current request, check next
++
curr_request_idx_
;
if
(
curr_request_idx_
<
request_count_
)
{
auto
&
response
=
(
*
responses_
)[
curr_request_idx_
];
RESPOND_AND_SET_NULL_IF_ERROR
(
&
response
,
TRITONBACKEND_RequestInput
(
requests_
[
curr_request_idx_
],
input_name_
,
&
curr_input_
));
RESPOND_AND_SET_NULL_IF_ERROR
(
&
response
,
TRITONBACKEND_InputPropertiesForHostPolicy
(
curr_input_
,
host_policy_
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
&
curr_buffer_cnt_
));
// reset buffer idx
curr_buffer_idx_
=
0
;
}
}
while
(
curr_request_idx_
<
request_count_
);
reach_end_
=
true
;
return
true
;
}
//
// BackendInputCollector
//
bool
BackendInputCollector
::
GetInputBufferIfContiguous
(
const
char
*
input_name
,
const
char
**
buffer
,
size_t
*
buffer_byte_size
,
TRITONSERVER_MemoryType
*
memory_type
,
int64_t
*
memory_type_id
)
{
*
buffer
=
nullptr
;
*
buffer_byte_size
=
0
;
const
char
*
expected_next_buffer
=
nullptr
;
bool
contiguous
=
true
;
for
(
size_t
idx
=
0
;
idx
<
request_count_
;
idx
++
)
{
auto
&
request
=
requests_
[
idx
];
auto
&
response
=
(
*
responses_
)[
idx
];
TRITONBACKEND_Input
*
input
;
RESPOND_AND_SET_NULL_IF_ERROR
(
&
response
,
TRITONBACKEND_RequestInput
(
request
,
input_name
,
&
input
));
uint64_t
byte_size
;
uint32_t
buffer_count
;
RESPOND_AND_SET_NULL_IF_ERROR
(
&
response
,
TRITONBACKEND_InputPropertiesForHostPolicy
(
input
,
host_policy_cstr_
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
&
byte_size
,
&
buffer_count
));
for
(
size_t
idx
=
0
;
idx
<
buffer_count
;
++
idx
)
{
const
void
*
src_buffer
;
size_t
src_byte_size
;
TRITONSERVER_MemoryType
src_memory_type
;
int64_t
src_memory_type_id
;
RESPOND_AND_SET_NULL_IF_ERROR
(
&
response
,
TRITONBACKEND_InputBufferForHostPolicy
(
input
,
host_policy_cstr_
,
idx
,
&
src_buffer
,
&
src_byte_size
,
&
src_memory_type
,
&
src_memory_type_id
));
if
(
*
buffer
!=
nullptr
)
{
// If have seen the second buffer while coalescing input is not
// requested, treat the inputs are not contiguous
if
(
coalesce_request_input_
&&
(
expected_next_buffer
==
src_buffer
)
&&
(
*
memory_type
==
src_memory_type
)
&&
(
*
memory_type_id
==
src_memory_type_id
))
{
expected_next_buffer
+=
src_byte_size
;
}
else
{
contiguous
=
false
;
}
// Want to know total buffer byte size even if it is not contiguous
*
buffer_byte_size
+=
src_byte_size
;
}
else
{
*
buffer
=
reinterpret_cast
<
const
char
*>
(
src_buffer
);
*
memory_type
=
src_memory_type
;
*
memory_type_id
=
src_memory_type_id
;
*
buffer_byte_size
=
src_byte_size
;
expected_next_buffer
=
*
buffer
+
src_byte_size
;
}
}
}
return
contiguous
;
}
void
BackendInputCollector
::
ProcessTensor
(
const
char
*
input_name
,
char
*
buffer
,
const
size_t
buffer_byte_size
,
const
TRITONSERVER_MemoryType
memory_type
,
const
int64_t
memory_type_id
)
{
// A value of CPU_PINNED indicates that pinned memory buffer is not
// needed for this tensor. Any other value indicates that a pinned
// memory buffer is needed when the target memory type matches
// 'use_pinned_memory_type'.
TRITONSERVER_MemoryType
use_pinned_memory_type
=
TRITONSERVER_MEMORY_CPU_PINNED
;
if
(
pinned_enabled_
)
{
use_pinned_memory_type
=
GetUsePinnedMemoryType
(
memory_type
);
}
const
bool
use_kernel
=
(
kernel_buffer_threshold_
!=
0
);
size_t
buffer_offset
=
0
;
InputIterator
ii
(
requests_
,
request_count_
,
responses_
,
input_name
,
host_policy_cstr_
,
coalesce_request_input_
);
ContiguousBuffer
input
;
while
(
ii
.
GetNextContiguousInput
(
&
input
))
{
// If there are pending copies from tensor buffer that is not
// contiguous with 'response's part of that buffer, then need to
// go ahead and perform the pending copies so that can start a new
// contiguous region if necessary.
if
((
pending_pinned_byte_size_
>
0
)
&&
(
buffer_offset
!=
(
pending_pinned_byte_size_
+
pending_pinned_offset_
)))
{
need_sync_
|=
FlushPendingPinned
(
buffer
,
buffer_byte_size
,
memory_type
,
memory_type_id
);
}
if
((
pending_copy_kernel_buffer_byte_size_
>
0
)
&&
(
buffer_offset
!=
(
pending_copy_kernel_buffer_byte_size_
+
pending_copy_kernel_buffer_offset_
)))
{
need_sync_
|=
FlushPendingCopyKernel
(
buffer
,
buffer_byte_size
,
memory_type
,
memory_type_id
);
}
need_sync_
|=
SetInputTensor
(
input_name
,
input
,
buffer
,
buffer_byte_size
,
memory_type
,
memory_type_id
,
buffer_offset
,
use_pinned_memory_type
,
use_kernel
,
true
);
buffer_offset
+=
input
.
memory_desc_
.
byte_size_
;
}
// Done with the tensor, flush any pending pinned copies.
need_sync_
|=
FlushPendingPinned
(
buffer
,
buffer_byte_size
,
memory_type
,
memory_type_id
);
need_sync_
|=
FlushPendingCopyKernel
(
buffer
,
buffer_byte_size
,
memory_type
,
memory_type_id
);
#ifdef TRITON_ENABLE_GPU
if
(
need_sync_
&&
(
event_
!=
nullptr
))
{
cudaEventRecord
(
event_
,
stream_
);
}
#endif // TRITON_ENABLE_GPU
}
TRITONSERVER_Error
*
BackendInputCollector
::
ProcessTensor
(
const
char
*
input_name
,
char
*
buffer
,
const
size_t
buffer_byte_size
,
const
std
::
vector
<
std
::
pair
<
TRITONSERVER_MemoryType
,
int64_t
>>&
allowed_input_types
,
const
char
**
dst_buffer
,
size_t
*
dst_buffer_byte_size
,
TRITONSERVER_MemoryType
*
dst_memory_type
,
int64_t
*
dst_memory_type_id
)
{
if
(
buffer
==
nullptr
)
{
if
(
allowed_input_types
.
size
()
==
0
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INTERNAL
,
"'allowed_input_types' must contain at least one pair of memory type "
"and id"
);
}
if
(
GetInputBufferIfContiguous
(
input_name
,
dst_buffer
,
dst_buffer_byte_size
,
dst_memory_type
,
dst_memory_type_id
))
{
// zero size buffer will be treated as contiguous as well,
// but we want to invoke backend memory to have a valid address.
if
(
*
dst_buffer_byte_size
!=
0
)
{
// If the buffer is contiguous, check if the caller expects its type
for
(
const
auto
&
allowed_type
:
allowed_input_types
)
{
if
((
*
dst_memory_type
==
allowed_type
.
first
)
&&
((
*
dst_memory_type_id
==
allowed_type
.
second
)))
{
return
nullptr
;
// success
}
}
}
}
// A separate buffer is needed
BackendMemory
*
backend_memory
=
nullptr
;
for
(
const
auto
&
allowed_type
:
allowed_input_types
)
{
std
::
vector
<
BackendMemory
::
AllocationType
>
alloc_types
;
const
int64_t
memory_type_id
=
allowed_type
.
second
;
switch
(
allowed_type
.
first
)
{
case
TRITONSERVER_MEMORY_GPU
:
alloc_types
=
{
BackendMemory
::
AllocationType
::
GPU_POOL
,
BackendMemory
::
AllocationType
::
GPU
};
break
;
case
TRITONSERVER_MEMORY_CPU_PINNED
:
alloc_types
=
{
BackendMemory
::
AllocationType
::
CPU_PINNED_POOL
,
BackendMemory
::
AllocationType
::
CPU_PINNED
};
break
;
case
TRITONSERVER_MEMORY_CPU
:
alloc_types
=
{
BackendMemory
::
AllocationType
::
CPU
};
break
;
}
auto
err
=
BackendMemory
::
Create
(
memory_manager_
,
alloc_types
,
memory_type_id
,
*
dst_buffer_byte_size
,
&
backend_memory
);
if
(
err
!=
nullptr
)
{
LOG_MESSAGE
(
TRITONSERVER_LOG_VERBOSE
,
(
std
::
string
(
"unable to create backend memory for type: "
)
+
TRITONSERVER_MemoryTypeString
(
allowed_type
.
first
)
+
" id: "
+
std
::
to_string
(
memory_type_id
)
+
": "
+
TRITONSERVER_ErrorMessage
(
err
))
.
c_str
());
TRITONSERVER_ErrorDelete
(
err
);
}
else
{
in_use_memories_
.
emplace_back
(
backend_memory
);
break
;
}
}
if
(
backend_memory
==
nullptr
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INTERNAL
,
(
std
::
string
(
"failed to allocate contiguous buffer for input '"
)
+
input_name
+
"'"
)
.
c_str
());
}
buffer
=
backend_memory
->
MemoryPtr
();
*
dst_buffer
=
backend_memory
->
MemoryPtr
();
*
dst_buffer_byte_size
=
backend_memory
->
ByteSize
();
*
dst_memory_type
=
backend_memory
->
MemoryType
();
*
dst_memory_type_id
=
backend_memory
->
MemoryTypeId
();
}
else
{
if
(
allowed_input_types
.
size
()
!=
1
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INTERNAL
,
"'allowed_input_types' must only contain the memory type and id of "
"'buffer'"
);
}
*
dst_buffer
=
buffer
;
*
dst_buffer_byte_size
=
buffer_byte_size
;
*
dst_memory_type
=
allowed_input_types
[
0
].
first
;
*
dst_memory_type_id
=
allowed_input_types
[
0
].
second
;
}
if
(
*
dst_buffer_byte_size
!=
0
)
{
ProcessTensor
(
input_name
,
buffer
,
*
dst_buffer_byte_size
,
*
dst_memory_type
,
*
dst_memory_type_id
);
}
return
nullptr
;
// success
}
bool
BackendInputCollector
::
Finalize
()
{
#ifdef TRITON_ENABLE_GPU
if
((
!
deferred_pinned_
.
empty
())
&&
need_sync_
)
{
if
(
event_
!=
nullptr
)
{
cudaEventSynchronize
(
event_
);
}
else
{
cudaStreamSynchronize
(
stream_
);
}
need_sync_
=
false
;
}
#endif // TRITON_ENABLE_GPU
// After the above sync all the GPU->pinned copies are complete. Any
// deferred copies of pinned->CPU can now be done.
#ifdef TRITON_ENABLE_GPU
if
(
buffer_ready_event_
!=
nullptr
)
{
cudaEventSynchronize
(
buffer_ready_event_
);
buffer_ready_event_
=
nullptr
;
}
#endif // TRITON_ENABLE_GPU
for
(
auto
&
def
:
deferred_pinned_
)
{
if
(
!
def
.
finalized_
)
{
need_sync_
|=
def
.
Finalize
(
stream_
);
}
}
for
(
size_t
i
=
0
;
i
<
async_task_count_
;
i
++
)
{
need_sync_
|=
completion_queue_
.
Get
();
}
#ifdef TRITON_ENABLE_GPU
// Record the new event location if deferred copies occur
if
((
!
deferred_pinned_
.
empty
())
&&
need_sync_
&&
(
event_
!=
nullptr
))
{
cudaEventRecord
(
event_
,
stream_
);
}
#endif // TRITON_ENABLE_GPU
return
need_sync_
;
}
bool
BackendInputCollector
::
DeferredPinned
::
Finalize
(
cudaStream_t
stream
)
{
bool
cuda_used
=
false
;
auto
err
=
CopyBuffer
(
"pinned buffer"
,
TRITONSERVER_MEMORY_CPU_PINNED
,
0
,
tensor_memory_type_
,
tensor_memory_id_
,
pinned_memory_size_
,
pinned_memory_
,
tensor_buffer_
+
tensor_buffer_offset_
,
stream
,
&
cuda_used
);
// If something goes wrong with the copy all the pending
// responses fail...
if
(
err
!=
nullptr
)
{
for
(
auto
&
pr
:
requests_
)
{
for
(
size_t
idx
=
pr
.
start_request_idx_
;
idx
<=
pr
.
end_request_idx_
;
++
idx
)
{
if
((
*
responses_
)[
idx
]
!=
nullptr
)
{
LOG_IF_ERROR
(
TRITONBACKEND_ResponseSend
(
(
*
responses_
)[
idx
],
TRITONSERVER_RESPONSE_COMPLETE_FINAL
,
err
),
"failed to send error response"
);
(
*
responses_
)[
idx
]
=
nullptr
;
}
}
}
TRITONSERVER_ErrorDelete
(
err
);
}
return
cuda_used
;
}
bool
BackendInputCollector
::
SetInputTensor
(
const
char
*
input_name
,
const
ContiguousBuffer
&
input
,
char
*
tensor_buffer
,
const
size_t
tensor_buffer_byte_size
,
const
TRITONSERVER_MemoryType
tensor_memory_type
,
const
int64_t
tensor_memory_type_id
,
const
size_t
tensor_buffer_offset
,
const
TRITONSERVER_MemoryType
use_pinned_memory_type
,
const
bool
use_kernel
,
const
bool
wait_buffer
)
{
bool
cuda_copy
=
false
;
if
((
tensor_buffer_offset
+
input
.
memory_desc_
.
byte_size_
)
>
tensor_buffer_byte_size
)
{
for
(
size_t
i
=
input
.
start_request_idx_
;
i
<=
input
.
end_request_idx_
;
++
i
)
{
RESPOND_AND_SET_NULL_IF_ERROR
(
&
(
*
responses_
)[
i
],
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INVALID_ARG
,
std
::
string
(
"unexpected total byte size "
+
std
::
to_string
(
tensor_buffer_offset
+
input
.
memory_desc_
.
byte_size_
)
+
" for input '"
+
input_name
+
"', expecting "
+
std
::
to_string
(
tensor_buffer_byte_size
))
.
c_str
()));
}
return
cuda_copy
;
}
// If the request buffer matches the memory type that should use an
// intermediate pinned memory buffer for the transfer, then just
// record the input as pending and increase the size required for
// the intermediate pinned buffer. We only do this check for the
// first buffer of an input and apply the same policy for all
// buffers. So if an inputs data is split over different memory
// types this may not be ideal but that should be a very rare
// situation.
if
((
use_pinned_memory_type
!=
TRITONSERVER_MEMORY_CPU_PINNED
)
&&
(
input
.
memory_desc_
.
memory_type_
==
use_pinned_memory_type
))
{
if
(
pending_pinned_byte_size_
==
0
)
{
pending_pinned_offset_
=
tensor_buffer_offset
;
}
pending_pinned_byte_size_
+=
input
.
memory_desc_
.
byte_size_
;
pending_pinned_input_buffers_
.
push_back
(
input
);
return
cuda_copy
;
}
// [FIXME] support other direction if prove to be faster, all kernel
// handling code in this class asssumes the destination buffer is on device
// If the request buffer and the destination buffer are accessible by all
// GPUs (i.e. pinned, device), initiate the copy via copy CUDA kernel.
// We only do this check for the
// first buffer of an input and apply the same policy for all
// buffers. So if an inputs data is split over different memory
// types this may not be ideal but that should be a very rare
// situation.
// Currently checked direction:
// pinned -> device
// same device -> device
// different device -> device
if
(
use_kernel
&&
(
input
.
memory_desc_
.
memory_type_
!=
TRITONSERVER_MEMORY_CPU
)
&&
(
tensor_memory_type
==
TRITONSERVER_MEMORY_GPU
))
{
// [FIXME] Currently not allowing copy between devices as it requires
// peer-to-peer access to be enabled. Peer-to-peer is enabled by default,
// but server can still runs even if it fails to enable peer-to-peer.
// Should provide a utility to check whether a device pair allows direct
// access and use gather kernel accordingly
if
((
input
.
memory_desc_
.
memory_type_
!=
TRITONSERVER_MEMORY_GPU
)
||
(
input
.
memory_desc_
.
memory_type_id_
==
tensor_memory_type_id
))
{
if
(
pending_copy_kernel_buffer_byte_size_
==
0
)
{
pending_copy_kernel_buffer_offset_
=
tensor_buffer_offset
;
}
pending_copy_kernel_buffer_byte_size_
+=
input
.
memory_desc_
.
byte_size_
;
++
pending_copy_kernel_input_buffer_counts_
;
pending_copy_kernel_input_buffers_
.
push_back
(
input
);
return
cuda_copy
;
}
}
#ifdef TRITON_ENABLE_GPU
if
(
wait_buffer
&&
(
buffer_ready_event_
!=
nullptr
))
{
cudaEventSynchronize
(
buffer_ready_event_
);
buffer_ready_event_
=
nullptr
;
}
#endif // TRITON_ENABLE_GPU
// Direct copy without intermediate pinned memory.
bool
cuda_used
=
false
;
auto
err
=
CopyBuffer
(
input_name
,
input
.
memory_desc_
.
memory_type_
,
input
.
memory_desc_
.
memory_type_id_
,
tensor_memory_type
,
tensor_memory_type_id
,
input
.
memory_desc_
.
byte_size_
,
input
.
memory_desc_
.
buffer_
,
tensor_buffer
+
tensor_buffer_offset
,
stream_
,
&
cuda_used
,
copy_on_stream_
);
if
(
err
!=
nullptr
)
{
for
(
size_t
i
=
input
.
start_request_idx_
;
i
<=
input
.
end_request_idx_
;
++
i
)
{
RESPOND_AND_SET_NULL_IF_ERROR
(
&
(
*
responses_
)[
i
],
TRITONSERVER_ErrorNew
(
TRITONSERVER_ErrorCode
(
err
),
TRITONSERVER_ErrorMessage
(
err
)));
}
TRITONSERVER_ErrorDelete
(
err
);
}
cuda_copy
|=
cuda_used
;
return
cuda_copy
;
}
bool
BackendInputCollector
::
FlushPendingPinned
(
char
*
tensor_buffer
,
const
size_t
tensor_buffer_byte_size
,
const
TRITONSERVER_MemoryType
tensor_memory_type
,
const
int64_t
tensor_memory_type_id
)
{
bool
cuda_copy
=
false
;
// Will be copying from CPU->pinned->GPU or GPU->pinned->CPU
// Attempt to allocate a pinned buffer to use for staging the
// copy... if we fail to allocated the pinned buffer then we just
// directly go CPU->GPU or GPU->CPU.
char
*
pinned_memory
=
nullptr
;
int64_t
pinned_memory_type_id
=
0
;
TRITONSERVER_MemoryType
pinned_memory_type
;
BackendMemory
*
backend_memory
;
if
(
pending_pinned_byte_size_
>
0
)
{
TRITONSERVER_Error
*
err
=
BackendMemory
::
Create
(
memory_manager_
,
{
BackendMemory
::
AllocationType
::
CPU_PINNED_POOL
,
BackendMemory
::
AllocationType
::
CPU_PINNED
},
0
/* memory_type_id */
,
pending_pinned_byte_size_
,
&
backend_memory
);
if
(
err
!=
nullptr
)
{
TRITONSERVER_ErrorDelete
(
err
);
}
else
{
pinned_memory
=
backend_memory
->
MemoryPtr
();
pinned_memory_type
=
backend_memory
->
MemoryType
();
pinned_memory_type_id
=
backend_memory
->
MemoryTypeId
();
}
}
// If the pinned buffer wasn't actually allocated then just perform
// a direct copy.
if
(
pinned_memory
==
nullptr
)
{
size_t
offset
=
0
;
for
(
auto
&
pr
:
pending_pinned_input_buffers_
)
{
cuda_copy
|=
SetInputTensor
(
"pinned fallback"
,
pr
,
tensor_buffer
,
tensor_buffer_byte_size
,
tensor_memory_type
,
tensor_memory_type_id
,
pending_pinned_offset_
+
offset
,
TRITONSERVER_MEMORY_CPU_PINNED
,
false
,
true
);
offset
+=
pr
.
memory_desc_
.
byte_size_
;
}
}
// We have a pinned buffer so copy the pending input buffer(s) into
// the pinned memory.
else
{
// pinned_memory_type == TRITONSERVER_MEMORY_CPU_PINNED
bool
cuda_used
=
false
;
size_t
offset
=
0
;
if
(
!
use_async_cpu_copy_
)
{
for
(
auto
&
pr
:
pending_pinned_input_buffers_
)
{
cuda_used
|=
SetInputTensor
(
"pinned H2H"
,
pr
,
pinned_memory
,
pending_pinned_byte_size_
,
TRITONSERVER_MEMORY_CPU_PINNED
,
0
/* memory_type_id */
,
offset
,
TRITONSERVER_MEMORY_CPU_PINNED
,
false
,
true
);
offset
+=
pr
.
memory_desc_
.
byte_size_
;
}
cuda_copy
|=
cuda_used
;
// If the copy was not async (i.e. if request input was in CPU so
// a CPU->CPU-PINNED copy was performed above), then the pinned
// buffer now holds the tensor contents and we can immediately
// issue the copies from the pinned buffer to the tensor.
//
// Otherwise the GPU->CPU-PINNED async copies are in flight and we
// simply remember the pinned buffer and the corresponding
// request inputs so that we can do the pinned->CPU copies in
// finalize after we have waited for all async copies to complete.
if
(
!
cuda_used
)
{
#ifdef TRITON_ENABLE_GPU
if
(
buffer_ready_event_
!=
nullptr
)
{
cudaEventSynchronize
(
buffer_ready_event_
);
buffer_ready_event_
=
nullptr
;
}
#endif // TRITON_ENABLE_GPU
auto
err
=
CopyBuffer
(
"pinned input buffer H2D"
,
TRITONSERVER_MEMORY_CPU_PINNED
,
0
/* memory_type_id */
,
tensor_memory_type
,
tensor_memory_type_id
,
pending_pinned_byte_size_
,
pinned_memory
,
tensor_buffer
+
pending_pinned_offset_
,
stream_
,
&
cuda_used
,
copy_on_stream_
);
cuda_copy
|=
cuda_used
;
// If something goes wrong with the copy all the pending
// responses fail...
if
(
err
!=
nullptr
)
{
for
(
auto
&
pr
:
pending_pinned_input_buffers_
)
{
for
(
size_t
idx
=
pr
.
start_request_idx_
;
idx
<=
pr
.
end_request_idx_
;
++
idx
)
{
if
((
*
responses_
)[
idx
]
!=
nullptr
)
{
LOG_IF_ERROR
(
TRITONBACKEND_ResponseSend
(
(
*
responses_
)[
idx
],
TRITONSERVER_RESPONSE_COMPLETE_FINAL
,
err
),
"failed to send error response"
);
(
*
responses_
)[
idx
]
=
nullptr
;
}
}
}
TRITONSERVER_ErrorDelete
(
err
);
}
}
else
{
// cuda_used
deferred_pinned_
.
emplace_back
(
pinned_memory
,
pending_pinned_byte_size_
,
tensor_buffer
,
pending_pinned_offset_
,
tensor_memory_type
,
tensor_memory_type_id
,
std
::
move
(
pending_pinned_input_buffers_
),
responses_
);
}
}
else
{
async_task_count_
++
;
deferred_pinned_
.
emplace_back
(
pinned_memory
,
pending_pinned_byte_size_
,
tensor_buffer
,
pending_pinned_offset_
,
tensor_memory_type
,
tensor_memory_type_id
,
std
::
move
(
pending_pinned_input_buffers_
),
responses_
);
auto
&
deferred_pinned
=
deferred_pinned_
.
back
();
// Mark finalized to avoid duplicated call to DeferredPinned::Finalized()
// in BackendInputCollector::Finalize()
deferred_pinned_
.
back
().
finalized_
=
true
;
auto
incomplete_count
=
new
std
::
atomic
<
size_t
>
(
std
::
min
(
deferred_pinned_
.
back
().
requests_
.
size
(),
triton
::
common
::
AsyncWorkQueue
::
WorkerCount
()));
auto
pending_pinned_byte_size
=
pending_pinned_byte_size_
;
size_t
stride
=
(
deferred_pinned_
.
back
().
requests_
.
size
()
+
triton
::
common
::
AsyncWorkQueue
::
WorkerCount
()
-
1
)
/
triton
::
common
::
AsyncWorkQueue
::
WorkerCount
();
auto
pending_it
=
deferred_pinned_
.
back
().
requests_
.
begin
();
while
(
pending_it
!=
deferred_pinned_
.
back
().
requests_
.
end
())
{
auto
end_it
=
pending_it
;
auto
next_offset
=
offset
;
for
(
size_t
idx
=
0
;
idx
<
stride
;
idx
++
)
{
next_offset
+=
end_it
->
memory_desc_
.
byte_size_
;
end_it
++
;
if
(
end_it
==
deferred_pinned_
.
back
().
requests_
.
end
())
{
break
;
}
}
auto
err
=
CommonErrorToTritonError
(
triton
::
common
::
AsyncWorkQueue
::
AddTask
(
[
this
,
offset
,
pinned_memory
,
pinned_memory_type
,
pending_pinned_byte_size
,
pinned_memory_type_id
,
pending_it
,
end_it
,
incomplete_count
,
&
deferred_pinned
]()
mutable
{
for
(;
pending_it
!=
end_it
;
pending_it
++
)
{
SetInputTensor
(
"pinned async H2H"
,
*
pending_it
,
pinned_memory
,
pending_pinned_byte_size
,
pinned_memory_type
,
pinned_memory_type_id
,
offset
,
TRITONSERVER_MEMORY_CPU_PINNED
,
false
,
false
);
offset
+=
pending_it
->
memory_desc_
.
byte_size_
;
}
// The last segmented task will start the next phase of
// the internal pinned buffer copy
if
(
incomplete_count
->
fetch_sub
(
1
)
==
1
)
{
#ifdef TRITON_ENABLE_GPU
if
(
buffer_ready_event_
!=
nullptr
)
{
cudaEventSynchronize
(
buffer_ready_event_
);
buffer_ready_event_
=
nullptr
;
}
#endif // TRITON_ENABLE_GPU
completion_queue_
.
Put
(
deferred_pinned
.
Finalize
(
stream_
));
delete
incomplete_count
;
}
}));
if
(
err
!=
nullptr
)
{
for
(;
pending_it
!=
end_it
;
pending_it
++
)
{
for
(
size_t
idx
=
pending_it
->
start_request_idx_
;
idx
<=
pending_it
->
end_request_idx_
;
++
idx
)
{
if
((
*
responses_
)[
idx
]
!=
nullptr
)
{
LOG_IF_ERROR
(
TRITONBACKEND_ResponseSend
(
(
*
responses_
)[
idx
],
TRITONSERVER_RESPONSE_COMPLETE_FINAL
,
err
),
"failed to send error response"
);
(
*
responses_
)[
idx
]
=
nullptr
;
}
}
}
}
TRITONSERVER_ErrorDelete
(
err
);
offset
=
next_offset
;
pending_it
=
end_it
;
}
}
}
// Pending pinned copies are handled...
pending_pinned_byte_size_
=
0
;
pending_pinned_offset_
=
0
;
pending_pinned_input_buffers_
.
clear
();
// Need to hold on to the allocated pinned buffer as there are still
// copies in flight. Will delete it in finalize.
if
(
pinned_memory
!=
nullptr
)
{
in_use_memories_
.
emplace_back
(
backend_memory
);
}
return
cuda_copy
;
}
TRITONSERVER_Error
*
BackendInputCollector
::
BatchInputShape
(
const
BatchInput
&
batch_input
,
std
::
vector
<
int64_t
>*
shape
)
{
*
shape
=
std
::
vector
<
int64_t
>
{
0
};
switch
(
batch_input
.
BatchInputKind
())
{
case
BatchInput
::
Kind
::
BATCH_ELEMENT_COUNT
:
case
BatchInput
::
Kind
::
BATCH_ACCUMULATED_ELEMENT_COUNT
:
{
(
*
shape
)[
0
]
=
request_count_
;
break
;
}
case
BatchInput
::
Kind
::
BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO
:
{
(
*
shape
)[
0
]
=
request_count_
+
1
;
break
;
}
case
BatchInput
::
Kind
::
BATCH_MAX_ELEMENT_COUNT_AS_SHAPE
:
{
const
auto
&
source_input
=
batch_input
.
SourceInputs
()[
0
];
for
(
size_t
req_idx
=
0
;
req_idx
<
request_count_
;
req_idx
++
)
{
TRITONBACKEND_Input
*
input
;
RETURN_IF_ERROR
(
TRITONBACKEND_RequestInput
(
requests_
[
req_idx
],
source_input
.
c_str
(),
&
input
));
const
int64_t
*
shape_arr
;
uint32_t
dims_count
;
RETURN_IF_ERROR
(
TRITONBACKEND_InputPropertiesForHostPolicy
(
input
,
host_policy_cstr_
,
nullptr
,
nullptr
,
&
shape_arr
,
&
dims_count
,
nullptr
,
nullptr
));
(
*
shape
)[
0
]
=
std
::
max
((
*
shape
)[
0
],
GetElementCount
(
shape_arr
,
dims_count
));
}
break
;
}
case
BatchInput
::
Kind
::
BATCH_ITEM_SHAPE
:
{
shape
->
emplace_back
(
0
);
const
auto
&
source_input
=
batch_input
.
SourceInputs
()[
0
];
for
(
size_t
req_idx
=
0
;
req_idx
<
request_count_
;
req_idx
++
)
{
TRITONBACKEND_Input
*
input
;
RETURN_IF_ERROR
(
TRITONBACKEND_RequestInput
(
requests_
[
req_idx
],
source_input
.
c_str
(),
&
input
));
const
int64_t
*
shape_arr
;
uint32_t
dims_count
;
RETURN_IF_ERROR
(
TRITONBACKEND_InputPropertiesForHostPolicy
(
input
,
host_policy_cstr_
,
nullptr
,
nullptr
,
&
shape_arr
,
&
dims_count
,
nullptr
,
nullptr
));
// Assuming first dimension is batch size and ragged input is only set
// for batching enabled model.
(
*
shape
)[
0
]
+=
shape_arr
[
0
];
// The batch input tracks the shape without batch dimension for
// each batch item
(
*
shape
)[
1
]
=
(
dims_count
-
1
);
}
break
;
}
case
BatchInput
::
Kind
::
BATCH_ITEM_SHAPE_FLATTEN
:
{
const
auto
&
source_input
=
batch_input
.
SourceInputs
()[
0
];
for
(
size_t
req_idx
=
0
;
req_idx
<
request_count_
;
req_idx
++
)
{
TRITONBACKEND_Input
*
input
;
RETURN_IF_ERROR
(
TRITONBACKEND_RequestInput
(
requests_
[
req_idx
],
source_input
.
c_str
(),
&
input
));
const
int64_t
*
shape_arr
;
uint32_t
dims_count
;
RETURN_IF_ERROR
(
TRITONBACKEND_InputPropertiesForHostPolicy
(
input
,
host_policy_cstr_
,
nullptr
,
nullptr
,
&
shape_arr
,
&
dims_count
,
nullptr
,
nullptr
));
// Assuming first dimension is batch size and ragged input is only set
// for batching enabled model.
// The batch input tracks the shape without batch dimension for
// each batch item
(
*
shape
)[
0
]
+=
(
shape_arr
[
0
]
*
(
dims_count
-
1
));
}
break
;
}
default:
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INTERNAL
,
"unsupported BatchInputKind received"
);
}
return
nullptr
;
// success
}
TRITONSERVER_Error
*
BackendInputCollector
::
ProcessBatchInput
(
const
BatchInput
&
batch_input
,
char
*
buffer
,
const
size_t
buffer_byte_size
,
const
std
::
vector
<
std
::
pair
<
TRITONSERVER_MemoryType
,
int64_t
>>&
allowed_input_types
,
const
char
**
dst_buffer
,
size_t
*
dst_buffer_byte_size
,
TRITONSERVER_MemoryType
*
dst_memory_type
,
int64_t
*
dst_memory_type_id
)
{
#ifdef TRITON_ENABLE_GPU
if
(
buffer_ready_event_
!=
nullptr
)
{
cudaEventSynchronize
(
buffer_ready_event_
);
buffer_ready_event_
=
nullptr
;
}
#endif // TRITON_ENABLE_GPU
if
(
buffer
==
nullptr
)
{
if
(
allowed_input_types
.
size
()
==
0
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INTERNAL
,
"'allowed_input_types' must contain at least one pair of memory type "
"and id"
);
}
// Calculate the byte size of the buffer
std
::
vector
<
int64_t
>
shape
;
RETURN_IF_ERROR
(
BatchInputShape
(
batch_input
,
&
shape
));
*
dst_buffer_byte_size
=
GetByteSize
(
batch_input
.
DataType
(),
shape
);
BackendMemory
*
backend_memory
=
nullptr
;
for
(
const
auto
&
allowed_type
:
allowed_input_types
)
{
std
::
vector
<
BackendMemory
::
AllocationType
>
alloc_types
;
const
int64_t
memory_type_id
=
allowed_type
.
second
;
switch
(
allowed_type
.
first
)
{
case
TRITONSERVER_MEMORY_GPU
:
alloc_types
=
{
BackendMemory
::
AllocationType
::
GPU_POOL
,
BackendMemory
::
AllocationType
::
GPU
};
break
;
case
TRITONSERVER_MEMORY_CPU_PINNED
:
alloc_types
=
{
BackendMemory
::
AllocationType
::
CPU_PINNED_POOL
,
BackendMemory
::
AllocationType
::
CPU_PINNED
};
break
;
case
TRITONSERVER_MEMORY_CPU
:
alloc_types
=
{
BackendMemory
::
AllocationType
::
CPU
};
break
;
}
auto
err
=
BackendMemory
::
Create
(
memory_manager_
,
alloc_types
,
memory_type_id
,
*
dst_buffer_byte_size
,
&
backend_memory
);
if
(
err
!=
nullptr
)
{
LOG_MESSAGE
(
TRITONSERVER_LOG_VERBOSE
,
(
std
::
string
(
"unable to create backend memory for type: "
)
+
TRITONSERVER_MemoryTypeString
(
allowed_type
.
first
)
+
" id: "
+
std
::
to_string
(
memory_type_id
)
+
": "
+
TRITONSERVER_ErrorMessage
(
err
))
.
c_str
());
TRITONSERVER_ErrorDelete
(
err
);
}
else
{
in_use_memories_
.
emplace_back
(
backend_memory
);
break
;
}
}
if
(
backend_memory
==
nullptr
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INTERNAL
,
(
std
::
string
(
"failed to allocate contiguous buffer for batch input '"
)
+
batch_input
.
TargetNames
()[
0
]
+
"'"
)
.
c_str
());
}
buffer
=
backend_memory
->
MemoryPtr
();
*
dst_buffer
=
backend_memory
->
MemoryPtr
();
*
dst_buffer_byte_size
=
backend_memory
->
ByteSize
();
*
dst_memory_type
=
backend_memory
->
MemoryType
();
*
dst_memory_type_id
=
backend_memory
->
MemoryTypeId
();
}
else
{
if
(
allowed_input_types
.
size
()
!=
1
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INTERNAL
,
"'allowed_input_types' must only contain the memory type and id of "
"'buffer'"
);
}
*
dst_buffer
=
buffer
;
*
dst_buffer_byte_size
=
buffer_byte_size
;
*
dst_memory_type
=
allowed_input_types
[
0
].
first
;
*
dst_memory_type_id
=
allowed_input_types
[
0
].
second
;
}
char
*
input_buffer
=
buffer
;
std
::
unique_ptr
<
BackendMemory
>
internal_buffer
;
// Need a CPU buffer for modifying the value
if
(
*
dst_memory_type
==
TRITONSERVER_MEMORY_GPU
)
{
BackendMemory
*
ib
=
nullptr
;
RETURN_IF_ERROR
(
BackendMemory
::
Create
(
memory_manager_
,
{
BackendMemory
::
AllocationType
::
CPU_PINNED_POOL
,
BackendMemory
::
AllocationType
::
CPU
},
0
,
*
dst_buffer_byte_size
,
&
ib
));
internal_buffer
.
reset
(
ib
);
input_buffer
=
internal_buffer
->
MemoryPtr
();
}
const
auto
&
data_type
=
batch_input
.
DataType
();
switch
(
batch_input
.
BatchInputKind
())
{
case
BatchInput
::
Kind
::
BATCH_ELEMENT_COUNT
:
{
const
auto
&
source_input
=
batch_input
.
SourceInputs
()[
0
];
if
(
data_type
==
TRITONSERVER_TYPE_FP32
)
{
RETURN_IF_ERROR
(
SetElementCount
<
float
>
(
source_input
,
input_buffer
,
*
dst_buffer_byte_size
));
}
else
{
RETURN_IF_ERROR
(
SetElementCount
<
int32_t
>
(
source_input
,
input_buffer
,
*
dst_buffer_byte_size
));
}
break
;
}
case
BatchInput
::
Kind
::
BATCH_ACCUMULATED_ELEMENT_COUNT
:
{
const
auto
&
source_input
=
batch_input
.
SourceInputs
()[
0
];
if
(
data_type
==
TRITONSERVER_TYPE_FP32
)
{
RETURN_IF_ERROR
(
SetAccumulatedElementCount
<
float
>
(
source_input
,
input_buffer
,
*
dst_buffer_byte_size
));
}
else
{
RETURN_IF_ERROR
(
SetAccumulatedElementCount
<
int32_t
>
(
source_input
,
input_buffer
,
*
dst_buffer_byte_size
));
}
break
;
}
case
BatchInput
::
Kind
::
BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO
:
{
const
auto
&
source_input
=
batch_input
.
SourceInputs
()[
0
];
if
(
data_type
==
TRITONSERVER_TYPE_FP32
)
{
*
reinterpret_cast
<
float
*>
(
input_buffer
)
=
0
;
RETURN_IF_ERROR
(
SetAccumulatedElementCount
<
float
>
(
source_input
,
input_buffer
+
sizeof
(
float
),
*
dst_buffer_byte_size
-
sizeof
(
float
)));
}
else
{
*
reinterpret_cast
<
int32_t
*>
(
input_buffer
)
=
0
;
RETURN_IF_ERROR
(
SetAccumulatedElementCount
<
int32_t
>
(
source_input
,
input_buffer
+
sizeof
(
int32_t
),
*
dst_buffer_byte_size
-
sizeof
(
int32_t
)));
}
break
;
}
case
BatchInput
::
Kind
::
BATCH_MAX_ELEMENT_COUNT_AS_SHAPE
:
{
// The batch input is described by the shape,
// no data modification is needed
return
nullptr
;
// success
}
case
BatchInput
::
Kind
::
BATCH_ITEM_SHAPE
:
case
BatchInput
::
Kind
::
BATCH_ITEM_SHAPE_FLATTEN
:
{
// Use the same utilities for both types as the data will be the same,
// only difference is the shape of the tensor.
const
auto
&
source_input
=
batch_input
.
SourceInputs
()[
0
];
if
(
data_type
==
TRITONSERVER_TYPE_FP32
)
{
*
reinterpret_cast
<
float
*>
(
input_buffer
)
=
0
;
RETURN_IF_ERROR
(
SetBatchItemShape
<
float
>
(
source_input
,
input_buffer
,
*
dst_buffer_byte_size
));
}
else
{
*
reinterpret_cast
<
int32_t
*>
(
input_buffer
)
=
0
;
RETURN_IF_ERROR
(
SetBatchItemShape
<
int32_t
>
(
source_input
,
input_buffer
,
*
dst_buffer_byte_size
));
}
break
;
}
}
if
(
*
dst_memory_type
==
TRITONSERVER_MEMORY_GPU
)
{
bool
cuda_used
;
RETURN_IF_ERROR
(
CopyBuffer
(
"batch input buffer"
,
internal_buffer
->
MemoryType
(),
internal_buffer
->
MemoryTypeId
(),
*
dst_memory_type
,
*
dst_memory_type_id
,
*
dst_buffer_byte_size
,
input_buffer
,
buffer
,
stream_
,
&
cuda_used
,
copy_on_stream_
));
// Need to keep the backend memory alive in the case of async copy
in_use_memories_
.
emplace_back
(
std
::
move
(
internal_buffer
));
need_sync_
|=
cuda_used
;
}
return
nullptr
;
// success
}
template
<
typename
T
>
TRITONSERVER_Error
*
BackendInputCollector
::
SetElementCount
(
const
std
::
string
&
source_input
,
char
*
buffer
,
const
size_t
buffer_byte_size
)
{
size_t
buffer_offset
=
0
;
for
(
size_t
req_idx
=
0
;
req_idx
<
request_count_
;
req_idx
++
)
{
if
(
buffer_offset
+
sizeof
(
T
)
>
buffer_byte_size
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INVALID_ARG
,
"unexpected total byte size for batch input"
);
}
TRITONBACKEND_Input
*
input
;
RETURN_IF_ERROR
(
TRITONBACKEND_RequestInput
(
requests_
[
req_idx
],
source_input
.
c_str
(),
&
input
));
const
int64_t
*
shape
;
uint32_t
dims_count
;
RETURN_IF_ERROR
(
TRITONBACKEND_InputPropertiesForHostPolicy
(
input
,
host_policy_cstr_
,
nullptr
,
nullptr
,
&
shape
,
&
dims_count
,
nullptr
,
nullptr
));
*
(
reinterpret_cast
<
T
*>
(
buffer
)
+
req_idx
)
=
GetElementCount
(
shape
,
dims_count
);
buffer_offset
+=
sizeof
(
T
);
}
// Set the rest of the buffer to 0
for
(;
buffer_offset
+
sizeof
(
T
)
<=
buffer_byte_size
;
buffer_offset
+=
sizeof
(
T
))
{
*
reinterpret_cast
<
T
*>
(
buffer
+
buffer_offset
)
=
0
;
}
return
nullptr
;
// success
}
template
<
typename
T
>
TRITONSERVER_Error
*
BackendInputCollector
::
SetAccumulatedElementCount
(
const
std
::
string
&
source_input
,
char
*
buffer
,
const
size_t
buffer_byte_size
)
{
size_t
accumulated_element_count
=
0
;
size_t
buffer_offset
=
0
;
for
(
size_t
req_idx
=
0
;
req_idx
<
request_count_
;
req_idx
++
)
{
if
(
buffer_offset
+
sizeof
(
T
)
>
buffer_byte_size
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INVALID_ARG
,
"unexpected total byte size for batch input"
);
}
TRITONBACKEND_Input
*
input
;
RETURN_IF_ERROR
(
TRITONBACKEND_RequestInput
(
requests_
[
req_idx
],
source_input
.
c_str
(),
&
input
));
const
int64_t
*
shape
;
uint32_t
dims_count
;
RETURN_IF_ERROR
(
TRITONBACKEND_InputPropertiesForHostPolicy
(
input
,
host_policy_cstr_
,
nullptr
,
nullptr
,
&
shape
,
&
dims_count
,
nullptr
,
nullptr
));
accumulated_element_count
+=
GetElementCount
(
shape
,
dims_count
);
*
(
reinterpret_cast
<
T
*>
(
buffer
)
+
req_idx
)
=
accumulated_element_count
;
buffer_offset
+=
sizeof
(
T
);
}
// Set the rest of the buffer to 'accumulated_element_count'
// (no increase in element count)
for
(;
buffer_offset
+
sizeof
(
T
)
<=
buffer_byte_size
;
buffer_offset
+=
sizeof
(
T
))
{
*
reinterpret_cast
<
T
*>
(
buffer
+
buffer_offset
)
=
accumulated_element_count
;
}
return
nullptr
;
// success
}
template
<
typename
T
>
TRITONSERVER_Error
*
BackendInputCollector
::
SetBatchItemShape
(
const
std
::
string
&
source_input
,
char
*
buffer
,
const
size_t
buffer_byte_size
)
{
size_t
buffer_offset
=
0
;
for
(
size_t
req_idx
=
0
;
req_idx
<
request_count_
;
req_idx
++
)
{
TRITONBACKEND_Input
*
input
;
RETURN_IF_ERROR
(
TRITONBACKEND_RequestInput
(
requests_
[
req_idx
],
source_input
.
c_str
(),
&
input
));
const
int64_t
*
shape
;
uint32_t
dims_count
;
RETURN_IF_ERROR
(
TRITONBACKEND_InputPropertiesForHostPolicy
(
input
,
host_policy_cstr_
,
nullptr
,
nullptr
,
&
shape
,
&
dims_count
,
nullptr
,
nullptr
));
// Assuming first dimension is batch size and ragged input is only set
// for batching enabled model.
size_t
batch_1_size
=
sizeof
(
T
)
*
(
dims_count
-
1
);
if
(
buffer_offset
+
(
size_t
)
shape
[
0
]
*
batch_1_size
>
buffer_byte_size
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INVALID_ARG
,
(
GetRequestId
(
requests_
[
req_idx
])
+
"unexpected total byte size for batch input"
)
.
c_str
());
}
// The batch input tracks the shape without batch dimension for
// each batch item
for
(
size_t
idx
=
1
;
idx
<
dims_count
;
++
idx
)
{
// Need to set the element explicitly for type conversion
*
(
reinterpret_cast
<
T
*>
(
buffer
+
buffer_offset
)
+
(
idx
-
1
))
=
shape
[
idx
];
}
// memcpy the data repeatedly if the request has batch size > 1
for
(
int64_t
idx
=
1
;
idx
<
shape
[
0
];
++
idx
)
{
memcpy
(
buffer
+
buffer_offset
+
idx
*
batch_1_size
,
buffer
+
buffer_offset
,
batch_1_size
);
}
buffer_offset
+=
batch_1_size
*
(
size_t
)
shape
[
0
];
}
return
nullptr
;
// success
}
bool
BackendInputCollector
::
FlushPendingCopyKernel
(
char
*
tensor_buffer
,
const
size_t
tensor_buffer_byte_size
,
const
TRITONSERVER_MemoryType
tensor_memory_type
,
const
int64_t
tensor_memory_type_id
)
{
if
(
pending_copy_kernel_input_buffers_
.
size
()
==
0
)
{
return
false
;
}
bool
cuda_copy
=
false
;
TRITONSERVER_Error
*
error
=
nullptr
;
// Only try to launch kernel if buffer count is large enough for
// good GPU utilization
if
(
pending_copy_kernel_input_buffer_counts_
>=
kernel_buffer_threshold_
)
{
error
=
LaunchCopyKernel
(
tensor_buffer
,
tensor_buffer_byte_size
,
tensor_memory_type
,
tensor_memory_type_id
);
cuda_copy
=
(
error
==
nullptr
);
LOG_MESSAGE
(
TRITONSERVER_LOG_VERBOSE
,
(
std
::
string
(
"gather kernel launched with status: "
)
+
((
error
==
nullptr
)
?
"Success"
:
TRITONSERVER_ErrorMessage
(
error
)))
.
c_str
());
}
// If kernel can't be launched then just perform a direct copy.
if
((
pending_copy_kernel_input_buffer_counts_
<
kernel_buffer_threshold_
)
||
(
error
!=
nullptr
))
{
size_t
offset
=
0
;
for
(
auto
&
pr
:
pending_copy_kernel_input_buffers_
)
{
cuda_copy
|=
SetInputTensor
(
"gather kernel fallback"
,
pr
,
tensor_buffer
,
tensor_buffer_byte_size
,
tensor_memory_type
,
tensor_memory_type_id
,
pending_copy_kernel_buffer_offset_
+
offset
,
TRITONSERVER_MEMORY_CPU_PINNED
,
false
,
true
);
offset
+=
pr
.
memory_desc_
.
byte_size_
;
}
}
TRITONSERVER_ErrorDelete
(
error
);
// Pending kernel copies are handled...
pending_copy_kernel_buffer_byte_size_
=
0
;
pending_copy_kernel_buffer_offset_
=
0
;
pending_copy_kernel_input_buffer_counts_
=
0
;
pending_copy_kernel_input_buffers_
.
clear
();
return
cuda_copy
;
}
TRITONSERVER_Error
*
BackendInputCollector
::
LaunchCopyKernel
(
char
*
tensor_buffer
,
const
size_t
tensor_buffer_byte_size
,
const
TRITONSERVER_MemoryType
tensor_memory_type
,
const
int64_t
tensor_memory_type_id
)
{
#ifdef TRITON_ENABLE_GPU
input_ptr_buffer_host_
.
emplace_back
(
new
std
::
vector
<
int8_t
*>
());
byte_size_buffer_host_
.
emplace_back
(
new
std
::
vector
<
size_t
>
());
byte_size_offset_buffer_host_
.
emplace_back
(
new
std
::
vector
<
size_t
>
());
auto
&
input_ptr_buffer_host
=
*
input_ptr_buffer_host_
.
back
();
auto
&
byte_size_buffer_host
=
*
byte_size_buffer_host_
.
back
();
auto
&
byte_size_offset_buffer_host
=
*
byte_size_offset_buffer_host_
.
back
();
input_ptr_buffer_host
.
reserve
(
pending_copy_kernel_input_buffer_counts_
);
byte_size_buffer_host
.
reserve
(
pending_copy_kernel_input_buffer_counts_
);
byte_size_offset_buffer_host
.
reserve
(
pending_copy_kernel_input_buffer_counts_
);
size_t
byte_size_offset
=
0
;
for
(
const
auto
&
response_input
:
pending_copy_kernel_input_buffers_
)
{
const
auto
&
input
=
response_input
.
memory_desc_
;
input_ptr_buffer_host
.
emplace_back
(
const_cast
<
int8_t
*>
(
reinterpret_cast
<
const
int8_t
*>
(
input
.
buffer_
)));
byte_size_buffer_host
.
emplace_back
(
input
.
byte_size_
);
byte_size_offset_buffer_host
.
emplace_back
(
byte_size_offset
);
byte_size_offset
+=
input
.
byte_size_
;
}
BackendMemory
*
backend_memory
=
nullptr
;
std
::
vector
<
BackendMemory
::
AllocationType
>
alloc_types
;
switch
(
tensor_memory_type
)
{
case
TRITONSERVER_MEMORY_GPU
:
alloc_types
=
{
BackendMemory
::
AllocationType
::
GPU_POOL
,
BackendMemory
::
AllocationType
::
GPU
};
break
;
case
TRITONSERVER_MEMORY_CPU_PINNED
:
alloc_types
=
{
BackendMemory
::
AllocationType
::
CPU_PINNED_POOL
,
BackendMemory
::
AllocationType
::
CPU_PINNED
};
break
;
case
TRITONSERVER_MEMORY_CPU
:
alloc_types
=
{
BackendMemory
::
AllocationType
::
CPU
};
break
;
}
// input_ptr_buffer
size_t
input_ptr_buffer_byte_size
=
pending_copy_kernel_input_buffer_counts_
*
sizeof
(
int8_t
*
);
auto
err
=
BackendMemory
::
Create
(
memory_manager_
,
alloc_types
,
tensor_memory_type_id
,
input_ptr_buffer_byte_size
,
&
backend_memory
);
if
(
err
!=
nullptr
)
{
LOG_MESSAGE
(
TRITONSERVER_LOG_VERBOSE
,
(
std
::
string
(
"unable to create backend memory for type: "
)
+
TRITONSERVER_MemoryTypeString
(
tensor_memory_type
)
+
" id: "
+
std
::
to_string
(
tensor_memory_type_id
)
+
": "
+
TRITONSERVER_ErrorMessage
(
err
))
.
c_str
());
TRITONSERVER_ErrorDelete
(
err
);
}
else
{
in_use_memories_
.
emplace_back
(
backend_memory
);
}
if
(
backend_memory
==
nullptr
||
(
backend_memory
->
MemoryType
()
!=
tensor_memory_type
)
||
(
backend_memory
->
MemoryTypeId
()
!=
tensor_memory_type_id
))
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INTERNAL
,
"Failed to obtain memory buffer for copy kernel input"
);
}
char
*
input_ptr_buffer
=
backend_memory
->
MemoryPtr
();
// byte_size_buffer
size_t
byte_size_buffer_byte_size
=
pending_copy_kernel_input_buffer_counts_
*
sizeof
(
size_t
);
err
=
BackendMemory
::
Create
(
memory_manager_
,
alloc_types
,
tensor_memory_type_id
,
byte_size_buffer_byte_size
,
&
backend_memory
);
if
(
err
!=
nullptr
)
{
LOG_MESSAGE
(
TRITONSERVER_LOG_VERBOSE
,
(
std
::
string
(
"unable to create backend memory for type: "
)
+
TRITONSERVER_MemoryTypeString
(
tensor_memory_type
)
+
" id: "
+
std
::
to_string
(
tensor_memory_type_id
)
+
": "
+
TRITONSERVER_ErrorMessage
(
err
))
.
c_str
());
TRITONSERVER_ErrorDelete
(
err
);
}
else
{
in_use_memories_
.
emplace_back
(
backend_memory
);
}
if
(
backend_memory
==
nullptr
||
(
backend_memory
->
MemoryType
()
!=
tensor_memory_type
)
||
(
backend_memory
->
MemoryTypeId
()
!=
tensor_memory_type_id
))
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INTERNAL
,
"Failed to obtain memory buffer for copy kernel input"
);
}
char
*
byte_size_buffer
=
backend_memory
->
MemoryPtr
();
// byte_size_offset_buffer
size_t
byte_size_offset_buffer_byte_size
=
pending_copy_kernel_input_buffer_counts_
*
sizeof
(
size_t
);
err
=
BackendMemory
::
Create
(
memory_manager_
,
alloc_types
,
tensor_memory_type_id
,
byte_size_offset_buffer_byte_size
,
&
backend_memory
);
if
(
err
!=
nullptr
)
{
LOG_MESSAGE
(
TRITONSERVER_LOG_VERBOSE
,
(
std
::
string
(
"unable to create backend memory for type: "
)
+
TRITONSERVER_MemoryTypeString
(
tensor_memory_type
)
+
" id: "
+
std
::
to_string
(
tensor_memory_type_id
)
+
": "
+
TRITONSERVER_ErrorMessage
(
err
))
.
c_str
());
TRITONSERVER_ErrorDelete
(
err
);
}
else
{
in_use_memories_
.
emplace_back
(
backend_memory
);
}
if
(
backend_memory
==
nullptr
||
(
backend_memory
->
MemoryType
()
!=
tensor_memory_type
)
||
(
backend_memory
->
MemoryTypeId
()
!=
tensor_memory_type_id
))
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INTERNAL
,
"Failed to obtain memory buffer for copy kernel input"
);
}
char
*
byte_size_offset_buffer
=
backend_memory
->
MemoryPtr
();
cudaMemcpyAsync
(
input_ptr_buffer
,
input_ptr_buffer_host
.
data
(),
pending_copy_kernel_input_buffer_counts_
*
sizeof
(
int8_t
*
),
cudaMemcpyDefault
,
stream_
);
cudaMemcpyAsync
(
byte_size_buffer
,
byte_size_buffer_host
.
data
(),
pending_copy_kernel_input_buffer_counts_
*
sizeof
(
size_t
),
cudaMemcpyDefault
,
stream_
);
cudaMemcpyAsync
(
byte_size_offset_buffer
,
byte_size_offset_buffer_host
.
data
(),
pending_copy_kernel_input_buffer_counts_
*
sizeof
(
size_t
),
cudaMemcpyDefault
,
stream_
);
if
(
buffer_ready_event_
!=
nullptr
)
{
cudaEventSynchronize
(
buffer_ready_event_
);
buffer_ready_event_
=
nullptr
;
}
RETURN_IF_CUDA_ERROR
(
RunGatherKernel
(
(
const
int8_t
**
)
input_ptr_buffer
,
(
const
size_t
*
)
byte_size_buffer
,
(
const
size_t
*
)
byte_size_offset_buffer
,
(
int8_t
*
)
tensor_buffer
+
pending_copy_kernel_buffer_offset_
,
pending_copy_kernel_input_buffer_counts_
,
stream_
),
TRITONSERVER_ERROR_INTERNAL
,
std
::
string
(
"Failed to launch gather kernel"
));
return
nullptr
;
#else
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_UNSUPPORTED
,
"Copy kernel can not be launched with TRITON_ENABLE_GPU=OFF"
);
#endif // TRITON_ENABLE_GPU
}
}}
// namespace triton::backend
3rdparty/backend-r22.12/src/backend_memory.cc
0 → 100644
View file @
0a21fff9
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/backend/backend_memory.h"
#include <map>
#include "triton/backend/backend_common.h"
namespace
triton
{
namespace
backend
{
TRITONSERVER_Error
*
BackendMemory
::
Create
(
TRITONBACKEND_MemoryManager
*
manager
,
const
AllocationType
alloc_type
,
const
int64_t
memory_type_id
,
const
size_t
byte_size
,
BackendMemory
**
mem
)
{
*
mem
=
nullptr
;
void
*
ptr
=
nullptr
;
switch
(
alloc_type
)
{
case
AllocationType
::
CPU_PINNED
:
{
#ifdef TRITON_ENABLE_GPU
RETURN_IF_CUDA_ERROR
(
cudaHostAlloc
(
&
ptr
,
byte_size
,
cudaHostAllocPortable
),
TRITONSERVER_ERROR_UNAVAILABLE
,
std
::
string
(
"failed to allocate pinned system memory"
));
#else
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_UNSUPPORTED
,
"pinned-memory allocation not supported"
);
#endif // TRITON_ENABLE_GPU
break
;
}
case
AllocationType
::
GPU
:
{
#ifdef TRITON_ENABLE_GPU
int
current_device
;
RETURN_IF_CUDA_ERROR
(
cudaGetDevice
(
&
current_device
),
TRITONSERVER_ERROR_INTERNAL
,
std
::
string
(
"failed to get device"
));
bool
overridden
=
(
current_device
!=
memory_type_id
);
if
(
overridden
)
{
RETURN_IF_CUDA_ERROR
(
cudaSetDevice
(
memory_type_id
),
TRITONSERVER_ERROR_INTERNAL
,
std
::
string
(
"failed to set device"
));
}
auto
err
=
cudaMalloc
(
&
ptr
,
byte_size
);
if
(
overridden
)
{
LOG_IF_CUDA_ERROR
(
cudaSetDevice
(
current_device
),
"failed to set CUDA device"
);
}
RETURN_ERROR_IF_FALSE
(
err
==
cudaSuccess
,
TRITONSERVER_ERROR_UNAVAILABLE
,
std
::
string
(
"failed to allocate GPU memory: "
)
+
cudaGetErrorString
(
err
));
#else
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_UNSUPPORTED
,
"GPU allocation not supported"
);
#endif // TRITON_ENABLE_GPU
break
;
}
case
AllocationType
::
CPU
:
case
AllocationType
::
CPU_PINNED_POOL
:
case
AllocationType
::
GPU_POOL
:
RETURN_IF_ERROR
(
TRITONBACKEND_MemoryManagerAllocate
(
manager
,
&
ptr
,
AllocTypeToMemoryType
(
alloc_type
),
memory_type_id
,
byte_size
));
break
;
}
*
mem
=
new
BackendMemory
(
manager
,
alloc_type
,
memory_type_id
,
reinterpret_cast
<
char
*>
(
ptr
),
byte_size
);
return
nullptr
;
// success
}
TRITONSERVER_Error
*
BackendMemory
::
Create
(
TRITONBACKEND_MemoryManager
*
manager
,
const
std
::
vector
<
AllocationType
>&
alloc_types
,
const
int64_t
memory_type_id
,
const
size_t
byte_size
,
BackendMemory
**
mem
)
{
*
mem
=
nullptr
;
RETURN_ERROR_IF_TRUE
(
alloc_types
.
size
()
==
0
,
TRITONSERVER_ERROR_INVALID_ARG
,
std
::
string
(
"BackendMemory::Create, at least one allocation type must be "
"specified"
));
bool
success
=
false
;
std
::
unordered_map
<
AllocationType
,
TRITONSERVER_Error
*>
errors
;
for
(
const
AllocationType
alloc_type
:
alloc_types
)
{
TRITONSERVER_Error
*
err
=
Create
(
manager
,
alloc_type
,
memory_type_id
,
byte_size
,
mem
);
if
(
err
==
nullptr
)
{
success
=
true
;
break
;
}
errors
.
insert
({
alloc_type
,
err
});
}
// If allocation failed for all allocation types then display all
// the error messages and show the entire allocation request as
// failing.
if
(
!
success
)
{
std
::
string
msg
=
"BackendMemory::Create, all allocation types failed:"
;
for
(
const
auto
&
pr
:
errors
)
{
const
AllocationType
alloc_type
=
pr
.
first
;
TRITONSERVER_Error
*
err
=
pr
.
second
;
msg
+=
std
::
string
(
"
\n\t
"
)
+
AllocTypeString
(
alloc_type
)
+
": "
+
TRITONSERVER_ErrorMessage
(
err
);
TRITONSERVER_ErrorDelete
(
err
);
}
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_UNAVAILABLE
,
msg
.
c_str
());
}
return
nullptr
;
// success
}
TRITONSERVER_Error
*
BackendMemory
::
Create
(
TRITONBACKEND_MemoryManager
*
manager
,
const
AllocationType
alloc_type
,
const
int64_t
memory_type_id
,
void
*
buffer
,
const
size_t
byte_size
,
BackendMemory
**
mem
)
{
*
mem
=
new
BackendMemory
(
manager
,
alloc_type
,
memory_type_id
,
reinterpret_cast
<
char
*>
(
buffer
),
byte_size
,
false
/* owns_buffer */
);
return
nullptr
;
// success
}
BackendMemory
::~
BackendMemory
()
{
if
(
owns_buffer_
)
{
switch
(
alloctype_
)
{
case
AllocationType
::
CPU_PINNED
:
#ifdef TRITON_ENABLE_GPU
if
(
buffer_
!=
nullptr
)
{
LOG_IF_CUDA_ERROR
(
cudaFreeHost
(
buffer_
),
"failed to free pinned memory"
);
}
#endif // TRITON_ENABLE_GPU
break
;
case
AllocationType
::
GPU
:
#ifdef TRITON_ENABLE_GPU
if
(
buffer_
!=
nullptr
)
{
LOG_IF_CUDA_ERROR
(
cudaFree
(
buffer_
),
"failed to free CUDA memory"
);
}
#endif // TRITON_ENABLE_GPU
break
;
case
AllocationType
::
CPU
:
case
AllocationType
::
CPU_PINNED_POOL
:
case
AllocationType
::
GPU_POOL
:
LOG_IF_ERROR
(
TRITONBACKEND_MemoryManagerFree
(
manager_
,
buffer_
,
AllocTypeToMemoryType
(
alloctype_
),
memtype_id_
),
"failed to free memory buffer"
);
break
;
}
}
}
TRITONSERVER_MemoryType
BackendMemory
::
AllocTypeToMemoryType
(
const
AllocationType
a
)
{
switch
(
a
)
{
case
AllocationType
::
CPU
:
return
TRITONSERVER_MEMORY_CPU
;
case
AllocationType
::
CPU_PINNED
:
case
AllocationType
::
CPU_PINNED_POOL
:
return
TRITONSERVER_MEMORY_CPU_PINNED
;
case
AllocationType
::
GPU
:
case
AllocationType
::
GPU_POOL
:
return
TRITONSERVER_MEMORY_GPU
;
}
return
TRITONSERVER_MEMORY_CPU
;
// unreachable
}
const
char
*
BackendMemory
::
AllocTypeString
(
const
AllocationType
a
)
{
switch
(
a
)
{
case
AllocationType
::
CPU
:
return
"CPU"
;
case
AllocationType
::
CPU_PINNED
:
return
"CPU_PINNED"
;
case
AllocationType
::
GPU
:
return
"GPU"
;
case
AllocationType
::
CPU_PINNED_POOL
:
return
"CPU_PINNED_POOL"
;
case
AllocationType
::
GPU_POOL
:
return
"GPU_POOL"
;
}
return
"<unknown>"
;
}
}}
// namespace triton::backend
3rdparty/backend-r22.12/src/backend_model.cc
0 → 100644
View file @
0a21fff9
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_common.h"
namespace triton { namespace backend {

//
// BackendModel
//
BackendModel::BackendModel(
    TRITONBACKEND_Model* triton_model, const bool allow_optional)
    : triton_model_(triton_model), allow_optional_(allow_optional)
{
  const char* model_name;
  THROW_IF_BACKEND_MODEL_ERROR(
      TRITONBACKEND_ModelName(triton_model, &model_name));
  name_ = model_name;

  THROW_IF_BACKEND_MODEL_ERROR(
      TRITONBACKEND_ModelVersion(triton_model, &version_));

  const char* repository_path = nullptr;
  TRITONBACKEND_ArtifactType repository_artifact_type;
  THROW_IF_BACKEND_MODEL_ERROR(TRITONBACKEND_ModelRepository(
      triton_model, &repository_artifact_type, &repository_path));
  if (repository_artifact_type != TRITONBACKEND_ARTIFACT_FILESYSTEM) {
    throw BackendModelException(TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNSUPPORTED,
        (std::string("unsupported repository artifact type for model '") +
         model_name + "'")
            .c_str()));
  }
  repository_path_ = repository_path;

  THROW_IF_BACKEND_MODEL_ERROR(
      TRITONBACKEND_ModelServer(triton_model, &triton_server_));

  TRITONBACKEND_Backend* backend;
  THROW_IF_BACKEND_MODEL_ERROR(
      TRITONBACKEND_ModelBackend(triton_model, &backend));
  THROW_IF_BACKEND_MODEL_ERROR(
      TRITONBACKEND_BackendMemoryManager(backend, &triton_memory_manager_));

  THROW_IF_BACKEND_MODEL_ERROR(ParseModelConfig());
}

TRITONSERVER_Error*
BackendModel::ParseModelConfig()
{
  TRITONSERVER_Message* config_message;
  RETURN_IF_ERROR(TRITONBACKEND_ModelConfig(
      triton_model_, 1 /* config_version */, &config_message));

  // Get the model configuration as a json string from
  // config_message. We use TritonJson, which is a wrapper that
  // returns nice errors (currently the underlying implementation is
  // rapidjson... but others could be added).
  const char* buffer;
  size_t byte_size;
  RETURN_IF_ERROR(
      TRITONSERVER_MessageSerializeToJson(config_message, &buffer, &byte_size));

  TRITONSERVER_Error* err = model_config_.Parse(buffer, byte_size);
  RETURN_IF_ERROR(TRITONSERVER_MessageDelete(config_message));
  RETURN_IF_ERROR(err);

  int64_t mbs = 0;
  RETURN_IF_ERROR(model_config_.MemberAsInt("max_batch_size", &mbs));
  max_batch_size_ = mbs;

  enable_pinned_input_ = false;
  enable_pinned_output_ = false;
  {
    common::TritonJson::Value optimization;
    if (model_config_.Find("optimization", &optimization)) {
      common::TritonJson::Value pinned_memory;
      if (optimization.Find("input_pinned_memory", &pinned_memory)) {
        RETURN_IF_ERROR(
            pinned_memory.MemberAsBool("enable", &enable_pinned_input_));
      }
      if (optimization.Find("output_pinned_memory", &pinned_memory)) {
        RETURN_IF_ERROR(
            pinned_memory.MemberAsBool("enable", &enable_pinned_output_));
      }
    }
  }

  RETURN_IF_ERROR(
      BatchInput::ParseFromModelConfig(model_config_, &batch_inputs_));
  RETURN_IF_ERROR(
      BatchOutput::ParseFromModelConfig(model_config_, &batch_outputs_));
  for (const auto& batch_output : batch_outputs_) {
    for (const auto& name : batch_output.TargetNames()) {
      batch_output_map_.emplace(name, &batch_output);
    }
  }

  triton::common::TritonJson::Value config_inputs;
  RETURN_IF_ERROR(model_config_.MemberAsArray("input", &config_inputs));
  for (size_t i = 0; i < config_inputs.ArraySize(); i++) {
    triton::common::TritonJson::Value io;
    RETURN_IF_ERROR(config_inputs.IndexAsObject(i, &io));
    std::string io_name;
    RETURN_IF_ERROR(io.MemberAsString("name", &io_name));

    triton::common::TritonJson::Value input_property_json;
    bool allow_ragged_batch = false;
    if (io.Find("allow_ragged_batch", &input_property_json)) {
      RETURN_IF_ERROR(input_property_json.AsBool(&allow_ragged_batch));
    }
    if (allow_ragged_batch) {
      ragged_inputs_.emplace(io_name);
    }

    bool optional = false;
    if (io.Find("optional", &input_property_json)) {
      RETURN_IF_ERROR(input_property_json.AsBool(&optional));
    }
    if (optional) {
      if (allow_optional_) {
        optional_inputs_.emplace(io_name);
      } else {
        RETURN_IF_ERROR(TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            (std::string("'optional' is set to true for input '") + io_name +
             "' while the backend model doesn't support optional input")
                .c_str()));
      }
    }
  }

  return nullptr;
}

TRITONSERVER_Error*
BackendModel::SetModelConfig()
{
  triton::common::TritonJson::WriteBuffer json_buffer;
  RETURN_IF_ERROR(ModelConfig().Write(&json_buffer));

  TRITONSERVER_Message* message;
  RETURN_IF_ERROR(TRITONSERVER_MessageNewFromSerializedJson(
      &message, json_buffer.Base(), json_buffer.Size()));
  RETURN_IF_ERROR(TRITONBACKEND_ModelSetConfig(
      triton_model_, 1 /* config_version */, message));
  RETURN_IF_ERROR(TRITONSERVER_MessageDelete(message));

  // Triton core can normalize the missing config settings
  // in the above call. We must retrieve the updated model
  // configuration from the core.
  RETURN_IF_ERROR(ParseModelConfig());

  return nullptr;
}

TRITONSERVER_Error*
BackendModel::SupportsFirstDimBatching(bool* supports)
{
  *supports = max_batch_size_ > 0;
  return nullptr;
}

const BatchOutput*
BackendModel::FindBatchOutput(const std::string& output_name) const
{
  const auto it = batch_output_map_.find(output_name);
  return ((it == batch_output_map_.end()) ? nullptr : it->second);
}

}}  // namespace triton::backend
3rdparty/backend-r22.12/src/backend_model_instance.cc
0 → 100644
// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/backend/backend_model_instance.h"
#include <vector>
#include "triton/backend/backend_common.h"
#include "triton/backend/backend_model.h"
namespace triton { namespace backend {

//
// BackendModelInstance
//
BackendModelInstance::BackendModelInstance(
    BackendModel* backend_model,
    TRITONBACKEND_ModelInstance* triton_model_instance)
    : backend_model_(backend_model),
      triton_model_instance_(triton_model_instance)
{
  const char* instance_name;
  THROW_IF_BACKEND_INSTANCE_ERROR(
      TRITONBACKEND_ModelInstanceName(triton_model_instance, &instance_name));
  name_ = instance_name;

  THROW_IF_BACKEND_INSTANCE_ERROR(
      TRITONBACKEND_ModelInstanceKind(triton_model_instance, &kind_));
  THROW_IF_BACKEND_INSTANCE_ERROR(
      TRITONBACKEND_ModelInstanceDeviceId(triton_model_instance, &device_id_));

  common::TritonJson::Value& model_config = backend_model->ModelConfig();

  // If the model configuration specifies a 'default_model_filename'
  // and/or specifies 'cc_model_filenames' then determine the
  // appropriate 'artifact_filename' value. If the model configuration
  // does not specify them then just leave 'artifact_filename' empty and
  // the backend can then provide its own logic for determining the
  // filename if that is appropriate.
  THROW_IF_BACKEND_INSTANCE_ERROR(model_config.MemberAsString(
      "default_model_filename", &artifact_filename_));

  switch (kind_) {
    case TRITONSERVER_INSTANCEGROUPKIND_CPU: {
      LOG_MESSAGE(
          TRITONSERVER_LOG_VERBOSE,
          (std::string("Creating instance ") + name_ +
           " on CPU using artifact '" + artifact_filename_ + "'")
              .c_str());
      break;
    }
    case TRITONSERVER_INSTANCEGROUPKIND_MODEL: {
      LOG_MESSAGE(
          TRITONSERVER_LOG_VERBOSE,
          (std::string("Creating instance ") + name_ +
           " on model-specified devices using artifact '" +
           artifact_filename_ + "'")
              .c_str());
      break;
    }
    case TRITONSERVER_INSTANCEGROUPKIND_GPU: {
#if defined(TRITON_ENABLE_GPU)
      cudaDeviceProp cuprops;
      cudaError_t cuerr = cudaGetDeviceProperties(&cuprops, device_id_);
      if (cuerr != cudaSuccess) {
        throw BackendModelInstanceException(TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INTERNAL,
            (std::string("unable to get CUDA device properties for ") + name_ +
             ": " + cudaGetErrorString(cuerr))
                .c_str()));
      }

      const std::string cc =
          std::to_string(cuprops.major) + "." + std::to_string(cuprops.minor);
      common::TritonJson::Value cc_names;
      common::TritonJson::Value cc_name;
      if ((model_config.Find("cc_model_filenames", &cc_names)) &&
          (cc_names.Find(cc.c_str(), &cc_name))) {
        cc_name.AsString(&artifact_filename_);
      }

      LOG_MESSAGE(
          TRITONSERVER_LOG_VERBOSE,
          (std::string("Creating instance ") + name_ + " on GPU " +
           std::to_string(device_id_) + " (" + cc + ") using artifact '" +
           artifact_filename_ + "'")
              .c_str());
#elif !defined(TRITON_ENABLE_MALI_GPU)
      throw BackendModelInstanceException(TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL, "GPU instances not supported"));
#endif  // TRITON_ENABLE_GPU
      break;
    }
    default: {
      throw BackendModelInstanceException(TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_INTERNAL,
          (std::string("unexpected instance kind for ") + name_).c_str()));
    }
  }

  stream_ = nullptr;
  if (kind_ == TRITONSERVER_INSTANCEGROUPKIND_GPU) {
    THROW_IF_BACKEND_INSTANCE_ERROR(
        CreateCudaStream(device_id_, 0 /* cuda_stream_priority */, &stream_));
  }

  // Get the host policy setting as a json string from message,
  // and extract the host policy name for the instance.
  TRITONSERVER_Message* message = nullptr;
  THROW_IF_BACKEND_MODEL_ERROR(
      TRITONBACKEND_ModelInstanceHostPolicy(triton_model_instance_, &message));
  const char* buffer;
  size_t byte_size;
  THROW_IF_BACKEND_MODEL_ERROR(
      TRITONSERVER_MessageSerializeToJson(message, &buffer, &byte_size));

  common::TritonJson::Value host_policy;
  TRITONSERVER_Error* err = host_policy.Parse(buffer, byte_size);
  THROW_IF_BACKEND_MODEL_ERROR(err);

  std::vector<std::string> host_policy_name;
  THROW_IF_BACKEND_MODEL_ERROR(host_policy.Members(&host_policy_name));
  if (host_policy_name.size() != 1) {
    throw BackendModelInstanceException(TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        (std::string("unexpected no host policy for ") + name_).c_str()));
  }
  host_policy_name_ = host_policy_name[0];
}

BackendModelInstance::~BackendModelInstance()
{
#ifdef TRITON_ENABLE_GPU
  if (stream_ != nullptr) {
    cudaError_t err = cudaStreamDestroy(stream_);
    if (err != cudaSuccess) {
      TRITONSERVER_LogMessage(
          TRITONSERVER_LOG_ERROR, __FILE__, __LINE__,
          (std::string("~BackendModelInstance: ") + name_ +
           " failed to destroy cuda stream: " + cudaGetErrorString(err))
              .c_str());
    }
    stream_ = nullptr;
  }
#endif  // TRITON_ENABLE_GPU
}

}}  // namespace triton::backend
3rdparty/backend-r22.12/src/backend_output_responder.cc
0 → 100644
// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "triton/backend/backend_output_responder.h"
#include "triton/backend/backend_common.h"
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"
namespace triton { namespace backend {

//
// BackendOutputResponder
//
BackendOutputResponder::~BackendOutputResponder()
{
  for (auto& pinned_memory : pinned_memories_) {
    LOG_IF_ERROR(
        TRITONBACKEND_MemoryManagerFree(
            memory_manager_, reinterpret_cast<void*>(pinned_memory),
            TRITONSERVER_MEMORY_CPU_PINNED, 0),
        "failed to free pinned memory");
  }
}

void
BackendOutputResponder::ProcessTensor(
    const std::string& output_name, const TRITONSERVER_DataType datatype,
    std::vector<int64_t>& batchn_shape, const char* buffer,
    const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id)
{
  // A value of CPU_PINNED indicates that pinned memory buffer is not
  // needed for this tensor. Any other value indicates that a pinned
  // memory buffer is needed when the target memory type matches
  // 'use_pinned_memory_type'.
  TRITONSERVER_MemoryType use_pinned_memory_type =
      TRITONSERVER_MEMORY_CPU_PINNED;
  if (pinned_enabled_) {
    use_pinned_memory_type = GetUsePinnedMemoryType(memory_type);
  }

  const int64_t batchn_batch_size = batchn_shape[0];
  int64_t batch_size_offset = 0;

  size_t tensor_offset = 0;

  for (size_t idx = 0; idx < responses_->size(); idx++) {
    auto& request = requests_[idx];
    auto& response = (*responses_)[idx];

    // If the pending copies are from a tensor buffer that is not
    // contiguous with 'response's part of that buffer, then we need to
    // go ahead and perform the pending copies so that we can start a
    // new contiguous region if necessary.
    if ((pending_pinned_byte_size_ > 0) &&
        (tensor_offset !=
         (pending_pinned_byte_size_ + pending_pinned_offset_))) {
      need_sync_ |= FlushPendingPinned(buffer, memory_type, memory_type_id);
    }

    // Override shape to be correct for this response.
    if (first_dim_batching_) {
      TRITONBACKEND_Input* input;
      TRITONBACKEND_RequestInputByIndex(request, 0, &input);
      const int64_t* shape;
      TRITONBACKEND_InputProperties(
          input, nullptr, nullptr, &shape, nullptr, nullptr, nullptr);
      if ((batchn_batch_size != -1) &&
          ((batch_size_offset + shape[0]) > batchn_batch_size)) {
        if (response != nullptr) {
          RESPOND_AND_SET_NULL_IF_ERROR(
              &response,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  std::string(
                      GetRequestId(request) +
                      "failed to split the output tensor '" + output_name +
                      "' in responses: expected batch size of atleast " +
                      std::to_string(batch_size_offset + shape[0]) +
                      " in model output, got " +
                      std::to_string(batchn_batch_size))
                      .c_str()));
        }
      }
      batchn_shape[0] = shape[0];
      batch_size_offset += shape[0];
    }

    const size_t tensor_byte_size = GetByteSize(datatype, batchn_shape);

    TRITONBACKEND_Output* response_output;
    if (response != nullptr) {
      uint32_t output_count;
      RESPOND_AND_SET_NULL_IF_ERROR(
          &response, TRITONBACKEND_RequestOutputCount(request, &output_count));
      if (response != nullptr) {
        for (uint32_t output_idx = 0; output_idx < output_count;
             output_idx++) {
          const char* name;
          RESPOND_AND_SET_NULL_IF_ERROR(
              &response,
              TRITONBACKEND_RequestOutputName(request, output_idx, &name));
          if ((response != nullptr) && (output_name == name)) {
            RESPOND_AND_SET_NULL_IF_ERROR(
                &response, TRITONBACKEND_ResponseOutput(
                               response, &response_output, name, datatype,
                               batchn_shape.data(), batchn_shape.size()));
            if (response != nullptr) {
              need_sync_ |= SetFixedSizeBuffer(
                  &response, response_output, output_name, tensor_byte_size,
                  tensor_offset, buffer, memory_type, memory_type_id,
                  use_pinned_memory_type, false /* state */);
            }

            break;
          }
        }
      }
    }

    tensor_offset += tensor_byte_size;
  }

  // Done with the tensor, flush any pending pinned copies.
  need_sync_ |= FlushPendingPinned(buffer, memory_type, memory_type_id);
#ifdef TRITON_ENABLE_GPU
  if (need_sync_ && (event_ != nullptr)) {
    cudaEventRecord(event_, stream_);
  }
#endif  // TRITON_ENABLE_GPU
}

std::vector<TRITONBACKEND_State*>
BackendOutputResponder::ProcessStateTensor(
    const std::string& output_state_name, const TRITONSERVER_DataType datatype,
    std::vector<int64_t>& batchn_shape, const char* buffer,
    const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id)
{
  // A value of CPU_PINNED indicates that pinned memory buffer is not
  // needed for this tensor. Any other value indicates that a pinned
  // memory buffer is needed when the target memory type matches
  // 'use_pinned_memory_type'.
  TRITONSERVER_MemoryType use_pinned_memory_type =
      TRITONSERVER_MEMORY_CPU_PINNED;
  if (pinned_enabled_) {
    use_pinned_memory_type = GetUsePinnedMemoryType(memory_type);
  }

  std::vector<TRITONBACKEND_State*> states;
  const int64_t batchn_batch_size = batchn_shape[0];
  int64_t batch_size_offset = 0;

  size_t tensor_offset = 0;

  for (size_t idx = 0; idx < responses_->size(); idx++) {
    auto& request = requests_[idx];
    auto& response = (*responses_)[idx];

    // If the pending copies are from a tensor buffer that is not
    // contiguous with 'response's part of that buffer, then we need to
    // go ahead and perform the pending copies so that we can start a
    // new contiguous region if necessary.
    if ((pending_pinned_byte_size_ > 0) &&
        (tensor_offset !=
         (pending_pinned_byte_size_ + pending_pinned_offset_))) {
      need_sync_ |= FlushPendingPinned(buffer, memory_type, memory_type_id);
    }

    // Override shape to be correct for this response.
    if (first_dim_batching_) {
      TRITONBACKEND_Input* input;
      TRITONBACKEND_RequestInputByIndex(request, 0, &input);
      const int64_t* shape;
      TRITONBACKEND_InputProperties(
          input, nullptr, nullptr, &shape, nullptr, nullptr, nullptr);
      if ((batchn_batch_size != -1) &&
          ((batch_size_offset + shape[0]) > batchn_batch_size)) {
        if (response != nullptr) {
          RESPOND_AND_SET_NULL_IF_ERROR(
              &response,
              TRITONSERVER_ErrorNew(
                  TRITONSERVER_ERROR_UNSUPPORTED,
                  std::string(
                      GetRequestId(request) +
                      "failed to split the output state tensor '" +
                      output_state_name +
                      "' in responses: expected batch size of atleast " +
                      std::to_string(batch_size_offset + shape[0]) +
                      " in model output, got " +
                      std::to_string(batchn_batch_size))
                      .c_str()));
        }
      }
      batchn_shape[0] = shape[0];
      batch_size_offset += shape[0];
    }

    const size_t tensor_byte_size = GetByteSize(datatype, batchn_shape);

    TRITONBACKEND_State* output_state;
    if (response != nullptr) {
      RESPOND_AND_SET_NULL_IF_ERROR(
          &response, TRITONBACKEND_StateNew(
                         &output_state, request, output_state_name.c_str(),
                         datatype, batchn_shape.data(), batchn_shape.size()));
      if (response != nullptr) {
        states.push_back(output_state);
        need_sync_ |= SetFixedSizeBuffer(
            &response, output_state, output_state_name, tensor_byte_size,
            tensor_offset, buffer, memory_type, memory_type_id,
            use_pinned_memory_type, true /* state */);
      }
    }

    tensor_offset += tensor_byte_size;
  }

  // Done with the tensor, flush any pending pinned copies.
  need_sync_ |= FlushPendingPinned(buffer, memory_type, memory_type_id);
#ifdef TRITON_ENABLE_GPU
  if (need_sync_ && (event_ != nullptr)) {
    cudaEventRecord(event_, stream_);
  }
#endif  // TRITON_ENABLE_GPU

  return states;
}

bool
BackendOutputResponder::Finalize()
{
#ifdef TRITON_ENABLE_GPU
  if ((!deferred_pinned_.empty()) && need_sync_) {
    if (event_ != nullptr) {
      cudaEventSynchronize(event_);
    } else {
      cudaStreamSynchronize(stream_);
    }
    need_sync_ = false;
  }
#endif  // TRITON_ENABLE_GPU

  // After the above sync all the GPU->pinned copies are complete. Any
  // deferred copies of pinned->CPU can now be done.
  for (auto& def : deferred_pinned_) {
    auto pinned_memory_type = TRITONSERVER_MEMORY_CPU_PINNED;
    int64_t pinned_memory_id = 0;
    char* pinned_buffer = def.pinned_memory_;

    size_t offset = 0;
    for (auto& pr : def.responses_) {
      auto& response = pr.first;
      auto& response_output = pr.second;

      bool cuda_used = false;
      RESPOND_AND_SET_NULL_IF_ERROR(
          response, CopyBuffer(
                        response_output.name_, pinned_memory_type,
                        pinned_memory_id, response_output.memory_type_,
                        response_output.memory_type_id_,
                        response_output.buffer_byte_size_,
                        pinned_buffer + offset,
                        const_cast<void*>(response_output.buffer_), stream_,
                        &cuda_used, copy_on_stream_));
      need_sync_ |= cuda_used;

      offset += response_output.buffer_byte_size_;
    }
  }

#ifdef TRITON_ENABLE_GPU
  // Record the new event location if deferred copies occur
  if ((!deferred_pinned_.empty()) && need_sync_ && (event_ != nullptr)) {
    cudaEventRecord(event_, stream_);
  }
#endif  // TRITON_ENABLE_GPU
  deferred_pinned_.clear();

  return need_sync_;
}

bool
BackendOutputResponder::SetFixedSizeBuffer(
    TRITONBACKEND_Response** response, void* response_output_or_state,
    const std::string& output_name, const size_t tensor_byte_size,
    const size_t tensor_offset, const char* tensor_buffer,
    const TRITONSERVER_MemoryType tensor_memory_type,
    const int64_t tensor_memory_type_id,
    const TRITONSERVER_MemoryType use_pinned_memory_type, bool state)
{
  void* buffer = nullptr;
  bool cuda_copy = false;

  TRITONSERVER_MemoryType actual_memory_type = tensor_memory_type;
  int64_t actual_memory_type_id = tensor_memory_type_id;

  if (state) {
    TRITONBACKEND_State* response_state =
        reinterpret_cast<TRITONBACKEND_State*>(response_output_or_state);
    auto err = TRITONBACKEND_StateBuffer(
        response_state, &buffer, tensor_byte_size, &actual_memory_type,
        &actual_memory_type_id);
    if (err != nullptr) {
      RESPOND_AND_SET_NULL_IF_ERROR(response, err);
      return cuda_copy;
    }
  } else {
    TRITONBACKEND_Output* response_output =
        reinterpret_cast<TRITONBACKEND_Output*>(response_output_or_state);
    auto err = TRITONBACKEND_OutputBuffer(
        response_output, &buffer, tensor_byte_size, &actual_memory_type,
        &actual_memory_type_id);
    if (err != nullptr) {
      RESPOND_AND_SET_NULL_IF_ERROR(response, err);
      return cuda_copy;
    }
  }

  // If the response buffer matches the memory type that should use an
  // intermediate pinned memory buffer for the transfer, then just
  // record the response as pending and increase the size required for
  // the intermediate pinned buffer.
  if ((use_pinned_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) &&
      (actual_memory_type == use_pinned_memory_type)) {
    if (pending_pinned_byte_size_ == 0) {
      pending_pinned_offset_ = tensor_offset;
    }

    pending_pinned_byte_size_ += tensor_byte_size;
    pending_pinned_outputs_.push_back(std::make_pair(
        response, OutputData(
                      output_name, buffer, tensor_byte_size,
                      actual_memory_type, actual_memory_type_id)));
  } else {
    // Direct copy without intermediate pinned memory.
    bool cuda_used = false;
    auto err = CopyBuffer(
        output_name, tensor_memory_type, tensor_memory_type_id,
        actual_memory_type, actual_memory_type_id, tensor_byte_size,
        tensor_buffer + tensor_offset, buffer, stream_, &cuda_used,
        copy_on_stream_);
    cuda_copy |= cuda_used;

    if (err != nullptr) {
      RESPOND_AND_SET_NULL_IF_ERROR(response, err);
      return cuda_copy;
    }
  }

  return cuda_copy;
}

bool
BackendOutputResponder::FlushPendingPinned(
    const char* tensor_buffer,
    const TRITONSERVER_MemoryType tensor_memory_type,
    const int64_t tensor_memory_type_id)
{
  bool cuda_copy = false;

  // Will be copying from CPU->pinned->GPU or GPU->pinned->CPU.
  // Attempt to allocate a pinned buffer to use for staging the
  // copy... if we fail to allocate the pinned buffer then we just
  // directly go CPU->GPU or GPU->CPU.
  char* pinned_memory = nullptr;
  if (pending_pinned_byte_size_ > 0) {
    TRITONSERVER_Error* err = TRITONBACKEND_MemoryManagerAllocate(
        memory_manager_, reinterpret_cast<void**>(&pinned_memory),
        TRITONSERVER_MEMORY_CPU_PINNED, 0 /* memory_type_id */,
        pending_pinned_byte_size_);
    if (err != nullptr) {
      pinned_memory = nullptr;
      TRITONSERVER_ErrorDelete(err);
    }
  }

  // If the pinned buffer wasn't actually allocated then just perform
  // a direct copy.
  if (pinned_memory == nullptr) {
    size_t offset = 0;
    for (auto& pr : pending_pinned_outputs_) {
      auto& response = pr.first;
      auto& response_output = pr.second;

      bool cuda_used = false;
      RESPOND_AND_SET_NULL_IF_ERROR(
          response, CopyBuffer(
                        response_output.name_, tensor_memory_type,
                        tensor_memory_type_id, response_output.memory_type_,
                        response_output.memory_type_id_,
                        response_output.buffer_byte_size_,
                        tensor_buffer + pending_pinned_offset_ + offset,
                        const_cast<void*>(response_output.buffer_), stream_,
                        &cuda_used, copy_on_stream_));
      cuda_copy |= cuda_used;

      offset += response_output.buffer_byte_size_;
    }
  }
  // We have a pinned buffer so do a single copy of a block of tensor
  // data to the pinned buffer.
  else {  // pinned_memory_type == TRITONSERVER_MEMORY_CPU_PINNED
    bool cuda_used = false;
    auto err = CopyBuffer(
        "pinned buffer", tensor_memory_type, tensor_memory_type_id,
        TRITONSERVER_MEMORY_CPU_PINNED, 0 /* memory_type_id */,
        pending_pinned_byte_size_, tensor_buffer + pending_pinned_offset_,
        pinned_memory, stream_, &cuda_used, copy_on_stream_);
    cuda_copy |= cuda_used;

    // If something goes wrong with the copy all the pending
    // responses fail...
    if (err != nullptr) {
      for (auto& pr : pending_pinned_outputs_) {
        auto& response = pr.first;
        if (*response != nullptr) {
          LOG_IF_ERROR(
              TRITONBACKEND_ResponseSend(
                  *response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, err),
              "failed to send TensorFlow error response");
          *response = nullptr;
        }
      }
      TRITONSERVER_ErrorDelete(err);
    }

    // If the copy was not async (i.e. if tensor was in CPU so a
    // CPU->CPU-PINNED copy was performed above), then the pinned
    // buffer now holds the tensor contents and we can immediately
    // issue the copies from the pinned buffer to the
    // responses.
    //
    // Otherwise the GPU->CPU-PINNED async copies are in flight and we
    // simply remember the pinned buffer and the corresponding
    // response outputs so that we can do the pinned->CPU copies in
    // finalize after we have waited for all async copies to complete.
    if (!cuda_used) {
      size_t offset = 0;
      for (auto& pr : pending_pinned_outputs_) {
        auto& response = pr.first;
        auto& response_output = pr.second;

        bool cuda_used = false;
        RESPOND_AND_SET_NULL_IF_ERROR(
            response,
            CopyBuffer(
                response_output.name_, TRITONSERVER_MEMORY_CPU_PINNED,
                0 /* memory_type_id */, response_output.memory_type_,
                response_output.memory_type_id_,
                response_output.buffer_byte_size_, pinned_memory + offset,
                const_cast<void*>(response_output.buffer_), stream_,
                &cuda_used, copy_on_stream_));
        cuda_copy |= cuda_used;

        offset += response_output.buffer_byte_size_;
      }
    } else {
      deferred_pinned_.emplace_back(
          pinned_memory, pending_pinned_byte_size_,
          std::move(pending_pinned_outputs_));
    }
  }

  // Pending pinned copies are handled...
  pending_pinned_byte_size_ = 0;
  pending_pinned_offset_ = 0;
  pending_pinned_outputs_.clear();

  // Need to hold on to the allocated pinned buffer as there are still
  // copies in flight. Will delete it in finalize.
  if (pinned_memory != nullptr) {
    pinned_memories_.push_back(pinned_memory);
  }

  return cuda_copy;
}

void
BackendOutputResponder::ProcessBatchOutput(
    const std::string& name, const BatchOutput& batch_output,
    const char* buffer, const TRITONSERVER_MemoryType memory_type,
    const int64_t memory_type_id)
{
  // A value of CPU_PINNED indicates that pinned memory buffer is not
  // needed for this tensor. Any other value indicates that a pinned
  // memory buffer is needed when the target memory type matches
  // 'use_pinned_memory_type'.
  TRITONSERVER_MemoryType use_pinned_memory_type =
      TRITONSERVER_MEMORY_CPU_PINNED;
  if (pinned_enabled_) {
    use_pinned_memory_type = GetUsePinnedMemoryType(memory_type);
  }

  // Batch output may be processed differently based on the kind
  switch (batch_output.BatchOutputKind()) {
    case BatchOutput::Kind::BATCH_SCATTER_WITH_INPUT_SHAPE: {
      const auto& output_name = batch_output.TargetNames()[0];
      const auto& input_name = batch_output.SourceInputs()[0];
      const auto& datatype = batch_output.DataType();

      size_t tensor_offset = 0;
      for (size_t idx = 0; idx < responses_->size(); idx++) {
        auto& request = requests_[idx];
        auto& response = (*responses_)[idx];

        // If the pending copies are from a tensor buffer that is not
        // contiguous with 'response's part of that buffer, then we need to
        // go ahead and perform the pending copies so that we can start a
        // new contiguous region if necessary.
        if ((pending_pinned_byte_size_ > 0) &&
            (tensor_offset !=
             (pending_pinned_byte_size_ + pending_pinned_offset_))) {
          need_sync_ |=
              FlushPendingPinned(buffer, memory_type, memory_type_id);
        }

        // Override shape to be correct for this response, with a naive
        // assumption that the dynamic dimension in output is mapped to the
        // same dimension in the input
        auto output_batchn_shape = batch_output.OutputShape();
        {
          TRITONBACKEND_Input* input;
          TRITONBACKEND_RequestInput(request, input_name.c_str(), &input);
          const int64_t* shape;
          TRITONBACKEND_InputProperties(
              input, nullptr, nullptr, &shape, nullptr, nullptr, nullptr);
          for (size_t dim_idx = 0; dim_idx < output_batchn_shape.size();
               dim_idx++) {
            if (output_batchn_shape[dim_idx] == -1) {
              output_batchn_shape[dim_idx] = shape[dim_idx];
            }
          }
        }
        const size_t tensor_byte_size =
            GetByteSize(datatype, output_batchn_shape);

        TRITONBACKEND_Output* response_output;
        if (response != nullptr) {
          uint32_t output_count;
          RESPOND_AND_SET_NULL_IF_ERROR(
              &response,
              TRITONBACKEND_RequestOutputCount(request, &output_count));
          if (response != nullptr) {
            for (uint32_t output_idx = 0; output_idx < output_count;
                 output_idx++) {
              const char* name;
              RESPOND_AND_SET_NULL_IF_ERROR(
                  &response,
                  TRITONBACKEND_RequestOutputName(request, output_idx, &name));
              if ((response != nullptr) && (output_name == name)) {
                RESPOND_AND_SET_NULL_IF_ERROR(
                    &response,
                    TRITONBACKEND_ResponseOutput(
                        response, &response_output, name, datatype,
                        output_batchn_shape.data(),
                        output_batchn_shape.size()));
                if (response != nullptr) {
                  need_sync_ |= SetFixedSizeBuffer(
                      &response, response_output, output_name,
                      tensor_byte_size, tensor_offset, buffer, memory_type,
                      memory_type_id, use_pinned_memory_type,
                      false /* state */);
                }

                break;
              }
            }
          }
        }

        tensor_offset += tensor_byte_size;
      }
      break;
    }
  }

  // Done with the tensor, flush any pending pinned copies.
  need_sync_ |= FlushPendingPinned(buffer, memory_type, memory_type_id);
#ifdef TRITON_ENABLE_GPU
  if (need_sync_ && (event_ != nullptr)) {
    cudaEventRecord(event_, stream_);
  }
#endif  // TRITON_ENABLE_GPU
}

}}  // namespace triton::backend
3rdparty/backend-r22.12/src/kernel.cu
0 → 100644
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "kernel.h"
#include <cuda.h>
#define THREADBLOCK_SIZE 512
__launch_bounds__(THREADBLOCK_SIZE) __global__ void
TritonGatherKernel(
    const int8_t** __restrict input_ptr_buffer,
    const size_t* __restrict byte_size_buffer,
    const size_t* __restrict byte_size_offset_buffer,
    int8_t* __restrict output_buffer)
{
  int request_idx = blockIdx.x;
  int lane_id = threadIdx.x;
  const int8_t* request_input_buffer = input_ptr_buffer[request_idx];
  int byte_size = byte_size_buffer[request_idx];
  int byte_size_offset = byte_size_offset_buffer[request_idx];

  int8_t* output_buffer_with_offset = output_buffer + byte_size_offset;
  if (((byte_size % 4) == 0) && (((uint64_t)request_input_buffer % 4) == 0) &&
      (((uint64_t)output_buffer_with_offset % 4) == 0)) {
    int32_t* input_4 = (int32_t*)request_input_buffer;
    int32_t* output_4 = (int32_t*)output_buffer_with_offset;
    int element_count = byte_size / 4;
    for (int elem_id = lane_id; elem_id < element_count;
         elem_id += THREADBLOCK_SIZE) {
      output_4[elem_id] = input_4[elem_id];
    }
  } else {
    for (int elem_id = lane_id; elem_id < byte_size;
         elem_id += THREADBLOCK_SIZE) {
      output_buffer_with_offset[elem_id] =
          __ldg(request_input_buffer + elem_id);
    }
  }
}

#ifdef __cplusplus
extern "C" {
#endif

cudaError_t
RunGatherKernel(
    const int8_t** input_ptr_buffer, const size_t* byte_size_buffer,
    const size_t* byte_size_offset_buffer, int8_t* output_buffer,
    size_t request_count, cudaStream_t stream)
{
  TritonGatherKernel<<<request_count, THREADBLOCK_SIZE, 0, stream>>>(
      input_ptr_buffer, byte_size_buffer, byte_size_offset_buffer,
      output_buffer);
  return cudaGetLastError();
}

#ifdef __cplusplus
}
#endif
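Editor's note, not part of the vendored file: as a hedged illustration of how the C entry point above could be driven, here is a minimal host-side sketch. Only RunGatherKernel's signature and the one-block-per-request launch come from the file; the buffer names and setup (d_a, d_b, the size/offset tables) are hypothetical.

```cpp
// Hypothetical driver for RunGatherKernel: gather two per-request device
// buffers into one contiguous output buffer. Illustration only.
#include <cstdint>
#include <vector>
#include <cuda_runtime_api.h>
#include "kernel.h"

int main()
{
  const size_t request_count = 2;

  // Two per-request payloads, staged to the device.
  std::vector<int8_t> h_a(256, 1), h_b(512, 2);
  int8_t *d_a = nullptr, *d_b = nullptr, *d_out = nullptr;
  cudaMalloc(reinterpret_cast<void**>(&d_a), h_a.size());
  cudaMalloc(reinterpret_cast<void**>(&d_b), h_b.size());
  cudaMalloc(reinterpret_cast<void**>(&d_out), h_a.size() + h_b.size());
  cudaMemcpy(d_a, h_a.data(), h_a.size(), cudaMemcpyHostToDevice);
  cudaMemcpy(d_b, h_b.data(), h_b.size(), cudaMemcpyHostToDevice);

  // Per-request pointer, byte-size and output-offset tables, also on device.
  const int8_t* h_ptrs[] = {d_a, d_b};
  size_t h_sizes[] = {h_a.size(), h_b.size()};
  size_t h_offsets[] = {0, h_a.size()};
  const int8_t** d_ptrs = nullptr;
  size_t *d_sizes = nullptr, *d_offsets = nullptr;
  cudaMalloc(reinterpret_cast<void**>(&d_ptrs), sizeof(h_ptrs));
  cudaMalloc(reinterpret_cast<void**>(&d_sizes), sizeof(h_sizes));
  cudaMalloc(reinterpret_cast<void**>(&d_offsets), sizeof(h_offsets));
  cudaMemcpy(d_ptrs, h_ptrs, sizeof(h_ptrs), cudaMemcpyHostToDevice);
  cudaMemcpy(d_sizes, h_sizes, sizeof(h_sizes), cudaMemcpyHostToDevice);
  cudaMemcpy(d_offsets, h_offsets, sizeof(h_offsets), cudaMemcpyHostToDevice);

  // One thread block per request copies that request's bytes into the
  // contiguous output buffer at its recorded offset.
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  cudaError_t err = RunGatherKernel(
      d_ptrs, d_sizes, d_offsets, d_out, request_count, stream);
  cudaStreamSynchronize(stream);
  return (err == cudaSuccess) ? 0 : 1;
}
```

Because each request maps to one block, the offsets table must describe a non-overlapping layout of the output buffer; the kernel itself does not check for overlap.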
3rdparty/backend-r22.12/src/kernel.h
0 → 100644
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <cuda_runtime_api.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif

cudaError_t
RunGatherKernel(
    const int8_t** input_ptr_buffer, const size_t* byte_size_buffer,
    const size_t* byte_size_offset_buffer, int8_t* output_buffer,
    size_t request_count, cudaStream_t stream);

#ifdef __cplusplus
}
#endif
3rdparty/common-r22.12/.clang-format
0 → 100644
---
BasedOnStyle: Google
IndentWidth: 2
ContinuationIndentWidth: 4
UseTab: Never
MaxEmptyLinesToKeep: 2
SortIncludes: true
CompactNamespaces: true
ReflowComments: true
DerivePointerAlignment: false
PointerAlignment: Left
AllowShortIfStatementsOnASingleLine: false
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AlwaysBreakAfterReturnType: TopLevelDefinitions
AlignAfterOpenBracket: AlwaysBreak
BreakBeforeBraces: Custom
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: true
  AfterNamespace: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: true
BinPackArguments: true
BinPackParameters: true
ConstructorInitializerAllOnOneLineOrOnePerLine: false
IndentCaseLabels: true
\ No newline at end of file
3rdparty/common-r22.12/.gitignore
0 → 100644
/build
/.vscode
*.so
3rdparty/common-r22.12/CMakeLists.txt
0 → 100644
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#cmake_minimum_required(VERSION 3.17)
cmake_minimum_required(VERSION 3.16)

project(tritoncommon LANGUAGES C CXX)

#
# Options
#
# Some components are expensive to build and have extensive
# dependencies, so those parts of the build must be enabled
# explicitly.
option(TRITON_COMMON_ENABLE_PROTOBUF "Build protobuf artifacts" OFF)
option(TRITON_COMMON_ENABLE_PROTOBUF_PYTHON "Build protobuf artifacts for python" ON)
option(TRITON_COMMON_ENABLE_GRPC "Build grpc artifacts" OFF)
option(TRITON_COMMON_ENABLE_JSON "Build json-related libs" ON)
#option(TRITON_COMMON_ENABLE_JSON "Build json-related libs" OFF)

if(TRITON_COMMON_ENABLE_JSON)
  find_package(RapidJSON CONFIG REQUIRED)
  message(STATUS "RapidJSON found. Headers: ${RAPIDJSON_INCLUDE_DIRS}")
endif()

set(THREADS_PREFER_PTHREAD_FLAG TRUE)
find_package(Threads REQUIRED)

if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
  message(
    "Using MSVC as compiler, default target on Windows 10. "
    "If the target system is not Windows 10, please update _WIN32_WINNT "
    "to corresponding value."
  )
endif()

add_library(common-compile-settings INTERFACE)

target_compile_features(common-compile-settings INTERFACE cxx_std_11)

target_compile_options(
  common-compile-settings
  INTERFACE
    $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
      -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
    $<$<CXX_COMPILER_ID:MSVC>:/W0 /D_WIN32_WINNT=0x0A00 /EHsc>
)

#
# Error
#
add_library(
  triton-common-error
  src/error.cc
)

add_library(TritonCommon::triton-common-error ALIAS triton-common-error)

target_include_directories(
  triton-common-error
  PUBLIC
    $<INSTALL_INTERFACE:include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src
)

target_link_libraries(triton-common-error PRIVATE common-compile-settings)

#
# Logging
#
add_library(
  triton-common-logging
  src/logging.cc
)

add_library(TritonCommon::triton-common-logging ALIAS triton-common-logging)

target_include_directories(
  triton-common-logging
  PUBLIC
    $<INSTALL_INTERFACE:include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src
)

if(${TRITON_ENABLE_LOGGING})
  target_compile_definitions(
    triton-common-logging
    PRIVATE TRITON_ENABLE_LOGGING=1
  )
endif() # TRITON_ENABLE_LOGGING

target_link_libraries(triton-common-logging PRIVATE common-compile-settings)

#
# SyncQueue
#
add_library(triton-common-sync-queue INTERFACE)

add_library(TritonCommon::triton-common-sync-queue ALIAS triton-common-sync-queue)

target_include_directories(
  triton-common-sync-queue
  INTERFACE
    $<INSTALL_INTERFACE:include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)

#
# Async Work Queue
#
add_library(
  triton-common-async-work-queue
  src/async_work_queue.cc
  src/error.cc
  src/thread_pool.cc
)

add_library(TritonCommon::triton-common-async-work-queue ALIAS triton-common-async-work-queue)

target_include_directories(
  triton-common-async-work-queue
  PUBLIC
    $<INSTALL_INTERFACE:include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src
)

target_link_libraries(
  triton-common-async-work-queue
  PUBLIC
    Threads::Threads
  PRIVATE
    common-compile-settings
)

#
# Thread Pool
#
add_library(
  triton-common-thread-pool
  src/thread_pool.cc
)

add_library(TritonCommon::triton-common-thread-pool ALIAS triton-common-thread-pool)

target_include_directories(
  triton-common-thread-pool
  PUBLIC
    $<INSTALL_INTERFACE:include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src
)

target_link_libraries(
  triton-common-thread-pool
  PUBLIC
    Threads::Threads
  PRIVATE
    common-compile-settings
)

#
# JSON utilities
#
if(TRITON_COMMON_ENABLE_JSON)
  add_library(triton-common-json INTERFACE)

  add_library(TritonCommon::triton-common-json ALIAS triton-common-json)

  target_include_directories(
    triton-common-json
    INTERFACE
      $<INSTALL_INTERFACE:include>
      $<INSTALL_INTERFACE:${RAPIDJSON_INCLUDE_DIRS}>
      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
      $<BUILD_INTERFACE:${RAPIDJSON_INCLUDE_DIRS}>
  )
endif()

#
# Table Printer
#
add_library(
  triton-common-table-printer
  src/table_printer.cc
)

add_library(TritonBackend::triton-common-table-printer ALIAS triton-common-table-printer)

target_include_directories(
  triton-common-table-printer
  PUBLIC
    $<INSTALL_INTERFACE:include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src
)

target_link_libraries(triton-common-table-printer PRIVATE common-compile-settings)

set_target_properties(
  triton-common-async-work-queue
  triton-common-error
  triton-common-logging
  triton-common-table-printer
  triton-common-thread-pool
  PROPERTIES
    WINDOWS_EXPORT_ALL_SYMBOLS TRUE
    POSITION_INDEPENDENT_CODE ON
)

set_target_properties(
  triton-common-async-work-queue
  PROPERTIES OUTPUT_NAME tritonasyncworkqueue
)

set_target_properties(
  triton-common-thread-pool
  PROPERTIES OUTPUT_NAME tritonthreadpool
)

set_target_properties(
  triton-common-error
  PROPERTIES OUTPUT_NAME tritoncommonerror
)

set_target_properties(
  triton-common-logging
  PROPERTIES OUTPUT_NAME tritoncommonlogging
)

set_target_properties(
  triton-common-table-printer
  PROPERTIES OUTPUT_NAME tritontableprinter
)

#
# Protobuf and GRPC artifacts
#
if(${TRITON_COMMON_ENABLE_PROTOBUF} OR ${TRITON_COMMON_ENABLE_GRPC})
  add_subdirectory(protobuf)
  set(protobuf_MODULE_COMPATIBLE TRUE CACHE BOOL "protobuf_MODULE_COMPATIBLE" FORCE)
  find_package(Protobuf CONFIG REQUIRED)
  message(STATUS "Using protobuf ${Protobuf_VERSION}")

  #
  # Model Config (depends on protobuf & generated .pb.h file)
  #
  add_library(
    triton-common-model-config
    src/model_config.cc
  )

  add_library(TritonCommon::triton-common-model-config ALIAS triton-common-model-config)

  target_include_directories(
    triton-common-model-config
    PUBLIC
      $<INSTALL_INTERFACE:include>
      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    PRIVATE
      ${CMAKE_CURRENT_SOURCE_DIR}/src
      ${Protobuf_INCLUDE_DIRS}
  )

  target_link_libraries(
    triton-common-model-config
    PRIVATE
      common-compile-settings
      protobuf::libprotobuf
      proto-library
  )

  set_target_properties(
    triton-common-model-config
    PROPERTIES
      WINDOWS_EXPORT_ALL_SYMBOLS TRUE
      POSITION_INDEPENDENT_CODE ON
      OUTPUT_NAME tritoncommonmodelconfig
  )
endif()

#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonCommon)

install(
  TARGETS
    triton-common-async-work-queue
    triton-common-error
    triton-common-logging
    triton-common-sync-queue
    triton-common-table-printer
    triton-common-thread-pool
    common-compile-settings
  EXPORT
    triton-common-targets
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
)

if(TRITON_COMMON_ENABLE_JSON)
  install(
    TARGETS
      triton-common-json
    EXPORT
      triton-common-targets
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
  )
endif()

if(${TRITON_COMMON_ENABLE_GRPC} OR ${TRITON_COMMON_ENABLE_PROTOBUF})
  install(
    TARGETS
      proto-library
      triton-common-model-config
      # proto-py-library
    EXPORT
      triton-common-targets
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
  )
endif()

if(${TRITON_COMMON_ENABLE_GRPC})
  install(
    TARGETS
      grpc-service-library
      # grpc-service-py-library
    EXPORT
      triton-common-targets
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
  )
endif()

install(
  DIRECTORY include/
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
)

install(
  EXPORT
    triton-common-targets
  FILE
    TritonCommonTargets.cmake
  NAMESPACE
    TritonCommon::
  DESTINATION
    ${INSTALL_CONFIGDIR}
)

include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonCommonConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/TritonCommonConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES
    ${CMAKE_CURRENT_BINARY_DIR}/TritonCommonConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
export(
  EXPORT
    triton-common-targets
  FILE
    ${CMAKE_CURRENT_BINARY_DIR}/TritonCommonTargets.cmake
  NAMESPACE
    TritonCommon::
)

export(PACKAGE TritonCommon)
3rdparty/common-r22.12/LICENSE
0 → 100644
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of NVIDIA CORPORATION nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3rdparty/common-r22.12/README.md
0 → 100644
<!--
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-->
[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)
# Triton Inference Server Common
Common source, scripts and utilities shared across all Triton
repositories.
This repo is not typically built directly but is instead included in
the build of other repos. To build directly first install the required
dependencies.
```
$ apt-get install rapidjson-dev
```
Use cmake 3.17 or later to build and install in a local directory.
```
$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ..
$ make install
```
3rdparty/common-r22.12/cmake/TritonCommonConfig.cmake.in
0 → 100644
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@PACKAGE_INIT@
set_and_check(TRITONCOMMON_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}")
list(APPEND CMAKE_MODULE_PATH ${TRITONCOMMON_CMAKE_DIR})
include(CMakeFindDependencyMacro)
find_dependency(Threads)
if(NOT TARGET TritonCommon::triton-common-json)
include("${TRITONCOMMON_CMAKE_DIR}/TritonCommonTargets.cmake")
endif()
check_required_components(triton-common-json
triton-common-sync-queue
triton-common-async-work-queue
triton-common-thread-pool
)
set(TRITONCOMMON_LIBRARIES
TritonCommon::triton-common-json
TritonCommon::triton-common-sync-queue
TritonCommon::triton-common-async-work-queue
TritonCommon::triton-common-thread-pool
)
3rdparty/common-r22.12/include/triton/common/async_work_queue.h
0 → 100644
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "error.h"
#include "thread_pool.h"
namespace triton { namespace common {

// Manager for asynchronous worker threads. Use to accelerate copies and
// other such operations by running them in parallel.
// Call Initialize to start the worker threads (once) and AddTask to add
// tasks to the queue.
class AsyncWorkQueue {
 public:
  // Start 'worker_count' number of worker threads.
  static Error Initialize(size_t worker_count);

  // Get the number of worker threads.
  static size_t WorkerCount();

  // Add a 'task' to the queue. The function will take ownership of 'task'.
  // Therefore std::move should be used when calling AddTask.
  static Error AddTask(std::function<void(void)>&& task);

 protected:
  static void Reset();

 private:
  AsyncWorkQueue() = default;
  ~AsyncWorkQueue();
  static AsyncWorkQueue* GetSingleton();

  std::unique_ptr<ThreadPool> thread_pool_;
};

}}  // namespace triton::common
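Editor's note, not part of the vendored header: a minimal hedged sketch of the call pattern described by the comments above, based only on the declarations in this file. The worker count and tasks are arbitrary.

```cpp
// Hypothetical consumer of AsyncWorkQueue. Illustration only.
#include <atomic>
#include <functional>
#include <utility>
#include "triton/common/async_work_queue.h"

using triton::common::AsyncWorkQueue;
using triton::common::Error;

int main()
{
  // Start the shared worker pool once.
  Error err = AsyncWorkQueue::Initialize(4 /* worker_count */);
  if (!err.IsOk()) {
    return 1;
  }

  std::atomic<int> done{0};
  for (int i = 0; i < 8; ++i) {
    // AddTask takes ownership of the callable, so hand it over with std::move.
    std::function<void(void)> task = [&done]() { done++; };
    AsyncWorkQueue::AddTask(std::move(task));
  }

  // Real callers must arrange their own completion signaling; the header
  // does not expose a join/wait primitive.
  return 0;
}
```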
3rdparty/common-r22.12/include/triton/common/error.h
0 → 100644
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <string>
namespace triton { namespace common {

//
// Error
//
// Error returned by utilities from common repo.
//
class Error {
 public:
  enum class Code {
    SUCCESS,
    UNKNOWN,
    INTERNAL,
    NOT_FOUND,
    INVALID_ARG,
    UNAVAILABLE,
    UNSUPPORTED,
    ALREADY_EXISTS
  };

  explicit Error(Code code = Code::SUCCESS) : code_(code) {}
  explicit Error(Code code, const std::string& msg) : code_(code), msg_(msg) {}

  // Convenience "success" value. Can be used as Error::Success to
  // indicate no error.
  static const Error Success;

  // Return the code for this status.
  Code ErrorCode() const { return code_; }

  // Return the message for this status.
  const std::string& Message() const { return msg_; }

  // Return true if this status indicates "ok"/"success", false if
  // status indicates some kind of failure.
  bool IsOk() const { return code_ == Code::SUCCESS; }

  // Return the status as a string.
  std::string AsString() const;

  // Return the constant string name for a code.
  static const char* CodeString(const Code code);

 protected:
  Code code_;
  std::string msg_;
};

}}  // namespace triton::common
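Usage sketch (not part of the diff): a minimal illustration of returning and checking this Error type; the helper name, its argument, and the message are placeholders.

#include <iostream>

#include "triton/common/error.h"

// Illustrative only: report failure through triton::common::Error.
triton::common::Error
LoadSomething(bool ok)
{
  if (!ok) {
    return triton::common::Error(
        triton::common::Error::Code::NOT_FOUND, "something was not found");
  }
  return triton::common::Error::Success;
}

int
main()
{
  triton::common::Error err = LoadSomething(false);
  if (!err.IsOk()) {
    std::cerr << err.AsString() << std::endl;  // code name plus message
  }
  return 0;
}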
3rdparty/common-r22.12/include/triton/common/logging.h
0 → 100644
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
#include <cerrno>
#include <cstring>
#include <fstream>
namespace triton { namespace common {

// A log message.
class LogMessage {
 public:
  // Log levels.
  enum Level { kERROR = 0, kWARNING = 1, kINFO = 2 };

  LogMessage(const char* file, int line, uint32_t level);
  ~LogMessage();

  std::stringstream& stream() { return stream_; }

 private:
  static const std::vector<char> level_name_;
  std::stringstream stream_;
};

// Global logger for messages. Controls how log messages are reported.
class Logger {
 public:
  enum class Format { kDEFAULT, kISO8601 };

  Logger();

  // Is a log level enabled.
  bool IsEnabled(LogMessage::Level level) const { return enables_[level]; }

  // Set enable for a log Level.
  void SetEnabled(LogMessage::Level level, bool enable)
  {
    enables_[level] = enable;
  }

  // Get the current verbose logging level.
  uint32_t VerboseLevel() const { return vlevel_; }

  // Set the current verbose logging level.
  void SetVerboseLevel(uint32_t vlevel) { vlevel_ = vlevel; }

  // Get the logging format.
  Format LogFormat() { return format_; }

  // Get the logging format as a string.
  std::string LogFormatString()
  {
    switch (format_) {
      case Format::kISO8601:
        return "ISO8601";
      case Format::kDEFAULT:
        return "default";
      default:
        return "Invalid format";
    }
  }

  // Set the logging format.
  void SetLogFormat(Format format) { format_ = format; }

  // Get the log output file name.
  const std::string& LogFile() { return filename_; }

  // Set the log output file. Returns an empty string upon
  // success, else returns an error string.
  const std::string SetLogFile(const std::string& filename)
  {
    const std::lock_guard<std::mutex> lock(mutex_);
    file_stream_.close();
    std::string revert_name(filename_);
    filename_ = filename;
    if (!filename_.empty()) {
      file_stream_.open(filename_, std::ios::app);
      if (file_stream_.fail()) {
        std::stringstream error;
        error << __FILE__ << " " << __LINE__
              << ": Failed to open log file: " << std::strerror(errno)
              << std::endl;
        filename_ = revert_name;
        file_stream_.open(filename_, std::ios::app);
        return error.str();
      }
    }
    // will return an empty string
    return std::string();
  }

  // Log a message.
  void Log(const std::string& msg);

  // Flush the log.
  void Flush();

 private:
  std::vector<bool> enables_;
  uint32_t vlevel_;
  Format format_;
  std::mutex mutex_;
  std::string filename_;
  std::ofstream file_stream_;
};

extern Logger gLogger_;
#define LOG_ENABLE_INFO(E) \
triton::common::gLogger_.SetEnabled( \
triton::common::LogMessage::Level::kINFO, (E))
#define LOG_ENABLE_WARNING(E) \
triton::common::gLogger_.SetEnabled( \
triton::common::LogMessage::Level::kWARNING, (E))
#define LOG_ENABLE_ERROR(E) \
triton::common::gLogger_.SetEnabled( \
triton::common::LogMessage::Level::kERROR, (E))
#define LOG_SET_VERBOSE(L) \
triton::common::gLogger_.SetVerboseLevel( \
static_cast<uint32_t>(std::max(0, (L))))
#define LOG_SET_OUT_FILE(FN) triton::common::gLogger_.SetLogFile((FN))
#define LOG_SET_FORMAT(F) triton::common::gLogger_.SetLogFormat((F))
#define LOG_VERBOSE_LEVEL triton::common::gLogger_.VerboseLevel()
#define LOG_FORMAT triton::common::gLogger_.LogFormat()
#define LOG_FORMAT_STRING triton::common::gLogger_.LogFormatString()
#define LOG_FILE triton::common::gLogger_.LogFile()
#ifdef TRITON_ENABLE_LOGGING
#define LOG_INFO_IS_ON \
triton::common::gLogger_.IsEnabled(triton::common::LogMessage::Level::kINFO)
#define LOG_WARNING_IS_ON \
triton::common::gLogger_.IsEnabled( \
triton::common::LogMessage::Level::kWARNING)
#define LOG_ERROR_IS_ON \
triton::common::gLogger_.IsEnabled(triton::common::LogMessage::Level::kERROR)
#define LOG_VERBOSE_IS_ON(L) (triton::common::gLogger_.VerboseLevel() >= (L))
#else
// If logging is disabled, define macro to be false to avoid further evaluation
#define LOG_INFO_IS_ON false
#define LOG_WARNING_IS_ON false
#define LOG_ERROR_IS_ON false
#define LOG_VERBOSE_IS_ON(L) false
#endif // TRITON_ENABLE_LOGGING
// Macros that use explicitly given filename and line number.
#define LOG_INFO_FL(FN, LN) \
if (LOG_INFO_IS_ON) \
triton::common::LogMessage( \
(char*)(FN), LN, triton::common::LogMessage::Level::kINFO) \
.stream()
#define LOG_WARNING_FL(FN, LN) \
if (LOG_WARNING_IS_ON) \
triton::common::LogMessage( \
(char*)(FN), LN, triton::common::LogMessage::Level::kWARNING) \
.stream()
#define LOG_ERROR_FL(FN, LN) \
if (LOG_ERROR_IS_ON) \
triton::common::LogMessage( \
(char*)(FN), LN, triton::common::LogMessage::Level::kERROR) \
.stream()
#define LOG_VERBOSE_FL(L, FN, LN) \
if (LOG_VERBOSE_IS_ON(L)) \
triton::common::LogMessage( \
(char*)(FN), LN, triton::common::LogMessage::Level::kINFO) \
.stream()
// Macros that use current filename and line number.
#define LOG_INFO LOG_INFO_FL(__FILE__, __LINE__)
#define LOG_WARNING LOG_WARNING_FL(__FILE__, __LINE__)
#define LOG_ERROR LOG_ERROR_FL(__FILE__, __LINE__)
#define LOG_VERBOSE(L) LOG_VERBOSE_FL(L, __FILE__, __LINE__)
#define LOG_STATUS_ERROR(X, MSG) \
do { \
const Status& status__ = (X); \
if (!status__.IsOk()) { \
LOG_ERROR << (MSG) << ": " << status__.AsString(); \
} \
} while (false)
#define LOG_TRITONSERVER_ERROR(X, MSG) \
do { \
TRITONSERVER_Error* err__ = (X); \
if (err__ != nullptr) { \
LOG_ERROR << (MSG) << ": " << TRITONSERVER_ErrorCodeString(err__) \
<< " - " << TRITONSERVER_ErrorMessage(err__); \
TRITONSERVER_ErrorDelete(err__); \
} \
} while (false)
#define LOG_FLUSH triton::common::gLogger_.Flush()
}}  // namespace triton::common
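Usage sketch (not part of the diff): a minimal illustration of the logging macros, assuming the translation unit is built with TRITON_ENABLE_LOGGING defined; the file name and messages are placeholders.

#include "triton/common/logging.h"

// Illustrative only: enable levels, pick a sink, then log.
void
ConfigureAndLog()
{
  LOG_ENABLE_INFO(true);           // enable kINFO messages
  LOG_SET_VERBOSE(1);              // emit LOG_VERBOSE(L) for L <= 1
  LOG_SET_OUT_FILE("server.log");  // returns an empty string on success

  LOG_INFO << "model loaded";
  LOG_VERBOSE(1) << "verbose detail: " << 42;
  LOG_FLUSH;
}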
3rdparty/common-r22.12/include/triton/common/model_config.h
0 → 100644
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <google/protobuf/any.pb.h>
#include <stdint.h>
#include "model_config.pb.h"
namespace triton { namespace common {

/// The type for a repeated dims field (used for shape).
using DimsList = ::google::protobuf::RepeatedField<::google::protobuf::int64>;

/// The type for the metric_tags map.
using MetricTagsMap = ::google::protobuf::Map<std::string, std::string>;

// Map from a host policy name to <setting, value> map of cmdline
// settings for the host policy.
using HostPolicyCmdlineConfig = std::map<std::string, std::string>;
using HostPolicyCmdlineConfigMap =
    std::unordered_map<std::string, HostPolicyCmdlineConfig>;

// Map from backend name to list of setting=value pairs of cmdline
// settings for the backend.
using BackendCmdlineConfig = std::vector<std::pair<std::string, std::string>>;
using BackendCmdlineConfigMap =
    std::unordered_map<std::string, BackendCmdlineConfig>;

/// The value for a dimension in a shape that indicates that the
/// dimension can take on any size.
constexpr int WILDCARD_DIM = -1;

constexpr int SCHEDULER_DEFAULT_NICE = 5;

/// Enumeration for the different platform types.
enum Platform {
  PLATFORM_UNKNOWN = 0,
  PLATFORM_TENSORRT_PLAN = 1,
  PLATFORM_TENSORFLOW_GRAPHDEF = 2,
  PLATFORM_TENSORFLOW_SAVEDMODEL = 3,
  PLATFORM_ENSEMBLE = 4,
  PLATFORM_ONNXRUNTIME_ONNX = 5,
  PLATFORM_PYTORCH_LIBTORCH = 6
};

/// Get the number of elements in a shape.
/// \param dims The shape.
/// \return The number of elements, or -1 if the number of elements
/// cannot be determined because the shape contains one or more
/// wildcard dimensions.
int64_t GetElementCount(const DimsList& dims);

/// Get the number of elements in a shape.
/// \param dims The shape.
/// \return The number of elements, or -1 if the number of elements
/// cannot be determined because the shape contains one or more
/// wildcard dimensions.
int64_t GetElementCount(const std::vector<int64_t>& dims);

/// Get the number of elements in the shape of a model input.
/// \param mio The model input.
/// \return The number of elements, or -1 if the number of elements
/// cannot be determined because the shape contains one or more
/// wildcard dimensions.
int64_t GetElementCount(const inference::ModelInput& mio);

/// Get the number of elements in the shape of a model output.
/// \param mio The model output.
/// \return The number of elements, or -1 if the number of elements
/// cannot be determined because the shape contains one or more
/// wildcard dimensions.
int64_t GetElementCount(const inference::ModelOutput& mio);

/// Are values of a datatype fixed-size, or variable-sized.
/// \param dtype The data-type.
/// \return True if datatype values are fixed-sized, false if
/// variable-sized.
bool IsFixedSizeDataType(const inference::DataType dtype);

/// Get the size of objects of a given datatype in bytes.
/// \param dtype The data-type.
/// \return The size, in bytes, of objects of the datatype, or 0 if
/// the size cannot be determined (for example, values of type TYPE_STRING
/// have variable length and so the size cannot be determined just from
/// the type).
size_t GetDataTypeByteSize(const inference::DataType dtype);

/// Get the size, in bytes, of a tensor based on datatype and shape.
/// \param dtype The data-type.
/// \param dims The shape.
/// \return The size, in bytes, of the corresponding tensor, or -1 if
/// unable to determine the size.
int64_t GetByteSize(const inference::DataType& dtype, const DimsList& dims);

/// Get the size, in bytes, of a tensor based on datatype and shape.
/// \param dtype The data-type.
/// \param dims The shape.
/// \return The size, in bytes, of the corresponding tensor, or -1 if
/// unable to determine the size.
int64_t GetByteSize(
    const inference::DataType& dtype, const std::vector<int64_t>& dims);

/// Get the size, in bytes, of a tensor based on batch-size, datatype
/// and shape. A tensor that has empty shape [] and non-zero
/// batch-size is sized as a tensor with shape [ batch-size ].
/// \param batch_size The batch-size. May be 0 to indicate no
/// batching.
/// \param dtype The data-type.
/// \param dims The shape.
/// \return The size, in bytes, of the corresponding tensor, or -1 if
/// unable to determine the size.
int64_t GetByteSize(
    const int batch_size, const inference::DataType& dtype,
    const DimsList& dims);

/// Get the size, in bytes, of a tensor based on batch-size, datatype
/// and shape. A tensor that has empty shape [] and non-zero
/// batch-size is sized as a tensor with shape [ batch-size ].
/// \param batch_size The batch-size. May be 0 to indicate no
/// batching.
/// \param dtype The data-type.
/// \param dims The shape.
/// \return The size, in bytes, of the corresponding tensor, or -1 if
/// unable to determine the size.
int64_t GetByteSize(
    const int batch_size, const inference::DataType& dtype,
    const std::vector<int64_t>& dims);

/// Get the size, in bytes, of a tensor based on ModelInput.
/// \param mio The ModelInput protobuf.
/// \return The size, in bytes, of the corresponding tensor, or -1 if
/// unable to determine the size.
int64_t GetByteSize(const inference::ModelInput& mio);

/// Get the size, in bytes, of a tensor based on ModelOutput.
/// \param mio The ModelOutput protobuf.
/// \return The size, in bytes, of the corresponding tensor, or -1 if
/// unable to determine the size.
int64_t GetByteSize(const inference::ModelOutput& mio);

/// Get the CPU thread nice level associated with a model
/// configuration's priority.
/// \param config The model configuration.
/// \return The nice level.
int GetCpuNiceLevel(const inference::ModelConfig& config);

/// Compare two model configuration shapes for equality. Wildcard
/// dimensions (that is, dimensions with size WILDCARD_DIM) are
/// compared literally so that to be equal the two shapes must both
/// specify WILDCARD_DIM in the same dimensions.
/// \param dims0 The first shape.
/// \param dims1 The second shape.
/// \return True if the shapes are equal, false if not equal.
bool CompareDims(const DimsList& dims0, const DimsList& dims1);

/// Compare two model configuration shapes for equality. Wildcard
/// dimensions (that is, dimensions with size WILDCARD_DIM) are
/// compared literally so that to be equal the two shapes must both
/// specify WILDCARD_DIM in the same dimensions.
/// \param dims0 The first shape.
/// \param dims1 The second shape.
/// \return True if the shapes are equal, false if not equal.
bool CompareDims(
    const std::vector<int64_t>& dims0, const std::vector<int64_t>& dims1);

/// Compare two model configuration shapes for equality. Wildcard
/// dimensions (that is, dimensions with size WILDCARD_DIM) are
/// allowed to match with any value. So, a dimension in one shape
/// specified as WILDCARD_DIM will always match the same dimension in
/// the other shape.
/// \param dims0 The first shape.
/// \param dims1 The second shape.
/// \return True if the shapes are equal, false if not equal.
bool CompareDimsWithWildcard(const DimsList& dims0, const DimsList& dims1);

/// Compare two model configuration shapes for equality. Wildcard
/// dimensions (that is, dimensions with size WILDCARD_DIM) are
/// allowed to match with any value. So, a dimension in one shape
/// specified as WILDCARD_DIM will always match the same dimension in
/// the other shape.
/// \param dims0 The first shape.
/// \param dims1 The second shape.
/// \return True if the shapes are equal, false if not equal.
bool CompareDimsWithWildcard(
    const DimsList& dims0, const std::vector<int64_t>& dims1);

/// Convert a DimsList to string representation.
/// \param dims The DimsList to be converted.
/// \return String representation of the DimsList in pattern
/// "[d0,d1,...,dn]"
std::string DimsListToString(const DimsList& dims);

/// Convert a vector representing a shape to string representation.
/// \param dims The vector of dimensions to be converted.
/// \return String representation of the vector in pattern
/// "[d0,d1,...,dn]"
std::string DimsListToString(
    const std::vector<int64_t>& dims, const int start_idx = 0);

/// Get the server protocol string representation of a datatype.
/// \param dtype The data type.
/// \return The string representation.
const char* DataTypeToProtocolString(const inference::DataType dtype);

/// Get the datatype corresponding to a server protocol string
/// representation of a datatype.
/// \param dtype string representation.
/// \return The data type.
inference::DataType ProtocolStringToDataType(const std::string& dtype);

/// Get the datatype corresponding to a server protocol string
/// representation of a datatype.
/// \param dtype Pointer to string.
/// \param len Length of the string.
/// \return The data type.
inference::DataType ProtocolStringToDataType(const char* dtype, size_t len);

}}  // namespace triton::common
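Usage sketch (not part of the diff): a minimal illustration of composing the shape helpers, assuming the generated model_config.pb.h provides the inference::TYPE_FP32 enum value; the shape and batch size are placeholders.

#include <cstdint>
#include <vector>

#include "triton/common/model_config.h"

// Illustrative only: size a batched FP32 tensor from its per-request shape.
int64_t
ExampleTensorBytes()
{
  std::vector<int64_t> dims{3, 224, 224};
  if (triton::common::GetElementCount(dims) == -1) {
    return -1;  // shape contains a WILDCARD_DIM (-1) dimension
  }
  // batch_size=8, 4 bytes per TYPE_FP32 element, 3*224*224 elements each.
  return triton::common::GetByteSize(8, inference::TYPE_FP32, dims);
}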