OpenDAS / Lmdeploy · Commits · 0a21fff9

Commit 0a21fff9 authored Dec 20, 2023 by xiabo
Adapt to 0.1.0
parent 9484fd1c
Changes 158
Showing 20 changed files with 6049 additions and 0 deletions (+6049 -0)
3rdparty/core-r22.12/src/infer_trace.h +205 -0
3rdparty/core-r22.12/src/instance_queue.cc +99 -0
3rdparty/core-r22.12/src/instance_queue.h +57 -0
3rdparty/core-r22.12/src/label_provider.cc +95 -0
3rdparty/core-r22.12/src/label_provider.h +65 -0
3rdparty/core-r22.12/src/libtritonserver.ldscript +32 -0
3rdparty/core-r22.12/src/memory.cc +238 -0
3rdparty/core-r22.12/src/memory.h +174 -0
3rdparty/core-r22.12/src/metric_family.cc +321 -0
3rdparty/core-r22.12/src/metric_family.h +111 -0
3rdparty/core-r22.12/src/metric_model_reporter.cc +168 -0
3rdparty/core-r22.12/src/metric_model_reporter.h +138 -0
3rdparty/core-r22.12/src/metrics.cc +1035 -0
3rdparty/core-r22.12/src/metrics.h +335 -0
3rdparty/core-r22.12/src/model.cc +137 -0
3rdparty/core-r22.12/src/model.h +162 -0
3rdparty/core-r22.12/src/model_config_cuda.cc +61 -0
3rdparty/core-r22.12/src/model_config_cuda.h +40 -0
3rdparty/core-r22.12/src/model_config_utils.cc +2294 -0
3rdparty/core-r22.12/src/model_config_utils.h +282 -0
Too many changes to show. To preserve performance only 158 of 158+ files are displayed.
3rdparty/core-r22.12/src/infer_trace.h
0 → 100644
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <atomic>
#include <chrono>
#include <memory>
#include "constants.h"
#include "status.h"
#include "tritonserver_apis.h"
namespace triton { namespace core {

#ifdef TRITON_ENABLE_TRACING

//
// InferenceTrace
//
// Interface to TRITONSERVER_InferenceTrace to report trace events.
//
class InferenceTrace {
 public:
  InferenceTrace(
      const TRITONSERVER_InferenceTraceLevel level, const uint64_t parent_id,
      TRITONSERVER_InferenceTraceActivityFn_t activity_fn,
      TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn,
      TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* userp)
      : level_(level), id_(next_id_++), parent_id_(parent_id),
        activity_fn_(activity_fn), tensor_activity_fn_(tensor_activity_fn),
        release_fn_(release_fn), userp_(userp)
  {
  }

  InferenceTrace* SpawnChildTrace();

  int64_t Id() const { return id_; }
  int64_t ParentId() const { return parent_id_; }

  const std::string& ModelName() const { return model_name_; }
  int64_t ModelVersion() const { return model_version_; }

  void SetModelName(const std::string& n) { model_name_ = n; }
  void SetModelVersion(int64_t v) { model_version_ = v; }

  // Report trace activity.
  void Report(
      const TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns)
  {
    if ((level_ & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) > 0) {
      activity_fn_(
          reinterpret_cast<TRITONSERVER_InferenceTrace*>(this), activity,
          timestamp_ns, userp_);
    }
  }

  // Report trace activity at the current time.
  void ReportNow(const TRITONSERVER_InferenceTraceActivity activity)
  {
    if ((level_ & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) > 0) {
      Report(
          activity,
          std::chrono::duration_cast<std::chrono::nanoseconds>(
              std::chrono::steady_clock::now().time_since_epoch())
              .count());
    }
  }

  // Report tensor trace activity.
  void ReportTensor(
      const TRITONSERVER_InferenceTraceActivity activity, const char* name,
      TRITONSERVER_DataType datatype, const void* base, size_t byte_size,
      const int64_t* shape, uint64_t dim_count,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
  {
    if ((level_ & TRITONSERVER_TRACE_LEVEL_TENSORS) > 0) {
      tensor_activity_fn_(
          reinterpret_cast<TRITONSERVER_InferenceTrace*>(this), activity, name,
          datatype, base, byte_size, shape, dim_count, memory_type,
          memory_type_id, userp_);
    }
  }

  // Release the trace. Call the trace release callback.
  void Release();

 private:
  const TRITONSERVER_InferenceTraceLevel level_;
  const uint64_t id_;
  const uint64_t parent_id_;

  TRITONSERVER_InferenceTraceActivityFn_t activity_fn_;
  TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn_;
  TRITONSERVER_InferenceTraceReleaseFn_t release_fn_;
  void* userp_;

  std::string model_name_;
  int64_t model_version_;

  // Maintain next id statically so that trace id is unique even
  // across traces
  static std::atomic<uint64_t> next_id_;
};

//
// InferenceTraceProxy
//
// Object attached as shared_ptr to InferenceRequest and
// InferenceResponse(s) being traced as part of a single inference
// request.
//
class InferenceTraceProxy {
 public:
  InferenceTraceProxy(InferenceTrace* trace) : trace_(trace) {}
  ~InferenceTraceProxy() { trace_->Release(); }

  int64_t Id() const { return trace_->Id(); }
  int64_t ParentId() const { return trace_->ParentId(); }
  const std::string& ModelName() const { return trace_->ModelName(); }
  int64_t ModelVersion() const { return trace_->ModelVersion(); }
  void SetModelName(const std::string& n) { trace_->SetModelName(n); }
  void SetModelVersion(int64_t v) { trace_->SetModelVersion(v); }

  void Report(
      const TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns)
  {
    trace_->Report(activity, timestamp_ns);
  }

  void ReportNow(const TRITONSERVER_InferenceTraceActivity activity)
  {
    trace_->ReportNow(activity);
  }

  void ReportTensor(
      const TRITONSERVER_InferenceTraceActivity activity, const char* name,
      TRITONSERVER_DataType datatype, const void* base, size_t byte_size,
      const int64_t* shape, uint64_t dim_count,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
  {
    trace_->ReportTensor(
        activity, name, datatype, base, byte_size, shape, dim_count,
        memory_type, memory_type_id);
  }

  std::shared_ptr<InferenceTraceProxy> SpawnChildTrace();

 private:
  InferenceTrace* trace_;
};
#endif // TRITON_ENABLE_TRACING
//
// Macros to generate trace activity
//
#ifdef TRITON_ENABLE_TRACING
#define INFER_TRACE_ACTIVITY(T, A, TS_NS) \
{ \
const auto& trace = (T); \
const auto ts_ns = (TS_NS); \
if (trace != nullptr) { \
trace->Report(A, ts_ns); \
} \
}
#define INFER_TRACE_ACTIVITY_NOW(T, A) \
{ \
const auto& trace = (T); \
if (trace != nullptr) { \
trace->ReportNow(A); \
} \
}
#define INFER_TRACE_TENSOR_ACTIVITY(T, A, N, D, BA, BY, S, DI, MT, MTI) \
{ \
const auto& trace = (T); \
if (trace != nullptr) { \
trace->ReportTensor(A, N, D, BA, BY, S, DI, MT, MTI); \
} \
}
#else
#define INFER_TRACE_ACTIVITY(T, A, TS_NS)
#define INFER_TRACE_ACTIVITY_NOW(T, A)
#define INFER_TRACE_TENSOR_ACTIVITY(T, A, N, D, BA, BY, S, DI, MT, MTI)
#endif // TRITON_ENABLE_TRACING
}}  // namespace triton::core
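The INFER_TRACE_ACTIVITY* macros above forward to the trace object only when a trace is actually attached, so untraced requests pay nothing beyond a pointer check. Below is a minimal, self-contained sketch of the same guard pattern; the Trace type and TRACE_ACTIVITY_NOW macro are illustrative stand-ins, not the real TRITONSERVER_InferenceTrace machinery.

#include <chrono>
#include <iostream>
#include <memory>

// Stand-in for InferenceTraceProxy: prints an activity with a timestamp.
struct Trace {
  void ReportNow(const char* activity)
  {
    auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
                  std::chrono::steady_clock::now().time_since_epoch())
                  .count();
    std::cout << activity << " @ " << ns << "ns\n";
  }
};

// Same shape as INFER_TRACE_ACTIVITY_NOW: evaluate the trace expression once,
// then report only if a trace is attached.
#define TRACE_ACTIVITY_NOW(T, A) \
  {                              \
    const auto& trace = (T);     \
    if (trace != nullptr) {      \
      trace->ReportNow(A);       \
    }                            \
  }

int main()
{
  std::shared_ptr<Trace> traced = std::make_shared<Trace>();
  std::shared_ptr<Trace> untraced;  // request without tracing enabled

  TRACE_ACTIVITY_NOW(traced, "REQUEST_START");    // reports
  TRACE_ACTIVITY_NOW(untraced, "REQUEST_START");  // no-op
  return 0;
}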
3rdparty/core-r22.12/src/instance_queue.cc
0 → 100644
// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "instance_queue.h"
#include "triton/common/logging.h"
namespace triton { namespace core {

InstanceQueue::InstanceQueue(size_t max_batch_size, uint64_t max_queue_delay_ns)
    : max_batch_size_(max_batch_size), max_queue_delay_ns_(max_queue_delay_ns)
{
}

size_t
InstanceQueue::Size()
{
  return payload_queue_.size();
}

bool
InstanceQueue::Empty()
{
  return payload_queue_.empty();
}

void
InstanceQueue::Enqueue(const std::shared_ptr<Payload>& payload)
{
  payload_queue_.push_back(payload);
}

void
InstanceQueue::Dequeue(
    std::shared_ptr<Payload>* payload,
    std::vector<std::shared_ptr<Payload>>* merged_payloads)
{
  *payload = payload_queue_.front();
  payload_queue_.pop_front();
  {
    std::lock_guard<std::mutex> exec_lock(*((*payload)->GetExecMutex()));
    (*payload)->SetState(Payload::State::EXECUTING);
    if ((!payload_queue_.empty()) && (max_queue_delay_ns_ > 0) &&
        (max_batch_size_ > 1) && (!(*payload)->IsSaturated())) {
      bool continue_merge;
      do {
        continue_merge = false;
        uint64_t now_ns =
            std::chrono::duration_cast<std::chrono::nanoseconds>(
                std::chrono::steady_clock::now().time_since_epoch())
                .count();
        size_t batch_size = (*payload)->BatchSize();
        if ((!payload_queue_.empty()) &&
            (!payload_queue_.front()->IsSaturated()) &&
            (now_ns - payload_queue_.front()->BatcherStartNs()) >
                max_queue_delay_ns_) {
          std::lock_guard<std::mutex> exec_lock(
              *(payload_queue_.front()->GetExecMutex()));
          payload_queue_.front()->SetState(Payload::State::EXECUTING);
          size_t front_batch_size = payload_queue_.front()->BatchSize();
          if ((batch_size + front_batch_size) <= max_batch_size_) {
            const auto& status =
                (*payload)->MergePayload(payload_queue_.front());
            if (status.IsOk()) {
              merged_payloads->push_back(payload_queue_.front());
              payload_queue_.pop_front();
              continue_merge = true;
            }
          }
        }
      } while (continue_merge);
    }
  }
}

}}  // namespace triton::core
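Dequeue() above opportunistically merges additional queued payloads into the one being dequeued, but only while the head of the queue has waited longer than max_queue_delay_ns and the combined batch still fits max_batch_size. A simplified, self-contained sketch of that merge rule, with a plain Waiting struct standing in for Payload (the names here are illustrative, not the Triton types):

#include <chrono>
#include <cstdint>
#include <deque>
#include <iostream>
#include <vector>

// Stand-in for a queued Payload: its batch size and when it entered the batcher.
struct Waiting {
  size_t batch_size;
  uint64_t start_ns;
};

// Merge queued items into 'head' while the front item has waited longer than
// 'max_queue_delay_ns' and the merged batch stays within 'max_batch_size'.
std::vector<Waiting> MergeDelayed(
    Waiting& head, std::deque<Waiting>& queue, size_t max_batch_size,
    uint64_t max_queue_delay_ns)
{
  std::vector<Waiting> merged;
  bool continue_merge;
  do {
    continue_merge = false;
    uint64_t now_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
                          std::chrono::steady_clock::now().time_since_epoch())
                          .count();
    if (!queue.empty() &&
        (now_ns - queue.front().start_ns) > max_queue_delay_ns &&
        (head.batch_size + queue.front().batch_size) <= max_batch_size) {
      head.batch_size += queue.front().batch_size;
      merged.push_back(queue.front());
      queue.pop_front();
      continue_merge = true;
    }
  } while (continue_merge);
  return merged;
}

int main()
{
  std::deque<Waiting> queue{{2, 0}, {3, 0}, {4, 0}};  // all have waited long enough
  Waiting head{2, 0};
  auto merged = MergeDelayed(head, queue, /*max_batch_size=*/8,
                             /*max_queue_delay_ns=*/1000);
  std::cout << "merged " << merged.size() << " payloads, batch size now "
            << head.batch_size << "\n";  // merges the first two, batch size 7
  return 0;
}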
3rdparty/core-r22.12/src/instance_queue.h
0 → 100644
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "payload.h"
namespace triton { namespace core {

//
// InstanceQueue
//
// A queue implementation holding Payloads ready to be scheduled on
// model instance.
class InstanceQueue {
 public:
  explicit InstanceQueue(size_t max_batch_size, uint64_t max_queue_delay_ns);

  size_t Size();
  bool Empty();
  void Enqueue(const std::shared_ptr<Payload>& payload);
  void Dequeue(
      std::shared_ptr<Payload>* payload,
      std::vector<std::shared_ptr<Payload>>* merged_payloads);

 private:
  size_t max_batch_size_;
  uint64_t max_queue_delay_ns_;
  std::deque<std::shared_ptr<Payload>> payload_queue_;
  std::shared_ptr<Payload> staged_payload_;
  std::mutex mu_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/label_provider.cc
0 → 100644
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "label_provider.h"
#include <iostream>
#include <iterator>
#include <sstream>
#include "filesystem.h"
namespace triton { namespace core {

const std::string&
LabelProvider::GetLabel(const std::string& name, size_t index) const
{
  static const std::string not_found;
  auto itr = label_map_.find(name);
  if (itr == label_map_.end()) {
    return not_found;
  }

  if (itr->second.size() <= index) {
    return not_found;
  }

  return itr->second[index];
}

Status
LabelProvider::AddLabels(const std::string& name, const std::string& filepath)
{
  std::string label_file_contents;
  RETURN_IF_ERROR(ReadTextFile(filepath, &label_file_contents));

  auto p = label_map_.insert(std::make_pair(name, std::vector<std::string>()));
  if (!p.second) {
    return Status(
        Status::Code::INTERNAL, "multiple label files for '" + name + "'");
  }

  auto itr = p.first;

  std::istringstream label_file_stream(label_file_contents);
  std::string line;
  while (std::getline(label_file_stream, line)) {
    itr->second.push_back(line);
  }

  return Status::Success;
}

const std::vector<std::string>&
LabelProvider::GetLabels(const std::string& name)
{
  static const std::vector<std::string> not_found;
  auto itr = label_map_.find(name);
  if (itr == label_map_.end()) {
    return not_found;
  }

  return itr->second;
}

Status
LabelProvider::AddLabels(
    const std::string& name, const std::vector<std::string>& labels)
{
  label_map_.emplace(name, labels);
  return Status::Success;
}

}}  // namespace triton::core
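AddLabels() above treats the label file as one label per line, with line i mapping to class index i. A self-contained sketch of just that parsing step, using an in-memory string in place of ReadTextFile (ParseLabels is an illustrative helper name, not part of the Triton API):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split label-file contents into per-index labels, one label per line.
std::vector<std::string> ParseLabels(const std::string& contents)
{
  std::vector<std::string> labels;
  std::istringstream stream(contents);
  std::string line;
  while (std::getline(stream, line)) {
    labels.push_back(line);
  }
  return labels;
}

int main()
{
  const auto labels = ParseLabels("cat\ndog\nbird\n");
  std::cout << "index 1 -> " << labels[1] << "\n";  // prints "dog"
  return 0;
}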
3rdparty/core-r22.12/src/label_provider.h
0 → 100644
// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include "constants.h"
#include "status.h"
namespace triton { namespace core {

// Provides classification labels.
class LabelProvider {
 public:
  LabelProvider() = default;

  // Return the label associated with 'name' for a given
  // 'index'. Return empty string if no label is available.
  const std::string& GetLabel(const std::string& name, size_t index) const;

  // Associate with 'name' a set of labels initialized from a given
  // 'filepath'. Within the file each label is specified on its own
  // line. The first label (line 0) is the index-0 label, the second
  // label (line 1) is the index-1 label, etc.
  Status AddLabels(const std::string& name, const std::string& filepath);

  // Return the labels associated with 'name'. Return empty vector if no labels
  // are available.
  const std::vector<std::string>& GetLabels(const std::string& name);

  // Associate with 'name' a set of 'labels'
  Status AddLabels(
      const std::string& name, const std::vector<std::string>& labels);

 private:
  DISALLOW_COPY_AND_ASSIGN(LabelProvider);

  std::unordered_map<std::string, std::vector<std::string>> label_map_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/libtritonserver.ldscript
0 → 100644
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
global:
TRITONSERVER_*;
TRITONBACKEND_*;
TRITONREPOAGENT_*;
local: *;
};
3rdparty/core-r22.12/src/memory.cc
0 → 100644
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "memory.h"
#include "pinned_memory_manager.h"
#include "triton/common/logging.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#include "cuda_memory_manager.h"
#endif // TRITON_ENABLE_GPU
namespace triton { namespace core {

//
// MemoryReference
//
MemoryReference::MemoryReference() : Memory() {}

const char*
MemoryReference::BufferAt(
    size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id) const
{
  if (idx >= buffer_.size()) {
    *byte_size = 0;
    *memory_type = TRITONSERVER_MEMORY_CPU;
    *memory_type_id = 0;
    return nullptr;
  }
  *memory_type = buffer_[idx].buffer_attributes_.MemoryType();
  *memory_type_id = buffer_[idx].buffer_attributes_.MemoryTypeId();
  *byte_size = buffer_[idx].buffer_attributes_.ByteSize();
  return buffer_[idx].buffer_;
}

const char*
MemoryReference::BufferAt(size_t idx, BufferAttributes** buffer_attributes)
{
  if (idx >= buffer_.size()) {
    *buffer_attributes = nullptr;
    return nullptr;
  }

  *buffer_attributes = &(buffer_[idx].buffer_attributes_);
  return buffer_[idx].buffer_;
}

size_t
MemoryReference::AddBuffer(
    const char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  total_byte_size_ += byte_size;
  buffer_count_++;
  buffer_.emplace_back(buffer, byte_size, memory_type, memory_type_id);
  return buffer_.size() - 1;
}

size_t
MemoryReference::AddBuffer(
    const char* buffer, BufferAttributes* buffer_attributes)
{
  total_byte_size_ += buffer_attributes->ByteSize();
  buffer_count_++;
  buffer_.emplace_back(buffer, buffer_attributes);
  return buffer_.size() - 1;
}

size_t
MemoryReference::AddBufferFront(
    const char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  total_byte_size_ += byte_size;
  buffer_count_++;
  buffer_.emplace(
      buffer_.begin(), buffer, byte_size, memory_type, memory_type_id);
  return buffer_.size() - 1;
}

//
// MutableMemory
//
MutableMemory::MutableMemory(
    char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
    : Memory(), buffer_(buffer),
      buffer_attributes_(
          BufferAttributes(byte_size, memory_type, memory_type_id, nullptr))
{
  total_byte_size_ = byte_size;
  buffer_count_ = (byte_size == 0) ? 0 : 1;
}

const char*
MutableMemory::BufferAt(
    size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id) const
{
  if (idx != 0) {
    *byte_size = 0;
    *memory_type = TRITONSERVER_MEMORY_CPU;
    *memory_type_id = 0;
    return nullptr;
  }
  *byte_size = total_byte_size_;
  *memory_type = buffer_attributes_.MemoryType();
  *memory_type_id = buffer_attributes_.MemoryTypeId();
  return buffer_;
}

const char*
MutableMemory::BufferAt(size_t idx, BufferAttributes** buffer_attributes)
{
  if (idx != 0) {
    *buffer_attributes = nullptr;
    return nullptr;
  }

  *buffer_attributes = &buffer_attributes_;
  return buffer_;
}

char*
MutableMemory::MutableBuffer(
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  if (memory_type != nullptr) {
    *memory_type = buffer_attributes_.MemoryType();
  }
  if (memory_type_id != nullptr) {
    *memory_type_id = buffer_attributes_.MemoryTypeId();
  }

  return buffer_;
}

//
// AllocatedMemory
//
AllocatedMemory::AllocatedMemory(
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
    : MutableMemory(nullptr, byte_size, memory_type, memory_type_id)
{
  if (total_byte_size_ != 0) {
    // Allocate memory with the following fallback policy:
    // CUDA memory -> pinned system memory -> non-pinned system memory
    switch (buffer_attributes_.MemoryType()) {
#ifdef TRITON_ENABLE_GPU
      case TRITONSERVER_MEMORY_GPU: {
        auto status = CudaMemoryManager::Alloc(
            (void**)&buffer_, total_byte_size_,
            buffer_attributes_.MemoryTypeId());
        if (!status.IsOk()) {
          static bool warning_logged = false;
          if (!warning_logged) {
            LOG_WARNING << status.Message()
                        << ", falling back to pinned system memory";
            warning_logged = true;
          }
          goto pinned_memory_allocation;
        }
        break;
      }
      pinned_memory_allocation:
#endif  // TRITON_ENABLE_GPU
      default: {
        TRITONSERVER_MemoryType memory_type = buffer_attributes_.MemoryType();
        auto status = PinnedMemoryManager::Alloc(
            (void**)&buffer_, total_byte_size_, &memory_type, true);
        buffer_attributes_.SetMemoryType(memory_type);
        if (!status.IsOk()) {
          LOG_ERROR << status.Message();
          buffer_ = nullptr;
        }
        break;
      }
    }
  }
  total_byte_size_ = (buffer_ == nullptr) ? 0 : total_byte_size_;
}

AllocatedMemory::~AllocatedMemory()
{
  if (buffer_ != nullptr) {
    switch (buffer_attributes_.MemoryType()) {
      case TRITONSERVER_MEMORY_GPU: {
#ifdef TRITON_ENABLE_GPU
        auto status =
            CudaMemoryManager::Free(buffer_, buffer_attributes_.MemoryTypeId());
        if (!status.IsOk()) {
          LOG_ERROR << status.Message();
        }
#endif  // TRITON_ENABLE_GPU
        break;
      }

      default: {
        auto status = PinnedMemoryManager::Free(buffer_);
        if (!status.IsOk()) {
          LOG_ERROR << status.Message();
          buffer_ = nullptr;
        }
        break;
      }
    }
    buffer_ = nullptr;
  }
}

}}  // namespace triton::core
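AllocatedMemory's constructor above implements the fallback chain spelled out in its comment: try CUDA memory first, fall back to pinned system memory (which may itself fall back to non-pinned memory), and record the memory type that was actually obtained so callers can check it. A simplified, self-contained sketch of that policy with stub allocators in place of CudaMemoryManager/PinnedMemoryManager (all names here are illustrative):

#include <cstdlib>
#include <iostream>

enum class MemType { GPU, PINNED, CPU };

// Stub allocators; the real managers return a Status and may fail.
bool GpuAlloc(void**, size_t) { return false; /* pretend the CUDA alloc failed */ }
bool PinnedAlloc(void** ptr, size_t size, MemType* actual)
{
  *ptr = std::malloc(size);  // stand-in for a pinned allocation
  *actual = (*ptr != nullptr) ? MemType::PINNED : MemType::CPU;
  return *ptr != nullptr;
}

// Allocate preferring GPU memory, falling back to (pinned) system memory,
// and report which memory type the caller actually got.
void* AllocWithFallback(size_t size, MemType requested, MemType* actual)
{
  void* buffer = nullptr;
  if (requested == MemType::GPU && GpuAlloc(&buffer, size)) {
    *actual = MemType::GPU;
    return buffer;
  }
  PinnedAlloc(&buffer, size, actual);
  return buffer;
}

int main()
{
  MemType actual;
  void* buf = AllocWithFallback(1024, MemType::GPU, &actual);
  std::cout << "got " << (actual == MemType::PINNED ? "pinned" : "other")
            << " memory\n";
  std::free(buf);
  return 0;
}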
3rdparty/core-r22.12/src/memory.h
0 → 100644
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <vector>
#include "buffer_attributes.h"
#include "constants.h"
#include "status.h"
namespace triton { namespace core {

//
// Memory used to access data in inference requests
//
class Memory {
 public:
  // Get the 'idx'-th data block in the buffer. Using index to avoid
  // maintaining internal state such that one buffer can be shared
  // across multiple providers.
  // 'idx' zero base index. Valid indices are continuous.
  // 'byte_size' returns the byte size of the chunk of bytes.
  // 'memory_type' returns the memory type of the chunk of bytes.
  // 'memory_type_id' returns the memory type id of the chunk of bytes.
  // Return the pointer to the data block. Returns nullptr if 'idx' is
  // out of range
  virtual const char* BufferAt(
      size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
      int64_t* memory_type_id) const = 0;

  // Similar to the above BufferAt but with BufferAttributes.
  virtual const char* BufferAt(
      size_t idx, BufferAttributes** buffer_attributes) = 0;

  // Get the number of contiguous buffers composing the memory.
  size_t BufferCount() const { return buffer_count_; }

  // Return the total byte size of the data buffer
  size_t TotalByteSize() const { return total_byte_size_; }

 protected:
  Memory() : total_byte_size_(0), buffer_count_(0) {}

  size_t total_byte_size_;
  size_t buffer_count_;
};

//
// MemoryReference
//
class MemoryReference : public Memory {
 public:
  // Create a read-only data buffer as a reference to other data buffer
  MemoryReference();

  //\see Memory::BufferAt()
  const char* BufferAt(
      size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
      int64_t* memory_type_id) const override;
  const char* BufferAt(
      size_t idx, BufferAttributes** buffer_attributes) override;

  // Add a 'buffer' with 'byte_size' as part of this data buffer
  // Return the index of the buffer
  size_t AddBuffer(
      const char* buffer, size_t byte_size,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id);
  size_t AddBuffer(const char* buffer, BufferAttributes* buffer_attributes);

  // Add a 'buffer' with 'byte_size' as part of this data buffer in the front
  // Return the index of the buffer
  size_t AddBufferFront(
      const char* buffer, size_t byte_size,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id);

 private:
  struct Block {
    Block(
        const char* buffer, size_t byte_size,
        TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
        : buffer_(buffer),
          buffer_attributes_(
              BufferAttributes(byte_size, memory_type, memory_type_id, nullptr))
    {
    }

    Block(const char* buffer, BufferAttributes* buffer_attributes)
        : buffer_(buffer), buffer_attributes_(*buffer_attributes)
    {
    }

    const char* buffer_;
    BufferAttributes buffer_attributes_;
  };

  std::vector<Block> buffer_;
};

//
// MutableMemory
//
class MutableMemory : public Memory {
 public:
  // Create a mutable data buffer referencing to other data buffer.
  MutableMemory(
      char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
      int64_t memory_type_id);
  virtual ~MutableMemory() {}

  //\see Memory::BufferAt()
  const char* BufferAt(
      size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
      int64_t* memory_type_id) const override;

  //\see Memory::BufferAt()
  const char* BufferAt(
      size_t idx, BufferAttributes** buffer_attributes) override;

  // Return a pointer to the base address of the mutable buffer. If
  // non-null 'memory_type' returns the memory type of the chunk of
  // bytes. If non-null 'memory_type_id' returns the memory type id of
  // the chunk of bytes.
  char* MutableBuffer(
      TRITONSERVER_MemoryType* memory_type = nullptr,
      int64_t* memory_type_id = nullptr);

  DISALLOW_COPY_AND_ASSIGN(MutableMemory);

 protected:
  MutableMemory() : Memory() {}

  char* buffer_;
  BufferAttributes buffer_attributes_;
};

//
// AllocatedMemory
//
class AllocatedMemory : public MutableMemory {
 public:
  // Create a continuous data buffer with 'byte_size', 'memory_type' and
  // 'memory_type_id'. Note that the buffer may be created on different memory
  // type and memory type id if the original request type and id can not be
  // satisfied, thus the function caller should always check the actual memory
  // type and memory type id before use.
  AllocatedMemory(
      size_t byte_size, TRITONSERVER_MemoryType memory_type,
      int64_t memory_type_id);
  ~AllocatedMemory() override;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/metric_family.cc
0 → 100644
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef TRITON_ENABLE_METRICS
#include "metric_family.h"
#include "metrics.h"
#include "triton/common/logging.h"
namespace triton { namespace core {

//
// Implementation for TRITONSERVER_MetricFamily.
//
MetricFamily::MetricFamily(
    TRITONSERVER_MetricKind kind, const char* name, const char* description)
{
  auto registry = Metrics::GetRegistry();
  switch (kind) {
    case TRITONSERVER_METRIC_KIND_COUNTER:
      family_ = reinterpret_cast<void*>(&prometheus::BuildCounter()
                                             .Name(name)
                                             .Help(description)
                                             .Register(*registry));
      break;
    case TRITONSERVER_METRIC_KIND_GAUGE:
      family_ = reinterpret_cast<void*>(&prometheus::BuildGauge()
                                             .Name(name)
                                             .Help(description)
                                             .Register(*registry));
      break;
    default:
      throw std::invalid_argument(
          "Unsupported kind passed to MetricFamily constructor.");
  }

  kind_ = kind;
}

void*
MetricFamily::Add(std::map<std::string, std::string> label_map, Metric* metric)
{
  void* prom_metric = nullptr;
  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      auto counter_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Counter>*>(family_);
      auto counter_ptr = &counter_family_ptr->Add(label_map);
      prom_metric = reinterpret_cast<void*>(counter_ptr);
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Gauge>*>(family_);
      auto gauge_ptr = &gauge_family_ptr->Add(label_map);
      prom_metric = reinterpret_cast<void*>(gauge_ptr);
      break;
    }
    default:
      throw std::invalid_argument(
          "Unsupported family kind passed to Metric constructor.");
  }

  std::lock_guard<std::mutex> lk(metric_mtx_);
  ++prom_metric_ref_cnt_[prom_metric];
  child_metrics_.insert(metric);
  return prom_metric;
}

void
MetricFamily::Remove(void* prom_metric, Metric* metric)
{
  {
    // Remove reference to dependent Metric object
    std::lock_guard<std::mutex> lk(metric_mtx_);
    child_metrics_.erase(metric);
  }

  if (prom_metric == nullptr) {
    return;
  }

  {
    std::lock_guard<std::mutex> lk(metric_mtx_);
    const auto it = prom_metric_ref_cnt_.find(prom_metric);
    if (it != prom_metric_ref_cnt_.end()) {
      --it->second;
      if (it->second == 0) {
        prom_metric_ref_cnt_.erase(it);
      } else {
        // Done as it is not the last reference
        return;
      }
    }
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      auto counter_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Counter>*>(family_);
      auto counter_ptr = reinterpret_cast<prometheus::Counter*>(prom_metric);
      counter_family_ptr->Remove(counter_ptr);
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Gauge>*>(family_);
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(prom_metric);
      gauge_family_ptr->Remove(gauge_ptr);
      break;
    }
    default:
      // Invalid kind should be caught in constructor
      LOG_ERROR << "Unsupported kind in Metric destructor.";
      break;
  }
}

void
MetricFamily::InvalidateReferences()
{
  std::lock_guard<std::mutex> lk(metric_mtx_);
  for (auto& metric : child_metrics_) {
    if (metric != nullptr) {
      metric->Invalidate();
    }
  }
  child_metrics_.clear();
}

MetricFamily::~MetricFamily()
{
  if (NumMetrics() > 0) {
    LOG_WARNING << "MetricFamily was deleted before its child Metrics, this "
                   "should not happen. Make sure to delete all child Metrics "
                   "before deleting their MetricFamily.";
  }
  InvalidateReferences();
  // DLIS-4072: Support for removing metric families from registry
}

//
// Implementation for TRITONSERVER_Metric.
//
Metric::Metric(
    TRITONSERVER_MetricFamily* family,
    std::vector<const InferenceParameter*> labels)
{
  family_ = reinterpret_cast<MetricFamily*>(family);
  kind_ = family_->Kind();

  // Create map of labels from InferenceParameters
  std::map<std::string, std::string> label_map;
  for (const auto& param : labels) {
    if (param->Type() != TRITONSERVER_PARAMETER_STRING) {
      throw std::invalid_argument(
          "Parameter [" + param->Name() +
          "] must have a type of TRITONSERVER_PARAMETER_STRING to be "
          "added as a label.");
    }

    label_map[param->Name()] =
        std::string(reinterpret_cast<const char*>(param->ValuePointer()));
  }

  metric_ = family_->Add(label_map, this);
}

Metric::~Metric()
{
  if (family_ != nullptr) {
    family_->Remove(metric_, this);
  } else {
    LOG_WARNING << "Corresponding MetricFamily was deleted before this Metric, "
                   "this should not happen. Make sure to delete a Metric "
                   "before deleting its MetricFamily.";
  }

  // Catch lifetime management / invalid reference issues
  Invalidate();
}

void
Metric::Invalidate()
{
  family_ = nullptr;
  metric_ = nullptr;
}

TRITONSERVER_Error*
Metric::Value(double* value)
{
  if (metric_ == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Could not get metric value. Metric has been invalidated.");
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      auto counter_ptr = reinterpret_cast<prometheus::Counter*>(metric_);
      LOG_VERBOSE(1) << "SETTING COUNTER METRIC FROM: " << *value << " to "
                     << counter_ptr->Value();
      *value = counter_ptr->Value();
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(metric_);
      LOG_VERBOSE(1) << "SETTING GAUGE METRIC FROM: " << *value << " to "
                     << gauge_ptr->Value();
      *value = gauge_ptr->Value();
      break;
    }
    default:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "Unsupported TRITONSERVER_MetricKind");
  }

  return nullptr;  // Success
}

TRITONSERVER_Error*
Metric::Increment(double value)
{
  if (metric_ == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Could not increment metric value. Metric has been invalidated.");
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      if (value < 0.0) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "TRITONSERVER_METRIC_KIND_COUNTER can only be incremented "
            "monotonically by non-negative values.");
      }

      auto counter_ptr = reinterpret_cast<prometheus::Counter*>(metric_);
      counter_ptr->Increment(value);
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(metric_);
      // Gauge::Increment works for both positive and negative values as of
      // prometheus-cpp v1.0 but for now on v0.7 we defer call to
      // Increment/Decrement based on the sign of value
      // https://github.com/jupp0r/prometheus-cpp/blob/master/core/src/gauge.cc
      if (value < 0.0) {
        gauge_ptr->Decrement(-1.0 * value);
      } else {
        gauge_ptr->Increment(value);
      }
      break;
    }
    default:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "Unsupported TRITONSERVER_MetricKind");
  }

  return nullptr;  // Success
}

TRITONSERVER_Error*
Metric::Set(double value)
{
  if (metric_ == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Could not set metric value. Metric has been invalidated.");
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "TRITONSERVER_METRIC_KIND_COUNTER does not support Set");
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(metric_);
      gauge_ptr->Set(value);
      break;
    }
    default:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "Unsupported TRITONSERVER_MetricKind");
  }

  return nullptr;  // Success
}

}}  // namespace triton::core
#endif // TRITON_ENABLE_METRICS
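Because prometheus-cpp hands back the same underlying metric object for an identical label set, MetricFamily above reference-counts each raw metric pointer and only asks the family to Remove() it once the last Metric object releases it. A self-contained sketch of that reference-counting rule (RefCountedRegistry is an illustrative stand-in, not the Triton class):

#include <iostream>
#include <unordered_map>

// Track how many users share each raw metric pointer; report when the last
// reference is dropped and the shared metric could actually be removed.
class RefCountedRegistry {
 public:
  void Acquire(void* metric) { ++ref_cnt_[metric]; }

  // Returns true when the caller dropped the last reference.
  bool Release(void* metric)
  {
    auto it = ref_cnt_.find(metric);
    if (it == ref_cnt_.end()) {
      return false;
    }
    if (--it->second == 0) {
      ref_cnt_.erase(it);
      return true;  // safe to remove the shared prometheus metric now
    }
    return false;  // other Metric objects still point at it
  }

 private:
  std::unordered_map<void*, size_t> ref_cnt_;
};

int main()
{
  int shared_metric = 0;  // stand-in for a prometheus::Counter*
  RefCountedRegistry reg;
  reg.Acquire(&shared_metric);
  reg.Acquire(&shared_metric);  // second Metric with identical labels
  std::cout << reg.Release(&shared_metric) << "\n";  // 0: still referenced
  std::cout << reg.Release(&shared_metric) << "\n";  // 1: last reference gone
  return 0;
}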
3rdparty/core-r22.12/src/metric_family.h
0 → 100644
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#ifdef TRITON_ENABLE_METRICS
#include <mutex>
#include <set>
#include <unordered_map>
#include "infer_parameter.h"
#include "prometheus/registry.h"
#include "tritonserver_apis.h"
namespace triton { namespace core {

//
// Implementation for TRITONSERVER_MetricFamily.
//
class Metric;
class MetricFamily {
 public:
  MetricFamily(
      TRITONSERVER_MetricKind kind, const char* name, const char* description);
  ~MetricFamily();

  void* Family() const { return family_; }
  TRITONSERVER_MetricKind Kind() const { return kind_; }

  void* Add(std::map<std::string, std::string> label_map, Metric* metric);
  void Remove(void* prom_metric, Metric* metric);

  int NumMetrics()
  {
    std::lock_guard<std::mutex> lk(metric_mtx_);
    return child_metrics_.size();
  }

 private:
  // If a MetricFamily is deleted before its dependent Metric, we want to
  // invalidate the reference so we don't access invalid memory.
  void InvalidateReferences();

  void* family_;
  TRITONSERVER_MetricKind kind_;
  // Synchronize access of related metric objects
  std::mutex metric_mtx_;
  // Prometheus returns the existing metric pointer if the metric with the same
  // set of labels are requested, as a result, different Metric objects may
  // refer to the same prometheus metric. So we must track the reference count
  // of the metric and request prometheus to remove it only when all references
  // are released.
  std::unordered_map<void*, size_t> prom_metric_ref_cnt_;
  // Maintain references to metrics created from this metric family to
  // invalidate their references if a family is deleted before its metric
  std::set<Metric*> child_metrics_;
};

//
// Implementation for TRITONSERVER_Metric.
//
class Metric {
 public:
  Metric(
      TRITONSERVER_MetricFamily* family,
      std::vector<const InferenceParameter*> labels);
  ~Metric();

  MetricFamily* Family() const { return family_; }
  TRITONSERVER_MetricKind Kind() const { return kind_; }

  TRITONSERVER_Error* Value(double* value);
  TRITONSERVER_Error* Increment(double value);
  TRITONSERVER_Error* Set(double value);

  // If a MetricFamily is deleted before its dependent Metric, we want to
  // invalidate the references so we don't access invalid memory.
  void Invalidate();

 private:
  void* metric_;
  MetricFamily* family_;
  TRITONSERVER_MetricKind kind_;
};

}}  // namespace triton::core
#endif // TRITON_ENABLE_METRICS
3rdparty/core-r22.12/src/metric_model_reporter.cc
0 → 100644
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "metric_model_reporter.h"
#ifdef TRITON_ENABLE_METRICS
#include "constants.h"
#include "metrics.h"
namespace triton { namespace core {

Status
MetricModelReporter::Create(
    const std::string& model_name, const int64_t model_version,
    const int device, const triton::common::MetricTagsMap& model_tags,
    std::shared_ptr<MetricModelReporter>* metric_model_reporter)
{
  static std::mutex mtx;
  static std::unordered_map<size_t, std::weak_ptr<MetricModelReporter>>
      reporter_map;

  std::map<std::string, std::string> labels;
  GetMetricLabels(&labels, model_name, model_version, device, model_tags);
  auto hash_labels = Metrics::HashLabels(labels);

  std::lock_guard<std::mutex> lock(mtx);

  const auto& itr = reporter_map.find(hash_labels);
  if (itr != reporter_map.end()) {
    // Found in map. If the weak_ptr is still valid that means that
    // there are other models using the reporter and we just reuse that
    // same reporter. If the weak_ptr is not valid then we need to remove
    // the weak_ptr from the map and create the reporter again.
    *metric_model_reporter = itr->second.lock();
    if (*metric_model_reporter != nullptr) {
      return Status::Success;
    }

    reporter_map.erase(itr);
  }

  metric_model_reporter->reset(
      new MetricModelReporter(model_name, model_version, device, model_tags));
  reporter_map.insert({hash_labels, *metric_model_reporter});
  return Status::Success;
}

MetricModelReporter::MetricModelReporter(
    const std::string& model_name, const int64_t model_version,
    const int device, const triton::common::MetricTagsMap& model_tags)
{
  std::map<std::string, std::string> labels;
  GetMetricLabels(&labels, model_name, model_version, device, model_tags);

  metric_inf_success_ =
      CreateCounterMetric(Metrics::FamilyInferenceSuccess(), labels);
  metric_inf_failure_ =
      CreateCounterMetric(Metrics::FamilyInferenceFailure(), labels);
  metric_inf_count_ =
      CreateCounterMetric(Metrics::FamilyInferenceCount(), labels);
  metric_inf_exec_count_ =
      CreateCounterMetric(Metrics::FamilyInferenceExecutionCount(), labels);
  metric_inf_request_duration_us_ =
      CreateCounterMetric(Metrics::FamilyInferenceRequestDuration(), labels);
  metric_inf_queue_duration_us_ =
      CreateCounterMetric(Metrics::FamilyInferenceQueueDuration(), labels);
  metric_inf_compute_input_duration_us_ = CreateCounterMetric(
      Metrics::FamilyInferenceComputeInputDuration(), labels);
  metric_inf_compute_infer_duration_us_ = CreateCounterMetric(
      Metrics::FamilyInferenceComputeInferDuration(), labels);
  metric_inf_compute_output_duration_us_ = CreateCounterMetric(
      Metrics::FamilyInferenceComputeOutputDuration(), labels);
  metric_cache_hit_count_ =
      CreateCounterMetric(Metrics::FamilyCacheHitCount(), labels);
  metric_cache_hit_lookup_duration_us_ =
      CreateCounterMetric(Metrics::FamilyCacheHitLookupDuration(), labels);
  metric_cache_miss_count_ =
      CreateCounterMetric(Metrics::FamilyCacheMissCount(), labels);
  metric_cache_miss_lookup_duration_us_ =
      CreateCounterMetric(Metrics::FamilyCacheMissLookupDuration(), labels);
  metric_cache_miss_insertion_duration_us_ =
      CreateCounterMetric(Metrics::FamilyCacheMissInsertionDuration(), labels);
}

MetricModelReporter::~MetricModelReporter()
{
  Metrics::FamilyInferenceSuccess().Remove(metric_inf_success_);
  Metrics::FamilyInferenceFailure().Remove(metric_inf_failure_);
  Metrics::FamilyInferenceCount().Remove(metric_inf_count_);
  Metrics::FamilyInferenceExecutionCount().Remove(metric_inf_exec_count_);
  Metrics::FamilyInferenceRequestDuration().Remove(
      metric_inf_request_duration_us_);
  Metrics::FamilyInferenceQueueDuration().Remove(metric_inf_queue_duration_us_);
  Metrics::FamilyInferenceComputeInputDuration().Remove(
      metric_inf_compute_input_duration_us_);
  Metrics::FamilyInferenceComputeInferDuration().Remove(
      metric_inf_compute_infer_duration_us_);
  Metrics::FamilyInferenceComputeOutputDuration().Remove(
      metric_inf_compute_output_duration_us_);
  Metrics::FamilyCacheHitCount().Remove(metric_cache_hit_count_);
  Metrics::FamilyCacheHitLookupDuration().Remove(
      metric_cache_hit_lookup_duration_us_);
  Metrics::FamilyCacheMissCount().Remove(metric_cache_miss_count_);
  Metrics::FamilyCacheMissInsertionDuration().Remove(
      metric_cache_miss_insertion_duration_us_);
}

void
MetricModelReporter::GetMetricLabels(
    std::map<std::string, std::string>* labels, const std::string& model_name,
    const int64_t model_version, const int device,
    const triton::common::MetricTagsMap& model_tags)
{
  labels->insert(std::map<std::string, std::string>::value_type(
      std::string(kMetricsLabelModelName), model_name));
  labels->insert(std::map<std::string, std::string>::value_type(
      std::string(kMetricsLabelModelVersion), std::to_string(model_version)));
  for (const auto& tag : model_tags) {
    labels->insert(std::map<std::string, std::string>::value_type(
        "_" + tag.first, tag.second));
  }

  // 'device' can be < 0 to indicate that the GPU is not known. In
  // that case use a metric that doesn't have the gpu_uuid label.
  if (device >= 0) {
    std::string uuid;
    if (Metrics::UUIDForCudaDevice(device, &uuid)) {
      labels->insert(std::map<std::string, std::string>::value_type(
          std::string(kMetricsLabelGpuUuid), uuid));
    }
  }
}

prometheus::Counter*
MetricModelReporter::CreateCounterMetric(
    prometheus::Family<prometheus::Counter>& family,
    const std::map<std::string, std::string>& labels)
{
  return &family.Add(labels);
}

}}  // namespace triton::core
#endif // TRITON_ENABLE_METRICS
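MetricModelReporter::Create() above deduplicates reporters by hashing the label map and caching a weak_ptr per hash under a static mutex: model versions with identical labels share one reporter, and entries whose reporter has already been destroyed are dropped and recreated. A simplified, self-contained sketch of that weak_ptr cache (Reporter and GetOrCreate are illustrative names, and the mutex is omitted for brevity):

#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

struct Reporter {
  explicit Reporter(std::string labels) : labels(std::move(labels)) {}
  std::string labels;
};

// Return a shared reporter for 'key', reusing a still-live cached one if possible.
std::shared_ptr<Reporter> GetOrCreate(const std::string& key)
{
  static std::unordered_map<size_t, std::weak_ptr<Reporter>> cache;
  const size_t hash = std::hash<std::string>{}(key);

  auto it = cache.find(hash);
  if (it != cache.end()) {
    if (auto existing = it->second.lock()) {
      return existing;  // another model with the same labels already made one
    }
    cache.erase(it);  // stale entry: the previous reporter was destroyed
  }

  auto created = std::make_shared<Reporter>(key);
  cache[hash] = created;
  return created;
}

int main()
{
  auto a = GetOrCreate("model=resnet,version=1");
  auto b = GetOrCreate("model=resnet,version=1");
  std::cout << (a == b) << "\n";  // 1: the same reporter instance is shared
  return 0;
}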
3rdparty/core-r22.12/src/metric_model_reporter.h
0 → 100644
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "status.h"
#include "triton/common/model_config.h"
#ifdef TRITON_ENABLE_METRICS
#include "prometheus/registry.h"
#endif // TRITON_ENABLE_METRICS
namespace triton { namespace core {

//
// Interface for a metric reporter for a given version of a model.
//
class MetricModelReporter {
 public:
#ifdef TRITON_ENABLE_METRICS
  static Status Create(
      const std::string& model_name, const int64_t model_version,
      const int device, const triton::common::MetricTagsMap& model_tags,
      std::shared_ptr<MetricModelReporter>* metric_model_reporter);

  ~MetricModelReporter();

  // Get a metric for the given model, version and GPU index.
  prometheus::Counter& MetricInferenceSuccess() const { return *metric_inf_success_; }
  prometheus::Counter& MetricInferenceFailure() const { return *metric_inf_failure_; }
  prometheus::Counter& MetricInferenceCount() const { return *metric_inf_count_; }
  prometheus::Counter& MetricInferenceExecutionCount() const
  {
    return *metric_inf_exec_count_;
  }
  prometheus::Counter& MetricInferenceRequestDuration() const
  {
    return *metric_inf_request_duration_us_;
  }
  prometheus::Counter& MetricInferenceQueueDuration() const
  {
    return *metric_inf_queue_duration_us_;
  }
  prometheus::Counter& MetricInferenceComputeInputDuration() const
  {
    return *metric_inf_compute_input_duration_us_;
  }
  prometheus::Counter& MetricInferenceComputeInferDuration() const
  {
    return *metric_inf_compute_infer_duration_us_;
  }
  prometheus::Counter& MetricInferenceComputeOutputDuration() const
  {
    return *metric_inf_compute_output_duration_us_;
  }
  prometheus::Counter& MetricCacheHitCount() const { return *metric_cache_hit_count_; }
  prometheus::Counter& MetricCacheHitLookupDuration() const
  {
    return *metric_cache_hit_lookup_duration_us_;
  }
  prometheus::Counter& MetricCacheMissCount() const
  {
    return *metric_cache_miss_count_;
  }
  prometheus::Counter& MetricCacheMissLookupDuration() const
  {
    return *metric_cache_miss_lookup_duration_us_;
  }
  prometheus::Counter& MetricCacheMissInsertionDuration() const
  {
    return *metric_cache_miss_insertion_duration_us_;
  }

 private:
  MetricModelReporter(
      const std::string& model_name, const int64_t model_version,
      const int device, const triton::common::MetricTagsMap& model_tags);

  static void GetMetricLabels(
      std::map<std::string, std::string>* labels, const std::string& model_name,
      const int64_t model_version, const int device,
      const triton::common::MetricTagsMap& model_tags);
  prometheus::Counter* CreateCounterMetric(
      prometheus::Family<prometheus::Counter>& family,
      const std::map<std::string, std::string>& labels);

  prometheus::Counter* metric_inf_success_;
  prometheus::Counter* metric_inf_failure_;
  prometheus::Counter* metric_inf_count_;
  prometheus::Counter* metric_inf_exec_count_;
  prometheus::Counter* metric_inf_request_duration_us_;
  prometheus::Counter* metric_inf_queue_duration_us_;
  prometheus::Counter* metric_inf_compute_input_duration_us_;
  prometheus::Counter* metric_inf_compute_infer_duration_us_;
  prometheus::Counter* metric_inf_compute_output_duration_us_;
  prometheus::Counter* metric_cache_hit_count_;
  prometheus::Counter* metric_cache_hit_lookup_duration_us_;
  prometheus::Counter* metric_cache_miss_count_;
  prometheus::Counter* metric_cache_miss_lookup_duration_us_;
  prometheus::Counter* metric_cache_miss_insertion_duration_us_;
#endif  // TRITON_ENABLE_METRICS
};

}}  // namespace triton::core
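For orientation, a minimal usage sketch of the reporter declared above; the model name, tag map contents, and the latency value are illustrative placeholders, not part of this commit:

#ifdef TRITON_ENABLE_METRICS
// Hypothetical caller: create a reporter for one model version and bump
// the per-model counters it exposes.
std::shared_ptr<triton::core::MetricModelReporter> reporter;
triton::common::MetricTagsMap tags;  // assumed empty for this example
auto status = triton::core::MetricModelReporter::Create(
    "resnet50" /* example model name */, 1 /* version */,
    0 /* CUDA device */, tags, &reporter);
if (status.IsOk()) {
  reporter->MetricInferenceSuccess().Increment(1);
  reporter->MetricInferenceRequestDuration().Increment(1234 /* us, example */);
}
#endif  // TRITON_ENABLE_METRICS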
3rdparty/core-r22.12/src/metrics.cc
0 → 100644
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifdef TRITON_ENABLE_METRICS
#include "metrics.h"
#include <thread>
#include "constants.h"
#include "prometheus/detail/utils.h"
#include "triton/common/logging.h"
#ifdef TRITON_ENABLE_METRICS_GPU
#include <cuda_runtime_api.h>
#include <dcgm_agent.h>
#include <cstring>
#include <set>
#include <string>
#endif // TRITON_ENABLE_METRICS_GPU
namespace triton { namespace core {

Metrics::Metrics()
    : registry_(std::make_shared<prometheus::Registry>()),
      serializer_(new prometheus::TextSerializer()),
      inf_success_family_(prometheus::BuildCounter()
              .Name("nv_inference_request_success")
              .Help("Number of successful inference requests, all batch sizes")
              .Register(*registry_)),
      inf_failure_family_(prometheus::BuildCounter()
              .Name("nv_inference_request_failure")
              .Help("Number of failed inference requests, all batch sizes")
              .Register(*registry_)),
      inf_count_family_(prometheus::BuildCounter()
              .Name("nv_inference_count")
              .Help("Number of inferences performed (does not include cached requests)")
              .Register(*registry_)),
      inf_count_exec_family_(prometheus::BuildCounter()
              .Name("nv_inference_exec_count")
              .Help("Number of model executions performed (does not include cached requests)")
              .Register(*registry_)),
      inf_request_duration_us_family_(prometheus::BuildCounter()
              .Name("nv_inference_request_duration_us")
              .Help("Cumulative inference request duration in microseconds "
                    "(includes cached requests)")
              .Register(*registry_)),
      inf_queue_duration_us_family_(prometheus::BuildCounter()
              .Name("nv_inference_queue_duration_us")
              .Help("Cumulative inference queuing duration in microseconds "
                    "(includes cached requests)")
              .Register(*registry_)),
      inf_compute_input_duration_us_family_(prometheus::BuildCounter()
              .Name("nv_inference_compute_input_duration_us")
              .Help("Cumulative compute input duration in microseconds (does "
                    "not include cached requests)")
              .Register(*registry_)),
      inf_compute_infer_duration_us_family_(prometheus::BuildCounter()
              .Name("nv_inference_compute_infer_duration_us")
              .Help("Cumulative compute inference duration in microseconds "
                    "(does not include cached requests)")
              .Register(*registry_)),
      inf_compute_output_duration_us_family_(prometheus::BuildCounter()
              .Name("nv_inference_compute_output_duration_us")
              .Help("Cumulative inference compute output duration in "
                    "microseconds (does not include cached requests)")
              .Register(*registry_)),
      cache_num_entries_family_(prometheus::BuildGauge()
              .Name("nv_cache_num_entries")
              .Help("Number of responses stored in response cache")
              .Register(*registry_)),
      cache_num_lookups_family_(prometheus::BuildGauge()
              .Name("nv_cache_num_lookups")
              .Help("Number of cache lookups in response cache")
              .Register(*registry_)),
      cache_num_hits_family_(prometheus::BuildGauge()
              .Name("nv_cache_num_hits")
              .Help("Number of cache hits in response cache")
              .Register(*registry_)),
      cache_num_misses_family_(prometheus::BuildGauge()
              .Name("nv_cache_num_misses")
              .Help("Number of cache misses in response cache")
              .Register(*registry_)),
      cache_num_evictions_family_(prometheus::BuildGauge()
              .Name("nv_cache_num_evictions")
              .Help("Number of cache evictions in response cache")
              .Register(*registry_)),
      cache_lookup_duration_us_family_(prometheus::BuildGauge()
              .Name("nv_cache_lookup_duration")
              .Help("Total cache lookup duration (hit and miss), in microseconds")
              .Register(*registry_)),
      cache_insertion_duration_us_family_(prometheus::BuildGauge()
              .Name("nv_cache_insertion_duration")
              .Help("Total cache insertion duration, in microseconds")
              .Register(*registry_)),
      cache_util_family_(prometheus::BuildGauge()
              .Name("nv_cache_util")
              .Help("Cache utilization [0.0 - 1.0]")
              .Register(*registry_)),
      // Per-model cache metric families
      cache_num_hits_model_family_(prometheus::BuildCounter()
              .Name("nv_cache_num_hits_per_model")
              .Help("Number of cache hits per model")
              .Register(*registry_)),
      cache_hit_lookup_duration_us_model_family_(prometheus::BuildCounter()
              .Name("nv_cache_hit_lookup_duration_per_model")
              .Help("Total cache hit lookup duration per model, in microseconds")
              .Register(*registry_)),
      cache_num_misses_model_family_(prometheus::BuildCounter()
              .Name("nv_cache_num_misses_per_model")
              .Help("Number of cache misses per model")
              .Register(*registry_)),
      cache_miss_lookup_duration_us_model_family_(prometheus::BuildCounter()
              .Name("nv_cache_miss_lookup_duration_per_model")
              .Help("Total cache miss lookup duration per model, in microseconds")
              .Register(*registry_)),
      cache_miss_insertion_duration_us_model_family_(prometheus::BuildCounter()
              .Name("nv_cache_miss_insertion_duration_per_model")
              .Help("Total cache miss insertion duration per model, in microseconds")
              .Register(*registry_)),
#ifdef TRITON_ENABLE_METRICS_GPU
      gpu_utilization_family_(prometheus::BuildGauge()
              .Name("nv_gpu_utilization")
              .Help("GPU utilization rate [0.0 - 1.0)")
              .Register(*registry_)),
      gpu_memory_total_family_(prometheus::BuildGauge()
              .Name("nv_gpu_memory_total_bytes")
              .Help("GPU total memory, in bytes")
              .Register(*registry_)),
      gpu_memory_used_family_(prometheus::BuildGauge()
              .Name("nv_gpu_memory_used_bytes")
              .Help("GPU used memory, in bytes")
              .Register(*registry_)),
      gpu_power_usage_family_(prometheus::BuildGauge()
              .Name("nv_gpu_power_usage")
              .Help("GPU power usage in watts")
              .Register(*registry_)),
      gpu_power_limit_family_(prometheus::BuildGauge()
              .Name("nv_gpu_power_limit")
              .Help("GPU power management limit in watts")
              .Register(*registry_)),
      gpu_energy_consumption_family_(prometheus::BuildCounter()
              .Name("nv_energy_consumption")
              .Help("GPU energy consumption in joules since the Triton Server "
                    "started")
              .Register(*registry_)),
#endif  // TRITON_ENABLE_METRICS_GPU
#ifdef TRITON_ENABLE_METRICS_CPU
      cpu_utilization_family_(prometheus::BuildGauge()
              .Name("nv_cpu_utilization")
              .Help("CPU utilization rate [0.0 - 1.0]")
              .Register(*registry_)),
      cpu_memory_total_family_(prometheus::BuildGauge()
              .Name("nv_cpu_memory_total_bytes")
              .Help("CPU total memory (RAM), in bytes")
              .Register(*registry_)),
      cpu_memory_used_family_(prometheus::BuildGauge()
              .Name("nv_cpu_memory_used_bytes")
              .Help("CPU used memory (RAM), in bytes")
              .Register(*registry_)),
#endif  // TRITON_ENABLE_METRICS_CPU
      metrics_enabled_(false), gpu_metrics_enabled_(false),
      cpu_metrics_enabled_(false), cache_metrics_enabled_(false),
      metrics_interval_ms_(2000)
{
}
static prometheus::detail::LabelHasher label_hasher_;

size_t
Metrics::HashLabels(const std::map<std::string, std::string>& labels)
{
  return label_hasher_(labels);
}
Metrics::~Metrics()
{
  // Signal the cache thread to exit and then wait for it...
  if (poll_thread_ != nullptr) {
    poll_thread_exit_.store(true);
    poll_thread_->join();

#ifdef TRITON_ENABLE_METRICS_GPU
    if (dcgm_metadata_.dcgm_initialized_) {
      dcgmReturn_t derr;
      // Group destroy will return an error if groupId invalid or dcgm not
      // initialized or configured correctly
      derr = dcgmGroupDestroy(dcgm_metadata_.dcgm_handle_, dcgm_metadata_.groupId_);
      if (derr != DCGM_ST_OK) {
        LOG_WARNING << "Unable to destroy DCGM group: " << errorString(derr);
      }

      // Stop and shutdown DCGM
      if (dcgm_metadata_.standalone_) {
        derr = dcgmDisconnect(dcgm_metadata_.dcgm_handle_);
      } else {
        derr = dcgmStopEmbedded(dcgm_metadata_.dcgm_handle_);
      }
      if (derr != DCGM_ST_OK) {
        LOG_WARNING << "Unable to stop DCGM: " << errorString(derr);
      }
      derr = dcgmShutdown();
      if (derr != DCGM_ST_OK) {
        LOG_WARNING << "Unable to shutdown DCGM: " << errorString(derr);
      }
    }
#endif  // TRITON_ENABLE_METRICS_GPU
  }
}
bool
Metrics::Enabled()
{
  auto singleton = GetSingleton();
  return singleton->metrics_enabled_;
}

void
Metrics::EnableMetrics()
{
  auto singleton = GetSingleton();
  singleton->metrics_enabled_ = true;
}

void
Metrics::EnableCacheMetrics(std::shared_ptr<RequestResponseCache> response_cache)
{
  auto singleton = GetSingleton();
  // Ensure thread-safe enabling of Cache Metrics
  std::lock_guard<std::mutex> lock(singleton->metrics_enabling_);
  if (singleton->cache_metrics_enabled_) {
    return;
  }
  singleton->InitializeCacheMetrics(response_cache);
  singleton->cache_metrics_enabled_ = true;
}

void
Metrics::EnableGPUMetrics()
{
  auto singleton = GetSingleton();
  // Ensure thread-safe enabling of GPU Metrics
  std::lock_guard<std::mutex> lock(singleton->metrics_enabling_);
  if (singleton->gpu_metrics_enabled_) {
    return;
  }
  if (std::getenv("TRITON_SERVER_CPU_ONLY") == nullptr) {
    singleton->InitializeDcgmMetrics();
  }
  singleton->gpu_metrics_enabled_ = true;
}

void
Metrics::EnableCpuMetrics()
{
  auto singleton = GetSingleton();
  // Ensure thread-safe enabling of CPU Metrics
  std::lock_guard<std::mutex> lock(singleton->metrics_enabling_);
  if (singleton->cpu_metrics_enabled_) {
    return;
  }
  singleton->InitializeCpuMetrics();
  singleton->cpu_metrics_enabled_ = true;
}
void
Metrics::SetMetricsInterval(uint64_t metrics_interval_ms)
{
  auto singleton = GetSingleton();
  singleton->metrics_interval_ms_ = metrics_interval_ms;
}

void
Metrics::StartPollingThreadSingleton(
    std::shared_ptr<RequestResponseCache> response_cache)
{
  auto singleton = GetSingleton();
  // Ensure thread-safe start of polling thread
  std::lock_guard<std::mutex> lock(singleton->poll_thread_starting_);
  if (singleton->poll_thread_started_) {
    return;
  }
  // Start thread for polling cache/dcgm metrics
  singleton->StartPollingThread(response_cache);
  // Toggle flag so this function is only executed once
  singleton->poll_thread_started_ = true;
}
bool
Metrics::StartPollingThread(
    std::shared_ptr<RequestResponseCache> response_cache)
{
  // Nothing to poll if no polling metrics enabled, don't spawn a thread
  if (!cache_metrics_enabled_ && !gpu_metrics_enabled_ && !cpu_metrics_enabled_) {
    LOG_WARNING << "No polling metrics (CPU, GPU, Cache) are enabled. Will not "
                   "poll for them.";
    return false;
  }
  poll_thread_exit_.store(false);

  // Start a separate thread for polling metrics at specified interval
  poll_thread_.reset(new std::thread([this, response_cache] {
    // Thread will update metrics indefinitely until exit flag set
    while (!poll_thread_exit_.load()) {
      // Sleep for metric interval
      std::this_thread::sleep_for(
          std::chrono::milliseconds(metrics_interval_ms_ / 2));

      // Poll Response Cache metrics
      if (cache_metrics_enabled_ && response_cache != nullptr) {
        PollCacheMetrics(response_cache);
      }

#ifdef TRITON_ENABLE_METRICS_GPU
      // Poll DCGM GPU metrics
      if (gpu_metrics_enabled_ &&
          dcgm_metadata_.available_cuda_gpu_ids_.size() > 0) {
        PollDcgmMetrics();
      }
#endif  // TRITON_ENABLE_METRICS_GPU

#ifdef TRITON_ENABLE_METRICS_CPU
      if (cpu_metrics_enabled_) {
        PollCpuMetrics();
      }
#endif  // TRITON_ENABLE_METRICS_CPU
    }
  }));

  return true;
}
bool
Metrics::PollCacheMetrics(std::shared_ptr<RequestResponseCache> response_cache)
{
  if (response_cache == nullptr) {
    LOG_WARNING << "error polling cache metrics, cache metrics will not be "
                << "available: cache was nullptr";
    return false;
  }

  // Update global cache metrics
  cache_num_entries_global_->Set(response_cache->NumEntries());
  cache_num_lookups_global_->Set(response_cache->NumLookups());
  cache_num_hits_global_->Set(response_cache->NumHits());
  cache_num_misses_global_->Set(response_cache->NumMisses());
  cache_num_evictions_global_->Set(response_cache->NumEvictions());
  cache_lookup_duration_us_global_->Set(
      response_cache->TotalLookupLatencyNs() / 1000);
  cache_insertion_duration_us_global_->Set(
      response_cache->TotalInsertionLatencyNs() / 1000);
  cache_util_global_->Set(response_cache->TotalUtilization());
  return true;
}
#ifdef TRITON_ENABLE_METRICS_CPU
Status
Metrics::ParseCpuInfo(CpuInfo& info)
{
#ifdef _WIN32
  return Status(Status::Code::INTERNAL, "CPU metrics not supported on Windows.");
#else
  std::ifstream ifs("/proc/stat");
  if (!ifs.good()) {
    return Status(Status::Code::INTERNAL, "Failed to open /proc/stat.");
  }

  std::string line;
  // Verify first line is aggregate cpu line
  std::getline(ifs, line);
  if (line.rfind("cpu ", 0) == std::string::npos) {
    return Status(
        Status::Code::INTERNAL,
        "Failed to find aggregate CPU info in /proc/stat.");
  }

  std::string _;
  std::istringstream iss(line);
  // Use _ to skip "cpu" at start of line
  if (!(iss >> _ >> info)) {
    return Status(
        Status::Code::INTERNAL,
        "Failed to parse aggregate CPU info in /proc/stat.");
  }
  return Status::Success;
#endif  // OS
}

Status
Metrics::ParseMemInfo(MemInfo& info)
{
#ifdef _WIN32
  return Status(
      Status::Code::INTERNAL, "Memory metrics not supported on Windows.");
#else
  std::ifstream ifs("/proc/meminfo");
  if (!ifs.good()) {
    return Status(Status::Code::INTERNAL, "Failed to open /proc/meminfo.");
  }

  std::string line;
  constexpr uint64_t KB = 1024;
  while (std::getline(ifs, line)) {
    std::istringstream iss(line);
    std::string name;
    uint64_t value = 0;
    if (iss >> name >> value) {
      name.pop_back();
      info[name] = value * KB;
    } else {
      return Status(
          Status::Code::INTERNAL, "Encountered error parsing /proc/meminfo.");
    }
  }

  if (info.find("MemTotal") == info.end() ||
      info.find("MemAvailable") == info.end()) {
    return Status(
        Status::Code::INTERNAL,
        "Failed to find desired values in /proc/meminfo.");
  }

  if (info["MemAvailable"] > info["MemTotal"]) {
    return Status(
        Status::Code::INTERNAL,
        "Available bytes shouldn't be greater than Total bytes");
  }

  // "Used" memory can be defined in many different ways. While many
  // older applications consider "used = total - (free + cached)", a more
  // accurate measure of available memory "MemAvailable" was added,
  // so we choose "used = total - available" for a more accurate measure.
  // This may change in the future if not sufficient for most use cases.
  // See https://stackoverflow.com/a/35019697.
  info["MemUsed"] = info["MemTotal"] - info["MemAvailable"];
  return Status::Success;
#endif  // OS
}

double
Metrics::CpuUtilization(const CpuInfo& info_new, const CpuInfo& info_old)
{
  // Account for overflow
  const auto wrap_sub = [](uint64_t a, uint64_t b) { return (a > b) ? (a - b) : 0; };
  uint64_t util_diff = wrap_sub(info_new.user, info_old.user) +
                       wrap_sub(info_new.nice, info_old.nice) +
                       wrap_sub(info_new.system, info_old.system) +
                       wrap_sub(info_new.irq, info_old.irq) +
                       wrap_sub(info_new.softirq, info_old.softirq) +
                       wrap_sub(info_new.steal, info_old.steal);
  uint64_t idle_diff = wrap_sub(info_new.idle, info_old.idle) +
                       wrap_sub(info_new.iowait, info_old.iowait);
  double util_ratio = static_cast<double>(util_diff) / (util_diff + idle_diff);
  return util_ratio;
}
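// Worked example of the ratio above (illustrative numbers, not taken from any
// real machine): if, between two /proc/stat samples, user+nice+system+irq+
// softirq+steal grows by 300 ticks and idle+iowait grows by 700 ticks, then
// util_ratio = 300 / (300 + 700) = 0.3, i.e. the nv_cpu_utilization gauge is
// set to 0.3.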
#endif // TRITON_ENABLE_METRICS_CPU
bool
Metrics::PollCpuMetrics()
{
#ifndef TRITON_ENABLE_METRICS_CPU
  return false;
#else
  // CPU Utilization
  double cpu_util = 0.0;
  auto cpu_info = CpuInfo();
  auto status = ParseCpuInfo(cpu_info);
  if (status.IsOk()) {
    cpu_util = CpuUtilization(cpu_info, last_cpu_info_);
    last_cpu_info_ = cpu_info;
  }
  cpu_utilization_->Set(cpu_util);  // [0.0, 1.0]

  // RAM / Memory
  double mem_total_bytes = 0.0;
  double mem_used_bytes = 0.0;
  auto mem_info = MemInfo();
  status = ParseMemInfo(mem_info);
  if (status.IsOk()) {
    // MemTotal will usually not change over time, but if something
    // goes wrong when querying memory, we can reflect that by updating.
    mem_total_bytes = mem_info["MemTotal"];
    mem_used_bytes = mem_info["MemUsed"];
  }
  cpu_memory_total_->Set(mem_total_bytes);
  cpu_memory_used_->Set(mem_used_bytes);

  return true;
#endif  // TRITON_ENABLE_METRICS_CPU
}
bool
Metrics::PollDcgmMetrics()
{
#ifndef TRITON_ENABLE_METRICS_GPU
  return false;
#else
  if (dcgm_metadata_.available_cuda_gpu_ids_.size() == 0) {
    LOG_WARNING << "error polling GPU metrics, GPU metrics will not be "
                << "available: no available gpus to poll";
    return false;
  }

  dcgmUpdateAllFields(dcgm_metadata_.dcgm_handle_, 1 /* wait for update*/);
  for (unsigned int didx = 0;
       didx < dcgm_metadata_.available_cuda_gpu_ids_.size(); ++didx) {
    uint32_t cuda_id = dcgm_metadata_.available_cuda_gpu_ids_[didx];
    if (dcgm_metadata_.cuda_ids_to_dcgm_ids_.count(cuda_id) <= 0) {
      LOG_WARNING << "Cannot find DCGM id for CUDA id " << cuda_id;
      continue;
    }
    uint32_t dcgm_id = dcgm_metadata_.cuda_ids_to_dcgm_ids_.at(cuda_id);
    dcgmFieldValue_v1 field_values[dcgm_metadata_.field_count_];
    dcgmReturn_t dcgmerr = dcgmGetLatestValuesForFields(
        dcgm_metadata_.dcgm_handle_, dcgm_id, dcgm_metadata_.fields_.data(),
        dcgm_metadata_.field_count_, field_values);
    if (dcgmerr != DCGM_ST_OK) {
      dcgm_metadata_.power_limit_fail_cnt_[didx]++;
      dcgm_metadata_.power_usage_fail_cnt_[didx]++;
      dcgm_metadata_.energy_fail_cnt_[didx]++;
      dcgm_metadata_.util_fail_cnt_[didx]++;
      dcgm_metadata_.mem_fail_cnt_[didx]++;
      LOG_WARNING << "Unable to get field values for GPU ID " << cuda_id << ": "
                  << errorString(dcgmerr);
    } else {
      // Power limit
      if (dcgm_metadata_.power_limit_fail_cnt_[didx] <
          dcgm_metadata_.fail_threshold_) {
        double power_limit = field_values[0].value.dbl;
        if ((field_values[0].status == DCGM_ST_OK) &&
            (!DCGM_FP64_IS_BLANK(power_limit))) {
          dcgm_metadata_.power_limit_fail_cnt_[didx] = 0;
        } else {
          dcgm_metadata_.power_limit_fail_cnt_[didx]++;
          power_limit = 0;
          dcgmReturn_t status = dcgmReturn_t(field_values[0].status);
          LOG_WARNING << "Unable to get power limit for GPU " << cuda_id
                      << ". Status:" << errorString(status)
                      << ", value:" << dcgmValueToErrorMessage(power_limit);
        }
        gpu_power_limit_[didx]->Set(power_limit);
      }

      // Power usage
      if (dcgm_metadata_.power_usage_fail_cnt_[didx] <
          dcgm_metadata_.fail_threshold_) {
        double power_usage = field_values[1].value.dbl;
        if ((field_values[1].status == DCGM_ST_OK) &&
            (!DCGM_FP64_IS_BLANK(power_usage))) {
          dcgm_metadata_.power_usage_fail_cnt_[didx] = 0;
        } else {
          dcgm_metadata_.power_usage_fail_cnt_[didx]++;
          power_usage = 0;
          dcgmReturn_t status = dcgmReturn_t(field_values[1].status);
          LOG_WARNING << "Unable to get power usage for GPU " << cuda_id
                      << ". Status:" << errorString(status)
                      << ", value:" << dcgmValueToErrorMessage(power_usage);
        }
        gpu_power_usage_[didx]->Set(power_usage);
      }

      // Energy Consumption
      if (dcgm_metadata_.energy_fail_cnt_[didx] < dcgm_metadata_.fail_threshold_) {
        int64_t energy = field_values[2].value.i64;
        if ((field_values[2].status == DCGM_ST_OK) &&
            (!DCGM_INT64_IS_BLANK(energy))) {
          dcgm_metadata_.energy_fail_cnt_[didx] = 0;
          if (dcgm_metadata_.last_energy_[didx] == 0) {
            dcgm_metadata_.last_energy_[didx] = energy;
          }
          gpu_energy_consumption_[didx]->Increment(
              (double)(energy - dcgm_metadata_.last_energy_[didx]) * 0.001);
          dcgm_metadata_.last_energy_[didx] = energy;
        } else {
          dcgm_metadata_.energy_fail_cnt_[didx]++;
          energy = 0;
          dcgmReturn_t status = dcgmReturn_t(field_values[2].status);
          LOG_WARNING << "Unable to get energy consumption for "
                      << "GPU " << cuda_id << ". Status:" << errorString(status)
                      << ", value:" << dcgmValueToErrorMessage(energy);
        }
      }

      // Utilization
      if (dcgm_metadata_.util_fail_cnt_[didx] < dcgm_metadata_.fail_threshold_) {
        int64_t util = field_values[3].value.i64;
        if ((field_values[3].status == DCGM_ST_OK) &&
            (!DCGM_INT64_IS_BLANK(util))) {
          dcgm_metadata_.util_fail_cnt_[didx] = 0;
        } else {
          dcgm_metadata_.util_fail_cnt_[didx]++;
          util = 0;
          dcgmReturn_t status = dcgmReturn_t(field_values[3].status);
          LOG_WARNING << "Unable to get GPU utilization for GPU " << cuda_id
                      << ". Status:" << errorString(status)
                      << ", value:" << dcgmValueToErrorMessage(util);
        }
        gpu_utilization_[didx]->Set((double)util * 0.01);
      }

      // Memory Usage
      if (dcgm_metadata_.mem_fail_cnt_[didx] < dcgm_metadata_.fail_threshold_) {
        int64_t memory_used = field_values[4].value.i64;
        int64_t memory_total = field_values[5].value.i64;
        if ((field_values[4].status == DCGM_ST_OK) &&
            (!DCGM_INT64_IS_BLANK(memory_used)) &&
            (field_values[5].status == DCGM_ST_OK) &&
            (!DCGM_INT64_IS_BLANK(memory_total))) {
          dcgm_metadata_.mem_fail_cnt_[didx] = 0;
        } else {
          memory_total = 0;
          memory_used = 0;
          dcgm_metadata_.mem_fail_cnt_[didx]++;
          dcgmReturn_t usageStatus = dcgmReturn_t(field_values[4].status);
          dcgmReturn_t memoryTotalStatus = dcgmReturn_t(field_values[5].status);
          LOG_WARNING << "Unable to get memory usage for GPU " << cuda_id
                      << ". Memory usage status:" << errorString(usageStatus)
                      << ", value:" << dcgmValueToErrorMessage(memory_used)
                      << ". Memory total status:" << errorString(memoryTotalStatus)
                      << ", value:" << dcgmValueToErrorMessage(memory_total);
        }
        gpu_memory_total_[didx]->Set(memory_total * 1024 * 1024);  // bytes
        gpu_memory_used_[didx]->Set(memory_used * 1024 * 1024);    // bytes
      }
    }
  }
  return true;
#endif  // TRITON_ENABLE_METRICS_GPU
}
bool
Metrics::InitializeCacheMetrics(
    std::shared_ptr<RequestResponseCache> response_cache)
{
  if (response_cache == nullptr) {
    LOG_WARNING << "error initializing cache metrics, cache metrics will not be "
                << "available: cache was nullptr";
    return false;
  }

  const std::map<std::string, std::string> cache_labels;
  cache_num_entries_global_ = &cache_num_entries_family_.Add(cache_labels);
  cache_num_lookups_global_ = &cache_num_lookups_family_.Add(cache_labels);
  cache_num_hits_global_ = &cache_num_hits_family_.Add(cache_labels);
  cache_num_misses_global_ = &cache_num_misses_family_.Add(cache_labels);
  cache_num_evictions_global_ = &cache_num_evictions_family_.Add(cache_labels);
  cache_lookup_duration_us_global_ =
      &cache_lookup_duration_us_family_.Add(cache_labels);
  cache_insertion_duration_us_global_ =
      &cache_insertion_duration_us_family_.Add(cache_labels);
  cache_util_global_ = &cache_util_family_.Add(cache_labels);
  LOG_INFO << "Collecting Response Cache metrics";
  return true;
}
bool
Metrics::InitializeCpuMetrics()
{
#ifndef TRITON_ENABLE_METRICS_CPU
  return false;
#else
  const std::map<std::string, std::string> cpu_labels;
  cpu_utilization_ = &cpu_utilization_family_.Add(cpu_labels);
  cpu_memory_total_ = &cpu_memory_total_family_.Add(cpu_labels);
  cpu_memory_used_ = &cpu_memory_used_family_.Add(cpu_labels);

  // Get baseline CPU info for future comparisons
  last_cpu_info_ = CpuInfo();
  auto status = ParseCpuInfo(last_cpu_info_);
  if (!status.IsOk()) {
    LOG_WARNING << "error initializing CPU metrics, CPU utilization may not "
                   "be available: "
                << status.Message();
    return false;
  }

  // Verify memory metrics can be parsed
  auto mem_info = MemInfo();
  status = ParseMemInfo(mem_info);
  if (!status.IsOk()) {
    LOG_WARNING << "error initializing CPU metrics, CPU memory metrics may not "
                   "be available: "
                << status.Message();
    return false;
  }

  LOG_INFO << "Collecting CPU metrics";
  return true;
#endif  // TRITON_ENABLE_METRICS_CPU
}
bool
Metrics::InitializeDcgmMetrics()
{
#ifndef TRITON_ENABLE_METRICS_GPU
  return false;
#else
  dcgmReturn_t dcgmerr = dcgmInit();
  if (dcgmerr != DCGM_ST_OK) {
    LOG_WARNING << "error initializing DCGM, GPU metrics will not be "
                << "available: " << errorString(dcgmerr);
    return false;
  }

  if (dcgm_metadata_.standalone_) {
    char hostIpAddress[16] = {0};
    std::string ipAddress = "127.0.0.1";
    strncpy(hostIpAddress, ipAddress.c_str(), 15);
    dcgmerr = dcgmConnect(hostIpAddress, &dcgm_metadata_.dcgm_handle_);
  } else {
    dcgmerr = dcgmStartEmbedded(
        DCGM_OPERATION_MODE_MANUAL, &dcgm_metadata_.dcgm_handle_);
  }
  if (dcgmerr != DCGM_ST_OK) {
    LOG_WARNING << "DCGM unable to start: " << errorString(dcgmerr);
    return false;
  } else {
    // Set this flag to signal DCGM cleanup in destructor
    dcgm_metadata_.dcgm_initialized_ = true;
  }

  if (dcgm_metadata_.standalone_) {
    dcgmerr = dcgmUpdateAllFields(dcgm_metadata_.dcgm_handle_, 1);
    if (dcgmerr != DCGM_ST_OK) {
      LOG_WARNING << "DCGM unable to update all fields, GPU metrics will "
                     "not be available: "
                  << errorString(dcgmerr);
      return false;
    }
  }

  unsigned int dcgm_gpu_ids[DCGM_MAX_NUM_DEVICES];
  int dcgm_gpu_count;
  dcgmerr = dcgmGetAllDevices(
      dcgm_metadata_.dcgm_handle_, dcgm_gpu_ids, &dcgm_gpu_count);
  if (dcgmerr != DCGM_ST_OK) {
    LOG_WARNING << "DCGM unable to get device info and count, GPU "
                   "metrics will not be available: "
                << errorString(dcgmerr);
    return false;
  }

  // Get PCI Bus ID to DCGM device Id map.
  // Some devices may have problems using DCGM API and
  // these devices needs to be ignored.
  std::map<std::string, size_t> pci_bus_id_to_dcgm_id;
  std::map<std::string, std::map<std::string, std::string>> pci_bus_id_to_gpu_labels;
  std::map<std::string, std::string> pci_bus_id_to_device_name;
  dcgmDeviceAttributes_t gpu_attributes[DCGM_MAX_NUM_DEVICES];
  for (int i = 0; i < dcgm_gpu_count; i++) {
    gpu_attributes[i].version = dcgmDeviceAttributes_version;
    dcgmerr = dcgmGetDeviceAttributes(
        dcgm_metadata_.dcgm_handle_, dcgm_gpu_ids[i], &gpu_attributes[i]);
    if (dcgmerr != DCGM_ST_OK) {
      LOG_WARNING << "DCGM unable to get device properties for DCGM device "
                  << dcgm_gpu_ids[i]
                  << ", GPU metrics will not be available for this device: "
                  << errorString(dcgmerr);
    } else {
      std::string pciBusId = gpu_attributes[i].identifiers.pciBusId;
      pci_bus_id_to_dcgm_id[pciBusId] = i;
      pci_bus_id_to_device_name[pciBusId] =
          std::string(gpu_attributes[i].identifiers.deviceName);
      std::map<std::string, std::string> gpu_labels;
      gpu_labels.insert(std::map<std::string, std::string>::value_type(
          kMetricsLabelGpuUuid,
          std::string(gpu_attributes[i].identifiers.uuid)));
      pci_bus_id_to_gpu_labels[pciBusId] = gpu_labels;
    }
  }

  // Get CUDA-visible PCI Bus Ids and get DCGM metrics for each CUDA-visible GPU
  int cuda_gpu_count;
  cudaError_t cudaerr = cudaGetDeviceCount(&cuda_gpu_count);
  if (cudaerr != cudaSuccess) {
    LOG_WARNING << "Cannot get CUDA device count, GPU metrics will not be available";
    return false;
  }
  for (int i = 0; i < cuda_gpu_count; ++i) {
    std::string pci_bus_id = "0000";  // pad 0's for uniformity
    char pcibusid_str[64];
    cudaerr = cudaDeviceGetPCIBusId(pcibusid_str, sizeof(pcibusid_str) - 1, i);
    if (cudaerr == cudaSuccess) {
      pci_bus_id.append(pcibusid_str);
      if (pci_bus_id_to_dcgm_id.count(pci_bus_id) <= 0) {
        LOG_INFO << "Skipping GPU:" << i
                 << " since it's not CUDA enabled. This should never happen!";
        continue;
      }
      // Filter out CUDA visible GPUs from GPUs found by DCGM
      LOG_INFO << "Collecting metrics for GPU " << i << ": "
               << pci_bus_id_to_device_name[pci_bus_id];
      auto& gpu_labels = pci_bus_id_to_gpu_labels[pci_bus_id];
      gpu_utilization_.push_back(&gpu_utilization_family_.Add(gpu_labels));
      gpu_memory_total_.push_back(&gpu_memory_total_family_.Add(gpu_labels));
      gpu_memory_used_.push_back(&gpu_memory_used_family_.Add(gpu_labels));
      gpu_power_usage_.push_back(&gpu_power_usage_family_.Add(gpu_labels));
      gpu_power_limit_.push_back(&gpu_power_limit_family_.Add(gpu_labels));
      gpu_energy_consumption_.push_back(
          &gpu_energy_consumption_family_.Add(gpu_labels));
      uint32_t dcgm_id = pci_bus_id_to_dcgm_id[pci_bus_id];
      dcgm_metadata_.cuda_ids_to_dcgm_ids_[i] = dcgm_id;
      dcgm_metadata_.available_cuda_gpu_ids_.emplace_back(i);
    } else {
      LOG_WARNING << "GPU metrics will not be available for device:" << i;
    }
  }

  // create a gpu group
  char groupName[] = "dcgm_group";
  dcgmerr = dcgmGroupCreate(
      dcgm_metadata_.dcgm_handle_, DCGM_GROUP_DEFAULT, groupName,
      &dcgm_metadata_.groupId_);
  if (dcgmerr != DCGM_ST_OK) {
    LOG_WARNING << "Cannot make GPU group: " << errorString(dcgmerr);
  }

  // Initialize tracking vectors
  for (unsigned int didx = 0;
       didx < dcgm_metadata_.available_cuda_gpu_ids_.size(); ++didx) {
    dcgm_metadata_.power_limit_fail_cnt_.push_back(0);
    dcgm_metadata_.power_usage_fail_cnt_.push_back(0);
    dcgm_metadata_.energy_fail_cnt_.push_back(0);
    dcgm_metadata_.util_fail_cnt_.push_back(0);
    dcgm_metadata_.mem_fail_cnt_.push_back(0);
    dcgm_metadata_.last_energy_.push_back(0);
  }

  // Number of fields for DCGM to use from fields_ below
  dcgm_metadata_.field_count_ = 6;
  unsigned short util_flag = dcgm_metadata_.standalone_
                                 ? DCGM_FI_PROF_GR_ENGINE_ACTIVE
                                 : DCGM_FI_DEV_GPU_UTIL;
  dcgm_metadata_.fields_ = {
      DCGM_FI_DEV_POWER_MGMT_LIMIT,          // power limit, watts
      DCGM_FI_DEV_POWER_USAGE,               // power usage, watts
      DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION,  // Total energy consumption, mJ
      util_flag,                             // util ratio, 1 = 1%
      DCGM_FI_DEV_FB_USED,                   // Frame buffer used, MiB
      DCGM_FI_DEV_FB_TOTAL,                  // Frame buffer total, MiB
  };

  char fieldName[] = "field_group";
  dcgmFieldGrp_t fieldGroupId;
  dcgmerr = dcgmFieldGroupCreate(
      dcgm_metadata_.dcgm_handle_, dcgm_metadata_.field_count_,
      dcgm_metadata_.fields_.data(), fieldName, &fieldGroupId);
  if (dcgmerr != DCGM_ST_OK) {
    LOG_WARNING << "Cannot make field group: " << errorString(dcgmerr);
  }

  dcgmerr = dcgmWatchFields(
      dcgm_metadata_.dcgm_handle_, dcgm_metadata_.groupId_, fieldGroupId,
      metrics_interval_ms_ * 1000 /*update period, usec*/,
      5.0 /*maxKeepAge, sec*/, 5 /*maxKeepSamples*/);
  if (dcgmerr != DCGM_ST_OK) {
    LOG_WARNING << "Cannot start watching fields: " << errorString(dcgmerr);
    return false;
  }
  return true;
#endif  // TRITON_ENABLE_METRICS_GPU
}
#ifdef TRITON_ENABLE_METRICS_GPU
std::string
Metrics::dcgmValueToErrorMessage(double val)
{
  if (DCGM_FP64_IS_BLANK(val)) {
    if (val == DCGM_FP64_BLANK) {
      return "Not Specified";
    } else if (val == DCGM_FP64_NOT_FOUND) {
      return "Not Found";
    } else if (val == DCGM_FP64_NOT_SUPPORTED) {
      return "Not Supported";
    } else if (val == DCGM_FP64_NOT_PERMISSIONED) {
      return "Insf. Permission";
    } else {
      return "Unknown";
    }
  } else {
    return std::to_string(val);
  }
}

std::string
Metrics::dcgmValueToErrorMessage(int64_t val)
{
  if (DCGM_INT64_IS_BLANK(val)) {
    switch (val) {
      case DCGM_INT64_BLANK:
        return "Not Specified";
      case DCGM_INT64_NOT_FOUND:
        return "Not Found";
      case DCGM_INT64_NOT_SUPPORTED:
        return "Not Supported";
      case DCGM_INT64_NOT_PERMISSIONED:
        return "Insf. Permission";
      default:
        return "Unknown";
    }
  } else {
    return std::to_string(val);
  }
}
#endif // TRITON_ENABLE_METRICS_GPU
bool
Metrics::UUIDForCudaDevice(int cuda_device, std::string* uuid)
{
  // If metrics were not initialized then just silently fail since
  // with DCGM we can't get the CUDA device (and not worth doing
  // anyway since metrics aren't being reported).
  auto singleton = GetSingleton();
  if (!singleton->gpu_metrics_enabled_) {
    return false;
  }

  // If GPU metrics is not enabled just silently fail.
#ifndef TRITON_ENABLE_METRICS_GPU
  return false;
#else
  dcgmDeviceAttributes_t gpu_attributes;
  gpu_attributes.version = dcgmDeviceAttributes_version;
  dcgmReturn_t dcgmerr = dcgmGetDeviceAttributes(
      singleton->dcgm_metadata_.dcgm_handle_, cuda_device, &gpu_attributes);
  if (dcgmerr != DCGM_ST_OK) {
    LOG_ERROR << "Unable to get device UUID: " << errorString(dcgmerr);
    return false;
  }

  *uuid = gpu_attributes.identifiers.uuid;
  return true;
#endif  // TRITON_ENABLE_METRICS_GPU
}
std::shared_ptr<prometheus::Registry>
Metrics::GetRegistry()
{
  auto singleton = Metrics::GetSingleton();
  return singleton->registry_;
}

const std::string
Metrics::SerializedMetrics()
{
  auto singleton = Metrics::GetSingleton();
  return singleton->serializer_->Serialize(singleton->registry_.get()->Collect());
}

Metrics*
Metrics::GetSingleton()
{
  static Metrics singleton;
  return &singleton;
}

}}  // namespace triton::core

#endif  // TRITON_ENABLE_METRICS
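As a rough usage sketch of the singleton defined above (assuming the surrounding server wiring, which is not part of this file), the typical call sequence is: enable the metric groups, start the polling thread, then serialize the registry for a Prometheus scrape. The nullptr cache and the 2000 ms interval are illustrative choices:

#ifdef TRITON_ENABLE_METRICS
// Hypothetical call sequence, not part of the commit.
triton::core::Metrics::EnableMetrics();
triton::core::Metrics::EnableCpuMetrics();
triton::core::Metrics::SetMetricsInterval(2000 /* ms */);
triton::core::Metrics::StartPollingThreadSingleton(nullptr /* no response cache */);
const std::string scrape_body = triton::core::Metrics::SerializedMetrics();
#endif  // TRITON_ENABLE_METRICS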
3rdparty/core-r22.12/src/metrics.h
0 → 100644
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once
#ifdef TRITON_ENABLE_METRICS
#include <atomic>
#include <mutex>
#include <thread>
#include "prometheus/counter.h"
#include "prometheus/gauge.h"
#include "prometheus/registry.h"
#include "prometheus/serializer.h"
#include "prometheus/text_serializer.h"
#include "response_cache.h"
#ifdef TRITON_ENABLE_METRICS_GPU
#include <dcgm_agent.h>
#endif // TRITON_ENABLE_METRICS_GPU
namespace triton { namespace core {

#ifdef TRITON_ENABLE_METRICS_CPU
using MemInfo = std::unordered_map<std::string, uint64_t>;

// References:
// - htop source: https://stackoverflow.com/a/23376195
// - Linux docs: https://www.kernel.org/doc/Documentation/filesystems/proc.txt
// guest/guestnice values are counted in user/nice so we skip parsing them
struct CpuInfo {
  uint64_t user = 0;     // normal processes executing in user mode
  uint64_t nice = 0;     // niced processes executing in user mode
  uint64_t system = 0;   // processes executing in kernel mode
  uint64_t idle = 0;     // twiddling thumbs
  uint64_t iowait = 0;   // waiting for I/O to complete
  uint64_t irq = 0;      // servicing interrupts
  uint64_t softirq = 0;  // servicing softirqs
  uint64_t steal = 0;    // involuntary wait
};

inline std::istream&
operator>>(std::istream& is, CpuInfo& info)
{
  is >> info.user >> info.nice >> info.system >> info.idle >> info.iowait >>
      info.irq >> info.softirq >> info.steal;
  return is;
}
#endif // TRITON_ENABLE_METRICS_CPU
#ifdef TRITON_ENABLE_METRICS_GPU
struct DcgmMetadata {
  // DCGM handles for initialization and destruction
  dcgmHandle_t dcgm_handle_ = 0;
  dcgmGpuGrp_t groupId_ = 0;
  // DCGM Flags
  bool standalone_ = false;
  // DCGM Fields
  size_t field_count_ = 0;
  std::vector<unsigned short> fields_;
  // GPU Device Mapping
  std::map<uint32_t, uint32_t> cuda_ids_to_dcgm_ids_;
  std::vector<uint32_t> available_cuda_gpu_ids_;
  // Stop attempting metrics if they fail multiple consecutive
  // times for a device.
  const int fail_threshold_ = 3;
  // DCGM Failure Tracking
  std::vector<int> power_limit_fail_cnt_;
  std::vector<int> power_usage_fail_cnt_;
  std::vector<int> energy_fail_cnt_;
  std::vector<int> util_fail_cnt_;
  std::vector<int> mem_fail_cnt_;
  // DCGM Energy Tracking
  std::vector<unsigned long long> last_energy_;
  // Track if DCGM handle initialized successfully
  bool dcgm_initialized_ = false;
};
#endif // TRITON_ENABLE_METRICS_GPU
class Metrics {
 public:
  // Return the hash value of the labels
  static size_t HashLabels(const std::map<std::string, std::string>& labels);

  // Are metrics enabled?
  static bool Enabled();

  // Enable reporting of metrics
  static void EnableMetrics();

  // Enable reporting of GPU metrics
  static void EnableGPUMetrics();

  // Enable reporting of CPU metrics
  static void EnableCpuMetrics();

  // Enable reporting of Cache metrics
  static void EnableCacheMetrics(
      std::shared_ptr<RequestResponseCache> response_cache);

  // Start a thread for polling enabled metrics if any
  static void StartPollingThreadSingleton(
      std::shared_ptr<RequestResponseCache> response_cache);

  // Set the time interval in milliseconds at which metrics are collected
  static void SetMetricsInterval(uint64_t metrics_interval_ms);

  // Get the prometheus registry
  static std::shared_ptr<prometheus::Registry> GetRegistry();

  // Get serialized metrics
  static const std::string SerializedMetrics();

  // Get the UUID for a CUDA device. Return true and initialize 'uuid'
  // if a UUID is found, return false if a UUID cannot be returned.
  static bool UUIDForCudaDevice(int cuda_device, std::string* uuid);

  // Metric family counting successful inference requests
  static prometheus::Family<prometheus::Counter>& FamilyInferenceSuccess()
  {
    return GetSingleton()->inf_success_family_;
  }

  // Metric family counting failed inference requests
  static prometheus::Family<prometheus::Counter>& FamilyInferenceFailure()
  {
    return GetSingleton()->inf_failure_family_;
  }

  // Metric family counting inferences performed, where a batch-size
  // 'n' inference request is counted as 'n' inferences
  static prometheus::Family<prometheus::Counter>& FamilyInferenceCount()
  {
    return GetSingleton()->inf_count_family_;
  }

  // Metric family counting model executions performed
  static prometheus::Family<prometheus::Counter>& FamilyInferenceExecutionCount()
  {
    return GetSingleton()->inf_count_exec_family_;
  }

  // Metric family of cumulative inference request duration, in
  // microseconds
  static prometheus::Family<prometheus::Counter>& FamilyInferenceRequestDuration()
  {
    return GetSingleton()->inf_request_duration_us_family_;
  }

  // Metric family of cumulative inference queuing duration, in
  // microseconds
  static prometheus::Family<prometheus::Counter>& FamilyInferenceQueueDuration()
  {
    return GetSingleton()->inf_queue_duration_us_family_;
  }

  // Metric family of cumulative inference compute durations, in
  // microseconds
  static prometheus::Family<prometheus::Counter>&
  FamilyInferenceComputeInputDuration()
  {
    return GetSingleton()->inf_compute_input_duration_us_family_;
  }
  static prometheus::Family<prometheus::Counter>&
  FamilyInferenceComputeInferDuration()
  {
    return GetSingleton()->inf_compute_infer_duration_us_family_;
  }
  static prometheus::Family<prometheus::Counter>&
  FamilyInferenceComputeOutputDuration()
  {
    return GetSingleton()->inf_compute_output_duration_us_family_;
  }

  // Metric families of per-model response cache metrics
  static prometheus::Family<prometheus::Counter>& FamilyCacheHitCount()
  {
    return GetSingleton()->cache_num_hits_model_family_;
  }
  static prometheus::Family<prometheus::Counter>& FamilyCacheHitLookupDuration()
  {
    return GetSingleton()->cache_hit_lookup_duration_us_model_family_;
  }
  static prometheus::Family<prometheus::Counter>& FamilyCacheMissCount()
  {
    return GetSingleton()->cache_num_misses_model_family_;
  }
  static prometheus::Family<prometheus::Counter>& FamilyCacheMissLookupDuration()
  {
    return GetSingleton()->cache_miss_lookup_duration_us_model_family_;
  }
  static prometheus::Family<prometheus::Counter>& FamilyCacheMissInsertionDuration()
  {
    return GetSingleton()->cache_miss_insertion_duration_us_model_family_;
  }
 private:
  Metrics();
  virtual ~Metrics();
  static Metrics* GetSingleton();
  bool InitializeDcgmMetrics();
  bool InitializeCpuMetrics();
  bool InitializeCacheMetrics(
      std::shared_ptr<RequestResponseCache> response_cache);
  bool StartPollingThread(std::shared_ptr<RequestResponseCache> response_cache);
  bool PollCacheMetrics(std::shared_ptr<RequestResponseCache> response_cache);
  bool PollDcgmMetrics();
  bool PollCpuMetrics();
  std::string dcgmValueToErrorMessage(double val);
  std::string dcgmValueToErrorMessage(int64_t val);

  std::shared_ptr<prometheus::Registry> registry_;
  std::unique_ptr<prometheus::Serializer> serializer_;

  prometheus::Family<prometheus::Counter>& inf_success_family_;
  prometheus::Family<prometheus::Counter>& inf_failure_family_;
  prometheus::Family<prometheus::Counter>& inf_count_family_;
  prometheus::Family<prometheus::Counter>& inf_count_exec_family_;
  prometheus::Family<prometheus::Counter>& inf_request_duration_us_family_;
  prometheus::Family<prometheus::Counter>& inf_queue_duration_us_family_;
  prometheus::Family<prometheus::Counter>& inf_compute_input_duration_us_family_;
  prometheus::Family<prometheus::Counter>& inf_compute_infer_duration_us_family_;
  prometheus::Family<prometheus::Counter>& inf_compute_output_duration_us_family_;

  // Global Response Cache metrics
  prometheus::Family<prometheus::Gauge>& cache_num_entries_family_;
  prometheus::Family<prometheus::Gauge>& cache_num_lookups_family_;
  prometheus::Family<prometheus::Gauge>& cache_num_hits_family_;
  prometheus::Family<prometheus::Gauge>& cache_num_misses_family_;
  prometheus::Family<prometheus::Gauge>& cache_num_evictions_family_;
  prometheus::Family<prometheus::Gauge>& cache_lookup_duration_us_family_;
  prometheus::Family<prometheus::Gauge>& cache_insertion_duration_us_family_;
  prometheus::Family<prometheus::Gauge>& cache_util_family_;

  // Gauges for Global Response Cache metrics
  prometheus::Gauge* cache_num_entries_global_;
  prometheus::Gauge* cache_num_lookups_global_;
  prometheus::Gauge* cache_num_hits_global_;
  prometheus::Gauge* cache_num_misses_global_;
  prometheus::Gauge* cache_num_evictions_global_;
  prometheus::Gauge* cache_lookup_duration_us_global_;
  prometheus::Gauge* cache_insertion_duration_us_global_;
  prometheus::Gauge* cache_util_global_;

  // Per-model Response Cache metrics
  prometheus::Family<prometheus::Counter>& cache_num_hits_model_family_;
  prometheus::Family<prometheus::Counter>& cache_hit_lookup_duration_us_model_family_;
  prometheus::Family<prometheus::Counter>& cache_num_misses_model_family_;
  prometheus::Family<prometheus::Counter>& cache_miss_lookup_duration_us_model_family_;
  prometheus::Family<prometheus::Counter>& cache_miss_insertion_duration_us_model_family_;

#ifdef TRITON_ENABLE_METRICS_GPU
  prometheus::Family<prometheus::Gauge>& gpu_utilization_family_;
  prometheus::Family<prometheus::Gauge>& gpu_memory_total_family_;
  prometheus::Family<prometheus::Gauge>& gpu_memory_used_family_;
  prometheus::Family<prometheus::Gauge>& gpu_power_usage_family_;
  prometheus::Family<prometheus::Gauge>& gpu_power_limit_family_;
  prometheus::Family<prometheus::Counter>& gpu_energy_consumption_family_;

  std::vector<prometheus::Gauge*> gpu_utilization_;
  std::vector<prometheus::Gauge*> gpu_memory_total_;
  std::vector<prometheus::Gauge*> gpu_memory_used_;
  std::vector<prometheus::Gauge*> gpu_power_usage_;
  std::vector<prometheus::Gauge*> gpu_power_limit_;
  std::vector<prometheus::Counter*> gpu_energy_consumption_;

  DcgmMetadata dcgm_metadata_;
#endif  // TRITON_ENABLE_METRICS_GPU

#ifdef TRITON_ENABLE_METRICS_CPU
  // Parses "/proc/meminfo" for metrics, currently only supported on Linux.
  Status ParseMemInfo(MemInfo& info);
  // Parses "/proc/stat" for metrics, currently only supported on Linux.
  Status ParseCpuInfo(CpuInfo& info);
  // Computes CPU utilization between "info_new" and "info_old" values
  double CpuUtilization(const CpuInfo& info_new, const CpuInfo& info_old);

  prometheus::Family<prometheus::Gauge>& cpu_utilization_family_;
  prometheus::Family<prometheus::Gauge>& cpu_memory_total_family_;
  prometheus::Family<prometheus::Gauge>& cpu_memory_used_family_;

  prometheus::Gauge* cpu_utilization_;
  prometheus::Gauge* cpu_memory_total_;
  prometheus::Gauge* cpu_memory_used_;

  CpuInfo last_cpu_info_;
#endif  // TRITON_ENABLE_METRICS_CPU

  // Thread for polling cache/gpu metrics periodically
  std::unique_ptr<std::thread> poll_thread_;
  std::atomic<bool> poll_thread_exit_;
  bool metrics_enabled_;
  bool gpu_metrics_enabled_;
  bool cpu_metrics_enabled_;
  bool cache_metrics_enabled_;
  bool poll_thread_started_;
  std::mutex metrics_enabling_;
  std::mutex poll_thread_starting_;
  uint64_t metrics_interval_ms_;
};

}}  // namespace triton::core

#endif  // TRITON_ENABLE_METRICS
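The CpuInfo extraction operator declared in this header is what ParseCpuInfo feeds with the aggregate line of /proc/stat. A small self-contained sketch, where the sample line is purely illustrative:

#ifdef TRITON_ENABLE_METRICS_CPU
#include <sstream>
// Parse a sample aggregate line; the leading "cpu" token is skipped the same
// way ParseCpuInfo skips it.
std::istringstream iss("cpu 4705 150 1120 16250 520 0 30 0 0 0");
std::string skip;
triton::core::CpuInfo info;
iss >> skip >> info;  // info.user == 4705, info.idle == 16250, etc.
#endif  // TRITON_ENABLE_METRICS_CPU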
3rdparty/core-r22.12/src/model.cc
0 → 100644
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model.h"
#include <chrono>
#include <future>
#include "constants.h"
#include "filesystem.h"
#include "infer_request.h"
#include "model_config_utils.h"
#include "triton/common/logging.h"
namespace triton { namespace core {

Status
Model::GetInput(
    const std::string& name, const inference::ModelInput** input) const
{
  const auto itr = input_map_.find(name);
  if (itr == input_map_.end()) {
    return Status(
        Status::Code::INVALID_ARG, "unexpected inference input '" + name +
                                       "' for model '" + Name() + "'");
  }

  *input = &itr->second;
  return Status::Success;
}

Status
Model::GetOutput(
    const std::string& name, const inference::ModelOutput** output) const
{
  const auto itr = output_map_.find(name);
  if (itr == output_map_.end()) {
    return Status(
        Status::Code::INVALID_ARG, "unexpected inference output '" + name +
                                       "' for model '" + Name() + "'");
  }

  *output = &itr->second;
  return Status::Success;
}

Status
Model::SetModelConfig(const inference::ModelConfig& config)
{
  config_ = config;
  set_model_config_ = true;
  return Status::Success;
}

Status
Model::SetScheduler(std::unique_ptr<Scheduler> scheduler)
{
  if (scheduler_ != nullptr) {
    return Status(
        Status::Code::INTERNAL, "Attempt to change scheduler not allowed");
  }
  scheduler_ = std::move(scheduler);
  return Status::Success;
}

Status
Model::Init(const bool is_config_provided)
{
  if (!set_model_config_ && !is_config_provided) {
    return Status(
        Status::Code::NOT_FOUND,
        "model configuration is not provided for model '" + Name() + "'");
  }

  RETURN_IF_ERROR(ValidateModelConfig(config_, min_compute_capability_));
  RETURN_IF_ERROR(ValidateModelIOConfig(config_));

  // Initialize the input map
  for (const auto& io : config_.input()) {
    input_map_.insert(std::make_pair(io.name(), io));
    if (!io.optional()) {
      ++required_input_count_;
    }
  }

  // Initialize the output map and label provider for each output
  label_provider_ = std::make_shared<LabelProvider>();
  for (const auto& io : config_.output()) {
    output_map_.insert(std::make_pair(io.name(), io));
    if (!io.label_filename().empty()) {
      const auto label_path = JoinPath({model_dir_, io.label_filename()});
      RETURN_IF_ERROR(label_provider_->AddLabels(io.name(), label_path));
    }
  }

  if (config_.has_dynamic_batching()) {
    default_priority_level_ =
        config_.dynamic_batching().default_priority_level();
    max_priority_level_ = config_.dynamic_batching().priority_levels();
  } else if (config_.has_ensemble_scheduling()) {
    // For ensemble, allow any priority level to pass through
    default_priority_level_ = 0;
    max_priority_level_ = UINT32_MAX;
  } else {
    default_priority_level_ = 0;
    max_priority_level_ = 0;
  }

  return Status::Success;
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/model.h
0 → 100644
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "infer_stats.h"
#include "label_provider.h"
#include "model_config.pb.h"
#include "scheduler.h"
#include "status.h"
namespace triton { namespace core {

class InferenceRequest;

//
// Interface for models that handle inference requests.
//
class Model {
 public:
  explicit Model(
      const double min_compute_capability, const std::string& model_dir,
      const int64_t version, const inference::ModelConfig& config)
      : config_(config), min_compute_capability_(min_compute_capability),
        version_(version), required_input_count_(0), model_dir_(model_dir),
        set_model_config_(false)
  {
  }
  virtual ~Model() {}

  // Get the name of model being served.
  const std::string& Name() const { return config_.name(); }

  // Get the version of model being served.
  int64_t Version() const { return version_; }

  // Get the configuration of model being served.
  const inference::ModelConfig& Config() const { return config_; }

  // Get the number of required inputs
  size_t RequiredInputCount() const { return required_input_count_; }

  // Get the stats collector for the model being served.
  InferenceStatsAggregator* MutableStatsAggregator()
  {
    return &stats_aggregator_;
  }
  const InferenceStatsAggregator& StatsAggregator() const
  {
    return stats_aggregator_;
  }

  // Get the model configuration for a named input.
  Status GetInput(
      const std::string& name, const inference::ModelInput** input) const;

  // Get the model configuration for a named output.
  Status GetOutput(
      const std::string& name, const inference::ModelOutput** output) const;

  // Get a label provider for the model.
  const std::shared_ptr<LabelProvider>& GetLabelProvider() const
  {
    return label_provider_;
  }

  // Initialize the instance for Triton core usage
  Status Init(const bool is_config_provided);

  // Enqueue a request for execution. If Status::Success is returned
  // then the model has taken ownership of the request object and so
  // 'request' will be nullptr. If non-success is returned then the
  // caller still retains ownership of 'request'.
  Status Enqueue(std::unique_ptr<InferenceRequest>& request)
  {
    return scheduler_->Enqueue(request);
  }

  // Return the number of in-flight inferences.
  size_t InflightInferenceCount()
  {
    return scheduler_->InflightInferenceCount();
  }

  // Stop processing future requests unless they are considered as in-flight.
  void Stop() { scheduler_->Stop(); }

  uint32_t DefaultPriorityLevel() const { return default_priority_level_; }

  uint32_t MaxPriorityLevel() const { return max_priority_level_; }

 protected:
  // Set the configuration of the model being served.
  Status SetModelConfig(const inference::ModelConfig& config);

  // Explicitly set the scheduler to use for inference requests to the
  // model. The scheduler can only be set once for a model.
  Status SetScheduler(std::unique_ptr<Scheduler> scheduler);

  // The scheduler to use for this model.
  std::unique_ptr<Scheduler> scheduler_;

  // Configuration of the model.
  inference::ModelConfig config_;

 private:
  // The minimum supported CUDA compute capability.
  const double min_compute_capability_;

  // Version of the model.
  int64_t version_;

  // The stats collector for the model.
  InferenceStatsAggregator stats_aggregator_;

  // Label provider for this model.
  std::shared_ptr<LabelProvider> label_provider_;

  size_t required_input_count_;

  // Map from input name to the model configuration for that input.
  std::unordered_map<std::string, inference::ModelInput> input_map_;

  // Map from output name to the model configuration for that output.
  std::unordered_map<std::string, inference::ModelOutput> output_map_;

  // Path to model
  std::string model_dir_;

  // The default priority level for the model.
  uint32_t default_priority_level_;

  // The largest priority value for the model.
  uint32_t max_priority_level_;

  // Whether or not model config has been set.
  bool set_model_config_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/model_config_cuda.cc
0 → 100644
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model_config_cuda.h"
#include <cuda_runtime_api.h>
namespace triton { namespace core {

int
GetCudaStreamPriority(
    inference::ModelOptimizationPolicy::ModelPriority priority)
{
  // Default priority is 0
  int cuda_stream_priority = 0;

  int min, max;
  cudaError_t cuerr = cudaDeviceGetStreamPriorityRange(&min, &max);
  if ((cuerr != cudaErrorNoDevice) && (cuerr != cudaSuccess)) {
    return 0;
  }

  switch (priority) {
    case inference::ModelOptimizationPolicy::PRIORITY_MAX:
      cuda_stream_priority = max;
      break;
    case inference::ModelOptimizationPolicy::PRIORITY_MIN:
      cuda_stream_priority = min;
      break;
    default:
      cuda_stream_priority = 0;
      break;
  }

  return cuda_stream_priority;
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/model_config_cuda.h
0 → 100644
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <stdint.h>
#include "model_config.pb.h"
namespace triton { namespace core {

/// Get the CUDA stream priority for a given ModelPriority
/// \param priority The inference::ModelOptimizationPolicy::ModelPriority
/// priority. \param cuda_stream_priority Returns the CUDA stream priority.
/// \return The error status.
int GetCudaStreamPriority(
    inference::ModelOptimizationPolicy::ModelPriority priority);

}}  // namespace triton::core
3rdparty/core-r22.12/src/model_config_utils.cc
0 → 100644
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model_config_utils.h"
#include <google/protobuf/util/json_util.h>
#include <deque>
#include <mutex>
#include <set>
#include "constants.h"
#include "cuda_utils.h"
#include "filesystem.h"
#include "triton/common/logging.h"
#define TRITONJSON_STATUSTYPE triton::core::Status
#define TRITONJSON_STATUSRETURN(M) \
return triton::core::Status(triton::core::Status::Code::INTERNAL, (M))
#define TRITONJSON_STATUSSUCCESS triton::core::Status::Success
#include "triton/common/triton_json.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace core {

namespace {

#ifdef TRITON_ENABLE_ENSEMBLE

struct EnsembleTensor {
  EnsembleTensor(bool isOutput) : ready(false), isOutput(isOutput) {}
  bool ready;
  bool isOutput;
  std::vector<EnsembleTensor*> prev_nodes;
  std::vector<EnsembleTensor*> next_nodes;
};
/// Build a graph that represents the data flow in the ensemble specified in
/// given model config. the node (ensemble tensor) in the graph can be looked
/// up using its name as key.
/// \param ensemble_config The model configuration that specifies
/// ensemble_scheduling field.
/// \param keyed_ensemble_graph Returned the ensemble graph.
/// \return The error status. A non-OK status indicates the build fails because
/// the ensemble configuration is not valid.
Status
BuildEnsembleGraph(
    const inference::ModelConfig& config,
    std::unordered_map<std::string, EnsembleTensor>& keyed_ensemble_graph)
{
  keyed_ensemble_graph.clear();
  size_t step_idx = 0;
  for (const auto& element : config.ensemble_scheduling().step()) {
    if (element.model_name().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "must specify 'model_name' in step " + std::to_string(step_idx) +
              " of ensemble '" + config.name() + "'");
    }
    if (element.input_map().size() == 0) {
      return Status(
          Status::Code::INVALID_ARG,
          "must specify 'input_map' in step " + std::to_string(step_idx) +
              " of ensemble '" + config.name() + "'");
    }
    if (element.output_map().size() == 0) {
      return Status(
          Status::Code::INVALID_ARG,
          "must specify 'output_map' in step " + std::to_string(step_idx) +
              " of ensemble '" + config.name() + "'");
    }

    // Link ensemble tensors
    std::vector<EnsembleTensor*> tensor_as_output;
    for (const auto& output_map : element.output_map()) {
      auto it = keyed_ensemble_graph.find(output_map.second);
      if (it != keyed_ensemble_graph.end()) {
        if (it->second.isOutput) {
          return Status(
              Status::Code::INVALID_ARG,
              "ensemble tensor '" + it->first +
                  "' can appear in an output map only once for ensemble '" +
                  config.name() + "' step " + std::to_string(step_idx));
        } else {
          it->second.isOutput = true;
        }
      } else {
        it = keyed_ensemble_graph
                 .emplace(
                     std::make_pair(output_map.second, EnsembleTensor(true)))
                 .first;
      }
      tensor_as_output.push_back(&(it->second));
    }

    std::set<std::string> model_inputs;
    for (const auto& input_map : element.input_map()) {
      if (model_inputs.find(input_map.first) != model_inputs.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "input '" + input_map.first + "' in model '" +
                element.model_name() +
                "' is mapped to multiple ensemble tensors for ensemble '" +
                config.name() + "' step " + std::to_string(step_idx));
      } else {
        model_inputs.emplace(input_map.first);
      }
      auto it = keyed_ensemble_graph.find(input_map.second);
      if (it == keyed_ensemble_graph.end()) {
        it = keyed_ensemble_graph
                 .emplace(
                     std::make_pair(input_map.second, EnsembleTensor(false)))
                 .first;
      }
      for (auto output : tensor_as_output) {
        output->prev_nodes.push_back(&(it->second));
        it->second.next_nodes.push_back(output);
      }
    }

    step_idx++;
  }

  return Status::Success;
}
Status
ValidateEnsembleSchedulingConfig(const inference::ModelConfig& config)
{
  if (config.platform() != kEnsemblePlatform) {
    return Status(
        Status::Code::INVALID_ARG,
        "ensemble scheduling cannot be set for model '" + config.name() +
            "' whose platform is not " + kEnsemblePlatform);
  }
  if (config.instance_group().size() != 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "instance group should not be specified for ensemble '" +
            config.name() + "'");
  }
  if (config.has_optimization()) {
    return Status(
        Status::Code::INVALID_ARG,
        "optimization should not be specified for ensemble '" +
            config.name() + "'");
  }
  if (config.model_warmup_size() != 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "model_warmup can not be specified for ensemble '" + config.name() +
            "'");
  }

  // Make sure step is not empty and all fields are set
  if (config.ensemble_scheduling().step_size() == 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "must specify 'step' for ensemble '" + config.name() + "'");
  }

  std::unordered_map<std::string, EnsembleTensor> tensors;

  RETURN_IF_ERROR(BuildEnsembleGraph(config, tensors));

  // check data flow
  std::deque<EnsembleTensor*> ready_queue;
  for (const auto& input : config.input()) {
    auto it = tensors.find(input.name());
    if (it == tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "ensemble input '" + input.name() + "' for ensemble " +
              config.name() + "' is not used");
    }
    it->second.ready = true;
    ready_queue.push_back(&(it->second));
  }
  while (!ready_queue.empty()) {
    auto& ready_node = ready_queue.front();
    for (auto& next_node : ready_node->next_nodes) {
      if (next_node->ready) {
        continue;
      }
      bool next_node_ready = true;
      for (auto& prev_node : next_node->prev_nodes) {
        if (!prev_node->ready) {
          next_node_ready = false;
          break;
        }
      }
      next_node->ready = next_node_ready;
      if (next_node_ready) {
        ready_queue.push_back(next_node);
      }
    }
    ready_queue.pop_front();
  }
  std::set<std::string> outputs;
  for (const auto& output : config.output()) {
    auto it = tensors.find(output.name());
    if (it == tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "ensemble output '" + output.name() + "' for ensemble " +
              config.name() + "' is not used");
    }
    if (!it->second.ready) {
      return Status(
          Status::Code::INVALID_ARG,
          "output '" + output.name() + "' for ensemble '" + config.name() +
              "' is not written");
    } else {
      outputs.insert(it->first);
    }
  }
  // Check redundant ensemble tensors
  for (const auto& tensor : tensors) {
    // skip ensemble outputs as they have been checked and can have no
    // next nodes
    if (outputs.find(tensor.first) != outputs.end()) {
      continue;
    }
    if (!tensor.second.ready || (tensor.second.next_nodes.size() == 0)) {
      return Status(
          Status::Code::INVALID_ARG,
          "ensemble tensor '" + tensor.first + "' is unused in ensemble '" +
              config.name() + "'");
    }
  }
  return Status::Success;
}
#endif // TRITON_ENABLE_ENSEMBLE
template <class ModelIO>
Status
ValidateIOShape(
    const ModelIO& io, int32_t max_batch_size,
    const std::string& message_prefix = "")
{
  if (io.name().empty()) {
    return Status(
        Status::Code::INVALID_ARG, message_prefix + "must specify 'name'");
  }

  if (io.data_type() == inference::DataType::TYPE_INVALID) {
    return Status(
        Status::Code::INVALID_ARG, "model output must specify 'data_type'");
  }

  if (io.dims_size() == 0) {
    return Status(
        Status::Code::INVALID_ARG, message_prefix + "must specify 'dims'");
  }

  // If the configuration is non-batching, then no input or output
  // reshape can be empty as that would mean that input or output was
  // always empty (no data).
  if (io.has_reshape() && (io.reshape().shape_size() == 0) &&
      (max_batch_size == 0)) {
    return Status(
        Status::Code::INVALID_ARG,
        message_prefix +
            "cannot have empty reshape for non-batching model as scalar "
            "tensors are not supported");
  }

  for (auto dim : io.dims()) {
    // Dimension cannot be 0.
    if ((dim < 1) && (dim != triton::common::WILDCARD_DIM)) {
      return Status(
          Status::Code::INVALID_ARG,
          message_prefix + "dimension must be integer >= 1, or " +
              std::to_string(triton::common::WILDCARD_DIM) +
              " to indicate a variable-size dimension");
    }
  }

  if (io.has_reshape()) {
    // Zeros are not allowed in reshape.
    for (auto dim : io.reshape().shape()) {
      if ((dim < 1) && (dim != triton::common::WILDCARD_DIM)) {
        return Status(
            Status::Code::INVALID_ARG,
            message_prefix + "reshape dimensions must be integer >= 1, or " +
                std::to_string(triton::common::WILDCARD_DIM) +
                " to indicate a variable-size dimension");
      }
    }

    const int64_t dims_size = triton::common::GetElementCount(io.dims());
    const int64_t reshape_size =
        triton::common::GetElementCount(io.reshape().shape());

    // dims and reshape must both have same element count
    // or both have variable-size dimension.
    // Special case for empty reshape... expect dims to have element
    // count of 1.
    if ((dims_size != reshape_size) &&
        ((reshape_size != 0) || (dims_size != 1))) {
      return Status(
          Status::Code::INVALID_ARG,
          message_prefix + "has different size for dims and reshape");
    }

    // shape contains variable-size dimension, in this case we compare if
    // each pair of the trunks separated by variable-size dimension has
    // the same element count. For instance, from [2, 4, -1, 6] to [8, -1, 1, 6]
    // is valid reshape as 2 * 4 = 8 and 6 = 1 * 6.
    if (dims_size == -1) {
      std::vector<int64_t> dim_element_cnts;
      std::vector<int64_t> reshape_element_cnts;
      int64_t current_cnt = 1;
      for (const auto& dim : io.dims()) {
        if (dim != -1) {
          current_cnt *= dim;
        } else {
          dim_element_cnts.push_back(current_cnt);
          current_cnt = 1;
        }
      }
      dim_element_cnts.push_back(current_cnt);

      current_cnt = 1;
      for (const auto& dim : io.reshape().shape()) {
        if (dim != -1) {
          current_cnt *= dim;
        } else {
          reshape_element_cnts.push_back(current_cnt);
          current_cnt = 1;
        }
      }
      reshape_element_cnts.push_back(current_cnt);

      if (dim_element_cnts.size() != reshape_element_cnts.size()) {
        return Status(
            Status::Code::INVALID_ARG,
            message_prefix +
                "has different number of variable-size dimensions for dims "
                "and reshape");
      }
      for (size_t idx = 0; idx < dim_element_cnts.size(); idx++) {
        if (dim_element_cnts[idx] != reshape_element_cnts[idx]) {
          return Status(
              Status::Code::INVALID_ARG,
              message_prefix + "has different size for dims and reshape");
        }
      }
    }
  }

  return Status::Success;
}

}  // namespace
Status
GetModelVersionFromPath(const std::string& path, int64_t* version)
{
  auto version_dir = BaseName(path);

  // Determine the version from the last segment of 'path'
  try {
    *version = std::atoll(version_dir.c_str());
  }
  catch (...) {
    return Status(
        Status::Code::INTERNAL,
        "unable to determine model version from " + path);
  }

  return Status::Success;
}
Status
GetBooleanSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype, float* fp32_false_value,
    float* fp32_true_value, int32_t* int32_false_value,
    int32_t* int32_true_value, bool* bool_false_value, bool* bool_true_value)
{
  // Make sure same tensor is not configured for multiple controls
  std::set<std::string> seen_tensors;

  // Make sure the control kind is not mentioned multiple times.
  bool seen_control = false;

  for (const auto& control_input : batcher.control_input()) {
    if (control_input.name().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must have a name for " +
              model_name);
    }

    if (seen_tensors.find(control_input.name()) != seen_tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor '" + control_input.name() +
              "' is specified for multiple control kinds for " + model_name);
    }

    seen_tensors.insert(control_input.name());

    for (const auto& c : control_input.control()) {
      if (c.kind() == control_kind) {
        if (seen_control) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching specifies multiple " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " tensors for " + model_name);
        }

        *tensor_name = control_input.name();
        seen_control = true;

        // Make sure only one of int, float, or bool type is specified.
        if (!((c.int32_false_true_size() != 0) ||
              (c.fp32_false_true_size() != 0) ||
              (c.bool_false_true_size() != 0))) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching must specify either 'int32_false_true', "
              "'fp32_false_true' or 'bool_false_true' for " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " for " + model_name);
        } else if (
            ((c.int32_false_true_size() != 0) &&
             (c.fp32_false_true_size() != 0)) ||
            ((c.int32_false_true_size() != 0) &&
             (c.bool_false_true_size() != 0)) ||
            ((c.fp32_false_true_size() != 0) &&
             (c.bool_false_true_size() != 0))) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching specifies more than one from "
              "'int32_false_true', 'fp32_false_true' and 'bool_false_true' "
              "for " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " for " + model_name);
        }

        if (c.int32_false_true_size() > 0) {
          if (c.int32_false_true_size() != 2) {
            return Status(
                Status::Code::INVALID_ARG,
                "sequence batching control 'int32_false_true' must have "
                "exactly 2 entries for " +
                    inference::ModelSequenceBatching_Control_Kind_Name(
                        control_kind) +
                    " for " + model_name);
          }

          if (tensor_datatype != nullptr) {
            *tensor_datatype = inference::DataType::TYPE_INT32;
          }
          if (int32_false_value != nullptr) {
            *int32_false_value = c.int32_false_true(0);
          }
          if (int32_true_value != nullptr) {
            *int32_true_value = c.int32_false_true(1);
          }
        } else if (c.fp32_false_true_size() > 0) {
          if (c.fp32_false_true_size() != 2) {
            return Status(
                Status::Code::INVALID_ARG,
                "sequence batching control 'fp32_false_true' must have exactly "
                "2 entries for " +
                    inference::ModelSequenceBatching_Control_Kind_Name(
                        control_kind) +
                    " for " + model_name);
          }

          if (tensor_datatype != nullptr) {
            *tensor_datatype = inference::DataType::TYPE_FP32;
          }
          if (fp32_false_value != nullptr) {
            *fp32_false_value = c.fp32_false_true(0);
          }
          if (fp32_true_value != nullptr) {
            *fp32_true_value = c.fp32_false_true(1);
          }
        } else {
          if (c.bool_false_true_size() != 2) {
            return Status(
                Status::Code::INVALID_ARG,
                "sequence batching control 'bool_false_true' must have exactly "
                "2 entries for " +
                    inference::ModelSequenceBatching_Control_Kind_Name(
                        control_kind) +
                    " for " + model_name);
          }

          if (tensor_datatype != nullptr) {
            *tensor_datatype = inference::DataType::TYPE_BOOL;
          }
          if (bool_false_value != nullptr) {
            *bool_false_value = c.bool_false_true(0);
          }
          if (bool_true_value != nullptr) {
            *bool_true_value = c.bool_false_true(1);
          }
        }
      }
    }
  }

  if (!seen_control) {
    if (required) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must specify a " +
              inference::ModelSequenceBatching_Control_Kind_Name(
                  control_kind) +
              " value for " + model_name);
    }

    tensor_name->clear();
  }

  return Status::Success;
}
Status
GetTypedSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype)
{
  // Make sure same tensor is not configured for multiple controls
  std::set<std::string> seen_tensors;

  // Make sure the control kind is not mentioned multiple times.
  bool seen_control = false;

  for (const auto& control_input : batcher.control_input()) {
    if (control_input.name().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must have a name for " +
              model_name);
    }

    if (seen_tensors.find(control_input.name()) != seen_tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor '" + control_input.name() +
              "' is specified for multiple control kinds for " + model_name);
    }

    seen_tensors.insert(control_input.name());

    for (const auto& c : control_input.control()) {
      if (c.kind() == control_kind) {
        if (seen_control) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching specifies multiple " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " tensors for " + model_name);
        }

        *tensor_name = control_input.name();
        if (tensor_datatype != nullptr) {
          *tensor_datatype = c.data_type();
        }

        seen_control = true;

        if ((c.int32_false_true_size() > 0) ||
            (c.fp32_false_true_size() > 0) ||
            (c.bool_false_true_size() > 0)) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching must not specify either 'int32_false_true', "
              "'fp32_false_true' or 'bool_false_true' for " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " for " + model_name);
        }
      }
    }
  }

  if (!seen_control) {
    if (required) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must specify a " +
              inference::ModelSequenceBatching_Control_Kind_Name(
                  control_kind) +
              " value for " + model_name);
    }

    tensor_name->clear();
  }

  return Status::Success;
}
Status
GetNormalizedModelConfig(
    const std::string& model_name, const std::string& path,
    const double min_compute_capability, inference::ModelConfig* config)
{
  // Server-side autofill only sets certain backend fields for the models that
  // belong to limited backends for backwards-compatibility. See TensorRT
  // backend, ONNX Runtime backend, OpenVINO backend, TensorFlow backend, and
  // PyTorch backend.
  // Extracting detailed information is delegated to the backend implementation
  // to auto-complete.
  RETURN_IF_ERROR(
      AutoCompleteBackendFields(model_name, std::string(path), config));

  LOG_VERBOSE(1) << "Server side auto-completed config: "
                 << config->DebugString();

  RETURN_IF_ERROR(NormalizeModelConfig(min_compute_capability, config));

  return Status::Success;
}
Status
NormalizeModelConfig(
    const double min_compute_capability, inference::ModelConfig* config)
{
  // If version_policy is not specified, default to Latest 1 version.
  if (!config->has_version_policy()) {
    inference::ModelVersionPolicy::Latest latest;
    latest.set_num_versions(1);
    config->mutable_version_policy()->mutable_latest()->CopyFrom(latest);
  }

  // If dynamic batching is specified...
  if (config->has_dynamic_batching()) {
    // If preferred batch size is not specified set it to
    // max-batch-size.
    if (config->dynamic_batching().preferred_batch_size().size() == 0) {
      auto mutable_preferred_batch_size =
          config->mutable_dynamic_batching()->mutable_preferred_batch_size();
      if (config->max_batch_size() > 0) {
        mutable_preferred_batch_size->Add(config->max_batch_size());
      }
    }
  }

  // If sequence batching is specified...
  if (config->has_sequence_batching()) {
    // Set default idle if not specified.
    if (config->sequence_batching().max_sequence_idle_microseconds() == 0) {
      config->mutable_sequence_batching()->set_max_sequence_idle_microseconds(
          SEQUENCE_IDLE_DEFAULT_MICROSECONDS);
    }

    if (config->sequence_batching().has_oldest()) {
      // If preferred batch size is not specified set it to
      // max-batch-size.
      if (config->sequence_batching().oldest().preferred_batch_size().size() ==
          0) {
        auto mutable_preferred_batch_size =
            config->mutable_sequence_batching()
                ->mutable_oldest()
                ->mutable_preferred_batch_size();
        if (config->max_batch_size() > 0) {
          mutable_preferred_batch_size->Add(config->max_batch_size());
        }
      }
    }
  }

  // If model ensembling is specified, don't attempt to normalize instance_group
  // as it is not allowed in ensemble scheduling
  if (!config->has_ensemble_scheduling()) {
    auto optimization = config->mutable_optimization();
    if (!optimization->has_input_pinned_memory()) {
      optimization->mutable_input_pinned_memory()->set_enable(true);
    }
    if (!optimization->has_output_pinned_memory()) {
      optimization->mutable_output_pinned_memory()->set_enable(true);
    }
  }

  return Status::Success;
}
Status
NormalizeInstanceGroup(
    const double min_compute_capability,
    const std::vector<inference::ModelInstanceGroup>& preferred_groups,
    inference::ModelConfig* config)
{
  // Instance group setting doesn't apply to ensemble
  if (config->has_ensemble_scheduling()) {
    return Status::Success;
  }

  // Creates a set of supported GPU device ids
  std::set<int> supported_gpus;
#ifdef TRITON_ENABLE_GPU
  // Get the total number of GPUs from the runtime library.
  Status status = GetSupportedGPUs(&supported_gpus, min_compute_capability);
  if (!status.IsOk()) {
    return status;
  }
#endif  // TRITON_ENABLE_GPU

  // Make sure there is at least one instance_group.
  if (config->instance_group().empty()) {
    inference::ModelInstanceGroup* group = config->add_instance_group();
    group->set_name(config->name());
    for (const auto& pg : preferred_groups) {
      group->set_kind(pg.kind());
      group->set_count(pg.count());
      // handle preferred GPU setting differently based on kind
      if (pg.kind() == inference::ModelInstanceGroup::KIND_GPU) {
        // Don't use preferred group with KIND_GPU if there is no GPU.
        if (supported_gpus.empty()) {
          continue;
        }
        // If preferred group sets GPUs, limit deployment onto those that
        // are also listed in supported gpus
        if (!pg.gpus().empty()) {
          for (const int32_t gid : pg.gpus()) {
            if (supported_gpus.find(gid) != supported_gpus.end()) {
              group->add_gpus(gid);
            }
          }
        }
        break;
      } else if (pg.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
        // if AUTO, then set preferred GPU as is, to align with KIND_AUTO
        // deduction specified below
        for (const int32_t gid : pg.gpus()) {
          group->add_gpus(gid);
        }
        break;
      }
      // Other kind should not set GPUs
      break;
    }
  }

  // Assign default name, kind and count to each instance group that
  // doesn't give those values explicitly. For KIND_GPU, set GPUs to
  // all available if not specified explicitly.
  size_t cnt = 0;
  for (auto& group : *config->mutable_instance_group()) {
    // Name
    if (group.name().empty()) {
      group.set_name(config->name() + "_" + std::to_string(cnt));
    }
    cnt++;

    // For KIND_AUTO... if there are no GPUs or if any of the listed
    // 'gpu's are not present, then use KIND_CPU.
    if (group.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
      if (supported_gpus.empty()) {
        group.set_kind(inference::ModelInstanceGroup::KIND_CPU);
      } else {
        for (const int32_t gid : group.gpus()) {
          if (supported_gpus.find(gid) == supported_gpus.end()) {
            group.set_kind(inference::ModelInstanceGroup::KIND_CPU);
            break;
          }
        }
      }

      if (group.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
        group.set_kind(inference::ModelInstanceGroup::KIND_GPU);
      }
    }

    // KIND is resolved at this point
    for (const auto& pg : preferred_groups) {
      if (group.kind() != pg.kind()) {
        continue;
      }
      // Limit the GPU setting within what is specified in the preferred group,
      // if no available GPU then skip to next preferred group
      if ((group.kind() == inference::ModelInstanceGroup::KIND_GPU) &&
          group.gpus().empty() && !pg.gpus().empty()) {
        for (const int32_t gid : pg.gpus()) {
          if (supported_gpus.find(gid) != supported_gpus.end()) {
            group.add_gpus(gid);
          }
        }
        if (group.gpus().empty()) {
          continue;
        }
      }
      if ((group.count() < 1) && (pg.count() > 0)) {
        group.set_count(pg.count());
      }
    }

    // Set Triton default if the fields are not set from preferred group
    // Count
    if (group.count() < 1) {
      RETURN_IF_ERROR(SetDefaultInstanceCount(&group, config->backend()));
    }

    // GPUs
    if ((group.kind() == inference::ModelInstanceGroup::KIND_GPU) &&
        (group.gpus().size() == 0)) {
      for (auto d : supported_gpus) {
        group.add_gpus(d);
      }
    }
  }

  return Status::Success;
}
Status
LocalizePythonBackendExecutionEnvironmentPath(
    const std::string& model_path, inference::ModelConfig* config,
    std::shared_ptr<LocalizedPath>* localized_model_dir)
{
  if (config->backend() == "python") {
    if (config->parameters().contains("EXECUTION_ENV_PATH")) {
      // Read EXECUTION_ENV_PATH
      std::string exec_env_path =
          config->parameters().at("EXECUTION_ENV_PATH").string_value();
      // Replace model directory variable with model_path
      std::string model_dir_var = "$$TRITON_MODEL_DIRECTORY";
      if (exec_env_path.substr(0, model_dir_var.size()) == model_dir_var) {
        exec_env_path.replace(0, model_dir_var.size(), model_path);
      }
      // Collapse any .. in the path
      std::string abs_exec_env_path;
      std::size_t prev_pos = exec_env_path.size();
      std::size_t pos = exec_env_path.find_last_of('/', prev_pos - 1);
      int skip = 0;
      while (pos != std::string::npos && prev_pos > 0) {
        if (!skip) {
          abs_exec_env_path =
              exec_env_path.substr(pos, prev_pos - pos) + abs_exec_env_path;
        }
        skip = skip > 0 ? skip - 1 : skip;
        if (pos >= 3 && exec_env_path.substr(pos - 3, 3) == "/..") {
          skip += 2;
        }
        prev_pos = pos;
        pos = exec_env_path.find_last_of('/', prev_pos - 1);
      }
      abs_exec_env_path = exec_env_path.substr(0, prev_pos) + abs_exec_env_path;
      // Localize iff abs_exec_env_path is outside the model directory
      std::string model_path_slash =
          model_path.back() == '/' ? model_path : model_path + "/";
      if (abs_exec_env_path.substr(0, model_path_slash.size()) !=
          model_path_slash) {
        // Localize the file
        std::shared_ptr<LocalizedPath> localized_exec_env_path;
        RETURN_IF_ERROR(
            LocalizePath(abs_exec_env_path, &localized_exec_env_path));
        // Persist the localized temporary path
        (*localized_model_dir)
            ->other_localized_path.push_back(localized_exec_env_path);
        // Rewrite EXECUTION_ENV_PATH
        config->mutable_parameters()
            ->at("EXECUTION_ENV_PATH")
            .set_string_value(localized_exec_env_path->Path());
      }
    }
  }
  return Status::Success;
}
Status
SetDefaultInstanceCount(
    inference::ModelInstanceGroup* group, const std::string& backend)
{
  group->set_count(1);
  // Backends opt into the default_cpu_instance_count since
  // some backends (pytorch, OpenVINO) don't perform well/have high overhead
  // when using multiple instances.
  const int default_cpu_instance_count = 2;
  bool use_default_cpu_instance_count =
      (backend == kTensorFlowBackend) || (backend == kOnnxRuntimeBackend);
  if (group->kind() == inference::ModelInstanceGroup::KIND_CPU &&
      use_default_cpu_instance_count) {
    group->set_count(default_cpu_instance_count);
  }
  return Status::Success;
}
Status
AutoCompleteBackendFields(
    const std::string& model_name, const std::string& model_path,
    inference::ModelConfig* config)
{
  std::set<std::string> version_dirs;
  RETURN_IF_ERROR(GetDirectorySubdirs(model_path, &version_dirs));

  // There must be at least one version directory that we can inspect to
  // attempt to determine the platform. If not, we skip autofill with file name.
  // For now we allow multiple versions and only inspect the first version
  // directory to ensure it is valid. We can add more aggressive checks later.
  const bool has_version = (version_dirs.size() != 0);
  const auto version_path =
      has_version ? JoinPath({model_path, *(version_dirs.begin())}) : "";
  std::set<std::string> version_dir_content;
  if (has_version) {
    RETURN_IF_ERROR(GetDirectoryContents(version_path, &version_dir_content));
  }

  // If the model name is not given in the configuration, set it based
  // on the model path.
  if (config->name().empty()) {
    config->set_name(model_name);
  }

  // Trying to fill the 'backend', 'default_model_filename' field.

  // TensorFlow
  // For TF backend, the platform is required
  if (config->platform().empty()) {
    // Check 'backend', 'default_model_filename', and the actual directory
    // to determine the platform
    if (config->backend().empty() ||
        (config->backend() == kTensorFlowBackend)) {
      if (config->default_model_filename() == kTensorFlowSavedModelFilename) {
        config->set_platform(kTensorFlowSavedModelPlatform);
      } else if (
          config->default_model_filename() == kTensorFlowGraphDefFilename) {
        config->set_platform(kTensorFlowGraphDefPlatform);
      } else if (config->default_model_filename().empty() && has_version) {
        bool is_dir = false;
        if (version_dir_content.find(kTensorFlowSavedModelFilename) !=
            version_dir_content.end()) {
          RETURN_IF_ERROR(IsDirectory(
              JoinPath({version_path, kTensorFlowSavedModelFilename}),
              &is_dir));
          if (is_dir) {
            config->set_platform(kTensorFlowSavedModelPlatform);
          }
        }
        if (version_dir_content.find(kTensorFlowGraphDefFilename) !=
            version_dir_content.end()) {
          RETURN_IF_ERROR(IsDirectory(
              JoinPath({version_path, kTensorFlowGraphDefFilename}), &is_dir));
          if (!is_dir) {
            config->set_platform(kTensorFlowGraphDefPlatform);
          }
        }
      }
    }
  }
  // Fill 'backend' and 'default_model_filename' if missing
  if ((config->platform() == kTensorFlowSavedModelPlatform) ||
      (config->platform() == kTensorFlowGraphDefPlatform)) {
    if (config->backend().empty()) {
      config->set_backend(kTensorFlowBackend);
    }
    if (config->default_model_filename().empty()) {
      if (config->platform() == kTensorFlowSavedModelPlatform) {
        config->set_default_model_filename(kTensorFlowSavedModelFilename);
      } else {
        config->set_default_model_filename(kTensorFlowGraphDefFilename);
      }
    }
    return Status::Success;
  }

  // TensorRT
  if (config->backend().empty()) {
    if ((config->platform() == kTensorRTPlanPlatform) ||
        (config->default_model_filename() == kTensorRTPlanFilename)) {
      config->set_backend(kTensorRTBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      bool is_dir = false;
      if (version_dir_content.find(kTensorRTPlanFilename) !=
          version_dir_content.end()) {
        RETURN_IF_ERROR(IsDirectory(
            JoinPath({version_path, kTensorRTPlanFilename}), &is_dir));
        if (!is_dir) {
          config->set_backend(kTensorRTBackend);
        }
      }
    }
  }
  if (config->backend() == kTensorRTBackend) {
    if (config->platform().empty()) {
      config->set_platform(kTensorRTPlanPlatform);
    }
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kTensorRTPlanFilename);
    }
    return Status::Success;
  }

  // ONNXRuntime
  if (config->backend().empty()) {
    if ((config->platform() == kOnnxRuntimeOnnxPlatform) ||
        (config->default_model_filename() == kOnnxRuntimeOnnxFilename)) {
      config->set_backend(kOnnxRuntimeBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      if (version_dir_content.find(kOnnxRuntimeOnnxFilename) !=
          version_dir_content.end()) {
        // ONNX model can be a file or a directory in the case of large model
        config->set_backend(kOnnxRuntimeBackend);
      }
    }
  }
  if (config->backend() == kOnnxRuntimeBackend) {
    if (config->platform().empty()) {
      config->set_platform(kOnnxRuntimeOnnxPlatform);
    }
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kOnnxRuntimeOnnxFilename);
    }
    return Status::Success;
  }

  // OpenVINO
  if (config->backend().empty()) {
    if (config->default_model_filename() == kOpenVINORuntimeOpenVINOFilename) {
      config->set_backend(kOpenVINORuntimeBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      if (version_dir_content.find(kOpenVINORuntimeOpenVINOFilename) !=
          version_dir_content.end()) {
        config->set_backend(kOpenVINORuntimeBackend);
      }
    }
  }
  if (config->backend() == kOpenVINORuntimeBackend) {
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kOpenVINORuntimeOpenVINOFilename);
    }
    return Status::Success;
  }

  // PyTorch (TorchScript, LibTorch)
  if (config->backend().empty()) {
    if ((config->platform() == kPyTorchLibTorchPlatform) ||
        (config->default_model_filename() == kPyTorchLibTorchFilename)) {
      config->set_backend(kPyTorchBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      bool is_dir = false;
      if (version_dir_content.find(kPyTorchLibTorchFilename) !=
          version_dir_content.end()) {
        RETURN_IF_ERROR(IsDirectory(
            JoinPath({version_path, kPyTorchLibTorchFilename}), &is_dir));
        if (!is_dir) {
          config->set_backend(kPyTorchBackend);
        }
      }
    }
  }
  if (config->backend() == kPyTorchBackend) {
    if (config->platform().empty()) {
      config->set_platform(kPyTorchLibTorchPlatform);
    }
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kPyTorchLibTorchFilename);
    }
    return Status::Success;
  }

  // Python
  if (config->backend().empty()) {
    if (config->default_model_filename() == kPythonFilename) {
      config->set_backend(kPythonBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      if (version_dir_content.find(kPythonFilename) !=
          version_dir_content.end()) {
        config->set_backend(kPythonBackend);
      }
    }
  }
  if (config->backend() == kPythonBackend) {
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kPythonFilename);
    }
    return Status::Success;
  }

  // Custom Backend
  // For now, only do the narrowest case, where no info is given in the config.
  if (config->backend().empty() && config->platform().empty() &&
      config->default_model_filename().empty()) {
    LOG_VERBOSE(1) << "Could not infer supported backend, so attempting "
                      "autofill of custom backend.";
    // Since we lazily load the backends, we let the model tell us what backend
    // to load. We must assume that if the model name conforms to the required
    // shape, we parse the backend name out of the model file name. i.e.
    // model.identity will set the backend to "identity".
    const std::string delimiter = ".";
    size_t pos = model_name.find(delimiter, 0);
    if (pos == std::string::npos) {
      return Status(
          triton::common::Error::Code::INVALID_ARG,
          ("Invalid model name: Could not determine backend for model '" +
           model_name +
           "' with no backend in model configuration. Expected model name of "
           "the form 'model.<backend_name>'."));
    }
    const std::string backend_name =
        model_name.substr(pos + 1, std::string::npos);
    config->set_backend(backend_name);
    config->set_default_model_filename(
        (std::string("model.") + backend_name).c_str());
    return Status::Success;
  }

  return Status::Success;
}
Status
ValidateModelIOConfig(const inference::ModelConfig& config)
{
  Status status;
  for (const auto& io : config.input()) {
    status =
        ValidateModelInput(io, config.max_batch_size(), config.platform());
    if (!status.IsOk()) {
      return Status(
          status.StatusCode(), status.Message() + " for " + config.name());
    }
  }
  for (const auto& io : config.output()) {
    status =
        ValidateModelOutput(io, config.max_batch_size(), config.platform());
    if (!status.IsOk()) {
      return Status(
          status.StatusCode(), status.Message() + " for " + config.name());
    }
  }
  status = ValidateBatchIO(config);
  if (!status.IsOk()) {
    return Status(
        status.StatusCode(), status.Message() + " for " + config.name());
  }
  return Status::Success;
}
Status
ValidateBatchIO(const inference::ModelConfig& config)
{
  std::set<std::string> input_names;
  std::set<std::string> output_names;
  for (const auto& io : config.input()) {
    input_names.emplace(io.name());
  }
  for (const auto& io : config.output()) {
    output_names.emplace(io.name());
  }
  for (const auto& batch_io : config.batch_input()) {
    switch (batch_io.kind()) {
      case inference::BatchInput::BATCH_ELEMENT_COUNT:
      case inference::BatchInput::BATCH_ACCUMULATED_ELEMENT_COUNT:
      case inference::BatchInput::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO:
      case inference::BatchInput::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE:
      case inference::BatchInput::BATCH_ITEM_SHAPE:
      case inference::BatchInput::BATCH_ITEM_SHAPE_FLATTEN: {
        if (batch_io.source_input_size() != 1) {
          return Status(
              Status::Code::INVALID_ARG,
              "batch input kind '" +
                  inference::BatchInput::Kind_Name(batch_io.kind()) +
                  "' expects 1 source input, got " +
                  std::to_string(batch_io.source_input_size()));
        }
        break;
      }
      default:
        return Status(
            Status::Code::INVALID_ARG,
            "unknown batch input kind '" +
                inference::BatchInput::Kind_Name(batch_io.kind()) + "'");
    }
    if ((batch_io.data_type() != inference::DataType::TYPE_INT32) &&
        (batch_io.data_type() != inference::DataType::TYPE_FP32)) {
      return Status(
          Status::Code::INVALID_ARG,
          "batch input data type must be TYPE_INT32 or TYPE_FP32");
    }
    for (const auto& source_name : batch_io.source_input()) {
      if (input_names.find(source_name) == input_names.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "unknown source input name '" + source_name + "'");
      }
    }
  }
  for (const auto& batch_io : config.batch_output()) {
    switch (batch_io.kind()) {
      case inference::BatchOutput::BATCH_SCATTER_WITH_INPUT_SHAPE: {
        if (batch_io.source_input_size() != 1) {
          return Status(
              Status::Code::INVALID_ARG,
              "batch output kind '" +
                  inference::BatchOutput::Kind_Name(batch_io.kind()) +
                  "' expects 1 source input, got " +
                  std::to_string(batch_io.source_input_size()));
        }
        break;
      }
      default:
        return Status(
            Status::Code::INVALID_ARG,
            "unknown batch output kind '" +
                inference::BatchOutput::Kind_Name(batch_io.kind()) + "'");
    }
    for (const auto& source_name : batch_io.source_input()) {
      if (input_names.find(source_name) == input_names.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "unknown source input name '" + source_name + "'");
      }
    }
    std::set<std::string> target_names;
    for (const auto& target_name : batch_io.target_name()) {
      if (output_names.find(target_name) == output_names.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "unknown target output name '" + target_name + "'");
      }
      if (target_names.emplace(target_name).second == false) {
        return Status(
            Status::Code::INVALID_ARG,
            "target output name '" + target_name +
                "' can only be specified once");
      }
    }
  }
  return Status::Success;
}
Status
ValidateModelConfig
(
const
inference
::
ModelConfig
&
config
,
const
double
min_compute_capability
)
{
if
(
config
.
name
().
empty
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"model configuration must specify 'name'"
);
}
if
(
config
.
backend
().
empty
())
{
// Expect backend is not empty unless it is ensemble platform.
#ifdef TRITON_ENABLE_ENSEMBLE
if
(
config
.
platform
()
!=
kEnsemblePlatform
)
#endif // TRITON_ENABLE_ENSEMBLE
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"unexpected platform type '"
+
config
.
platform
()
+
"' for "
+
config
.
name
());
}
#ifdef TRITON_ENABLE_ENSEMBLE
else
if
(
config
.
platform
()
==
kEnsemblePlatform
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Ensemble model '"
+
config
.
name
()
+
"' must have platform type '"
+
config
.
platform
()
+
"' and empty backend type"
);
}
#endif // TRITON_ENABLE_ENSEMBLE
if
(
config
.
platform
().
empty
()
&&
config
.
backend
().
empty
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"must specify 'platform' or 'backend' for '"
+
config
.
name
()
+
"'"
);
}
// Ensure both platform and backend are referring to known backend,
// or both referring to unknown backend for user-provided backend.
if
(
GetBackendTypeFromPlatform
(
config
.
platform
())
!=
GetBackendType
(
config
.
backend
()))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"unexpected 'platform' and 'backend' pair, got:"
+
config
.
platform
()
+
", "
+
config
.
backend
());
}
if
(
config
.
max_batch_size
()
<
0
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"'max_batch_size' must be non-negative value for "
+
config
.
name
());
}
if
(
!
config
.
has_version_policy
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"must specify 'version policy' for "
+
config
.
name
());
}
// If dynamic batching is specified make sure the preferred batch
// sizes are positive and don't exceed maximum batch size.
if
(
config
.
has_dynamic_batching
())
{
for
(
const
auto
size
:
config
.
dynamic_batching
().
preferred_batch_size
())
{
if
(
size
<=
0
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"dynamic batching preferred size must be positive for "
+
config
.
name
());
}
if
(
size
>
config
.
max_batch_size
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"dynamic batching preferred size must be <= max batch size for "
+
config
.
name
());
}
}
// Priority queue is specified
const
auto
priority_levels
=
config
.
dynamic_batching
().
priority_levels
();
if
(
priority_levels
!=
0
)
{
if
((
config
.
dynamic_batching
().
default_priority_level
()
==
0
)
||
(
config
.
dynamic_batching
().
default_priority_level
()
>
priority_levels
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"default priority level must be in range [1, "
+
std
::
to_string
(
priority_levels
)
+
"] for "
+
config
.
name
());
}
for
(
const
auto
&
queue_policy
:
config
.
dynamic_batching
().
priority_queue_policy
())
{
if
((
queue_policy
.
first
==
0
)
||
(
queue_policy
.
first
>
priority_levels
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"priority queue policy must have priority level in range [1, "
+
std
::
to_string
(
priority_levels
)
+
"] for "
+
config
.
name
());
}
}
}
// preserve ordering option will conflict with priorities and delay policy
if
(
config
.
dynamic_batching
().
preserve_ordering
())
{
if
(
priority_levels
>
1
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Only one priority level is allowed when 'preserve_ordering' is "
"true for "
+
config
.
name
());
}
const
auto
&
default_policy
=
config
.
dynamic_batching
().
default_queue_policy
();
if
((
default_policy
.
default_timeout_microseconds
()
!=
0
)
&&
(
default_policy
.
timeout_action
()
==
inference
::
ModelQueuePolicy
::
DELAY
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Queue policy can not have DELAY as timeout action when "
"'preserve_ordering' is true for "
+
config
.
name
());
}
// Also need to check policy in 'priority_queue_policy'
// for single priority case
for
(
const
auto
&
policy
:
config
.
dynamic_batching
().
priority_queue_policy
())
{
if
((
policy
.
second
.
default_timeout_microseconds
()
!=
0
)
&&
(
policy
.
second
.
timeout_action
()
==
inference
::
ModelQueuePolicy
::
DELAY
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Queue policy can not have DELAY as timeout action when "
"'preserve_ordering' is true for "
+
config
.
name
());
}
}
}
}
  // If sequence batching is specified make sure the control is
  // specified correctly.
  if (config.has_sequence_batching()) {
    const auto& batcher = config.sequence_batching();

    // Check boolean controls...
    std::string tensor_name;
    RETURN_IF_ERROR(GetBooleanSequenceControlProperties(
        batcher, config.name(),
        inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_START,
        false /* required */, &tensor_name, nullptr, nullptr, nullptr, nullptr,
        nullptr, nullptr, nullptr));
    RETURN_IF_ERROR(GetBooleanSequenceControlProperties(
        batcher, config.name(),
        inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_END,
        false /* required */, &tensor_name, nullptr, nullptr, nullptr, nullptr,
        nullptr, nullptr, nullptr));
    RETURN_IF_ERROR(GetBooleanSequenceControlProperties(
        batcher, config.name(),
        inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_READY,
        false /* required */, &tensor_name, nullptr, nullptr, nullptr, nullptr,
        nullptr, nullptr, nullptr));

    // Check CORRID control and make sure it is one of the allowed types.
    inference::DataType tensor_datatype;
    RETURN_IF_ERROR(GetTypedSequenceControlProperties(
        batcher, config.name(),
        inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_CORRID,
        false /* required */, &tensor_name, &tensor_datatype));
    if (!tensor_name.empty()) {
      if ((tensor_datatype != inference::DataType::TYPE_UINT64) &&
          (tensor_datatype != inference::DataType::TYPE_INT64) &&
          (tensor_datatype != inference::DataType::TYPE_UINT32) &&
          (tensor_datatype != inference::DataType::TYPE_INT32) &&
          (tensor_datatype != inference::DataType::TYPE_STRING)) {
        return Status(
            Status::Code::INVALID_ARG,
            "unexpected data type for control " +
                inference::ModelSequenceBatching_Control_Kind_Name(
                    inference::ModelSequenceBatching::Control::
                        CONTROL_SEQUENCE_CORRID) +
                " for " + config.name() +
                ". Allowed data types are TYPE_UINT64, TYPE_INT64, "
                "TYPE_UINT32, "
                "TYPE_INT32 and TYPE_STRING");
      }
    }

    // If oldest-first strategy is enabled make sure the preferred
    // batch sizes are positive and don't exceed maximum batch size.
    if (config.sequence_batching().has_oldest()) {
      for (const auto size :
           config.sequence_batching().oldest().preferred_batch_size()) {
        if (size <= 0) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching preferred batch size must be positive for " +
                  config.name());
        }
        if (size > config.max_batch_size()) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching preferred batch size must be <= max batch "
              "size for " +
                  config.name());
        }
      }
    }

    // If direct strategy is enabled make sure the minimum slot utilization is
    // in range (0.0, 1.0]
    if (config.sequence_batching().has_direct()) {
      if ((config.sequence_batching().direct().minimum_slot_utilization() <
           0.0) ||
          (config.sequence_batching().direct().minimum_slot_utilization() >
           1.0)) {
        return Status(
            Status::Code::INVALID_ARG,
            "sequence batching minimum slot utilization must be in range "
            "(0.0, 1.0] for " +
                config.name());
      }
    }
  }
  // If ensemble scheduling is specified, validate it. Otherwise,
  // must validate platform and instance_group
  if (config.has_ensemble_scheduling()) {
#ifdef TRITON_ENABLE_ENSEMBLE
    RETURN_IF_ERROR(ValidateEnsembleSchedulingConfig(config));
#else
    return Status(
        Status::Code::INVALID_ARG, "ensemble scheduling not supported");
#endif  // TRITON_ENABLE_ENSEMBLE
  }
#ifdef TRITON_ENABLE_ENSEMBLE
  else if (config.platform() == kEnsemblePlatform) {
    return Status(
        Status::Code::INVALID_ARG,
        "ensemble scheduling must be set for ensemble " + config.name() +
            " whose platform is " + kEnsemblePlatform);
  }
#endif  // TRITON_ENABLE_ENSEMBLE

  // FIXME: DLIS-3916 - Response Cache does not yet support decoupled models
  if (config.model_transaction_policy().decoupled() &&
      config.response_cache().enable()) {
    return Status(
        Status::Code::INVALID_ARG,
        "Response Cache does not currently support model " + config.name() +
            " with 'decoupled' transaction policy. Please disable the response"
            " cache.");
  }

  return Status::Success;
}
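A minimal caller sketch (not part of this file) showing how ValidateModelConfig is typically paired with ValidateModelIOConfig declared in the header further below; the helper name CheckConfig and the 6.0 minimum compute capability are illustrative assumptions, not Triton code.

// Hypothetical helper: validate a parsed configuration before using it.
static Status
CheckConfig(const inference::ModelConfig& config)
{
  // 6.0 is an example minimum CUDA compute capability, chosen arbitrarily.
  Status status =
      ValidateModelConfig(config, 6.0 /* min_compute_capability */);
  if (!status.IsOk()) {
    return status;  // the configuration itself is invalid
  }
  // Inputs and outputs are validated by a separate entry point.
  return ValidateModelIOConfig(config);
}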
Status
ValidateInstanceGroup(
    const inference::ModelConfig& config, const double min_compute_capability)
{
  // Instance group setting doesn't apply to ensemble
  if (config.has_ensemble_scheduling()) {
    return Status::Success;
  }

  if (config.instance_group().size() == 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "must specify one or more 'instance group's for " + config.name());
  }

  // Make sure KIND_GPU instance group specifies at least one GPU and
  // doesn't specify a non-existent GPU. Make sure non-KIND_GPU does
  // not specify any GPUs.
#ifdef TRITON_ENABLE_GPU
  std::set<int> supported_gpus;
  Status status = GetSupportedGPUs(&supported_gpus, min_compute_capability);
  if (!status.IsOk()) {
    return status;
  }
#endif  // TRITON_ENABLE_GPU

  for (const auto& group : config.instance_group()) {
    if (group.kind() == inference::ModelInstanceGroup::KIND_MODEL) {
      if (group.gpus().size() > 0) {
        return Status(
            Status::Code::INVALID_ARG,
            "instance group " + group.name() + " of model " + config.name() +
                " has kind KIND_MODEL but specifies one or more GPUs");
      }
    } else if (group.kind() == inference::ModelInstanceGroup::KIND_GPU) {
#if !defined(TRITON_ENABLE_GPU) && !defined(TRITON_ENABLE_MALI_GPU)
      return Status(
          Status::Code::INVALID_ARG,
          "instance group " + group.name() + " of model " + config.name() +
              " has kind KIND_GPU but server does not support GPUs");
#elif defined(TRITON_ENABLE_GPU)
      if (group.gpus().size() == 0) {
        if (supported_gpus.size() == 0) {
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " has kind KIND_GPU but no GPUs are available");
        } else {
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " has kind KIND_GPU but specifies no GPUs");
        }
      }

      for (const int32_t gid : group.gpus()) {
        if (supported_gpus.find(gid) == supported_gpus.end()) {
          std::string supported_gpus_str;
          for (const auto& cc : supported_gpus) {
            if (!supported_gpus_str.empty()) {
              supported_gpus_str += ", ";
            }
            supported_gpus_str += std::to_string(cc);
          }
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " specifies invalid or unsupported gpu id " +
                  std::to_string(gid) +
                  ". GPUs with at least the minimum required CUDA compute "
                  "compatibility of " +
                  std::to_string(min_compute_capability) +
                  " are: " + supported_gpus_str);
        }
      }
#endif  // ! TRITON_ENABLE_GPU && ! TRITON_ENABLE_MALI_GPU
    } else if (group.kind() == inference::ModelInstanceGroup::KIND_CPU) {
      if (group.gpus().size() > 0) {
        return Status(
            Status::Code::INVALID_ARG,
            "instance group " + group.name() + " of model " + config.name() +
                " has kind KIND_CPU but specifies one or more GPUs");
      }
    } else {
      return Status(
          Status::Code::INTERNAL,
          "instance group " + group.name() + " of model " + config.name() +
              " has unexpected kind KIND_AUTO");
    }

    if ((config.platform() != kTensorRTPlanPlatform) &&
        !group.profile().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "instance group " + group.name() + " of model " + config.name() +
              " and platform " + config.platform() +
              "specifies profile field which is only supported for "
              "TensorRT models");
    } else if (!group.profile().empty()) {
      for (const auto& profile : group.profile()) {
        int profile_index;
        RETURN_IF_ERROR(GetProfileIndex(profile, &profile_index));
        if (profile_index < 0) {
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " and platform " + config.platform() +
                  " specifies invalid profile " + profile +
                  ". The field should contain the string representation of a "
                  "non-negative integer.");
        }
      }
    }
  }

  return Status::Success;
}
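As an illustration of the checks above, a sketch of an instance group that ValidateInstanceGroup rejects; the accessor names follow the usual protobuf-generated naming for the ModelConfig fields used above, and the values are made up.

// Hypothetical failure case: a KIND_CPU group that also lists a GPU id.
inference::ModelConfig bad_config;
bad_config.set_name("example_model");
auto* group = bad_config.add_instance_group();
group->set_name("group0");
group->set_kind(inference::ModelInstanceGroup::KIND_CPU);
group->add_gpus(0);
// ValidateInstanceGroup(bad_config, 6.0 /* min_compute_capability */) returns
// INVALID_ARG: "... has kind KIND_CPU but specifies one or more GPUs".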
Status
ValidateModelInput(
    const inference::ModelInput& io, int32_t max_batch_size,
    const std::string& platform)
{
  RETURN_IF_ERROR(ValidateIOShape(io, max_batch_size, "model input "));

  if (((io.format() == inference::ModelInput::FORMAT_NHWC) ||
       (io.format() == inference::ModelInput::FORMAT_NCHW)) &&
      (io.dims_size() != 3)) {
    return Status(
        Status::Code::INVALID_ARG, "model input NHWC/NCHW require 3 dims");
  }

  if ((platform != kTensorRTPlanPlatform) && io.is_shape_tensor()) {
    return Status(
        Status::Code::INVALID_ARG,
        "shape tensors are only supported for TensorRT platform");
  }

  return Status::Success;
}

Status
CheckAllowedModelInput(
    const inference::ModelInput& io, const std::set<std::string>& allowed)
{
  if (allowed.find(io.name()) == allowed.end()) {
    std::string astr;
    for (const auto& a : allowed) {
      if (!astr.empty()) {
        astr.append(", ");
      }
      astr.append(a);
    }

    return Status(
        Status::Code::INVALID_ARG,
        "unexpected inference input '" + io.name() +
            "', allowed inputs are: " + astr);
  }
  return Status::Success;
}
Status
ValidateModelOutput(
    const inference::ModelOutput& io, int32_t max_batch_size,
    const std::string& platform)
{
  RETURN_IF_ERROR(ValidateIOShape(io, max_batch_size, "model output "));

  if ((platform != kTensorRTPlanPlatform) && io.is_shape_tensor()) {
    return Status(
        Status::Code::INVALID_ARG,
        "shape tensors are only supported for TensorRT platform");
  }

  return Status::Success;
}

Status
CheckAllowedModelOutput(
    const inference::ModelOutput& io, const std::set<std::string>& allowed)
{
  if (allowed.find(io.name()) == allowed.end()) {
    std::string astr;
    for (const auto& a : allowed) {
      if (!astr.empty()) {
        astr.append(", ");
      }
      astr.append(a);
    }

    return Status(
        Status::Code::INVALID_ARG,
        "unexpected inference output '" + io.name() +
            "', allowed outputs are: " + astr);
  }
  return Status::Success;
}
Status
ParseBoolParameter(
    const std::string& key, std::string value, bool* parsed_value)
{
  std::transform(
      value.begin(), value.end(), value.begin(),
      [](unsigned char c) { return std::tolower(c); });
  if ((value == "true") || (value == "1")) {
    *parsed_value = true;
  } else if ((value == "false") || (value == "0")) {
    *parsed_value = false;
  } else {
    return Status(
        Status::Code::INVALID_ARG,
        "failed to convert " + key + " '" + value + "' to boolean value");
  }

  return Status::Success;
}

Status
ParseLongLongParameter(
    const std::string& key, const std::string& value, int64_t* parsed_value)
{
  try {
    *parsed_value = std::stoll(value);
  }
  catch (const std::invalid_argument& ia) {
    return Status(
        Status::Code::INVALID_ARG,
        "failed to convert " + key + " '" + value + "' to integral number");
  }

  return Status::Success;
}

Status
GetProfileIndex(const std::string& profile_name, int* profile_index)
{
  if (profile_name.empty()) {
    return Status(Status::Code::INVALID_ARG, "profile name must not be empty");
  }

  try {
    *profile_index = stoi(profile_name);
  }
  catch (const std::invalid_argument& ia) {
    return Status(
        Status::Code::INVALID_ARG,
        "unable to parse '" + profile_name + "': " + ia.what());
  }

  return Status::Success;
}
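A short usage sketch of the parameter helpers above; the wrapper function and parameter names are hypothetical examples, not part of these sources.

// Hypothetical caller returning Status so RETURN_IF_ERROR can be used.
static Status
ParseExampleParameters()
{
  bool enable = false;
  int64_t delay_us = 0;
  // Case-insensitive: "True", "true" and "1" all parse to true.
  RETURN_IF_ERROR(ParseBoolParameter("enable_cache", "True", &enable));
  RETURN_IF_ERROR(ParseLongLongParameter("delay_us", "1000", &delay_us));
  // A non-numeric string such as "abc" makes ParseLongLongParameter return
  // INVALID_ARG; GetProfileIndex reports empty or non-numeric profile names
  // the same way.
  return Status::Success;
}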
namespace {

Status
CollectInt64Fields(
    google::protobuf::Message* message, const std::string& prefix,
    std::set<std::string>* int64_fields)
{
  const google::protobuf::Descriptor* desc = message->GetDescriptor();
  const google::protobuf::Reflection* refl = message->GetReflection();
  for (int i = 0; i < desc->field_count(); ++i) {
    const google::protobuf::FieldDescriptor* field = desc->field(i);
    const std::string fullname = prefix + "::" + field->name();
    switch (field->type()) {
      case google::protobuf::FieldDescriptor::TYPE_MESSAGE: {
        if (field->is_repeated()) {
          int rsize = refl->FieldSize(*message, field);
          if (rsize == 0) {
            refl->AddMessage(message, field);
          }
          rsize = refl->FieldSize(*message, field);
          for (int r = 0; r < rsize; ++r) {
            RETURN_IF_ERROR(CollectInt64Fields(
                refl->MutableRepeatedMessage(message, field, r), fullname,
                int64_fields));
          }
        } else {
          RETURN_IF_ERROR(CollectInt64Fields(
              refl->MutableMessage(message, field), fullname, int64_fields));
        }
      } break;

      case google::protobuf::FieldDescriptor::TYPE_INT64:
      case google::protobuf::FieldDescriptor::TYPE_UINT64:
      case google::protobuf::FieldDescriptor::TYPE_SINT64:
      case google::protobuf::FieldDescriptor::TYPE_FIXED64:
      case google::protobuf::FieldDescriptor::TYPE_SFIXED64:
        int64_fields->insert(fullname);
        break;

      default:
        break;
    }
  }

  return Status::Success;
}
Status
ValidateModelConfigInt64()
{
  // Must initialize a dummy ModelConfig so that all fields are
  // visited.
  inference::ModelConfig config;

  std::set<std::string> int64_fields;
  RETURN_IF_ERROR(CollectInt64Fields(&config, "ModelConfig", &int64_fields));

  LOG_VERBOSE(1) << "ModelConfig 64-bit fields:";
  for (const auto& f : int64_fields) {
    LOG_VERBOSE(1) << "\t" << f;
  }

  // We expect to find exactly the following fields. If we get an
  // error from this code ModelConfig has added or removed a 64-bit
  // field and we need to adjust here and in ModelConfigToJson below.
  std::set<std::string> expected{
      "ModelConfig::input::dims",
      "ModelConfig::input::reshape::shape",
      "ModelConfig::output::dims",
      "ModelConfig::output::reshape::shape",
      "ModelConfig::version_policy::specific::versions",
      "ModelConfig::dynamic_batching::max_queue_delay_microseconds",
      "ModelConfig::dynamic_batching::default_queue_policy::default_timeout_"
      "microseconds",
      "ModelConfig::dynamic_batching::priority_queue_policy::value::default_"
      "timeout_microseconds",
      "ModelConfig::sequence_batching::direct::max_queue_delay_microseconds",
      "ModelConfig::sequence_batching::state::dims",
      "ModelConfig::sequence_batching::state::initial_state::dims",
      "ModelConfig::sequence_batching::oldest::max_queue_delay_microseconds",
      "ModelConfig::sequence_batching::max_sequence_idle_microseconds",
      "ModelConfig::ensemble_scheduling::step::model_version",
      "ModelConfig::model_warmup::inputs::value::dims",
      "ModelConfig::optimization::cuda::graph_spec::input::value::dim",
      "ModelConfig::optimization::cuda::graph_spec::graph_lower_bound::input::"
      "value::dim",
      "ModelConfig::instance_group::secondary_devices::device_id"};

  if (int64_fields != expected) {
    return Status(
        Status::Code::INTERNAL, "ModelConfig 64-bit field needs update");
  }

  return Status::Success;
}
Status
FixInt(
    triton::common::TritonJson::Value& document,
    triton::common::TritonJson::Value& io, const std::string& name)
{
  triton::common::TritonJson::Value str_value;
  if (!io.Find(name.c_str(), &str_value)) {
    return Status::Success;
  }

  std::string str;
  RETURN_IF_ERROR(str_value.AsString(&str));

  int64_t d;
  try {
    d = std::atoll(str.c_str());
  }
  catch (...) {
    return Status(
        Status::Code::INTERNAL,
        (std::string("unable to convert '") + str + "' to integer"));
  }

  str_value.SetInt(d);

  return Status::Success;
}
Status
FixIntArray(
    triton::common::TritonJson::Value& document,
    triton::common::TritonJson::Value& io, const std::string& name)
{
  triton::common::TritonJson::Value fixed_shape_array(
      document, triton::common::TritonJson::ValueType::ARRAY);

  if (!io.Find(name.c_str())) {
    return Status::Success;
  }

  triton::common::TritonJson::Value shape_array;
  RETURN_IF_ERROR(io.MemberAsArray(name.c_str(), &shape_array));
  for (size_t i = 0; i < shape_array.ArraySize(); ++i) {
    std::string str;
    RETURN_IF_ERROR(shape_array.IndexAsString(i, &str));

    int64_t d;
    try {
      d = std::atoll(str.c_str());
    }
    catch (...) {
      return Status(
          Status::Code::INTERNAL,
          (std::string("unable to convert '") + str + "' to integer"));
    }

    RETURN_IF_ERROR(fixed_shape_array.AppendInt(d));
  }

  shape_array.Swap(fixed_shape_array);
  fixed_shape_array.Release();

  return Status::Success;
}
Status
FixObjectArray(
    triton::common::TritonJson::Value& document,
    triton::common::TritonJson::Value& arr, const std::string& name)
{
  for (size_t i = 0; i < arr.ArraySize(); ++i) {
    triton::common::TritonJson::Value obj;
    RETURN_IF_ERROR(arr.IndexAsObject(i, &obj));
    RETURN_IF_ERROR(FixInt(document, obj, name));
  }

  return Status::Success;
}

}  // namespace
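To make the purpose of the file-local FixInt/FixIntArray helpers concrete, a sketch using only the TritonJson calls that already appear above; the JSON literal is a hypothetical example of protobuf's int64-as-string output, and the wrapper function is illustrative only.

// Hypothetical illustration of FixInt on a hand-written JSON fragment.
static Status
FixIntExample()
{
  triton::common::TritonJson::Value doc;
  const std::string raw = "{\"max_queue_delay_microseconds\": \"100\"}";
  doc.Parse(raw);  // protobuf renders int64 fields as JSON strings
  // After FixInt the member holds the number 100 instead of the string "100".
  RETURN_IF_ERROR(FixInt(doc, doc, "max_queue_delay_microseconds"));
  return Status::Success;
}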
Status
ModelConfigToJson(
    const inference::ModelConfig& config, const uint32_t config_version,
    std::string* json_str)
{
  // Currently only support 'config_version' 1, which is the json
  // representation of the ModelConfig protobuf with the int64 fields
  // fixes to be actual numbers instead of the string madness done by
  // protobuf.
  if (config_version != 1) {
    return Status(
        Status::Code::INVALID_ARG,
        std::string("model configuration version ") +
            std::to_string(config_version) +
            " not supported, supported versions are: 1");
  }

  // Config will have 0 byte size if all fields are with default value,
  // in other word the config is empty.
  if (config.ByteSizeLong() == 0) {
    json_str->clear();
    return Status::Success;
  }

  std::string config_json_str;
  ::google::protobuf::util::JsonPrintOptions options;
  options.preserve_proto_field_names = true;
  options.always_print_primitive_fields = true;
  ::google::protobuf::util::MessageToJsonString(
      config, &config_json_str, options);

  // We need to verify that every field 64-bit field in the
  // ModelConfig protobuf is being handled. We hardcode the known
  // fields and check just once to make sure everything has been
  // handled. We could have this check in a separately compiled CI
  // test but it is convenient to keep it here close to the code below
  // that actually fixes the 64-bit fields.
  {
    static std::once_flag fonce;
    Status status = Status::Success;
    std::call_once(fonce, [&status] { status = ValidateModelConfigInt64(); });
    RETURN_IF_ERROR(status);
  }

  // In the json produced by protobuf, int64 and uint64 values are
  // represented as strings. Protobuf doesn't provide an option to
  // disable this (sigh) so we need to fix it up here as we want the
  // json representation of the config to be reasonable json...
  triton::common::TritonJson::Value config_json;
  config_json.Parse(config_json_str);

  // Fix input::dims, input::reshape::shape, output::dims,
  // output::reshape::shape
  for (std::string name : {"input", "output"}) {
    triton::common::TritonJson::Value ios;
    RETURN_IF_ERROR(config_json.MemberAsArray(name.c_str(), &ios));
    for (size_t i = 0; i < ios.ArraySize(); ++i) {
      triton::common::TritonJson::Value io;
      RETURN_IF_ERROR(ios.IndexAsObject(i, &io));
      RETURN_IF_ERROR(FixIntArray(config_json, io, "dims"));

      triton::common::TritonJson::Value reshape;
      if (io.Find("reshape", &reshape)) {
        RETURN_IF_ERROR(FixIntArray(config_json, reshape, "shape"));
      }
    }
  }

  // Fix version_policy::specific::versions
  {
    triton::common::TritonJson::Value vp;
    if (config_json.Find("version_policy", &vp)) {
      triton::common::TritonJson::Value specific;
      if (vp.Find("specific", &specific)) {
        RETURN_IF_ERROR(FixIntArray(config_json, specific, "versions"));
      }
    }
  }

  // Fix dynamic_batching::max_queue_delay_microseconds,
  // dynamic_batching::default_queue_policy::default_timeout_microseconds,
  // dynamic_batching::priority_queue_policy::value::default_timeout_microseconds
  {
    triton::common::TritonJson::Value db;
    if (config_json.Find("dynamic_batching", &db)) {
      RETURN_IF_ERROR(FixInt(config_json, db, "max_queue_delay_microseconds"));
      triton::common::TritonJson::Value dqp;
      if (db.Find("default_queue_policy", &dqp)) {
        RETURN_IF_ERROR(
            FixInt(config_json, dqp, "default_timeout_microseconds"));
      }
      triton::common::TritonJson::Value pqp;
      if (db.Find("priority_queue_policy", &pqp)) {
        // Iterate over each member in 'pqp' and fix...
        std::vector<std::string> members;
        RETURN_IF_ERROR(pqp.Members(&members));
        for (const auto& m : members) {
          triton::common::TritonJson::Value el;
          RETURN_IF_ERROR(pqp.MemberAsObject(m.c_str(), &el));
          RETURN_IF_ERROR(
              FixInt(config_json, el, "default_timeout_microseconds"));
        }
      }
    }
  }

  // Fix sequence_batching::oldest::max_queue_delay_microseconds,
  // sequence_batching::direct::max_queue_delay_microseconds,
  // sequence_batching::max_sequence_idle_microseconds
  {
    triton::common::TritonJson::Value sb;
    if (config_json.Find("sequence_batching", &sb)) {
      RETURN_IF_ERROR(
          FixInt(config_json, sb, "max_sequence_idle_microseconds"));
      triton::common::TritonJson::Value oldest;
      if (sb.Find("oldest", &oldest)) {
        RETURN_IF_ERROR(
            FixInt(config_json, oldest, "max_queue_delay_microseconds"));
      }
      triton::common::TritonJson::Value direct;
      if (sb.Find("direct", &direct)) {
        RETURN_IF_ERROR(
            FixInt(config_json, direct, "max_queue_delay_microseconds"));
      }

      triton::common::TritonJson::Value states;
      if (sb.Find("state", &states)) {
        for (size_t i = 0; i < states.ArraySize(); ++i) {
          triton::common::TritonJson::Value state;
          RETURN_IF_ERROR(states.IndexAsObject(i, &state));
          RETURN_IF_ERROR(FixIntArray(config_json, state, "dims"));

          triton::common::TritonJson::Value initial_state;
          if (sb.Find("initial_state", &initial_state)) {
            RETURN_IF_ERROR(FixIntArray(config_json, initial_state, "dims"));
          }
        }
      }
    }
  }

  // Fix ensemble_scheduling::step::model_version.
  {
    triton::common::TritonJson::Value ens;
    if (config_json.Find("ensemble_scheduling", &ens)) {
      triton::common::TritonJson::Value step;
      if (ens.Find("step", &step)) {
        RETURN_IF_ERROR(FixObjectArray(config_json, step, "model_version"));
      }
    }
  }

  // Fix model_warmup::inputs::value::dims.
  {
    triton::common::TritonJson::Value warmups;
    if (config_json.Find("model_warmup", &warmups)) {
      for (size_t i = 0; i < warmups.ArraySize(); ++i) {
        triton::common::TritonJson::Value warmup;
        RETURN_IF_ERROR(warmups.IndexAsObject(i, &warmup));
        triton::common::TritonJson::Value inputs;
        if (warmup.Find("inputs", &inputs)) {
          std::vector<std::string> members;
          RETURN_IF_ERROR(inputs.Members(&members));
          for (const auto& m : members) {
            triton::common::TritonJson::Value input;
            RETURN_IF_ERROR(inputs.MemberAsObject(m.c_str(), &input));
            RETURN_IF_ERROR(FixIntArray(config_json, input, "dims"));
          }
        }
      }
    }
  }

  // Convert fixed json back the string...
  triton::common::TritonJson::WriteBuffer buffer;
  RETURN_IF_ERROR(config_json.Write(&buffer));

  *json_str = std::move(buffer.MutableContents());

  return Status::Success;
}
Status
JsonToModelConfig(
    const std::string& json_config, const uint32_t config_version,
    inference::ModelConfig* protobuf_config)
{
  // Currently only support 'config_version' 1, which is the json
  // representation of the ModelConfig protobuf matches the representation in
  // ModelConfigToJson().
  if (config_version != 1) {
    return Status(
        Status::Code::INVALID_ARG,
        std::string("model configuration version ") +
            std::to_string(config_version) +
            " not supported, supported versions are: 1");
  }

  ::google::protobuf::util::JsonParseOptions options;
  options.case_insensitive_enum_parsing = true;
  options.ignore_unknown_fields = false;
  auto err = ::google::protobuf::util::JsonStringToMessage(
      json_config, protobuf_config, options);
  if (!err.ok()) {
    return Status(
        Status::Code::INVALID_ARG, std::string(err.message()));
  }

  return Status::Success;
}
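A hedged round-trip sketch for the two conversion functions above; the setter calls follow standard protobuf-generated naming for the ModelConfig fields referenced earlier, and the field values are illustrative only.

// Hypothetical round trip: ModelConfig -> JSON -> ModelConfig.
static Status
RoundTripExample()
{
  inference::ModelConfig config;
  config.set_name("example_model");
  config.mutable_dynamic_batching()->set_max_queue_delay_microseconds(100);

  std::string json;
  RETURN_IF_ERROR(ModelConfigToJson(config, 1 /* config_version */, &json));
  // 'json' now carries "max_queue_delay_microseconds": 100 as a number rather
  // than the "100" string that protobuf's MessageToJsonString would emit.

  inference::ModelConfig parsed;
  RETURN_IF_ERROR(JsonToModelConfig(json, 1 /* config_version */, &parsed));
  return Status::Success;
}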
BackendType
GetBackendTypeFromPlatform(const std::string& platform_name)
{
  if ((platform_name == kTensorFlowGraphDefPlatform) ||
      (platform_name == kTensorFlowSavedModelPlatform)) {
    return BackendType::BACKEND_TYPE_TENSORFLOW;
  }

  if (platform_name == kTensorRTPlanPlatform) {
    return BackendType::BACKEND_TYPE_TENSORRT;
  }

  if (platform_name == kOnnxRuntimeOnnxPlatform) {
    return BackendType::BACKEND_TYPE_ONNXRUNTIME;
  }

  if (platform_name == kPyTorchLibTorchPlatform) {
    return BackendType::BACKEND_TYPE_PYTORCH;
  }

  return BackendType::BACKEND_TYPE_UNKNOWN;
}

/// Get the BackendType value for a backend name.
/// \param backend_name The backend name.
/// \return The BackendType or BackendType::UNKNOWN if the platform string
/// is not recognized.
BackendType
GetBackendType(const std::string& backend_name)
{
  if (backend_name == kTensorFlowBackend) {
    return BackendType::BACKEND_TYPE_TENSORFLOW;
  }

  if (backend_name == kTensorRTBackend) {
    return BackendType::BACKEND_TYPE_TENSORRT;
  }

  if (backend_name == kOnnxRuntimeBackend) {
    return BackendType::BACKEND_TYPE_ONNXRUNTIME;
  }

  if (backend_name == kPyTorchBackend) {
    return BackendType::BACKEND_TYPE_PYTORCH;
  }

  return BackendType::BACKEND_TYPE_UNKNOWN;
}
TRITONSERVER_DataType
DataTypeToTriton(const inference::DataType dtype)
{
  switch (dtype) {
    case inference::DataType::TYPE_BOOL:
      return TRITONSERVER_TYPE_BOOL;
    case inference::DataType::TYPE_UINT8:
      return TRITONSERVER_TYPE_UINT8;
    case inference::DataType::TYPE_UINT16:
      return TRITONSERVER_TYPE_UINT16;
    case inference::DataType::TYPE_UINT32:
      return TRITONSERVER_TYPE_UINT32;
    case inference::DataType::TYPE_UINT64:
      return TRITONSERVER_TYPE_UINT64;
    case inference::DataType::TYPE_INT8:
      return TRITONSERVER_TYPE_INT8;
    case inference::DataType::TYPE_INT16:
      return TRITONSERVER_TYPE_INT16;
    case inference::DataType::TYPE_INT32:
      return TRITONSERVER_TYPE_INT32;
    case inference::DataType::TYPE_INT64:
      return TRITONSERVER_TYPE_INT64;
    case inference::DataType::TYPE_FP16:
      return TRITONSERVER_TYPE_FP16;
    case inference::DataType::TYPE_FP32:
      return TRITONSERVER_TYPE_FP32;
    case inference::DataType::TYPE_FP64:
      return TRITONSERVER_TYPE_FP64;
    case inference::DataType::TYPE_STRING:
      return TRITONSERVER_TYPE_BYTES;
    case inference::DataType::TYPE_BF16:
      return TRITONSERVER_TYPE_BF16;
    default:
      break;
  }

  return TRITONSERVER_TYPE_INVALID;
}
inference::DataType
TritonToDataType(const TRITONSERVER_DataType dtype)
{
  switch (dtype) {
    case TRITONSERVER_TYPE_BOOL:
      return inference::DataType::TYPE_BOOL;
    case TRITONSERVER_TYPE_UINT8:
      return inference::DataType::TYPE_UINT8;
    case TRITONSERVER_TYPE_UINT16:
      return inference::DataType::TYPE_UINT16;
    case TRITONSERVER_TYPE_UINT32:
      return inference::DataType::TYPE_UINT32;
    case TRITONSERVER_TYPE_UINT64:
      return inference::DataType::TYPE_UINT64;
    case TRITONSERVER_TYPE_INT8:
      return inference::DataType::TYPE_INT8;
    case TRITONSERVER_TYPE_INT16:
      return inference::DataType::TYPE_INT16;
    case TRITONSERVER_TYPE_INT32:
      return inference::DataType::TYPE_INT32;
    case TRITONSERVER_TYPE_INT64:
      return inference::DataType::TYPE_INT64;
    case TRITONSERVER_TYPE_FP16:
      return inference::DataType::TYPE_FP16;
    case TRITONSERVER_TYPE_FP32:
      return inference::DataType::TYPE_FP32;
    case TRITONSERVER_TYPE_FP64:
      return inference::DataType::TYPE_FP64;
    case TRITONSERVER_TYPE_BYTES:
      return inference::DataType::TYPE_STRING;
    case TRITONSERVER_TYPE_BF16:
      return inference::DataType::TYPE_BF16;
    default:
      break;
  }

  return inference::DataType::TYPE_INVALID;
}

}}  // namespace triton::core
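A small sketch of the data type mapping round trip defined by the two switch statements above; note that TYPE_STRING intentionally maps to TRITONSERVER_TYPE_BYTES and back.

// Based only on the mappings above; qualified names because the calls sit
// inside namespace triton::core.
TRITONSERVER_DataType t =
    triton::core::DataTypeToTriton(inference::DataType::TYPE_FP32);
// t == TRITONSERVER_TYPE_FP32
inference::DataType d =
    triton::core::TritonToDataType(TRITONSERVER_TYPE_BYTES);
// d == inference::DataType::TYPE_STRING; values outside the switch map to
// TRITONSERVER_TYPE_INVALID / TYPE_INVALID respectively.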
3rdparty/core-r22.12/src/model_config_utils.h
0 → 100644
View file @
0a21fff9
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "model_config.pb.h"
#include "status.h"
#include "triton/common/model_config.h"
#include "tritonserver_apis.h"
#include "filesystem.h"
namespace triton { namespace core {

/// Enumeration for the different backend types.
enum BackendType {
  BACKEND_TYPE_UNKNOWN = 0,
  BACKEND_TYPE_TENSORRT = 1,
  BACKEND_TYPE_TENSORFLOW = 2,
  BACKEND_TYPE_ONNXRUNTIME = 3,
  BACKEND_TYPE_PYTORCH = 4
};

/// Get version of a model from the path containing the model
/// definition file.
/// \param path The path to the model definition file.
/// \param version Returns the version.
/// \return The error status.
Status GetModelVersionFromPath(const std::string& path, int64_t* version);
/// Get the tensor name, false value, and true value for a boolean
/// sequence batcher control kind. If 'required' is true then must
/// find a tensor for the control. If 'required' is false, return
/// 'tensor_name' as empty-string if the control is not mapped to any
/// tensor.
Status GetBooleanSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype, float* fp32_false_value,
    float* fp32_true_value, int32_t* int32_false_value,
    int32_t* int32_true_value, bool* bool_false_value, bool* bool_true_value);

/// Get the tensor name and datatype for a non-boolean sequence
/// batcher control kind. If 'required' is true then must find a
/// tensor for the control. If 'required' is false, return
/// 'tensor_name' as empty-string if the control is not mapped to any
/// tensor. 'tensor_datatype' returns the required datatype for the
/// control.
Status GetTypedSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype);
/// Read a ModelConfig and normalize it as expected by model backends.
/// \param path The full-path to the directory containing the
/// model configuration.
/// \param min_compute_capability The minimum support CUDA compute
/// capability.
/// \param config Returns the normalized model configuration.
/// \return The error status.
Status GetNormalizedModelConfig(
    const std::string& model_name, const std::string& path,
    const double min_compute_capability, inference::ModelConfig* config);

/// Auto-complete backend related fields (platform, backend and default model
/// filename) if not set, note that only Triton recognized backends will be
/// checked.
/// \param model_name The name of the model.
/// \param model_path The full-path to the directory containing the
/// model configuration.
/// \param config Returns the auto-completed model configuration.
/// \return The error status.
Status AutoCompleteBackendFields(
    const std::string& model_name, const std::string& model_path,
    inference::ModelConfig* config);

/// Detects and adds missing fields in the model configuration.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \param config The model configuration
/// \return The error status
Status NormalizeModelConfig(
    const double min_compute_capability, inference::ModelConfig* config);
/// [FIXME] better formalize config normalization / validation
/// Detects and adds missing fields in instance group setting.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \param config The model configuration
/// \return The error status
Status NormalizeInstanceGroup(
    const double min_compute_capability,
    const std::vector<inference::ModelInstanceGroup>& preferred_groups,
    inference::ModelConfig* config);

/// [FIXME] Remove once a more permanent solution is implemented (DLIS-4211)
/// Localize EXECUTION_ENV_PATH in python backend.
/// \param model_path The full-path to the directory containing the model
/// configuration, before localization.
/// \param config The model configuration
/// \param localized_model_dir The localized model directory
/// \return The error status
Status LocalizePythonBackendExecutionEnvironmentPath(
    const std::string& model_path, inference::ModelConfig* config,
    std::shared_ptr<LocalizedPath>* localized_model_dir);

/// Auto-complete the instance count based on instance kind and backend name.
/// \param group The instance group to set the count for.
/// \param backend The backend name to check against.
/// \return The error status.
Status SetDefaultInstanceCount(
    inference::ModelInstanceGroup* group, const std::string& backend);
/// Validate that a model is specified correctly, except for model inputs
/// and outputs. ValidateModelIOConfig() should be called to
/// validate model inputs and outputs.
/// \param config The model configuration to validate.
/// \param min_compute_capability The minimum support CUDA compute
/// capability.
/// \return The error status. A non-OK status indicates the configuration
/// is not valid.
Status ValidateModelConfig(
    const inference::ModelConfig& config, const double min_compute_capability);

/// [FIXME] better formalize config normalization / validation
/// Validate instance group setting.
/// \param config The model configuration to validate.
/// \param min_compute_capability The minimum support CUDA compute
/// capability.
/// \return The error status. A non-OK status indicates the configuration
/// is not valid.
Status ValidateInstanceGroup(
    const inference::ModelConfig& config, const double min_compute_capability);

/// Validate that a model inputs and outputs are specified correctly.
/// \param config The model configuration to validate.
/// \return The error status. A non-OK status indicates the configuration
/// is not valid.
Status ValidateModelIOConfig(const inference::ModelConfig& config);
/// Validate that input is specified correctly in a model
/// configuration.
/// \param io The model input.
/// \param max_batch_size The max batch size specified in model configuration.
/// \param platform The platform name
/// \return The error status. A non-OK status indicates the input
/// is not valid.
Status ValidateModelInput(
    const inference::ModelInput& io, int32_t max_batch_size,
    const std::string& platform);

/// Validate that an input matches one of the allowed input names.
/// \param io The model input.
/// \param allowed The set of allowed input names.
/// \return The error status. A non-OK status indicates the input
/// is not valid.
Status CheckAllowedModelInput(
    const inference::ModelInput& io, const std::set<std::string>& allowed);

/// Validate that an output is specified correctly in a model
/// configuration.
/// \param io The model output.
/// \param max_batch_size The max batch size specified in model configuration.
/// \param platform The platform name
/// \return The error status. A non-OK status indicates the output
/// is not valid.
Status ValidateModelOutput(
    const inference::ModelOutput& io, int32_t max_batch_size,
    const std::string& platform);

/// Validate that an output matches one of the allowed output names.
/// \param io The model output.
/// \param allowed The set of allowed output names.
/// \return The error status. A non-OK status indicates the output
/// is not valid.
Status CheckAllowedModelOutput(
    const inference::ModelOutput& io, const std::set<std::string>& allowed);

/// Validate that a model batch inputs and batch outputs are specified
/// correctly.
/// \param config The model configuration to validate.
/// \return The error status. A non-OK status indicates the batch inputs or
/// batch outputs are not valid.
Status ValidateBatchIO(const inference::ModelConfig& config);
/// Parse the 'value' of the parameter 'key' into a boolean value.
/// \param key The name of the parameter.
/// \param value The value of the parameter in string.
/// \param parsed_value Return the boolean of the parameter.
/// \return The error status. A non-OK status indicates failure on parsing the
/// value.
Status ParseBoolParameter(
    const std::string& key, std::string value, bool* parsed_value);

/// Parse the 'value' of the parameter 'key' into a long long integer value.
/// \param key The name of the parameter.
/// \param value The value of the parameter in string.
/// \param parsed_value Return the numerical value of the parameter.
/// \return The error status. A non-OK status indicates failure on parsing the
/// value.
Status ParseLongLongParameter(
    const std::string& key, const std::string& value, int64_t* parsed_value);

/// Obtain the 'profile_index' of the 'profile_name'.
/// \param profile_name The name of the profile.
/// \param profile_index Return the index of the profile.
/// \return The error status. A non-OK status indicates failure on getting the
/// value.
Status GetProfileIndex(const std::string& profile_name, int* profile_index);
/// Convert a model configuration protobuf to the equivalent json.
/// \param config The protobuf model configuration.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned.
/// \param json Returns the equivalent JSON.
/// \return The error status.
Status ModelConfigToJson(
    const inference::ModelConfig& config, const uint32_t config_version,
    std::string* json_str);

/// Convert a model configuration JSON to the equivalent protobuf.
/// \param config The JSON model configuration.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned.
/// \param protobuf Returns the equivalent protobuf.
/// \return The error status.
Status JsonToModelConfig(
    const std::string& json_config, const uint32_t config_version,
    inference::ModelConfig* protobuf_config);
/// Get the BackendType value for a platform name.
/// \param platform_name The platform name.
/// \return The BackendType or BackendType::UNKNOWN if the platform string
/// is not recognized.
BackendType GetBackendTypeFromPlatform(const std::string& platform_name);

/// Get the BackendType value for a backend name.
/// \param backend_name The backend name.
/// \return The BackendType or BackendType::UNKNOWN if the platform string
/// is not recognized.
BackendType GetBackendType(const std::string& backend_name);

/// Get the Triton server data type corresponding to a data type.
/// \param dtype The data type.
/// \return The Triton server data type.
TRITONSERVER_DataType DataTypeToTriton(const inference::DataType dtype);

/// Get the data type corresponding to a Triton server data type.
/// \param dtype The Triton server data type.
/// \return The data type.
inference::DataType TritonToDataType(const TRITONSERVER_DataType dtype);

}}  // namespace triton::core
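A hedged usage sketch of the backend-type helpers declared above; 'config' is assumed to be a populated inference::ModelConfig, and config.backend() is assumed to be the standard generated accessor for the ModelConfig backend field.

// Hypothetical usage: prefer the explicit backend name, fall back to platform.
triton::core::BackendType type =
    triton::core::GetBackendType(config.backend());
if (type == triton::core::BACKEND_TYPE_UNKNOWN) {
  type = triton::core::GetBackendTypeFromPlatform(config.platform());
}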