guobj / Qwen_lmdeploy / Commits

Commit fcefbf3d
Authored Nov 30, 2023 by xiabo
Parent: d592fbea

Commit message: Reorganize the project (重新整理工程)

Changes: 170
Showing 20 changed files with 0 additions and 6049 deletions (+0 -6049)
+0 -205    3rdparty/core-r22.12/src/infer_trace.h
+0 -99     3rdparty/core-r22.12/src/instance_queue.cc
+0 -57     3rdparty/core-r22.12/src/instance_queue.h
+0 -95     3rdparty/core-r22.12/src/label_provider.cc
+0 -65     3rdparty/core-r22.12/src/label_provider.h
+0 -32     3rdparty/core-r22.12/src/libtritonserver.ldscript
+0 -238    3rdparty/core-r22.12/src/memory.cc
+0 -174    3rdparty/core-r22.12/src/memory.h
+0 -321    3rdparty/core-r22.12/src/metric_family.cc
+0 -111    3rdparty/core-r22.12/src/metric_family.h
+0 -168    3rdparty/core-r22.12/src/metric_model_reporter.cc
+0 -138    3rdparty/core-r22.12/src/metric_model_reporter.h
+0 -1035   3rdparty/core-r22.12/src/metrics.cc
+0 -335    3rdparty/core-r22.12/src/metrics.h
+0 -137    3rdparty/core-r22.12/src/model.cc
+0 -162    3rdparty/core-r22.12/src/model.h
+0 -61     3rdparty/core-r22.12/src/model_config_cuda.cc
+0 -40     3rdparty/core-r22.12/src/model_config_cuda.h
+0 -2294   3rdparty/core-r22.12/src/model_config_utils.cc
+0 -282    3rdparty/core-r22.12/src/model_config_utils.h
Too many changes to show. To preserve performance only 170 of 170+ files are displayed.
3rdparty/core-r22.12/src/infer_trace.h  deleted  100644 → 0
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <atomic>
#include <chrono>
#include <memory>
#include "constants.h"
#include "status.h"
#include "tritonserver_apis.h"
namespace triton { namespace core {

#ifdef TRITON_ENABLE_TRACING

//
// InferenceTrace
//
// Interface to TRITONSERVER_InferenceTrace to report trace events.
//
class InferenceTrace {
 public:
  InferenceTrace(
      const TRITONSERVER_InferenceTraceLevel level, const uint64_t parent_id,
      TRITONSERVER_InferenceTraceActivityFn_t activity_fn,
      TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn,
      TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* userp)
      : level_(level), id_(next_id_++), parent_id_(parent_id),
        activity_fn_(activity_fn), tensor_activity_fn_(tensor_activity_fn),
        release_fn_(release_fn), userp_(userp)
  {
  }

  InferenceTrace* SpawnChildTrace();

  int64_t Id() const { return id_; }
  int64_t ParentId() const { return parent_id_; }

  const std::string& ModelName() const { return model_name_; }
  int64_t ModelVersion() const { return model_version_; }

  void SetModelName(const std::string& n) { model_name_ = n; }
  void SetModelVersion(int64_t v) { model_version_ = v; }

  // Report trace activity.
  void Report(
      const TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns)
  {
    if ((level_ & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) > 0) {
      activity_fn_(
          reinterpret_cast<TRITONSERVER_InferenceTrace*>(this), activity,
          timestamp_ns, userp_);
    }
  }

  // Report trace activity at the current time.
  void ReportNow(const TRITONSERVER_InferenceTraceActivity activity)
  {
    if ((level_ & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) > 0) {
      Report(
          activity,
          std::chrono::duration_cast<std::chrono::nanoseconds>(
              std::chrono::steady_clock::now().time_since_epoch())
              .count());
    }
  }

  // Report tensor trace activity.
  void ReportTensor(
      const TRITONSERVER_InferenceTraceActivity activity, const char* name,
      TRITONSERVER_DataType datatype, const void* base, size_t byte_size,
      const int64_t* shape, uint64_t dim_count,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
  {
    if ((level_ & TRITONSERVER_TRACE_LEVEL_TENSORS) > 0) {
      tensor_activity_fn_(
          reinterpret_cast<TRITONSERVER_InferenceTrace*>(this), activity, name,
          datatype, base, byte_size, shape, dim_count, memory_type,
          memory_type_id, userp_);
    }
  }

  // Release the trace. Call the trace release callback.
  void Release();

 private:
  const TRITONSERVER_InferenceTraceLevel level_;
  const uint64_t id_;
  const uint64_t parent_id_;

  TRITONSERVER_InferenceTraceActivityFn_t activity_fn_;
  TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn_;
  TRITONSERVER_InferenceTraceReleaseFn_t release_fn_;
  void* userp_;

  std::string model_name_;
  int64_t model_version_;

  // Maintain next id statically so that trace id is unique even
  // across traces
  static std::atomic<uint64_t> next_id_;
};

//
// InferenceTraceProxy
//
// Object attached as shared_ptr to InferenceRequest and
// InferenceResponse(s) being traced as part of a single inference
// request.
//
class InferenceTraceProxy {
 public:
  InferenceTraceProxy(InferenceTrace* trace) : trace_(trace) {}
  ~InferenceTraceProxy() { trace_->Release(); }

  int64_t Id() const { return trace_->Id(); }
  int64_t ParentId() const { return trace_->ParentId(); }
  const std::string& ModelName() const { return trace_->ModelName(); }
  int64_t ModelVersion() const { return trace_->ModelVersion(); }
  void SetModelName(const std::string& n) { trace_->SetModelName(n); }
  void SetModelVersion(int64_t v) { trace_->SetModelVersion(v); }

  void Report(
      const TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns)
  {
    trace_->Report(activity, timestamp_ns);
  }

  void ReportNow(const TRITONSERVER_InferenceTraceActivity activity)
  {
    trace_->ReportNow(activity);
  }

  void ReportTensor(
      const TRITONSERVER_InferenceTraceActivity activity, const char* name,
      TRITONSERVER_DataType datatype, const void* base, size_t byte_size,
      const int64_t* shape, uint64_t dim_count,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
  {
    trace_->ReportTensor(
        activity, name, datatype, base, byte_size, shape, dim_count,
        memory_type, memory_type_id);
  }

  std::shared_ptr<InferenceTraceProxy> SpawnChildTrace();

 private:
  InferenceTrace* trace_;
};

#endif  // TRITON_ENABLE_TRACING
//
// Macros to generate trace activity
//
#ifdef TRITON_ENABLE_TRACING
#define INFER_TRACE_ACTIVITY(T, A, TS_NS) \
{ \
const auto& trace = (T); \
const auto ts_ns = (TS_NS); \
if (trace != nullptr) { \
trace->Report(A, ts_ns); \
} \
}
#define INFER_TRACE_ACTIVITY_NOW(T, A) \
{ \
const auto& trace = (T); \
if (trace != nullptr) { \
trace->ReportNow(A); \
} \
}
#define INFER_TRACE_TENSOR_ACTIVITY(T, A, N, D, BA, BY, S, DI, MT, MTI) \
{ \
const auto& trace = (T); \
if (trace != nullptr) { \
trace->ReportTensor(A, N, D, BA, BY, S, DI, MT, MTI); \
} \
}
#else
#define INFER_TRACE_ACTIVITY(T, A, TS_NS)
#define INFER_TRACE_ACTIVITY_NOW(T, A)
#define INFER_TRACE_TENSOR_ACTIVITY(T, A, N, D, BA, BY, S, DI, MT, MTI)
#endif // TRITON_ENABLE_TRACING
}}  // namespace triton::core
3rdparty/core-r22.12/src/instance_queue.cc  deleted  100644 → 0
// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "instance_queue.h"
#include "triton/common/logging.h"
namespace triton { namespace core {

InstanceQueue::InstanceQueue(
    size_t max_batch_size, uint64_t max_queue_delay_ns)
    : max_batch_size_(max_batch_size), max_queue_delay_ns_(max_queue_delay_ns)
{
}

size_t
InstanceQueue::Size()
{
  return payload_queue_.size();
}

bool
InstanceQueue::Empty()
{
  return payload_queue_.empty();
}

void
InstanceQueue::Enqueue(const std::shared_ptr<Payload>& payload)
{
  payload_queue_.push_back(payload);
}

void
InstanceQueue::Dequeue(
    std::shared_ptr<Payload>* payload,
    std::vector<std::shared_ptr<Payload>>* merged_payloads)
{
  *payload = payload_queue_.front();
  payload_queue_.pop_front();
  {
    std::lock_guard<std::mutex> exec_lock(*((*payload)->GetExecMutex()));
    (*payload)->SetState(Payload::State::EXECUTING);
    if ((!payload_queue_.empty()) && (max_queue_delay_ns_ > 0) &&
        (max_batch_size_ > 1) && (!(*payload)->IsSaturated())) {
      bool continue_merge;
      do {
        continue_merge = false;
        uint64_t now_ns =
            std::chrono::duration_cast<std::chrono::nanoseconds>(
                std::chrono::steady_clock::now().time_since_epoch())
                .count();
        size_t batch_size = (*payload)->BatchSize();
        if ((!payload_queue_.empty()) &&
            (!payload_queue_.front()->IsSaturated()) &&
            (now_ns - payload_queue_.front()->BatcherStartNs()) >
                max_queue_delay_ns_) {
          std::lock_guard<std::mutex> exec_lock(
              *(payload_queue_.front()->GetExecMutex()));
          payload_queue_.front()->SetState(Payload::State::EXECUTING);
          size_t front_batch_size = payload_queue_.front()->BatchSize();
          if ((batch_size + front_batch_size) <= max_batch_size_) {
            const auto& status =
                (*payload)->MergePayload(payload_queue_.front());
            if (status.IsOk()) {
              merged_payloads->push_back(payload_queue_.front());
              payload_queue_.pop_front();
              continue_merge = true;
            }
          }
        }
      } while (continue_merge);
    }
  }
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/instance_queue.h  deleted  100644 → 0
// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "payload.h"
namespace triton { namespace core {

//
// InstanceQueue
//
// A queue implementation holding Payloads ready to be scheduled on
// model instance.
class InstanceQueue {
 public:
  explicit InstanceQueue(size_t max_batch_size, uint64_t max_queue_delay_ns);

  size_t Size();
  bool Empty();
  void Enqueue(const std::shared_ptr<Payload>& payload);
  void Dequeue(
      std::shared_ptr<Payload>* payload,
      std::vector<std::shared_ptr<Payload>>* merged_payloads);

 private:
  size_t max_batch_size_;
  uint64_t max_queue_delay_ns_;

  std::deque<std::shared_ptr<Payload>> payload_queue_;
  std::shared_ptr<Payload> staged_payload_;
  std::mutex mu_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/label_provider.cc  deleted  100644 → 0
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "label_provider.h"
#include <iostream>
#include <iterator>
#include <sstream>
#include "filesystem.h"
namespace triton { namespace core {

const std::string&
LabelProvider::GetLabel(const std::string& name, size_t index) const
{
  static const std::string not_found;

  auto itr = label_map_.find(name);
  if (itr == label_map_.end()) {
    return not_found;
  }

  if (itr->second.size() <= index) {
    return not_found;
  }

  return itr->second[index];
}

Status
LabelProvider::AddLabels(const std::string& name, const std::string& filepath)
{
  std::string label_file_contents;
  RETURN_IF_ERROR(ReadTextFile(filepath, &label_file_contents));

  auto p = label_map_.insert(std::make_pair(name, std::vector<std::string>()));
  if (!p.second) {
    return Status(
        Status::Code::INTERNAL, "multiple label files for '" + name + "'");
  }

  auto itr = p.first;

  std::istringstream label_file_stream(label_file_contents);
  std::string line;
  while (std::getline(label_file_stream, line)) {
    itr->second.push_back(line);
  }

  return Status::Success;
}

const std::vector<std::string>&
LabelProvider::GetLabels(const std::string& name)
{
  static const std::vector<std::string> not_found;
  auto itr = label_map_.find(name);
  if (itr == label_map_.end()) {
    return not_found;
  }
  return itr->second;
}

Status
LabelProvider::AddLabels(
    const std::string& name, const std::vector<std::string>& labels)
{
  label_map_.emplace(name, labels);
  return Status::Success;
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/label_provider.h  deleted  100644 → 0
// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include "constants.h"
#include "status.h"
namespace triton { namespace core {

// Provides classification labels.
class LabelProvider {
 public:
  LabelProvider() = default;

  // Return the label associated with 'name' for a given
  // 'index'. Return empty string if no label is available.
  const std::string& GetLabel(const std::string& name, size_t index) const;

  // Associate with 'name' a set of labels initialized from a given
  // 'filepath'. Within the file each label is specified on its own
  // line. The first label (line 0) is the index-0 label, the second
  // label (line 1) is the index-1 label, etc.
  Status AddLabels(const std::string& name, const std::string& filepath);

  // Return the labels associated with 'name'. Return empty vector if no labels
  // are available.
  const std::vector<std::string>& GetLabels(const std::string& name);

  // Associate with 'name' a set of 'labels'
  Status AddLabels(
      const std::string& name, const std::vector<std::string>& labels);

 private:
  DISALLOW_COPY_AND_ASSIGN(LabelProvider);

  std::unordered_map<std::string, std::vector<std::string>> label_map_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/libtritonserver.ldscript  deleted  100644 → 0
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
{
  global:
    TRITONSERVER_*;
    TRITONBACKEND_*;
    TRITONREPOAGENT_*;
  local: *;
};
3rdparty/core-r22.12/src/memory.cc  deleted  100644 → 0
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "memory.h"
#include "pinned_memory_manager.h"
#include "triton/common/logging.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#include "cuda_memory_manager.h"
#endif // TRITON_ENABLE_GPU
namespace triton { namespace core {

//
// MemoryReference
//
MemoryReference::MemoryReference() : Memory() {}

const char*
MemoryReference::BufferAt(
    size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id) const
{
  if (idx >= buffer_.size()) {
    *byte_size = 0;
    *memory_type = TRITONSERVER_MEMORY_CPU;
    *memory_type_id = 0;
    return nullptr;
  }
  *memory_type = buffer_[idx].buffer_attributes_.MemoryType();
  *memory_type_id = buffer_[idx].buffer_attributes_.MemoryTypeId();
  *byte_size = buffer_[idx].buffer_attributes_.ByteSize();
  return buffer_[idx].buffer_;
}

const char*
MemoryReference::BufferAt(size_t idx, BufferAttributes** buffer_attributes)
{
  if (idx >= buffer_.size()) {
    *buffer_attributes = nullptr;
    return nullptr;
  }

  *buffer_attributes = &(buffer_[idx].buffer_attributes_);
  return buffer_[idx].buffer_;
}

size_t
MemoryReference::AddBuffer(
    const char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  total_byte_size_ += byte_size;
  buffer_count_++;
  buffer_.emplace_back(buffer, byte_size, memory_type, memory_type_id);
  return buffer_.size() - 1;
}

size_t
MemoryReference::AddBuffer(
    const char* buffer, BufferAttributes* buffer_attributes)
{
  total_byte_size_ += buffer_attributes->ByteSize();
  buffer_count_++;
  buffer_.emplace_back(buffer, buffer_attributes);
  return buffer_.size() - 1;
}

size_t
MemoryReference::AddBufferFront(
    const char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  total_byte_size_ += byte_size;
  buffer_count_++;
  buffer_.emplace(
      buffer_.begin(), buffer, byte_size, memory_type, memory_type_id);
  return buffer_.size() - 1;
}

//
// MutableMemory
//
MutableMemory::MutableMemory(
    char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
    : Memory(), buffer_(buffer),
      buffer_attributes_(
          BufferAttributes(byte_size, memory_type, memory_type_id, nullptr))
{
  total_byte_size_ = byte_size;
  buffer_count_ = (byte_size == 0) ? 0 : 1;
}

const char*
MutableMemory::BufferAt(
    size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
    int64_t* memory_type_id) const
{
  if (idx != 0) {
    *byte_size = 0;
    *memory_type = TRITONSERVER_MEMORY_CPU;
    *memory_type_id = 0;
    return nullptr;
  }
  *byte_size = total_byte_size_;
  *memory_type = buffer_attributes_.MemoryType();
  *memory_type_id = buffer_attributes_.MemoryTypeId();
  return buffer_;
}

const char*
MutableMemory::BufferAt(size_t idx, BufferAttributes** buffer_attributes)
{
  if (idx != 0) {
    *buffer_attributes = nullptr;
    return nullptr;
  }

  *buffer_attributes = &buffer_attributes_;
  return buffer_;
}

char*
MutableMemory::MutableBuffer(
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  if (memory_type != nullptr) {
    *memory_type = buffer_attributes_.MemoryType();
  }
  if (memory_type_id != nullptr) {
    *memory_type_id = buffer_attributes_.MemoryTypeId();
  }

  return buffer_;
}

//
// AllocatedMemory
//
AllocatedMemory::AllocatedMemory(
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
    : MutableMemory(nullptr, byte_size, memory_type, memory_type_id)
{
  if (total_byte_size_ != 0) {
    // Allocate memory with the following fallback policy:
    // CUDA memory -> pinned system memory -> non-pinned system memory
    switch (buffer_attributes_.MemoryType()) {
#ifdef TRITON_ENABLE_GPU
      case TRITONSERVER_MEMORY_GPU: {
        auto status = CudaMemoryManager::Alloc(
            (void**)&buffer_, total_byte_size_,
            buffer_attributes_.MemoryTypeId());
        if (!status.IsOk()) {
          static bool warning_logged = false;
          if (!warning_logged) {
            LOG_WARNING << status.Message()
                        << ", falling back to pinned system memory";
            warning_logged = true;
          }
          goto pinned_memory_allocation;
        }
        break;
      }
    pinned_memory_allocation:
#endif  // TRITON_ENABLE_GPU
      default: {
        TRITONSERVER_MemoryType memory_type = buffer_attributes_.MemoryType();
        auto status = PinnedMemoryManager::Alloc(
            (void**)&buffer_, total_byte_size_, &memory_type, true);
        buffer_attributes_.SetMemoryType(memory_type);
        if (!status.IsOk()) {
          LOG_ERROR << status.Message();
          buffer_ = nullptr;
        }
        break;
      }
    }
  }
  total_byte_size_ = (buffer_ == nullptr) ? 0 : total_byte_size_;
}

AllocatedMemory::~AllocatedMemory()
{
  if (buffer_ != nullptr) {
    switch (buffer_attributes_.MemoryType()) {
      case TRITONSERVER_MEMORY_GPU: {
#ifdef TRITON_ENABLE_GPU
        auto status =
            CudaMemoryManager::Free(buffer_, buffer_attributes_.MemoryTypeId());
        if (!status.IsOk()) {
          LOG_ERROR << status.Message();
        }
#endif  // TRITON_ENABLE_GPU
        break;
      }
      default: {
        auto status = PinnedMemoryManager::Free(buffer_);
        if (!status.IsOk()) {
          LOG_ERROR << status.Message();
          buffer_ = nullptr;
        }
        break;
      }
    }
    buffer_ = nullptr;
  }
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/memory.h  deleted  100644 → 0
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <vector>
#include "buffer_attributes.h"
#include "constants.h"
#include "status.h"
namespace triton { namespace core {

//
// Memory used to access data in inference requests
//
class Memory {
 public:
  // Get the 'idx'-th data block in the buffer. Using index to avoid
  // maintaining internal state such that one buffer can be shared
  // across multiple providers.
  // 'idx' zero base index. Valid indices are continuous.
  // 'byte_size' returns the byte size of the chunk of bytes.
  // 'memory_type' returns the memory type of the chunk of bytes.
  // 'memory_type_id' returns the memory type id of the chunk of bytes.
  // Return the pointer to the data block. Returns nullptr if 'idx' is
  // out of range
  virtual const char* BufferAt(
      size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
      int64_t* memory_type_id) const = 0;

  // Similar to the above BufferAt but with BufferAttributes.
  virtual const char* BufferAt(
      size_t idx, BufferAttributes** buffer_attributes) = 0;

  // Get the number of contiguous buffers composing the memory.
  size_t BufferCount() const { return buffer_count_; }

  // Return the total byte size of the data buffer
  size_t TotalByteSize() const { return total_byte_size_; }

 protected:
  Memory() : total_byte_size_(0), buffer_count_(0) {}

  size_t total_byte_size_;
  size_t buffer_count_;
};

//
// MemoryReference
//
class MemoryReference : public Memory {
 public:
  // Create a read-only data buffer as a reference to other data buffer
  MemoryReference();

  //\see Memory::BufferAt()
  const char* BufferAt(
      size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
      int64_t* memory_type_id) const override;
  const char* BufferAt(
      size_t idx, BufferAttributes** buffer_attributes) override;

  // Add a 'buffer' with 'byte_size' as part of this data buffer
  // Return the index of the buffer
  size_t AddBuffer(
      const char* buffer, size_t byte_size,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id);
  size_t AddBuffer(const char* buffer, BufferAttributes* buffer_attributes);

  // Add a 'buffer' with 'byte_size' as part of this data buffer in the front
  // Return the index of the buffer
  size_t AddBufferFront(
      const char* buffer, size_t byte_size,
      TRITONSERVER_MemoryType memory_type, int64_t memory_type_id);

 private:
  struct Block {
    Block(
        const char* buffer, size_t byte_size,
        TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
        : buffer_(buffer),
          buffer_attributes_(
              BufferAttributes(byte_size, memory_type, memory_type_id, nullptr))
    {
    }

    Block(const char* buffer, BufferAttributes* buffer_attributes)
        : buffer_(buffer), buffer_attributes_(*buffer_attributes)
    {
    }

    const char* buffer_;
    BufferAttributes buffer_attributes_;
  };

  std::vector<Block> buffer_;
};

//
// MutableMemory
//
class MutableMemory : public Memory {
 public:
  // Create a mutable data buffer referencing to other data buffer.
  MutableMemory(
      char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type,
      int64_t memory_type_id);
  virtual ~MutableMemory() {}

  //\see Memory::BufferAt()
  const char* BufferAt(
      size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type,
      int64_t* memory_type_id) const override;

  //\see Memory::BufferAt()
  const char* BufferAt(
      size_t idx, BufferAttributes** buffer_attributes) override;

  // Return a pointer to the base address of the mutable buffer. If
  // non-null 'memory_type' returns the memory type of the chunk of
  // bytes. If non-null 'memory_type_id' returns the memory type id of
  // the chunk of bytes.
  char* MutableBuffer(
      TRITONSERVER_MemoryType* memory_type = nullptr,
      int64_t* memory_type_id = nullptr);

  DISALLOW_COPY_AND_ASSIGN(MutableMemory);

 protected:
  MutableMemory() : Memory() {}

  char* buffer_;
  BufferAttributes buffer_attributes_;
};

//
// AllocatedMemory
//
class AllocatedMemory : public MutableMemory {
 public:
  // Create a continuous data buffer with 'byte_size', 'memory_type' and
  // 'memory_type_id'. Note that the buffer may be created on different memeory
  // type and memory type id if the original request type and id can not be
  // satisfied, thus the function caller should always check the actual memory
  // type and memory type id before use.
  AllocatedMemory(
      size_t byte_size, TRITONSERVER_MemoryType memory_type,
      int64_t memory_type_id);
  ~AllocatedMemory() override;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/metric_family.cc  deleted  100644 → 0
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef TRITON_ENABLE_METRICS
#include "metric_family.h"
#include "metrics.h"
#include "triton/common/logging.h"
namespace triton { namespace core {

//
// Implementation for TRITONSERVER_MetricFamily.
//
MetricFamily::MetricFamily(
    TRITONSERVER_MetricKind kind, const char* name, const char* description)
{
  auto registry = Metrics::GetRegistry();
  switch (kind) {
    case TRITONSERVER_METRIC_KIND_COUNTER:
      family_ = reinterpret_cast<void*>(&prometheus::BuildCounter()
                                             .Name(name)
                                             .Help(description)
                                             .Register(*registry));
      break;
    case TRITONSERVER_METRIC_KIND_GAUGE:
      family_ = reinterpret_cast<void*>(&prometheus::BuildGauge()
                                             .Name(name)
                                             .Help(description)
                                             .Register(*registry));
      break;
    default:
      throw std::invalid_argument(
          "Unsupported kind passed to MetricFamily constructor.");
  }

  kind_ = kind;
}

void*
MetricFamily::Add(std::map<std::string, std::string> label_map, Metric* metric)
{
  void* prom_metric = nullptr;
  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      auto counter_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Counter>*>(family_);
      auto counter_ptr = &counter_family_ptr->Add(label_map);
      prom_metric = reinterpret_cast<void*>(counter_ptr);
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Gauge>*>(family_);
      auto gauge_ptr = &gauge_family_ptr->Add(label_map);
      prom_metric = reinterpret_cast<void*>(gauge_ptr);
      break;
    }
    default:
      throw std::invalid_argument(
          "Unsupported family kind passed to Metric constructor.");
  }

  std::lock_guard<std::mutex> lk(metric_mtx_);
  ++prom_metric_ref_cnt_[prom_metric];
  child_metrics_.insert(metric);
  return prom_metric;
}

void
MetricFamily::Remove(void* prom_metric, Metric* metric)
{
  {
    // Remove reference to dependent Metric object
    std::lock_guard<std::mutex> lk(metric_mtx_);
    child_metrics_.erase(metric);
  }

  if (prom_metric == nullptr) {
    return;
  }

  {
    std::lock_guard<std::mutex> lk(metric_mtx_);
    const auto it = prom_metric_ref_cnt_.find(prom_metric);
    if (it != prom_metric_ref_cnt_.end()) {
      --it->second;
      if (it->second == 0) {
        prom_metric_ref_cnt_.erase(it);
      } else {
        // Done as it is not the last reference
        return;
      }
    }
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      auto counter_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Counter>*>(family_);
      auto counter_ptr = reinterpret_cast<prometheus::Counter*>(prom_metric);
      counter_family_ptr->Remove(counter_ptr);
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_family_ptr =
          reinterpret_cast<prometheus::Family<prometheus::Gauge>*>(family_);
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(prom_metric);
      gauge_family_ptr->Remove(gauge_ptr);
      break;
    }
    default:
      // Invalid kind should be caught in constructor
      LOG_ERROR << "Unsupported kind in Metric destructor.";
      break;
  }
}

void
MetricFamily::InvalidateReferences()
{
  std::lock_guard<std::mutex> lk(metric_mtx_);
  for (auto& metric : child_metrics_) {
    if (metric != nullptr) {
      metric->Invalidate();
    }
  }
  child_metrics_.clear();
}

MetricFamily::~MetricFamily()
{
  if (NumMetrics() > 0) {
    LOG_WARNING << "MetricFamily was deleted before its child Metrics, this "
                   "should not happen. Make sure to delete all child Metrics "
                   "before deleting their MetricFamily.";
  }

  InvalidateReferences();
  // DLIS-4072: Support for removing metric families from registry
}

//
// Implementation for TRITONSERVER_Metric.
//
Metric::Metric(
    TRITONSERVER_MetricFamily* family,
    std::vector<const InferenceParameter*> labels)
{
  family_ = reinterpret_cast<MetricFamily*>(family);
  kind_ = family_->Kind();

  // Create map of labels from InferenceParameters
  std::map<std::string, std::string> label_map;
  for (const auto& param : labels) {
    if (param->Type() != TRITONSERVER_PARAMETER_STRING) {
      throw std::invalid_argument(
          "Parameter [" + param->Name() +
          "] must have a type of TRITONSERVER_PARAMETER_STRING to be "
          "added as a label.");
    }

    label_map[param->Name()] =
        std::string(reinterpret_cast<const char*>(param->ValuePointer()));
  }

  metric_ = family_->Add(label_map, this);
}

Metric::~Metric()
{
  if (family_ != nullptr) {
    family_->Remove(metric_, this);
  } else {
    LOG_WARNING << "Corresponding MetricFamily was deleted before this Metric, "
                   "this should not happen. Make sure to delete a Metric "
                   "before deleting its MetricFamily.";
  }

  // Catch lifetime management / invalid reference issues
  Invalidate();
}

void
Metric::Invalidate()
{
  family_ = nullptr;
  metric_ = nullptr;
}

TRITONSERVER_Error*
Metric::Value(double* value)
{
  if (metric_ == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Could not get metric value. Metric has been invalidated.");
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      auto counter_ptr = reinterpret_cast<prometheus::Counter*>(metric_);
      LOG_VERBOSE(1) << "SETTING COUNTER METRIC FROM: " << *value << " to "
                     << counter_ptr->Value();
      *value = counter_ptr->Value();
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(metric_);
      LOG_VERBOSE(1) << "SETTING GAUGE METRIC FROM: " << *value << " to "
                     << gauge_ptr->Value();
      *value = gauge_ptr->Value();
      break;
    }
    default:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "Unsupported TRITONSERVER_MetricKind");
  }

  return nullptr;  // Success
}

TRITONSERVER_Error*
Metric::Increment(double value)
{
  if (metric_ == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Could not increment metric value. Metric has been invalidated.");
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      if (value < 0.0) {
        return TRITONSERVER_ErrorNew(
            TRITONSERVER_ERROR_INVALID_ARG,
            "TRITONSERVER_METRIC_KIND_COUNTER can only be incremented "
            "monotonically by non-negative values.");
      }

      auto counter_ptr = reinterpret_cast<prometheus::Counter*>(metric_);
      counter_ptr->Increment(value);
      break;
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(metric_);
      // Gauge::Increment works for both positive and negative values as of
      // prometheus-cpp v1.0 but for now on v0.7 we defer call to
      // Increment/Decrement based on the sign of value
      // https://github.com/jupp0r/prometheus-cpp/blob/master/core/src/gauge.cc
      if (value < 0.0) {
        gauge_ptr->Decrement(-1.0 * value);
      } else {
        gauge_ptr->Increment(value);
      }
      break;
    }
    default:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "Unsupported TRITONSERVER_MetricKind");
  }

  return nullptr;  // Success
}

TRITONSERVER_Error*
Metric::Set(double value)
{
  if (metric_ == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INTERNAL,
        "Could not set metric value. Metric has been invalidated.");
  }

  switch (kind_) {
    case TRITONSERVER_METRIC_KIND_COUNTER: {
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "TRITONSERVER_METRIC_KIND_COUNTER does not support Set");
    }
    case TRITONSERVER_METRIC_KIND_GAUGE: {
      auto gauge_ptr = reinterpret_cast<prometheus::Gauge*>(metric_);
      gauge_ptr->Set(value);
      break;
    }
    default:
      return TRITONSERVER_ErrorNew(
          TRITONSERVER_ERROR_UNSUPPORTED,
          "Unsupported TRITONSERVER_MetricKind");
  }

  return nullptr;  // Success
}

}}  // namespace triton::core
#endif // TRITON_ENABLE_METRICS
3rdparty/core-r22.12/src/metric_family.h  deleted  100644 → 0
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#ifdef TRITON_ENABLE_METRICS
#include <mutex>
#include <set>
#include <unordered_map>
#include "infer_parameter.h"
#include "prometheus/registry.h"
#include "tritonserver_apis.h"
namespace triton { namespace core {

//
// Implementation for TRITONSERVER_MetricFamily.
//
class Metric;
class MetricFamily {
 public:
  MetricFamily(
      TRITONSERVER_MetricKind kind, const char* name, const char* description);
  ~MetricFamily();

  void* Family() const { return family_; }
  TRITONSERVER_MetricKind Kind() const { return kind_; }

  void* Add(std::map<std::string, std::string> label_map, Metric* metric);
  void Remove(void* prom_metric, Metric* metric);

  int NumMetrics()
  {
    std::lock_guard<std::mutex> lk(metric_mtx_);
    return child_metrics_.size();
  }

 private:
  // If a MetricFamily is deleted before its dependent Metric, we want to
  // invalidate the reference so we don't access invalid memory.
  void InvalidateReferences();

  void* family_;
  TRITONSERVER_MetricKind kind_;
  // Synchronize access of related metric objects
  std::mutex metric_mtx_;
  // Prometheus returns the existing metric pointer if the metric with the same
  // set of labels are requested, as a result, different Metric objects may
  // refer to the same prometheus metric. So we must track the reference count
  // of the metric and request prometheus to remove it only when all references
  // are released.
  std::unordered_map<void*, size_t> prom_metric_ref_cnt_;
  // Maintain references to metrics created from this metric family to
  // invalidate their references if a family is deleted before its metric
  std::set<Metric*> child_metrics_;
};

//
// Implementation for TRITONSERVER_Metric.
//
class Metric {
 public:
  Metric(
      TRITONSERVER_MetricFamily* family,
      std::vector<const InferenceParameter*> labels);
  ~Metric();

  MetricFamily* Family() const { return family_; }
  TRITONSERVER_MetricKind Kind() const { return kind_; }

  TRITONSERVER_Error* Value(double* value);
  TRITONSERVER_Error* Increment(double value);
  TRITONSERVER_Error* Set(double value);

  // If a MetricFamily is deleted before its dependent Metric, we want to
  // invalidate the references so we don't access invalid memory.
  void Invalidate();

 private:
  void* metric_;
  MetricFamily* family_;
  TRITONSERVER_MetricKind kind_;
};

}}  // namespace triton::core
#endif // TRITON_ENABLE_METRICS
3rdparty/core-r22.12/src/metric_model_reporter.cc  deleted  100644 → 0
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "metric_model_reporter.h"
#ifdef TRITON_ENABLE_METRICS
#include "constants.h"
#include "metrics.h"
namespace triton { namespace core {

Status
MetricModelReporter::Create(
    const std::string& model_name, const int64_t model_version,
    const int device, const triton::common::MetricTagsMap& model_tags,
    std::shared_ptr<MetricModelReporter>* metric_model_reporter)
{
  static std::mutex mtx;
  static std::unordered_map<size_t, std::weak_ptr<MetricModelReporter>>
      reporter_map;

  std::map<std::string, std::string> labels;
  GetMetricLabels(&labels, model_name, model_version, device, model_tags);
  auto hash_labels = Metrics::HashLabels(labels);

  std::lock_guard<std::mutex> lock(mtx);

  const auto& itr = reporter_map.find(hash_labels);
  if (itr != reporter_map.end()) {
    // Found in map. If the weak_ptr is still valid that means that
    // there are other models using the reporter and we just reuse that
    // same reporter. If the weak_ptr is not valid then we need to remove
    // the weak_ptr from the map and create the reporter again.
    *metric_model_reporter = itr->second.lock();
    if (*metric_model_reporter != nullptr) {
      return Status::Success;
    }

    reporter_map.erase(itr);
  }

  metric_model_reporter->reset(
      new MetricModelReporter(model_name, model_version, device, model_tags));
  reporter_map.insert({hash_labels, *metric_model_reporter});
  return Status::Success;
}

MetricModelReporter::MetricModelReporter(
    const std::string& model_name, const int64_t model_version,
    const int device, const triton::common::MetricTagsMap& model_tags)
{
  std::map<std::string, std::string> labels;
  GetMetricLabels(&labels, model_name, model_version, device, model_tags);

  metric_inf_success_ =
      CreateCounterMetric(Metrics::FamilyInferenceSuccess(), labels);
  metric_inf_failure_ =
      CreateCounterMetric(Metrics::FamilyInferenceFailure(), labels);
  metric_inf_count_ =
      CreateCounterMetric(Metrics::FamilyInferenceCount(), labels);
  metric_inf_exec_count_ =
      CreateCounterMetric(Metrics::FamilyInferenceExecutionCount(), labels);
  metric_inf_request_duration_us_ =
      CreateCounterMetric(Metrics::FamilyInferenceRequestDuration(), labels);
  metric_inf_queue_duration_us_ =
      CreateCounterMetric(Metrics::FamilyInferenceQueueDuration(), labels);
  metric_inf_compute_input_duration_us_ = CreateCounterMetric(
      Metrics::FamilyInferenceComputeInputDuration(), labels);
  metric_inf_compute_infer_duration_us_ = CreateCounterMetric(
      Metrics::FamilyInferenceComputeInferDuration(), labels);
  metric_inf_compute_output_duration_us_ = CreateCounterMetric(
      Metrics::FamilyInferenceComputeOutputDuration(), labels);
  metric_cache_hit_count_ =
      CreateCounterMetric(Metrics::FamilyCacheHitCount(), labels);
  metric_cache_hit_lookup_duration_us_ =
      CreateCounterMetric(Metrics::FamilyCacheHitLookupDuration(), labels);
  metric_cache_miss_count_ =
      CreateCounterMetric(Metrics::FamilyCacheMissCount(), labels);
  metric_cache_miss_lookup_duration_us_ =
      CreateCounterMetric(Metrics::FamilyCacheMissLookupDuration(), labels);
  metric_cache_miss_insertion_duration_us_ =
      CreateCounterMetric(Metrics::FamilyCacheMissInsertionDuration(), labels);
}

MetricModelReporter::~MetricModelReporter()
{
  Metrics::FamilyInferenceSuccess().Remove(metric_inf_success_);
  Metrics::FamilyInferenceFailure().Remove(metric_inf_failure_);
  Metrics::FamilyInferenceCount().Remove(metric_inf_count_);
  Metrics::FamilyInferenceExecutionCount().Remove(metric_inf_exec_count_);
  Metrics::FamilyInferenceRequestDuration().Remove(
      metric_inf_request_duration_us_);
  Metrics::FamilyInferenceQueueDuration().Remove(metric_inf_queue_duration_us_);
  Metrics::FamilyInferenceComputeInputDuration().Remove(
      metric_inf_compute_input_duration_us_);
  Metrics::FamilyInferenceComputeInferDuration().Remove(
      metric_inf_compute_infer_duration_us_);
  Metrics::FamilyInferenceComputeOutputDuration().Remove(
      metric_inf_compute_output_duration_us_);
  Metrics::FamilyCacheHitCount().Remove(metric_cache_hit_count_);
  Metrics::FamilyCacheHitLookupDuration().Remove(
      metric_cache_hit_lookup_duration_us_);
  Metrics::FamilyCacheMissCount().Remove(metric_cache_miss_count_);
  Metrics::FamilyCacheMissInsertionDuration().Remove(
      metric_cache_miss_insertion_duration_us_);
}

void
MetricModelReporter::GetMetricLabels(
    std::map<std::string, std::string>* labels, const std::string& model_name,
    const int64_t model_version, const int device,
    const triton::common::MetricTagsMap& model_tags)
{
  labels->insert(std::map<std::string, std::string>::value_type(
      std::string(kMetricsLabelModelName), model_name));
  labels->insert(std::map<std::string, std::string>::value_type(
      std::string(kMetricsLabelModelVersion), std::to_string(model_version)));
  for (const auto& tag : model_tags) {
    labels->insert(std::map<std::string, std::string>::value_type(
        "_" + tag.first, tag.second));
  }

  // 'device' can be < 0 to indicate that the GPU is not known. In
  // that case use a metric that doesn't have the gpu_uuid label.
  if (device >= 0) {
    std::string uuid;
    if (Metrics::UUIDForCudaDevice(device, &uuid)) {
      labels->insert(std::map<std::string, std::string>::value_type(
          std::string(kMetricsLabelGpuUuid), uuid));
    }
  }
}

prometheus::Counter*
MetricModelReporter::CreateCounterMetric(
    prometheus::Family<prometheus::Counter>& family,
    const std::map<std::string, std::string>& labels)
{
  return &family.Add(labels);
}

}}  // namespace triton::core
#endif // TRITON_ENABLE_METRICS
3rdparty/core-r22.12/src/metric_model_reporter.h  deleted  100644 → 0
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "status.h"
#include "triton/common/model_config.h"
#ifdef TRITON_ENABLE_METRICS
#include "prometheus/registry.h"
#endif // TRITON_ENABLE_METRICS
namespace triton { namespace core {

//
// Interface for a metric reporter for a given version of a model.
//
class MetricModelReporter {
 public:
#ifdef TRITON_ENABLE_METRICS
  static Status Create(
      const std::string& model_name, const int64_t model_version,
      const int device, const triton::common::MetricTagsMap& model_tags,
      std::shared_ptr<MetricModelReporter>* metric_model_reporter);

  ~MetricModelReporter();

  // Get a metric for the given model, version and GPU index.
  prometheus::Counter& MetricInferenceSuccess() const { return *metric_inf_success_; }
  prometheus::Counter& MetricInferenceFailure() const { return *metric_inf_failure_; }
  prometheus::Counter& MetricInferenceCount() const { return *metric_inf_count_; }
  prometheus::Counter& MetricInferenceExecutionCount() const { return *metric_inf_exec_count_; }
  prometheus::Counter& MetricInferenceRequestDuration() const { return *metric_inf_request_duration_us_; }
  prometheus::Counter& MetricInferenceQueueDuration() const { return *metric_inf_queue_duration_us_; }
  prometheus::Counter& MetricInferenceComputeInputDuration() const { return *metric_inf_compute_input_duration_us_; }
  prometheus::Counter& MetricInferenceComputeInferDuration() const { return *metric_inf_compute_infer_duration_us_; }
  prometheus::Counter& MetricInferenceComputeOutputDuration() const { return *metric_inf_compute_output_duration_us_; }
  prometheus::Counter& MetricCacheHitCount() const { return *metric_cache_hit_count_; }
  prometheus::Counter& MetricCacheHitLookupDuration() const { return *metric_cache_hit_lookup_duration_us_; }
  prometheus::Counter& MetricCacheMissCount() const { return *metric_cache_miss_count_; }
  prometheus::Counter& MetricCacheMissLookupDuration() const { return *metric_cache_miss_lookup_duration_us_; }
  prometheus::Counter& MetricCacheMissInsertionDuration() const { return *metric_cache_miss_insertion_duration_us_; }

 private:
  MetricModelReporter(
      const std::string& model_name, const int64_t model_version,
      const int device, const triton::common::MetricTagsMap& model_tags);

  static void GetMetricLabels(
      std::map<std::string, std::string>* labels,
      const std::string& model_name, const int64_t model_version,
      const int device, const triton::common::MetricTagsMap& model_tags);
  prometheus::Counter* CreateCounterMetric(
      prometheus::Family<prometheus::Counter>& family,
      const std::map<std::string, std::string>& labels);

  prometheus::Counter* metric_inf_success_;
  prometheus::Counter* metric_inf_failure_;
  prometheus::Counter* metric_inf_count_;
  prometheus::Counter* metric_inf_exec_count_;
  prometheus::Counter* metric_inf_request_duration_us_;
  prometheus::Counter* metric_inf_queue_duration_us_;
  prometheus::Counter* metric_inf_compute_input_duration_us_;
  prometheus::Counter* metric_inf_compute_infer_duration_us_;
  prometheus::Counter* metric_inf_compute_output_duration_us_;
  prometheus::Counter* metric_cache_hit_count_;
  prometheus::Counter* metric_cache_hit_lookup_duration_us_;
  prometheus::Counter* metric_cache_miss_count_;
  prometheus::Counter* metric_cache_miss_lookup_duration_us_;
  prometheus::Counter* metric_cache_miss_insertion_duration_us_;
#endif  // TRITON_ENABLE_METRICS
};

}}  // namespace triton::core
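A hypothetical call site for this interface, to show how the accessors are meant to be used once Create() has produced a reporter for a (model, version, device) tuple. RecordSuccess, its parameters, and the duration values are illustrative assumptions, not part of the deleted sources; only the MetricInference*() accessors and prometheus::Counter::Increment() come from the code above:

// Minimal sketch, assuming the headers above are available.
void RecordSuccess(
    const std::shared_ptr<triton::core::MetricModelReporter>& reporter,
    uint64_t request_duration_us, uint64_t queue_duration_us)
{
#ifdef TRITON_ENABLE_METRICS
  if (reporter != nullptr) {
    reporter->MetricInferenceSuccess().Increment(1);
    reporter->MetricInferenceCount().Increment(1);
    reporter->MetricInferenceRequestDuration().Increment(request_duration_us);
    reporter->MetricInferenceQueueDuration().Increment(queue_duration_us);
  }
#endif  // TRITON_ENABLE_METRICS
}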
3rdparty/core-r22.12/src/metrics.cc  deleted 100644 → 0  @ d592fbea
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#ifdef TRITON_ENABLE_METRICS
#include "metrics.h"
#include <thread>
#include "constants.h"
#include "prometheus/detail/utils.h"
#include "triton/common/logging.h"
#ifdef TRITON_ENABLE_METRICS_GPU
#include <cuda_runtime_api.h>
#include <dcgm_agent.h>
#include <cstring>
#include <set>
#include <string>
#endif // TRITON_ENABLE_METRICS_GPU
namespace
triton
{
namespace
core
{
Metrics
::
Metrics
()
:
registry_
(
std
::
make_shared
<
prometheus
::
Registry
>
()),
serializer_
(
new
prometheus
::
TextSerializer
()),
inf_success_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_request_success"
)
.
Help
(
"Number of successful inference requests, all batch sizes"
)
.
Register
(
*
registry_
)),
inf_failure_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_request_failure"
)
.
Help
(
"Number of failed inference requests, all batch sizes"
)
.
Register
(
*
registry_
)),
inf_count_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_count"
)
.
Help
(
"Number of inferences performed (does not "
"include cached requests)"
)
.
Register
(
*
registry_
)),
inf_count_exec_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_exec_count"
)
.
Help
(
"Number of model executions performed "
"(does not include cached requests)"
)
.
Register
(
*
registry_
)),
inf_request_duration_us_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_request_duration_us"
)
.
Help
(
"Cumulative inference request duration in microseconds "
"(includes cached requests)"
)
.
Register
(
*
registry_
)),
inf_queue_duration_us_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_queue_duration_us"
)
.
Help
(
"Cumulative inference queuing duration in microseconds "
"(includes cached requests)"
)
.
Register
(
*
registry_
)),
inf_compute_input_duration_us_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_compute_input_duration_us"
)
.
Help
(
"Cumulative compute input duration in microseconds (does "
"not include cached requests)"
)
.
Register
(
*
registry_
)),
inf_compute_infer_duration_us_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_compute_infer_duration_us"
)
.
Help
(
"Cumulative compute inference duration in microseconds "
"(does not include cached requests)"
)
.
Register
(
*
registry_
)),
inf_compute_output_duration_us_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_inference_compute_output_duration_us"
)
.
Help
(
"Cumulative inference compute output duration in "
"microseconds (does not include cached requests)"
)
.
Register
(
*
registry_
)),
cache_num_entries_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_num_entries"
)
.
Help
(
"Number of responses stored in response cache"
)
.
Register
(
*
registry_
)),
cache_num_lookups_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_num_lookups"
)
.
Help
(
"Number of cache lookups in response cache"
)
.
Register
(
*
registry_
)),
cache_num_hits_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_num_hits"
)
.
Help
(
"Number of cache hits in response cache"
)
.
Register
(
*
registry_
)),
cache_num_misses_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_num_misses"
)
.
Help
(
"Number of cache misses in response cache"
)
.
Register
(
*
registry_
)),
cache_num_evictions_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_num_evictions"
)
.
Help
(
"Number of cache evictions in response cache"
)
.
Register
(
*
registry_
)),
cache_lookup_duration_us_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_lookup_duration"
)
.
Help
(
"Total cache lookup duration (hit and miss), in microseconds"
)
.
Register
(
*
registry_
)),
cache_insertion_duration_us_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_insertion_duration"
)
.
Help
(
"Total cache insertion duration, in microseconds"
)
.
Register
(
*
registry_
)),
cache_util_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cache_util"
)
.
Help
(
"Cache utilization [0.0 - 1.0]"
)
.
Register
(
*
registry_
)),
// Per-model cache metric families
cache_num_hits_model_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_cache_num_hits_per_model"
)
.
Help
(
"Number of cache hits per model"
)
.
Register
(
*
registry_
)),
cache_hit_lookup_duration_us_model_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_cache_hit_lookup_duration_per_model"
)
.
Help
(
"Total cache hit lookup duration per model, in microseconds"
)
.
Register
(
*
registry_
)),
cache_num_misses_model_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_cache_num_misses_per_model"
)
.
Help
(
"Number of cache misses per model"
)
.
Register
(
*
registry_
)),
cache_miss_lookup_duration_us_model_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_cache_miss_lookup_duration_per_model"
)
.
Help
(
"Total cache miss lookup duration per model, in microseconds"
)
.
Register
(
*
registry_
)),
cache_miss_insertion_duration_us_model_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_cache_miss_insertion_duration_per_model"
)
.
Help
(
"Total cache miss insertion duration per model, in "
"microseconds"
)
.
Register
(
*
registry_
)),
#ifdef TRITON_ENABLE_METRICS_GPU
gpu_utilization_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_gpu_utilization"
)
.
Help
(
"GPU utilization rate [0.0 - 1.0)"
)
.
Register
(
*
registry_
)),
gpu_memory_total_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_gpu_memory_total_bytes"
)
.
Help
(
"GPU total memory, in bytes"
)
.
Register
(
*
registry_
)),
gpu_memory_used_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_gpu_memory_used_bytes"
)
.
Help
(
"GPU used memory, in bytes"
)
.
Register
(
*
registry_
)),
gpu_power_usage_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_gpu_power_usage"
)
.
Help
(
"GPU power usage in watts"
)
.
Register
(
*
registry_
)),
gpu_power_limit_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_gpu_power_limit"
)
.
Help
(
"GPU power management limit in watts"
)
.
Register
(
*
registry_
)),
gpu_energy_consumption_family_
(
prometheus
::
BuildCounter
()
.
Name
(
"nv_energy_consumption"
)
.
Help
(
"GPU energy consumption in joules since the Triton Server "
"started"
)
.
Register
(
*
registry_
)),
#endif // TRITON_ENABLE_METRICS_GPU
#ifdef TRITON_ENABLE_METRICS_CPU
cpu_utilization_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cpu_utilization"
)
.
Help
(
"CPU utilization rate [0.0 - 1.0]"
)
.
Register
(
*
registry_
)),
cpu_memory_total_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cpu_memory_total_bytes"
)
.
Help
(
"CPU total memory (RAM), in bytes"
)
.
Register
(
*
registry_
)),
cpu_memory_used_family_
(
prometheus
::
BuildGauge
()
.
Name
(
"nv_cpu_memory_used_bytes"
)
.
Help
(
"CPU used memory (RAM), in bytes"
)
.
Register
(
*
registry_
)),
#endif // TRITON_ENABLE_METRICS_CPU
metrics_enabled_
(
false
),
gpu_metrics_enabled_
(
false
),
cpu_metrics_enabled_
(
false
),
cache_metrics_enabled_
(
false
),
metrics_interval_ms_
(
2000
)
{
}
static
prometheus
::
detail
::
LabelHasher
label_hasher_
;
size_t
Metrics
::
HashLabels
(
const
std
::
map
<
std
::
string
,
std
::
string
>&
labels
)
{
return
label_hasher_
(
labels
);
}
Metrics
::~
Metrics
()
{
// Signal the cache thread to exit and then wait for it...
if
(
poll_thread_
!=
nullptr
)
{
poll_thread_exit_
.
store
(
true
);
poll_thread_
->
join
();
#ifdef TRITON_ENABLE_METRICS_GPU
if
(
dcgm_metadata_
.
dcgm_initialized_
)
{
dcgmReturn_t
derr
;
// Group destroy will return an error if groupId invalid or dcgm not
// initialized or configured correctly
derr
=
dcgmGroupDestroy
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_metadata_
.
groupId_
);
if
(
derr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Unable to destroy DCGM group: "
<<
errorString
(
derr
);
}
// Stop and shutdown DCGM
if
(
dcgm_metadata_
.
standalone_
)
{
derr
=
dcgmDisconnect
(
dcgm_metadata_
.
dcgm_handle_
);
}
else
{
derr
=
dcgmStopEmbedded
(
dcgm_metadata_
.
dcgm_handle_
);
}
if
(
derr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Unable to stop DCGM: "
<<
errorString
(
derr
);
}
derr
=
dcgmShutdown
();
if
(
derr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Unable to shutdown DCGM: "
<<
errorString
(
derr
);
}
}
#endif // TRITON_ENABLE_METRICS_GPU
}
}
bool
Metrics
::
Enabled
()
{
auto
singleton
=
GetSingleton
();
return
singleton
->
metrics_enabled_
;
}
void
Metrics
::
EnableMetrics
()
{
auto
singleton
=
GetSingleton
();
singleton
->
metrics_enabled_
=
true
;
}
void
Metrics
::
EnableCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
)
{
auto
singleton
=
GetSingleton
();
// Ensure thread-safe enabling of Cache Metrics
std
::
lock_guard
<
std
::
mutex
>
lock
(
singleton
->
metrics_enabling_
);
if
(
singleton
->
cache_metrics_enabled_
)
{
return
;
}
singleton
->
InitializeCacheMetrics
(
response_cache
);
singleton
->
cache_metrics_enabled_
=
true
;
}
void
Metrics
::
EnableGPUMetrics
()
{
auto
singleton
=
GetSingleton
();
// Ensure thread-safe enabling of GPU Metrics
std
::
lock_guard
<
std
::
mutex
>
lock
(
singleton
->
metrics_enabling_
);
if
(
singleton
->
gpu_metrics_enabled_
)
{
return
;
}
if
(
std
::
getenv
(
"TRITON_SERVER_CPU_ONLY"
)
==
nullptr
)
{
singleton
->
InitializeDcgmMetrics
();
}
singleton
->
gpu_metrics_enabled_
=
true
;
}
void
Metrics
::
EnableCpuMetrics
()
{
auto
singleton
=
GetSingleton
();
// Ensure thread-safe enabling of CPU Metrics
std
::
lock_guard
<
std
::
mutex
>
lock
(
singleton
->
metrics_enabling_
);
if
(
singleton
->
cpu_metrics_enabled_
)
{
return
;
}
singleton
->
InitializeCpuMetrics
();
singleton
->
cpu_metrics_enabled_
=
true
;
}
void
Metrics
::
SetMetricsInterval
(
uint64_t
metrics_interval_ms
)
{
auto
singleton
=
GetSingleton
();
singleton
->
metrics_interval_ms_
=
metrics_interval_ms
;
}
void
Metrics
::
StartPollingThreadSingleton
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
)
{
auto
singleton
=
GetSingleton
();
// Ensure thread-safe start of polling thread
std
::
lock_guard
<
std
::
mutex
>
lock
(
singleton
->
poll_thread_starting_
);
if
(
singleton
->
poll_thread_started_
)
{
return
;
}
// Start thread for polling cache/dcgm metrics
singleton
->
StartPollingThread
(
response_cache
);
// Toggle flag so this function is only executed once
singleton
->
poll_thread_started_
=
true
;
}
bool
Metrics
::
StartPollingThread
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
)
{
// Nothing to poll if no polling metrics enabled, don't spawn a thread
if
(
!
cache_metrics_enabled_
&&
!
gpu_metrics_enabled_
&&
!
cpu_metrics_enabled_
)
{
LOG_WARNING
<<
"No polling metrics (CPU, GPU, Cache) are enabled. Will not "
"poll for them."
;
return
false
;
}
poll_thread_exit_
.
store
(
false
);
// Start a separate thread for polling metrics at specified interval
poll_thread_
.
reset
(
new
std
::
thread
([
this
,
response_cache
]
{
// Thread will update metrics indefinitely until exit flag set
while
(
!
poll_thread_exit_
.
load
())
{
// Sleep for metric interval
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
milliseconds
(
metrics_interval_ms_
/
2
));
// Poll Response Cache metrics
if
(
cache_metrics_enabled_
&&
response_cache
!=
nullptr
)
{
PollCacheMetrics
(
response_cache
);
}
#ifdef TRITON_ENABLE_METRICS_GPU
// Poll DCGM GPU metrics
if
(
gpu_metrics_enabled_
&&
dcgm_metadata_
.
available_cuda_gpu_ids_
.
size
()
>
0
)
{
PollDcgmMetrics
();
}
#endif // TRITON_ENABLE_METRICS_GPU
#ifdef TRITON_ENABLE_METRICS_CPU
if
(
cpu_metrics_enabled_
)
{
PollCpuMetrics
();
}
#endif // TRITON_ENABLE_METRICS_CPU
}
}));
return
true
;
}
bool
Metrics
::
PollCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
)
{
if
(
response_cache
==
nullptr
)
{
LOG_WARNING
<<
"error polling cache metrics, cache metrics will not be "
<<
"available: cache was nullptr"
;
return
false
;
}
// Update global cache metrics
cache_num_entries_global_
->
Set
(
response_cache
->
NumEntries
());
cache_num_lookups_global_
->
Set
(
response_cache
->
NumLookups
());
cache_num_hits_global_
->
Set
(
response_cache
->
NumHits
());
cache_num_misses_global_
->
Set
(
response_cache
->
NumMisses
());
cache_num_evictions_global_
->
Set
(
response_cache
->
NumEvictions
());
cache_lookup_duration_us_global_
->
Set
(
response_cache
->
TotalLookupLatencyNs
()
/
1000
);
cache_insertion_duration_us_global_
->
Set
(
response_cache
->
TotalInsertionLatencyNs
()
/
1000
);
cache_util_global_
->
Set
(
response_cache
->
TotalUtilization
());
return
true
;
}
#ifdef TRITON_ENABLE_METRICS_CPU
Status
Metrics::ParseCpuInfo(CpuInfo& info)
{
#ifdef _WIN32
  return Status(
      Status::Code::INTERNAL, "CPU metrics not supported on Windows.");
#else
  std::ifstream ifs("/proc/stat");
  if (!ifs.good()) {
    return Status(Status::Code::INTERNAL, "Failed to open /proc/stat.");
  }

  std::string line;
  // Verify first line is aggregate cpu line
  std::getline(ifs, line);
  if (line.rfind("cpu ", 0) == std::string::npos) {
    return Status(
        Status::Code::INTERNAL,
        "Failed to find aggregate CPU info in /proc/stat.");
  }

  std::string _;
  std::istringstream iss(line);
  // Use _ to skip "cpu" at start of line
  if (!(iss >> _ >> info)) {
    return Status(
        Status::Code::INTERNAL,
        "Failed to parse aggregate CPU info in /proc/stat.");
  }

  return Status::Success;
#endif  // OS
}

Status
Metrics::ParseMemInfo(MemInfo& info)
{
#ifdef _WIN32
  return Status(
      Status::Code::INTERNAL, "Memory metrics not supported on Windows.");
#else
  std::ifstream ifs("/proc/meminfo");
  if (!ifs.good()) {
    return Status(Status::Code::INTERNAL, "Failed to open /proc/meminfo.");
  }

  std::string line;
  constexpr uint64_t KB = 1024;
  while (std::getline(ifs, line)) {
    std::istringstream iss(line);
    std::string name;
    uint64_t value = 0;
    if (iss >> name >> value) {
      name.pop_back();
      info[name] = value * KB;
    } else {
      return Status(
          Status::Code::INTERNAL, "Encountered error parsing /proc/meminfo.");
    }
  }

  if (info.find("MemTotal") == info.end() ||
      info.find("MemAvailable") == info.end()) {
    return Status(
        Status::Code::INTERNAL,
        "Failed to find desired values in /proc/meminfo.");
  }

  if (info["MemAvailable"] > info["MemTotal"]) {
    return Status(
        Status::Code::INTERNAL,
        "Available bytes shouldn't be greater than Total bytes");
  }

  // "Used" memory can be defined in many different ways. While many
  // older applications consider "used = total - (free + cached)", a more
  // accurate measure of available memory "MemAvailable" was added,
  // so we choose "used = total - available" for a more accurate measure.
  // This may change in the future if not sufficient for most use cases.
  // See https://stackoverflow.com/a/35019697.
  info["MemUsed"] = info["MemTotal"] - info["MemAvailable"];

  return Status::Success;
#endif  // OS
}
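The "used = MemTotal - MemAvailable" choice documented above can be reproduced outside the server with a few lines. A standalone illustration of the same parsing and arithmetic (the program itself is not part of the deleted sources; /proc/meminfo reports kB, hence the * 1024):

#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>

int main()
{
  std::unordered_map<std::string, uint64_t> info;
  std::ifstream ifs("/proc/meminfo");
  std::string line;
  while (std::getline(ifs, line)) {
    std::istringstream iss(line);
    std::string name;
    uint64_t kb = 0;
    if (iss >> name >> kb) {
      name.pop_back();         // strip trailing ':' (e.g. "MemTotal:")
      info[name] = kb * 1024;  // convert reported kB to bytes
    }
  }
  // Same definition of "used" as ParseMemInfo() above.
  std::cout << "used bytes: "
            << (info["MemTotal"] - info["MemAvailable"]) << "\n";
  return 0;
}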
double
Metrics::CpuUtilization(const CpuInfo& info_new, const CpuInfo& info_old)
{
  // Account for overflow
  const auto wrap_sub = [](uint64_t a, uint64_t b) {
    return (a > b) ? (a - b) : 0;
  };

  uint64_t util_diff = wrap_sub(info_new.user, info_old.user) +
                       wrap_sub(info_new.nice, info_old.nice) +
                       wrap_sub(info_new.system, info_old.system) +
                       wrap_sub(info_new.irq, info_old.irq) +
                       wrap_sub(info_new.softirq, info_old.softirq) +
                       wrap_sub(info_new.steal, info_old.steal);
  uint64_t idle_diff = wrap_sub(info_new.idle, info_old.idle) +
                       wrap_sub(info_new.iowait, info_old.iowait);

  double util_ratio = static_cast<double>(util_diff) / (util_diff + idle_diff);
  return util_ratio;
}
#endif  // TRITON_ENABLE_METRICS_CPU
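In other words, utilization over a polling window is the busy-tick delta divided by the busy-plus-idle delta between two /proc/stat samples. A small numeric check of that formula, with made-up sample values (the two samples and the helper program are assumptions for illustration only):

#include <cassert>

int main()
{
  // old sample: user=1000 nice=0 system=500 idle=8000 iowait=500 irq=0 softirq=0 steal=0
  // new sample: user=1300 nice=0 system=600 idle=8550 iowait=550 irq=0 softirq=0 steal=0
  const double util_diff = (1300 - 1000) + (600 - 500);  // 400 busy ticks
  const double idle_diff = (8550 - 8000) + (550 - 500);  // 600 idle ticks
  const double util_ratio = util_diff / (util_diff + idle_diff);
  assert(util_ratio > 0.399 && util_ratio < 0.401);  // ~40% busy in the window
  return 0;
}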
bool
Metrics::PollCpuMetrics()
{
#ifndef TRITON_ENABLE_METRICS_CPU
  return false;
#else
  // CPU Utilization
  double cpu_util = 0.0;
  auto cpu_info = CpuInfo();
  auto status = ParseCpuInfo(cpu_info);
  if (status.IsOk()) {
    cpu_util = CpuUtilization(cpu_info, last_cpu_info_);
    last_cpu_info_ = cpu_info;
  }
  cpu_utilization_->Set(cpu_util);  // [0.0, 1.0]

  // RAM / Memory
  double mem_total_bytes = 0.0;
  double mem_used_bytes = 0.0;
  auto mem_info = MemInfo();
  status = ParseMemInfo(mem_info);
  if (status.IsOk()) {
    // MemTotal will usually not change over time, but if something
    // goes wrong when querying memory, we can reflect that by updating.
    mem_total_bytes = mem_info["MemTotal"];
    mem_used_bytes = mem_info["MemUsed"];
  }
  cpu_memory_total_->Set(mem_total_bytes);
  cpu_memory_used_->Set(mem_used_bytes);

  return true;
#endif  // TRITON_ENABLE_METRICS_CPU
}
bool
Metrics
::
PollDcgmMetrics
()
{
#ifndef TRITON_ENABLE_METRICS_GPU
return
false
;
#else
if
(
dcgm_metadata_
.
available_cuda_gpu_ids_
.
size
()
==
0
)
{
LOG_WARNING
<<
"error polling GPU metrics, GPU metrics will not be "
<<
"available: no available gpus to poll"
;
return
false
;
}
dcgmUpdateAllFields
(
dcgm_metadata_
.
dcgm_handle_
,
1
/* wait for update*/
);
for
(
unsigned
int
didx
=
0
;
didx
<
dcgm_metadata_
.
available_cuda_gpu_ids_
.
size
();
++
didx
)
{
uint32_t
cuda_id
=
dcgm_metadata_
.
available_cuda_gpu_ids_
[
didx
];
if
(
dcgm_metadata_
.
cuda_ids_to_dcgm_ids_
.
count
(
cuda_id
)
<=
0
)
{
LOG_WARNING
<<
"Cannot find DCGM id for CUDA id "
<<
cuda_id
;
continue
;
}
uint32_t
dcgm_id
=
dcgm_metadata_
.
cuda_ids_to_dcgm_ids_
.
at
(
cuda_id
);
dcgmFieldValue_v1
field_values
[
dcgm_metadata_
.
field_count_
];
dcgmReturn_t
dcgmerr
=
dcgmGetLatestValuesForFields
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_id
,
dcgm_metadata_
.
fields_
.
data
(),
dcgm_metadata_
.
field_count_
,
field_values
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
dcgm_metadata_
.
power_limit_fail_cnt_
[
didx
]
++
;
dcgm_metadata_
.
power_usage_fail_cnt_
[
didx
]
++
;
dcgm_metadata_
.
energy_fail_cnt_
[
didx
]
++
;
dcgm_metadata_
.
util_fail_cnt_
[
didx
]
++
;
dcgm_metadata_
.
mem_fail_cnt_
[
didx
]
++
;
LOG_WARNING
<<
"Unable to get field values for GPU ID "
<<
cuda_id
<<
": "
<<
errorString
(
dcgmerr
);
}
else
{
// Power limit
if
(
dcgm_metadata_
.
power_limit_fail_cnt_
[
didx
]
<
dcgm_metadata_
.
fail_threshold_
)
{
double
power_limit
=
field_values
[
0
].
value
.
dbl
;
if
((
field_values
[
0
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_FP64_IS_BLANK
(
power_limit
)))
{
dcgm_metadata_
.
power_limit_fail_cnt_
[
didx
]
=
0
;
}
else
{
dcgm_metadata_
.
power_limit_fail_cnt_
[
didx
]
++
;
power_limit
=
0
;
dcgmReturn_t
status
=
dcgmReturn_t
(
field_values
[
0
].
status
);
LOG_WARNING
<<
"Unable to get power limit for GPU "
<<
cuda_id
<<
". Status:"
<<
errorString
(
status
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
power_limit
);
}
gpu_power_limit_
[
didx
]
->
Set
(
power_limit
);
}
// Power usage
if
(
dcgm_metadata_
.
power_usage_fail_cnt_
[
didx
]
<
dcgm_metadata_
.
fail_threshold_
)
{
double
power_usage
=
field_values
[
1
].
value
.
dbl
;
if
((
field_values
[
1
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_FP64_IS_BLANK
(
power_usage
)))
{
dcgm_metadata_
.
power_usage_fail_cnt_
[
didx
]
=
0
;
}
else
{
dcgm_metadata_
.
power_usage_fail_cnt_
[
didx
]
++
;
power_usage
=
0
;
dcgmReturn_t
status
=
dcgmReturn_t
(
field_values
[
1
].
status
);
LOG_WARNING
<<
"Unable to get power usage for GPU "
<<
cuda_id
<<
". Status:"
<<
errorString
(
status
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
power_usage
);
}
gpu_power_usage_
[
didx
]
->
Set
(
power_usage
);
}
// Energy Consumption
if
(
dcgm_metadata_
.
energy_fail_cnt_
[
didx
]
<
dcgm_metadata_
.
fail_threshold_
)
{
int64_t
energy
=
field_values
[
2
].
value
.
i64
;
if
((
field_values
[
2
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_INT64_IS_BLANK
(
energy
)))
{
dcgm_metadata_
.
energy_fail_cnt_
[
didx
]
=
0
;
if
(
dcgm_metadata_
.
last_energy_
[
didx
]
==
0
)
{
dcgm_metadata_
.
last_energy_
[
didx
]
=
energy
;
}
gpu_energy_consumption_
[
didx
]
->
Increment
(
(
double
)(
energy
-
dcgm_metadata_
.
last_energy_
[
didx
])
*
0.001
);
dcgm_metadata_
.
last_energy_
[
didx
]
=
energy
;
}
else
{
dcgm_metadata_
.
energy_fail_cnt_
[
didx
]
++
;
energy
=
0
;
dcgmReturn_t
status
=
dcgmReturn_t
(
field_values
[
2
].
status
);
LOG_WARNING
<<
"Unable to get energy consumption for "
<<
"GPU "
<<
cuda_id
<<
". Status:"
<<
errorString
(
status
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
energy
);
}
}
// Utilization
if
(
dcgm_metadata_
.
util_fail_cnt_
[
didx
]
<
dcgm_metadata_
.
fail_threshold_
)
{
int64_t
util
=
field_values
[
3
].
value
.
i64
;
if
((
field_values
[
3
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_INT64_IS_BLANK
(
util
)))
{
dcgm_metadata_
.
util_fail_cnt_
[
didx
]
=
0
;
}
else
{
dcgm_metadata_
.
util_fail_cnt_
[
didx
]
++
;
util
=
0
;
dcgmReturn_t
status
=
dcgmReturn_t
(
field_values
[
3
].
status
);
LOG_WARNING
<<
"Unable to get GPU utilization for GPU "
<<
cuda_id
<<
". Status:"
<<
errorString
(
status
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
util
);
}
gpu_utilization_
[
didx
]
->
Set
((
double
)
util
*
0.01
);
}
// Memory Usage
if
(
dcgm_metadata_
.
mem_fail_cnt_
[
didx
]
<
dcgm_metadata_
.
fail_threshold_
)
{
int64_t
memory_used
=
field_values
[
4
].
value
.
i64
;
int64_t
memory_total
=
field_values
[
5
].
value
.
i64
;
if
((
field_values
[
4
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_INT64_IS_BLANK
(
memory_used
))
&&
(
field_values
[
5
].
status
==
DCGM_ST_OK
)
&&
(
!
DCGM_INT64_IS_BLANK
(
memory_total
)))
{
dcgm_metadata_
.
mem_fail_cnt_
[
didx
]
=
0
;
}
else
{
memory_total
=
0
;
memory_used
=
0
;
dcgm_metadata_
.
mem_fail_cnt_
[
didx
]
++
;
dcgmReturn_t
usageStatus
=
dcgmReturn_t
(
field_values
[
4
].
status
);
dcgmReturn_t
memoryTotaltatus
=
dcgmReturn_t
(
field_values
[
5
].
status
);
LOG_WARNING
<<
"Unable to get memory usage for GPU "
<<
cuda_id
<<
". Memory usage status:"
<<
errorString
(
usageStatus
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
memory_used
)
<<
". Memory total status:"
<<
errorString
(
memoryTotaltatus
)
<<
", value:"
<<
dcgmValueToErrorMessage
(
memory_total
);
}
gpu_memory_total_
[
didx
]
->
Set
(
memory_total
*
1024
*
1024
);
// bytes
gpu_memory_used_
[
didx
]
->
Set
(
memory_used
*
1024
*
1024
);
// bytes
}
}
}
return
true
;
#endif // TRITON_ENABLE_METRICS_GPU
}
bool
Metrics
::
InitializeCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
)
{
if
(
response_cache
==
nullptr
)
{
LOG_WARNING
<<
"error initializing cache metrics, cache metrics will not be "
<<
"available: cache was nullptr"
;
return
false
;
}
const
std
::
map
<
std
::
string
,
std
::
string
>
cache_labels
;
cache_num_entries_global_
=
&
cache_num_entries_family_
.
Add
(
cache_labels
);
cache_num_lookups_global_
=
&
cache_num_lookups_family_
.
Add
(
cache_labels
);
cache_num_hits_global_
=
&
cache_num_hits_family_
.
Add
(
cache_labels
);
cache_num_misses_global_
=
&
cache_num_misses_family_
.
Add
(
cache_labels
);
cache_num_evictions_global_
=
&
cache_num_evictions_family_
.
Add
(
cache_labels
);
cache_lookup_duration_us_global_
=
&
cache_lookup_duration_us_family_
.
Add
(
cache_labels
);
cache_insertion_duration_us_global_
=
&
cache_insertion_duration_us_family_
.
Add
(
cache_labels
);
cache_util_global_
=
&
cache_util_family_
.
Add
(
cache_labels
);
LOG_INFO
<<
"Collecting Response Cache metrics"
;
return
true
;
}
bool
Metrics
::
InitializeCpuMetrics
()
{
#ifndef TRITON_ENABLE_METRICS_CPU
return
false
;
#else
const
std
::
map
<
std
::
string
,
std
::
string
>
cpu_labels
;
cpu_utilization_
=
&
cpu_utilization_family_
.
Add
(
cpu_labels
);
cpu_memory_total_
=
&
cpu_memory_total_family_
.
Add
(
cpu_labels
);
cpu_memory_used_
=
&
cpu_memory_used_family_
.
Add
(
cpu_labels
);
// Get baseline CPU info for future comparisons
last_cpu_info_
=
CpuInfo
();
auto
status
=
ParseCpuInfo
(
last_cpu_info_
);
if
(
!
status
.
IsOk
())
{
LOG_WARNING
<<
"error initializing CPU metrics, CPU utilization may not "
"be available: "
<<
status
.
Message
();
return
false
;
}
// Verify memory metrics can be parsed
auto
mem_info
=
MemInfo
();
status
=
ParseMemInfo
(
mem_info
);
if
(
!
status
.
IsOk
())
{
LOG_WARNING
<<
"error initializing CPU metrics, CPU memory metrics may not "
"be available: "
<<
status
.
Message
();
return
false
;
}
LOG_INFO
<<
"Collecting CPU metrics"
;
return
true
;
#endif // TRITON_ENABLE_METRICS_CPU
}
bool
Metrics
::
InitializeDcgmMetrics
()
{
#ifndef TRITON_ENABLE_METRICS_GPU
return
false
;
#else
dcgmReturn_t
dcgmerr
=
dcgmInit
();
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"error initializing DCGM, GPU metrics will not be "
<<
"available: "
<<
errorString
(
dcgmerr
);
return
false
;
}
if
(
dcgm_metadata_
.
standalone_
)
{
char
hostIpAddress
[
16
]
=
{
0
};
std
::
string
ipAddress
=
"127.0.0.1"
;
strncpy
(
hostIpAddress
,
ipAddress
.
c_str
(),
15
);
dcgmerr
=
dcgmConnect
(
hostIpAddress
,
&
dcgm_metadata_
.
dcgm_handle_
);
}
else
{
dcgmerr
=
dcgmStartEmbedded
(
DCGM_OPERATION_MODE_MANUAL
,
&
dcgm_metadata_
.
dcgm_handle_
);
}
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"DCGM unable to start: "
<<
errorString
(
dcgmerr
);
return
false
;
}
else
{
// Set this flag to signal DCGM cleanup in destructor
dcgm_metadata_
.
dcgm_initialized_
=
true
;
}
if
(
dcgm_metadata_
.
standalone_
)
{
dcgmerr
=
dcgmUpdateAllFields
(
dcgm_metadata_
.
dcgm_handle_
,
1
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"DCGM unable to update all fields, GPU metrics will "
"not be available: "
<<
errorString
(
dcgmerr
);
return
false
;
}
}
unsigned
int
dcgm_gpu_ids
[
DCGM_MAX_NUM_DEVICES
];
int
dcgm_gpu_count
;
dcgmerr
=
dcgmGetAllDevices
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_gpu_ids
,
&
dcgm_gpu_count
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"DCGM unable to get device info and count, GPU "
"metrics will not be available: "
<<
errorString
(
dcgmerr
);
return
false
;
}
// Get PCI Bus ID to DCGM device Id map.
// Some devices may have problems using DCGM API and
// these devices needs to be ignored.
std
::
map
<
std
::
string
,
size_t
>
pci_bus_id_to_dcgm_id
;
std
::
map
<
std
::
string
,
std
::
map
<
std
::
string
,
std
::
string
>
>
pci_bus_id_to_gpu_labels
;
std
::
map
<
std
::
string
,
std
::
string
>
pci_bus_id_to_device_name
;
dcgmDeviceAttributes_t
gpu_attributes
[
DCGM_MAX_NUM_DEVICES
];
for
(
int
i
=
0
;
i
<
dcgm_gpu_count
;
i
++
)
{
gpu_attributes
[
i
].
version
=
dcgmDeviceAttributes_version
;
dcgmerr
=
dcgmGetDeviceAttributes
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_gpu_ids
[
i
],
&
gpu_attributes
[
i
]);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"DCGM unable to get device properties for DCGM device "
<<
dcgm_gpu_ids
[
i
]
<<
", GPU metrics will not be available for this device: "
<<
errorString
(
dcgmerr
);
}
else
{
std
::
string
pciBusId
=
gpu_attributes
[
i
].
identifiers
.
pciBusId
;
pci_bus_id_to_dcgm_id
[
pciBusId
]
=
i
;
pci_bus_id_to_device_name
[
pciBusId
]
=
std
::
string
(
gpu_attributes
[
i
].
identifiers
.
deviceName
);
std
::
map
<
std
::
string
,
std
::
string
>
gpu_labels
;
gpu_labels
.
insert
(
std
::
map
<
std
::
string
,
std
::
string
>::
value_type
(
kMetricsLabelGpuUuid
,
std
::
string
(
gpu_attributes
[
i
].
identifiers
.
uuid
)));
pci_bus_id_to_gpu_labels
[
pciBusId
]
=
gpu_labels
;
}
}
// Get CUDA-visible PCI Bus Ids and get DCGM metrics for each CUDA-visible GPU
int
cuda_gpu_count
;
cudaError_t
cudaerr
=
cudaGetDeviceCount
(
&
cuda_gpu_count
);
if
(
cudaerr
!=
cudaSuccess
)
{
LOG_WARNING
<<
"Cannot get CUDA device count, GPU metrics will not be available"
;
return
false
;
}
for
(
int
i
=
0
;
i
<
cuda_gpu_count
;
++
i
)
{
std
::
string
pci_bus_id
=
"0000"
;
// pad 0's for uniformity
char
pcibusid_str
[
64
];
cudaerr
=
cudaDeviceGetPCIBusId
(
pcibusid_str
,
sizeof
(
pcibusid_str
)
-
1
,
i
);
if
(
cudaerr
==
cudaSuccess
)
{
pci_bus_id
.
append
(
pcibusid_str
);
if
(
pci_bus_id_to_dcgm_id
.
count
(
pci_bus_id
)
<=
0
)
{
LOG_INFO
<<
"Skipping GPU:"
<<
i
<<
" since it's not CUDA enabled. This should never happen!"
;
continue
;
}
// Filter out CUDA visible GPUs from GPUs found by DCGM
LOG_INFO
<<
"Collecting metrics for GPU "
<<
i
<<
": "
<<
pci_bus_id_to_device_name
[
pci_bus_id
];
auto
&
gpu_labels
=
pci_bus_id_to_gpu_labels
[
pci_bus_id
];
gpu_utilization_
.
push_back
(
&
gpu_utilization_family_
.
Add
(
gpu_labels
));
gpu_memory_total_
.
push_back
(
&
gpu_memory_total_family_
.
Add
(
gpu_labels
));
gpu_memory_used_
.
push_back
(
&
gpu_memory_used_family_
.
Add
(
gpu_labels
));
gpu_power_usage_
.
push_back
(
&
gpu_power_usage_family_
.
Add
(
gpu_labels
));
gpu_power_limit_
.
push_back
(
&
gpu_power_limit_family_
.
Add
(
gpu_labels
));
gpu_energy_consumption_
.
push_back
(
&
gpu_energy_consumption_family_
.
Add
(
gpu_labels
));
uint32_t
dcgm_id
=
pci_bus_id_to_dcgm_id
[
pci_bus_id
];
dcgm_metadata_
.
cuda_ids_to_dcgm_ids_
[
i
]
=
dcgm_id
;
dcgm_metadata_
.
available_cuda_gpu_ids_
.
emplace_back
(
i
);
}
else
{
LOG_WARNING
<<
"GPU metrics will not be available for device:"
<<
i
;
}
}
// create a gpu group
char
groupName
[]
=
"dcgm_group"
;
dcgmerr
=
dcgmGroupCreate
(
dcgm_metadata_
.
dcgm_handle_
,
DCGM_GROUP_DEFAULT
,
groupName
,
&
dcgm_metadata_
.
groupId_
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Cannot make GPU group: "
<<
errorString
(
dcgmerr
);
}
// Initialize tracking vectors
for
(
unsigned
int
didx
=
0
;
didx
<
dcgm_metadata_
.
available_cuda_gpu_ids_
.
size
();
++
didx
)
{
dcgm_metadata_
.
power_limit_fail_cnt_
.
push_back
(
0
);
dcgm_metadata_
.
power_usage_fail_cnt_
.
push_back
(
0
);
dcgm_metadata_
.
energy_fail_cnt_
.
push_back
(
0
);
dcgm_metadata_
.
util_fail_cnt_
.
push_back
(
0
);
dcgm_metadata_
.
mem_fail_cnt_
.
push_back
(
0
);
dcgm_metadata_
.
last_energy_
.
push_back
(
0
);
}
// Number of fields for DCGM to use from fields_ below
dcgm_metadata_
.
field_count_
=
6
;
unsigned
short
util_flag
=
dcgm_metadata_
.
standalone_
?
DCGM_FI_PROF_GR_ENGINE_ACTIVE
:
DCGM_FI_DEV_GPU_UTIL
;
dcgm_metadata_
.
fields_
=
{
DCGM_FI_DEV_POWER_MGMT_LIMIT
,
// power limit, watts
DCGM_FI_DEV_POWER_USAGE
,
// power usage, watts
DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION
,
// Total energy consumption, mJ
util_flag
,
// util ratio, 1 = 1%
DCGM_FI_DEV_FB_USED
,
// Frame buffer used, MiB
DCGM_FI_DEV_FB_TOTAL
,
// Frame buffer used, MiB
};
char
fieldName
[]
=
"field_group"
;
dcgmFieldGrp_t
fieldGroupId
;
dcgmerr
=
dcgmFieldGroupCreate
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_metadata_
.
field_count_
,
dcgm_metadata_
.
fields_
.
data
(),
fieldName
,
&
fieldGroupId
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Cannot make field group: "
<<
errorString
(
dcgmerr
);
}
dcgmerr
=
dcgmWatchFields
(
dcgm_metadata_
.
dcgm_handle_
,
dcgm_metadata_
.
groupId_
,
fieldGroupId
,
metrics_interval_ms_
*
1000
/*update period, usec*/
,
5.0
/*maxKeepAge, sec*/
,
5
/*maxKeepSamples*/
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_WARNING
<<
"Cannot start watching fields: "
<<
errorString
(
dcgmerr
);
return
false
;
}
return
true
;
#endif // TRITON_ENABLE_METRICS_GPU
}
#ifdef TRITON_ENABLE_METRICS_GPU
std
::
string
Metrics
::
dcgmValueToErrorMessage
(
double
val
)
{
if
(
DCGM_FP64_IS_BLANK
(
val
))
{
if
(
val
==
DCGM_FP64_BLANK
)
{
return
"Not Specified"
;
}
else
if
(
val
==
DCGM_FP64_NOT_FOUND
)
{
return
"Not Found"
;
}
else
if
(
val
==
DCGM_FP64_NOT_SUPPORTED
)
{
return
"Not Supported"
;
}
else
if
(
val
==
DCGM_FP64_NOT_PERMISSIONED
)
{
return
"Insf. Permission"
;
}
else
{
return
"Unknown"
;
}
}
else
{
return
std
::
to_string
(
val
);
}
}
std
::
string
Metrics
::
dcgmValueToErrorMessage
(
int64_t
val
)
{
if
(
DCGM_INT64_IS_BLANK
(
val
))
{
switch
(
val
)
{
case
DCGM_INT64_BLANK
:
return
"Not Specified"
;
case
DCGM_INT64_NOT_FOUND
:
return
"Not Found"
;
case
DCGM_INT64_NOT_SUPPORTED
:
return
"Not Supported"
;
case
DCGM_INT64_NOT_PERMISSIONED
:
return
"Insf. Permission"
;
default:
return
"Unknown"
;
}
}
else
{
return
std
::
to_string
(
val
);
}
}
#endif // TRITON_ENABLE_METRICS_GPU
bool
Metrics
::
UUIDForCudaDevice
(
int
cuda_device
,
std
::
string
*
uuid
)
{
// If metrics were not initialized then just silently fail since
// with DCGM we can't get the CUDA device (and not worth doing
// anyway since metrics aren't being reported).
auto
singleton
=
GetSingleton
();
if
(
!
singleton
->
gpu_metrics_enabled_
)
{
return
false
;
}
// If GPU metrics is not enabled just silently fail.
#ifndef TRITON_ENABLE_METRICS_GPU
return
false
;
#else
dcgmDeviceAttributes_t
gpu_attributes
;
gpu_attributes
.
version
=
dcgmDeviceAttributes_version
;
dcgmReturn_t
dcgmerr
=
dcgmGetDeviceAttributes
(
singleton
->
dcgm_metadata_
.
dcgm_handle_
,
cuda_device
,
&
gpu_attributes
);
if
(
dcgmerr
!=
DCGM_ST_OK
)
{
LOG_ERROR
<<
"Unable to get device UUID: "
<<
errorString
(
dcgmerr
);
return
false
;
}
*
uuid
=
gpu_attributes
.
identifiers
.
uuid
;
return
true
;
#endif // TRITON_ENABLE_METRICS_GPU
}
std
::
shared_ptr
<
prometheus
::
Registry
>
Metrics
::
GetRegistry
()
{
auto
singleton
=
Metrics
::
GetSingleton
();
return
singleton
->
registry_
;
}
const
std
::
string
Metrics
::
SerializedMetrics
()
{
auto
singleton
=
Metrics
::
GetSingleton
();
return
singleton
->
serializer_
->
Serialize
(
singleton
->
registry_
.
get
()
->
Collect
());
}
Metrics
*
Metrics
::
GetSingleton
()
{
static
Metrics
singleton
;
return
&
singleton
;
}
}}
// namespace triton::core
#endif // TRITON_ENABLE_METRICS
3rdparty/core-r22.12/src/metrics.h  deleted 100644 → 0  @ d592fbea
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once
#ifdef TRITON_ENABLE_METRICS
#include <atomic>
#include <mutex>
#include <thread>
#include "prometheus/counter.h"
#include "prometheus/gauge.h"
#include "prometheus/registry.h"
#include "prometheus/serializer.h"
#include "prometheus/text_serializer.h"
#include "response_cache.h"
#ifdef TRITON_ENABLE_METRICS_GPU
#include <dcgm_agent.h>
#endif // TRITON_ENABLE_METRICS_GPU
namespace
triton
{
namespace
core
{
#ifdef TRITON_ENABLE_METRICS_CPU
using
MemInfo
=
std
::
unordered_map
<
std
::
string
,
uint64_t
>
;
// References:
// - htop source: https://stackoverflow.com/a/23376195
// - Linux docs: https://www.kernel.org/doc/Documentation/filesystems/proc.txt
// guest/guestnice values are counted in user/nice so we skip parsing them
struct
CpuInfo
{
uint64_t
user
=
0
;
// normal processes executing in user mode
uint64_t
nice
=
0
;
// niced processes executing in user mode
uint64_t
system
=
0
;
// processes executing in kernel mode
uint64_t
idle
=
0
;
// twiddling thumbs
uint64_t
iowait
=
0
;
// waiting for I/O to complete
uint64_t
irq
=
0
;
// servicing interrupts
uint64_t
softirq
=
0
;
// servicing softirqs
uint64_t
steal
=
0
;
// involuntary wait
};
inline
std
::
istream
&
operator
>>
(
std
::
istream
&
is
,
CpuInfo
&
info
)
{
is
>>
info
.
user
>>
info
.
nice
>>
info
.
system
>>
info
.
idle
>>
info
.
iowait
>>
info
.
irq
>>
info
.
softirq
>>
info
.
steal
;
return
is
;
}
#endif // TRITON_ENABLE_METRICS_CPU
#ifdef TRITON_ENABLE_METRICS_GPU
struct
DcgmMetadata
{
// DCGM handles for initialization and destruction
dcgmHandle_t
dcgm_handle_
=
0
;
dcgmGpuGrp_t
groupId_
=
0
;
// DCGM Flags
bool
standalone_
=
false
;
// DCGM Fields
size_t
field_count_
=
0
;
std
::
vector
<
unsigned
short
>
fields_
;
// GPU Device Mapping
std
::
map
<
uint32_t
,
uint32_t
>
cuda_ids_to_dcgm_ids_
;
std
::
vector
<
uint32_t
>
available_cuda_gpu_ids_
;
// Stop attempting metrics if they fail multiple consecutive
// times for a device.
const
int
fail_threshold_
=
3
;
// DCGM Failure Tracking
std
::
vector
<
int
>
power_limit_fail_cnt_
;
std
::
vector
<
int
>
power_usage_fail_cnt_
;
std
::
vector
<
int
>
energy_fail_cnt_
;
std
::
vector
<
int
>
util_fail_cnt_
;
std
::
vector
<
int
>
mem_fail_cnt_
;
// DCGM Energy Tracking
std
::
vector
<
unsigned
long
long
>
last_energy_
;
// Track if DCGM handle initialized successfully
bool
dcgm_initialized_
=
false
;
};
#endif // TRITON_ENABLE_METRICS_GPU
class
Metrics
{
public:
// Return the hash value of the labels
static
size_t
HashLabels
(
const
std
::
map
<
std
::
string
,
std
::
string
>&
labels
);
// Are metrics enabled?
static
bool
Enabled
();
// Enable reporting of metrics
static
void
EnableMetrics
();
// Enable reporting of GPU metrics
static
void
EnableGPUMetrics
();
// Enable reporting of CPU metrics
static
void
EnableCpuMetrics
();
// Enable reporting of Cache metrics
static
void
EnableCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
);
// Start a thread for polling enabled metrics if any
static
void
StartPollingThreadSingleton
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
);
// Set the time interval in secs at which metrics are collected
static
void
SetMetricsInterval
(
uint64_t
metrics_interval_ms
);
// Get the prometheus registry
static
std
::
shared_ptr
<
prometheus
::
Registry
>
GetRegistry
();
// Get serialized metrics
static
const
std
::
string
SerializedMetrics
();
// Get the UUID for a CUDA device. Return true and initialize 'uuid'
// if a UUID is found, return false if a UUID cannot be returned.
static
bool
UUIDForCudaDevice
(
int
cuda_device
,
std
::
string
*
uuid
);
// Metric family counting successful inference requests
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceSuccess
()
{
return
GetSingleton
()
->
inf_success_family_
;
}
// Metric family counting failed inference requests
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceFailure
()
{
return
GetSingleton
()
->
inf_failure_family_
;
}
// Metric family counting inferences performed, where a batch-size
// 'n' inference request is counted as 'n' inferences
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceCount
()
{
return
GetSingleton
()
->
inf_count_family_
;
}
// Metric family counting inferences performed, where a batch-size
// 'n' inference request is counted as 'n' inferences
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceExecutionCount
()
{
return
GetSingleton
()
->
inf_count_exec_family_
;
}
// Metric family of cumulative inference request duration, in
// microseconds
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceRequestDuration
()
{
return
GetSingleton
()
->
inf_request_duration_us_family_
;
}
// Metric family of cumulative inference queuing duration, in
// microseconds
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceQueueDuration
()
{
return
GetSingleton
()
->
inf_queue_duration_us_family_
;
}
// Metric family of cumulative inference compute durations, in
// microseconds
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceComputeInputDuration
()
{
return
GetSingleton
()
->
inf_compute_input_duration_us_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceComputeInferDuration
()
{
return
GetSingleton
()
->
inf_compute_infer_duration_us_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyInferenceComputeOutputDuration
()
{
return
GetSingleton
()
->
inf_compute_output_duration_us_family_
;
}
// Metric families of per-model response cache metrics
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyCacheHitCount
()
{
return
GetSingleton
()
->
cache_num_hits_model_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyCacheHitLookupDuration
()
{
return
GetSingleton
()
->
cache_hit_lookup_duration_us_model_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyCacheMissCount
()
{
return
GetSingleton
()
->
cache_num_misses_model_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyCacheMissLookupDuration
()
{
return
GetSingleton
()
->
cache_miss_lookup_duration_us_model_family_
;
}
static
prometheus
::
Family
<
prometheus
::
Counter
>&
FamilyCacheMissInsertionDuration
()
{
return
GetSingleton
()
->
cache_miss_insertion_duration_us_model_family_
;
}
private:
Metrics
();
virtual
~
Metrics
();
static
Metrics
*
GetSingleton
();
bool
InitializeDcgmMetrics
();
bool
InitializeCpuMetrics
();
bool
InitializeCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
);
bool
StartPollingThread
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
);
bool
PollCacheMetrics
(
std
::
shared_ptr
<
RequestResponseCache
>
response_cache
);
bool
PollDcgmMetrics
();
bool
PollCpuMetrics
();
std
::
string
dcgmValueToErrorMessage
(
double
val
);
std
::
string
dcgmValueToErrorMessage
(
int64_t
val
);
std
::
shared_ptr
<
prometheus
::
Registry
>
registry_
;
std
::
unique_ptr
<
prometheus
::
Serializer
>
serializer_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_success_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_failure_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_count_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_count_exec_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_request_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_queue_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_compute_input_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_compute_infer_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
inf_compute_output_duration_us_family_
;
// Global Response Cache metrics
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_num_entries_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_num_lookups_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_num_hits_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_num_misses_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_num_evictions_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_lookup_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_insertion_duration_us_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cache_util_family_
;
// Gauges for Global Response Cache metrics
prometheus
::
Gauge
*
cache_num_entries_global_
;
prometheus
::
Gauge
*
cache_num_lookups_global_
;
prometheus
::
Gauge
*
cache_num_hits_global_
;
prometheus
::
Gauge
*
cache_num_misses_global_
;
prometheus
::
Gauge
*
cache_num_evictions_global_
;
prometheus
::
Gauge
*
cache_lookup_duration_us_global_
;
prometheus
::
Gauge
*
cache_insertion_duration_us_global_
;
prometheus
::
Gauge
*
cache_util_global_
;
// Per-model Response Cache metrics
prometheus
::
Family
<
prometheus
::
Counter
>&
cache_num_hits_model_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
cache_hit_lookup_duration_us_model_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
cache_num_misses_model_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
cache_miss_lookup_duration_us_model_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
cache_miss_insertion_duration_us_model_family_
;
#ifdef TRITON_ENABLE_METRICS_GPU
prometheus
::
Family
<
prometheus
::
Gauge
>&
gpu_utilization_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
gpu_memory_total_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
gpu_memory_used_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
gpu_power_usage_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
gpu_power_limit_family_
;
prometheus
::
Family
<
prometheus
::
Counter
>&
gpu_energy_consumption_family_
;
std
::
vector
<
prometheus
::
Gauge
*>
gpu_utilization_
;
std
::
vector
<
prometheus
::
Gauge
*>
gpu_memory_total_
;
std
::
vector
<
prometheus
::
Gauge
*>
gpu_memory_used_
;
std
::
vector
<
prometheus
::
Gauge
*>
gpu_power_usage_
;
std
::
vector
<
prometheus
::
Gauge
*>
gpu_power_limit_
;
std
::
vector
<
prometheus
::
Counter
*>
gpu_energy_consumption_
;
DcgmMetadata
dcgm_metadata_
;
#endif // TRITON_ENABLE_METRICS_GPU
#ifdef TRITON_ENABLE_METRICS_CPU
// Parses "/proc/meminfo" for metrics, currently only supported on Linux.
Status
ParseMemInfo
(
MemInfo
&
info
);
// Parses "/proc/stat" for metrics, currently only supported on Linux.
Status
ParseCpuInfo
(
CpuInfo
&
info
);
// Computes CPU utilization between "info_new" and "info_old" values
double
CpuUtilization
(
const
CpuInfo
&
info_new
,
const
CpuInfo
&
info_old
);
prometheus
::
Family
<
prometheus
::
Gauge
>&
cpu_utilization_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cpu_memory_total_family_
;
prometheus
::
Family
<
prometheus
::
Gauge
>&
cpu_memory_used_family_
;
prometheus
::
Gauge
*
cpu_utilization_
;
prometheus
::
Gauge
*
cpu_memory_total_
;
prometheus
::
Gauge
*
cpu_memory_used_
;
CpuInfo
last_cpu_info_
;
#endif // TRITON_ENABLE_METRICS_CPU
// Thread for polling cache/gpu metrics periodically
std
::
unique_ptr
<
std
::
thread
>
poll_thread_
;
std
::
atomic
<
bool
>
poll_thread_exit_
;
bool
metrics_enabled_
;
bool
gpu_metrics_enabled_
;
bool
cpu_metrics_enabled_
;
bool
cache_metrics_enabled_
;
bool
poll_thread_started_
;
std
::
mutex
metrics_enabling_
;
std
::
mutex
poll_thread_starting_
;
uint64_t
metrics_interval_ms_
;
};
}}
// namespace triton::core
#endif // TRITON_ENABLE_METRICS
3rdparty/core-r22.12/src/model.cc  deleted 100644 → 0  @ d592fbea
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model.h"
#include <chrono>
#include <future>
#include "constants.h"
#include "filesystem.h"
#include "infer_request.h"
#include "model_config_utils.h"
#include "triton/common/logging.h"
namespace triton { namespace core {

Status
Model::GetInput(
    const std::string& name, const inference::ModelInput** input) const
{
  const auto itr = input_map_.find(name);
  if (itr == input_map_.end()) {
    return Status(
        Status::Code::INVALID_ARG, "unexpected inference input '" + name +
                                       "' for model '" + Name() + "'");
  }

  *input = &itr->second;
  return Status::Success;
}

Status
Model::GetOutput(
    const std::string& name, const inference::ModelOutput** output) const
{
  const auto itr = output_map_.find(name);
  if (itr == output_map_.end()) {
    return Status(
        Status::Code::INVALID_ARG, "unexpected inference output '" + name +
                                       "' for model '" + Name() + "'");
  }

  *output = &itr->second;
  return Status::Success;
}

Status
Model::SetModelConfig(const inference::ModelConfig& config)
{
  config_ = config;
  set_model_config_ = true;
  return Status::Success;
}

Status
Model::SetScheduler(std::unique_ptr<Scheduler> scheduler)
{
  if (scheduler_ != nullptr) {
    return Status(
        Status::Code::INTERNAL, "Attempt to change scheduler not allowed");
  }

  scheduler_ = std::move(scheduler);
  return Status::Success;
}

Status
Model::Init(const bool is_config_provided)
{
  if (!set_model_config_ && !is_config_provided) {
    return Status(
        Status::Code::NOT_FOUND,
        "model configuration is not provided for model '" + Name() + "'");
  }

  RETURN_IF_ERROR(ValidateModelConfig(config_, min_compute_capability_));
  RETURN_IF_ERROR(ValidateModelIOConfig(config_));

  // Initialize the input map
  for (const auto& io : config_.input()) {
    input_map_.insert(std::make_pair(io.name(), io));
    if (!io.optional()) {
      ++required_input_count_;
    }
  }

  // Initialize the output map and label provider for each output
  label_provider_ = std::make_shared<LabelProvider>();
  for (const auto& io : config_.output()) {
    output_map_.insert(std::make_pair(io.name(), io));

    if (!io.label_filename().empty()) {
      const auto label_path = JoinPath({model_dir_, io.label_filename()});
      RETURN_IF_ERROR(label_provider_->AddLabels(io.name(), label_path));
    }
  }

  if (config_.has_dynamic_batching()) {
    default_priority_level_ =
        config_.dynamic_batching().default_priority_level();
    max_priority_level_ = config_.dynamic_batching().priority_levels();
  } else if (config_.has_ensemble_scheduling()) {
    // For ensemble, allow any priority level to pass through
    default_priority_level_ = 0;
    max_priority_level_ = UINT32_MAX;
  } else {
    default_priority_level_ = 0;
    max_priority_level_ = 0;
  }

  return Status::Success;
}

}}  // namespace triton::core
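For reference, the GetInput()/GetOutput() methods above follow a simple lookup-and-report pattern: find the name in a map, hand back a pointer into the map on success, or return an error status on a miss. A minimal standalone sketch of that pattern is below; the Status and ModelInput structs here are simplified stand-ins for illustration only, not the Triton types.

#include <iostream>
#include <string>
#include <unordered_map>

// Hypothetical stand-ins so this sketch compiles on its own.
struct Status {
  bool ok;
  std::string msg;
};
struct ModelInput {
  std::string name;
  int dims;
};

// Look the name up in a map; return a pointer to the stored entry on success,
// or an error Status when the name is unknown.
Status GetInput(
    const std::unordered_map<std::string, ModelInput>& input_map,
    const std::string& name, const ModelInput** input)
{
  const auto itr = input_map.find(name);
  if (itr == input_map.end()) {
    return Status{false, "unexpected inference input '" + name + "'"};
  }
  *input = &itr->second;
  return Status{true, ""};
}

int main()
{
  std::unordered_map<std::string, ModelInput> inputs{{"INPUT0", {"INPUT0", 3}}};
  const ModelInput* found = nullptr;
  Status s = GetInput(inputs, "INPUT1", &found);
  std::cout << (s.ok ? "found" : s.msg) << std::endl;
  return 0;
}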
3rdparty/core-r22.12/src/model.h deleted 100644 → 0 View file @ d592fbea
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "infer_stats.h"
#include "label_provider.h"
#include "model_config.pb.h"
#include "scheduler.h"
#include "status.h"
namespace triton { namespace core {

class InferenceRequest;

//
// Interface for models that handle inference requests.
//
class Model {
 public:
  explicit Model(
      const double min_compute_capability, const std::string& model_dir,
      const int64_t version, const inference::ModelConfig& config)
      : config_(config), min_compute_capability_(min_compute_capability),
        version_(version), required_input_count_(0), model_dir_(model_dir),
        set_model_config_(false)
  {
  }
  virtual ~Model() {}

  // Get the name of model being served.
  const std::string& Name() const { return config_.name(); }

  // Get the version of model being served.
  int64_t Version() const { return version_; }

  // Get the configuration of model being served.
  const inference::ModelConfig& Config() const { return config_; }

  // Get the number of required inputs
  size_t RequiredInputCount() const { return required_input_count_; }

  // Get the stats collector for the model being served.
  InferenceStatsAggregator* MutableStatsAggregator()
  {
    return &stats_aggregator_;
  }
  const InferenceStatsAggregator& StatsAggregator() const
  {
    return stats_aggregator_;
  }

  // Get the model configuration for a named input.
  Status GetInput(
      const std::string& name, const inference::ModelInput** input) const;

  // Get the model configuration for a named output.
  Status GetOutput(
      const std::string& name, const inference::ModelOutput** output) const;

  // Get a label provider for the model.
  const std::shared_ptr<LabelProvider>& GetLabelProvider() const
  {
    return label_provider_;
  }

  // Initialize the instance for Triton core usage
  Status Init(const bool is_config_provided);

  // Enqueue a request for execution. If Status::Success is returned
  // then the model has taken ownership of the request object and so
  // 'request' will be nullptr. If non-success is returned then the
  // caller still retains ownership of 'request'.
  Status Enqueue(std::unique_ptr<InferenceRequest>& request)
  {
    return scheduler_->Enqueue(request);
  }

  // Return the number of in-flight inferences.
  size_t InflightInferenceCount()
  {
    return scheduler_->InflightInferenceCount();
  }

  // Stop processing future requests unless they are considered as in-flight.
  void Stop() { scheduler_->Stop(); }

  uint32_t DefaultPriorityLevel() const { return default_priority_level_; }

  uint32_t MaxPriorityLevel() const { return max_priority_level_; }

 protected:
  // Set the configuration of the model being served.
  Status SetModelConfig(const inference::ModelConfig& config);

  // Explicitly set the scheduler to use for inference requests to the
  // model. The scheduler can only be set once for a model.
  Status SetScheduler(std::unique_ptr<Scheduler> scheduler);

  // The scheduler to use for this model.
  std::unique_ptr<Scheduler> scheduler_;

  // Configuration of the model.
  inference::ModelConfig config_;

 private:
  // The minimum supported CUDA compute capability.
  const double min_compute_capability_;

  // Version of the model.
  int64_t version_;

  // The stats collector for the model.
  InferenceStatsAggregator stats_aggregator_;

  // Label provider for this model.
  std::shared_ptr<LabelProvider> label_provider_;

  size_t required_input_count_;

  // Map from input name to the model configuration for that input.
  std::unordered_map<std::string, inference::ModelInput> input_map_;

  // Map from output name to the model configuration for that output.
  std::unordered_map<std::string, inference::ModelOutput> output_map_;

  // Path to model
  std::string model_dir_;

  // The default priority level for the model.
  uint32_t default_priority_level_;

  // The largest priority value for the model.
  uint32_t max_priority_level_;

  // Whether or not model config has been set.
  bool set_model_config_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/model_config_cuda.cc deleted 100644 → 0 View file @ d592fbea
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model_config_cuda.h"
#include <cuda_runtime_api.h>
namespace triton { namespace core {

int
GetCudaStreamPriority(
    inference::ModelOptimizationPolicy::ModelPriority priority)
{
  // Default priority is 0
  int cuda_stream_priority = 0;

  int min, max;
  cudaError_t cuerr = cudaDeviceGetStreamPriorityRange(&min, &max);
  if ((cuerr != cudaErrorNoDevice) && (cuerr != cudaSuccess)) {
    return 0;
  }

  switch (priority) {
    case inference::ModelOptimizationPolicy::PRIORITY_MAX:
      cuda_stream_priority = max;
      break;
    case inference::ModelOptimizationPolicy::PRIORITY_MIN:
      cuda_stream_priority = min;
      break;
    default:
      cuda_stream_priority = 0;
      break;
  }

  return cuda_stream_priority;
}

}}  // namespace triton::core
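A priority value like the one returned by GetCudaStreamPriority() is normally passed to cudaStreamCreateWithPriority(). The following is a minimal standalone sketch of that use, assuming a CUDA toolchain and device are available; it is an illustration, not part of the Triton sources.

// Build with: nvcc -o stream_priority stream_priority.cu
#include <cuda_runtime_api.h>
#include <cstdio>

int main()
{
  // Query the priority range the device supports. CUDA reports the least and
  // greatest priorities; numerically lower values mean higher priority.
  int least = 0, greatest = 0;
  cudaError_t err = cudaDeviceGetStreamPriorityRange(&least, &greatest);
  if (err != cudaSuccess) {
    std::printf("no usable CUDA device: %s\n", cudaGetErrorString(err));
    return 0;
  }

  // Create a stream at the highest supported priority, i.e. the value
  // GetCudaStreamPriority() would return for PRIORITY_MAX.
  cudaStream_t stream;
  err = cudaStreamCreateWithPriority(&stream, cudaStreamNonBlocking, greatest);
  if (err == cudaSuccess) {
    std::printf(
        "created stream with priority %d (range %d..%d)\n", greatest, least,
        greatest);
    cudaStreamDestroy(stream);
  }
  return 0;
}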
3rdparty/core-r22.12/src/model_config_cuda.h deleted 100644 → 0 View file @ d592fbea
// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <stdint.h>
#include "model_config.pb.h"
namespace triton { namespace core {

/// Get the CUDA stream priority for a given ModelPriority.
/// \param priority The inference::ModelOptimizationPolicy::ModelPriority
/// priority.
/// \return The CUDA stream priority.
int GetCudaStreamPriority(
    inference::ModelOptimizationPolicy::ModelPriority priority);

}}  // namespace triton::core
3rdparty/core-r22.12/src/model_config_utils.cc deleted 100644 → 0 View file @ d592fbea
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model_config_utils.h"
#include <google/protobuf/util/json_util.h>
#include <deque>
#include <mutex>
#include <set>
#include "constants.h"
#include "cuda_utils.h"
#include "filesystem.h"
#include "triton/common/logging.h"
#define TRITONJSON_STATUSTYPE triton::core::Status
#define TRITONJSON_STATUSRETURN(M) \
return triton::core::Status(triton::core::Status::Code::INTERNAL, (M))
#define TRITONJSON_STATUSSUCCESS triton::core::Status::Success
#include "triton/common/triton_json.h"
#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif // TRITON_ENABLE_GPU
namespace triton { namespace core {

namespace {

#ifdef TRITON_ENABLE_ENSEMBLE

struct EnsembleTensor {
  EnsembleTensor(bool isOutput) : ready(false), isOutput(isOutput) {}
  bool ready;
  bool isOutput;
  std::vector<EnsembleTensor*> prev_nodes;
  std::vector<EnsembleTensor*> next_nodes;
};
/// Build a graph that represents the data flow in the ensemble specified in
/// given model config. the node (ensemble tensor) in the graph can be looked
/// up using its name as key.
/// \param ensemble_config The model configuration that specifies
/// ensemble_scheduling field.
/// \param keyed_ensemble_graph Returned the ensemble graph.
/// \return The error status. A non-OK status indicates the build fails because
/// the ensemble configuration is not valid.
Status
BuildEnsembleGraph(
    const inference::ModelConfig& config,
    std::unordered_map<std::string, EnsembleTensor>& keyed_ensemble_graph)
{
  keyed_ensemble_graph.clear();
  size_t step_idx = 0;
  for (const auto& element : config.ensemble_scheduling().step()) {
    if (element.model_name().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "must specify 'model_name' in step " + std::to_string(step_idx) +
              " of ensemble '" + config.name() + "'");
    }
    if (element.input_map().size() == 0) {
      return Status(
          Status::Code::INVALID_ARG,
          "must specify 'input_map' in step " + std::to_string(step_idx) +
              " of ensemble '" + config.name() + "'");
    }
    if (element.output_map().size() == 0) {
      return Status(
          Status::Code::INVALID_ARG,
          "must specify 'output_map' in step " + std::to_string(step_idx) +
              " of ensemble '" + config.name() + "'");
    }

    // Link ensemble tensors
    std::vector<EnsembleTensor*> tensor_as_output;
    for (const auto& output_map : element.output_map()) {
      auto it = keyed_ensemble_graph.find(output_map.second);
      if (it != keyed_ensemble_graph.end()) {
        if (it->second.isOutput) {
          return Status(
              Status::Code::INVALID_ARG,
              "ensemble tensor '" + it->first +
                  "' can appear in an output map only once for ensemble '" +
                  config.name() + "' step " + std::to_string(step_idx));
        } else {
          it->second.isOutput = true;
        }
      } else {
        it = keyed_ensemble_graph
                 .emplace(
                     std::make_pair(output_map.second, EnsembleTensor(true)))
                 .first;
      }
      tensor_as_output.push_back(&(it->second));
    }

    std::set<std::string> model_inputs;
    for (const auto& input_map : element.input_map()) {
      if (model_inputs.find(input_map.first) != model_inputs.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "input '" + input_map.first + "' in model '" +
                element.model_name() +
                "' is mapped to multiple ensemble tensors for ensemble '" +
                config.name() + "' step " + std::to_string(step_idx));
      } else {
        model_inputs.emplace(input_map.first);
      }
      auto it = keyed_ensemble_graph.find(input_map.second);
      if (it == keyed_ensemble_graph.end()) {
        it = keyed_ensemble_graph
                 .emplace(
                     std::make_pair(input_map.second, EnsembleTensor(false)))
                 .first;
      }
      for (auto output : tensor_as_output) {
        output->prev_nodes.push_back(&(it->second));
        it->second.next_nodes.push_back(output);
      }
    }

    step_idx++;
  }

  return Status::Success;
}
Status
ValidateEnsembleSchedulingConfig(const inference::ModelConfig& config)
{
  if (config.platform() != kEnsemblePlatform) {
    return Status(
        Status::Code::INVALID_ARG,
        "ensemble scheduling cannot be set for model '" + config.name() +
            "' whose platform is not " + kEnsemblePlatform);
  }
  if (config.instance_group().size() != 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "instance group should not be specified for ensemble '" +
            config.name() + "'");
  }
  if (config.has_optimization()) {
    return Status(
        Status::Code::INVALID_ARG,
        "optimization should not be specified for ensemble '" + config.name() +
            "'");
  }
  if (config.model_warmup_size() != 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "model_warmup can not be specified for ensemble '" + config.name() +
            "'");
  }

  // Make sure step is not empty and all fields are set
  if (config.ensemble_scheduling().step_size() == 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "must specify 'step' for ensemble '" + config.name() + "'");
  }

  std::unordered_map<std::string, EnsembleTensor> tensors;
  RETURN_IF_ERROR(BuildEnsembleGraph(config, tensors));

  // check data flow
  std::deque<EnsembleTensor*> ready_queue;
  for (const auto& input : config.input()) {
    auto it = tensors.find(input.name());
    if (it == tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "ensemble input '" + input.name() + "' for ensemble " +
              config.name() + "' is not used");
    }
    it->second.ready = true;
    ready_queue.push_back(&(it->second));
  }
  while (!ready_queue.empty()) {
    auto& ready_node = ready_queue.front();
    for (auto& next_node : ready_node->next_nodes) {
      if (next_node->ready) {
        continue;
      }
      bool next_node_ready = true;
      for (auto& prev_node : next_node->prev_nodes) {
        if (!prev_node->ready) {
          next_node_ready = false;
          break;
        }
      }
      next_node->ready = next_node_ready;
      if (next_node_ready) {
        ready_queue.push_back(next_node);
      }
    }
    ready_queue.pop_front();
  }
  std::set<std::string> outputs;
  for (const auto& output : config.output()) {
    auto it = tensors.find(output.name());
    if (it == tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "ensemble output '" + output.name() + "' for ensemble " +
              config.name() + "' is not used");
    }
    if (!it->second.ready) {
      return Status(
          Status::Code::INVALID_ARG,
          "output '" + output.name() + "' for ensemble '" + config.name() +
              "' is not written");
    } else {
      outputs.insert(it->first);
    }
  }
  // Check redundant ensemble tensors
  for (const auto& tensor : tensors) {
    // skip ensemble outputs as they have been checked and can have no
    // next nodes
    if (outputs.find(tensor.first) != outputs.end()) {
      continue;
    }
    if (!tensor.second.ready || (tensor.second.next_nodes.size() == 0)) {
      return Status(
          Status::Code::INVALID_ARG,
          "ensemble tensor '" + tensor.first + "' is unused in ensemble '" +
              config.name() + "'");
    }
  }
  return Status::Success;
}
#endif // TRITON_ENABLE_ENSEMBLE
template <class ModelIO>
Status
ValidateIOShape(
    const ModelIO& io, int32_t max_batch_size,
    const std::string& message_prefix = "")
{
  if (io.name().empty()) {
    return Status(
        Status::Code::INVALID_ARG, message_prefix + "must specify 'name'");
  }

  if (io.data_type() == inference::DataType::TYPE_INVALID) {
    return Status(
        Status::Code::INVALID_ARG, "model output must specify 'data_type'");
  }

  if (io.dims_size() == 0) {
    return Status(
        Status::Code::INVALID_ARG, message_prefix + "must specify 'dims'");
  }

  // If the configuration is non-batching, then no input or output
  // reshape can be empty as that would mean that input or output was
  // always empty (no data).
  if (io.has_reshape() && (io.reshape().shape_size() == 0) &&
      (max_batch_size == 0)) {
    return Status(
        Status::Code::INVALID_ARG,
        message_prefix +
            "cannot have empty reshape for non-batching model as scalar "
            "tensors are not supported");
  }

  for (auto dim : io.dims()) {
    // Dimension cannot be 0.
    if ((dim < 1) && (dim != triton::common::WILDCARD_DIM)) {
      return Status(
          Status::Code::INVALID_ARG,
          message_prefix + "dimension must be integer >= 1, or " +
              std::to_string(triton::common::WILDCARD_DIM) +
              " to indicate a variable-size dimension");
    }
  }

  if (io.has_reshape()) {
    // Zeros are not allowed in reshape.
    for (auto dim : io.reshape().shape()) {
      if ((dim < 1) && (dim != triton::common::WILDCARD_DIM)) {
        return Status(
            Status::Code::INVALID_ARG,
            message_prefix + "reshape dimensions must be integer >= 1, or " +
                std::to_string(triton::common::WILDCARD_DIM) +
                " to indicate a variable-size dimension");
      }
    }

    const int64_t dims_size = triton::common::GetElementCount(io.dims());
    const int64_t reshape_size =
        triton::common::GetElementCount(io.reshape().shape());

    // dims and reshape must both have same element count
    // or both have variable-size dimension.
    // Special case for empty reshape... expect dims to have element
    // count of 1.
    if ((dims_size != reshape_size) &&
        ((reshape_size != 0) || (dims_size != 1))) {
      return Status(
          Status::Code::INVALID_ARG,
          message_prefix + "has different size for dims and reshape");
    }

    // shape contains variable-size dimension, in this case we compare if
    // each pair of the chunks separated by variable-size dimension has
    // the same element count. For instance, from [2, 4, -1, 6] to
    // [8, -1, 1, 6] is a valid reshape as 2 * 4 = 8 and 6 = 1 * 6.
    if (dims_size == -1) {
      std::vector<int64_t> dim_element_cnts;
      std::vector<int64_t> reshape_element_cnts;
      int64_t current_cnt = 1;
      for (const auto& dim : io.dims()) {
        if (dim != -1) {
          current_cnt *= dim;
        } else {
          dim_element_cnts.push_back(current_cnt);
          current_cnt = 1;
        }
      }
      dim_element_cnts.push_back(current_cnt);

      current_cnt = 1;
      for (const auto& dim : io.reshape().shape()) {
        if (dim != -1) {
          current_cnt *= dim;
        } else {
          reshape_element_cnts.push_back(current_cnt);
          current_cnt = 1;
        }
      }
      reshape_element_cnts.push_back(current_cnt);

      if (dim_element_cnts.size() != reshape_element_cnts.size()) {
        return Status(
            Status::Code::INVALID_ARG,
            message_prefix +
                "has different number of variable-size dimensions for dims "
                "and reshape");
      }
      for (size_t idx = 0; idx < dim_element_cnts.size(); idx++) {
        if (dim_element_cnts[idx] != reshape_element_cnts[idx]) {
          return Status(
              Status::Code::INVALID_ARG,
              message_prefix + "has different size for dims and reshape");
        }
      }
    }
  }

  return Status::Success;
}

}  // namespace
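The variable-size reshape check above compares the product of the fixed dimensions in each chunk between wildcards. Below is a standalone illustration of that chunk-product comparison; the helper name ChunkProducts is ours, introduced only for this sketch.

#include <cstdint>
#include <iostream>
#include <vector>

// Split a shape at each -1 (variable-size) dimension and record the product
// of the fixed dimensions in every chunk, mirroring the loop in
// ValidateIOShape above.
std::vector<int64_t> ChunkProducts(const std::vector<int64_t>& dims)
{
  std::vector<int64_t> cnts;
  int64_t current = 1;
  for (int64_t d : dims) {
    if (d != -1) {
      current *= d;
    } else {
      cnts.push_back(current);
      current = 1;
    }
  }
  cnts.push_back(current);
  return cnts;
}

int main()
{
  // From the comment above: [2, 4, -1, 6] -> [8, -1, 1, 6] is a valid reshape
  // because 2 * 4 == 8 and 6 == 1 * 6.
  const std::vector<int64_t> dims{2, 4, -1, 6};
  const std::vector<int64_t> reshape{8, -1, 1, 6};
  std::cout << (ChunkProducts(dims) == ChunkProducts(reshape)
                    ? "compatible reshape"
                    : "incompatible reshape")
            << std::endl;
  return 0;
}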
Status
GetModelVersionFromPath(const std::string& path, int64_t* version)
{
  auto version_dir = BaseName(path);

  // Determine the version from the last segment of 'path'
  try {
    *version = std::atoll(version_dir.c_str());
  }
  catch (...) {
    return Status(
        Status::Code::INTERNAL,
        "unable to determine model version from " + path);
  }

  return Status::Success;
}
Status
GetBooleanSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype, float* fp32_false_value,
    float* fp32_true_value, int32_t* int32_false_value,
    int32_t* int32_true_value, bool* bool_false_value, bool* bool_true_value)
{
  // Make sure same tensor is not configured for multiple controls
  std::set<std::string> seen_tensors;

  // Make sure the control kind is not mentioned multiple times.
  bool seen_control = false;

  for (const auto& control_input : batcher.control_input()) {
    if (control_input.name().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must have a name for " +
              model_name);
    }

    if (seen_tensors.find(control_input.name()) != seen_tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor '" + control_input.name() +
              "' is specified for multiple control kinds for " + model_name);
    }

    seen_tensors.insert(control_input.name());

    for (const auto& c : control_input.control()) {
      if (c.kind() == control_kind) {
        if (seen_control) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching specifies multiple " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " tensors for " + model_name);
        }

        *tensor_name = control_input.name();
        seen_control = true;

        // Make sure only one of int, float, or bool type is specified.
        if (!((c.int32_false_true_size() != 0) ||
              (c.fp32_false_true_size() != 0) ||
              (c.bool_false_true_size() != 0))) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching must specify either 'int32_false_true', "
              "'fp32_false_true' or 'bool_false_true' for " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " for " + model_name);
        } else if (
            ((c.int32_false_true_size() != 0) &&
             (c.fp32_false_true_size() != 0)) ||
            ((c.int32_false_true_size() != 0) &&
             (c.bool_false_true_size() != 0)) ||
            ((c.fp32_false_true_size() != 0) &&
             (c.bool_false_true_size() != 0))) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching specifies more than one from "
              "'int32_false_true', 'fp32_false_true' and 'bool_false_true' "
              "for " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " for " + model_name);
        }

        if (c.int32_false_true_size() > 0) {
          if (c.int32_false_true_size() != 2) {
            return Status(
                Status::Code::INVALID_ARG,
                "sequence batching control 'int32_false_true' must have "
                "exactly 2 entries for " +
                    inference::ModelSequenceBatching_Control_Kind_Name(
                        control_kind) +
                    " for " + model_name);
          }

          if (tensor_datatype != nullptr) {
            *tensor_datatype = inference::DataType::TYPE_INT32;
          }
          if (int32_false_value != nullptr) {
            *int32_false_value = c.int32_false_true(0);
          }
          if (int32_true_value != nullptr) {
            *int32_true_value = c.int32_false_true(1);
          }
        } else if (c.fp32_false_true_size() > 0) {
          if (c.fp32_false_true_size() != 2) {
            return Status(
                Status::Code::INVALID_ARG,
                "sequence batching control 'fp32_false_true' must have "
                "exactly 2 entries for " +
                    inference::ModelSequenceBatching_Control_Kind_Name(
                        control_kind) +
                    " for " + model_name);
          }

          if (tensor_datatype != nullptr) {
            *tensor_datatype = inference::DataType::TYPE_FP32;
          }
          if (fp32_false_value != nullptr) {
            *fp32_false_value = c.fp32_false_true(0);
          }
          if (fp32_true_value != nullptr) {
            *fp32_true_value = c.fp32_false_true(1);
          }
        } else {
          if (c.bool_false_true_size() != 2) {
            return Status(
                Status::Code::INVALID_ARG,
                "sequence batching control 'bool_false_true' must have "
                "exactly 2 entries for " +
                    inference::ModelSequenceBatching_Control_Kind_Name(
                        control_kind) +
                    " for " + model_name);
          }

          if (tensor_datatype != nullptr) {
            *tensor_datatype = inference::DataType::TYPE_BOOL;
          }
          if (bool_false_value != nullptr) {
            *bool_false_value = c.bool_false_true(0);
          }
          if (bool_true_value != nullptr) {
            *bool_true_value = c.bool_false_true(1);
          }
        }
      }
    }
  }

  if (!seen_control) {
    if (required) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must specify a " +
              inference::ModelSequenceBatching_Control_Kind_Name(
                  control_kind) +
              " value for " + model_name);
    }

    tensor_name->clear();
  }

  return Status::Success;
}
Status
GetTypedSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype)
{
  // Make sure same tensor is not configured for multiple controls
  std::set<std::string> seen_tensors;

  // Make sure the control kind is not mentioned multiple times.
  bool seen_control = false;

  for (const auto& control_input : batcher.control_input()) {
    if (control_input.name().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must have a name for " +
              model_name);
    }

    if (seen_tensors.find(control_input.name()) != seen_tensors.end()) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor '" + control_input.name() +
              "' is specified for multiple control kinds for " + model_name);
    }

    seen_tensors.insert(control_input.name());

    for (const auto& c : control_input.control()) {
      if (c.kind() == control_kind) {
        if (seen_control) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching specifies multiple " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " tensors for " + model_name);
        }

        *tensor_name = control_input.name();
        if (tensor_datatype != nullptr) {
          *tensor_datatype = c.data_type();
        }

        seen_control = true;

        if ((c.int32_false_true_size() > 0) ||
            (c.fp32_false_true_size() > 0) ||
            (c.bool_false_true_size() > 0)) {
          return Status(
              Status::Code::INVALID_ARG,
              "sequence batching must not specify either "
              "'int32_false_true', 'fp32_false_true' or 'bool_false_true' "
              "for " +
                  inference::ModelSequenceBatching_Control_Kind_Name(
                      control_kind) +
                  " for " + model_name);
        }
      }
    }
  }

  if (!seen_control) {
    if (required) {
      return Status(
          Status::Code::INVALID_ARG,
          "sequence batching control tensor must specify a " +
              inference::ModelSequenceBatching_Control_Kind_Name(
                  control_kind) +
              " value for " + model_name);
    }

    tensor_name->clear();
  }

  return Status::Success;
}
Status
GetNormalizedModelConfig(
    const std::string& model_name, const std::string& path,
    const double min_compute_capability, inference::ModelConfig* config)
{
  // Server-side autofill only sets certain backend fields for the models that
  // belong to limited backends for backwards-compatibility. See TensorRT
  // backend, ONNX Runtime backend, OpenVINO backend, TensorFlow backend, and
  // PyTorch backend. Extracting detailed information is delegated to the
  // backend implementation to auto-complete.
  RETURN_IF_ERROR(
      AutoCompleteBackendFields(model_name, std::string(path), config));
  LOG_VERBOSE(1) << "Server side auto-completed config: "
                 << config->DebugString();

  RETURN_IF_ERROR(NormalizeModelConfig(min_compute_capability, config));

  return Status::Success;
}
Status
NormalizeModelConfig(
    const double min_compute_capability, inference::ModelConfig* config)
{
  // If version_policy is not specified, default to Latest 1 version.
  if (!config->has_version_policy()) {
    inference::ModelVersionPolicy::Latest latest;
    latest.set_num_versions(1);
    config->mutable_version_policy()->mutable_latest()->CopyFrom(latest);
  }

  // If dynamic batching is specified...
  if (config->has_dynamic_batching()) {
    // If preferred batch size is not specified set it to
    // max-batch-size.
    if (config->dynamic_batching().preferred_batch_size().size() == 0) {
      auto mutable_preferred_batch_size =
          config->mutable_dynamic_batching()->mutable_preferred_batch_size();
      if (config->max_batch_size() > 0) {
        mutable_preferred_batch_size->Add(config->max_batch_size());
      }
    }
  }

  // If sequence batching is specified...
  if (config->has_sequence_batching()) {
    // Set the default max sequence idle time if not specified.
    if (config->sequence_batching().max_sequence_idle_microseconds() == 0) {
      config->mutable_sequence_batching()->set_max_sequence_idle_microseconds(
          SEQUENCE_IDLE_DEFAULT_MICROSECONDS);
    }

    if (config->sequence_batching().has_oldest()) {
      // If preferred batch size is not specified set it to
      // max-batch-size.
      if (config->sequence_batching().oldest().preferred_batch_size().size() ==
          0) {
        auto mutable_preferred_batch_size =
            config->mutable_sequence_batching()
                ->mutable_oldest()
                ->mutable_preferred_batch_size();
        if (config->max_batch_size() > 0) {
          mutable_preferred_batch_size->Add(config->max_batch_size());
        }
      }
    }
  }

  // If model ensembling is specified, don't attempt to normalize
  // instance_group as it is not allowed in ensemble scheduling.
  if (!config->has_ensemble_scheduling()) {
    auto optimization = config->mutable_optimization();
    if (!optimization->has_input_pinned_memory()) {
      optimization->mutable_input_pinned_memory()->set_enable(true);
    }
    if (!optimization->has_output_pinned_memory()) {
      optimization->mutable_output_pinned_memory()->set_enable(true);
    }
  }

  return Status::Success;
}
Status
NormalizeInstanceGroup(
    const double min_compute_capability,
    const std::vector<inference::ModelInstanceGroup>& preferred_groups,
    inference::ModelConfig* config)
{
  // Instance group setting doesn't apply to ensemble
  if (config->has_ensemble_scheduling()) {
    return Status::Success;
  }

  // Creates a set of supported GPU device ids
  std::set<int> supported_gpus;
#ifdef TRITON_ENABLE_GPU
  // Get the total number of GPUs from the runtime library.
  Status status = GetSupportedGPUs(&supported_gpus, min_compute_capability);
  if (!status.IsOk()) {
    return status;
  }
#endif  // TRITON_ENABLE_GPU

  // Make sure there is at least one instance_group.
  if (config->instance_group().empty()) {
    inference::ModelInstanceGroup* group = config->add_instance_group();
    group->set_name(config->name());

    for (const auto& pg : preferred_groups) {
      group->set_kind(pg.kind());
      group->set_count(pg.count());
      // handle preferred GPU setting differently based on kind
      if (pg.kind() == inference::ModelInstanceGroup::KIND_GPU) {
        // Don't use preferred group with KIND_GPU if there is no GPU.
        if (supported_gpus.empty()) {
          continue;
        }
        // If preferred group sets GPUs, limit deployment onto those that
        // are also listed in supported gpus
        if (!pg.gpus().empty()) {
          for (const int32_t gid : pg.gpus()) {
            if (supported_gpus.find(gid) != supported_gpus.end()) {
              group->add_gpus(gid);
            }
          }
        }
        break;
      } else if (pg.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
        // if AUTO, then set preferred GPU as is, to align with KIND_AUTO
        // deduction specified below
        for (const int32_t gid : pg.gpus()) {
          group->add_gpus(gid);
        }
        break;
      }
      // Other kind should not set GPUs
      break;
    }
  }

  // Assign default name, kind and count to each instance group that
  // doesn't give those values explicitly. For KIND_GPU, set GPUs to
  // all available if not specified explicitly.
  size_t cnt = 0;
  for (auto& group : *config->mutable_instance_group()) {
    // Name
    if (group.name().empty()) {
      group.set_name(config->name() + "_" + std::to_string(cnt));
    }
    cnt++;

    // For KIND_AUTO... if there are no GPUs or if any of the listed
    // 'gpu's are not present, then use KIND_CPU.
    if (group.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
      if (supported_gpus.empty()) {
        group.set_kind(inference::ModelInstanceGroup::KIND_CPU);
      } else {
        for (const int32_t gid : group.gpus()) {
          if (supported_gpus.find(gid) == supported_gpus.end()) {
            group.set_kind(inference::ModelInstanceGroup::KIND_CPU);
            break;
          }
        }
      }

      if (group.kind() == inference::ModelInstanceGroup::KIND_AUTO) {
        group.set_kind(inference::ModelInstanceGroup::KIND_GPU);
      }
    }

    // KIND is resolved at this point
    for (const auto& pg : preferred_groups) {
      if (group.kind() != pg.kind()) {
        continue;
      }
      // Limit the GPU setting within what is specified in the preferred
      // group, if no available GPU then skip to next preferred group
      if ((group.kind() == inference::ModelInstanceGroup::KIND_GPU) &&
          group.gpus().empty() && !pg.gpus().empty()) {
        for (const int32_t gid : pg.gpus()) {
          if (supported_gpus.find(gid) != supported_gpus.end()) {
            group.add_gpus(gid);
          }
        }
        if (group.gpus().empty()) {
          continue;
        }
      }
      if ((group.count() < 1) && (pg.count() > 0)) {
        group.set_count(pg.count());
      }
    }

    // Set Triton default if the fields are not set from preferred group
    // Count
    if (group.count() < 1) {
      RETURN_IF_ERROR(SetDefaultInstanceCount(&group, config->backend()));
    }

    // GPUs
    if ((group.kind() == inference::ModelInstanceGroup::KIND_GPU) &&
        (group.gpus().size() == 0)) {
      for (auto d : supported_gpus) {
        group.add_gpus(d);
      }
    }
  }

  return Status::Success;
}
Status
LocalizePythonBackendExecutionEnvironmentPath(
    const std::string& model_path, inference::ModelConfig* config,
    std::shared_ptr<LocalizedPath>* localized_model_dir)
{
  if (config->backend() == "python") {
    if (config->parameters().contains("EXECUTION_ENV_PATH")) {
      // Read EXECUTION_ENV_PATH
      std::string exec_env_path =
          config->parameters().at("EXECUTION_ENV_PATH").string_value();
      // Replace model directory variable with model_path
      std::string model_dir_var = "$$TRITON_MODEL_DIRECTORY";
      if (exec_env_path.substr(0, model_dir_var.size()) == model_dir_var) {
        exec_env_path.replace(0, model_dir_var.size(), model_path);
      }
      // Collapse any .. in the path
      std::string abs_exec_env_path;
      std::size_t prev_pos = exec_env_path.size();
      std::size_t pos = exec_env_path.find_last_of('/', prev_pos - 1);
      int skip = 0;
      while (pos != std::string::npos && prev_pos > 0) {
        if (!skip) {
          abs_exec_env_path =
              exec_env_path.substr(pos, prev_pos - pos) + abs_exec_env_path;
        }
        skip = skip > 0 ? skip - 1 : skip;
        if (pos >= 3 && exec_env_path.substr(pos - 3, 3) == "/..") {
          skip += 2;
        }
        prev_pos = pos;
        pos = exec_env_path.find_last_of('/', prev_pos - 1);
      }
      abs_exec_env_path = exec_env_path.substr(0, prev_pos) + abs_exec_env_path;

      // Localize iff abs_exec_env_path is outside the model directory
      std::string model_path_slash =
          model_path.back() == '/' ? model_path : model_path + "/";
      if (abs_exec_env_path.substr(0, model_path_slash.size()) !=
          model_path_slash) {
        // Localize the file
        std::shared_ptr<LocalizedPath> localized_exec_env_path;
        RETURN_IF_ERROR(
            LocalizePath(abs_exec_env_path, &localized_exec_env_path));
        // Persist the localized temporary path
        (*localized_model_dir)
            ->other_localized_path.push_back(localized_exec_env_path);
        // Rewrite EXECUTION_ENV_PATH
        config->mutable_parameters()
            ->at("EXECUTION_ENV_PATH")
            .set_string_value(localized_exec_env_path->Path());
      }
    }
  }
  return Status::Success;
}
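To make the intent of the ".." collapsing above concrete, here is a simplified standalone sketch that produces the same kind of result by splitting on '/' and popping a segment for every "..", rather than scanning backwards the way the function above does; it is an illustration only, not the Triton implementation.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Collapse ".." segments: "/models/add_sub/1/../env.tar.gz" becomes
// "/models/add_sub/env.tar.gz".
std::string CollapseDotDot(const std::string& path)
{
  std::vector<std::string> parts;
  std::stringstream ss(path);
  std::string seg;
  while (std::getline(ss, seg, '/')) {
    if (seg == "..") {
      if (!parts.empty()) {
        parts.pop_back();  // ".." cancels the previous segment
      }
    } else if (!seg.empty()) {
      parts.push_back(seg);
    }
  }
  std::string out;
  for (const auto& p : parts) {
    out += "/" + p;
  }
  return out.empty() ? "/" : out;
}

int main()
{
  std::cout << CollapseDotDot("/models/add_sub/1/../env.tar.gz") << std::endl;
  return 0;
}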
Status
SetDefaultInstanceCount(
    inference::ModelInstanceGroup* group, const std::string& backend)
{
  group->set_count(1);
  // Backends opt into the default_cpu_instance_count since
  // some backends (pytorch, OpenVINO) don't perform well/have high overhead
  // when using multiple instances.
  const int default_cpu_instance_count = 2;
  bool use_default_cpu_instance_count =
      (backend == kTensorFlowBackend) || (backend == kOnnxRuntimeBackend);
  if (group->kind() == inference::ModelInstanceGroup::KIND_CPU &&
      use_default_cpu_instance_count) {
    group->set_count(default_cpu_instance_count);
  }
  return Status::Success;
}
Status
AutoCompleteBackendFields(
    const std::string& model_name, const std::string& model_path,
    inference::ModelConfig* config)
{
  std::set<std::string> version_dirs;
  RETURN_IF_ERROR(GetDirectorySubdirs(model_path, &version_dirs));

  // There must be at least one version directory that we can inspect to
  // attempt to determine the platform. If not, we skip autofill with file
  // name. For now we allow multiple versions and only inspect the first
  // version directory to ensure it is valid. We can add more aggressive
  // checks later.
  const bool has_version = (version_dirs.size() != 0);
  const auto version_path =
      has_version ? JoinPath({model_path, *(version_dirs.begin())}) : "";
  std::set<std::string> version_dir_content;
  if (has_version) {
    RETURN_IF_ERROR(GetDirectoryContents(version_path, &version_dir_content));
  }

  // If the model name is not given in the configuration, set it based
  // on the model path.
  if (config->name().empty()) {
    config->set_name(model_name);
  }

  // Trying to fill the 'backend', 'default_model_filename' field.

  // TensorFlow
  // For TF backend, the platform is required
  if (config->platform().empty()) {
    // Check 'backend', 'default_model_filename', and the actual directory
    // to determine the platform
    if (config->backend().empty() ||
        (config->backend() == kTensorFlowBackend)) {
      if (config->default_model_filename() == kTensorFlowSavedModelFilename) {
        config->set_platform(kTensorFlowSavedModelPlatform);
      } else if (
          config->default_model_filename() == kTensorFlowGraphDefFilename) {
        config->set_platform(kTensorFlowGraphDefPlatform);
      } else if (config->default_model_filename().empty() && has_version) {
        bool is_dir = false;
        if (version_dir_content.find(kTensorFlowSavedModelFilename) !=
            version_dir_content.end()) {
          RETURN_IF_ERROR(IsDirectory(
              JoinPath({version_path, kTensorFlowSavedModelFilename}),
              &is_dir));
          if (is_dir) {
            config->set_platform(kTensorFlowSavedModelPlatform);
          }
        }
        if (version_dir_content.find(kTensorFlowGraphDefFilename) !=
            version_dir_content.end()) {
          RETURN_IF_ERROR(IsDirectory(
              JoinPath({version_path, kTensorFlowGraphDefFilename}), &is_dir));
          if (!is_dir) {
            config->set_platform(kTensorFlowGraphDefPlatform);
          }
        }
      }
    }
  }
  // Fill 'backend' and 'default_model_filename' if missing
  if ((config->platform() == kTensorFlowSavedModelPlatform) ||
      (config->platform() == kTensorFlowGraphDefPlatform)) {
    if (config->backend().empty()) {
      config->set_backend(kTensorFlowBackend);
    }
    if (config->default_model_filename().empty()) {
      if (config->platform() == kTensorFlowSavedModelPlatform) {
        config->set_default_model_filename(kTensorFlowSavedModelFilename);
      } else {
        config->set_default_model_filename(kTensorFlowGraphDefFilename);
      }
    }
    return Status::Success;
  }

  // TensorRT
  if (config->backend().empty()) {
    if ((config->platform() == kTensorRTPlanPlatform) ||
        (config->default_model_filename() == kTensorRTPlanFilename)) {
      config->set_backend(kTensorRTBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      bool is_dir = false;
      if (version_dir_content.find(kTensorRTPlanFilename) !=
          version_dir_content.end()) {
        RETURN_IF_ERROR(IsDirectory(
            JoinPath({version_path, kTensorRTPlanFilename}), &is_dir));
        if (!is_dir) {
          config->set_backend(kTensorRTBackend);
        }
      }
    }
  }
  if (config->backend() == kTensorRTBackend) {
    if (config->platform().empty()) {
      config->set_platform(kTensorRTPlanPlatform);
    }
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kTensorRTPlanFilename);
    }
    return Status::Success;
  }

  // ONNXRuntime
  if (config->backend().empty()) {
    if ((config->platform() == kOnnxRuntimeOnnxPlatform) ||
        (config->default_model_filename() == kOnnxRuntimeOnnxFilename)) {
      config->set_backend(kOnnxRuntimeBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      if (version_dir_content.find(kOnnxRuntimeOnnxFilename) !=
          version_dir_content.end()) {
        // ONNX model can be a file or a directory in the case of large model
        config->set_backend(kOnnxRuntimeBackend);
      }
    }
  }
  if (config->backend() == kOnnxRuntimeBackend) {
    if (config->platform().empty()) {
      config->set_platform(kOnnxRuntimeOnnxPlatform);
    }
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kOnnxRuntimeOnnxFilename);
    }
    return Status::Success;
  }

  // OpenVINO
  if (config->backend().empty()) {
    if (config->default_model_filename() == kOpenVINORuntimeOpenVINOFilename) {
      config->set_backend(kOpenVINORuntimeBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      if (version_dir_content.find(kOpenVINORuntimeOpenVINOFilename) !=
          version_dir_content.end()) {
        config->set_backend(kOpenVINORuntimeBackend);
      }
    }
  }
  if (config->backend() == kOpenVINORuntimeBackend) {
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kOpenVINORuntimeOpenVINOFilename);
    }
    return Status::Success;
  }

  // PyTorch (TorchScript, LibTorch)
  if (config->backend().empty()) {
    if ((config->platform() == kPyTorchLibTorchPlatform) ||
        (config->default_model_filename() == kPyTorchLibTorchFilename)) {
      config->set_backend(kPyTorchBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      bool is_dir = false;
      if (version_dir_content.find(kPyTorchLibTorchFilename) !=
          version_dir_content.end()) {
        RETURN_IF_ERROR(IsDirectory(
            JoinPath({version_path, kPyTorchLibTorchFilename}), &is_dir));
        if (!is_dir) {
          config->set_backend(kPyTorchBackend);
        }
      }
    }
  }
  if (config->backend() == kPyTorchBackend) {
    if (config->platform().empty()) {
      config->set_platform(kPyTorchLibTorchPlatform);
    }
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kPyTorchLibTorchFilename);
    }
    return Status::Success;
  }

  // Python
  if (config->backend().empty()) {
    if (config->default_model_filename() == kPythonFilename) {
      config->set_backend(kPythonBackend);
    } else if (
        config->platform().empty() &&
        config->default_model_filename().empty() && has_version) {
      if (version_dir_content.find(kPythonFilename) !=
          version_dir_content.end()) {
        config->set_backend(kPythonBackend);
      }
    }
  }
  if (config->backend() == kPythonBackend) {
    if (config->default_model_filename().empty()) {
      config->set_default_model_filename(kPythonFilename);
    }
    return Status::Success;
  }

  // Custom Backend
  // For now, only do the narrowest case, where no info is given in the
  // config.
  if (config->backend().empty() && config->platform().empty() &&
      config->default_model_filename().empty()) {
    LOG_VERBOSE(1) << "Could not infer supported backend, so attempting "
                      "autofill of custom backend.";
    // Since we lazily load the backends, we let the model tell us what
    // backend to load. We must assume that if the model name conforms to the
    // required shape, we parse the backend name out of the model file name.
    // i.e. model.identity will set the backend to "identity".
    const std::string delimiter = ".";
    size_t pos = model_name.find(delimiter, 0);
    if (pos == std::string::npos) {
      return Status(
          triton::common::Error::Code::INVALID_ARG,
          ("Invalid model name: Could not determine backend for model '" +
           model_name +
           "' with no backend in model configuration. Expected model name of "
           "the form 'model.<backend_name>'."));
    }
    const std::string backend_name =
        model_name.substr(pos + 1, std::string::npos);
    config->set_backend(backend_name);
    config->set_default_model_filename(
        (std::string("model.") + backend_name).c_str());
    return Status::Success;
  }

  return Status::Success;
}
Status
ValidateModelIOConfig(const inference::ModelConfig& config)
{
  Status status;
  for (const auto& io : config.input()) {
    status =
        ValidateModelInput(io, config.max_batch_size(), config.platform());
    if (!status.IsOk()) {
      return Status(
          status.StatusCode(), status.Message() + " for " + config.name());
    }
  }
  for (const auto& io : config.output()) {
    status =
        ValidateModelOutput(io, config.max_batch_size(), config.platform());
    if (!status.IsOk()) {
      return Status(
          status.StatusCode(), status.Message() + " for " + config.name());
    }
  }
  status = ValidateBatchIO(config);
  if (!status.IsOk()) {
    return Status(
        status.StatusCode(), status.Message() + " for " + config.name());
  }
  return Status::Success;
}
Status
ValidateBatchIO(const inference::ModelConfig& config)
{
  std::set<std::string> input_names;
  std::set<std::string> output_names;
  for (const auto& io : config.input()) {
    input_names.emplace(io.name());
  }
  for (const auto& io : config.output()) {
    output_names.emplace(io.name());
  }
  for (const auto& batch_io : config.batch_input()) {
    switch (batch_io.kind()) {
      case inference::BatchInput::BATCH_ELEMENT_COUNT:
      case inference::BatchInput::BATCH_ACCUMULATED_ELEMENT_COUNT:
      case inference::BatchInput::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO:
      case inference::BatchInput::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE:
      case inference::BatchInput::BATCH_ITEM_SHAPE:
      case inference::BatchInput::BATCH_ITEM_SHAPE_FLATTEN: {
        if (batch_io.source_input_size() != 1) {
          return Status(
              Status::Code::INVALID_ARG,
              "batch input kind '" +
                  inference::BatchInput::Kind_Name(batch_io.kind()) +
                  "' expects 1 source input, got " +
                  std::to_string(batch_io.source_input_size()));
        }
        break;
      }
      default:
        return Status(
            Status::Code::INVALID_ARG,
            "unknown batch input kind '" +
                inference::BatchInput::Kind_Name(batch_io.kind()) + "'");
    }
    if ((batch_io.data_type() != inference::DataType::TYPE_INT32) &&
        (batch_io.data_type() != inference::DataType::TYPE_FP32)) {
      return Status(
          Status::Code::INVALID_ARG,
          "batch input data type must be TYPE_INT32 or TYPE_FP32");
    }
    for (const auto& source_name : batch_io.source_input()) {
      if (input_names.find(source_name) == input_names.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "unknown source input name '" + source_name + "'");
      }
    }
  }

  for (const auto& batch_io : config.batch_output()) {
    switch (batch_io.kind()) {
      case inference::BatchOutput::BATCH_SCATTER_WITH_INPUT_SHAPE: {
        if (batch_io.source_input_size() != 1) {
          return Status(
              Status::Code::INVALID_ARG,
              "batch output kind '" +
                  inference::BatchOutput::Kind_Name(batch_io.kind()) +
                  "' expects 1 source input, got " +
                  std::to_string(batch_io.source_input_size()));
        }
        break;
      }
      default:
        return Status(
            Status::Code::INVALID_ARG,
            "unknown batch output kind '" +
                inference::BatchOutput::Kind_Name(batch_io.kind()) + "'");
    }
    for (const auto& source_name : batch_io.source_input()) {
      if (input_names.find(source_name) == input_names.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "unknown source input name '" + source_name + "'");
      }
    }
    std::set<std::string> target_names;
    for (const auto& target_name : batch_io.target_name()) {
      if (output_names.find(target_name) == output_names.end()) {
        return Status(
            Status::Code::INVALID_ARG,
            "unknown target output name '" + target_name + "'");
      }
      if (target_names.emplace(target_name).second == false) {
        return Status(
            Status::Code::INVALID_ARG,
            "target output name '" + target_name +
                "' can only be specified once");
      }
    }
  }
  return Status::Success;
}
Status
ValidateModelConfig
(
const
inference
::
ModelConfig
&
config
,
const
double
min_compute_capability
)
{
if
(
config
.
name
().
empty
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"model configuration must specify 'name'"
);
}
if
(
config
.
backend
().
empty
())
{
// Expect backend is not empty unless it is ensemble platform.
#ifdef TRITON_ENABLE_ENSEMBLE
if
(
config
.
platform
()
!=
kEnsemblePlatform
)
#endif // TRITON_ENABLE_ENSEMBLE
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"unexpected platform type '"
+
config
.
platform
()
+
"' for "
+
config
.
name
());
}
#ifdef TRITON_ENABLE_ENSEMBLE
else
if
(
config
.
platform
()
==
kEnsemblePlatform
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Ensemble model '"
+
config
.
name
()
+
"' must have platform type '"
+
config
.
platform
()
+
"' and empty backend type"
);
}
#endif // TRITON_ENABLE_ENSEMBLE
if
(
config
.
platform
().
empty
()
&&
config
.
backend
().
empty
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"must specify 'platform' or 'backend' for '"
+
config
.
name
()
+
"'"
);
}
// Ensure both platform and backend are referring to known backend,
// or both referring to unknown backend for user-provided backend.
if
(
GetBackendTypeFromPlatform
(
config
.
platform
())
!=
GetBackendType
(
config
.
backend
()))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"unexpected 'platform' and 'backend' pair, got:"
+
config
.
platform
()
+
", "
+
config
.
backend
());
}
if
(
config
.
max_batch_size
()
<
0
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"'max_batch_size' must be non-negative value for "
+
config
.
name
());
}
if
(
!
config
.
has_version_policy
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"must specify 'version policy' for "
+
config
.
name
());
}
// If dynamic batching is specified make sure the preferred batch
// sizes are positive and don't exceed maximum batch size.
if
(
config
.
has_dynamic_batching
())
{
for
(
const
auto
size
:
config
.
dynamic_batching
().
preferred_batch_size
())
{
if
(
size
<=
0
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"dynamic batching preferred size must be positive for "
+
config
.
name
());
}
if
(
size
>
config
.
max_batch_size
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"dynamic batching preferred size must be <= max batch size for "
+
config
.
name
());
}
}
// Priority queue is specified
const
auto
priority_levels
=
config
.
dynamic_batching
().
priority_levels
();
if
(
priority_levels
!=
0
)
{
if
((
config
.
dynamic_batching
().
default_priority_level
()
==
0
)
||
(
config
.
dynamic_batching
().
default_priority_level
()
>
priority_levels
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"default priority level must be in range [1, "
+
std
::
to_string
(
priority_levels
)
+
"] for "
+
config
.
name
());
}
for
(
const
auto
&
queue_policy
:
config
.
dynamic_batching
().
priority_queue_policy
())
{
if
((
queue_policy
.
first
==
0
)
||
(
queue_policy
.
first
>
priority_levels
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"priority queue policy must have priority level in range [1, "
+
std
::
to_string
(
priority_levels
)
+
"] for "
+
config
.
name
());
}
}
}
// preserve ordering option will conflict with priorities and delay policy
if
(
config
.
dynamic_batching
().
preserve_ordering
())
{
if
(
priority_levels
>
1
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Only one priority level is allowed when 'preserve_ordering' is "
"true for "
+
config
.
name
());
}
const
auto
&
default_policy
=
config
.
dynamic_batching
().
default_queue_policy
();
if
((
default_policy
.
default_timeout_microseconds
()
!=
0
)
&&
(
default_policy
.
timeout_action
()
==
inference
::
ModelQueuePolicy
::
DELAY
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Queue policy can not have DELAY as timeout action when "
"'preserve_ordering' is true for "
+
config
.
name
());
}
// Also need to check policy in 'priority_queue_policy'
// for single priority case
for
(
const
auto
&
policy
:
config
.
dynamic_batching
().
priority_queue_policy
())
{
if
((
policy
.
second
.
default_timeout_microseconds
()
!=
0
)
&&
(
policy
.
second
.
timeout_action
()
==
inference
::
ModelQueuePolicy
::
DELAY
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Queue policy can not have DELAY as timeout action when "
"'preserve_ordering' is true for "
+
config
.
name
());
}
}
}
}
// If sequence batching is specified make sure the control is
// specified correctly.
if
(
config
.
has_sequence_batching
())
{
const
auto
&
batcher
=
config
.
sequence_batching
();
// Check boolean controls...
std
::
string
tensor_name
;
RETURN_IF_ERROR
(
GetBooleanSequenceControlProperties
(
batcher
,
config
.
name
(),
inference
::
ModelSequenceBatching
::
Control
::
CONTROL_SEQUENCE_START
,
false
/* required */
,
&
tensor_name
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
));
RETURN_IF_ERROR
(
GetBooleanSequenceControlProperties
(
batcher
,
config
.
name
(),
inference
::
ModelSequenceBatching
::
Control
::
CONTROL_SEQUENCE_END
,
false
/* required */
,
&
tensor_name
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
));
RETURN_IF_ERROR
(
GetBooleanSequenceControlProperties
(
batcher
,
config
.
name
(),
inference
::
ModelSequenceBatching
::
Control
::
CONTROL_SEQUENCE_READY
,
false
/* required */
,
&
tensor_name
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
));
// Check CORRID control and make sure it is one of the allowed types.
inference
::
DataType
tensor_datatype
;
RETURN_IF_ERROR
(
GetTypedSequenceControlProperties
(
batcher
,
config
.
name
(),
inference
::
ModelSequenceBatching
::
Control
::
CONTROL_SEQUENCE_CORRID
,
false
/* required */
,
&
tensor_name
,
&
tensor_datatype
));
if
(
!
tensor_name
.
empty
())
{
if
((
tensor_datatype
!=
inference
::
DataType
::
TYPE_UINT64
)
&&
(
tensor_datatype
!=
inference
::
DataType
::
TYPE_INT64
)
&&
(
tensor_datatype
!=
inference
::
DataType
::
TYPE_UINT32
)
&&
(
tensor_datatype
!=
inference
::
DataType
::
TYPE_INT32
)
&&
(
tensor_datatype
!=
inference
::
DataType
::
TYPE_STRING
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"unexpected data type for control "
+
inference
::
ModelSequenceBatching_Control_Kind_Name
(
inference
::
ModelSequenceBatching
::
Control
::
CONTROL_SEQUENCE_CORRID
)
+
" for "
+
config
.
name
()
+
". Allowed data types are TYPE_UINT64, TYPE_INT64, "
"TYPE_UINT32, "
"TYPE_INT32 and TYPE_STRING"
);
}
}
// If oldest-first strategy is enabled make sure the preferred
// batch sizes are positive and don't exceed maximum batch size.
if
(
config
.
sequence_batching
().
has_oldest
())
{
for
(
const
auto
size
:
config
.
sequence_batching
().
oldest
().
preferred_batch_size
())
{
if
(
size
<=
0
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"sequence batching preferred batch size must be positive for "
+
config
.
name
());
}
if
(
size
>
config
.
max_batch_size
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"sequence batching preferred batch size must be <= max batch "
"size for "
+
config
.
name
());
}
}
}
// If direct strategy is enabled make sure the minimum slot utilization is
// in range (0.0, 1.0]
if
(
config
.
sequence_batching
().
has_direct
())
{
if
((
config
.
sequence_batching
().
direct
().
minimum_slot_utilization
()
<
0.0
)
||
(
config
.
sequence_batching
().
direct
().
minimum_slot_utilization
()
>
1.0
))
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"sequence batching minimum slot utilization must be in range "
"(0.0, 1.0] for "
+
config
.
name
());
}
}
}
// If ensemble scheduling is specified, validate it. Otherwise,
// must validate platform and instance_group
if
(
config
.
has_ensemble_scheduling
())
{
#ifdef TRITON_ENABLE_ENSEMBLE
RETURN_IF_ERROR
(
ValidateEnsembleSchedulingConfig
(
config
));
#else
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"ensemble scheduling not supported"
);
#endif // TRITON_ENABLE_ENSEMBLE
}
#ifdef TRITON_ENABLE_ENSEMBLE
else
if
(
config
.
platform
()
==
kEnsemblePlatform
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"ensemble scheduling must be set for ensemble "
+
config
.
name
()
+
" whose platform is "
+
kEnsemblePlatform
);
}
#endif // TRITON_ENABLE_ENSEMBLE
// FIXME: DLIS-3916 - Response Cache does not yet support decoupled models
if
(
config
.
model_transaction_policy
().
decoupled
()
&&
config
.
response_cache
().
enable
())
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Response Cache does not currently support model "
+
config
.
name
()
+
" with 'decoupled' transaction policy. Please disable the response"
" cache."
);
}
return
Status
::
Success
;
}
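
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// A minimal dynamic-batching configuration that the checks above would
// reject, built with the generated protobuf setters for the fields this
// function reads; "example_model" is a hypothetical name:
//
//   inference::ModelConfig config;
//   config.set_name("example_model");
//   config.set_max_batch_size(8);
//   config.mutable_dynamic_batching()->add_preferred_batch_size(16);
//   // ValidateModelConfig(config, min_compute_capability) now fails with
//   // INVALID_ARG: "dynamic batching preferred size must be <= max batch
//   // size for example_model".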
Status
ValidateInstanceGroup(
    const inference::ModelConfig& config, const double min_compute_capability)
{
  // Instance group setting doesn't apply to ensemble
  if (config.has_ensemble_scheduling()) {
    return Status::Success;
  }

  if (config.instance_group().size() == 0) {
    return Status(
        Status::Code::INVALID_ARG,
        "must specify one or more 'instance group's for " + config.name());
  }

  // Make sure KIND_GPU instance group specifies at least one GPU and
  // doesn't specify a non-existent GPU. Make sure non-KIND_GPU does
  // not specify any GPUs.
#ifdef TRITON_ENABLE_GPU
  std::set<int> supported_gpus;
  Status status = GetSupportedGPUs(&supported_gpus, min_compute_capability);
  if (!status.IsOk()) {
    return status;
  }
#endif  // TRITON_ENABLE_GPU

  for (const auto& group : config.instance_group()) {
    if (group.kind() == inference::ModelInstanceGroup::KIND_MODEL) {
      if (group.gpus().size() > 0) {
        return Status(
            Status::Code::INVALID_ARG,
            "instance group " + group.name() + " of model " + config.name() +
                " has kind KIND_MODEL but specifies one or more GPUs");
      }
    }
    else if (group.kind() == inference::ModelInstanceGroup::KIND_GPU) {
#if !defined(TRITON_ENABLE_GPU) && !defined(TRITON_ENABLE_MALI_GPU)
      return Status(
          Status::Code::INVALID_ARG,
          "instance group " + group.name() + " of model " + config.name() +
              " has kind KIND_GPU but server does not support GPUs");
#elif defined(TRITON_ENABLE_GPU)
      if (group.gpus().size() == 0) {
        if (supported_gpus.size() == 0) {
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " has kind KIND_GPU but no GPUs are available");
        }
        else {
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " has kind KIND_GPU but specifies no GPUs");
        }
      }

      for (const int32_t gid : group.gpus()) {
        if (supported_gpus.find(gid) == supported_gpus.end()) {
          std::string supported_gpus_str;
          for (const auto& cc : supported_gpus) {
            if (!supported_gpus_str.empty()) {
              supported_gpus_str += ", ";
            }
            supported_gpus_str += std::to_string(cc);
          }
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " specifies invalid or unsupported gpu id " +
                  std::to_string(gid) +
                  ". GPUs with at least the minimum required CUDA compute "
                  "compatibility of " +
                  std::to_string(min_compute_capability) +
                  " are: " + supported_gpus_str);
        }
      }
#endif  // ! TRITON_ENABLE_GPU && ! TRITON_ENABLE_MALI_GPU
    }
    else if (group.kind() == inference::ModelInstanceGroup::KIND_CPU) {
      if (group.gpus().size() > 0) {
        return Status(
            Status::Code::INVALID_ARG,
            "instance group " + group.name() + " of model " + config.name() +
                " has kind KIND_CPU but specifies one or more GPUs");
      }
    }
    else {
      return Status(
          Status::Code::INTERNAL,
          "instance group " + group.name() + " of model " + config.name() +
              " has unexpected kind KIND_AUTO");
    }

    if ((config.platform() != kTensorRTPlanPlatform) &&
        !group.profile().empty()) {
      return Status(
          Status::Code::INVALID_ARG,
          "instance group " + group.name() + " of model " + config.name() +
              " and platform " + config.platform() +
              " specifies profile field which is only supported for "
              "TensorRT models");
    }
    else if (!group.profile().empty()) {
      for (const auto& profile : group.profile()) {
        int profile_index;
        RETURN_IF_ERROR(GetProfileIndex(profile, &profile_index));
        if (profile_index < 0) {
          return Status(
              Status::Code::INVALID_ARG,
              "instance group " + group.name() + " of model " + config.name() +
                  " and platform " + config.platform() +
                  " specifies invalid profile " + profile +
                  ". The field should contain the string representation of a "
                  "non-negative integer.");
        }
      }
    }
  }

  return Status::Success;
}
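
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// How typical instance_group stanzas (in config.pbtxt terms) fare against the
// checks above, assuming a build with TRITON_ENABLE_GPU and GPU 0 visible:
//
//   instance_group [ { kind: KIND_GPU, count: 2, gpus: [ 0 ] } ]
//       -> accepted
//   instance_group [ { kind: KIND_GPU } ]
//       -> rejected: "has kind KIND_GPU but specifies no GPUs"
//   instance_group [ { kind: KIND_CPU, gpus: [ 0 ] } ]
//       -> rejected: "has kind KIND_CPU but specifies one or more GPUs"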
Status
ValidateModelInput(
    const inference::ModelInput& io, int32_t max_batch_size,
    const std::string& platform)
{
  RETURN_IF_ERROR(ValidateIOShape(io, max_batch_size, "model input "));

  if (((io.format() == inference::ModelInput::FORMAT_NHWC) ||
       (io.format() == inference::ModelInput::FORMAT_NCHW)) &&
      (io.dims_size() != 3)) {
    return Status(
        Status::Code::INVALID_ARG, "model input NHWC/NCHW require 3 dims");
  }

  if ((platform != kTensorRTPlanPlatform) && io.is_shape_tensor()) {
    return Status(
        Status::Code::INVALID_ARG,
        "shape tensors are only supported for TensorRT platform");
  }

  return Status::Success;
}

Status
CheckAllowedModelInput(
    const inference::ModelInput& io, const std::set<std::string>& allowed)
{
  if (allowed.find(io.name()) == allowed.end()) {
    std::string astr;
    for (const auto& a : allowed) {
      if (!astr.empty()) {
        astr.append(", ");
      }
      astr.append(a);
    }

    return Status(
        Status::Code::INVALID_ARG,
        "unexpected inference input '" + io.name() +
            "', allowed inputs are: " + astr);
  }

  return Status::Success;
}
Status
ValidateModelOutput(
    const inference::ModelOutput& io, int32_t max_batch_size,
    const std::string& platform)
{
  RETURN_IF_ERROR(ValidateIOShape(io, max_batch_size, "model output "));

  if ((platform != kTensorRTPlanPlatform) && io.is_shape_tensor()) {
    return Status(
        Status::Code::INVALID_ARG,
        "shape tensors are only supported for TensorRT platform");
  }

  return Status::Success;
}

Status
CheckAllowedModelOutput(
    const inference::ModelOutput& io, const std::set<std::string>& allowed)
{
  if (allowed.find(io.name()) == allowed.end()) {
    std::string astr;
    for (const auto& a : allowed) {
      if (!astr.empty()) {
        astr.append(", ");
      }
      astr.append(a);
    }

    return Status(
        Status::Code::INVALID_ARG,
        "unexpected inference output '" + io.name() +
            "', allowed outputs are: " + astr);
  }

  return Status::Success;
}
Status
ParseBoolParameter(
    const std::string& key, std::string value, bool* parsed_value)
{
  std::transform(
      value.begin(), value.end(), value.begin(),
      [](unsigned char c) { return std::tolower(c); });
  if ((value == "true") || (value == "1")) {
    *parsed_value = true;
  }
  else if ((value == "false") || (value == "0")) {
    *parsed_value = false;
  }
  else {
    return Status(
        Status::Code::INVALID_ARG,
        "failed to convert " + key + " '" + value + "' to boolean value");
  }

  return Status::Success;
}

Status
ParseLongLongParameter(
    const std::string& key, const std::string& value, int64_t* parsed_value)
{
  try {
    *parsed_value = std::stoll(value);
  }
  catch (const std::invalid_argument& ia) {
    return Status(
        Status::Code::INVALID_ARG,
        "failed to convert " + key + " '" + value + "' to integral number");
  }

  return Status::Success;
}
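
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// Typical use when reading backend parameters; the 'params' map and the
// parameter names below are hypothetical:
//
//   bool enable_pinned = false;
//   RETURN_IF_ERROR(ParseBoolParameter(
//       "enable_pinned_memory", params.at("enable_pinned_memory"),
//       &enable_pinned));
//
//   int64_t pool_bytes = 0;
//   RETURN_IF_ERROR(ParseLongLongParameter(
//       "pool_byte_size", params.at("pool_byte_size"), &pool_bytes));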
Status
GetProfileIndex(const std::string& profile_name, int* profile_index)
{
  if (profile_name.empty()) {
    return Status(Status::Code::INVALID_ARG, "profile name must not be empty");
  }

  try {
    *profile_index = stoi(profile_name);
  }
  catch (const std::invalid_argument& ia) {
    return Status(
        Status::Code::INVALID_ARG,
        "unable to parse '" + profile_name + "': " + ia.what());
  }

  return Status::Success;
}
namespace {

Status
CollectInt64Fields(
    google::protobuf::Message* message, const std::string& prefix,
    std::set<std::string>* int64_fields)
{
  const google::protobuf::Descriptor* desc = message->GetDescriptor();
  const google::protobuf::Reflection* refl = message->GetReflection();
  for (int i = 0; i < desc->field_count(); ++i) {
    const google::protobuf::FieldDescriptor* field = desc->field(i);
    const std::string fullname = prefix + "::" + field->name();
    switch (field->type()) {
      case google::protobuf::FieldDescriptor::TYPE_MESSAGE: {
        if (field->is_repeated()) {
          int rsize = refl->FieldSize(*message, field);
          if (rsize == 0) {
            refl->AddMessage(message, field);
          }
          rsize = refl->FieldSize(*message, field);
          for (int r = 0; r < rsize; ++r) {
            RETURN_IF_ERROR(CollectInt64Fields(
                refl->MutableRepeatedMessage(message, field, r), fullname,
                int64_fields));
          }
        }
        else {
          RETURN_IF_ERROR(CollectInt64Fields(
              refl->MutableMessage(message, field), fullname, int64_fields));
        }
      } break;

      case google::protobuf::FieldDescriptor::TYPE_INT64:
      case google::protobuf::FieldDescriptor::TYPE_UINT64:
      case google::protobuf::FieldDescriptor::TYPE_SINT64:
      case google::protobuf::FieldDescriptor::TYPE_FIXED64:
      case google::protobuf::FieldDescriptor::TYPE_SFIXED64:
        int64_fields->insert(fullname);
        break;

      default:
        break;
    }
  }

  return Status::Success;
}

Status
ValidateModelConfigInt64()
{
  // Must initialize a dummy ModelConfig so that all fields are
  // visited.
  inference::ModelConfig config;

  std::set<std::string> int64_fields;
  RETURN_IF_ERROR(CollectInt64Fields(&config, "ModelConfig", &int64_fields));

  LOG_VERBOSE(1) << "ModelConfig 64-bit fields:";
  for (const auto& f : int64_fields) {
    LOG_VERBOSE(1) << "\t" << f;
  }

  // We expect to find exactly the following fields. If we get an
  // error from this code ModelConfig has added or removed a 64-bit
  // field and we need to adjust here and in ModelConfigToJson below.
  std::set<std::string> expected{
      "ModelConfig::input::dims",
      "ModelConfig::input::reshape::shape",
      "ModelConfig::output::dims",
      "ModelConfig::output::reshape::shape",
      "ModelConfig::version_policy::specific::versions",
      "ModelConfig::dynamic_batching::max_queue_delay_microseconds",
      "ModelConfig::dynamic_batching::default_queue_policy::default_timeout_"
      "microseconds",
      "ModelConfig::dynamic_batching::priority_queue_policy::value::default_"
      "timeout_microseconds",
      "ModelConfig::sequence_batching::direct::max_queue_delay_microseconds",
      "ModelConfig::sequence_batching::state::dims",
      "ModelConfig::sequence_batching::state::initial_state::dims",
      "ModelConfig::sequence_batching::oldest::max_queue_delay_microseconds",
      "ModelConfig::sequence_batching::max_sequence_idle_microseconds",
      "ModelConfig::ensemble_scheduling::step::model_version",
      "ModelConfig::model_warmup::inputs::value::dims",
      "ModelConfig::optimization::cuda::graph_spec::input::value::dim",
      "ModelConfig::optimization::cuda::graph_spec::graph_lower_bound::input::"
      "value::dim",
      "ModelConfig::instance_group::secondary_devices::device_id"};

  if (int64_fields != expected) {
    return Status(
        Status::Code::INTERNAL, "ModelConfig 64-bit field needs update");
  }

  return Status::Success;
}
Status
FixInt(
    triton::common::TritonJson::Value& document,
    triton::common::TritonJson::Value& io, const std::string& name)
{
  triton::common::TritonJson::Value str_value;
  if (!io.Find(name.c_str(), &str_value)) {
    return Status::Success;
  }

  std::string str;
  RETURN_IF_ERROR(str_value.AsString(&str));

  int64_t d;
  try {
    d = std::atoll(str.c_str());
  }
  catch (...) {
    return Status(
        Status::Code::INTERNAL,
        (std::string("unable to convert '") + str + "' to integer"));
  }

  str_value.SetInt(d);

  return Status::Success;
}

Status
FixIntArray(
    triton::common::TritonJson::Value& document,
    triton::common::TritonJson::Value& io, const std::string& name)
{
  triton::common::TritonJson::Value fixed_shape_array(
      document, triton::common::TritonJson::ValueType::ARRAY);

  if (!io.Find(name.c_str())) {
    return Status::Success;
  }

  triton::common::TritonJson::Value shape_array;
  RETURN_IF_ERROR(io.MemberAsArray(name.c_str(), &shape_array));
  for (size_t i = 0; i < shape_array.ArraySize(); ++i) {
    std::string str;
    RETURN_IF_ERROR(shape_array.IndexAsString(i, &str));

    int64_t d;
    try {
      d = std::atoll(str.c_str());
    }
    catch (...) {
      return Status(
          Status::Code::INTERNAL,
          (std::string("unable to convert '") + str + "' to integer"));
    }

    RETURN_IF_ERROR(fixed_shape_array.AppendInt(d));
  }

  shape_array.Swap(fixed_shape_array);
  fixed_shape_array.Release();

  return Status::Success;
}
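
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// What FixInt/FixIntArray do to the protobuf-produced JSON, where 64-bit
// values arrive as quoted strings:
//
//   before: {"dims": ["3", "224", "224"], "max_queue_delay_microseconds": "100"}
//   after:  {"dims": [3, 224, 224], "max_queue_delay_microseconds": 100}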
Status
FixObjectArray(
    triton::common::TritonJson::Value& document,
    triton::common::TritonJson::Value& arr, const std::string& name)
{
  for (size_t i = 0; i < arr.ArraySize(); ++i) {
    triton::common::TritonJson::Value obj;
    RETURN_IF_ERROR(arr.IndexAsObject(i, &obj));
    RETURN_IF_ERROR(FixInt(document, obj, name));
  }

  return Status::Success;
}

}  // namespace
Status
ModelConfigToJson(
    const inference::ModelConfig& config, const uint32_t config_version,
    std::string* json_str)
{
  // Currently only support 'config_version' 1, which is the json
  // representation of the ModelConfig protobuf with the int64 fields
  // fixed to be actual numbers instead of the string madness done by
  // protobuf.
  if (config_version != 1) {
    return Status(
        Status::Code::INVALID_ARG,
        std::string("model configuration version ") +
            std::to_string(config_version) +
            " not supported, supported versions are: 1");
  }

  // Config will have 0 byte size if all fields are with default value,
  // in other words the config is empty.
  if (config.ByteSizeLong() == 0) {
    json_str->clear();
    return Status::Success;
  }

  std::string config_json_str;

  ::google::protobuf::util::JsonPrintOptions options;
  options.preserve_proto_field_names = true;
  options.always_print_primitive_fields = true;
  ::google::protobuf::util::MessageToJsonString(
      config, &config_json_str, options);

  // We need to verify that every 64-bit field in the
  // ModelConfig protobuf is being handled. We hardcode the known
  // fields and check just once to make sure everything has been
  // handled. We could have this check in a separately compiled CI
  // test but it is convenient to keep it here close to the code below
  // that actually fixes the 64-bit fields.
  {
    static std::once_flag fonce;
    Status status = Status::Success;
    std::call_once(fonce, [&status] { status = ValidateModelConfigInt64(); });
    RETURN_IF_ERROR(status);
  }

  // In the json produced by protobuf, int64 and uint64 values are
  // represented as strings. Protobuf doesn't provide an option to
  // disable this (sigh) so we need to fix it up here as we want the
  // json representation of the config to be reasonable json...
  triton::common::TritonJson::Value config_json;
  config_json.Parse(config_json_str);

  // Fix input::dims, input::reshape::shape, output::dims,
  // output::reshape::shape
  for (std::string name : {"input", "output"}) {
    triton::common::TritonJson::Value ios;
    RETURN_IF_ERROR(config_json.MemberAsArray(name.c_str(), &ios));
    for (size_t i = 0; i < ios.ArraySize(); ++i) {
      triton::common::TritonJson::Value io;
      RETURN_IF_ERROR(ios.IndexAsObject(i, &io));
      RETURN_IF_ERROR(FixIntArray(config_json, io, "dims"));

      triton::common::TritonJson::Value reshape;
      if (io.Find("reshape", &reshape)) {
        RETURN_IF_ERROR(FixIntArray(config_json, reshape, "shape"));
      }
    }
  }

  // Fix version_policy::specific::versions
  {
    triton::common::TritonJson::Value vp;
    if (config_json.Find("version_policy", &vp)) {
      triton::common::TritonJson::Value specific;
      if (vp.Find("specific", &specific)) {
        RETURN_IF_ERROR(FixIntArray(config_json, specific, "versions"));
      }
    }
  }

  // Fix dynamic_batching::max_queue_delay_microseconds,
  // dynamic_batching::default_queue_policy::default_timeout_microseconds,
  // dynamic_batching::priority_queue_policy::value::default_timeout_microseconds
  {
    triton::common::TritonJson::Value db;
    if (config_json.Find("dynamic_batching", &db)) {
      RETURN_IF_ERROR(FixInt(config_json, db, "max_queue_delay_microseconds"));
      triton::common::TritonJson::Value dqp;
      if (db.Find("default_queue_policy", &dqp)) {
        RETURN_IF_ERROR(
            FixInt(config_json, dqp, "default_timeout_microseconds"));
      }
      triton::common::TritonJson::Value pqp;
      if (db.Find("priority_queue_policy", &pqp)) {
        // Iterate over each member in 'pqp' and fix...
        std::vector<std::string> members;
        RETURN_IF_ERROR(pqp.Members(&members));
        for (const auto& m : members) {
          triton::common::TritonJson::Value el;
          RETURN_IF_ERROR(pqp.MemberAsObject(m.c_str(), &el));
          RETURN_IF_ERROR(
              FixInt(config_json, el, "default_timeout_microseconds"));
        }
      }
    }
  }

  // Fix sequence_batching::oldest::max_queue_delay_microseconds,
  // sequence_batching::direct::max_queue_delay_microseconds,
  // sequence_batching::max_sequence_idle_microseconds
  {
    triton::common::TritonJson::Value sb;
    if (config_json.Find("sequence_batching", &sb)) {
      RETURN_IF_ERROR(
          FixInt(config_json, sb, "max_sequence_idle_microseconds"));
      triton::common::TritonJson::Value oldest;
      if (sb.Find("oldest", &oldest)) {
        RETURN_IF_ERROR(
            FixInt(config_json, oldest, "max_queue_delay_microseconds"));
      }
      triton::common::TritonJson::Value direct;
      if (sb.Find("direct", &direct)) {
        RETURN_IF_ERROR(
            FixInt(config_json, direct, "max_queue_delay_microseconds"));
      }
      triton::common::TritonJson::Value states;
      if (sb.Find("state", &states)) {
        for (size_t i = 0; i < states.ArraySize(); ++i) {
          triton::common::TritonJson::Value state;
          RETURN_IF_ERROR(states.IndexAsObject(i, &state));
          RETURN_IF_ERROR(FixIntArray(config_json, state, "dims"));

          triton::common::TritonJson::Value initial_state;
          if (sb.Find("initial_state", &initial_state)) {
            RETURN_IF_ERROR(FixIntArray(config_json, initial_state, "dims"));
          }
        }
      }
    }
  }

  // Fix ensemble_scheduling::step::model_version.
  {
    triton::common::TritonJson::Value ens;
    if (config_json.Find("ensemble_scheduling", &ens)) {
      triton::common::TritonJson::Value step;
      if (ens.Find("step", &step)) {
        RETURN_IF_ERROR(FixObjectArray(config_json, step, "model_version"));
      }
    }
  }

  // Fix model_warmup::inputs::value::dims.
  {
    triton::common::TritonJson::Value warmups;
    if (config_json.Find("model_warmup", &warmups)) {
      for (size_t i = 0; i < warmups.ArraySize(); ++i) {
        triton::common::TritonJson::Value warmup;
        RETURN_IF_ERROR(warmups.IndexAsObject(i, &warmup));
        triton::common::TritonJson::Value inputs;
        if (warmup.Find("inputs", &inputs)) {
          std::vector<std::string> members;
          RETURN_IF_ERROR(inputs.Members(&members));
          for (const auto& m : members) {
            triton::common::TritonJson::Value input;
            RETURN_IF_ERROR(inputs.MemberAsObject(m.c_str(), &input));
            RETURN_IF_ERROR(FixIntArray(config_json, input, "dims"));
          }
        }
      }
    }
  }

  // Convert the fixed json back to the string...
  triton::common::TritonJson::WriteBuffer buffer;
  RETURN_IF_ERROR(config_json.Write(&buffer));
  *json_str = std::move(buffer.MutableContents());

  return Status::Success;
}
Status
JsonToModelConfig(
    const std::string& json_config, const uint32_t config_version,
    inference::ModelConfig* protobuf_config)
{
  // Currently only support 'config_version' 1, which is the json
  // representation of the ModelConfig protobuf and matches the
  // representation produced by ModelConfigToJson().
  if (config_version != 1) {
    return Status(
        Status::Code::INVALID_ARG,
        std::string("model configuration version ") +
            std::to_string(config_version) +
            " not supported, supported versions are: 1");
  }

  ::google::protobuf::util::JsonParseOptions options;
  options.case_insensitive_enum_parsing = true;
  options.ignore_unknown_fields = false;
  auto err = ::google::protobuf::util::JsonStringToMessage(
      json_config, protobuf_config, options);
  if (!err.ok()) {
    return Status(
        Status::Code::INVALID_ARG, std::string(err.message()));
  }

  return Status::Success;
}
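
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// Round-tripping a configuration through the JSON helpers; error handling is
// elided and the variable names are hypothetical:
//
//   std::string json;
//   ModelConfigToJson(config, 1 /* config_version */, &json);
//   // 64-bit fields such as "max_queue_delay_microseconds" are now plain
//   // JSON numbers rather than the quoted strings protobuf emits.
//
//   inference::ModelConfig parsed;
//   JsonToModelConfig(json, 1 /* config_version */, &parsed);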
BackendType
GetBackendTypeFromPlatform(const std::string& platform_name)
{
  if ((platform_name == kTensorFlowGraphDefPlatform) ||
      (platform_name == kTensorFlowSavedModelPlatform)) {
    return BackendType::BACKEND_TYPE_TENSORFLOW;
  }

  if (platform_name == kTensorRTPlanPlatform) {
    return BackendType::BACKEND_TYPE_TENSORRT;
  }

  if (platform_name == kOnnxRuntimeOnnxPlatform) {
    return BackendType::BACKEND_TYPE_ONNXRUNTIME;
  }

  if (platform_name == kPyTorchLibTorchPlatform) {
    return BackendType::BACKEND_TYPE_PYTORCH;
  }

  return BackendType::BACKEND_TYPE_UNKNOWN;
}

/// Get the BackendType value for a backend name.
/// \param backend_name The backend name.
/// \return The BackendType or BackendType::UNKNOWN if the backend string
/// is not recognized.
BackendType
GetBackendType(const std::string& backend_name)
{
  if (backend_name == kTensorFlowBackend) {
    return BackendType::BACKEND_TYPE_TENSORFLOW;
  }

  if (backend_name == kTensorRTBackend) {
    return BackendType::BACKEND_TYPE_TENSORRT;
  }

  if (backend_name == kOnnxRuntimeBackend) {
    return BackendType::BACKEND_TYPE_ONNXRUNTIME;
  }

  if (backend_name == kPyTorchBackend) {
    return BackendType::BACKEND_TYPE_PYTORCH;
  }

  return BackendType::BACKEND_TYPE_UNKNOWN;
}
TRITONSERVER_DataType
DataTypeToTriton(const inference::DataType dtype)
{
  switch (dtype) {
    case inference::DataType::TYPE_BOOL:
      return TRITONSERVER_TYPE_BOOL;
    case inference::DataType::TYPE_UINT8:
      return TRITONSERVER_TYPE_UINT8;
    case inference::DataType::TYPE_UINT16:
      return TRITONSERVER_TYPE_UINT16;
    case inference::DataType::TYPE_UINT32:
      return TRITONSERVER_TYPE_UINT32;
    case inference::DataType::TYPE_UINT64:
      return TRITONSERVER_TYPE_UINT64;
    case inference::DataType::TYPE_INT8:
      return TRITONSERVER_TYPE_INT8;
    case inference::DataType::TYPE_INT16:
      return TRITONSERVER_TYPE_INT16;
    case inference::DataType::TYPE_INT32:
      return TRITONSERVER_TYPE_INT32;
    case inference::DataType::TYPE_INT64:
      return TRITONSERVER_TYPE_INT64;
    case inference::DataType::TYPE_FP16:
      return TRITONSERVER_TYPE_FP16;
    case inference::DataType::TYPE_FP32:
      return TRITONSERVER_TYPE_FP32;
    case inference::DataType::TYPE_FP64:
      return TRITONSERVER_TYPE_FP64;
    case inference::DataType::TYPE_STRING:
      return TRITONSERVER_TYPE_BYTES;
    case inference::DataType::TYPE_BF16:
      return TRITONSERVER_TYPE_BF16;
    default:
      break;
  }

  return TRITONSERVER_TYPE_INVALID;
}

inference::DataType
TritonToDataType(const TRITONSERVER_DataType dtype)
{
  switch (dtype) {
    case TRITONSERVER_TYPE_BOOL:
      return inference::DataType::TYPE_BOOL;
    case TRITONSERVER_TYPE_UINT8:
      return inference::DataType::TYPE_UINT8;
    case TRITONSERVER_TYPE_UINT16:
      return inference::DataType::TYPE_UINT16;
    case TRITONSERVER_TYPE_UINT32:
      return inference::DataType::TYPE_UINT32;
    case TRITONSERVER_TYPE_UINT64:
      return inference::DataType::TYPE_UINT64;
    case TRITONSERVER_TYPE_INT8:
      return inference::DataType::TYPE_INT8;
    case TRITONSERVER_TYPE_INT16:
      return inference::DataType::TYPE_INT16;
    case TRITONSERVER_TYPE_INT32:
      return inference::DataType::TYPE_INT32;
    case TRITONSERVER_TYPE_INT64:
      return inference::DataType::TYPE_INT64;
    case TRITONSERVER_TYPE_FP16:
      return inference::DataType::TYPE_FP16;
    case TRITONSERVER_TYPE_FP32:
      return inference::DataType::TYPE_FP32;
    case TRITONSERVER_TYPE_FP64:
      return inference::DataType::TYPE_FP64;
    case TRITONSERVER_TYPE_BYTES:
      return inference::DataType::TYPE_STRING;
    case TRITONSERVER_TYPE_BF16:
      return inference::DataType::TYPE_BF16;
    default:
      break;
  }

  return inference::DataType::TYPE_INVALID;
}
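
// --- Illustrative sketch (editor's addition, not part of the original file) ---
// The two conversions are inverses, the only naming difference being the
// string type:
//
//   DataTypeToTriton(inference::DataType::TYPE_STRING) == TRITONSERVER_TYPE_BYTES
//   TritonToDataType(TRITONSERVER_TYPE_BYTES) == inference::DataType::TYPE_STRING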
}}  // namespace triton::core
3rdparty/core-r22.12/src/model_config_utils.h
deleted 100644 → 0
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "model_config.pb.h"
#include "status.h"
#include "triton/common/model_config.h"
#include "tritonserver_apis.h"
#include "filesystem.h"
namespace triton { namespace core {

/// Enumeration for the different backend types.
enum BackendType {
  BACKEND_TYPE_UNKNOWN = 0,
  BACKEND_TYPE_TENSORRT = 1,
  BACKEND_TYPE_TENSORFLOW = 2,
  BACKEND_TYPE_ONNXRUNTIME = 3,
  BACKEND_TYPE_PYTORCH = 4
};
/// Get version of a model from the path containing the model
/// definition file.
/// \param path The path to the model definition file.
/// \param version Returns the version.
/// \return The error status.
Status GetModelVersionFromPath(const std::string& path, int64_t* version);

/// Get the tensor name, false value, and true value for a boolean
/// sequence batcher control kind. If 'required' is true then must
/// find a tensor for the control. If 'required' is false, return
/// 'tensor_name' as empty-string if the control is not mapped to any
/// tensor.
Status GetBooleanSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype, float* fp32_false_value,
    float* fp32_true_value, int32_t* int32_false_value,
    int32_t* int32_true_value, bool* bool_false_value, bool* bool_true_value);

/// Get the tensor name and datatype for a non-boolean sequence
/// batcher control kind. If 'required' is true then must find a
/// tensor for the control. If 'required' is false, return
/// 'tensor_name' as empty-string if the control is not mapped to any
/// tensor. 'tensor_datatype' returns the required datatype for the
/// control.
Status GetTypedSequenceControlProperties(
    const inference::ModelSequenceBatching& batcher,
    const std::string& model_name,
    const inference::ModelSequenceBatching::Control::Kind control_kind,
    const bool required, std::string* tensor_name,
    inference::DataType* tensor_datatype);

/// Read a ModelConfig and normalize it as expected by model backends.
/// \param model_name The name of the model.
/// \param path The full-path to the directory containing the
/// model configuration.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \param config Returns the normalized model configuration.
/// \return The error status.
Status GetNormalizedModelConfig(
    const std::string& model_name, const std::string& path,
    const double min_compute_capability, inference::ModelConfig* config);
/// Auto-complete backend related fields (platform, backend and default model
/// filename) if not set, note that only Triton recognized backends will be
/// checked.
/// \param model_name The name of the model.
/// \param model_path The full-path to the directory containing the
/// model configuration.
/// \param config Returns the auto-completed model configuration.
/// \return The error status.
Status AutoCompleteBackendFields(
    const std::string& model_name, const std::string& model_path,
    inference::ModelConfig* config);

/// Detects and adds missing fields in the model configuration.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \param config The model configuration
/// \return The error status
Status NormalizeModelConfig(
    const double min_compute_capability, inference::ModelConfig* config);

/// [FIXME] better formalize config normalization / validation
/// Detects and adds missing fields in instance group setting.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \param config The model configuration
/// \return The error status
Status NormalizeInstanceGroup(
    const double min_compute_capability,
    const std::vector<inference::ModelInstanceGroup>& preferred_groups,
    inference::ModelConfig* config);

/// [FIXME] Remove once a more permanent solution is implemented (DLIS-4211)
/// Localize EXECUTION_ENV_PATH in python backend.
/// \param model_path The full-path to the directory containing the model
/// configuration, before localization.
/// \param config The model configuration
/// \param localized_model_dir The localized model directory
/// \return The error status
Status LocalizePythonBackendExecutionEnvironmentPath(
    const std::string& model_path, inference::ModelConfig* config,
    std::shared_ptr<LocalizedPath>* localized_model_dir);

/// Auto-complete the instance count based on instance kind and backend name.
/// \param group The instance group to set the count for.
/// \param backend The backend name to check against.
/// \return The error status.
Status SetDefaultInstanceCount(
    inference::ModelInstanceGroup* group, const std::string& backend);

/// Validate that a model is specified correctly, except for model inputs
/// and outputs. ValidateModelIOConfig() should be called to
/// validate model inputs and outputs.
/// \param config The model configuration to validate.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \return The error status. A non-OK status indicates the configuration
/// is not valid.
Status ValidateModelConfig(
    const inference::ModelConfig& config, const double min_compute_capability);

/// [FIXME] better formalize config normalization / validation
/// Validate instance group setting.
/// \param config The model configuration to validate.
/// \param min_compute_capability The minimum supported CUDA compute
/// capability.
/// \return The error status. A non-OK status indicates the configuration
/// is not valid.
Status ValidateInstanceGroup(
    const inference::ModelConfig& config, const double min_compute_capability);

/// Validate that a model's inputs and outputs are specified correctly.
/// \param config The model configuration to validate.
/// \return The error status. A non-OK status indicates the configuration
/// is not valid.
Status ValidateModelIOConfig(const inference::ModelConfig& config);
/// Validate that an input is specified correctly in a model
/// configuration.
/// \param io The model input.
/// \param max_batch_size The max batch size specified in model configuration.
/// \param platform The platform name
/// \return The error status. A non-OK status indicates the input
/// is not valid.
Status ValidateModelInput(
    const inference::ModelInput& io, int32_t max_batch_size,
    const std::string& platform);

/// Validate that an input matches one of the allowed input names.
/// \param io The model input.
/// \param allowed The set of allowed input names.
/// \return The error status. A non-OK status indicates the input
/// is not valid.
Status CheckAllowedModelInput(
    const inference::ModelInput& io, const std::set<std::string>& allowed);

/// Validate that an output is specified correctly in a model
/// configuration.
/// \param io The model output.
/// \param max_batch_size The max batch size specified in model configuration.
/// \param platform The platform name
/// \return The error status. A non-OK status indicates the output
/// is not valid.
Status ValidateModelOutput(
    const inference::ModelOutput& io, int32_t max_batch_size,
    const std::string& platform);

/// Validate that an output matches one of the allowed output names.
/// \param io The model output.
/// \param allowed The set of allowed output names.
/// \return The error status. A non-OK status indicates the output
/// is not valid.
Status CheckAllowedModelOutput(
    const inference::ModelOutput& io, const std::set<std::string>& allowed);

/// Validate that a model's batch inputs and batch outputs are specified
/// correctly.
/// \param config The model configuration to validate.
/// \return The error status. A non-OK status indicates the batch inputs or
/// batch outputs are not valid.
Status ValidateBatchIO(const inference::ModelConfig& config);

/// Parse the 'value' of the parameter 'key' into a boolean value.
/// \param key The name of the parameter.
/// \param value The value of the parameter in string.
/// \param parsed_value Return the boolean of the parameter.
/// \return The error status. A non-OK status indicates failure on parsing the
/// value.
Status ParseBoolParameter(
    const std::string& key, std::string value, bool* parsed_value);

/// Parse the 'value' of the parameter 'key' into a long long integer value.
/// \param key The name of the parameter.
/// \param value The value of the parameter in string.
/// \param parsed_value Return the numerical value of the parameter.
/// \return The error status. A non-OK status indicates failure on parsing the
/// value.
Status ParseLongLongParameter(
    const std::string& key, const std::string& value, int64_t* parsed_value);

/// Obtain the 'profile_index' of the 'profile_name'.
/// \param profile_name The name of the profile.
/// \param profile_index Return the index of the profile.
/// \return The error status. A non-OK status indicates failure on getting the
/// value.
Status GetProfileIndex(const std::string& profile_name, int* profile_index);

/// Convert a model configuration protobuf to the equivalent json.
/// \param config The protobuf model configuration.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned.
/// \param json_str Returns the equivalent JSON.
/// \return The error status.
Status ModelConfigToJson(
    const inference::ModelConfig& config, const uint32_t config_version,
    std::string* json_str);

/// Convert a model configuration JSON to the equivalent protobuf.
/// \param json_config The JSON model configuration.
/// \param config_version The model configuration will be returned in
/// a format matching this version. If the configuration cannot be
/// represented in the requested version's format then an error will
/// be returned.
/// \param protobuf_config Returns the equivalent protobuf.
/// \return The error status.
Status JsonToModelConfig(
    const std::string& json_config, const uint32_t config_version,
    inference::ModelConfig* protobuf_config);

/// Get the BackendType value for a platform name.
/// \param platform_name The platform name.
/// \return The BackendType or BackendType::UNKNOWN if the platform string
/// is not recognized.
BackendType GetBackendTypeFromPlatform(const std::string& platform_name);

/// Get the BackendType value for a backend name.
/// \param backend_name The backend name.
/// \return The BackendType or BackendType::UNKNOWN if the backend string
/// is not recognized.
BackendType GetBackendType(const std::string& backend_name);

/// Get the Triton server data type corresponding to a data type.
/// \param dtype The data type.
/// \return The Triton server data type.
TRITONSERVER_DataType DataTypeToTriton(const inference::DataType dtype);

/// Get the data type corresponding to a Triton server data type.
/// \param dtype The Triton server data type.
/// \return The data type.
inference::DataType TritonToDataType(const TRITONSERVER_DataType dtype);

}}  // namespace triton::core
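
// --- Illustrative sketch (editor's addition, not part of the original header) ---
// Typical load-time use of these helpers, with error handling elided;
// 'model_name', 'model_path' and 'min_cc' are placeholders:
//
//   inference::ModelConfig config;
//   GetNormalizedModelConfig(model_name, model_path, min_cc, &config);
//   ValidateModelConfig(config, min_cc);
//   ValidateModelIOConfig(config);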