guobj / Qwen_lmdeploy · Commits

Commit fcefbf3d, authored Nov 30, 2023 by xiabo
Commit message: Reorganize the project (重新整理工程)
Parent: d592fbea
Changes: 170 files (only 170 of 170+ changed files are displayed)
Showing 10 changed files with 0 additions and 4108 deletions (+0 -4108):

+0 -740     3rdparty/core-r22.12/src/model_lifecycle.cc
+0 -324     3rdparty/core-r22.12/src/model_lifecycle.h
+0 -1602    3rdparty/core-r22.12/src/model_repository_manager.cc
+0 -345     3rdparty/core-r22.12/src/model_repository_manager.h
+0 -237     3rdparty/core-r22.12/src/numa_utils.cc
+0 -57      3rdparty/core-r22.12/src/numa_utils.h
+0 -215     3rdparty/core-r22.12/src/payload.cc
+0 -102     3rdparty/core-r22.12/src/payload.h
+0 -378     3rdparty/core-r22.12/src/pinned_memory_manager.cc
+0 -108     3rdparty/core-r22.12/src/pinned_memory_manager.h
3rdparty/core-r22.12/src/model_lifecycle.cc (deleted, mode 100644 → 0)
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "model_lifecycle.h"
#include <algorithm>
#include <deque>
#include <future>
#include <stdexcept>
#include <thread>
#include "constants.h"
#include "filesystem.h"
#include "model.h"
#include "model_config_utils.h"
#include "repo_agent.h"
#include "triton/common/logging.h"
#include "triton/common/thread_pool.h"
#include "backend_model.h"
#ifdef TRITON_ENABLE_ENSEMBLE
#include "ensemble_model.h"
#endif // TRITON_ENABLE_ENSEMBLE
namespace triton { namespace core {

const std::string&
ModelReadyStateString(ModelReadyState state)
{
  switch (state) {
    case ModelReadyState::UNKNOWN: {
      static std::string m("UNKNOWN");
      return m;
    }
    case ModelReadyState::READY: {
      static std::string m("READY");
      return m;
    }
    case ModelReadyState::UNAVAILABLE: {
      static std::string m("UNAVAILABLE");
      return m;
    }
    case ModelReadyState::LOADING: {
      static std::string m("LOADING");
      return m;
    }
    case ModelReadyState::UNLOADING: {
      static std::string m("UNLOADING");
      return m;
    }
  }

  static std::string m("<unknown>");
  return m;
}

namespace {

Status
VersionsToLoad(
    const std::string model_path, const std::string& name,
    const inference::ModelConfig& model_config, std::set<int64_t>* versions)
{
  versions->clear();

  // Get integral number of the version directory
  std::set<std::string> subdirs;
  RETURN_IF_ERROR(GetDirectorySubdirs(model_path, &subdirs));
  std::set<int64_t, std::greater<int64_t>> existing_versions;
  for (const auto& subdir : subdirs) {
    if (subdir == kWarmupDataFolder || subdir == kInitialStateFolder) {
      continue;
    }
    if ((subdir.length() > 1) && (subdir.front() == '0')) {
      LOG_WARNING << "ignore version directory '" << subdir
                  << "' which contains leading zeros in its directory name";
      continue;
    }
    try {
      int64_t version = std::stoll(subdir);
      existing_versions.insert(version);
    }
    catch (const std::invalid_argument& ia) {
      LOG_WARNING << "ignore version directory '" << subdir
                  << "' which fails to convert to integral number";
    }
  }

  if (model_config.version_policy().has_specific()) {
    for (const auto& v : model_config.version_policy().specific().versions()) {
      // Only load the specific versions that are present in model directory
      bool version_not_exist = existing_versions.insert(v).second;
      if (!version_not_exist) {
        versions->emplace(v);
      } else {
        LOG_ERROR << "version " << v << " is specified for model '" << name
                  << "', but the version directory is not present";
      }
    }
  } else {
    if (model_config.version_policy().has_latest()) {
      // std::set is sorted with std::greater
      for (const auto& v : existing_versions) {
        if (versions->size() >=
            model_config.version_policy().latest().num_versions()) {
          break;
        }
        versions->emplace(v);
      }
    } else {
      // all
      versions->insert(existing_versions.begin(), existing_versions.end());
    }
  }

  return Status::Success;
}

// Use smart pointer with custom deleter so that model state will be updated
// to UNAVAILABLE if all smart pointer copies are out of scope
struct ModelDeleter {
  ModelDeleter(std::function<void()> OnDestroyModel)
      : OnDestroyModel_(std::move(OnDestroyModel))
  {
  }

  void operator()(Model* model)
  {
    // The actual model object must be destroyed in a different
    // thread. This thread could have a callstack that includes the
    // model itself because this deleter could be triggered by
    // a request release or response send in the model. Following
    // delete will lead to the model destructor which may wait on this
    // same thread... so deadlock if we don't use a different thread
    // here.
    std::function<void()> destroy_fn = OnDestroyModel_;
    std::thread dthd([model, destroy_fn]() {
      delete model;
      destroy_fn();
    });

    dthd.detach();
  }

  // Use to inform the ModelLifeCycle that the model handle is destroyed
  std::function<void()> OnDestroyModel_;
};

}  // namespace

Status
ModelLifeCycle::Create(
    InferenceServer* server, const ModelLifeCycleOptions& options,
    std::unique_ptr<ModelLifeCycle>* life_cycle)
{
  std::unique_ptr<ModelLifeCycle> local_life_cycle(
      new ModelLifeCycle(server, options));

  *life_cycle = std::move(local_life_cycle);
  return Status::Success;
}

const ModelStateMap
ModelLifeCycle::LiveModelStates(bool strict_readiness)
{
  LOG_VERBOSE(2) << "LiveModelStates()";
  std::lock_guard<std::mutex> map_lock(map_mtx_);
  ModelStateMap live_model_states;
  for (auto& model_version : map_) {
    bool live = false;
    VersionStateMap version_map;

    for (auto& version_model : model_version.second) {
      std::lock_guard<std::mutex> lock(version_model.second->mtx_);
      if (strict_readiness &&
          version_model.second->state_ != ModelReadyState::READY) {
        continue;
      }

      // At least one version is live (ready / loading / unloading)
      if ((version_model.second->state_ != ModelReadyState::UNKNOWN) &&
          (version_model.second->state_ != ModelReadyState::UNAVAILABLE)) {
        live = true;
        version_map[version_model.first] = std::make_pair(
            version_model.second->state_, version_model.second->state_reason_);
      }
    }

    if (live) {
      live_model_states[model_version.first] = std::move(version_map);
    }
  }
  return live_model_states;
}

Status
ModelLifeCycle::StopAllModels()
{
  LOG_VERBOSE(2) << "StopAllModels()";
  std::lock_guard<std::mutex> map_lock(map_mtx_);
  for (auto& model_version : map_) {
    for (auto& version_model : model_version.second) {
      if (version_model.second != nullptr) {
        std::lock_guard<std::mutex> lock(version_model.second->mtx_);
        if (version_model.second->model_ != nullptr) {
          version_model.second->model_->Stop();
        }
      }
    }
  }
  return Status::Success;
}

const std::set<std::tuple<std::string, int64_t, size_t>>
ModelLifeCycle::InflightStatus()
{
  LOG_VERBOSE(2) << "InflightStatus()";
  std::lock_guard<std::mutex> map_lock(map_mtx_);
  std::set<std::tuple<std::string, int64_t, size_t>> inflight_status;
  for (auto& model_version : map_) {
    for (auto& version_model : model_version.second) {
      if (version_model.second != nullptr) {
        std::lock_guard<std::mutex> lock(version_model.second->mtx_);
        if (version_model.second->model_ != nullptr) {
          const auto cnt =
              version_model.second->model_->InflightInferenceCount();
          if (cnt != 0) {
            inflight_status.emplace(
                model_version.first, version_model.first, cnt);
          }
        }
      }
    }
  }
  return inflight_status;
}

const ModelStateMap
ModelLifeCycle::ModelStates()
{
  LOG_VERBOSE(2) << "ModelStates()";
  std::lock_guard<std::mutex> map_lock(map_mtx_);
  ModelStateMap model_states;
  for (auto& model_version : map_) {
    VersionStateMap version_map;

    for (auto& version_model : model_version.second) {
      std::lock_guard<std::mutex> lock(version_model.second->mtx_);
      version_map[version_model.first] = std::make_pair(
          version_model.second->state_, version_model.second->state_reason_);
    }

    model_states[model_version.first] = std::move(version_map);
  }

  return model_states;
}

const VersionStateMap
ModelLifeCycle::VersionStates(const std::string& model_name)
{
  LOG_VERBOSE(2) << "VersionStates() '" << model_name << "'";
  std::lock_guard<std::mutex> map_lock(map_mtx_);
  VersionStateMap version_map;
  auto mit = map_.find(model_name);
  if (mit != map_.end()) {
    for (auto& version_model : mit->second) {
      std::lock_guard<std::mutex> lock(version_model.second->mtx_);
      version_map[version_model.first] = std::make_pair(
          version_model.second->state_, version_model.second->state_reason_);
    }
  }

  return version_map;
}

Status
ModelLifeCycle::ModelState(
    const std::string& model_name, const int64_t model_version,
    ModelReadyState* state)
{
  std::lock_guard<std::mutex> map_lock(map_mtx_);
  auto mit = map_.find(model_name);
  if (mit != map_.end()) {
    auto vit = mit->second.find(model_version);
    if (vit != mit->second.end()) {
      std::lock_guard<std::mutex> lock(vit->second->mtx_);
      *state = vit->second->state_;
      return Status::Success;
    }
  }

  return Status(
      Status::Code::NOT_FOUND, "model '" + model_name + "', version " +
                                   std::to_string(model_version) +
                                   " is not found");
}

Status
ModelLifeCycle::GetModel(
    const std::string& model_name, const int64_t version,
    std::shared_ptr<Model>* model)
{
  LOG_VERBOSE(2) << "GetModel() '" << model_name << "' version " << version;
  std::lock_guard<std::mutex> map_lock(map_mtx_);
  auto mit = map_.find(model_name);
  if (mit == map_.end()) {
    return Status(
        Status::Code::NOT_FOUND, "'" + model_name + "' is not found");
  }

  auto vit = mit->second.find(version);
  if (vit == mit->second.end()) {
    if (version != -1) {
      return Status(
          Status::Code::NOT_FOUND, "'" + model_name + "' version " +
                                       std::to_string(version) +
                                       " is not found");
    }
    // The case where the request is asking for latest version
    int64_t latest = -1;
    for (auto& version_model : mit->second) {
      if (version_model.first > latest) {
        std::lock_guard<std::mutex> lock(version_model.second->mtx_);
        if (version_model.second->state_ == ModelReadyState::READY) {
          latest = version_model.first;
          // Tedious, but have to set handle for any "latest" version
          // at the moment to avoid edge case like the following:
          // "versions : 1 3 2", version 3 is latest but is requested
          // to be unloaded when the iterator is examining version 2,
          // then 'model' will ensure version 3 is still valid
          *model = version_model.second->model_;
        }
      }
    }
    if (latest == -1) {
      return Status(
          Status::Code::NOT_FOUND,
          "'" + model_name + "' has no available versions");
    }
  } else {
    std::lock_guard<std::mutex> lock(vit->second->mtx_);
    if (vit->second->state_ == ModelReadyState::READY) {
      *model = vit->second->model_;
    } else {
      return Status(
          Status::Code::UNAVAILABLE, "'" + model_name + "' version " +
                                         std::to_string(version) +
                                         " is not at ready state");
    }
  }

  return Status::Success;
}

Status
ModelLifeCycle::AsyncUnload(const std::string& model_name)
{
  LOG_VERBOSE(2) << "AsyncUnload() '" << model_name << "'";
  std::lock_guard<std::mutex> map_lock(map_mtx_);
  auto it = map_.find(model_name);
  if (it == map_.end()) {
    return Status(
        Status::Code::INVALID_ARG, "Model to be unloaded has not been served");
  }

  // Get the existing agent models and notify the unload action
  const uint64_t now_ns =
      std::chrono::duration_cast<std::chrono::nanoseconds>(
          std::chrono::steady_clock::now().time_since_epoch())
          .count();
  for (auto& version : it->second) {
    auto& model_info = version.second;
    std::lock_guard<std::mutex> lock(model_info->mtx_);
    model_info->last_update_ns_ = now_ns;
    // Unload serving model; for a model that is in LOADING state,
    // the updated timestamp will be recognized as a newer update
    // on the model info and the load should be aborted
    if (model_info->state_ == ModelReadyState::READY) {
      if (model_info->agent_model_list_ != nullptr) {
        // Only log the error because the model should be unloaded regardless
        auto status = model_info->agent_model_list_->InvokeAgentModels(
            TRITONREPOAGENT_ACTION_UNLOAD);
        if (!status.IsOk()) {
          LOG_ERROR
              << "Agent model returns error on TRITONREPOAGENT_ACTION_UNLOAD: "
              << status.AsString();
        }
      }
      // unload
      model_info->Release();
    }
  }

  return Status::Success;
}

Status
ModelLifeCycle::AsyncLoad(
    const std::string& model_name, const std::string& model_path,
    const inference::ModelConfig& model_config, const bool is_config_provided,
    const std::shared_ptr<TritonRepoAgentModelList>& agent_model_list,
    std::function<void(Status)>&& OnComplete)
{
  LOG_VERBOSE(2) << "AsyncLoad() '" << model_name << "'";
  std::lock_guard<std::mutex> map_lock(map_mtx_);
  auto it = map_.find(model_name);
  if (it == map_.end()) {
    it = map_.emplace(std::make_pair(model_name, VersionMap())).first;
  }

  std::set<int64_t> versions;
  RETURN_IF_ERROR(
      VersionsToLoad(model_path, model_name, model_config, &versions));
  if (versions.empty()) {
    return Status(
        Status::Code::INVALID_ARG,
        "at least one version must be available under the version policy of "
        "model '" +
            model_name + "'");
  }

  const uint64_t now_ns =
      std::chrono::duration_cast<std::chrono::nanoseconds>(
          std::chrono::steady_clock::now().time_since_epoch())
          .count();
  std::shared_ptr<LoadTracker> load_tracker(
      new LoadTracker(versions.size(), now_ns));
  for (const auto& version : versions) {
    std::unique_ptr<ModelInfo> linfo(
        new ModelInfo(model_path, model_config, now_ns));
    ModelInfo* model_info = linfo.get();

    LOG_INFO << "loading: " << model_name << ":" << version;
    model_info->state_ = ModelReadyState::LOADING;
    model_info->state_reason_.clear();
    model_info->agent_model_list_ = agent_model_list;

    auto res = it->second.emplace(
        std::make_pair(version, std::unique_ptr<ModelInfo>()));
    if (res.second) {
      res.first->second = std::move(linfo);
    } else {
      // There is already a record of this model version. Check if the version
      // model is being served; if so, the re-load of the version
      // should be performed in background to avoid version downtime.
      // Otherwise, swap and monitor state for newly loading model.
      auto& serving_model = res.first->second;
      std::lock_guard<std::mutex> lock(serving_model->mtx_);
      if (serving_model->state_ == ModelReadyState::READY) {
        background_models_[(uintptr_t)model_info] = std::move(linfo);
      } else {
        // swap the monitoring model info
        serving_model.swap(linfo);
        // further check the state, put to 'background_models_' to keep
        // the object valid if the model is LOADING / UNLOADING, because
        // the model info will be accessed by a different thread once the
        // operation is completed
        if ((linfo->state_ == ModelReadyState::LOADING) ||
            (linfo->state_ == ModelReadyState::UNLOADING)) {
          ModelInfo* key = linfo.get();
          background_models_[(uintptr_t)key] = std::move(linfo);
        }
      }
    }

    // Load model asynchronously via thread pool
    load_pool_->Enqueue([this, model_name, version, model_info, OnComplete,
                         load_tracker, is_config_provided]() {
      CreateModel(model_name, version, model_info, is_config_provided);
      OnLoadComplete(model_name, version, model_info, OnComplete, load_tracker);
    });
  }

  return Status::Success;
}

void
ModelLifeCycle::CreateModel(
    const std::string& model_name, const int64_t version,
    ModelInfo* model_info, const bool is_config_provided)
{
  LOG_VERBOSE(2) << "CreateModel() '" << model_name << "' version " << version;
  const auto& model_config = model_info->model_config_;

  // Create model
  Status status;
  std::unique_ptr<Model> is;

  // If 'backend' is specified in the config then use the new triton
  // backend.
  if (!model_config.backend().empty()) {
    std::unique_ptr<TritonModel> model;
    status = TritonModel::Create(
        server_, model_info->model_path_, cmdline_config_map_,
        host_policy_map_, model_name, version, model_config,
        is_config_provided, &model);
    is.reset(model.release());
  } else {
#ifdef TRITON_ENABLE_ENSEMBLE
    if (model_info->is_ensemble_) {
      status = EnsembleModel::Create(
          server_, model_info->model_path_, version, model_config,
          is_config_provided, min_compute_capability_, &is);
      // Complete label provider with label information from involved models.
      // Must be done here because involved models may not be able to be
      // obtained from the server because this may happen during server
      // initialization.
      if (status.IsOk()) {
        std::set<std::string> no_label_outputs;
        const auto& label_provider = is->GetLabelProvider();
        for (const auto& output : model_config.output()) {
          if (label_provider->GetLabel(output.name(), 0).empty()) {
            no_label_outputs.emplace(output.name());
          }
        }
        for (const auto& element : model_config.ensemble_scheduling().step()) {
          for (const auto& pair : element.output_map()) {
            // Found model that produces one of the missing outputs
            if (no_label_outputs.find(pair.second) != no_label_outputs.end()) {
              std::shared_ptr<Model> model;
              // Safe to obtain model because the ensemble can't be loaded
              // until the involved models are ready
              GetModel(element.model_name(), element.model_version(), &model);
              label_provider->AddLabels(
                  pair.second,
                  model->GetLabelProvider()->GetLabels(pair.first));
            }
          }
        }
      }
    } else
#endif  // TRITON_ENABLE_ENSEMBLE
    {
      status = Status(
          Status::Code::INVALID_ARG,
          "unknown platform '" + model_config.platform() + "'");
    }
  }

  std::lock_guard<std::mutex> lock(model_info->mtx_);
  if (status.IsOk()) {
    // [FIXME] better way to manage agent model lifecycle
    // Let the deleter also hold a shared pointer copy of the agent model list,
    // because the reference in ModelInfo can be cleared before the Model object
    // is destroyed, and we want the agent model to be valid for receiving the
    // UNLOAD_COMPLETE signal (see ~TritonRepoAgentModelList for detail)
    auto agent_model_list = model_info->agent_model_list_;
    model_info->model_.reset(
        is.release(),
        ModelDeleter([this, model_name, version, model_info,
                      agent_model_list]() mutable {
          LOG_VERBOSE(2) << "OnDestroy callback() '" << model_name
                         << "' version " << version;
          LOG_INFO << "successfully unloaded '" << model_name << "' version "
                   << version;
          // Update model state as it is fully unloaded
          {
            std::lock_guard<std::mutex> lock(model_info->mtx_);
            model_info->state_ = ModelReadyState::UNAVAILABLE;
            model_info->state_reason_ = "unloaded";
          }
          // Check if the model info is in background; if so, remove from the
          // map
          std::lock_guard<std::mutex> lk(this->map_mtx_);
          auto it = this->background_models_.find((uintptr_t)model_info);
          if (it != this->background_models_.end()) {
            this->background_models_.erase(it);
          }
        }));
  } else {
    LOG_ERROR << "failed to load '" << model_name << "' version " << version
              << ": " << status.AsString();
    model_info->state_ = ModelReadyState::UNAVAILABLE;
    model_info->state_reason_ = status.AsString();
  }
}

void
ModelLifeCycle::OnLoadComplete(
    const std::string& model_name, const int64_t version,
    ModelInfo* model_info, std::function<void(Status)> OnComplete,
    std::shared_ptr<LoadTracker> load_tracker)
{
  std::lock_guard<std::mutex> tracker_lock(load_tracker->mtx_);
  ++load_tracker->completed_version_cnt_;
  load_tracker->load_set_[version] = model_info;
  // A version will not be marked ready until all versions are
  // ready, which simplifies the unloading when one version fails to load as
  // all other versions won't have inflight requests
  if (model_info->state_ != ModelReadyState::LOADING) {
    load_tracker->load_failed_ = true;
    load_tracker->reason_ +=
        ("version " + std::to_string(version) + " is at " +
         ModelReadyStateString(model_info->state_) +
         " state: " + model_info->state_reason_ + ";");
  }
  // Check if all versions are completed and finish the load
  if (load_tracker->completed_version_cnt_ ==
      load_tracker->affected_version_cnt_) {
    // hold 'map_mtx_' as there will be change onto the model info map
    std::lock_guard<std::mutex> map_lock(map_mtx_);
    auto it = map_.find(model_name);
    // Check if the load is the latest foreground action on the model
    for (const auto& version_info : it->second) {
      if (version_info.second->last_update_ns_ >
          load_tracker->last_update_ns_) {
        load_tracker->load_failed_ = true;
        load_tracker->reason_ =
            "Newer operation has been applied to the model lifecycle, current "
            "load operation is out-dated.";
        break;
      }
    }
    if (load_tracker->load_failed_) {
      // Move agent list out of ModelInfo as it needs to be invoked
      // after all ModelInfos are reset
      std::shared_ptr<TritonRepoAgentModelList> lagent_list;
      if (model_info->agent_model_list_) {
        lagent_list = std::move(model_info->agent_model_list_);
      }
      // If any of the versions fails to load, abort the load and unload
      // all newly loaded versions
      for (auto& loaded : load_tracker->load_set_) {
        // Unload directly; the object is being managed either in foreground
        // or background
        std::lock_guard<std::mutex> lock(loaded.second->mtx_);
        if (loaded.second->model_ != nullptr) {
          loaded.second->Release();
        }
      }
      if (lagent_list) {
        auto status =
            lagent_list->InvokeAgentModels(TRITONREPOAGENT_ACTION_LOAD_FAIL);
        if (!status.IsOk()) {
          LOG_ERROR << "Agent model returns error on "
                       "TRITONREPOAGENT_ACTION_LOAD_FAIL: "
                    << status.AsString();
        }
      }
    } else {
      // Unload any previously loaded versions that are still available
      for (auto& version_info : it->second) {
        auto& mi = version_info.second;
        std::lock_guard<std::mutex> info_lk(mi->mtx_);
        if ((mi->state_ == ModelReadyState::READY) &&
            (mi->last_update_ns_ < load_tracker->last_update_ns_)) {
          if (mi->agent_model_list_ != nullptr) {
            auto status = mi->agent_model_list_->InvokeAgentModels(
                TRITONREPOAGENT_ACTION_UNLOAD);
            if (!status.IsOk()) {
              LOG_ERROR << "Agent model returns error on "
                           "TRITONREPOAGENT_ACTION_UNLOAD: "
                        << status.AsString();
            }
          }
          mi->Release();
        }
      }
      // Mark current versions ready and track info in foreground
      for (auto& loaded : load_tracker->load_set_) {
        std::lock_guard<std::mutex> curr_info_lk(loaded.second->mtx_);
        loaded.second->state_ = ModelReadyState::READY;
        model_info->state_reason_.clear();
        LOG_INFO << "successfully loaded '" << model_name << "' version "
                 << version;
        auto bit = background_models_.find((uintptr_t)loaded.second);
        // Check if the version model is loaded in background; if so,
        // replace and unload the current serving version
        if (bit != background_models_.end()) {
          auto vit = it->second.find(loaded.first);
          // Need to lock the previous model info in case the model is
          // loading / unloading; this ensures the model state is consistent
          // even when the load / unload is completed.
          std::lock_guard<std::mutex> prev_info_lk(vit->second->mtx_);
          // swap previous info into local unique pointer
          auto linfo = std::move(bit->second);
          vit->second.swap(linfo);
          background_models_.erase(bit);
          // if previous info is under change, put into 'background_models_'
          if ((linfo->state_ == ModelReadyState::LOADING) ||
              (linfo->state_ == ModelReadyState::UNLOADING)) {
            ModelInfo* key = linfo.get();
            background_models_[(uintptr_t)key] = std::move(linfo);
          }
        }
      }
      if (model_info->agent_model_list_) {
        auto status = model_info->agent_model_list_->InvokeAgentModels(
            TRITONREPOAGENT_ACTION_LOAD_COMPLETE);
        if (!status.IsOk()) {
          LOG_ERROR << "Agent model returns error on "
                       "TRITONREPOAGENT_ACTION_LOAD_COMPLETE: "
                    << status.AsString();
        }
      }
    }
    if (OnComplete != nullptr) {
      OnComplete(
          load_tracker->load_failed_
              ? Status(Status::Code::INVALID_ARG, load_tracker->reason_)
              : Status::Success);
    }
  }
}

}}  // namespace triton::core
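
Aside: the ModelDeleter in the file above hands the actual `delete` off to a detached thread, because the last shared_ptr copy can be released from the model's own callstack (a request release or response send) and destroying it inline could deadlock. Below is a minimal self-contained sketch of that pattern; the Resource type, DetachedDeleter, and the callback are hypothetical stand-ins for Model, ModelDeleter, and the UNAVAILABLE state update, not part of the deleted file:

#include <chrono>
#include <functional>
#include <iostream>
#include <memory>
#include <thread>

struct Resource {
  ~Resource() { std::cout << "destroyed on worker thread\n"; }
};

// Custom deleter that moves destruction onto a detached thread and then
// runs a completion callback, mirroring ModelDeleter::operator()().
struct DetachedDeleter {
  std::function<void()> on_destroy;
  void operator()(Resource* r)
  {
    std::function<void()> cb = on_destroy;
    std::thread([r, cb]() {
      delete r;  // runs off the releasing thread, so it cannot deadlock it
      cb();
    }).detach();
  }
};

int main()
{
  std::shared_ptr<Resource> res(
      new Resource,
      DetachedDeleter{[]() { std::cout << "state -> UNAVAILABLE\n"; }});
  res.reset();  // last copy released: destruction happens on another thread
  std::this_thread::sleep_for(std::chrono::milliseconds(100));
  return 0;
}

The completion callback plays the role that the state update and background-map cleanup play in the real deleter.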
3rdparty/core-r22.12/src/model_lifecycle.h (deleted, mode 100644 → 0)
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once
#include <functional>
#include <map>
#include <mutex>
#include "infer_parameter.h"
#include "model_config.pb.h"
#include "repo_agent.h"
#include "status.h"
#include "triton/common/model_config.h"
#include "triton/common/thread_pool.h"
namespace triton { namespace core {

struct ModelLifeCycleOptions {
  explicit ModelLifeCycleOptions(
      const double min_compute_capability,
      const triton::common::BackendCmdlineConfigMap&
          backend_cmdline_config_map,
      const triton::common::HostPolicyCmdlineConfigMap& host_policy_map,
      const unsigned int model_load_thread_count)
      : min_compute_capability_(min_compute_capability),
        backend_cmdline_config_map_(backend_cmdline_config_map),
        host_policy_map_(host_policy_map),
        model_load_thread_count_(model_load_thread_count)
  {
  }
  // The minimum supported CUDA compute capability.
  const double min_compute_capability_;
  // The backend configuration settings specified on the command-line
  const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map_;
  // The host policy setting used when loading models.
  const triton::common::HostPolicyCmdlineConfigMap& host_policy_map_;
  // Number of the threads to use for concurrently loading models
  const unsigned int model_load_thread_count_;
};

/// Readiness status for models.
enum class ModelReadyState {
  // The model is in an unknown state. The model is not available for
  // inferencing.
  UNKNOWN,

  // The model is ready and available for inferencing.
  READY,

  // The model is unavailable, indicating that the model failed to
  // load or has been implicitly or explicitly unloaded. The model is
  // not available for inferencing.
  UNAVAILABLE,

  // The model is being loaded by the inference server. The model is
  // not available for inferencing.
  LOADING,

  // The model is being unloaded by the inference server. The model is
  // not available for inferencing.
  UNLOADING
};

/// Get the string representation for a ModelReadyState
const std::string& ModelReadyStateString(ModelReadyState state);

using VersionStateMap =
    std::map<int64_t, std::pair<ModelReadyState, std::string>>;
using ModelStateMap = std::map<std::string, VersionStateMap>;

// Helper class to manage the lifecycle of a list of associated agent models
class TritonRepoAgentModelList {
 public:
  TritonRepoAgentModelList()
      : last_action_type_(TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE)
  {
  }
  ~TritonRepoAgentModelList()
  {
    // Using destructor to finish the unload lifecycle without
    // explicitly managing the last step in ModelLifecycle.
    if (last_action_type_ == TRITONREPOAGENT_ACTION_UNLOAD) {
      InvokeAgentModels(TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE);
    }
  }
  Status AddAgentModel(std::unique_ptr<TritonRepoAgentModel>&& agent_model)
  {
    agent_models_.emplace_back(std::move(agent_model));
    return Status::Success;
  }

  size_t Size() { return agent_models_.size(); }

  TritonRepoAgentModel* Back() { return agent_models_.back().get(); }

  Status InvokeAgentModels(const TRITONREPOAGENT_ActionType action_type)
  {
    // Special handling for the current model lifecycle implementation:
    // the repo agent may be asked to perform the UNLOAD action multiple
    // times, and the requests after the first should be ignored.
    const bool first_unload =
        (action_type == TRITONREPOAGENT_ACTION_UNLOAD) &&
        (last_action_type_ != TRITONREPOAGENT_ACTION_UNLOAD);
    if (!first_unload) {
      return Status::Success;
    }
    last_action_type_ = action_type;
    switch (action_type) {
      case TRITONREPOAGENT_ACTION_LOAD:
      case TRITONREPOAGENT_ACTION_UNLOAD: {
        for (size_t idx = 0; idx < agent_models_.size(); ++idx) {
          RETURN_IF_ERROR(agent_models_[idx]->InvokeAgent(action_type));
        }
        break;
      }
      case TRITONREPOAGENT_ACTION_LOAD_COMPLETE:
      case TRITONREPOAGENT_ACTION_LOAD_FAIL:
      case TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: {
        // reverse order
        for (size_t one_pass_idx = agent_models_.size(); one_pass_idx > 0;
             --one_pass_idx) {
          RETURN_IF_ERROR(
              agent_models_[one_pass_idx - 1]->InvokeAgent(action_type));
        }
        break;
      }
    }
    return Status::Success;
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(TritonRepoAgentModelList);
  std::vector<std::unique_ptr<TritonRepoAgentModel>> agent_models_;
  TRITONREPOAGENT_ActionType last_action_type_;
};

class InferenceServer;
class Model;

class ModelLifeCycle {
 public:
  static Status Create(
      InferenceServer* server, const ModelLifeCycleOptions& options,
      std::unique_ptr<ModelLifeCycle>* life_cycle);

  ~ModelLifeCycle()
  {
    // Explicitly clean up thread pool first to clean up any pending callbacks
    // that may modify model lifecycle members
    load_pool_.reset();
    map_.clear();
  }

  // Start loading model with specified versions asynchronously.
  // All versions that are being served will be unloaded only after
  // the load is finished successfully.
  Status AsyncLoad(
      const std::string& model_name, const std::string& model_path,
      const inference::ModelConfig& model_config,
      const bool is_config_provided,
      const std::shared_ptr<TritonRepoAgentModelList>& agent_model_list,
      std::function<void(Status)>&& OnComplete);

  // Unload model asynchronously.
  Status AsyncUnload(const std::string& model_name);

  // Get specified version of the model. Latest ready version will
  // be retrieved if 'version' is -1. Return error if the version specified is
  // not found or it is not ready.
  Status GetModel(
      const std::string& model_name, const int64_t version,
      std::shared_ptr<Model>* model);

  // Get the ModelStateMap representation of the live models. A model is
  // live if at least one of the versions is not unknown nor unavailable.
  // If 'strict_readiness' is true, a model is only live if
  // at least one of the versions is ready.
  const ModelStateMap LiveModelStates(bool strict_readiness = false);

  // Get the ModelStateMap representation of the models.
  const ModelStateMap ModelStates();

  // Get the VersionStateMap representation of the specified model.
  const VersionStateMap VersionStates(const std::string& model_name);

  // Get the state of a specific model version.
  Status ModelState(
      const std::string& model_name, const int64_t model_version,
      ModelReadyState* state);

  // Instruct the model to stop accepting new inference requests.
  Status StopAllModels();

  // Return the number of in-flight inferences, if any; model versions
  // that don't have in-flight inferences will not be included.
  const std::set<std::tuple<std::string, int64_t, size_t>> InflightStatus();

 private:
  struct ModelInfo {
    ModelInfo(
        const std::string& model_path,
        const inference::ModelConfig& model_config,
        const uint64_t last_update_ns)
        : model_config_(model_config), model_path_(model_path),
#ifdef TRITON_ENABLE_ENSEMBLE
          is_ensemble_(model_config.platform() == kEnsemblePlatform),
#else
          is_ensemble_(false),
#endif  // TRITON_ENABLE_ENSEMBLE
          last_update_ns_(last_update_ns),
          state_(ModelReadyState::UNKNOWN)
    {
    }

    // Release the flyweight in the ModelInfo object, reflected as 'UNLOADING'
    // in model state. Note that 'mtx_' should be acquired before invoking this
    // function to prevent a possible data race.
    void Release()
    {
      state_ = ModelReadyState::UNLOADING;
      state_reason_.clear();
      agent_model_list_.reset();
      model_.reset();
    }

    const inference::ModelConfig model_config_;
    const std::string model_path_;
    const bool is_ensemble_;

    std::mutex mtx_;
    uint64_t last_update_ns_;
    ModelReadyState state_;
    std::string state_reason_;

    // flyweight
    std::shared_ptr<TritonRepoAgentModelList> agent_model_list_;
    std::shared_ptr<Model> model_;
  };

  struct LoadTracker {
    LoadTracker(
        const size_t affected_version_cnt, const uint64_t last_update_ns)
        : last_update_ns_(last_update_ns),
          affected_version_cnt_(affected_version_cnt), load_failed_(false),
          completed_version_cnt_(0)
    {
    }

    const uint64_t last_update_ns_;
    const size_t affected_version_cnt_;

    std::mutex mtx_;
    bool load_failed_;
    std::string reason_;
    size_t completed_version_cnt_;
    std::map<int64_t, ModelInfo*> load_set_;
  };

  ModelLifeCycle(InferenceServer* server, const ModelLifeCycleOptions& options)
      : server_(server),
        min_compute_capability_(options.min_compute_capability_),
        cmdline_config_map_(options.backend_cmdline_config_map_),
        host_policy_map_(options.host_policy_map_)
  {
    load_pool_.reset(new triton::common::ThreadPool(
        std::max(1u, options.model_load_thread_count_)));
  }

  void CreateModel(
      const std::string& model_name, const int64_t version,
      ModelInfo* model_info, const bool is_config_provided);

  // Callback function template for model load.
  // 'OnComplete' needs to be passed by value for now as there can be
  // multiple versions to be loaded and each holds a copy of
  // the 'OnComplete' callback.
  void OnLoadComplete(
      const std::string& model_name, const int64_t version,
      ModelInfo* model_info, std::function<void(Status)> OnComplete,
      std::shared_ptr<LoadTracker> load_tracker);

  // Mutex for 'map_' and 'background_models_'
  std::mutex map_mtx_;

  using VersionMap = std::map<int64_t, std::unique_ptr<ModelInfo>>;
  using ModelMap = std::map<std::string, VersionMap>;
  ModelMap map_;
  // Models that are being loaded / unloaded in background
  std::map<uintptr_t, std::unique_ptr<ModelInfo>> background_models_;

  InferenceServer* server_;
  const double min_compute_capability_;
  const triton::common::BackendCmdlineConfigMap cmdline_config_map_;
  const triton::common::HostPolicyCmdlineConfigMap host_policy_map_;

  // Fixed-size thread pool to load models at specified concurrency
  std::unique_ptr<triton::common::ThreadPool> load_pool_;
};

}}  // namespace triton::core
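
Aside: the AsyncLoad declaration above takes an OnComplete callback rather than blocking; callers that need to wait bridge the callback with a promise/future pair, as model_repository_manager.cc below does in LoadModelByDependency. A simplified, self-contained sketch of that calling pattern follows; the AsyncLoad here is a hypothetical stand-in, not the Triton implementation, and Status is reduced to a string:

#include <functional>
#include <future>
#include <iostream>
#include <string>
#include <thread>

using Status = std::string;  // stand-in for triton::core::Status

// Stand-in async API: invokes OnComplete from a worker thread when done.
void
AsyncLoad(
    const std::string& model_name, std::function<void(Status)> OnComplete)
{
  std::thread([model_name, OnComplete]() {
    OnComplete("successfully loaded '" + model_name + "'");
  }).detach();
}

int main()
{
  std::promise<Status> done;
  AsyncLoad("densenet_onnx", [&done](Status s) { done.set_value(s); });
  // Block until the asynchronous load reports completion.
  std::cout << done.get_future().get() << std::endl;
  return 0;
}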
3rdparty/core-r22.12/src/model_repository_manager.cc (deleted, mode 100644 → 0)
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "model_repository_manager.h"
#include <algorithm>
#include <deque>
#include <future>
#include <stdexcept>
#include <thread>
#include "constants.h"
#include "ensemble_utils.h"
#include "filesystem.h"
#include "model.h"
#include "model_config_utils.h"
#include "triton/common/logging.h"
#include "backend_model.h"
#ifdef TRITON_ENABLE_ENSEMBLE
#include "ensemble_model.h"
#endif // TRITON_ENABLE_ENSEMBLE
namespace
triton
{
namespace
core
{
namespace
{
static
std
::
string
file_prefix
=
"file:"
;
// Internal repo agent used for model file override
class
LocalizeRepoAgent
:
public
TritonRepoAgent
{
public:
LocalizeRepoAgent
()
:
TritonRepoAgent
(
"ModelRepositoryManager::LocalizeRepoAgent"
)
{
// Callbacks below interact with TritonRepoAgentModel directly knowing that
// it is the internal implementation of TRITONREPOAGENT_AgentModel
model_action_fn_
=
[](
TRITONREPOAGENT_Agent
*
agent
,
TRITONREPOAGENT_AgentModel
*
model
,
const
TRITONREPOAGENT_ActionType
action_type
)
->
TRITONSERVER_Error
*
{
auto
agent_model
=
reinterpret_cast
<
TritonRepoAgentModel
*>
(
model
);
switch
(
action_type
)
{
case
TRITONREPOAGENT_ACTION_LOAD
:
{
// localize the override files for model loading,
// as currently the model is expected to load from local directory
const
char
*
temp_dir_cstr
=
nullptr
;
RETURN_TRITONSERVER_ERROR_IF_ERROR
(
agent_model
->
AcquireMutableLocation
(
TRITONREPOAGENT_ARTIFACT_FILESYSTEM
,
&
temp_dir_cstr
));
const
std
::
string
temp_dir
=
temp_dir_cstr
;
const
auto
&
files
=
*
reinterpret_cast
<
std
::
vector
<
const
InferenceParameter
*>*>
(
agent_model
->
State
());
bool
found_config
=
false
;
for
(
const
auto
&
file
:
files
)
{
if
(
file
->
Name
()
==
"config"
)
{
if
(
file
->
Type
()
!=
TRITONSERVER_PARAMETER_STRING
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INVALID_ARG
,
"Config parameter 'config' must have string type for its "
"value"
);
}
inference
::
ModelConfig
config
;
RETURN_TRITONSERVER_ERROR_IF_ERROR
(
JsonToModelConfig
(
file
->
ValueString
(),
1
/* config_version */
,
&
config
));
RETURN_TRITONSERVER_ERROR_IF_ERROR
(
WriteTextProto
(
JoinPath
({
temp_dir
,
kModelConfigPbTxt
}),
config
));
found_config
=
true
;
}
else
if
(
file
->
Name
().
rfind
(
file_prefix
,
0
)
==
0
)
{
if
(
file
->
Type
()
!=
TRITONSERVER_PARAMETER_BYTES
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INVALID_ARG
,
(
std
::
string
(
"File parameter '"
)
+
file
->
Name
()
+
"' must have bytes type for its value"
)
.
c_str
());
}
// Save model file to the instructed directory
// mkdir
const
std
::
string
file_path
=
JoinPath
({
temp_dir
,
file
->
Name
().
substr
(
file_prefix
.
size
())});
const
std
::
string
dir
=
DirName
(
file_path
);
bool
dir_exist
=
false
;
RETURN_TRITONSERVER_ERROR_IF_ERROR
(
FileExists
(
dir
,
&
dir_exist
));
if
(
dir_exist
)
{
bool
is_dir
=
false
;
RETURN_TRITONSERVER_ERROR_IF_ERROR
(
IsDirectory
(
dir
,
&
is_dir
));
if
(
!
is_dir
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INVALID_ARG
,
(
std
::
string
(
"Invalid file parameter '"
)
+
file
->
Name
()
+
"', directory has been created as a file"
)
.
c_str
());
}
}
else
{
RETURN_TRITONSERVER_ERROR_IF_ERROR
(
MakeDirectory
(
dir
,
true
/* recursive */
));
}
// write
RETURN_TRITONSERVER_ERROR_IF_ERROR
(
WriteBinaryFile
(
file_path
,
reinterpret_cast
<
const
char
*>
(
file
->
ValuePointer
()),
file
->
ValueByteSize
()));
}
}
if
(
!
found_config
)
{
return
TRITONSERVER_ErrorNew
(
TRITONSERVER_ERROR_INVALID_ARG
,
"Load parameter 'config' must be specified for model file "
"override"
);
}
// Commit the temporary directory
RETURN_TRITONSERVER_ERROR_IF_ERROR
(
agent_model
->
SetLocation
(
TRITONREPOAGENT_ARTIFACT_FILESYSTEM
,
temp_dir_cstr
));
break
;
}
default:
break
;
}
return
nullptr
;
// success
};
model_fini_fn_
=
[](
TRITONREPOAGENT_Agent
*
agent
,
TRITONREPOAGENT_AgentModel
*
model
)
->
TRITONSERVER_Error
*
{
auto
agent_model
=
reinterpret_cast
<
TritonRepoAgentModel
*>
(
model
);
RETURN_TRITONSERVER_ERROR_IF_ERROR
(
agent_model
->
DeleteMutableLocation
());
return
nullptr
;
// success
};
}
};
Status
CreateAgentModelListWithLoadAction
(
const
inference
::
ModelConfig
&
original_model_config
,
const
std
::
string
&
original_model_path
,
std
::
shared_ptr
<
TritonRepoAgentModelList
>*
agent_model_list
)
{
if
(
original_model_config
.
has_model_repository_agents
())
{
// Trick to append user specified repo agent on top of internal ones
std
::
shared_ptr
<
TritonRepoAgentModelList
>
lagent_model_list
;
if
(
*
agent_model_list
!=
nullptr
)
{
lagent_model_list
=
std
::
move
(
*
agent_model_list
);
}
else
{
lagent_model_list
.
reset
(
new
TritonRepoAgentModelList
());
}
FileSystemType
filesystem_type
;
RETURN_IF_ERROR
(
GetFileSystemType
(
original_model_path
,
&
filesystem_type
));
TRITONREPOAGENT_ArtifactType
artifact_type
=
TRITONREPOAGENT_ARTIFACT_FILESYSTEM
;
if
(
filesystem_type
!=
FileSystemType
::
LOCAL
)
{
artifact_type
=
TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM
;
}
const
char
*
location
=
original_model_path
.
c_str
();
inference
::
ModelConfig
model_config
=
original_model_config
;
for
(
const
auto
&
agent_config
:
original_model_config
.
model_repository_agents
().
agents
())
{
std
::
shared_ptr
<
TritonRepoAgent
>
agent
;
RETURN_IF_ERROR
(
TritonRepoAgentManager
::
CreateAgent
(
agent_config
.
name
(),
&
agent
));
TritonRepoAgent
::
Parameters
agent_params
;
for
(
const
auto
&
parameter
:
agent_config
.
parameters
())
{
agent_params
.
emplace_back
(
parameter
.
first
,
parameter
.
second
);
}
std
::
unique_ptr
<
TritonRepoAgentModel
>
agent_model
;
if
(
lagent_model_list
->
Size
()
!=
0
)
{
lagent_model_list
->
Back
()
->
Location
(
&
artifact_type
,
&
location
);
const
auto
config_path
=
JoinPath
({
location
,
kModelConfigPbTxt
});
if
(
!
ReadTextProto
(
config_path
,
&
model_config
).
IsOk
())
{
model_config
.
Clear
();
}
}
RETURN_IF_ERROR
(
TritonRepoAgentModel
::
Create
(
artifact_type
,
location
,
model_config
,
agent
,
agent_params
,
&
agent_model
));
RETURN_IF_ERROR
(
agent_model
->
InvokeAgent
(
TRITONREPOAGENT_ACTION_LOAD
));
lagent_model_list
->
AddAgentModel
(
std
::
move
(
agent_model
));
}
*
agent_model_list
=
std
::
move
(
lagent_model_list
);
}
return
Status
::
Success
;
}
int64_t
GetModifiedTime
(
const
std
::
string
&
path
)
{
// If there is an error in any step the fall-back default
// modification time is 0. This means that in error cases 'path'
// will show as not modified. This is the safe fall-back to avoid
// assuming a model is constantly being modified.
bool
path_is_dir
;
Status
status
=
IsDirectory
(
path
,
&
path_is_dir
);
if
(
!
status
.
IsOk
())
{
LOG_ERROR
<<
"Failed to determine modification time for '"
<<
path
<<
"': "
<<
status
.
AsString
();
return
0
;
}
// If 'path' is a file return its mtime. Otherwise, using the modification
// time of the directory as baseline in case of file deletion
int64_t
mtime
=
0
;
status
=
FileModificationTime
(
path
,
&
mtime
);
if
(
!
status
.
IsOk
())
{
LOG_ERROR
<<
"Failed to determine modification time for '"
<<
path
<<
"': "
<<
status
.
AsString
();
return
0
;
}
if
(
!
path_is_dir
)
{
return
mtime
;
}
// 'path' is a directory. Return the most recent mtime of the
// contents of the directory.
std
::
set
<
std
::
string
>
contents
;
status
=
GetDirectoryContents
(
path
,
&
contents
);
if
(
!
status
.
IsOk
())
{
LOG_ERROR
<<
"Failed to determine modification time for '"
<<
path
<<
"': "
<<
status
.
AsString
();
return
0
;
}
for
(
const
auto
&
child
:
contents
)
{
const
auto
full_path
=
JoinPath
({
path
,
child
});
mtime
=
std
::
max
(
mtime
,
GetModifiedTime
(
full_path
));
}
return
mtime
;
}
// Return true if any file in the subdirectory root at 'path' has been
// modified more recently than 'last'. Return the most-recent modified
// time in 'last'.
bool
IsModified
(
const
std
::
string
&
path
,
int64_t
*
last_ns
)
{
const
int64_t
repo_ns
=
GetModifiedTime
(
path
);
bool
modified
=
repo_ns
>
*
last_ns
;
*
last_ns
=
repo_ns
;
return
modified
;
}
}
// namespace
struct
ModelRepositoryManager
::
ModelInfo
{
ModelInfo
(
const
int64_t
mtime_nsec
,
const
int64_t
prev_mtime_ns
,
const
std
::
string
&
model_path
)
:
mtime_nsec_
(
mtime_nsec
),
prev_mtime_ns_
(
prev_mtime_ns
),
explicitly_load_
(
true
),
model_path_
(
model_path
),
is_config_provided_
(
false
)
{
}
ModelInfo
()
:
mtime_nsec_
(
0
),
prev_mtime_ns_
(
0
),
explicitly_load_
(
true
),
is_config_provided_
(
false
)
{
}
int64_t
mtime_nsec_
;
int64_t
prev_mtime_ns_
;
bool
explicitly_load_
;
inference
::
ModelConfig
model_config_
;
std
::
string
model_path_
;
// Temporary location to hold agent model list before creating the model
// the ownership must transfer to ModelLifeCycle to ensure
// the agent model life cycle is handled properly.
std
::
shared_ptr
<
TritonRepoAgentModelList
>
agent_model_list_
;
bool
is_config_provided_
;
};
ModelRepositoryManager
::
ModelRepositoryManager
(
const
std
::
set
<
std
::
string
>&
repository_paths
,
const
bool
autofill
,
const
bool
polling_enabled
,
const
bool
model_control_enabled
,
const
double
min_compute_capability
,
std
::
unique_ptr
<
ModelLifeCycle
>
life_cycle
)
:
repository_paths_
(
repository_paths
),
autofill_
(
autofill
),
polling_enabled_
(
polling_enabled
),
model_control_enabled_
(
model_control_enabled
),
min_compute_capability_
(
min_compute_capability
),
model_life_cycle_
(
std
::
move
(
life_cycle
))
{
}
ModelRepositoryManager
::~
ModelRepositoryManager
()
{}
Status
ModelRepositoryManager
::
Create
(
InferenceServer
*
server
,
const
std
::
string
&
server_version
,
const
std
::
set
<
std
::
string
>&
repository_paths
,
const
std
::
set
<
std
::
string
>&
startup_models
,
const
bool
strict_model_config
,
const
bool
polling_enabled
,
const
bool
model_control_enabled
,
const
ModelLifeCycleOptions
&
life_cycle_options
,
std
::
unique_ptr
<
ModelRepositoryManager
>*
model_repository_manager
)
{
// The rest only matters if repository path is valid directory
for
(
const
auto
&
path
:
repository_paths
)
{
bool
path_is_dir
;
RETURN_IF_ERROR
(
IsDirectory
(
path
,
&
path_is_dir
));
if
(
!
path_is_dir
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"repository path is not a valid directory"
);
}
}
if
(
polling_enabled
&&
model_control_enabled
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"cannot enable both polling and explicit model control"
);
}
std
::
unique_ptr
<
ModelLifeCycle
>
life_cycle
;
RETURN_IF_ERROR
(
ModelLifeCycle
::
Create
(
server
,
life_cycle_options
,
&
life_cycle
));
// Not setting the smart pointer directly to simplify clean up
std
::
unique_ptr
<
ModelRepositoryManager
>
local_manager
(
new
ModelRepositoryManager
(
repository_paths
,
!
strict_model_config
,
polling_enabled
,
model_control_enabled
,
life_cycle_options
.
min_compute_capability_
,
std
::
move
(
life_cycle
)));
*
model_repository_manager
=
std
::
move
(
local_manager
);
// Support loading all models on startup in explicit model control mode with
// special startup_model name "*". This does not imply support for pattern
// matching in model names.
bool
load_all_models_on_startup
=
false
;
if
((
startup_models
.
find
(
"*"
)
!=
startup_models
.
end
())
&&
model_control_enabled
)
{
if
(
startup_models
.
size
()
>
1
)
{
return
Status
(
Status
::
Code
::
INVALID_ARG
,
"Wildcard model name '*' must be the ONLY startup model "
"if specified at all."
);
}
load_all_models_on_startup
=
true
;
}
bool
all_models_polled
=
true
;
if
(
!
model_control_enabled
||
load_all_models_on_startup
)
{
// only error happens before model load / unload will be return
// model loading / unloading error will be printed but ignored
RETURN_IF_ERROR
(
(
*
model_repository_manager
)
->
PollAndUpdateInternal
(
&
all_models_polled
));
}
else
{
// Load each specified startup_model
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
const
InferenceParameter
*>>
models
;
for
(
const
auto
&
model_name
:
startup_models
)
{
models
[
model_name
];
}
RETURN_IF_ERROR
(
(
*
model_repository_manager
)
->
LoadUnloadModels
(
models
,
ActionType
::
LOAD
,
false
,
&
all_models_polled
));
}
if
(
!
all_models_polled
)
{
return
Status
(
Status
::
Code
::
INTERNAL
,
"failed to load all models"
);
}
// Some models may failed to be loaded after model manager is created,
// return proper error and let function caller decide whether to proceed.
for
(
const
auto
&
model
:
(
*
model_repository_manager
)
->
infos_
)
{
const
auto
version_states
=
(
*
model_repository_manager
)
->
model_life_cycle_
->
VersionStates
(
model
.
first
);
// Return general error message, detail of each model's loading state
// is logged separately.
if
(
version_states
.
empty
())
{
return
Status
(
Status
::
Code
::
INTERNAL
,
"failed to load all models"
);
}
for
(
const
auto
&
state
:
version_states
)
{
if
(
state
.
second
.
first
!=
ModelReadyState
::
READY
)
{
return
Status
(
Status
::
Code
::
INTERNAL
,
"failed to load all models"
);
}
}
}
return
Status
::
Success
;
}
Status
ModelRepositoryManager
::
PollAndUpdate
()
{
if
(
!
polling_enabled_
)
{
return
Status
(
Status
::
Code
::
UNAVAILABLE
,
"polling is disabled"
);
}
bool
all_models_polled
;
return
PollAndUpdateInternal
(
&
all_models_polled
);
}
Status
ModelRepositoryManager
::
PollAndUpdateInternal
(
bool
*
all_models_polled
)
{
// Serialize all operations that change model state
std
::
lock_guard
<
std
::
mutex
>
lock
(
poll_mu_
);
std
::
set
<
std
::
string
>
added
,
deleted
,
modified
,
unmodified
;
// We don't modify 'infos_' in place to minimize how long we need to
// hold the lock and also prevent any partial changes to do an error
// during processing.
ModelInfoMap
new_infos
;
// Each subdirectory of repository path is a model directory from
// which we read the model configuration.
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
const
InferenceParameter
*>>
subdirs
;
RETURN_IF_ERROR
(
Poll
(
subdirs
,
&
added
,
&
deleted
,
&
modified
,
&
unmodified
,
&
new_infos
,
all_models_polled
));
// Anything in 'infos_' that is not in "added", "modified", or
// "unmodified" is deleted.
for
(
const
auto
&
pr
:
infos_
)
{
if
((
added
.
find
(
pr
.
first
)
==
added
.
end
())
&&
(
modified
.
find
(
pr
.
first
)
==
modified
.
end
())
&&
(
unmodified
.
find
(
pr
.
first
)
==
unmodified
.
end
()))
{
deleted
.
insert
(
pr
.
first
);
}
}
// Nothing to do if no model adds, deletes or modifies.
if
(
added
.
empty
()
&&
deleted
.
empty
()
&&
modified
.
empty
())
{
return
Status
::
Success
;
}
infos_
.
swap
(
new_infos
);
UpdateDependencyGraph
(
added
,
deleted
,
modified
);
for
(
const
auto
&
name
:
deleted
)
{
model_life_cycle_
->
AsyncUnload
(
name
);
}
// model loading / unloading error will be printed but ignored
LoadModelByDependency
();
return
Status
::
Success
;
}
std::map<std::string, Status>
ModelRepositoryManager::LoadModelByDependency()
{
  std::map<std::string, Status> res;
  struct ModelState {
    ModelState(DependencyNode* node) : node_(node), status_(Status::Success) {}
    DependencyNode* node_;
    Status status_;
    std::promise<void> ready_;
  };
  NodeSet loaded_models;
  auto set_pair = ModelsToLoadUnload(loaded_models);
  // Loop until all models are loaded / unloaded
  while ((!set_pair.first.empty()) || (!set_pair.second.empty())) {
    loaded_models.clear();
    // Unload invalid models first
    for (auto& invalid_model : set_pair.second) {
      model_life_cycle_->AsyncUnload(invalid_model->model_name_);
      LOG_ERROR << invalid_model->status_.AsString();
      invalid_model->loaded_versions_ = std::set<int64_t>();
      loaded_models.emplace(invalid_model);
    }
    // load valid models and wait for load results
    std::vector<std::unique_ptr<ModelState>> model_states;
    for (auto& valid_model : set_pair.first) {
      model_states.emplace_back(new ModelState(valid_model));
      auto model_state = model_states.back().get();
      const auto itr = infos_.find(valid_model->model_name_);
      auto status = model_life_cycle_->AsyncLoad(
          valid_model->model_name_, itr->second->model_path_,
          valid_model->model_config_, itr->second->is_config_provided_,
          itr->second->agent_model_list_, [model_state](Status load_status) {
            model_state->status_ = load_status;
            model_state->ready_.set_value();
          });
      if (!status.IsOk()) {
        model_state->status_ = status;
        model_state->ready_.set_value();
        LOG_ERROR << "failed to load model '" << valid_model->model_name_
                  << "': " << status.Message();
      }
      loaded_models.emplace(valid_model);
    }
    for (auto& model_state : model_states) {
      model_state->ready_.get_future().wait();
      res[model_state->node_->model_name_] = model_state->status_;
      const auto version_state =
          model_life_cycle_->VersionStates(model_state->node_->model_name_);
      model_state->node_->loaded_versions_.clear();
      for (const auto& vs : version_state) {
        if (vs.second.first == ModelReadyState::READY) {
          model_state->node_->loaded_versions_.emplace(vs.first);
        }
      }
      // If the model failed to load, should revert the timestamp to
      // ensure the next load request will attempt to load the model again
      // for operation consistency.
      if (!model_state->status_.IsOk()) {
        auto& model_info = infos_.find(model_state->node_->model_name_)->second;
        model_info->mtime_nsec_ = model_info->prev_mtime_ns_;
      }
    }
    set_pair = ModelsToLoadUnload(loaded_models);
  }
  // Clear temporary stored agent model list after all loads are triggered
  for (auto& info : infos_) {
    info.second->agent_model_list_.reset();
  }
  return res;
}
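AsyncLoad above reports completion through a callback, and the caller blocks on one std::promise per model so that a whole batch of loads can be issued first and awaited afterwards. A minimal sketch of that fan-out/fan-in pattern, with a hypothetical async_load standing in for the life-cycle call:

#include <functional>
#include <future>
#include <iostream>
#include <memory>
#include <thread>
#include <vector>

// Hypothetical stand-in for an asynchronous load call: runs the work on
// another thread and invokes the callback when done.
void
async_load(int id, std::function<void(bool)> on_done)
{
  std::thread([id, on_done] { on_done(id % 2 == 0); }).detach();
}

int
main()
{
  struct State {
    bool ok = false;
    std::promise<void> ready;
  };
  std::vector<std::unique_ptr<State>> states;

  // Fan out: issue every load before waiting on any of them.
  for (int id = 0; id < 4; ++id) {
    states.emplace_back(new State());
    State* s = states.back().get();
    async_load(id, [s](bool ok) {
      s->ok = ok;
      s->ready.set_value();  // unblock the waiter exactly once
    });
  }
  // Fan in: collect every result in issue order.
  for (size_t id = 0; id < states.size(); ++id) {
    states[id]->ready.get_future().wait();
    std::cout << "load " << id << (states[id]->ok ? " ok" : " failed")
              << "\n";
  }
  return 0;
}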
Status
ModelRepositoryManager::LoadUnloadModel(
    const std::unordered_map<
        std::string, std::vector<const InferenceParameter*>>& models,
    const ActionType type, const bool unload_dependents)
{
  if (!model_control_enabled_) {
    return Status(
        Status::Code::UNAVAILABLE,
        "explicit model load / unload is not allowed if polling is enabled");
  }
  if (models.size() > 1) {
    return Status(
        Status::Code::UNSUPPORTED,
        "explicit load / unload multiple models is not currently supported");
  }

  // Serialize all operations that change model state
  std::lock_guard<std::mutex> lock(poll_mu_);

  bool polled = true;
  RETURN_IF_ERROR(LoadUnloadModels(models, type, unload_dependents, &polled));

  // Check if model is loaded / unloaded properly
  const auto& model_name = models.begin()->first;
  if (!polled) {
    return Status(
        Status::Code::INTERNAL, "failed to load '" + model_name +
                                    "', failed to poll from model repository");
  }

  const auto version_states = model_life_cycle_->VersionStates(model_name);
  if (type == ActionType::LOAD) {
    if (version_states.empty()) {
      return Status(
          Status::Code::INTERNAL,
          "failed to load '" + model_name + "', no version is available");
    }
    auto it = infos_.find(model_name);
    if (it == infos_.end()) {
      return Status(
          Status::Code::INTERNAL,
          "failed to load '" + model_name +
              "', failed to poll from model repository");
    }
  } else {
    std::string ready_version_str;
    for (const auto& version_state : version_states) {
      if (version_state.second.first == ModelReadyState::READY) {
        ready_version_str += std::to_string(version_state.first);
        ready_version_str += ",";
      }
    }
    if (!ready_version_str.empty()) {
      ready_version_str.pop_back();
      return Status(
          Status::Code::INTERNAL,
          "failed to unload '" + model_name +
              "', versions that are still available: " + ready_version_str);
    }
  }

  return Status::Success;
}
Status
ModelRepositoryManager::LoadUnloadModels(
    const std::unordered_map<
        std::string, std::vector<const InferenceParameter*>>& models,
    const ActionType type, const bool unload_dependents,
    bool* all_models_polled)
{
  auto status = Status::Success;
  *all_models_polled = true;
  // Update ModelInfo related to file system accordingly
  std::set<std::string> added, deleted, modified, unmodified;
  {
    if (type == ActionType::UNLOAD) {
      for (const auto& model : models) {
        deleted.insert(model.first);
      }
    }
    // ActionType::LOAD and in model control mode
    else {
      std::set<std::string> checked_models;
      auto current_models = models;
      for (const auto& model : models) {
        checked_models.emplace(model.first);
      }

      ModelInfoMap new_infos;
#ifdef TRITON_ENABLE_ENSEMBLE
      bool first_iteration = true;
#endif  // TRITON_ENABLE_ENSEMBLE
      while (!current_models.empty()) {
        bool polled = true;
        RETURN_IF_ERROR(Poll(
            current_models, &added, &deleted, &modified, &unmodified,
            &new_infos, &polled));
        *all_models_polled &= polled;

        // More models should be polled if the polled models are ensembles
        std::unordered_map<std::string, std::vector<const InferenceParameter*>>
            next_models;
#ifdef TRITON_ENABLE_ENSEMBLE
        for (const auto& model : current_models) {
          auto it = new_infos.find(model.first);
          // Some models may be marked as deleted and not in 'new_infos'
          if (it != new_infos.end()) {
            it->second->explicitly_load_ = first_iteration;
            const auto& config = it->second->model_config_;
            if (config.has_ensemble_scheduling()) {
              for (const auto& step : config.ensemble_scheduling().step()) {
                bool need_poll =
                    checked_models.emplace(step.model_name()).second;
                if (need_poll) {
                  next_models[step.model_name()];
                }
              }
            }
          }
        }
        first_iteration = false;
#endif  // TRITON_ENABLE_ENSEMBLE
        current_models.swap(next_models);
      }

      // Only update the infos when all validation is completed
      for (const auto& model_name : added) {
        auto nitr = new_infos.find(model_name);
        infos_.emplace(model_name, std::move(nitr->second));
      }
      for (const auto& model_name : modified) {
        auto nitr = new_infos.find(model_name);
        auto itr = infos_.find(model_name);
        itr->second = std::move(nitr->second);
      }
    }
  }

  std::set<std::string> deleted_dependents;
  // Update dependency graph and load
  UpdateDependencyGraph(
      added, deleted, modified,
      unload_dependents ? &deleted_dependents : nullptr);
  // The models are in 'deleted' either when they are asked to be unloaded or
  // they are not found / are duplicated across all model repositories.
  // In all cases, should unload them and remove from 'infos_' explicitly.
  for (const auto& name : (unload_dependents ? deleted_dependents : deleted)) {
    infos_.erase(name);
    model_life_cycle_->AsyncUnload(name);
  }

  // load / unload the models affected, and check the load status of
  // the requested models
  const auto& load_status = LoadModelByDependency();
  if (status.IsOk() && (type == ActionType::LOAD)) {
    std::string load_error_message = "";
    for (const auto& model : models) {
      auto it = load_status.find(model.first);
      // If 'model.first' not in load status, it means the (re-)load is not
      // necessary because there is no change in the model's directory
      if ((it != load_status.end()) && !it->second.IsOk()) {
        load_error_message +=
            ("load failed for model '" + model.first +
             "': " + it->second.Message() + "\n");
      }
    }
    if (!load_error_message.empty()) {
      status = Status(Status::Code::INVALID_ARG, load_error_message);
    }
  }

  return status;
}
Status
ModelRepositoryManager::UnloadAllModels()
{
  Status status;
  for (const auto& name_info : infos_) {
    Status unload_status = model_life_cycle_->AsyncUnload(name_info.first);
    if (!unload_status.IsOk()) {
      status = Status(
          unload_status.ErrorCode(),
          "Failed to gracefully unload models: " + unload_status.Message());
    }
  }
  // Return the last unload error, if any; the header documents that this
  // function reports an error status.
  return status;
}

Status
ModelRepositoryManager::StopAllModels()
{
  return model_life_cycle_->StopAllModels();
}
const std::set<std::tuple<std::string, int64_t, size_t>>
ModelRepositoryManager::InflightStatus()
{
  return model_life_cycle_->InflightStatus();
}

const ModelStateMap
ModelRepositoryManager::LiveModelStates(bool strict_readiness)
{
  return model_life_cycle_->LiveModelStates(strict_readiness);
}

const ModelStateMap
ModelRepositoryManager::ModelStates()
{
  return model_life_cycle_->ModelStates();
}

const VersionStateMap
ModelRepositoryManager::VersionStates(const std::string& model_name)
{
  return model_life_cycle_->VersionStates(model_name);
}

Status
ModelRepositoryManager::ModelState(
    const std::string& model_name, const int64_t model_version,
    ModelReadyState* state)
{
  return model_life_cycle_->ModelState(model_name, model_version, state);
}
Status
ModelRepositoryManager::RepositoryIndex(
    const bool ready_only, std::vector<ModelIndex>* index)
{
  std::set<std::string> seen_models;
  std::set<std::string> duplicate_models;
  for (const auto& repository_path : repository_paths_) {
    // For any mapped models in this repository, save the mapping
    // from their subdirectory name to model name.
    std::map<std::string, std::string> models_in_repo;
    for (const auto& mapping_it : model_mappings_) {
      if (mapping_it.second.first == repository_path) {
        models_in_repo.emplace(
            BaseName(mapping_it.second.second), mapping_it.first);
      }
    }
    std::set<std::string> subdirs;
    RETURN_IF_ERROR(GetDirectorySubdirs(repository_path, &subdirs));
    for (const auto& subdir : subdirs) {
      auto model = subdir;
      auto model_it = models_in_repo.find(subdir);
      if (model_it != models_in_repo.end()) {
        model = model_it->second;
      }
      if (seen_models.find(model) != seen_models.end()) {
        duplicate_models.insert(model);
      }
      seen_models.insert(model);
    }
  }

  ModelStateMap states = ModelStates();

  for (const auto& model : seen_models) {
    // If the same model appears in multiple repositories then show it
    // as unavailable since duplicate models are not allowed to load.
    if (duplicate_models.find(model) != duplicate_models.end()) {
      index->emplace_back(
          model, -1 /* version */, ModelReadyState::UNAVAILABLE,
          MODEL_READY_REASON_DUPLICATE);
      continue;
    }

    // If there is any version/state/reason associated with the model
    // then include that in the index.
    auto sitr = states.find(model);
    if (sitr == states.end()) {
      if (!ready_only) {
        index->emplace_back(model);
      }
    } else {
      for (const auto& pr : sitr->second) {
        if (!ready_only || (pr.second.first == ModelReadyState::READY)) {
          index->emplace_back(
              model, pr.first, pr.second.first, pr.second.second);
        }
      }
    }
  }

  return Status::Success;
}
Status
ModelRepositoryManager::GetModel(
    const std::string& model_name, const int64_t model_version,
    std::shared_ptr<Model>* model)
{
  Status status = model_life_cycle_->GetModel(model_name, model_version, model);
  if (!status.IsOk()) {
    model->reset();
    status = Status(
        status.ErrorCode(), "Request for unknown model: " + status.Message());
  }
  return status;
}
Status
ModelRepositoryManager::Poll(
    const std::unordered_map<
        std::string, std::vector<const InferenceParameter*>>& models,
    std::set<std::string>* added, std::set<std::string>* deleted,
    std::set<std::string>* modified, std::set<std::string>* unmodified,
    ModelInfoMap* updated_infos, bool* all_models_polled)
{
  *all_models_polled = true;
  // empty path is the special case to indicate the model should be loaded
  // from override file content in 'models'.
  std::map<std::string, std::string> model_to_path;

  // If no model is specified, poll all models in all model repositories.
  // Otherwise, only poll the specified models
  if (models.empty()) {
    std::set<std::string> duplicated_models;
    for (const auto& repository_path : repository_paths_) {
      std::set<std::string> subdirs;
      Status status = GetDirectorySubdirs(repository_path, &subdirs);
      if (!status.IsOk()) {
        LOG_ERROR << "failed to poll model repository '" << repository_path
                  << "': " << status.Message();
        *all_models_polled = false;
      } else {
        for (const auto& subdir : subdirs) {
          if (!model_to_path
                   .emplace(subdir, JoinPath({repository_path, subdir}))
                   .second) {
            duplicated_models.insert(subdir);
            *all_models_polled = false;
          }
        }
      }
    }
    // If the model is not unique, mark as deleted to unload it
    for (const auto& model : duplicated_models) {
      model_to_path.erase(model);
      deleted->insert(model);
      LOG_ERROR << "failed to poll model '" << model
                << "': not unique across all model repositories";
    }
  }
  // If models are specified, this is explicit model control mode.
  else {
    for (const auto& model : models) {
      // Skip repository polling if override model files
      if (ModelDirectoryOverride(model.second)) {
        model_to_path.emplace(model.first, "");
        continue;
      }
      // Check model mapping first to see if matching model to load.
      bool exists = false;
      auto model_it = model_mappings_.find(model.first);
      if (model_it != model_mappings_.end()) {
        bool exists_in_this_repo = false;
        auto full_path = model_it->second.second;
        Status status = FileExists(full_path, &exists_in_this_repo);
        if (!status.IsOk()) {
          LOG_ERROR << "failed to poll mapped path '" << full_path
                    << "' for model '" << model.first
                    << "': " << status.Message();
          *all_models_polled = false;
        }
        if (exists_in_this_repo) {
          model_to_path.emplace(model.first, model_it->second.second);
          exists = true;
        } else {
          LOG_ERROR << "mapped path '" << full_path
                    << "' does not exist for model '" << model.first << "'";
          exists = false;
        }
      } else {
        for (const auto repository_path : repository_paths_) {
          bool exists_in_this_repo = false;
          const auto full_path = JoinPath({repository_path, model.first});
          Status status = FileExists(full_path, &exists_in_this_repo);
          if (!status.IsOk()) {
            LOG_ERROR << "failed to poll model repository '" << repository_path
                      << "' for model '" << model.first
                      << "': " << status.Message();
            *all_models_polled = false;
          } else if (exists_in_this_repo) {
            // Check to make sure this directory is not mapped.
            // If mapped, continue to next repository path.
            bool mapped = false;
            for (auto const& mapping : model_mappings_) {
              if (mapping.second.second == full_path) {
                mapped = true;
                break;
              }
            }
            if (mapped) {
              continue;
            }
            auto res = model_to_path.emplace(
                model.first, JoinPath({repository_path, model.first}));
            if (res.second) {
              exists = true;
            } else {
              exists = false;
              model_to_path.erase(res.first);
              LOG_ERROR << "failed to poll model '" << model.first
                        << "': not unique across all model repositories";
              break;
            }
          }
        }
      }
      // For an explicitly specified model that doesn't exist, we don't mark it
      // as deleted, we simply mark that we couldn't poll all models.
      if (!exists) {
        *all_models_polled = false;
      }
    }
  }

  // Poll each of the models. If an error happens while polling a model,
  // its state will fall back to the state before the polling.
  for (const auto& pair : model_to_path) {
    std::unique_ptr<ModelInfo> model_info;
    const auto& mit = models.find(pair.first);
    static std::vector<const InferenceParameter*> empty_params;
    auto status = InitializeModelInfo(
        pair.first, pair.second,
        ((mit == models.end()) ? empty_params : mit->second), &model_info);

    const auto& iitr = infos_.find(pair.first);
    const bool invalid_add = (!status.IsOk()) && (iitr == infos_.end());
    if (!invalid_add) {
      const auto& ret = updated_infos->emplace(pair.first, nullptr);
      if (!ret.second) {
        return Status(
            Status::Code::ALREADY_EXISTS,
            "unexpected model info for model '" + pair.first + "'");
      }

      // Classify load state and set updated info
      if (model_info == nullptr) {
        ret.first->second.reset(new ModelInfo(*iitr->second));
        unmodified->insert(pair.first);
      } else {
        ret.first->second = std::move(model_info);
        if (iitr != infos_.end()) {
          modified->insert(pair.first);
        } else {
          added->insert(pair.first);
        }
      }
    }

    if (!status.IsOk()) {
      LOG_ERROR << "Poll failed for model directory '" << pair.first
                << "': " << status.Message();
      *all_models_polled = false;
    }
  }

  return Status::Success;
}
bool
ModelRepositoryManager::ModelDirectoryOverride(
    const std::vector<const InferenceParameter*>& model_params)
{
  for (const auto& param : model_params) {
    if (param->Name().rfind(file_prefix, 0) == 0) {
      // param name starts with prefix if user provides override file
      return true;
    }
  }
  return false;
}
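The rfind(prefix, 0) == 0 test above is the pre-C++20 idiom for "string starts with prefix": a reverse search anchored at position 0 can only ever match at position 0 or return npos. A small sketch of the idiom (the parameter names are only illustrative):

#include <cassert>
#include <string>

// Pre-C++20 starts_with: rfind anchored at position 0 either matches the
// prefix at index 0 or returns std::string::npos, never a later index.
bool
starts_with(const std::string& s, const std::string& prefix)
{
  return s.rfind(prefix, 0) == 0;
}

int
main()
{
  assert(starts_with("file:config.pbtxt", "file:"));  // override parameter
  assert(!starts_with("config", "file:"));            // ordinary parameter
  return 0;
}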
Status
ModelRepositoryManager::InitializeModelInfo(
    const std::string& name, const std::string& path,
    const std::vector<const InferenceParameter*>& params,
    std::unique_ptr<ModelInfo>* info)
{
  std::unique_ptr<ModelInfo> linfo(new ModelInfo());
  linfo->model_path_ = path;

  bool unmodified = false;
  const auto iitr = infos_.find(name);
  // Set 'prev_mtime_ns_' if there is existing ModelInfo
  if (iitr != infos_.end()) {
    linfo->prev_mtime_ns_ = iitr->second->mtime_nsec_;
  } else {
    linfo->prev_mtime_ns_ = 0;
  }

  // Set 'mtime_nsec_' and override 'model_path_' if current path is empty
  // (file override is specified)
  if (linfo->model_path_.empty()) {
    // Need to localize the override files, use repo agent to manage
    // the lifecycle of the localized files
    std::shared_ptr<TritonRepoAgent> localize_agent(new LocalizeRepoAgent());
    std::unique_ptr<TritonRepoAgentModel> localize_agent_model;
    RETURN_IF_ERROR(TritonRepoAgentModel::Create(
        TRITONREPOAGENT_ARTIFACT_FILESYSTEM, "", inference::ModelConfig(),
        localize_agent, {}, &localize_agent_model));

    // Set agent model state so the repo agent can access the encoded files
    // Using const_cast here but we are safe as the RepoAgent will not
    // modify the state
    localize_agent_model->SetState(
        const_cast<void*>(reinterpret_cast<const void*>(&params)));
    RETURN_IF_ERROR(
        localize_agent_model->InvokeAgent(TRITONREPOAGENT_ACTION_LOAD));

    const char* location;
    TRITONREPOAGENT_ArtifactType type;
    RETURN_IF_ERROR(localize_agent_model->Location(&type, &location));

    // For file override, set 'mtime_nsec_' to minimum value so that
    // the next load without override will trigger re-load to undo
    // the override while the local files may still be unchanged.
    linfo->mtime_nsec_ = 0;
    linfo->model_path_ = location;
    linfo->agent_model_list_.reset(new TritonRepoAgentModelList());
    linfo->agent_model_list_->AddAgentModel(std::move(localize_agent_model));
  } else {
    if (iitr == infos_.end()) {
      linfo->mtime_nsec_ = GetModifiedTime(std::string(linfo->model_path_));
    } else {
      // Check the current timestamps to determine if model actually has been
      // modified
      linfo->mtime_nsec_ = linfo->prev_mtime_ns_;
      unmodified =
          !IsModified(std::string(linfo->model_path_), &linfo->mtime_nsec_);
    }
  }

  // Set 'model_config_'
  bool parsed_config = false;
  // Check if there is config override
  for (const auto& override_parameter : params) {
    if ((override_parameter->Name() == "config") &&
        (override_parameter->Type() == TRITONSERVER_PARAMETER_STRING)) {
      // When override happens, set 'mtime_nsec_' to minimum value so that
      // the next load without override will trigger re-load to undo
      // the override while the local files may still be unchanged.
      linfo->mtime_nsec_ = 0;
      unmodified = false;

      const std::string& override_config = override_parameter->ValueString();
      auto err = JsonToModelConfig(
          override_config, 1 /* config_version */, &linfo->model_config_);
      if (!err.IsOk()) {
        return Status(
            Status::Code::INVALID_ARG,
            "Invalid config override: " + std::string(err.Message()));
      }
      parsed_config = true;
      break;
    } else if (override_parameter->Name().rfind(file_prefix, 0) != 0) {
      return Status(
          Status::Code::INVALID_ARG,
          "Unrecognized load parameter '" + override_parameter->Name() +
              "' with type '" +
              TRITONSERVER_ParameterTypeString(override_parameter->Type()) +
              "'");
    }
  }

  // Polling model is considered unmodified by this point and can be returned
  // with info == nullptr
  if (unmodified) {
    return Status::Success;
  }

  // Create the associated repo agent models when a model is to be loaded,
  // this must be done before normalizing model config as agents might
  // redirect to use the model config at a different location
  if (!parsed_config) {
    const auto config_path = JoinPath({linfo->model_path_, kModelConfigPbTxt});
    bool model_config_exists = false;
    RETURN_IF_ERROR(FileExists(config_path, &model_config_exists));
    // model config can be missing if auto fill is set
    if (autofill_ && !model_config_exists) {
      linfo->model_config_.Clear();
    } else {
      RETURN_IF_ERROR(ReadTextProto(config_path, &linfo->model_config_));
      parsed_config = true;
    }
  }
  if (parsed_config) {
    RETURN_IF_ERROR(CreateAgentModelListWithLoadAction(
        linfo->model_config_, linfo->model_path_, &linfo->agent_model_list_));
    if (linfo->agent_model_list_ != nullptr) {
      // Get the latest repository path
      const char* location;
      TRITONREPOAGENT_ArtifactType artifact_type;
      RETURN_IF_ERROR(linfo->agent_model_list_->Back()->Location(
          &artifact_type, &location));
      auto latest_path = std::string(location);
      linfo->model_path_ = latest_path;
    }
  }
  linfo->is_config_provided_ = parsed_config;

  // Try to automatically generate missing parts of the model
  // configuration (autofill) that don't require model detail
  RETURN_IF_ERROR(GetNormalizedModelConfig(
      name, linfo->model_path_, min_compute_capability_,
      &linfo->model_config_));

  // Note that the model inputs and outputs are not validated until the model
  // is initialized as they may not be auto-completed until then.
  RETURN_IF_ERROR(
      ValidateModelConfig(linfo->model_config_, min_compute_capability_));
  if (!autofill_) {
    RETURN_IF_ERROR(ValidateModelIOConfig(linfo->model_config_));
  }

  // If the model is mapped, update its config name based on the
  // mapping.
  if (model_mappings_.find(name) != model_mappings_.end()) {
    linfo->model_config_.set_name(name);
  } else {
    // If there is no model mapping, make sure the name of the model
    // matches the name of the directory. This is a somewhat arbitrary
    // requirement but seems like good practice to require it of the user.
    // It also acts as a check to make sure we don't have two different
    // models with the same name.
    if (linfo->model_config_.name() != name) {
      return Status(
          Status::Code::INVALID_ARG,
          "unexpected directory name '" + name + "' for model '" +
              linfo->model_config_.name() +
              "', directory name must equal model name");
    }
  }

  *info = std::move(linfo);
  return Status::Success;
}
Status
ModelRepositoryManager::UpdateDependencyGraph(
    const std::set<std::string>& added, const std::set<std::string>& deleted,
    const std::set<std::string>& modified,
    std::set<std::string>* deleted_dependents)
{
  // Update the dependency graph; if the state of a node changes, all its
  // downstreams will be affected.
  // For deleted nodes: drop from dependency_graph_, and add to missing_nodes_
  // if their downstreams are not empty. affected_nodes are all ensembles, as
  // only ensembles depend on other models.
  std::set<DependencyNode*> affected_nodes;
  std::set<DependencyNode*> updated_nodes;
  std::set<std::string> current_deleted = deleted;
  while (!current_deleted.empty()) {
    std::set<std::string> next_deleted;
    for (const auto& model_name : current_deleted) {
      auto it = dependency_graph_.find(model_name);
      if (it != dependency_graph_.end()) {
        // remove this node from its upstreams
        for (auto& upstream : it->second->upstreams_) {
          upstream.first->downstreams_.erase(it->second.get());
          // Check if the upstream should be removed as well
          if ((deleted_dependents != nullptr) &&
              (upstream.first->downstreams_.empty()) &&
              (!upstream.first->explicitly_load_)) {
            next_deleted.emplace(upstream.first->model_name_);
          }
        }
        it->second->upstreams_.clear();

        if (!it->second->downstreams_.empty()) {
          UncheckDownstream(&it->second->downstreams_, &affected_nodes);
          // mark this node as missing upstream in its downstreams
          for (auto& downstream : it->second->downstreams_) {
            downstream->missing_upstreams_.emplace(it->second.get());
          }
          missing_nodes_.emplace(
              std::make_pair(model_name, std::move(it->second)));
        }

        // Make sure deleted node will not be in affected nodes
        affected_nodes.erase(it->second.get());
        dependency_graph_.erase(it);
      }
      if (deleted_dependents != nullptr) {
        deleted_dependents->emplace(model_name);
      }
    }
    current_deleted.swap(next_deleted);
  }

  // modified, invalidate (uncheck) all downstreams
  for (const auto& model_name : modified) {
    auto it = dependency_graph_.find(model_name);
    if (it != dependency_graph_.end()) {
      UncheckDownstream(&it->second->downstreams_, &affected_nodes);
      ModelInfo* info = nullptr;
      GetModelInfo(model_name, &info);
      it->second->model_config_ = info->model_config_;
      it->second->explicitly_load_ = info->explicitly_load_;
      // remove this node from its upstream node
      for (auto& upstream : it->second->upstreams_) {
        upstream.first->downstreams_.erase(it->second.get());
      }
      it->second->upstreams_.clear();
      it->second->checked_ = false;
      it->second->status_ = Status::Success;
      updated_nodes.emplace(it->second.get());
    }
  }

  // added, add to dependency_graph, if in missing_node, invalidate (uncheck)
  // and associate all downstreams, remove from missing_node
  for (const auto& model_name : added) {
    std::unique_ptr<DependencyNode> added_node;
    auto it = missing_nodes_.find(model_name);
    if (it != missing_nodes_.end()) {
      UncheckDownstream(&it->second->downstreams_, &affected_nodes);
      // remove this node from missing upstream node in its downstream nodes
      for (auto& downstream : it->second->downstreams_) {
        downstream->missing_upstreams_.erase(it->second.get());
      }
      it->second->checked_ = false;
      added_node = std::move(it->second);
      missing_nodes_.erase(it);
    } else {
      // Right now, nothing is going to be filled until validation
      added_node.reset(new DependencyNode(model_name));
    }
    ModelInfo* info = nullptr;
    GetModelInfo(model_name, &info);
    added_node->model_config_ = info->model_config_;
    added_node->explicitly_load_ = info->explicitly_load_;
    updated_nodes.emplace(added_node.get());
    dependency_graph_.emplace(
        std::make_pair(model_name, std::move(added_node)));
  }

  auto& affected_ensembles = affected_nodes;
  for (auto& updated_node : updated_nodes) {
    bool is_ensemble = ConnectDependencyGraph(updated_node);
    if (is_ensemble) {
      affected_ensembles.emplace(updated_node);
    }
  }

#ifdef TRITON_ENABLE_ENSEMBLE
  // After the dependency graph is updated, check ensemble dependencies
  for (auto& ensemble : affected_ensembles) {
    if (ensemble->status_.IsOk()) {
      if (!ensemble->missing_upstreams_.empty()) {
        std::string name_list;
        for (auto it = ensemble->missing_upstreams_.begin();
             it != ensemble->missing_upstreams_.end(); it++) {
          if (it != ensemble->missing_upstreams_.begin()) {
            name_list += ", ";
          }
          name_list += (*it)->model_name_;
        }
        ensemble->status_ = Status(
            Status::Code::INVALID_ARG,
            "ensemble " + ensemble->model_name_ +
                " contains models that are not available: " + name_list);
      } else {
        ensemble->status_ = CircularcyCheck(ensemble, ensemble);
      }
    }
  }
#endif  // TRITON_ENABLE_ENSEMBLE
  return Status::Success;
}
Status
ModelRepositoryManager::RegisterModelRepository(
    const std::string& repository,
    const std::unordered_map<std::string, std::string>& model_mapping)
{
  if (!model_control_enabled_) {
    return Status(
        Status::Code::UNSUPPORTED,
        "repository registration is not allowed if model control mode is not "
        "EXPLICIT");
  }
  bool is_directory = false;
  auto status = IsDirectory(repository, &is_directory);
  if (!status.IsOk() || !is_directory) {
    return Status(
        Status::Code::INVALID_ARG,
        (std::string("failed to register '") + repository +
         "', repository not found")
            .c_str());
  }
  {
    // Serialize all operations that change model state
    std::lock_guard<std::mutex> lock(poll_mu_);
    // Check repository and mapped models do not yet exist.
    if (repository_paths_.find(repository) != repository_paths_.end()) {
      return Status(
          Status::Code::ALREADY_EXISTS,
          "model repository '" + repository + "' has already been registered");
    }
    for (const auto& mapping : model_mapping) {
      if (model_mappings_.find(mapping.first) != model_mappings_.end()) {
        return Status(
            Status::Code::ALREADY_EXISTS,
            (std::string("failed to register '") + mapping.first +
             "', there is a conflicting mapping for '" +
             std::string(mapping.first) + "'")
                .c_str());
      }
    }
    repository_paths_.emplace(repository);
    for (const auto& mapping : model_mapping) {
      model_mappings_.emplace(
          mapping.first,
          std::make_pair(repository, JoinPath({repository, mapping.second})));
    }
  }
  LOG_INFO << "Model repository registered: " << repository;
  return Status::Success;
}
Status
ModelRepositoryManager::UnregisterModelRepository(const std::string& repository)
{
  if (!model_control_enabled_) {
    return Status(
        Status::Code::UNSUPPORTED,
        "repository unregistration is not allowed if model control mode is "
        "not EXPLICIT");
  }
  {
    std::lock_guard<std::mutex> lock(poll_mu_);
    if (repository_paths_.erase(repository) != 1) {
      return Status(
          Status::Code::INVALID_ARG,
          "failed to unregister '" + repository + "', repository not found");
    }
    std::set<std::string> models_to_delete;
    for (auto const& mapping : model_mappings_) {
      if (mapping.second.first == repository) {
        models_to_delete.insert(mapping.first);
      }
    }
    for (auto const& model : models_to_delete) {
      model_mappings_.erase(model);
    }
  }
  LOG_INFO << "Model repository unregistered: " << repository;
  return Status::Success;
}
Status
ModelRepositoryManager::CircularcyCheck(
    DependencyNode* current_node, const DependencyNode* start_node)
{
  for (auto& downstream : current_node->downstreams_) {
    if (downstream->model_name_ == start_node->model_name_) {
      return Status(
          Status::Code::INVALID_ARG,
          "circular dependency between ensembles: " + start_node->model_name_ +
              " -> ... -> " + current_node->model_name_ + " -> " +
              start_node->model_name_);
    } else {
      const auto status = CircularcyCheck(downstream, start_node);
      if (!status.IsOk() && current_node->status_.IsOk()) {
        current_node->status_ = status;
        return status;
      }
    }
  }
  return Status::Success;
}
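CircularcyCheck above walks the downstream edges depth-first and reports a cycle when any path returns to the ensemble it started from. The same check on a bare adjacency list, as a minimal sketch (a visited set is added here so the sketch also terminates on cycles that do not pass through the start node):

#include <iostream>
#include <map>
#include <set>
#include <string>

using Graph = std::map<std::string, std::set<std::string>>;

// Depth-first walk over downstream edges; a cycle through 'start' exists if
// any path reaches 'start' again.
bool
reaches_start(
    const Graph& g, const std::string& current, const std::string& start,
    std::set<std::string>& visited)
{
  if (!visited.insert(current).second) {
    return false;  // already explored via another path
  }
  auto it = g.find(current);
  if (it == g.end()) {
    return false;
  }
  for (const auto& next : it->second) {
    if (next == start || reaches_start(g, next, start, visited)) {
      return true;
    }
  }
  return false;
}

int
main()
{
  Graph g{{"A", {"B"}}, {"B", {"C"}}, {"C", {"A"}}};  // A -> B -> C -> A
  std::set<std::string> visited;
  std::cout << (reaches_start(g, "A", "A", visited) ? "cycle" : "acyclic")
            << "\n";
  return 0;
}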
void
ModelRepositoryManager::UncheckDownstream(
    NodeSet* downstreams, NodeSet* updated_nodes)
{
  // Mark downstream nodes as unchecked recursively
  for (auto& node : *downstreams) {
    if (node->checked_) {
      node->checked_ = false;
      node->status_ = Status::Success;
      UncheckDownstream(&node->downstreams_, updated_nodes);
      updated_nodes->emplace(node);
    }
  }
}
bool
ModelRepositoryManager::ConnectDependencyGraph(DependencyNode* updated_node)
{
  // Check the node's model config to determine if it depends on other models
  // and if those models are present
  updated_node->upstreams_.clear();
  updated_node->missing_upstreams_.clear();
  if (updated_node->model_config_.has_ensemble_scheduling()) {
    for (const auto& step :
         updated_node->model_config_.ensemble_scheduling().step()) {
      DependencyNode* upstream_node = nullptr;
      const auto& model_name = step.model_name();
      auto dit = dependency_graph_.find(model_name);
      if (dit == dependency_graph_.end()) {
        auto mit = missing_nodes_.find(model_name);
        if (mit == missing_nodes_.end()) {
          std::unique_ptr<DependencyNode> node(new DependencyNode(model_name));
          updated_node->missing_upstreams_.emplace(node.get());
          mit = missing_nodes_.emplace(model_name, std::move(node)).first;
        }
        // Add the node to missing node's downstream so that when the missing
        // node is added, the downstreams can be found easily.
        mit->second->downstreams_.emplace(updated_node);
        upstream_node = mit->second.get();
      } else {
        dit->second->downstreams_.emplace(updated_node);
        upstream_node = dit->second.get();
      }
      auto res = updated_node->upstreams_.emplace(
          upstream_node, std::set<int64_t>({step.model_version()}));
      // If map insertion doesn't happen, the same model is required in a
      // different step, so insert the version into the existing required
      // version set.
      if (!res.second) {
        res.first->second.insert(step.model_version());
      }
    }
    return true;
  }
  return false;
}
Status
ModelRepositoryManager::GetModelInfo(
    const std::string& name, ModelInfo** model_info)
{
  const auto itr = infos_.find(name);
  if (itr == infos_.end()) {
    return Status(
        Status::Code::NOT_FOUND, "no configuration for model '" + name + "'");
  }
  *model_info = itr->second.get();
  return Status::Success;
}
std::pair<ModelRepositoryManager::NodeSet, ModelRepositoryManager::NodeSet>
ModelRepositoryManager::ModelsToLoadUnload(const NodeSet& loaded_models)
{
  // <valid model set, invalid model set>
  std::pair<NodeSet, NodeSet> res;
  // first call to this function
  if (loaded_models.empty()) {
    for (auto& pair : dependency_graph_) {
      auto node = pair.second.get();
      // only care about nodes that are affected by the update
      if (!node->checked_) {
        if (CheckNode(node)) {
          if (node->status_.IsOk()) {
            res.first.emplace(node);
          } else {
            res.second.emplace(node);
          }
        }
      }
    }
  } else {
    for (const auto& model : loaded_models) {
      for (auto node : model->downstreams_) {
        // only care about nodes that are affected by the update
        if (!node->checked_) {
          if (CheckNode(node)) {
            if (node->status_.IsOk()) {
              res.first.emplace(node);
            } else {
              res.second.emplace(node);
            }
          }
        }
      }
    }
  }
  for (auto& node : res.first) {
    node->checked_ = true;
  }
  for (auto& node : res.second) {
    node->checked_ = true;
  }
  return res;
}
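ModelsToLoadUnload and the while loop in LoadModelByDependency together behave like a level-by-level topological traversal: each call returns the not-yet-checked nodes whose upstreams are all settled, those get loaded, and their downstreams become the candidates for the next round. A stripped-down sketch of that round structure over a plain in-degree map (the model names are illustrative):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int
main()
{
  // edges: upstream -> downstreams (an ensemble depends on its steps)
  std::map<std::string, std::set<std::string>> downstreams{
      {"resnet", {"ensemble"}}, {"bert", {"ensemble"}}};
  // number of unmet upstreams per node
  std::map<std::string, int> pending{
      {"resnet", 0}, {"bert", 0}, {"ensemble", 2}};

  // Seed the first round with every node that has no unmet upstream.
  std::vector<std::string> round;
  for (const auto& p : pending) {
    if (p.second == 0) round.push_back(p.first);
  }
  // Each round loads its nodes, then retires the edges it satisfies.
  while (!round.empty()) {
    std::vector<std::string> next;
    for (const auto& name : round) {
      std::cout << "load " << name << "\n";
      for (const auto& d : downstreams[name]) {
        if (--pending[d] == 0) next.push_back(d);
      }
    }
    round.swap(next);  // prints: load bert, load resnet, load ensemble
  }
  return 0;
}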
bool
ModelRepositoryManager::CheckNode(DependencyNode* node)
{
  bool node_ready = true;
  // if the node is in invalid status, mark as ready as we know
  // it should not be loaded
  if (node->status_.IsOk()) {
    for (auto& upstream : node->upstreams_) {
      if (!upstream.first->checked_) {
        node_ready = false;
        break;
      }
      if (!upstream.first->status_.IsOk()) {
        node->status_ = Status(
            Status::Code::INVALID_ARG,
            "ensemble '" + node->model_name_ + "' depends on '" +
                upstream.first->model_name_ + "' which is not valid");
      } else if (upstream.first->loaded_versions_.empty()) {
        node->status_ = Status(
            Status::Code::INVALID_ARG,
            "ensemble '" + node->model_name_ + "' depends on '" +
                upstream.first->model_name_ + "' which has no loaded version");
      } else {
        for (const auto& required_version : upstream.second) {
          if (required_version == -1) {
            continue;
          }
          auto it = upstream.first->loaded_versions_.find(required_version);
          if (it == upstream.first->loaded_versions_.end()) {
            node->status_ = Status(
                Status::Code::INVALID_ARG,
                "ensemble '" + node->model_name_ + "' depends on '" +
                    upstream.first->model_name_ + "' whose required version " +
                    std::to_string(required_version) + " is not loaded");
          }
        }
      }
      if (!node->status_.IsOk()) {
        break;
      }
    }
#ifdef TRITON_ENABLE_ENSEMBLE
    // Validate ensemble config if the node is ready. By this point, the
    // depending models are loaded and their configs are completed
    if (node_ready && node->status_.IsOk()) {
      node->status_ = ValidateEnsembleConfig(this, node);
    }
#endif  // TRITON_ENABLE_ENSEMBLE
  }
  return node_ready;
}

}}  // namespace triton::core
3rdparty/core-r22.12/src/model_repository_manager.h deleted 100644 → 0
// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once
#include <functional>
#include <map>
#include <mutex>
#include <set>
#include "infer_parameter.h"
#include "model_config.pb.h"
#include "model_lifecycle.h"
#include "status.h"
#include "triton/common/model_config.h"
namespace triton { namespace core {

class InferenceServer;
class Model;

// [FIXME] should have separated load / unload functions for clarity
enum ActionType { NO_ACTION, LOAD, UNLOAD };
/// Predefined reason strings
#define MODEL_READY_REASON_DUPLICATE "model appears in two or more repositories"
/// An object to manage the model repository active in the server.
class ModelRepositoryManager {
 public:
  // Index information for a model.
  struct ModelIndex {
    ModelIndex(const std::string& n)
        : name_only_(true), name_(n), version_(-1),
          state_(ModelReadyState::UNKNOWN)
    {
    }
    ModelIndex(
        const std::string& n, const int64_t v, const ModelReadyState s,
        const std::string& r)
        : name_only_(false), name_(n), version_(v), state_(s), reason_(r)
    {
    }
    const bool name_only_;
    const std::string name_;
    const int64_t version_;
    const ModelReadyState state_;
    const std::string reason_;
  };

  /// A basic unit in dependency graph that records the models seen by the
  /// model repository manager.
  struct DependencyNode {
    DependencyNode(const std::string& model_name)
        : model_name_(model_name), status_(Status::Success), checked_(false)
    {
    }
    std::string model_name_;
    Status status_;
    bool checked_;
    bool explicitly_load_;
    inference::ModelConfig model_config_;
    std::set<int64_t> loaded_versions_;
    std::set<DependencyNode*> missing_upstreams_;
    std::unordered_map<DependencyNode*, std::set<int64_t>> upstreams_;
    std::set<DependencyNode*> downstreams_;
  };

  ~ModelRepositoryManager();

  /// Create a manager for a repository.
  /// \param server The pointer to the inference server.
  /// \param server_version The version of the inference server.
  /// \param repository_paths A set of file-system paths of the repositories.
  /// \param startup_models A set of models to be loaded at startup
  /// if model control is enabled.
  /// \param strict_model_config If false attempt to autofill missing required
  /// information in each model configuration.
  /// \param polling_enabled If true, then PollAndUpdate() is allowed.
  /// Otherwise, it is not allowed.
  /// \param model_control_enabled If true, then LoadUnloadModel() is allowed
  /// and the models in the model repository will not be loaded at startup.
  /// Otherwise, LoadUnloadModel() is not allowed and the models will be
  /// loaded. Cannot be set to true if polling_enabled is true.
  /// \param life_cycle_options The options to configure ModelLifeCycle.
  /// \param model_repository_manager Return the model repository manager.
  /// \return The error status.
  static Status Create(
      InferenceServer* server, const std::string& server_version,
      const std::set<std::string>& repository_paths,
      const std::set<std::string>& startup_models,
      const bool strict_model_config, const bool polling_enabled,
      const bool model_control_enabled,
      const ModelLifeCycleOptions& life_cycle_options,
      std::unique_ptr<ModelRepositoryManager>* model_repository_manager);

  /// Poll the model repository to determine the new set of models and
  /// compare with the current set. And serve the new set of models based
  /// on their version policy.
  Status PollAndUpdate();

  /// Load or unload a specified model.
  /// \param models The models and the parameters to be loaded or unloaded
  /// \param type The type action to be performed. If the action is LOAD and
  /// the model has been loaded, the model will be re-loaded.
  /// \return error status. Return "NOT_FOUND" if it tries to load
  /// a non-existing model or if it tries to unload a model that hasn't been
  /// loaded.
  Status LoadUnloadModel(
      const std::unordered_map<
          std::string, std::vector<const InferenceParameter*>>& models,
      const ActionType type, const bool unload_dependents);

  /// Unload all models. This function should be called before shutting down
  /// the model repository manager.
  /// \return error status.
  Status UnloadAllModels();

  /// Instruct all models to stop accepting new inference requests. However,
  /// the models are still capable of processing inference requests
  /// if the model considers them as part of the in-flight inference.
  /// \return error status.
  Status StopAllModels();

  /// \return the number of in-flight inferences for all versions of all
  /// models. The set element will be a tuple of <model_name, model_version,
  /// in-flight inference count>. Note that a model version will not be
  /// included if it doesn't have in-flight inferences.
  const std::set<std::tuple<std::string, int64_t, size_t>> InflightStatus();

  /// \param strict_readiness If true, only models that have at least one
  /// ready version will be considered as live. Otherwise, the models that
  /// have loading / unloading versions will also be live.
  /// \return the state of all versions of all live models.
  const ModelStateMap LiveModelStates(bool strict_readiness = false);

  /// \return the state of all versions of all models that have ever
  /// been (attempted) loaded over the lifetime of the server.
  const ModelStateMap ModelStates();

  /// \return the states of all versions of a specific model.
  const VersionStateMap VersionStates(const std::string& model_name);

  /// \return the ready-state of a specific model version.
  Status ModelState(
      const std::string& model_name, const int64_t model_version,
      ModelReadyState* state);

  /// Get the index of all models in all repositories.
  /// \param ready_only If true return only index of models that are ready.
  /// \param index Returns the index.
  /// \return error status.
  Status RepositoryIndex(
      const bool ready_only, std::vector<ModelIndex>* index);

  /// Obtain the specified model.
  /// \param model_name The name of the model.
  /// \param model_version The version of the model.
  /// \param model Return the model object.
  /// \return error status.
  Status GetModel(
      const std::string& model_name, const int64_t model_version,
      std::shared_ptr<Model>* model);

  /// Register model repository path.
  /// \param repository Path to model repository.
  /// \param model_mapping Mapping with (overridden) model name as key, subdir
  /// name as value.
  /// \return error status
  Status RegisterModelRepository(
      const std::string& repository,
      const std::unordered_map<std::string, std::string>& model_mapping);

  /// Unregister model repository path.
  /// \param repository Path to model repository.
  /// \return error status
  Status UnregisterModelRepository(const std::string& repository);
 private:
  struct ModelInfo;
  // Map from model name to information about the model.
  using ModelInfoMap =
      std::unordered_map<std::string, std::unique_ptr<ModelInfo>>;
  // Set of DependencyNode
  using NodeSet = std::set<DependencyNode*>;

  ModelRepositoryManager(
      const std::set<std::string>& repository_paths, const bool autofill,
      const bool polling_enabled, const bool model_control_enabled,
      const double min_compute_capability,
      std::unique_ptr<ModelLifeCycle> life_cycle);

  /// The internal function that is called in Create() and PollAndUpdate().
  Status PollAndUpdateInternal(bool* all_models_polled);

  /// The internal function that loads or unloads a set of models.
  Status LoadUnloadModels(
      const std::unordered_map<
          std::string, std::vector<const InferenceParameter*>>& models,
      const ActionType type, const bool unload_dependents,
      bool* all_models_polled);

  /// Poll the requested models in the model repository and
  /// compare with the current set. Return the additions, deletions,
  /// and modifications that have occurred. This function will not update
  /// the current model info; it is the caller's responsibility to do so.
  /// \param models The map from models to be polled to their associated
  /// parameters.
  /// \param added The names of the models added to the repository.
  /// \param deleted The names of the models removed from the repository.
  /// \param modified The names of the models remaining in the
  /// repository that have been changed.
  /// \param unmodified The names of the models remaining in the
  /// repository that have not changed.
  /// \param updated_infos The model infos retrieved from the poll.
  /// \param all_models_polled Return true if all models are polled and
  /// their model configurations are validated successfully. Instead of
  /// aborting the polling, the models that fail will be ignored and their
  /// model infos will stay in the previous state.
  /// \return The error status.
  Status Poll(
      const std::unordered_map<
          std::string, std::vector<const InferenceParameter*>>& models,
      std::set<std::string>* added, std::set<std::string>* deleted,
      std::set<std::string>* modified, std::set<std::string>* unmodified,
      ModelInfoMap* updated_infos, bool* all_models_polled);

  /// Helper function for Poll() to initialize ModelInfo for the model.
  /// \param name The name of the model.
  /// \param path The model path. Empty path means the model is provided via
  /// 'params'
  /// \param params The model parameters provided for polling model.
  /// \param info Return the updated ModelInfo. 'nullptr' will be returned if
  /// existing ModelInfo for the model should be reused.
  /// \return The error status.
  Status InitializeModelInfo(
      const std::string& name, const std::string& path,
      const std::vector<const InferenceParameter*>& params,
      std::unique_ptr<ModelInfo>* info);

  /// Load models based on the dependency graph. The function will iteratively
  /// load models whose dependencies have all been loaded, and unload models
  /// if their dependencies are no longer satisfied.
  /// \return The status of the model loads.
  std::map<std::string, Status> LoadModelByDependency();

  /// Helper function to update the dependency graph based on the poll result
  /// \param added The names of the models added to the repository.
  /// \param deleted The names of the models removed from the repository.
  /// \param modified The names of the models remaining in the
  /// repository that have been changed.
  /// \param deleted_dependents The names of dependent models to be removed
  /// from the repository.
  /// \return The error status.
  Status UpdateDependencyGraph(
      const std::set<std::string>& added, const std::set<std::string>& deleted,
      const std::set<std::string>& modified,
      std::set<std::string>* deleted_dependents = nullptr);

  /// Helper function to uncheck the nodes because the models that they depend
  /// on have changed. The unchecked nodes will be validated again.
  /// The function will be called recursively to uncheck all downstreams.
  /// \param downstreams The nodes to be unchecked.
  /// \param updated_nodes Return the nodes that have been unchecked
  void UncheckDownstream(NodeSet* downstreams, NodeSet* updated_nodes);

  /// Helper function to construct the edges between nodes in dependency
  /// graph.
  /// \param updated_node The node that is newly added or modified.
  /// \return True if the node represents an ensemble model. False otherwise.
  bool ConnectDependencyGraph(DependencyNode* updated_node);

  /// Get the model info for a named model.
  /// \param name The model name.
  /// \param model_info Returns the model information.
  /// \return OK if found, NOT_FOUND otherwise.
  Status GetModelInfo(const std::string& name, ModelInfo** model_info);

  /// Get the models to be loaded / unloaded based on the model loaded in
  /// previous iteration.
  /// \param loaded_models The models loaded / unloaded in previous iteration.
  /// Unloaded models will be represented as models with no loaded versions.
  /// \return A pair of node set containing models to be loaded and models to
  /// be unloaded for the next iteration.
  std::pair<NodeSet, NodeSet> ModelsToLoadUnload(const NodeSet& loaded_models);

  /// Check if the node is ready for the next iteration. A node is ready if
  /// the node is invalid (containing invalid model config or its dependencies
  /// failed to load) or all of its dependencies are satisfied.
  /// \param node The node to be checked.
  /// \return True if the node is ready. False otherwise.
  bool CheckNode(DependencyNode* node);

  Status CircularcyCheck(
      DependencyNode* current_node, const DependencyNode* start_node);

  bool ModelDirectoryOverride(
      const std::vector<const InferenceParameter*>& model_params);

  std::set<std::string> repository_paths_;
  const bool autofill_;
  const bool polling_enabled_;
  const bool model_control_enabled_;
  const double min_compute_capability_;
  std::mutex poll_mu_;
  ModelInfoMap infos_;

  std::unordered_map<std::string, std::unique_ptr<DependencyNode>>
      dependency_graph_;
  std::unordered_map<std::string, std::unique_ptr<DependencyNode>>
      missing_nodes_;

  // Mappings from (overridden) model names to a pair of their repository and
  // absolute path
  std::unordered_map<std::string, std::pair<std::string, std::string>>
      model_mappings_;

  std::unique_ptr<ModelLifeCycle> model_life_cycle_;
};

}}  // namespace triton::core
3rdparty/core-r22.12/src/numa_utils.cc deleted 100644 → 0
// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "numa_utils.h"
#ifndef _WIN32
#include <numa.h>
#include <numaif.h>
#endif
#include "triton/common/logging.h"
namespace triton { namespace core {

namespace {
std::string
VectorToString(const std::vector<int>& vec)
{
  std::string str("[");
  for (const auto& element : vec) {
    str += std::to_string(element);
    str += ",";
  }
  str += "]";
  return str;
}
Status
ParseIntOption(const std::string& msg, const std::string& arg, int* value)
{
  try {
    *value = std::stoi(arg);
  }
  catch (const std::invalid_argument& ia) {
    return Status(
        Status::Code::INVALID_ARG,
        msg + ": Can't parse '" + arg + "' to integer");
  }
  return Status::Success;
}
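std::stoi reports failure by throwing rather than by a return code, which is why ParseIntOption translates the exception into a Status. Note that std::stoi can also throw std::out_of_range for values that do not fit in an int; a sketch of a variant that covers both cases (a hypothetical helper, not part of this file):

#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical helper: like ParseIntOption, but also covering
// std::out_of_range, which std::stoi throws for out-of-range values.
bool
parse_int(const std::string& arg, int* value, std::string* err)
{
  try {
    *value = std::stoi(arg);
    return true;
  }
  catch (const std::invalid_argument&) {
    *err = "can't parse '" + arg + "' to integer";
  }
  catch (const std::out_of_range&) {
    *err = "'" + arg + "' is out of integer range";
  }
  return false;
}

int
main()
{
  int v = 0;
  std::string err;
  std::cout << parse_int("3", &v, &err) << " " << v << "\n";  // 1 3
  std::cout << parse_int("99999999999", &v, &err) << " " << err << "\n";
  return 0;
}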
}  // namespace
// NUMA setting will be ignored on Windows platform
#ifdef _WIN32
Status
SetNumaConfigOnThread(
    const triton::common::HostPolicyCmdlineConfig& host_policy)
{
  return Status::Success;
}

Status
SetNumaMemoryPolicy(const triton::common::HostPolicyCmdlineConfig& host_policy)
{
  return Status::Success;
}

Status
GetNumaMemoryPolicyNodeMask(unsigned long* node_mask)
{
  *node_mask = 0;
  return Status::Success;
}

Status
ResetNumaMemoryPolicy()
{
  return Status::Success;
}

Status
SetNumaThreadAffinity(
    std::thread::native_handle_type thread,
    const triton::common::HostPolicyCmdlineConfig& host_policy)
{
  return Status::Success;
}
#else
// Use variable to make sure no NUMA related function is actually called
// if Triton is not running with NUMA awareness. i.e. Extra docker permission
// is needed to call the NUMA functions and this ensures backward compatibility.
thread_local bool numa_set = false;
Status
SetNumaConfigOnThread(
    const triton::common::HostPolicyCmdlineConfig& host_policy)
{
  // Set thread affinity
  RETURN_IF_ERROR(SetNumaThreadAffinity(pthread_self(), host_policy));

  // Set memory policy
  RETURN_IF_ERROR(SetNumaMemoryPolicy(host_policy));

  return Status::Success;
}
Status
SetNumaMemoryPolicy(const triton::common::HostPolicyCmdlineConfig& host_policy)
{
  const auto it = host_policy.find("numa-node");
  if (it != host_policy.end()) {
    int node_id;
    RETURN_IF_ERROR(
        ParseIntOption("Parsing 'numa-node' value", it->second, &node_id));
    LOG_VERBOSE(1) << "Thread is binding to NUMA node " << it->second
                   << ". Max NUMA node count: " << (numa_max_node() + 1);
    numa_set = true;
    unsigned long node_mask = 1UL << node_id;
    if (set_mempolicy(MPOL_BIND, &node_mask, (numa_max_node() + 1) + 1) != 0) {
      return Status(
          Status::Code::INTERNAL,
          std::string("Unable to set NUMA memory policy: ") + strerror(errno));
    }
  }
  return Status::Success;
}
Status
GetNumaMemoryPolicyNodeMask(unsigned long* node_mask)
{
  *node_mask = 0;
  int mode;
  if (numa_set &&
      get_mempolicy(&mode, node_mask, numa_max_node() + 1, NULL, 0) != 0) {
    return Status(
        Status::Code::INTERNAL,
        std::string("Unable to get NUMA node for current thread: ") +
            strerror(errno));
  }
  return Status::Success;
}
Status
ResetNumaMemoryPolicy()
{
  if (numa_set && (set_mempolicy(MPOL_DEFAULT, nullptr, 0) != 0)) {
    return Status(
        Status::Code::INTERNAL,
        std::string("Unable to reset NUMA memory policy: ") + strerror(errno));
  }
  numa_set = false;
  return Status::Success;
}
Status
SetNumaThreadAffinity(
    std::thread::native_handle_type thread,
    const triton::common::HostPolicyCmdlineConfig& host_policy)
{
  const auto it = host_policy.find("cpu-cores");
  if (it != host_policy.end()) {
    // Parse CPUs
    std::vector<int> cpus;
    {
      const auto& cpu_str = it->second;
      auto delim_cpus = cpu_str.find(",");
      int current_pos = 0;
      while (true) {
        auto delim_range = cpu_str.find("-", current_pos);
        if (delim_range == std::string::npos) {
          return Status(
              Status::Code::INVALID_ARG,
              std::string(
                  "host policy setting 'cpu-cores' format is "
                  "'<lower_cpu_core_id>-<upper_cpu_core_id>'. Got ") +
                  cpu_str.substr(
                      current_pos, ((delim_cpus == std::string::npos)
                                        ? (cpu_str.length() + 1)
                                        : delim_cpus) -
                                       current_pos));
        }
        int lower, upper;
        RETURN_IF_ERROR(ParseIntOption(
            "Parsing 'cpu-cores' value",
            cpu_str.substr(current_pos, delim_range - current_pos), &lower));
        RETURN_IF_ERROR(ParseIntOption(
            "Parsing 'cpu-cores' value",
            (delim_cpus == std::string::npos)
                ? cpu_str.substr(delim_range + 1)
                : cpu_str.substr(
                      delim_range + 1, delim_cpus - (delim_range + 1)),
            &upper));
        for (; lower <= upper; ++lower) {
          cpus.push_back(lower);
        }
        // break if the processed range is the last specified range
        if (delim_cpus != std::string::npos) {
          current_pos = delim_cpus + 1;
          delim_cpus = cpu_str.find(",", current_pos);
        } else {
          break;
        }
      }
    }
    LOG_VERBOSE(1) << "Thread is binding to one of the CPUs: "
                   << VectorToString(cpus);
    numa_set = true;
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    for (int cpu : cpus) {
      CPU_SET(cpu, &cpuset);
    }
    if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset) != 0) {
      return Status(
          Status::Code::INTERNAL,
          std::string("Unable to set NUMA thread affinity: ") +
              strerror(errno));
    }
  }
  return Status::Success;
}
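The affinity call above takes a cpu_set_t bitmask built with the CPU_ZERO/CPU_SET macros. A minimal self-contained sketch that pins the calling thread to CPU 0 (Linux-only; pthread_setaffinity_np is a GNU extension and returns an error number rather than setting errno):

// Linux-only sketch: pin the calling thread to CPU 0.
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

int
main()
{
  cpu_set_t cpuset;
  CPU_ZERO(&cpuset);    // start from an empty CPU mask
  CPU_SET(0, &cpuset);  // allow CPU 0 only
  int rc = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
  if (rc != 0) {
    fprintf(stderr, "setaffinity failed: %s\n", strerror(rc));
    return 1;
  }
  printf("pinned to CPU 0\n");
  return 0;
}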
#endif
}}  // namespace triton::core
3rdparty/core-r22.12/src/numa_utils.h deleted 100644 → 0
// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <map>
#include <thread>
#include <vector>

#include "status.h"
#include "triton/common/model_config.h"
#include "tritonserver_apis.h"

namespace triton { namespace core {

// Helper function to set memory policy and thread affinity on current thread
Status SetNumaConfigOnThread(
    const triton::common::HostPolicyCmdlineConfig& host_policy);

// Restrict the memory allocation to specific NUMA node.
Status SetNumaMemoryPolicy(
    const triton::common::HostPolicyCmdlineConfig& host_policy);

// Retrieve the node mask used to set memory policy for the current thread
Status GetNumaMemoryPolicyNodeMask(unsigned long* node_mask);

// Reset the memory allocation setting.
Status ResetNumaMemoryPolicy();

// Set a thread affinity to be on specific cpus.
Status SetNumaThreadAffinity(
    std::thread::native_handle_type thread,
    const triton::common::HostPolicyCmdlineConfig& host_policy);

}}  // namespace triton::core
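The node mask passed between these functions follows the kernel's set_mempolicy/get_mempolicy convention: bit i of the mask selects NUMA node i. A one-line illustration (an assumed helper, not part of the header):

// Bit i selects NUMA node i, matching 'node_mask = 1UL << node_id' in
// SetNumaMemoryPolicy: node 0 -> 0b001, node 2 -> 0b100.
unsigned long MaskForNode(int node_id) { return 1UL << node_id; }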
3rdparty/core-r22.12/src/payload.cc
deleted 100644 → 0 | View file @ d592fbea
// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "payload.h"

namespace triton { namespace core {

Payload::Payload()
    : op_type_(Operation::INFER_RUN),
      requests_(std::vector<std::unique_ptr<InferenceRequest>>()),
      OnCallback_([]() {}), instance_(nullptr), state_(State::UNINITIALIZED),
      batcher_start_ns_(0), saturated_(false)
{
  exec_mu_.reset(new std::mutex());
}

const Status&
Payload::MergePayload(std::shared_ptr<Payload>& payload)
{
  if ((payload->GetOpType() != Operation::INFER_RUN) ||
      (op_type_ != Operation::INFER_RUN)) {
    static Status op_type_error(
        Status::Code::INTERNAL,
        "Attempted to merge payloads of type that are not INFER_RUN");
    return op_type_error;
  }
  if (payload->GetInstance() != instance_) {
    static Status instance_error(
        Status::Code::INTERNAL,
        "Attempted to merge payloads of mismatching instance");
    return instance_error;
  }
  if ((payload->GetState() != State::EXECUTING) ||
      (state_ != State::EXECUTING)) {
    static Status state_error(
        Status::Code::INTERNAL,
        "Attempted to merge payloads that are not in executing state");
    return state_error;
  }
  // Skip comparison if not initialized (required), here assume either all
  // payloads are initialized or otherwise.
  if (required_equal_inputs_.Initialized() &&
      !required_equal_inputs_.HasEqualInputs(*payload->Requests().begin())) {
    static Status shape_error(
        Status::Code::INVALID_ARG,
        "Attempted to merge payloads that has non-equal inputs");
    return shape_error;
  }
  requests_.insert(
      requests_.end(), std::make_move_iterator(payload->Requests().begin()),
      std::make_move_iterator(payload->Requests().end()));
  payload->Callback();
  return Status::Success;
}

void
Payload::Reset(const Operation op_type, TritonModelInstance* instance)
{
  op_type_ = op_type;
  requests_.clear();
  OnCallback_ = []() {};
  release_callbacks_.clear();
  instance_ = instance;
  state_ = State::UNINITIALIZED;
  status_.reset(new std::promise<Status>());
  required_equal_inputs_ = RequiredEqualInputs();
  batcher_start_ns_ = 0;
  saturated_ = false;
}

void
Payload::Release()
{
  op_type_ = Operation::INFER_RUN;
  requests_.clear();
  OnCallback_ = []() {};
  release_callbacks_.clear();
  instance_ = nullptr;
  state_ = State::RELEASED;
  required_equal_inputs_ = RequiredEqualInputs();
  batcher_start_ns_ = 0;
  saturated_ = false;
}

size_t
Payload::BatchSize()
{
  size_t batch_size = 0;
  for (const auto& request : requests_) {
    batch_size += std::max(1U, request->BatchSize());
  }
  return batch_size;
}

void
Payload::ReserveRequests(size_t size)
{
  requests_.reserve(size);
}

void
Payload::AddRequest(std::unique_ptr<InferenceRequest> request)
{
  if ((batcher_start_ns_ == 0) ||
      (batcher_start_ns_ > request->BatcherStartNs())) {
    batcher_start_ns_ = request->BatcherStartNs();
  }
  requests_.push_back(std::move(request));
}

void
Payload::SetCallback(std::function<void()> OnCallback)
{
  OnCallback_ = OnCallback;
}

void
Payload::SetInstance(TritonModelInstance* model_instance)
{
  instance_ = model_instance;
}

void
Payload::AddInternalReleaseCallback(std::function<void()>&& callback)
{
  release_callbacks_.emplace_back(std::move(callback));
}

void
Payload::MarkSaturated()
{
  saturated_ = true;
}

void
Payload::SetState(Payload::State state)
{
  state_ = state;
}

Status
Payload::Wait()
{
  return status_->get_future().get();
}

void
Payload::Callback()
{
  OnCallback_();
}

void
Payload::OnRelease()
{
  // Invoke the release callbacks added internally before releasing the
  // request to user provided callback.
  for (auto it = release_callbacks_.rbegin(); it != release_callbacks_.rend();
       it++) {
    (*it)();
  }
  release_callbacks_.clear();
}

void
Payload::Execute(bool* should_exit)
{
  *should_exit = false;
  Status status;
  switch (op_type_) {
    case Operation::INFER_RUN:
      instance_->Schedule(std::move(requests_), OnCallback_);
      break;
    case Operation::INIT:
      status = instance_->Initialize();
      break;
    case Operation::WARM_UP:
      status = instance_->WarmUp();
      break;
    case Operation::EXIT:
      *should_exit = true;
  }
  status_->set_value(status);
}

}}  // namespace triton::core
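A rough sketch of the lifecycle this implies, as a scheduler thread might drive it. The function and its `instance` parameter are assumptions for illustration; the sequence mirrors Reset() → Execute() → Wait() above but is not code from this repository.

// Illustrative driver (not from this repo): reuse a pooled Payload for an
// INIT operation on a model instance and block on its completion status.
void
ExampleRunInit(
    std::shared_ptr<triton::core::Payload>& payload,
    triton::core::TritonModelInstance* instance /* assumed available */)
{
  payload->Reset(triton::core::Payload::Operation::INIT, instance);
  bool should_exit = false;
  payload->Execute(&should_exit);  // calls instance->Initialize() for INIT
  triton::core::Status status = payload->Wait();  // future set in Execute()
  LOG_VERBOSE(1) << "INIT payload finished: " << status.AsString();
}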
3rdparty/core-r22.12/src/payload.h
deleted 100644 → 0 | View file @ d592fbea
// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <queue>
#include <vector>

#include "backend_model_instance.h"
#include "infer_request.h"
#include "scheduler_utils.h"
#include "status.h"

namespace triton { namespace core {

class Payload {
 public:
  enum Operation { INFER_RUN = 0, INIT = 1, WARM_UP = 2, EXIT = 3 };

  enum State {
    UNINITIALIZED = 0,
    READY = 1,
    REQUESTED = 2,
    SCHEDULED = 3,
    EXECUTING = 4,
    RELEASED = 5
  };

  Payload();
  void Reset(const Operation op_type, TritonModelInstance* instance = nullptr);
  const Status& MergePayload(std::shared_ptr<Payload>& payload);

  Operation GetOpType() { return op_type_; }
  std::mutex* GetExecMutex() { return exec_mu_.get(); }
  size_t RequestCount() { return requests_.size(); }
  size_t BatchSize();
  void ReserveRequests(size_t size);
  void AddRequest(std::unique_ptr<InferenceRequest> request);
  std::vector<std::unique_ptr<InferenceRequest>>& Requests()
  {
    return requests_;
  }
  uint64_t BatcherStartNs() { return batcher_start_ns_; }
  void SetCallback(std::function<void()> OnCallback);
  void Callback();
  void AddInternalReleaseCallback(std::function<void()>&& callback);
  void OnRelease();
  void SetInstance(TritonModelInstance* model_instance);
  TritonModelInstance* GetInstance() { return instance_; }
  void MarkSaturated();
  bool IsSaturated() { return saturated_; }
  RequiredEqualInputs* MutableRequiredEqualInputs()
  {
    return &required_equal_inputs_;
  }
  State GetState() { return state_; }
  void SetState(State state);
  void Execute(bool* should_exit);
  Status Wait();
  void Release();

 private:
  Operation op_type_;
  std::vector<std::unique_ptr<InferenceRequest>> requests_;
  std::function<void()> OnCallback_;
  std::vector<std::function<void()>> release_callbacks_;
  TritonModelInstance* instance_;
  State state_;
  std::unique_ptr<std::promise<Status>> status_;
  std::unique_ptr<std::mutex> exec_mu_;
  uint64_t batcher_start_ns_;
  RequiredEqualInputs required_equal_inputs_;
  bool saturated_;
};

}}  // namespace triton::core
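The guards in MergePayload() encode its contract: both payloads must be INFER_RUN, share the same model instance, and be in the EXECUTING state, with equal required inputs when initialized. A hedged sketch of a legal merge; the wrapper function and its `instance` parameter are assumptions:

// Sketch (not from this repo): merge payload 'b' into 'a'. On success b's
// requests are moved into a and b's completion callback fires.
#include "payload.h"

triton::core::Status
ExampleMerge(triton::core::TritonModelInstance* instance /* assumed */)
{
  auto a = std::make_shared<triton::core::Payload>();
  auto b = std::make_shared<triton::core::Payload>();
  a->Reset(triton::core::Payload::Operation::INFER_RUN, instance);
  b->Reset(triton::core::Payload::Operation::INFER_RUN, instance);
  a->SetState(triton::core::Payload::State::EXECUTING);
  b->SetState(triton::core::Payload::State::EXECUTING);
  return a->MergePayload(b);  // Status::Success when preconditions hold
}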
3rdparty/core-r22.12/src/pinned_memory_manager.cc
deleted 100644 → 0 | View file @ d592fbea
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#include "pinned_memory_manager.h"

#include <sstream>

#include "numa_utils.h"
#include "triton/common/logging.h"

#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>
#endif  // TRITON_ENABLE_GPU

namespace triton { namespace core {

namespace {

std::string
PointerToString(void* ptr)
{
  std::stringstream ss;
  ss << ptr;
  return ss.str();
}

Status
ParseIntOption(const std::string& msg, const std::string& arg, int* value)
{
  try {
    *value = std::stoi(arg);
  }
  catch (const std::invalid_argument& ia) {
    return Status(
        Status::Code::INVALID_ARG,
        msg + ": Can't parse '" + arg + "' to integer");
  }
  return Status::Success;
}

}  // namespace

std::unique_ptr<PinnedMemoryManager> PinnedMemoryManager::instance_;
uint64_t PinnedMemoryManager::pinned_memory_byte_size_;

PinnedMemoryManager::PinnedMemory::PinnedMemory(
    void* pinned_memory_buffer, uint64_t size)
    : pinned_memory_buffer_(pinned_memory_buffer)
{
  if (pinned_memory_buffer_ != nullptr) {
    managed_pinned_memory_ = boost::interprocess::managed_external_buffer(
        boost::interprocess::create_only_t{}, pinned_memory_buffer_, size);
  }
}

PinnedMemoryManager::PinnedMemory::~PinnedMemory()
{
#ifdef TRITON_ENABLE_GPU
  if (pinned_memory_buffer_ != nullptr) {
    cudaFreeHost(pinned_memory_buffer_);
  }
#endif  // TRITON_ENABLE_GPU
}

PinnedMemoryManager::~PinnedMemoryManager()
{
  // Clean up
  for (const auto& memory_info : memory_info_) {
    const auto& is_pinned = memory_info.second.first;
    if (!is_pinned) {
      free(memory_info.first);
    }
  }
}

void
PinnedMemoryManager::AddPinnedMemoryBuffer(
    const std::shared_ptr<PinnedMemory>& pinned_memory_buffer,
    unsigned long node_mask)
{
  pinned_memory_buffers_[node_mask] = pinned_memory_buffer;
}

Status
PinnedMemoryManager::AllocInternal(
    void** ptr, uint64_t size, TRITONSERVER_MemoryType* allocated_type,
    bool allow_nonpinned_fallback, PinnedMemory* pinned_memory_buffer)
{
  auto status = Status::Success;
  if (pinned_memory_buffer->pinned_memory_buffer_ != nullptr) {
    std::lock_guard<std::mutex> lk(pinned_memory_buffer->buffer_mtx_);
    *ptr = pinned_memory_buffer->managed_pinned_memory_.allocate(
        size, std::nothrow_t{});
    *allocated_type = TRITONSERVER_MEMORY_CPU_PINNED;
    if (*ptr == nullptr) {
      status = Status(
          Status::Code::INTERNAL, "failed to allocate pinned system memory");
    }
  } else {
    status = Status(
        Status::Code::INTERNAL,
        "failed to allocate pinned system memory: no pinned memory pool");
  }

  bool is_pinned = true;
  if ((!status.IsOk()) && allow_nonpinned_fallback) {
    static bool warning_logged = false;
    if (!warning_logged) {
      LOG_WARNING << status.Message()
                  << ", falling back to non-pinned system memory";
      warning_logged = true;
    }
    *ptr = malloc(size);
    *allocated_type = TRITONSERVER_MEMORY_CPU;
    is_pinned = false;
    if (*ptr == nullptr) {
      status = Status(
          Status::Code::INTERNAL,
          "failed to allocate non-pinned system memory");
    } else {
      status = Status::Success;
    }
  }

  // keep track of allocated buffer or clean up
  {
    std::lock_guard<std::mutex> lk(info_mtx_);
    if (status.IsOk()) {
      auto res = memory_info_.emplace(
          *ptr, std::make_pair(is_pinned, pinned_memory_buffer));
      if (!res.second) {
        status = Status(
            Status::Code::INTERNAL,
            "unexpected memory address collision, '" + PointerToString(*ptr) +
                "' has been managed");
      }
      LOG_VERBOSE(1) << (is_pinned ? "" : "non-")
                     << "pinned memory allocation: "
                     << "size " << size << ", addr " << *ptr;
    }
  }

  if ((!status.IsOk()) && (*ptr != nullptr)) {
    if (is_pinned) {
      std::lock_guard<std::mutex> lk(pinned_memory_buffer->buffer_mtx_);
      pinned_memory_buffer->managed_pinned_memory_.deallocate(*ptr);
    } else {
      free(*ptr);
    }
  }

  return status;
}

Status
PinnedMemoryManager::FreeInternal(void* ptr)
{
  bool is_pinned = true;
  PinnedMemory* pinned_memory_buffer = nullptr;
  {
    std::lock_guard<std::mutex> lk(info_mtx_);
    auto it = memory_info_.find(ptr);
    if (it != memory_info_.end()) {
      is_pinned = it->second.first;
      pinned_memory_buffer = it->second.second;
      LOG_VERBOSE(1) << (is_pinned ? "" : "non-")
                     << "pinned memory deallocation: "
                     << "addr " << ptr;
      memory_info_.erase(it);
    } else {
      return Status(
          Status::Code::INTERNAL,
          "unexpected memory address '" + PointerToString(ptr) +
              "' is not being managed");
    }
  }

  if (is_pinned) {
    std::lock_guard<std::mutex> lk(pinned_memory_buffer->buffer_mtx_);
    pinned_memory_buffer->managed_pinned_memory_.deallocate(ptr);
  } else {
    free(ptr);
  }

  return Status::Success;
}

void
PinnedMemoryManager::Reset()
{
  instance_.reset();
}

Status
PinnedMemoryManager::Create(const Options& options)
{
  if (instance_ != nullptr) {
    LOG_WARNING << "New pinned memory pool of size "
                << options.pinned_memory_pool_byte_size_
                << " could not be created since one already exists"
                << " of size " << pinned_memory_byte_size_;
    return Status::Success;
  }

  instance_.reset(new PinnedMemoryManager());
  if (options.host_policy_map_.empty()) {
    void* buffer = nullptr;
#ifdef TRITON_ENABLE_GPU
    auto err = cudaHostAlloc(
        &buffer, options.pinned_memory_pool_byte_size_,
        cudaHostAllocPortable);
    if (err != cudaSuccess) {
      buffer = nullptr;
      LOG_WARNING << "Unable to allocate pinned system memory, pinned memory "
                     "pool will not be available: "
                  << std::string(cudaGetErrorString(err));
    } else if (options.pinned_memory_pool_byte_size_ != 0) {
      LOG_INFO << "Pinned memory pool is created at '"
               << PointerToString(buffer) << "' with size "
               << options.pinned_memory_pool_byte_size_;
    } else {
      LOG_INFO << "Pinned memory pool disabled";
    }
#endif  // TRITON_ENABLE_GPU
    try {
      instance_->AddPinnedMemoryBuffer(
          std::shared_ptr<PinnedMemory>(
              new PinnedMemory(buffer, options.pinned_memory_pool_byte_size_)),
          0);
    }
    catch (const std::exception& ex) {
      return Status(
          Status::Code::INTERNAL,
          "Failed to add Pinned Memory buffer: " + std::string(ex.what()));
    }
  } else {
    // Create only one buffer / manager should be created for one node,
    // and all associated devices should request memory from the shared manager
    std::map<int32_t, std::string> numa_map;
    for (const auto host_policy : options.host_policy_map_) {
      const auto numa_it = host_policy.second.find("numa-node");
      if (numa_it != host_policy.second.end()) {
        int32_t numa_id;
        if (ParseIntOption("Parsing NUMA node", numa_it->second, &numa_id)
                .IsOk()) {
          numa_map.emplace(numa_id, host_policy.first);
        }
      }
    }
    for (const auto node_policy : numa_map) {
      auto status = SetNumaMemoryPolicy(
          options.host_policy_map_.at(node_policy.second));
      if (!status.IsOk()) {
        LOG_WARNING << "Unable to allocate pinned system memory for NUMA node "
                    << node_policy.first << ": " << status.AsString();
        continue;
      }
      unsigned long node_mask;
      status = GetNumaMemoryPolicyNodeMask(&node_mask);
      if (!status.IsOk()) {
        LOG_WARNING << "Unable to get NUMA node set for current thread: "
                    << status.AsString();
        continue;
      }
      void* buffer = nullptr;
#ifdef TRITON_ENABLE_GPU
      auto err = cudaHostAlloc(
          &buffer, options.pinned_memory_pool_byte_size_,
          cudaHostAllocPortable);
      if (err != cudaSuccess) {
        buffer = nullptr;
        LOG_WARNING
            << "Unable to allocate pinned system memory, pinned memory "
               "pool will not be available: "
            << std::string(cudaGetErrorString(err));
      } else if (options.pinned_memory_pool_byte_size_ != 0) {
        LOG_INFO << "Pinned memory pool is created at '"
                 << PointerToString(buffer) << "' with size "
                 << options.pinned_memory_pool_byte_size_;
      } else {
        LOG_INFO << "Pinned memory pool disabled";
      }
#endif  // TRITON_ENABLE_GPU
      ResetNumaMemoryPolicy();
      try {
        instance_->AddPinnedMemoryBuffer(
            std::shared_ptr<PinnedMemory>(new PinnedMemory(
                buffer, options.pinned_memory_pool_byte_size_)),
            node_mask);
      }
      catch (const std::exception& ex) {
        return Status(
            Status::Code::INTERNAL,
            "Failed to add Pinned Memory buffer with host policy: " +
                std::string(ex.what()));
      }
    }
    // If no pinned memory is allocated, add an empty entry where all
    // allocation will be on normal system memory
    if (instance_->pinned_memory_buffers_.empty()) {
      try {
        instance_->AddPinnedMemoryBuffer(
            std::shared_ptr<PinnedMemory>(new PinnedMemory(
                nullptr, options.pinned_memory_pool_byte_size_)),
            0);
      }
      catch (const std::exception& ex) {
        return Status(
            Status::Code::INTERNAL,
            "Failed to add empty Pinned Memory entry: " +
                std::string(ex.what()));
      }
    }
  }

  pinned_memory_byte_size_ = options.pinned_memory_pool_byte_size_;
  return Status::Success;
}

Status
PinnedMemoryManager::Alloc(
    void** ptr, uint64_t size, TRITONSERVER_MemoryType* allocated_type,
    bool allow_nonpinned_fallback)
{
  if (instance_ == nullptr) {
    return Status(
        Status::Code::UNAVAILABLE,
        "PinnedMemoryManager has not been created");
  }

  auto pinned_memory_buffer =
      instance_->pinned_memory_buffers_.begin()->second.get();
  if (instance_->pinned_memory_buffers_.size() > 1) {
    unsigned long node_mask;
    if (GetNumaMemoryPolicyNodeMask(&node_mask).IsOk()) {
      auto it = instance_->pinned_memory_buffers_.find(node_mask);
      if (it != instance_->pinned_memory_buffers_.end()) {
        pinned_memory_buffer = it->second.get();
      }
    }
  }

  return instance_->AllocInternal(
      ptr, size, allocated_type, allow_nonpinned_fallback,
      pinned_memory_buffer);
}

Status
PinnedMemoryManager::Free(void* ptr)
{
  if (instance_ == nullptr) {
    return Status(
        Status::Code::UNAVAILABLE,
        "PinnedMemoryManager has not been created");
  }

  return instance_->FreeInternal(ptr);
}

}}  // namespace triton::core
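A minimal usage sketch of the static API above, assuming the manager is created once at server startup. The wrapper function and the 256 MiB pool size are illustrative assumptions, not code from this repository.

// Sketch: create a 256 MiB pinned pool, allocate 4 KiB with non-pinned
// fallback enabled, then free it. Error handling kept minimal.
#include "pinned_memory_manager.h"

triton::core::Status
ExamplePinnedAllocRoundTrip()
{
  triton::core::PinnedMemoryManager::Options options(256 << 20);
  RETURN_IF_ERROR(triton::core::PinnedMemoryManager::Create(options));

  void* buffer = nullptr;
  TRITONSERVER_MemoryType allocated_type;
  RETURN_IF_ERROR(triton::core::PinnedMemoryManager::Alloc(
      &buffer, 4096, &allocated_type, true /* allow_nonpinned_fallback */));

  // allocated_type reports TRITONSERVER_MEMORY_CPU_PINNED, or
  // TRITONSERVER_MEMORY_CPU when the fallback path was taken.
  return triton::core::PinnedMemoryManager::Free(buffer);
}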
3rdparty/core-r22.12/src/pinned_memory_manager.h
deleted 100644 → 0 | View file @ d592fbea
// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once

#include <boost/interprocess/managed_external_buffer.hpp>
#include <map>
#include <memory>
#include <mutex>

#include "status.h"
#include "triton/common/model_config.h"

namespace triton { namespace core {

// This is a singleton class responsible for maintaining pinned memory pool
// used by the inference server. Pinned memory allocations and deallocations
// must be requested via functions provided by this class.
class PinnedMemoryManager {
 public:
  // Options to configure pinned memory manager.
  struct Options {
    Options(
        uint64_t b = 0,
        const triton::common::HostPolicyCmdlineConfigMap& host_policy_map = {})
        : pinned_memory_pool_byte_size_(b), host_policy_map_(host_policy_map)
    {
    }

    uint64_t pinned_memory_pool_byte_size_;
    triton::common::HostPolicyCmdlineConfigMap host_policy_map_;
  };

  ~PinnedMemoryManager();

  // Create the pinned memory manager based on 'options' specified.
  // Return Status object indicating success or failure.
  static Status Create(const Options& options);

  // Allocate pinned memory with the requested 'size' and return the pointer
  // in 'ptr'. If 'allow_nonpinned_fallback' is true, regular system memory
  // will be allocated as fallback in the case where pinned memory fails to
  // be allocated.
  // Return Status object indicating success or failure.
  static Status Alloc(
      void** ptr, uint64_t size, TRITONSERVER_MemoryType* allocated_type,
      bool allow_nonpinned_fallback);

  // Free the memory allocated by the pinned memory manager.
  // Return Status object indicating success or failure.
  static Status Free(void* ptr);

 protected:
  // Provide explicit control on the lifecycle of the CUDA memory manager,
  // for testing only.
  static void Reset();

 private:
  class PinnedMemory {
   public:
    PinnedMemory(void* pinned_memory_buffer, uint64_t size);
    ~PinnedMemory();
    void* pinned_memory_buffer_;
    std::mutex buffer_mtx_;
    boost::interprocess::managed_external_buffer managed_pinned_memory_;
  };

  PinnedMemoryManager() = default;

  Status AllocInternal(
      void** ptr, uint64_t size, TRITONSERVER_MemoryType* allocated_type,
      bool allow_nonpinned_fallback, PinnedMemory* pinned_memory_buffer);
  Status FreeInternal(void* ptr);
  void AddPinnedMemoryBuffer(
      const std::shared_ptr<PinnedMemory>& pinned_memory_buffer,
      unsigned long node_mask);

  static std::unique_ptr<PinnedMemoryManager> instance_;
  static uint64_t pinned_memory_byte_size_;

  std::mutex info_mtx_;
  std::map<void*, std::pair<bool, PinnedMemory*>> memory_info_;
  std::map<unsigned long, std::shared_ptr<PinnedMemory>>
      pinned_memory_buffers_;
};

}}  // namespace triton::core
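When a host-policy map is supplied, Create() in pinned_memory_manager.cc builds one pool per distinct "numa-node" value, and Alloc() selects the pool matching the caller's NUMA node mask. A sketch with assumed policy names and pool size:

// Sketch (policy names "gpu_0"/"gpu_1" and the 64 MiB size are assumptions):
// one pinned pool is carved out per NUMA node named in a "numa-node" setting.
#include "pinned_memory_manager.h"

triton::core::Status
ExamplePerNumaPools()
{
  triton::common::HostPolicyCmdlineConfigMap policies;
  policies["gpu_0"]["numa-node"] = "0";
  policies["gpu_1"]["numa-node"] = "1";
  triton::core::PinnedMemoryManager::Options opts(64 << 20, policies);
  return triton::core::PinnedMemoryManager::Create(opts);
}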