Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
cd05da66
Unverified
Commit
cd05da66
authored
Feb 24, 2021
by
SparkSnail
Committed by
GitHub
Feb 24, 2021
Browse files
Add recently-idle environment scheduler in reuse mode (#3375)
parent
aea82d71
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
26 additions
and
2 deletions
+26
-2
ts/nni_manager/training_service/reusable/environment.ts
ts/nni_manager/training_service/reusable/environment.ts
+2
-0
ts/nni_manager/training_service/reusable/gpuScheduler.ts
ts/nni_manager/training_service/reusable/gpuScheduler.ts
+23
-2
ts/nni_manager/training_service/reusable/trialDispatcher.ts
ts/nni_manager/training_service/reusable/trialDispatcher.ts
+1
-0
No files found.
ts/nni_manager/training_service/reusable/environment.ts
View file @
cd05da66
...
...
@@ -51,6 +51,8 @@ export class EnvironmentInformation {
// used to count how many trials run on this environment.
// it can be used in many scenarios, but for now, it is used for reuse mode.
public
assignedTrialCount
:
number
=
0
;
// used to compute how long the environment has been idle
public
latestTrialReleasedTime
:
number
=
-
1
;
// NNI environment ID
public
id
:
string
;
...
...
ts/nni_manager/training_service/reusable/gpuScheduler.ts
View file @
cd05da66
...
...
@@ -10,7 +10,7 @@ import { GPUInfo, ScheduleResultType } from '../common/gpuData';
import
{
EnvironmentInformation
}
from
'
./environment
'
;
import
{
TrialDetail
}
from
'
./trial
'
;
type
SCHEDULE_POLICY_NAME
=
'
random
'
|
'
round-robin
'
;
type
SCHEDULE_POLICY_NAME
=
'
random
'
|
'
round-robin
'
|
'
recently-idle
'
;
export
class
GpuSchedulerSetting
{
public
useActiveGpu
:
boolean
=
false
;
...
...
@@ -30,7 +30,7 @@ export class GpuScheduler {
// private readonly machineExecutorMap: Set<TrialDetail>;
private
readonly
log
:
Logger
=
getLogger
();
private
readonly
policyName
:
SCHEDULE_POLICY_NAME
=
'
r
ound-robin
'
;
private
readonly
policyName
:
SCHEDULE_POLICY_NAME
=
'
r
ecently-idle
'
;
private
defaultSetting
:
GpuSchedulerSetting
;
private
roundRobinIndex
:
number
=
0
;
...
...
@@ -101,6 +101,7 @@ export class GpuScheduler {
trial
.
environment
.
defaultGpuSummary
!==
undefined
&&
trial
.
assignedGpus
!==
undefined
&&
trial
.
assignedGpus
.
length
>
0
)
{
for
(
const
gpuInfo
of
trial
.
assignedGpus
)
{
const
defaultGpuSummary
=
trial
.
environment
.
defaultGpuSummary
;
const
num
:
number
|
undefined
=
defaultGpuSummary
.
assignedGpuIndexMap
.
get
(
gpuInfo
.
index
);
...
...
@@ -190,10 +191,30 @@ export class GpuScheduler {
return
randomSelect
(
qualifiedEnvironments
);
}
else
if
(
this
.
policyName
===
'
round-robin
'
)
{
return
this
.
roundRobinSelect
(
qualifiedEnvironments
,
allEnvironments
);
}
else
if
(
this
.
policyName
===
'
recently-idle
'
)
{
return
this
.
recentlyIdleSelect
(
qualifiedEnvironments
,
allEnvironments
);
}
else
{
throw
new
Error
(
`Unsupported schedule policy:
${
this
.
policyName
}
`
);
}
}
// Select the environment that became idle most recently. If no environment is idle, fall back to round robin to select an environment.
private recentlyIdleSelect(qualifiedEnvironments: EnvironmentInformation[], allEnvironments: EnvironmentInformation[]): EnvironmentInformation {
    // Scan the qualified environments for the one whose last trial release
    // lies the shortest time before "now". Environments whose
    // latestTrialReleasedTime is not positive are ignored by the scan.
    const currentTime = Date.now();
    let shortestIdle = Number.MAX_SAFE_INTEGER;
    let candidate: EnvironmentInformation | undefined;

    for (const env of qualifiedEnvironments) {
        const releasedAt = env.latestTrialReleasedTime;
        if (!(releasedAt > 0)) {
            continue;
        }
        const idleFor = currentTime - releasedAt;
        // Strict comparison: on a tie the earlier entry in the list is kept.
        if (idleFor < shortestIdle) {
            shortestIdle = idleFor;
            candidate = env;
        }
    }

    if (candidate !== undefined) {
        // Reset the timestamp so this environment is skipped by the scan above
        // until the field is set to a positive value again.
        candidate.latestTrialReleasedTime = -1;
        return candidate;
    }

    // No environment carried a positive release timestamp: defer to round robin.
    return this.roundRobinSelect(qualifiedEnvironments, allEnvironments);
}
private
roundRobinSelect
(
qualifiedEnvironments
:
EnvironmentInformation
[],
allEnvironments
:
EnvironmentInformation
[]):
EnvironmentInformation
{
while
(
!
qualifiedEnvironments
.
includes
(
allEnvironments
[
this
.
roundRobinIndex
%
allEnvironments
.
length
]))
{
...
...
ts/nni_manager/training_service/reusable/trialDispatcher.ts
View file @
cd05da66
...
...
@@ -732,6 +732,7 @@ class TrialDispatcher implements TrainingService {
throw
new
Error
(
`TrialDispatcher: environment
${
trial
.
environment
.
id
}
has no counted running trial!`
);
}
trial
.
environment
.
runningTrialCount
--
;
trial
.
environment
.
latestTrialReleasedTime
=
Date
.
now
();
trial
.
environment
=
undefined
;
}
if
(
true
===
this
.
enableGpuScheduler
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment