Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
cd05da66
Unverified
Commit
cd05da66
authored
Feb 24, 2021
by
SparkSnail
Committed by
GitHub
Feb 24, 2021
Browse files
Add recently-idle environment scheduler in reuse mode (#3375)
parent
aea82d71
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
26 additions
and
2 deletions
+26
-2
ts/nni_manager/training_service/reusable/environment.ts
ts/nni_manager/training_service/reusable/environment.ts
+2
-0
ts/nni_manager/training_service/reusable/gpuScheduler.ts
ts/nni_manager/training_service/reusable/gpuScheduler.ts
+23
-2
ts/nni_manager/training_service/reusable/trialDispatcher.ts
ts/nni_manager/training_service/reusable/trialDispatcher.ts
+1
-0
No files found.
ts/nni_manager/training_service/reusable/environment.ts
View file @
cd05da66
...
@@ -51,6 +51,8 @@ export class EnvironmentInformation {
...
@@ -51,6 +51,8 @@ export class EnvironmentInformation {
// used to count how many trials run on this environment.
// used to count how many trials run on this environment.
// it can be used in many scenarios, but for now it is only used for reuse mode.
// it can be used in many scenarios, but for now it is only used for reuse mode.
public
assignedTrialCount
:
number
=
0
;
public
assignedTrialCount
:
number
=
0
;
// it is used to get environment idle time interval
public
latestTrialReleasedTime
:
number
=
-
1
;
// NNI environment ID
// NNI environment ID
public
id
:
string
;
public
id
:
string
;
...
...
ts/nni_manager/training_service/reusable/gpuScheduler.ts
View file @
cd05da66
...
@@ -10,7 +10,7 @@ import { GPUInfo, ScheduleResultType } from '../common/gpuData';
...
@@ -10,7 +10,7 @@ import { GPUInfo, ScheduleResultType } from '../common/gpuData';
import
{
EnvironmentInformation
}
from
'
./environment
'
;
import
{
EnvironmentInformation
}
from
'
./environment
'
;
import
{
TrialDetail
}
from
'
./trial
'
;
import
{
TrialDetail
}
from
'
./trial
'
;
type
SCHEDULE_POLICY_NAME
=
'
random
'
|
'
round-robin
'
;
type
SCHEDULE_POLICY_NAME
=
'
random
'
|
'
round-robin
'
|
'
recently-idle
'
;
export
class
GpuSchedulerSetting
{
export
class
GpuSchedulerSetting
{
public
useActiveGpu
:
boolean
=
false
;
public
useActiveGpu
:
boolean
=
false
;
...
@@ -30,7 +30,7 @@ export class GpuScheduler {
...
@@ -30,7 +30,7 @@ export class GpuScheduler {
// private readonly machineExecutorMap: Set<TrialDetail>;
// private readonly machineExecutorMap: Set<TrialDetail>;
private
readonly
log
:
Logger
=
getLogger
();
private
readonly
log
:
Logger
=
getLogger
();
private
readonly
policyName
:
SCHEDULE_POLICY_NAME
=
'
r
ound-robin
'
;
private
readonly
policyName
:
SCHEDULE_POLICY_NAME
=
'
r
ecently-idle
'
;
private
defaultSetting
:
GpuSchedulerSetting
;
private
defaultSetting
:
GpuSchedulerSetting
;
private
roundRobinIndex
:
number
=
0
;
private
roundRobinIndex
:
number
=
0
;
...
@@ -101,6 +101,7 @@ export class GpuScheduler {
...
@@ -101,6 +101,7 @@ export class GpuScheduler {
trial
.
environment
.
defaultGpuSummary
!==
undefined
&&
trial
.
environment
.
defaultGpuSummary
!==
undefined
&&
trial
.
assignedGpus
!==
undefined
&&
trial
.
assignedGpus
!==
undefined
&&
trial
.
assignedGpus
.
length
>
0
)
{
trial
.
assignedGpus
.
length
>
0
)
{
for
(
const
gpuInfo
of
trial
.
assignedGpus
)
{
for
(
const
gpuInfo
of
trial
.
assignedGpus
)
{
const
defaultGpuSummary
=
trial
.
environment
.
defaultGpuSummary
;
const
defaultGpuSummary
=
trial
.
environment
.
defaultGpuSummary
;
const
num
:
number
|
undefined
=
defaultGpuSummary
.
assignedGpuIndexMap
.
get
(
gpuInfo
.
index
);
const
num
:
number
|
undefined
=
defaultGpuSummary
.
assignedGpuIndexMap
.
get
(
gpuInfo
.
index
);
...
@@ -190,10 +191,30 @@ export class GpuScheduler {
...
@@ -190,10 +191,30 @@ export class GpuScheduler {
return
randomSelect
(
qualifiedEnvironments
);
return
randomSelect
(
qualifiedEnvironments
);
}
else
if
(
this
.
policyName
===
'
round-robin
'
)
{
}
else
if
(
this
.
policyName
===
'
round-robin
'
)
{
return
this
.
roundRobinSelect
(
qualifiedEnvironments
,
allEnvironments
);
return
this
.
roundRobinSelect
(
qualifiedEnvironments
,
allEnvironments
);
}
else
if
(
this
.
policyName
===
'
recently-idle
'
)
{
return
this
.
recentlyIdleSelect
(
qualifiedEnvironments
,
allEnvironments
);
}
else
{
}
else
{
throw
new
Error
(
`Unsupported schedule policy:
${
this
.
policyName
}
`
);
throw
new
Error
(
`Unsupported schedule policy:
${
this
.
policyName
}
`
);
}
}
}
}
// Pick the qualified environment whose latest trial release is closest to the current time.
// Environments whose latestTrialReleasedTime is not positive are ignored by the scan.
// When no environment qualifies that way, the choice is delegated to roundRobinSelect.
// The chosen environment has its latestTrialReleasedTime reset to -1 before it is returned.
private recentlyIdleSelect(qualifiedEnvironments: EnvironmentInformation[], allEnvironments: EnvironmentInformation[]): EnvironmentInformation {
    const currentTime = Date.now();
    let shortestIdle = Number.MAX_SAFE_INTEGER;
    let candidate: EnvironmentInformation | undefined;

    for (const env of qualifiedEnvironments) {
        // Skip environments with no positive release timestamp.
        if (!(env.latestTrialReleasedTime > 0)) {
            continue;
        }
        const idleInterval = currentTime - env.latestTrialReleasedTime;
        // Strict comparison keeps the first environment when intervals tie.
        if (idleInterval < shortestIdle) {
            shortestIdle = idleInterval;
            candidate = env;
        }
    }

    if (candidate !== undefined) {
        // Clear the timestamp so this environment is not treated as idle on the next scan.
        candidate.latestTrialReleasedTime = -1;
        return candidate;
    }

    return this.roundRobinSelect(qualifiedEnvironments, allEnvironments);
}
private
roundRobinSelect
(
qualifiedEnvironments
:
EnvironmentInformation
[],
allEnvironments
:
EnvironmentInformation
[]):
EnvironmentInformation
{
private
roundRobinSelect
(
qualifiedEnvironments
:
EnvironmentInformation
[],
allEnvironments
:
EnvironmentInformation
[]):
EnvironmentInformation
{
while
(
!
qualifiedEnvironments
.
includes
(
allEnvironments
[
this
.
roundRobinIndex
%
allEnvironments
.
length
]))
{
while
(
!
qualifiedEnvironments
.
includes
(
allEnvironments
[
this
.
roundRobinIndex
%
allEnvironments
.
length
]))
{
...
...
ts/nni_manager/training_service/reusable/trialDispatcher.ts
View file @
cd05da66
...
@@ -732,6 +732,7 @@ class TrialDispatcher implements TrainingService {
...
@@ -732,6 +732,7 @@ class TrialDispatcher implements TrainingService {
throw
new
Error
(
`TrialDispatcher: environment
${
trial
.
environment
.
id
}
has no counted running trial!`
);
throw
new
Error
(
`TrialDispatcher: environment
${
trial
.
environment
.
id
}
has no counted running trial!`
);
}
}
trial
.
environment
.
runningTrialCount
--
;
trial
.
environment
.
runningTrialCount
--
;
trial
.
environment
.
latestTrialReleasedTime
=
Date
.
now
();
trial
.
environment
=
undefined
;
trial
.
environment
=
undefined
;
}
}
if
(
true
===
this
.
enableGpuScheduler
)
{
if
(
true
===
this
.
enableGpuScheduler
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment