Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
fa339ca3
Unverified
Commit
fa339ca3
authored
Nov 02, 2021
by
SparkSnail
Committed by
GitHub
Nov 02, 2021
Browse files
Fix hybrid pipeline (#4287)
parent
fde9e1a0
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
43 additions
and
26 deletions
+43
-26
ts/nni_manager/training_service/reusable/trialDispatcher.ts
ts/nni_manager/training_service/reusable/trialDispatcher.ts
+43
-26
No files found.
ts/nni_manager/training_service/reusable/trialDispatcher.ts
View file @
fa339ca3
...
...
@@ -209,31 +209,9 @@ class TrialDispatcher implements TrainingService {
}
}
public
async
run
():
Promise
<
void
>
{
await
Promise
.
all
(
this
.
environmentServiceList
.
map
(
env
=>
env
.
init
()));
for
(
const
environmentService
of
this
.
environmentServiceList
)
{
const
runnerSettings
:
RunnerSettings
=
new
RunnerSettings
();
runnerSettings
.
nniManagerIP
=
this
.
config
.
nniManagerIp
===
undefined
?
await
getIPV4Address
()
:
this
.
config
.
nniManagerIp
;
runnerSettings
.
nniManagerPort
=
getBasePort
()
+
1
;
runnerSettings
.
commandChannel
=
environmentService
.
getCommandChannel
.
channelName
;
runnerSettings
.
enableGpuCollector
=
this
.
enableGpuScheduler
;
runnerSettings
.
command
=
this
.
config
.
trialCommand
;
runnerSettings
.
nniManagerVersion
=
this
.
enableVersionCheck
?
await
getVersion
()
:
''
;
runnerSettings
.
logCollection
=
this
.
logCollection
;
runnerSettings
.
platform
=
environmentService
.
getName
;
runnerSettings
.
experimentId
=
this
.
experimentId
;
await
environmentService
.
getCommandChannel
.
start
();
this
.
log
.
info
(
`TrialDispatcher: started channel:
${
environmentService
.
getCommandChannel
.
constructor
.
name
}
`
);
this
.
log
.
info
(
`TrialDispatcher: copying code and settings.`
);
private
getStorageService
(
environmentService
:
EnvironmentService
):
StorageService
{
let
storageService
:
StorageService
;
if
(
this
.
useSharedStorage
)
{
if
(
this
.
fileCopyCompleted
)
{
this
.
log
.
debug
(
`TrialDispatcher: file already copy to shared storage.`
);
continue
;
}
this
.
log
.
debug
(
`TrialDispatcher: use shared storage service.`
);
storageService
=
component
.
get
<
SharedStorageService
>
(
SharedStorageService
).
storageService
;
}
else
if
(
environmentService
.
hasStorageService
)
{
...
...
@@ -245,6 +223,25 @@ class TrialDispatcher implements TrainingService {
const
environmentLocalTempFolder
=
path
.
join
(
this
.
experimentRootDir
,
"
environment-temp
"
);
storageService
.
initialize
(
this
.
config
.
trialCodeDirectory
,
environmentLocalTempFolder
);
}
return
storageService
;
}
public
async
run
():
Promise
<
void
>
{
await
Promise
.
all
(
this
.
environmentServiceList
.
map
(
env
=>
env
.
init
()));
for
(
const
environmentService
of
this
.
environmentServiceList
)
{
await
environmentService
.
getCommandChannel
.
start
();
this
.
log
.
info
(
`TrialDispatcher: started channel:
${
environmentService
.
getCommandChannel
.
constructor
.
name
}
`
);
this
.
log
.
info
(
`TrialDispatcher: copying code.`
);
if
(
this
.
useSharedStorage
)
{
if
(
this
.
fileCopyCompleted
)
{
continue
;
}
}
const
storageService
:
StorageService
=
this
.
getStorageService
(
environmentService
);
// Copy the compressed file to remoteDirectory and delete it
const
codeDir
=
path
.
resolve
(
this
.
config
.
trialCodeDirectory
);
const
envDir
=
storageService
.
joinPath
(
"
envs
"
);
...
...
@@ -256,9 +253,6 @@ class TrialDispatcher implements TrainingService {
await
storageService
.
save
(
CONTAINER_INSTALL_NNI_SHELL_FORMAT
,
installFileName
);
await
storageService
.
save
(
CONTAINER_INSTALL_NNI_SHELL_FORMAT_FOR_WIN
,
installFileNameForWin
);
const
runnerSettingsConfig
=
storageService
.
joinPath
(
envDir
,
"
settings.json
"
);
await
storageService
.
save
(
JSON
.
stringify
(
runnerSettings
),
runnerSettingsConfig
);
if
(
this
.
isDeveloping
)
{
let
trialToolsPath
=
path
.
join
(
__dirname
,
"
../../../../../tools/nni_trial_tool
"
);
if
(
false
===
fs
.
existsSync
(
trialToolsPath
))
{
...
...
@@ -655,6 +649,27 @@ class TrialDispatcher implements TrainingService {
}
}
/**
 * Writes a dedicated runner settings file for a single environment.
 *
 * A RunnerSettings object is filled from the dispatcher's configuration and
 * from the environment's own environment service, serialized with
 * JSON.stringify, and saved through the storage service returned by
 * getStorageService() at the path built from "envs", the environment id and
 * "settings.json".
 *
 * @param environment the environment whose settings file is generated
 * @throws Error when environment.environmentService is undefined
 */
private async setEnvironmentSetting(environment: EnvironmentInformation): Promise<void> {
    const service = environment.environmentService;
    if (service === undefined) {
        throw new Error(`Environmentservice for ${environment.id} not initialized!`);
    }

    const settings: RunnerSettings = new RunnerSettings();

    // Use the configured manager IP when present; otherwise look up the local IPv4 address.
    const configuredIp = this.config.nniManagerIp;
    settings.nniManagerIP = configuredIp !== undefined ? configuredIp : await getIPV4Address();
    settings.nniManagerPort = getBasePort() + 1;
    settings.commandChannel = service.getCommandChannel.channelName;
    settings.enableGpuCollector = this.enableGpuScheduler;
    settings.command = this.config.trialCommand;

    // The manager version is only resolved when version checking is enabled.
    let managerVersion = '';
    if (this.enableVersionCheck) {
        managerVersion = await getVersion();
    }
    settings.nniManagerVersion = managerVersion;
    settings.logCollection = this.logCollection;
    settings.platform = service.getName;
    settings.experimentId = this.experimentId;

    // Save the settings under a path that includes the environment id.
    const storage: StorageService = this.getStorageService(service);
    const envRoot = storage.joinPath("envs");
    const settingsPath = storage.joinPath(envRoot, environment.id, "settings.json");
    const serialized = JSON.stringify(settings);
    await storage.save(serialized, settingsPath);
}
private
async
requestEnvironment
(
environmentService
:
EnvironmentService
):
Promise
<
void
>
{
if
(
this
.
stopping
)
{
this
.
log
.
info
(
`Experiment is stopping, stop creating new environment`
);
...
...
@@ -674,6 +689,8 @@ class TrialDispatcher implements TrainingService {
environment
.
command
=
`mkdir -p envs/
${
envId
}
&& cd envs/
${
envId
}
&&
${
environment
.
command
}
`
;
environment
.
useSharedStorage
=
this
.
useSharedStorage
;
// Generate a separate settings.json file for each environment to avoid conflicts
await
this
.
setEnvironmentSetting
(
environment
);
await
environmentService
.
startEnvironment
(
environment
);
this
.
environments
.
set
(
environment
.
id
,
environment
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment