Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
fa339ca3
Unverified
Commit
fa339ca3
authored
Nov 02, 2021
by
SparkSnail
Committed by
GitHub
Nov 02, 2021
Browse files
Fix hybrid pipeline (#4287)
parent
fde9e1a0
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
43 additions
and
26 deletions
+43
-26
ts/nni_manager/training_service/reusable/trialDispatcher.ts
ts/nni_manager/training_service/reusable/trialDispatcher.ts
+43
-26
No files found.
ts/nni_manager/training_service/reusable/trialDispatcher.ts
View file @
fa339ca3
...
...
@@ -209,31 +209,9 @@ class TrialDispatcher implements TrainingService {
}
}
public
async
run
():
Promise
<
void
>
{
await
Promise
.
all
(
this
.
environmentServiceList
.
map
(
env
=>
env
.
init
()));
for
(
const
environmentService
of
this
.
environmentServiceList
)
{
const
runnerSettings
:
RunnerSettings
=
new
RunnerSettings
();
runnerSettings
.
nniManagerIP
=
this
.
config
.
nniManagerIp
===
undefined
?
await
getIPV4Address
()
:
this
.
config
.
nniManagerIp
;
runnerSettings
.
nniManagerPort
=
getBasePort
()
+
1
;
runnerSettings
.
commandChannel
=
environmentService
.
getCommandChannel
.
channelName
;
runnerSettings
.
enableGpuCollector
=
this
.
enableGpuScheduler
;
runnerSettings
.
command
=
this
.
config
.
trialCommand
;
runnerSettings
.
nniManagerVersion
=
this
.
enableVersionCheck
?
await
getVersion
()
:
''
;
runnerSettings
.
logCollection
=
this
.
logCollection
;
runnerSettings
.
platform
=
environmentService
.
getName
;
runnerSettings
.
experimentId
=
this
.
experimentId
;
await
environmentService
.
getCommandChannel
.
start
();
this
.
log
.
info
(
`TrialDispatcher: started channel:
${
environmentService
.
getCommandChannel
.
constructor
.
name
}
`
);
this
.
log
.
info
(
`TrialDispatcher: copying code and settings.`
);
private
getStorageService
(
environmentService
:
EnvironmentService
):
StorageService
{
let
storageService
:
StorageService
;
if
(
this
.
useSharedStorage
)
{
if
(
this
.
fileCopyCompleted
)
{
this
.
log
.
debug
(
`TrialDispatcher: file already copy to shared storage.`
);
continue
;
}
this
.
log
.
debug
(
`TrialDispatcher: use shared storage service.`
);
storageService
=
component
.
get
<
SharedStorageService
>
(
SharedStorageService
).
storageService
;
}
else
if
(
environmentService
.
hasStorageService
)
{
...
...
@@ -245,6 +223,25 @@ class TrialDispatcher implements TrainingService {
const
environmentLocalTempFolder
=
path
.
join
(
this
.
experimentRootDir
,
"
environment-temp
"
);
storageService
.
initialize
(
this
.
config
.
trialCodeDirectory
,
environmentLocalTempFolder
);
}
return
storageService
;
}
public
async
run
():
Promise
<
void
>
{
await
Promise
.
all
(
this
.
environmentServiceList
.
map
(
env
=>
env
.
init
()));
for
(
const
environmentService
of
this
.
environmentServiceList
)
{
await
environmentService
.
getCommandChannel
.
start
();
this
.
log
.
info
(
`TrialDispatcher: started channel:
${
environmentService
.
getCommandChannel
.
constructor
.
name
}
`
);
this
.
log
.
info
(
`TrialDispatcher: copying code.`
);
if
(
this
.
useSharedStorage
)
{
if
(
this
.
fileCopyCompleted
)
{
continue
;
}
}
const
storageService
:
StorageService
=
this
.
getStorageService
(
environmentService
);
// Copy the compressed file to remoteDirectory and delete it
const
codeDir
=
path
.
resolve
(
this
.
config
.
trialCodeDirectory
);
const
envDir
=
storageService
.
joinPath
(
"
envs
"
);
...
...
@@ -256,9 +253,6 @@ class TrialDispatcher implements TrainingService {
await
storageService
.
save
(
CONTAINER_INSTALL_NNI_SHELL_FORMAT
,
installFileName
);
await
storageService
.
save
(
CONTAINER_INSTALL_NNI_SHELL_FORMAT_FOR_WIN
,
installFileNameForWin
);
const
runnerSettingsConfig
=
storageService
.
joinPath
(
envDir
,
"
settings.json
"
);
await
storageService
.
save
(
JSON
.
stringify
(
runnerSettings
),
runnerSettingsConfig
);
if
(
this
.
isDeveloping
)
{
let
trialToolsPath
=
path
.
join
(
__dirname
,
"
../../../../../tools/nni_trial_tool
"
);
if
(
false
===
fs
.
existsSync
(
trialToolsPath
))
{
...
...
@@ -655,6 +649,27 @@ class TrialDispatcher implements TrainingService {
}
}
/**
 * Builds the runner settings for a single environment and saves them as JSON
 * through the storage service chosen for that environment.
 *
 * The target path is joined from "envs", the environment id and
 * "settings.json", so each environment gets its own settings file.
 *
 * @param environment environment whose settings file is written
 * @throws Error when the environment has no environment service attached
 */
private async setEnvironmentSetting(environment: EnvironmentInformation): Promise<void> {
    // Guard: nothing can be generated without the owning environment service.
    const service = environment.environmentService;
    if (service === undefined) {
        throw new Error(`Environmentservice for ${environment.id} not initialized!`);
    }

    const settings: RunnerSettings = new RunnerSettings();

    // Use the configured manager IP when present, otherwise look up the local IPv4 address.
    let managerIp = this.config.nniManagerIp;
    if (managerIp === undefined) {
        managerIp = await getIPV4Address();
    }
    settings.nniManagerIP = managerIp;
    settings.nniManagerPort = getBasePort() + 1;
    settings.commandChannel = service.getCommandChannel.channelName;
    settings.enableGpuCollector = this.enableGpuScheduler;
    settings.command = this.config.trialCommand;

    // The version string stays empty unless version checking is enabled.
    let managerVersion = '';
    if (this.enableVersionCheck) {
        managerVersion = await getVersion();
    }
    settings.nniManagerVersion = managerVersion;
    settings.logCollection = this.logCollection;
    settings.platform = service.getName;
    settings.experimentId = this.experimentId;

    // Serialize the settings into this environment's own folder.
    const storage: StorageService = this.getStorageService(service);
    const envRoot = storage.joinPath("envs");
    const settingsPath = storage.joinPath(envRoot, environment.id, "settings.json");
    await storage.save(JSON.stringify(settings), settingsPath);
}
private
async
requestEnvironment
(
environmentService
:
EnvironmentService
):
Promise
<
void
>
{
if
(
this
.
stopping
)
{
this
.
log
.
info
(
`Experiment is stopping, stop creating new environment`
);
...
...
@@ -674,6 +689,8 @@ class TrialDispatcher implements TrainingService {
environment
.
command
=
`mkdir -p envs/
${
envId
}
&& cd envs/
${
envId
}
&&
${
environment
.
command
}
`
;
environment
.
useSharedStorage
=
this
.
useSharedStorage
;
// Generate a separate settings.json file per environment to avoid conflicts
await
this
.
setEnvironmentSetting
(
environment
);
await
environmentService
.
startEnvironment
(
environment
);
this
.
environments
.
set
(
environment
.
id
,
environment
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment