Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
8314d6ee
Commit
8314d6ee
authored
Sep 07, 2018
by
Deshui Yu
Committed by
fishyds
Sep 07, 2018
Browse files
Merge from dogfood branch to master
parent
98530fd2
Changes
103
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
376 additions
and
249 deletions
+376
-249
install.sh
install.sh
+11
-4
setup.py
setup.py
+96
-0
src/nni_manager/common/manager.ts
src/nni_manager/common/manager.ts
+14
-8
src/nni_manager/common/trainingService.ts
src/nni_manager/common/trainingService.ts
+2
-0
src/nni_manager/common/utils.ts
src/nni_manager/common/utils.ts
+59
-2
src/nni_manager/core/ipcInterface.ts
src/nni_manager/core/ipcInterface.ts
+3
-11
src/nni_manager/core/nniDataStore.ts
src/nni_manager/core/nniDataStore.ts
+3
-0
src/nni_manager/core/nnimanager.ts
src/nni_manager/core/nnimanager.ts
+51
-127
src/nni_manager/core/test/dataStore.test.ts
src/nni_manager/core/test/dataStore.test.ts
+3
-4
src/nni_manager/core/test/dummy_assessor.py
src/nni_manager/core/test/dummy_assessor.py
+1
-3
src/nni_manager/core/test/dummy_tuner.py
src/nni_manager/core/test/dummy_tuner.py
+35
-0
src/nni_manager/core/test/ipcInterface.test.ts
src/nni_manager/core/test/ipcInterface.test.ts
+8
-8
src/nni_manager/core/test/ipcInterfaceTerminate.test.ts
src/nni_manager/core/test/ipcInterfaceTerminate.test.ts
+28
-12
src/nni_manager/core/test/mockedTrainingService.ts
src/nni_manager/core/test/mockedTrainingService.ts
+9
-2
src/nni_manager/core/test/nnimanager.test.ts
src/nni_manager/core/test/nnimanager.test.ts
+9
-8
src/nni_manager/core/test/sqlDatabase.test.ts
src/nni_manager/core/test/sqlDatabase.test.ts
+7
-10
src/nni_manager/rest_server/test/mockedNNIManager.ts
src/nni_manager/rest_server/test/mockedNNIManager.ts
+8
-17
src/nni_manager/rest_server/test/restserver.test.ts
src/nni_manager/rest_server/test/restserver.test.ts
+4
-29
src/nni_manager/training_service/local/localTrainingService.ts
...ni_manager/training_service/local/localTrainingService.ts
+18
-2
src/nni_manager/training_service/remote_machine/metricsCollector.ts
...nager/training_service/remote_machine/metricsCollector.ts
+7
-2
No files found.
install.sh
View file @
8314d6ee
#!/bin/bash
#!/bin/bash
INSTALL_PREFIX
=
${
HOME
}
/.local
mkdir
-p
${
INSTALL_PREFIX
}
wget
-4
-nc
https://nodejs.org/dist/v10.9.0/node-v10.9.0-linux-x64.tar.xz
--header
"Referer: nodejs.org"
wget
-4
-nc
https://nodejs.org/dist/v10.9.0/node-v10.9.0-linux-x64.tar.xz
--header
"Referer: nodejs.org"
tar
-xf
'node-v10.9.0-linux-x64.tar.xz'
tar
-xf
'node-v10.9.0-linux-x64.tar.xz'
sudo
cp
-r
f
node-v10.9.0-linux-x64
/
*
/usr/local
/node
/
cp
-r
T
node-v10.9.0-linux-x64
${
INSTALL_PREFIX
}
/node
rm
-rf
node-v10.9.0-linux-x64
*
rm
-rf
node-v10.9.0-linux-x64
*
wget
-4
-nc
https://github.com/yarnpkg/yarn/releases/download/v1.9.4/yarn-v1.9.4.tar.gz
wget
-4
-nc
https://github.com/yarnpkg/yarn/releases/download/v1.9.4/yarn-v1.9.4.tar.gz
tar
-xf
'yarn-v1.9.4.tar.gz'
tar
-xf
'yarn-v1.9.4.tar.gz'
sudo
cp
-r
f
yarn-v1.9.4
/
*
/usr/local
/yarn
/
cp
-r
T
yarn-v1.9.4
${
INSTALL_PREFIX
}
/yarn
rm
-rf
yarn-v1.9.4
*
rm
-rf
yarn-v1.9.4
*
export
PATH
=
/usr/local/node/bin:/usr/local/yarn/bin:
$PATH
NODE_BIN
=
${
INSTALL_PREFIX
}
/node/bin
YARN_BIN
=
${
INSTALL_PREFIX
}
/yarn/bin
export
PATH
=
${
INSTALL_PREFIX
}
/node/bin:
${
INSTALL_PREFIX
}
/yarn/bin:
$PATH
echo
$PATH
|grep
-q
${
NODE_BIN
}
||
echo
"export PATH=
${
NODE_BIN
}
:
\$
{PATH}"
>>
${
HOME
}
/.bashrc
echo
$PATH
|grep
-q
${
YARN_BIN
}
||
echo
"export PATH=
${
YARN_BIN
}
:
\$
{PATH}"
>>
${
HOME
}
/.bashrc
source
${
HOME
}
/.bashrc
make
make
sudo
make
install
make
install
\ No newline at end of file
setup.py
0 → 100644
View file @
8314d6ee
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
import
os
from
setuptools
import
setup
,
find_packages
from
setuptools.command.install
import
install
from
subprocess
import
Popen
def read(fname):
    '''Return the full text of `fname`, resolved relative to this file's directory.

    The file is opened in text mode with the default encoding. It is read
    inside a `with` block so the handle is closed deterministically instead of
    being left for the garbage collector.
    '''
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path) as handle:
        return handle.read()
class CustomInstallCommand(install):
    '''a customized install class in pip module

    On top of the standard setuptools install it runs `make pip-install` and
    appends PATH exports for $HOME/.local/node/bin and $HOME/.local/yarn/bin
    to ~/.bashrc when those directories are not already on PATH.
    '''

    def makeInstall(self):
        '''execute make pip-install command; abort the install when it fails'''
        cmds = ['make', 'pip-install']
        process = Popen(cmds)
        if process.wait() != 0:
            print('Error: Make Install Failed')
            # Raise SystemExit directly: the exit() helper is injected by the
            # `site` module and is missing when Python runs with -S.
            raise SystemExit(-1)

    def writeEnvironmentVariables(self, variable_name):
        '''write an environment variable into ~/.bashrc

        Appends `export PATH=<home>/.local/<variable_name>/bin:$PATH` to
        ~/.bashrc unless that bin directory is already an entry of the
        current PATH.
        '''
        # HOME may be unset (e.g. stripped-down environments); fall back to
        # the platform's notion of the home directory.
        home = os.getenv('HOME') or os.path.expanduser('~')
        # A missing PATH is treated as empty instead of raising AttributeError.
        paths = os.getenv('PATH', '').split(':')
        bin_path = os.path.join(home, '.local', variable_name, 'bin')
        if bin_path not in paths:
            bashrc_path = os.path.join(home, '.bashrc')
            try:
                # Append directly instead of spawning `echo ... >> file`
                # through a shell: no quoting problems when the home path
                # contains spaces or shell metacharacters.
                with open(bashrc_path, 'a') as bashrc:
                    bashrc.write('export PATH=' + bin_path + ':$PATH\n')
            except OSError:
                print('Error: Write Environment Variables Failed')
                raise SystemExit(-1)

    def run(self):
        '''run the standard install, then the make step, then update PATH'''
        install.run(self)
        self.makeInstall()
        self.writeEnvironmentVariables('node')
        self.writeEnvironmentVariables('yarn')
# Distribution metadata and build configuration, collected in one mapping and
# handed to setuptools in a single call.
setup_options = {
    'name': 'NNI',
    'version': '0.0.1',
    'author': 'Microsoft NNI Team',
    'author_email': 'nni@microsoft.com',
    'description': 'Neural Network Intelligence project',
    'long_description': read('docs/NNICTLDOC.md'),
    'license': 'MIT',
    'url': 'https://msrasrg.visualstudio.com/NeuralNetworkIntelligence',

    # Python packages are discovered under the SDK tree and the tools tree.
    'packages': find_packages('src/sdk/pynni', exclude=['tests']) + find_packages('tools'),
    'package_dir': {
        'annotation': 'tools/annotation',
        'nni': 'src/sdk/pynni/nni',
        'nnicmd': 'tools/nnicmd'
    },
    'python_requires': '>=3.5',
    'install_requires': [
        'astor',
        'json_tricks',
        'numpy',
        'psutil',
        'pymc3',
        'pyyaml',
        'requests',
        'scipy'
    ],
    'dependency_links': [
        'git+https://github.com/hyperopt/hyperopt.git',
    ],

    # Route `install` through the customized command defined in this file.
    'cmdclass': {
        'install': CustomInstallCommand
    },
    'entry_points': {
        'console_scripts': ['nnictl = nnicmd.nnictl:parse_args']
    }
}

setup(**setup_options)
src/nni_manager/common/manager.ts
View file @
8314d6ee
...
@@ -32,16 +32,22 @@ interface ExperimentParams {
...
@@ -32,16 +32,22 @@ interface ExperimentParams {
maxTrialNum
:
number
;
maxTrialNum
:
number
;
searchSpace
:
string
;
searchSpace
:
string
;
tuner
:
{
tuner
:
{
tunerCommand
:
string
;
className
:
string
;
tunerCwd
:
string
;
builtinTunerName
?:
string
;
tunerCheckpointDirectory
:
string
;
codeDir
?:
string
;
tunerGpuNum
?:
number
;
classArgs
?:
any
;
classFileName
?:
string
;
checkpointDir
:
string
;
gpuNum
?:
number
;
};
};
assessor
?:
{
assessor
?:
{
assessorCommand
:
string
;
className
:
string
;
assessorCwd
:
string
;
builtinAssessorName
?:
string
;
assessorCheckpointDirectory
:
string
;
codeDir
?:
string
;
assessorGpuNum
?:
number
;
classArgs
?:
any
;
classFileName
?:
string
;
checkpointDir
:
string
;
gpuNum
?:
number
;
};
};
clusterMetaData
?:
{
clusterMetaData
?:
{
key
:
string
;
key
:
string
;
...
...
src/nni_manager/common/trainingService.ts
View file @
8314d6ee
...
@@ -105,6 +105,8 @@ abstract class TrainingService {
...
@@ -105,6 +105,8 @@ abstract class TrainingService {
public
abstract
addTrialJobMetricListener
(
listener
:
(
metric
:
TrialJobMetric
)
=>
void
):
void
;
public
abstract
addTrialJobMetricListener
(
listener
:
(
metric
:
TrialJobMetric
)
=>
void
):
void
;
public
abstract
removeTrialJobMetricListener
(
listener
:
(
metric
:
TrialJobMetric
)
=>
void
):
void
;
public
abstract
removeTrialJobMetricListener
(
listener
:
(
metric
:
TrialJobMetric
)
=>
void
):
void
;
public
abstract
submitTrialJob
(
form
:
JobApplicationForm
):
Promise
<
TrialJobDetail
>
;
public
abstract
submitTrialJob
(
form
:
JobApplicationForm
):
Promise
<
TrialJobDetail
>
;
public
abstract
updateTrialJob
(
trialJobId
:
string
,
form
:
JobApplicationForm
):
Promise
<
TrialJobDetail
>
;
public
abstract
get
isMultiPhaseJobSupported
():
boolean
;
public
abstract
cancelTrialJob
(
trialJobId
:
string
):
Promise
<
void
>
;
public
abstract
cancelTrialJob
(
trialJobId
:
string
):
Promise
<
void
>
;
public
abstract
setClusterMetadata
(
key
:
string
,
value
:
string
):
Promise
<
void
>
;
public
abstract
setClusterMetadata
(
key
:
string
,
value
:
string
):
Promise
<
void
>
;
public
abstract
getClusterMetadata
(
key
:
string
):
Promise
<
string
>
;
public
abstract
getClusterMetadata
(
key
:
string
):
Promise
<
string
>
;
...
...
src/nni_manager/common/utils.ts
View file @
8314d6ee
...
@@ -28,7 +28,7 @@ import { Container } from 'typescript-ioc';
...
@@ -28,7 +28,7 @@ import { Container } from 'typescript-ioc';
import
*
as
util
from
'
util
'
;
import
*
as
util
from
'
util
'
;
import
{
Database
,
DataStore
}
from
'
./datastore
'
;
import
{
Database
,
DataStore
}
from
'
./datastore
'
;
import
{
ExperimentStartupInfo
,
setExperimentStartupInfo
,
getExperimentId
}
from
'
./experimentStartupInfo
'
;
import
{
ExperimentStartupInfo
,
getExperimentId
,
setExperimentStartupInfo
}
from
'
./experimentStartupInfo
'
;
import
{
Manager
}
from
'
./manager
'
;
import
{
Manager
}
from
'
./manager
'
;
import
{
TrainingService
}
from
'
./trainingService
'
;
import
{
TrainingService
}
from
'
./trainingService
'
;
...
@@ -127,6 +127,63 @@ function parseArg(names: string[]): string {
...
@@ -127,6 +127,63 @@ function parseArg(names: string[]): string {
return
''
;
return
''
;
}
}
/**
 * Generate command line to start advisor process which runs tuner and assessor
 * @param tuner : For builtin tuner:
 *     {
 *         className: 'EvolutionTuner'
 *         classArgs: {
 *             optimize_mode: 'maximize',
 *             population_size: 3
 *         }
 *     }
 * customized:
 *     {
 *         codeDir: '/tmp/mytuner'
 *         classFileName: 'best_tuner.py'
 *         className: 'BestTuner'
 *         classArgs: {
 *             optimize_mode: 'maximize',
 *             population_size: 3
 *         }
 *     }
 *
 * @param assessor: similar to tuner; may be undefined (or have no className),
 *                  in which case no assessor options are emitted
 * @returns the command string, always starting with
 *          `python3 -m nni --tuner_class_name <tuner.className>`
 *
 * NOTE(review): values are interpolated without shell quoting; callers that run
 * the result through a shell should make sure the inputs are trusted - confirm.
 */
function getMsgDispatcherCommand(tuner: any, assessor: any): string {
    let command: string = `python3 -m nni --tuner_class_name ${tuner.className}`;
    command += getDispatcherOptionalArgs('tuner', tuner);

    if (assessor !== undefined && assessor !== null && assessor.className !== undefined) {
        command += ` --assessor_class_name ${assessor.className}`;
        command += getDispatcherOptionalArgs('assessor', assessor);
    }

    return command;
}

/**
 * Build the optional `--<role>_args`, `--<role>_directory` and
 * `--<role>_class_filename` options shared by the tuner and the assessor.
 * @param role prefix used in the option names
 * @param spec tuner or assessor description (classArgs / codeDir / classFileName)
 */
function getDispatcherOptionalArgs(role: 'tuner' | 'assessor', spec: any): string {
    let options: string = '';

    if (spec.classArgs !== undefined) {
        // Stringified twice: the outer pass wraps the JSON text in double quotes
        // and escapes the inner ones so it is passed as a single argument.
        options += ` --${role}_args ${JSON.stringify(JSON.stringify(spec.classArgs))}`;
    }
    // Any non-empty value is accepted; one-character values such as '.' are valid.
    if (isNonEmptyString(spec.codeDir)) {
        options += ` --${role}_directory ${spec.codeDir}`;
    }
    if (isNonEmptyString(spec.classFileName)) {
        options += ` --${role}_class_filename ${spec.classFileName}`;
    }

    return options;
}

/**
 * True when `value` is a string with at least one character.
 */
function isNonEmptyString(value: any): boolean {
    return typeof value === 'string' && value.length > 0;
}
/**
/**
* Initialize a pseudo experiment environment for unit test.
* Initialize a pseudo experiment environment for unit test.
* Must be paired with `cleanupUnitTest()`.
* Must be paired with `cleanupUnitTest()`.
...
@@ -161,5 +218,5 @@ function cleanupUnitTest(): void {
...
@@ -161,5 +218,5 @@ function cleanupUnitTest(): void {
Container
.
restore
(
ExperimentStartupInfo
);
Container
.
restore
(
ExperimentStartupInfo
);
}
}
export
{
getLogDir
,
getExperimentRootDir
,
getDefaultDatabaseDir
,
mkDirP
,
delay
,
prepareUnitTest
,
export
{
getMsgDispatcherCommand
,
getLogDir
,
getExperimentRootDir
,
getDefaultDatabaseDir
,
mkDirP
,
delay
,
prepareUnitTest
,
parseArg
,
cleanupUnitTest
,
uniqueString
};
parseArg
,
cleanupUnitTest
,
uniqueString
};
src/nni_manager/core/ipcInterface.ts
View file @
8314d6ee
...
@@ -135,16 +135,8 @@ class IpcInterface {
...
@@ -135,16 +135,8 @@ class IpcInterface {
* Create IPC proxy for tuner process
* Create IPC proxy for tuner process
* @param process_ the tuner process
* @param process_ the tuner process
*/
*/
function
create
Tun
erInterface
(
process
:
ChildProcess
):
IpcInterface
{
function
create
Dispatch
erInterface
(
process
:
ChildProcess
):
IpcInterface
{
return
new
IpcInterface
(
process
,
CommandType
.
TUNER_COMMANDS
);
return
new
IpcInterface
(
process
,
new
Set
([...
CommandType
.
TUNER_COMMANDS
,
...
CommandType
.
ASSESSOR_COMMANDS
])
);
}
}
/**
export
{
IpcInterface
,
createDispatcherInterface
};
* Create IPC proxy for assessor process
* @param process_ the assessor process
*/
function
createAssessorInterface
(
process
:
ChildProcess
):
IpcInterface
{
return
new
IpcInterface
(
process
,
CommandType
.
ASSESSOR_COMMANDS
);
}
export
{
IpcInterface
,
createTunerInterface
,
createAssessorInterface
};
src/nni_manager/core/nniDataStore.ts
View file @
8314d6ee
...
@@ -185,6 +185,9 @@ class NNIDataStore implements DataStore {
...
@@ -185,6 +185,9 @@ class NNIDataStore implements DataStore {
// assume data is stored by time ASC order
// assume data is stored by time ASC order
for
(
const
record
of
trialJobEvents
)
{
for
(
const
record
of
trialJobEvents
)
{
let
jobInfo
:
TrialJobInfo
|
undefined
;
let
jobInfo
:
TrialJobInfo
|
undefined
;
if
(
record
.
trialJobId
===
undefined
||
record
.
trialJobId
.
length
<
1
)
{
continue
;
}
if
(
map
.
has
(
record
.
trialJobId
))
{
if
(
map
.
has
(
record
.
trialJobId
))
{
jobInfo
=
map
.
get
(
record
.
trialJobId
);
jobInfo
=
map
.
get
(
record
.
trialJobId
);
}
else
{
}
else
{
...
...
src/nni_manager/core/nnimanager.ts
View file @
8314d6ee
...
@@ -34,12 +34,12 @@ import {
...
@@ -34,12 +34,12 @@ import {
import
{
import
{
TrainingService
,
TrialJobApplicationForm
,
TrialJobDetail
,
TrialJobMetric
,
TrialJobStatus
TrainingService
,
TrialJobApplicationForm
,
TrialJobDetail
,
TrialJobMetric
,
TrialJobStatus
}
from
'
../common/trainingService
'
;
}
from
'
../common/trainingService
'
;
import
{
delay
,
getLogDir
}
from
'
../common/utils
'
;
import
{
delay
,
getLogDir
,
getMsgDispatcherCommand
}
from
'
../common/utils
'
;
import
{
import
{
ADD_CUSTOMIZED_TRIAL_JOB
,
KILL_TRIAL_JOB
,
NEW_TRIAL_JOB
,
NO_MORE_TRIAL_JOBS
,
REPORT_METRIC_DATA
,
ADD_CUSTOMIZED_TRIAL_JOB
,
KILL_TRIAL_JOB
,
NEW_TRIAL_JOB
,
NO_MORE_TRIAL_JOBS
,
REPORT_METRIC_DATA
,
REQUEST_TRIAL_JOBS
,
TERMINATE
,
TRIAL_END
,
UPDATE_SEARCH_SPACE
REQUEST_TRIAL_JOBS
,
TERMINATE
,
TRIAL_END
,
UPDATE_SEARCH_SPACE
}
from
'
./commands
'
;
}
from
'
./commands
'
;
import
{
create
AssessorInterface
,
createTun
erInterface
,
IpcInterface
}
from
'
./ipcInterface
'
;
import
{
create
Dispatch
erInterface
,
IpcInterface
}
from
'
./ipcInterface
'
;
import
{
TrialJobMaintainerEvent
,
TrialJobs
}
from
'
./trialJobs
'
;
import
{
TrialJobMaintainerEvent
,
TrialJobs
}
from
'
./trialJobs
'
;
/**
/**
...
@@ -47,8 +47,7 @@ import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs';
...
@@ -47,8 +47,7 @@ import { TrialJobMaintainerEvent, TrialJobs } from './trialJobs';
*/
*/
class
NNIManager
implements
Manager
{
class
NNIManager
implements
Manager
{
private
trainingService
:
TrainingService
;
private
trainingService
:
TrainingService
;
private
tuner
:
IpcInterface
|
undefined
;
private
dispatcher
:
IpcInterface
|
undefined
;
private
assessor
:
IpcInterface
|
undefined
;
private
trialJobsMaintainer
:
TrialJobs
|
undefined
;
private
trialJobsMaintainer
:
TrialJobs
|
undefined
;
private
currSubmittedTrialNum
:
number
;
// need to be recovered
private
currSubmittedTrialNum
:
number
;
// need to be recovered
private
trialConcurrencyReduction
:
number
;
private
trialConcurrencyReduction
:
number
;
...
@@ -56,9 +55,7 @@ class NNIManager implements Manager {
...
@@ -56,9 +55,7 @@ class NNIManager implements Manager {
private
log
:
Logger
;
private
log
:
Logger
;
private
dataStore
:
DataStore
;
private
dataStore
:
DataStore
;
private
experimentProfile
:
ExperimentProfile
;
private
experimentProfile
:
ExperimentProfile
;
// TO DO: could use struct here
private
dispatcherPid
:
number
;
private
tunerPid
:
number
;
private
assessorPid
:
number
;
constructor
()
{
constructor
()
{
this
.
currSubmittedTrialNum
=
0
;
this
.
currSubmittedTrialNum
=
0
;
...
@@ -67,8 +64,7 @@ class NNIManager implements Manager {
...
@@ -67,8 +64,7 @@ class NNIManager implements Manager {
const
experimentId
:
string
=
getExperimentId
();
const
experimentId
:
string
=
getExperimentId
();
this
.
trainingService
=
component
.
get
(
TrainingService
);
this
.
trainingService
=
component
.
get
(
TrainingService
);
assert
(
this
.
trainingService
);
assert
(
this
.
trainingService
);
this
.
tunerPid
=
0
;
this
.
dispatcherPid
=
0
;
this
.
assessorPid
=
0
;
this
.
log
=
getLogger
();
this
.
log
=
getLogger
();
this
.
dataStore
=
component
.
get
(
DataStore
);
this
.
dataStore
=
component
.
get
(
DataStore
);
...
@@ -84,9 +80,9 @@ class NNIManager implements Manager {
...
@@ -84,9 +80,9 @@ class NNIManager implements Manager {
maxTrialNum
:
0
,
// maxTrialNum includes all the submitted trial jobs
maxTrialNum
:
0
,
// maxTrialNum includes all the submitted trial jobs
searchSpace
:
''
,
searchSpace
:
''
,
tuner
:
{
tuner
:
{
tunerCommand
:
''
,
className
:
''
,
tunerCwd
:
''
,
classArgs
:
{}
,
tunerC
heckpointDir
ectory
:
''
c
heckpointDir
:
''
}
}
}
}
};
};
...
@@ -134,21 +130,15 @@ class NNIManager implements Manager {
...
@@ -134,21 +130,15 @@ class NNIManager implements Manager {
this
.
experimentProfile
.
params
=
expParams
;
this
.
experimentProfile
.
params
=
expParams
;
await
this
.
storeExperimentProfile
();
await
this
.
storeExperimentProfile
();
this
.
log
.
debug
(
'
Setup tuner...
'
);
this
.
log
.
debug
(
'
Setup tuner...
'
);
const
dispatcherCommand
:
string
=
getMsgDispatcherCommand
(
expParams
.
tuner
,
expParams
.
assessor
);
console
.
log
(
`dispatcher command:
${
dispatcherCommand
}
`
);
this
.
setupTuner
(
this
.
setupTuner
(
expParams
.
tuner
.
tunerCommand
,
//expParams.tuner.tunerCommand,
expParams
.
tuner
.
tunerCwd
,
dispatcherCommand
,
undefined
,
'
start
'
,
'
start
'
,
expParams
.
tuner
.
tunerCheckpointDirectory
);
expParams
.
tuner
.
checkpointDir
);
if
(
expParams
.
assessor
!==
undefined
)
{
this
.
log
.
debug
(
'
Setup assessor...
'
);
this
.
setupAssessor
(
expParams
.
assessor
.
assessorCommand
,
expParams
.
assessor
.
assessorCwd
,
'
start
'
,
expParams
.
assessor
.
assessorCheckpointDirectory
);
}
this
.
experimentProfile
.
startTime
=
new
Date
();
this
.
experimentProfile
.
startTime
=
new
Date
();
await
this
.
storeExperimentProfile
();
await
this
.
storeExperimentProfile
();
...
@@ -164,20 +154,13 @@ class NNIManager implements Manager {
...
@@ -164,20 +154,13 @@ class NNIManager implements Manager {
this
.
experimentProfile
=
await
this
.
dataStore
.
getExperimentProfile
(
experimentId
);
this
.
experimentProfile
=
await
this
.
dataStore
.
getExperimentProfile
(
experimentId
);
const
expParams
:
ExperimentParams
=
this
.
experimentProfile
.
params
;
const
expParams
:
ExperimentParams
=
this
.
experimentProfile
.
params
;
const
dispatcherCommand
:
string
=
getMsgDispatcherCommand
(
expParams
.
tuner
,
expParams
.
assessor
);
console
.
log
(
`dispatcher command:
${
dispatcherCommand
}
`
);
this
.
setupTuner
(
this
.
setupTuner
(
expParams
.
tuner
.
tun
erCommand
,
dispatch
erCommand
,
expParams
.
tuner
.
tunerCw
d
,
undefine
d
,
'
resume
'
,
'
resume
'
,
expParams
.
tuner
.
tunerCheckpointDirectory
);
expParams
.
tuner
.
checkpointDir
);
if
(
expParams
.
assessor
!==
undefined
)
{
this
.
setupAssessor
(
expParams
.
assessor
.
assessorCommand
,
expParams
.
assessor
.
assessorCwd
,
'
resume
'
,
expParams
.
assessor
.
assessorCheckpointDirectory
);
}
const
allTrialJobs
:
TrialJobInfo
[]
=
await
this
.
dataStore
.
listTrialJobs
();
const
allTrialJobs
:
TrialJobInfo
[]
=
await
this
.
dataStore
.
listTrialJobs
();
...
@@ -204,7 +187,7 @@ class NNIManager implements Manager {
...
@@ -204,7 +187,7 @@ class NNIManager implements Manager {
// TO DO: move timeout value to constants file
// TO DO: move timeout value to constants file
const
delay1
:
Promise
<
{}
>
=
new
Promise
((
resolve
:
Function
,
reject
:
Function
):
void
=>
{
const
delay1
:
Promise
<
{}
>
=
new
Promise
((
resolve
:
Function
,
reject
:
Function
):
void
=>
{
timeoutId
=
setTimeout
(
timeoutId
=
setTimeout
(
()
=>
{
reject
(
new
Error
(
'
TrainingService setClusterMetadata timeout.
'
));
},
()
=>
{
reject
(
new
Error
(
'
TrainingService setClusterMetadata timeout.
Please check your config file.
'
));
},
10000
);
10000
);
});
});
await
Promise
.
race
([
delay1
,
this
.
trainingService
.
setClusterMetadata
(
key
,
value
)]).
finally
(()
=>
{
await
Promise
.
race
([
delay1
,
this
.
trainingService
.
setClusterMetadata
(
key
,
value
)]).
finally
(()
=>
{
...
@@ -248,8 +231,8 @@ class NNIManager implements Manager {
...
@@ -248,8 +231,8 @@ class NNIManager implements Manager {
return
this
.
dataStore
.
listTrialJobs
(
status
);
return
this
.
dataStore
.
listTrialJobs
(
status
);
}
}
private
setupTuner
(
command
:
string
,
cwd
:
string
,
mode
:
'
start
'
|
'
resume
'
,
dataDirectory
:
string
):
void
{
private
setupTuner
(
command
:
string
,
cwd
:
string
|
undefined
,
mode
:
'
start
'
|
'
resume
'
,
dataDirectory
:
string
):
void
{
if
(
this
.
tun
er
!==
undefined
)
{
if
(
this
.
dispatch
er
!==
undefined
)
{
return
;
return
;
}
}
const
stdio
:
(
string
|
NodeJS
.
WriteStream
)[]
=
[
'
ignore
'
,
process
.
stdout
,
process
.
stderr
,
'
pipe
'
,
'
pipe
'
];
const
stdio
:
(
string
|
NodeJS
.
WriteStream
)[]
=
[
'
ignore
'
,
process
.
stdout
,
process
.
stderr
,
'
pipe
'
,
'
pipe
'
];
...
@@ -270,36 +253,8 @@ class NNIManager implements Manager {
...
@@ -270,36 +253,8 @@ class NNIManager implements Manager {
},
},
shell
:
true
shell
:
true
});
});
this
.
tunerPid
=
tunerProc
.
pid
;
this
.
dispatcherPid
=
tunerProc
.
pid
;
this
.
tuner
=
createTunerInterface
(
tunerProc
);
this
.
dispatcher
=
createDispatcherInterface
(
tunerProc
);
return
;
}
private
setupAssessor
(
command
:
string
,
cwd
:
string
,
mode
:
'
start
'
|
'
resume
'
,
dataDirectory
:
string
):
void
{
if
(
this
.
assessor
!==
undefined
)
{
return
;
}
const
stdio
:
(
string
|
NodeJS
.
WriteStream
)[]
=
[
'
ignore
'
,
process
.
stdout
,
process
.
stderr
,
'
pipe
'
,
'
pipe
'
];
let
newCwd
:
string
;
if
(
cwd
===
undefined
||
cwd
===
''
)
{
newCwd
=
getLogDir
();
}
else
{
newCwd
=
cwd
;
}
// TO DO: add CUDA_VISIBLE_DEVICES
const
assessorProc
:
ChildProcess
=
spawn
(
command
,
[],
{
stdio
,
cwd
:
newCwd
,
env
:
{
NNI_MODE
:
mode
,
NNI_CHECKPOINT_DIRECTORY
:
dataDirectory
,
NNI_LOG_DIRECTORY
:
getLogDir
()
},
shell
:
true
});
this
.
assessorPid
=
assessorProc
.
pid
;
this
.
assessor
=
createAssessorInterface
(
assessorProc
);
return
;
return
;
}
}
...
@@ -307,10 +262,10 @@ class NNIManager implements Manager {
...
@@ -307,10 +262,10 @@ class NNIManager implements Manager {
private
updateTrialConcurrency
(
trialConcurrency
:
number
):
void
{
private
updateTrialConcurrency
(
trialConcurrency
:
number
):
void
{
// TO DO: this method can only be called after startExperiment/resumeExperiment
// TO DO: this method can only be called after startExperiment/resumeExperiment
if
(
trialConcurrency
>
this
.
experimentProfile
.
params
.
trialConcurrency
)
{
if
(
trialConcurrency
>
this
.
experimentProfile
.
params
.
trialConcurrency
)
{
if
(
this
.
tun
er
===
undefined
)
{
if
(
this
.
dispatch
er
===
undefined
)
{
throw
new
Error
(
'
Error: tuner has to be initialized
'
);
throw
new
Error
(
'
Error: tuner has to be initialized
'
);
}
}
this
.
tun
er
.
sendCommand
(
this
.
dispatch
er
.
sendCommand
(
REQUEST_TRIAL_JOBS
,
REQUEST_TRIAL_JOBS
,
String
(
trialConcurrency
-
this
.
experimentProfile
.
params
.
trialConcurrency
)
String
(
trialConcurrency
-
this
.
experimentProfile
.
params
.
trialConcurrency
)
);
);
...
@@ -333,45 +288,31 @@ class NNIManager implements Manager {
...
@@ -333,45 +288,31 @@ class NNIManager implements Manager {
}
}
private
updateSearchSpace
(
searchSpace
:
string
):
void
{
private
updateSearchSpace
(
searchSpace
:
string
):
void
{
if
(
this
.
tun
er
===
undefined
)
{
if
(
this
.
dispatch
er
===
undefined
)
{
throw
new
Error
(
'
Error: tuner has not been setup
'
);
throw
new
Error
(
'
Error: tuner has not been setup
'
);
}
}
this
.
tun
er
.
sendCommand
(
UPDATE_SEARCH_SPACE
,
searchSpace
);
this
.
dispatch
er
.
sendCommand
(
UPDATE_SEARCH_SPACE
,
searchSpace
);
this
.
experimentProfile
.
params
.
searchSpace
=
searchSpace
;
this
.
experimentProfile
.
params
.
searchSpace
=
searchSpace
;
return
;
return
;
}
}
private
async
experimentDoneCleanUp
():
Promise
<
void
>
{
private
async
experimentDoneCleanUp
():
Promise
<
void
>
{
if
(
this
.
tun
er
===
undefined
)
{
if
(
this
.
dispatch
er
===
undefined
)
{
throw
new
Error
(
'
Error: tuner has not been setup
'
);
throw
new
Error
(
'
Error: tuner has not been setup
'
);
}
}
this
.
tuner
.
sendCommand
(
TERMINATE
);
this
.
dispatcher
.
sendCommand
(
TERMINATE
);
if
(
this
.
assessor
!==
undefined
)
{
this
.
assessor
.
sendCommand
(
TERMINATE
);
}
let
tunerAlive
:
boolean
=
true
;
let
tunerAlive
:
boolean
=
true
;
let
assessorAlive
:
boolean
=
true
;
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
for
(
let
i
:
number
=
0
;
i
<
30
;
i
++
)
{
for
(
let
i
:
number
=
0
;
i
<
30
;
i
++
)
{
if
(
!
tunerAlive
&&
!
assessorAlive
)
{
break
;
}
if
(
!
tunerAlive
)
{
break
;
}
try
{
try
{
await
cpp
.
exec
(
`kill -0
${
this
.
tun
erPid
}
`
);
await
cpp
.
exec
(
`kill -0
${
this
.
dispatch
erPid
}
`
);
}
catch
(
error
)
{
tunerAlive
=
false
;
}
}
catch
(
error
)
{
tunerAlive
=
false
;
}
if
(
this
.
assessor
!==
undefined
)
{
try
{
await
cpp
.
exec
(
`kill -0
${
this
.
assessorPid
}
`
);
}
catch
(
error
)
{
assessorAlive
=
false
;
}
}
else
{
assessorAlive
=
false
;
}
await
delay
(
1000
);
await
delay
(
1000
);
}
}
try
{
try
{
await
cpp
.
exec
(
`kill
${
this
.
tunerPid
}
`
);
await
cpp
.
exec
(
`kill
${
this
.
dispatcherPid
}
`
);
if
(
this
.
assessorPid
!==
undefined
)
{
await
cpp
.
exec
(
`kill
${
this
.
assessorPid
}
`
);
}
}
catch
(
error
)
{
}
catch
(
error
)
{
// this.tunerPid does not exist, do nothing here
// this.tunerPid does not exist, do nothing here
}
}
...
@@ -408,25 +349,18 @@ class NNIManager implements Manager {
...
@@ -408,25 +349,18 @@ class NNIManager implements Manager {
return
this
.
dataStore
.
storeExperimentProfile
(
this
.
experimentProfile
);
return
this
.
dataStore
.
storeExperimentProfile
(
this
.
experimentProfile
);
}
}
// tslint:disable-next-line:max-func-body-length
private
runInternal
():
Promise
<
void
>
{
private
runInternal
():
Promise
<
void
>
{
// TO DO: cannot run this method more than once in one NNIManager instance
// TO DO: cannot run this method more than once in one NNIManager instance
if
(
this
.
tun
er
===
undefined
)
{
if
(
this
.
dispatch
er
===
undefined
)
{
throw
new
Error
(
'
Error: tuner has not been setup
'
);
throw
new
Error
(
'
Error: tuner has not been setup
'
);
}
}
this
.
trainingService
.
addTrialJobMetricListener
(
async
(
metric
:
TrialJobMetric
)
=>
{
this
.
trainingService
.
addTrialJobMetricListener
(
async
(
metric
:
TrialJobMetric
)
=>
{
await
this
.
dataStore
.
storeMetricData
(
metric
.
id
,
metric
.
data
);
await
this
.
dataStore
.
storeMetricData
(
metric
.
id
,
metric
.
data
);
if
(
this
.
tun
er
===
undefined
)
{
if
(
this
.
dispatch
er
===
undefined
)
{
throw
new
Error
(
'
Error: tuner has not been setup
'
);
throw
new
Error
(
'
Error: tuner has not been setup
'
);
}
}
this
.
tuner
.
sendCommand
(
REPORT_METRIC_DATA
,
metric
.
data
);
this
.
dispatcher
.
sendCommand
(
REPORT_METRIC_DATA
,
metric
.
data
);
if
(
this
.
assessor
!==
undefined
)
{
try
{
this
.
assessor
.
sendCommand
(
REPORT_METRIC_DATA
,
metric
.
data
);
}
catch
(
error
)
{
this
.
log
.
critical
(
`ASSESSOR ERROR:
${
error
.
message
}
`
);
this
.
log
.
critical
(
`ASSESSOR ERROR:
${
error
.
stack
}
`
);
}
}
});
});
this
.
trialJobsMaintainer
=
new
TrialJobs
(
this
.
trialJobsMaintainer
=
new
TrialJobs
(
...
@@ -439,7 +373,7 @@ class NNIManager implements Manager {
...
@@ -439,7 +373,7 @@ class NNIManager implements Manager {
}
else
{
}
else
{
this
.
log
.
debug
(
`Job event:
${
event
}
`
);
this
.
log
.
debug
(
`Job event:
${
event
}
`
);
}
}
if
(
this
.
tun
er
===
undefined
)
{
if
(
this
.
dispatch
er
===
undefined
)
{
throw
new
Error
(
'
Error: tuner has not been setup
'
);
throw
new
Error
(
'
Error: tuner has not been setup
'
);
}
}
switch
(
event
)
{
switch
(
event
)
{
...
@@ -453,15 +387,13 @@ class NNIManager implements Manager {
...
@@ -453,15 +387,13 @@ class NNIManager implements Manager {
if
(
this
.
currSubmittedTrialNum
<
this
.
experimentProfile
.
params
.
maxTrialNum
)
{
if
(
this
.
currSubmittedTrialNum
<
this
.
experimentProfile
.
params
.
maxTrialNum
)
{
if
(
this
.
customizedTrials
.
length
>
0
)
{
if
(
this
.
customizedTrials
.
length
>
0
)
{
const
hyperParams
:
string
|
undefined
=
this
.
customizedTrials
.
shift
();
const
hyperParams
:
string
|
undefined
=
this
.
customizedTrials
.
shift
();
this
.
tun
er
.
sendCommand
(
ADD_CUSTOMIZED_TRIAL_JOB
,
hyperParams
);
this
.
dispatch
er
.
sendCommand
(
ADD_CUSTOMIZED_TRIAL_JOB
,
hyperParams
);
}
else
{
}
else
{
this
.
tuner
.
sendCommand
(
REQUEST_TRIAL_JOBS
,
'
1
'
);
this
.
dispatcher
.
sendCommand
(
REQUEST_TRIAL_JOBS
,
'
1
'
);
}
}
}
}
}
if
(
this
.
assessor
!==
undefined
)
{
this
.
assessor
.
sendCommand
(
TRIAL_END
,
JSON
.
stringify
({
trial_job_id
:
trialJobDetail
.
id
,
event
:
event
}));
}
}
this
.
dispatcher
.
sendCommand
(
TRIAL_END
,
JSON
.
stringify
({
trial_job_id
:
trialJobDetail
.
id
,
event
:
event
}));
await
this
.
dataStore
.
storeTrialJobEvent
(
event
,
trialJobDetail
.
id
,
undefined
,
trialJobDetail
.
url
);
await
this
.
dataStore
.
storeTrialJobEvent
(
event
,
trialJobDetail
.
id
,
undefined
,
trialJobDetail
.
url
);
break
;
break
;
case
'
RUNNING
'
:
case
'
RUNNING
'
:
...
@@ -478,15 +410,14 @@ class NNIManager implements Manager {
...
@@ -478,15 +410,14 @@ class NNIManager implements Manager {
});
});
// TO DO: we should send INITIALIZE command to tuner if user's tuner needs to run init method in tuner
// TO DO: we should send INITIALIZE command to tuner if user's tuner needs to run init method in tuner
// TO DO: we should send INITIALIZE command to assessor if user's tuner needs to run init method in tuner
this
.
log
.
debug
(
`Send tuner command: update search space:
${
this
.
experimentProfile
.
params
.
searchSpace
}
`
);
this
.
log
.
debug
(
`Send tuner command: update search space:
${
this
.
experimentProfile
.
params
.
searchSpace
}
`
)
this
.
dispatcher
.
sendCommand
(
UPDATE_SEARCH_SPACE
,
this
.
experimentProfile
.
params
.
searchSpace
);
this
.
tuner
.
sendCommand
(
UPDATE_SEARCH_SPACE
,
this
.
experimentProfile
.
params
.
searchSpace
);
if
(
this
.
trialConcurrencyReduction
!==
0
)
{
if
(
this
.
trialConcurrencyReduction
!==
0
)
{
return
Promise
.
reject
(
new
Error
(
'
Error: cannot modify trialConcurrency before startExperiment
'
));
return
Promise
.
reject
(
new
Error
(
'
Error: cannot modify trialConcurrency before startExperiment
'
));
}
}
this
.
log
.
debug
(
`Send tuner command:
${
this
.
experimentProfile
.
params
.
trialConcurrency
}
`
)
this
.
log
.
debug
(
`Send tuner command:
${
this
.
experimentProfile
.
params
.
trialConcurrency
}
`
)
this
.
tun
er
.
sendCommand
(
REQUEST_TRIAL_JOBS
,
String
(
this
.
experimentProfile
.
params
.
trialConcurrency
));
this
.
dispatch
er
.
sendCommand
(
REQUEST_TRIAL_JOBS
,
String
(
this
.
experimentProfile
.
params
.
trialConcurrency
));
this
.
tun
er
.
onCommand
(
async
(
commandType
:
string
,
content
:
string
)
=>
{
this
.
dispatch
er
.
onCommand
(
async
(
commandType
:
string
,
content
:
string
)
=>
{
this
.
log
.
info
(
`Command from tuner:
${
commandType
}
,
${
content
}
`
);
this
.
log
.
info
(
`Command from tuner:
${
commandType
}
,
${
content
}
`
);
if
(
this
.
trialJobsMaintainer
===
undefined
)
{
if
(
this
.
trialJobsMaintainer
===
undefined
)
{
throw
new
Error
(
'
Error: trialJobsMaintainer not initialized
'
);
throw
new
Error
(
'
Error: trialJobsMaintainer not initialized
'
);
...
@@ -501,8 +432,7 @@ class NNIManager implements Manager {
...
@@ -501,8 +432,7 @@ class NNIManager implements Manager {
};
};
const
trialJobDetail
:
TrialJobDetail
=
await
this
.
trainingService
.
submitTrialJob
(
trialJobAppForm
);
const
trialJobDetail
:
TrialJobDetail
=
await
this
.
trainingService
.
submitTrialJob
(
trialJobAppForm
);
this
.
trialJobsMaintainer
.
setTrialJob
(
trialJobDetail
.
id
,
Object
.
assign
({},
trialJobDetail
));
this
.
trialJobsMaintainer
.
setTrialJob
(
trialJobDetail
.
id
,
Object
.
assign
({},
trialJobDetail
));
// TO DO: to uncomment
assert
(
trialJobDetail
.
status
===
'
WAITING
'
);
//assert(trialJobDetail.status === 'WAITING');
await
this
.
dataStore
.
storeTrialJobEvent
(
trialJobDetail
.
status
,
trialJobDetail
.
id
,
content
,
trialJobDetail
.
url
);
await
this
.
dataStore
.
storeTrialJobEvent
(
trialJobDetail
.
status
,
trialJobDetail
.
id
,
content
,
trialJobDetail
.
url
);
if
(
this
.
currSubmittedTrialNum
===
this
.
experimentProfile
.
params
.
maxTrialNum
)
{
if
(
this
.
currSubmittedTrialNum
===
this
.
experimentProfile
.
params
.
maxTrialNum
)
{
this
.
trialJobsMaintainer
.
setNoMoreTrials
();
this
.
trialJobsMaintainer
.
setNoMoreTrials
();
...
@@ -512,19 +442,13 @@ class NNIManager implements Manager {
...
@@ -512,19 +442,13 @@ class NNIManager implements Manager {
case
NO_MORE_TRIAL_JOBS
:
case
NO_MORE_TRIAL_JOBS
:
this
.
trialJobsMaintainer
.
setNoMoreTrials
();
this
.
trialJobsMaintainer
.
setNoMoreTrials
();
break
;
break
;
default
:
case
KILL_TRIAL_JOB
:
throw
new
Error
(
'
Error: unsupported command type from tuner
'
);
}
});
if
(
this
.
assessor
!==
undefined
)
{
this
.
assessor
.
onCommand
(
async
(
commandType
:
string
,
content
:
string
)
=>
{
if
(
commandType
===
KILL_TRIAL_JOB
)
{
await
this
.
trainingService
.
cancelTrialJob
(
JSON
.
parse
(
content
));
await
this
.
trainingService
.
cancelTrialJob
(
JSON
.
parse
(
content
));
}
else
{
break
;
throw
new
Error
(
'
Error: unsupported command type from assessor
'
);
default
:
throw
new
Error
(
`Error: unsupported command type: [
${
commandType
}
]`
);
}
}
});
});
}
return
this
.
trialJobsMaintainer
.
run
();
return
this
.
trialJobsMaintainer
.
run
();
}
}
...
...
src/nni_manager/core/test/dataStore.test.ts
View file @
8314d6ee
...
@@ -69,10 +69,9 @@ describe('Unit test for dataStore', () => {
...
@@ -69,10 +69,9 @@ describe('Unit test for dataStore', () => {
}
}
}`
,
}`
,
tuner
:
{
tuner
:
{
tunerCommand
:
'
python3 tunner.py
'
,
className
:
'
testTuner
'
,
tunerCwd
:
'
/tmp
'
,
checkpointDir
:
'
/tmp/cp
'
,
tunerCheckpointDirectory
:
'
/tmp/cp
'
,
gpuNum
:
0
tunerGpuNum
:
0
}
}
},
},
id
:
'
exp123
'
,
id
:
'
exp123
'
,
...
...
src/nni_manager/core/test/dummy_assessor.py
View file @
8314d6ee
...
@@ -21,5 +21,3 @@ from nni.assessor import Assessor, AssessResult
...
@@ -21,5 +21,3 @@ from nni.assessor import Assessor, AssessResult
class
DummyAssessor
(
Assessor
):
class
DummyAssessor
(
Assessor
):
def
assess_trial
(
self
,
trial_job_id
,
trial_history
):
def
assess_trial
(
self
,
trial_job_id
,
trial_history
):
return
AssessResult
.
Good
return
AssessResult
.
Good
DummyAssessor
().
run
()
src/nni_manager/core/test/dummy_tuner.py
0 → 100644
View file @
8314d6ee
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from
nni.tuner
import
Tuner
class
DummyTuner
(
Tuner
):
def
generate_parameters
(
self
,
parameter_id
):
return
'unit-test-parm'
def
generate_multiple_parameters
(
self
,
parameter_id_list
):
return
[
'unit-test-param1'
,
'unit-test-param2'
]
def
receive_trial_result
(
self
,
parameter_id
,
parameters
,
reward
):
pass
def
receive_customized_trial_result
(
self
,
parameter_id
,
parameters
,
reward
):
pass
def
update_search_space
(
self
,
search_space
):
pass
src/nni_manager/core/test/ipcInterface.test.ts
View file @
8314d6ee
...
@@ -24,7 +24,7 @@ import { ChildProcess, spawn } from 'child_process';
...
@@ -24,7 +24,7 @@ import { ChildProcess, spawn } from 'child_process';
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
cleanupUnitTest
,
prepareUnitTest
}
from
'
../../common/utils
'
;
import
{
cleanupUnitTest
,
prepareUnitTest
}
from
'
../../common/utils
'
;
import
*
as
CommandType
from
'
../commands
'
;
import
*
as
CommandType
from
'
../commands
'
;
import
{
create
AssessorInterface
,
createTun
erInterface
,
IpcInterface
}
from
'
../ipcInterface
'
;
import
{
create
Dispatch
erInterface
,
IpcInterface
}
from
'
../ipcInterface
'
;
let
sentCommands
:
{[
key
:
string
]:
string
}[]
=
[];
let
sentCommands
:
{[
key
:
string
]:
string
}[]
=
[];
const
receivedCommands
:
{[
key
:
string
]:
string
}[]
=
[];
const
receivedCommands
:
{[
key
:
string
]:
string
}[]
=
[];
...
@@ -52,27 +52,27 @@ function runProcess(): Promise<Error | null> {
...
@@ -52,27 +52,27 @@ function runProcess(): Promise<Error | null> {
});
});
// create IPC interface
// create IPC interface
const
assesso
r
:
IpcInterface
=
create
Assesso
rInterface
(
proc
);
const
dispatche
r
:
IpcInterface
=
create
Dispatche
rInterface
(
proc
);
assesso
r
.
onCommand
((
commandType
:
string
,
content
:
string
):
void
=>
{
dispatche
r
.
onCommand
((
commandType
:
string
,
content
:
string
):
void
=>
{
receivedCommands
.
push
({
commandType
,
content
});
receivedCommands
.
push
({
commandType
,
content
});
});
});
// Command #1: ok
// Command #1: ok
assesso
r
.
sendCommand
(
'
IN
'
);
dispatche
r
.
sendCommand
(
'
IN
'
);
// Command #2: ok
// Command #2: ok
assesso
r
.
sendCommand
(
'
ME
'
,
'
123
'
);
dispatche
r
.
sendCommand
(
'
ME
'
,
'
123
'
);
// Command #3: too long
// Command #3: too long
try
{
try
{
assesso
r
.
sendCommand
(
'
ME
'
,
'
x
'
.
repeat
(
1
_000_000
));
dispatche
r
.
sendCommand
(
'
ME
'
,
'
x
'
.
repeat
(
1
_000_000
));
}
catch
(
error
)
{
}
catch
(
error
)
{
commandTooLong
=
error
;
commandTooLong
=
error
;
}
}
// Command #4:
not assessor command
// Command #4:
FE is not tuner/assessor command, test the exception type of send non-valid command
try
{
try
{
assesso
r
.
sendCommand
(
'
G
E
'
,
'
1
'
);
dispatche
r
.
sendCommand
(
'
F
E
'
,
'
1
'
);
}
catch
(
error
)
{
}
catch
(
error
)
{
rejectCommandType
=
error
;
rejectCommandType
=
error
;
}
}
...
...
src/nni_manager/core/test/ipcInterfaceTerminate.test.ts
View file @
8314d6ee
...
@@ -22,18 +22,34 @@
...
@@ -22,18 +22,34 @@
import
*
as
assert
from
'
assert
'
;
import
*
as
assert
from
'
assert
'
;
import
{
ChildProcess
,
spawn
}
from
'
child_process
'
;
import
{
ChildProcess
,
spawn
}
from
'
child_process
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
cleanupUnitTest
,
prepareUnitTest
}
from
'
../../common/utils
'
;
import
{
cleanupUnitTest
,
prepareUnitTest
,
getMsgDispatcherCommand
}
from
'
../../common/utils
'
;
import
*
as
CommandType
from
'
../commands
'
;
import
*
as
CommandType
from
'
../commands
'
;
import
{
create
Assesso
rInterface
,
IpcInterface
}
from
'
../ipcInterface
'
;
import
{
create
Dispatche
rInterface
,
IpcInterface
}
from
'
../ipcInterface
'
;
let
assesso
r
:
IpcInterface
|
undefined
;
let
dispatche
r
:
IpcInterface
|
undefined
;
let
procExit
:
boolean
=
false
;
let
procExit
:
boolean
=
false
;
let
procError
:
boolean
=
false
;
let
procError
:
boolean
=
false
;
function
startProcess
():
void
{
function
startProcess
():
void
{
// create fake assessor process
// create fake assessor process
const
stdio
:
{}[]
=
[
'
ignore
'
,
'
pipe
'
,
process
.
stderr
,
'
pipe
'
,
'
pipe
'
];
const
stdio
:
{}[]
=
[
'
ignore
'
,
'
pipe
'
,
process
.
stderr
,
'
pipe
'
,
'
pipe
'
];
const
proc
:
ChildProcess
=
spawn
(
'
python3 dummy_assessor.py
'
,
[],
{
stdio
,
cwd
:
'
core/test
'
,
shell
:
true
});
const
dispatcherCmd
:
string
=
getMsgDispatcherCommand
(
// Mock tuner config
{
className
:
'
DummyTuner
'
,
codeDir
:
'
./
'
,
classFileName
:
'
dummy_tuner.py
'
},
// Mock assessor config
{
className
:
'
DummyAssessor
'
,
codeDir
:
'
./
'
,
classFileName
:
'
dummy_assessor.py
'
}
);
const
proc
:
ChildProcess
=
spawn
(
dispatcherCmd
,
[],
{
stdio
,
cwd
:
'
core/test
'
,
shell
:
true
});
proc
.
on
(
'
error
'
,
(
error
:
Error
):
void
=>
{
proc
.
on
(
'
error
'
,
(
error
:
Error
):
void
=>
{
procExit
=
true
;
procExit
=
true
;
...
@@ -45,8 +61,8 @@ function startProcess(): void {
...
@@ -45,8 +61,8 @@ function startProcess(): void {
});
});
// create IPC interface
// create IPC interface
assesso
r
=
create
Assesso
rInterface
(
proc
);
dispatche
r
=
create
Dispatche
rInterface
(
proc
);
(
<
IpcInterface
>
assesso
r
).
onCommand
((
commandType
:
string
,
content
:
string
):
void
=>
{
(
<
IpcInterface
>
dispatche
r
).
onCommand
((
commandType
:
string
,
content
:
string
):
void
=>
{
console
.
log
(
commandType
,
content
);
// tslint:disable-line:no-console
console
.
log
(
commandType
,
content
);
// tslint:disable-line:no-console
});
});
}
}
...
@@ -62,9 +78,9 @@ describe('core/ipcInterface.terminate', (): void => {
...
@@ -62,9 +78,9 @@ describe('core/ipcInterface.terminate', (): void => {
});
});
it
(
'
normal
'
,
()
=>
{
it
(
'
normal
'
,
()
=>
{
(
<
IpcInterface
>
assesso
r
).
sendCommand
(
(
<
IpcInterface
>
dispatche
r
).
sendCommand
(
CommandType
.
REPORT_METRIC_DATA
,
CommandType
.
REPORT_METRIC_DATA
,
'
{"trial_job_id":"A","type":"
periodical
","value":1}
'
);
'
{"trial_job_id":"A","type":"
PERIODICAL
","value":1
,"sequence":123
}
'
);
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
setTimeout
(
setTimeout
(
...
@@ -79,7 +95,7 @@ describe('core/ipcInterface.terminate', (): void => {
...
@@ -79,7 +95,7 @@ describe('core/ipcInterface.terminate', (): void => {
});
});
it
(
'
terminate
'
,
()
=>
{
it
(
'
terminate
'
,
()
=>
{
(
<
IpcInterface
>
assesso
r
).
sendCommand
(
CommandType
.
TERMINATE
);
(
<
IpcInterface
>
dispatche
r
).
sendCommand
(
CommandType
.
TERMINATE
);
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
const
deferred
:
Deferred
<
void
>
=
new
Deferred
<
void
>
();
setTimeout
(
setTimeout
(
...
@@ -88,7 +104,7 @@ describe('core/ipcInterface.terminate', (): void => {
...
@@ -88,7 +104,7 @@ describe('core/ipcInterface.terminate', (): void => {
assert
.
ok
(
!
procError
);
assert
.
ok
(
!
procError
);
deferred
.
resolve
();
deferred
.
resolve
();
},
},
1
000
);
2
000
);
return
deferred
.
promise
;
return
deferred
.
promise
;
});
});
...
...
src/nni_manager/core/test/mockedTrainingService.ts
View file @
8314d6ee
...
@@ -30,7 +30,6 @@ const testTrainingServiceProvider: Provider = {
...
@@ -30,7 +30,6 @@ const testTrainingServiceProvider: Provider = {
};
};
class
MockedTrainingService
extends
TrainingService
{
class
MockedTrainingService
extends
TrainingService
{
public
mockedMetaDataValue
:
string
=
"
default
"
;
public
mockedMetaDataValue
:
string
=
"
default
"
;
public
jobDetail1
:
TrialJobDetail
=
{
public
jobDetail1
:
TrialJobDetail
=
{
id
:
'
1234
'
,
id
:
'
1234
'
,
...
@@ -93,6 +92,14 @@ class MockedTrainingService extends TrainingService {
...
@@ -93,6 +92,14 @@ class MockedTrainingService extends TrainingService {
return
deferred
.
promise
;
return
deferred
.
promise
;
}
}
public
updateTrialJob
(
trialJobId
:
string
,
form
:
TrialJobApplicationForm
):
Promise
<
TrialJobDetail
>
{
throw
new
MethodNotImplementedError
();
}
public
get
isMultiPhaseJobSupported
():
boolean
{
return
false
;
}
public
cancelTrialJob
(
trialJobId
:
string
):
Promise
<
void
>
{
public
cancelTrialJob
(
trialJobId
:
string
):
Promise
<
void
>
{
const
deferred
=
new
Deferred
<
void
>
();
const
deferred
=
new
Deferred
<
void
>
();
if
(
trialJobId
===
'
1234
'
||
trialJobId
===
'
3456
'
){
if
(
trialJobId
===
'
1234
'
||
trialJobId
===
'
3456
'
){
...
@@ -125,7 +132,7 @@ class MockedTrainingService extends TrainingService {
...
@@ -125,7 +132,7 @@ class MockedTrainingService extends TrainingService {
}
}
public
cleanUp
():
Promise
<
void
>
{
public
cleanUp
():
Promise
<
void
>
{
throw
new
MethodNotImplementedError
();
return
Promise
.
resolve
();
}
}
}
}
...
...
src/nni_manager/core/test/nnimanager.test.ts
View file @
8314d6ee
...
@@ -56,16 +56,17 @@ describe('Unit test for nnimanager', function () {
...
@@ -56,16 +56,17 @@ describe('Unit test for nnimanager', function () {
maxTrialNum
:
2
,
maxTrialNum
:
2
,
searchSpace
:
'
{"x":1}
'
,
searchSpace
:
'
{"x":1}
'
,
tuner
:
{
tuner
:
{
tunerCommand
:
'
python3 hyperopt.py
'
,
className
:
'
EvolutionTuner
'
,
tunerCwd
:
'
core/test
'
,
classArgs
:
{
tunerCheckpointDirectory
:
''
,
optimize_mode
:
'
maximize
'
tunerGpuNum
:
1
},
checkpointDir
:
''
,
gpuNum
:
1
},
},
assessor
:
{
assessor
:
{
assessorCommand
:
'
python3 dummy_assessor.py
'
,
className
:
'
MedianstopAssessor
'
,
assessorCwd
:
'
core/test
'
,
checkpointDir
:
''
,
assessorCheckpointDirectory
:
''
,
gpuNum
:
1
assessorGpuNum
:
1
}
}
}
}
...
...
src/nni_manager/core/test/sqlDatabase.test.ts
View file @
8314d6ee
...
@@ -38,10 +38,9 @@ const expParams1: ExperimentParams = {
...
@@ -38,10 +38,9 @@ const expParams1: ExperimentParams = {
maxTrialNum
:
5
,
maxTrialNum
:
5
,
searchSpace
:
'
SS
'
,
searchSpace
:
'
SS
'
,
tuner
:
{
tuner
:
{
tunerCommand
:
'
./tuner.sh
'
,
className
:
'
testTuner
'
,
tunerCwd
:
'
.
'
,
checkpointDir
:
'
/tmp
'
,
tunerCheckpointDirectory
:
'
/tmp
'
,
gpuNum
:
0
tunerGpuNum
:
0
}
}
};
};
...
@@ -53,14 +52,12 @@ const expParams2: ExperimentParams = {
...
@@ -53,14 +52,12 @@ const expParams2: ExperimentParams = {
maxTrialNum
:
5
,
maxTrialNum
:
5
,
searchSpace
:
''
,
searchSpace
:
''
,
tuner
:
{
tuner
:
{
tunerCommand
:
'
python tuner.py
'
,
className
:
'
testTuner
'
,
tunerCwd
:
'
/tmp
'
,
checkpointDir
:
'
/tmp
'
tunerCheckpointDirectory
:
'
/tmp
'
},
},
assessor
:
{
assessor
:
{
assessorCommand
:
'
python assessor.py
'
,
className
:
'
testAssessor
'
,
assessorCwd
:
'
/tmp
'
,
checkpointDir
:
'
/tmp
'
assessorCheckpointDirectory
:
'
/tmp
'
}
}
};
};
...
...
src/nni_manager/rest_server/test/mockedNNIManager.ts
View file @
8314d6ee
...
@@ -37,7 +37,7 @@ export const testManagerProvider: Provider = {
...
@@ -37,7 +37,7 @@ export const testManagerProvider: Provider = {
};
};
export
class
MockedNNIManager
extends
Manager
{
export
class
MockedNNIManager
extends
Manager
{
public
updateExperimentProfile
(
experimentProfile
:
ExperimentProfile
,
updateType
:
ProfileUpdateType
):
Promise
<
void
>
{
public
updateExperimentProfile
(
experimentProfile
:
ExperimentProfile
,
updateType
:
ProfileUpdateType
):
Promise
<
void
>
{
return
Promise
.
resolve
();
return
Promise
.
resolve
();
}
}
public
getTrialJobStatistics
():
Promise
<
TrialJobStatistics
[]
>
{
public
getTrialJobStatistics
():
Promise
<
TrialJobStatistics
[]
>
{
...
@@ -103,23 +103,15 @@ export class MockedNNIManager extends Manager {
...
@@ -103,23 +103,15 @@ export class MockedNNIManager extends Manager {
return
deferred
.
promise
;
return
deferred
.
promise
;
}
}
public
getTrialJob
(
trialJobId
:
string
):
Promise
<
TrialJob
Detail
>
{
public
getTrialJob
(
trialJobId
:
string
):
Promise
<
TrialJob
Info
>
{
const
deferred
:
Deferred
<
TrialJob
Detail
>
=
new
Deferred
<
TrialJob
Detail
>
();
const
deferred
:
Deferred
<
TrialJob
Info
>
=
new
Deferred
<
TrialJob
Info
>
();
const
job
Detail
:
TrialJob
Detail
=
{
const
job
Info
:
TrialJob
Info
=
{
id
:
'
1234
'
,
id
:
'
1234
'
,
status
:
'
SUCCEEDED
'
,
status
:
'
SUCCEEDED
'
,
submitTime
:
new
Date
(),
startTime
:
new
Date
(),
startTime
:
new
Date
(),
endTime
:
new
Date
(),
endTime
:
new
Date
()
tags
:
[
'
test
'
],
// tslint:disable-next-line:no-http-string
url
:
'
http://test
'
,
workingDirectory
:
'
/tmp/mocked
'
,
form
:
{
jobType
:
'
TRIAL
'
}
};
};
deferred
.
resolve
(
job
Detail
);
deferred
.
resolve
(
job
Info
);
return
deferred
.
promise
;
return
deferred
.
promise
;
}
}
...
@@ -139,9 +131,8 @@ export class MockedNNIManager extends Manager {
...
@@ -139,9 +131,8 @@ export class MockedNNIManager extends Manager {
maxTrialNum
:
3
,
maxTrialNum
:
3
,
searchSpace
:
'
{lr: 0.01}
'
,
searchSpace
:
'
{lr: 0.01}
'
,
tuner
:
{
tuner
:
{
tunerCommand
:
'
python3 tuner.py
'
,
className
:
'
testTuner
'
,
tunerCwd
:
'
/tmp/tunner
'
,
checkpointDir
:
''
tunerCheckpointDirectory
:
''
}
}
},
},
id
:
'
2345
'
,
id
:
'
2345
'
,
...
...
src/nni_manager/rest_server/test/restserver.test.ts
View file @
8314d6ee
...
@@ -116,7 +116,7 @@ describe('Unit test for rest server', () => {
...
@@ -116,7 +116,7 @@ describe('Unit test for rest server', () => {
}
}
const
req
:
request
.
Options
=
{
const
req
:
request
.
Options
=
{
uri
:
`
${
ROOT_URL
}
/experiment`
,
uri
:
`
${
ROOT_URL
}
/experiment
?update_type=TRIAL_CONCURRENCY
`
,
method
:
'
PUT
'
,
method
:
'
PUT
'
,
json
:
true
,
json
:
true
,
body
:
profile
body
:
profile
...
@@ -141,7 +141,7 @@ describe('Unit test for rest server', () => {
...
@@ -141,7 +141,7 @@ describe('Unit test for rest server', () => {
body
:
{
body
:
{
exception_test_key
:
'
test
'
exception_test_key
:
'
test
'
}
}
}
}
;
request
(
req
,
(
err
:
Error
,
res
:
request
.
Response
)
=>
{
request
(
req
,
(
err
:
Error
,
res
:
request
.
Response
)
=>
{
if
(
err
)
{
if
(
err
)
{
assert
.
fail
(
err
.
message
);
assert
.
fail
(
err
.
message
);
...
@@ -158,7 +158,7 @@ describe('Unit test for rest server', () => {
...
@@ -158,7 +158,7 @@ describe('Unit test for rest server', () => {
method
:
'
PUT
'
,
method
:
'
PUT
'
,
json
:
true
,
json
:
true
,
body
:
{
body
:
{
MACHINE_LIST
:
[{
machine_list
:
[{
ip
:
'
10.10.10.101
'
,
ip
:
'
10.10.10.101
'
,
port
:
22
,
port
:
22
,
username
:
'
test
'
,
username
:
'
test
'
,
...
@@ -170,37 +170,12 @@ describe('Unit test for rest server', () => {
...
@@ -170,37 +170,12 @@ describe('Unit test for rest server', () => {
passwd
:
'
1234
'
passwd
:
'
1234
'
}]
}]
}
}
}
request
(
req
,
(
err
:
Error
,
res
:
request
.
Response
)
=>
{
if
(
err
)
{
assert
.
fail
(
err
.
message
);
}
else
{
expect
(
res
.
statusCode
).
to
.
equal
(
200
);
}
done
();
});
});
it
(
'
Test POST experiment
'
,
(
done
:
Mocha
.
Done
)
=>
{
const
req
:
request
.
Options
=
{
uri
:
`
${
ROOT_URL
}
/experiment`
,
method
:
'
POST
'
,
json
:
true
,
body
:
{
author
:
'
test
'
,
trial
:
{
entrypoint
:
'
python
'
,
args
:
'
mnist.py
'
}
}
};
};
// tslint:disable-next-line:no-any
request
(
req
,
(
err
:
Error
,
res
:
request
.
Response
)
=>
{
request
(
req
,
(
err
:
Error
,
res
:
request
.
Response
,
body
:
any
)
=>
{
if
(
err
)
{
if
(
err
)
{
assert
.
fail
(
err
.
message
);
assert
.
fail
(
err
.
message
);
}
else
{
}
else
{
expect
(
res
.
statusCode
).
to
.
equal
(
200
);
expect
(
res
.
statusCode
).
to
.
equal
(
200
);
expect
(
body
.
experiment_id
).
to
.
equal
(
'
id-1234
'
);
}
}
done
();
done
();
});
});
...
...
src/nni_manager/training_service/local/localTrainingService.ts
View file @
8314d6ee
...
@@ -25,7 +25,7 @@ import { EventEmitter } from 'events';
...
@@ -25,7 +25,7 @@ import { EventEmitter } from 'events';
import
*
as
fs
from
'
fs
'
;
import
*
as
fs
from
'
fs
'
;
import
*
as
path
from
'
path
'
;
import
*
as
path
from
'
path
'
;
import
*
as
ts
from
'
tail-stream
'
;
import
*
as
ts
from
'
tail-stream
'
;
import
{
NNIError
,
NNIErrorNames
}
from
'
../../common/errors
'
;
import
{
MethodNotImplementedError
,
NNIError
,
NNIErrorNames
}
from
'
../../common/errors
'
;
import
{
getLogger
,
Logger
}
from
'
../../common/log
'
;
import
{
getLogger
,
Logger
}
from
'
../../common/log
'
;
import
{
TrialConfig
}
from
'
../common/trialConfig
'
;
import
{
TrialConfig
}
from
'
../common/trialConfig
'
;
import
{
TrialConfigMetadataKey
}
from
'
../common/trialConfigMetadataKey
'
;
import
{
TrialConfigMetadataKey
}
from
'
../common/trialConfigMetadataKey
'
;
...
@@ -205,6 +205,22 @@ class LocalTrainingService implements TrainingService {
...
@@ -205,6 +205,22 @@ class LocalTrainingService implements TrainingService {
}
}
}
}
/**
* Update trial job for multi-phase
* @param trialJobId trial job id
* @param form job application form
*/
public
updateTrialJob
(
trialJobId
:
string
,
form
:
JobApplicationForm
):
Promise
<
TrialJobDetail
>
{
throw
new
MethodNotImplementedError
();
}
/**
* Is multiphase job supported in current training service
*/
public
get
isMultiPhaseJobSupported
():
boolean
{
return
false
;
}
public
async
cancelTrialJob
(
trialJobId
:
string
):
Promise
<
void
>
{
public
async
cancelTrialJob
(
trialJobId
:
string
):
Promise
<
void
>
{
this
.
log
.
info
(
`cancelTrialJob:
${
trialJobId
}
`
);
this
.
log
.
info
(
`cancelTrialJob:
${
trialJobId
}
`
);
const
trialJob
:
LocalTrialJobDetail
|
undefined
=
this
.
jobMap
.
get
(
trialJobId
);
const
trialJob
:
LocalTrialJobDetail
|
undefined
=
this
.
jobMap
.
get
(
trialJobId
);
...
@@ -309,7 +325,7 @@ class LocalTrainingService implements TrainingService {
...
@@ -309,7 +325,7 @@ class LocalTrainingService implements TrainingService {
runScriptLines
.
push
(
`export
${
variable
.
key
}
=
${
variable
.
value
}
`
);
runScriptLines
.
push
(
`export
${
variable
.
key
}
=
${
variable
.
value
}
`
);
}
}
runScriptLines
.
push
(
runScriptLines
.
push
(
`eval
${
this
.
localTrailConfig
.
command
}
2>
${
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
.nni
'
,
'
stderr
'
)}
`
,
`eval
${
this
.
localTrailConfig
.
command
}
2>
${
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
stderr
'
)}
`
,
`echo $?
\`
date +%s%3N
\`
>
${
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
.nni
'
,
'
state
'
)}
`
);
`echo $?
\`
date +%s%3N
\`
>
${
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
.nni
'
,
'
state
'
)}
`
);
await
cpp
.
exec
(
`mkdir -p
${
trialJobDetail
.
workingDirectory
}
`
);
await
cpp
.
exec
(
`mkdir -p
${
trialJobDetail
.
workingDirectory
}
`
);
...
...
src/nni_manager/training_service/remote_machine/metricsCollector.ts
View file @
8314d6ee
...
@@ -82,7 +82,12 @@ export class MetricsCollector {
...
@@ -82,7 +82,12 @@ export class MetricsCollector {
private
getTrialJobIdsGroupByRmMeta
(
status
:
TrialJobStatus
[]):
Map
<
RemoteMachineMeta
,
string
[]
>
{
private
getTrialJobIdsGroupByRmMeta
(
status
:
TrialJobStatus
[]):
Map
<
RemoteMachineMeta
,
string
[]
>
{
const
map
:
Map
<
RemoteMachineMeta
,
string
[]
>
=
new
Map
<
RemoteMachineMeta
,
string
[]
>
();
const
map
:
Map
<
RemoteMachineMeta
,
string
[]
>
=
new
Map
<
RemoteMachineMeta
,
string
[]
>
();
this
.
trialJobsMap
.
forEach
((
trialJob
,
id
)
=>
{
this
.
trialJobsMap
.
forEach
((
trialJob
,
id
)
=>
{
if
(
status
.
includes
(
trialJob
.
status
))
{
let
reservedTrialJobIds
:
string
[]
=
[];
if
(
trialJob
.
rmMeta
!==
undefined
&&
trialJob
.
rmMeta
.
gpuReservation
!==
undefined
)
{
reservedTrialJobIds
=
Array
.
from
(
trialJob
.
rmMeta
.
gpuReservation
.
values
());
}
if
(
reservedTrialJobIds
.
includes
(
id
)
||
status
.
includes
(
trialJob
.
status
))
{
if
(
map
.
has
(
trialJob
.
rmMeta
))
{
if
(
map
.
has
(
trialJob
.
rmMeta
))
{
const
ids
=
map
.
get
(
trialJob
.
rmMeta
);
const
ids
=
map
.
get
(
trialJob
.
rmMeta
);
if
(
ids
!==
undefined
&&
!
ids
.
includes
(
id
))
{
if
(
ids
!==
undefined
&&
!
ids
.
includes
(
id
))
{
...
@@ -93,7 +98,7 @@ export class MetricsCollector {
...
@@ -93,7 +98,7 @@ export class MetricsCollector {
// If the remote machine has jobs reserve GPU, also put that jobs into list to get metrics data
// If the remote machine has jobs reserve GPU, also put that jobs into list to get metrics data
if
(
trialJob
.
rmMeta
.
gpuReservation
!==
undefined
)
{
if
(
trialJob
.
rmMeta
.
gpuReservation
!==
undefined
)
{
const
concatJobIds
:
string
[]
=
initJobIds
.
concat
(
Array
.
from
(
trialJob
.
rmMeta
.
gpuReservation
.
values
())
);
const
concatJobIds
:
string
[]
=
initJobIds
.
concat
(
reservedTrialJobIds
);
initJobIds
=
concatJobIds
.
filter
((
item
,
pos
)
=>
concatJobIds
.
indexOf
(
item
)
===
pos
);
initJobIds
=
concatJobIds
.
filter
((
item
,
pos
)
=>
concatJobIds
.
indexOf
(
item
)
===
pos
);
}
}
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment