Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
99f7d79c
Unverified
Commit
99f7d79c
authored
Sep 26, 2019
by
SparkSnail
Committed by
GitHub
Sep 26, 2019
Browse files
Support experiment view (#1524)
parent
0b7d6260
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
208 additions
and
145 deletions
+208
-145
docs/en_US/Tutorial/Nnictl.md
docs/en_US/Tutorial/Nnictl.md
+30
-0
src/nni_manager/common/experimentStartupInfo.ts
src/nni_manager/common/experimentStartupInfo.ts
+19
-5
src/nni_manager/common/log.ts
src/nni_manager/common/log.ts
+18
-8
src/nni_manager/common/manager.ts
src/nni_manager/common/manager.ts
+6
-2
src/nni_manager/core/nnimanager.ts
src/nni_manager/core/nnimanager.ts
+23
-3
src/nni_manager/main.ts
src/nni_manager/main.ts
+16
-8
src/nni_manager/rest_server/restHandler.ts
src/nni_manager/rest_server/restHandler.ts
+17
-17
tools/nni_cmd/launcher.py
tools/nni_cmd/launcher.py
+72
-101
tools/nni_cmd/nnictl.py
tools/nni_cmd/nnictl.py
+7
-1
No files found.
docs/en_US/Tutorial/Nnictl.md
View file @
99f7d79c
...
@@ -10,6 +10,7 @@ nnictl support commands:
...
@@ -10,6 +10,7 @@ nnictl support commands:
*
[
nnictl create
](
#create
)
*
[
nnictl create
](
#create
)
*
[
nnictl resume
](
#resume
)
*
[
nnictl resume
](
#resume
)
*
[
nnictl view
](
#view
)
*
[
nnictl stop
](
#stop
)
*
[
nnictl stop
](
#stop
)
*
[
nnictl update
](
#update
)
*
[
nnictl update
](
#update
)
*
[
nnictl trial
](
#trial
)
*
[
nnictl trial
](
#trial
)
...
@@ -104,6 +105,35 @@ Debug mode will disable version check function in Trialkeeper.
...
@@ -104,6 +105,35 @@ Debug mode will disable version check function in Trialkeeper.
nnictl resume
[
experiment_id]
--port
8088
nnictl resume
[
experiment_id]
--port
8088
```
```
<a
name=
"view"
></a>

`nnictl view`
*
Description
You can use this command to view a stopped experiment.
*
Usage
```
bash
nnictl view
[
OPTIONS]
```
*
Options
|Name, shorthand|Required|Default|Description|
|------|------|------ |------|
|id| True| |The id of the experiment you want to view|
|--port, -p| False| |Rest port of the experiment you want to view|
*
Example
> view an experiment with specified port 8088
```
bash
nnictl view
[
experiment_id]
--port
8088
```
<a
name=
"stop"
></a>
<a
name=
"stop"
></a>

`nnictl stop`

`nnictl stop`
...
...
src/nni_manager/common/experimentStartupInfo.ts
View file @
99f7d79c
...
@@ -33,11 +33,11 @@ class ExperimentStartupInfo {
...
@@ -33,11 +33,11 @@ class ExperimentStartupInfo {
private
initTrialSequenceID
:
number
=
0
;
private
initTrialSequenceID
:
number
=
0
;
private
logDir
:
string
=
''
;
private
logDir
:
string
=
''
;
private
logLevel
:
string
=
''
;
private
logLevel
:
string
=
''
;
private
readonly
:
boolean
=
false
;
public
setStartupInfo
(
newExperiment
:
boolean
,
experimentId
:
string
,
basePort
:
number
,
logDir
?:
string
,
logLevel
?:
string
):
void
{
public
setStartupInfo
(
newExperiment
:
boolean
,
experimentId
:
string
,
basePort
:
number
,
logDir
?:
string
,
logLevel
?:
string
,
readonly
?:
boolean
):
void
{
assert
(
!
this
.
initialized
);
assert
(
!
this
.
initialized
);
assert
(
experimentId
.
trim
().
length
>
0
);
assert
(
experimentId
.
trim
().
length
>
0
);
this
.
newExperiment
=
newExperiment
;
this
.
newExperiment
=
newExperiment
;
this
.
experimentId
=
experimentId
;
this
.
experimentId
=
experimentId
;
this
.
basePort
=
basePort
;
this
.
basePort
=
basePort
;
...
@@ -52,6 +52,10 @@ class ExperimentStartupInfo {
...
@@ -52,6 +52,10 @@ class ExperimentStartupInfo {
if
(
logLevel
!==
undefined
&&
logLevel
.
length
>
1
)
{
if
(
logLevel
!==
undefined
&&
logLevel
.
length
>
1
)
{
this
.
logLevel
=
logLevel
;
this
.
logLevel
=
logLevel
;
}
}
if
(
readonly
!==
undefined
)
{
this
.
readonly
=
readonly
;
}
}
}
public
getExperimentId
():
string
{
public
getExperimentId
():
string
{
...
@@ -84,6 +88,12 @@ class ExperimentStartupInfo {
...
@@ -84,6 +88,12 @@ class ExperimentStartupInfo {
return
this
.
logLevel
;
return
this
.
logLevel
;
}
}
/**
 * Report whether the experiment was started in readonly mode.
 * Asserts that the startup info has been initialized before reading the flag.
 * @returns the value of the private `readonly` field (declared with a default of false)
 */
public isReadonly(): boolean {
    assert(this.initialized);

    return this.readonly;
}
public
setInitTrialSequenceId
(
initSequenceId
:
number
):
void
{
public
setInitTrialSequenceId
(
initSequenceId
:
number
):
void
{
assert
(
this
.
initialized
);
assert
(
this
.
initialized
);
this
.
initTrialSequenceID
=
initSequenceId
;
this
.
initTrialSequenceID
=
initSequenceId
;
...
@@ -121,10 +131,14 @@ function getExperimentStartupInfo(): ExperimentStartupInfo {
...
@@ -121,10 +131,14 @@ function getExperimentStartupInfo(): ExperimentStartupInfo {
}
}
function
setExperimentStartupInfo
(
function
setExperimentStartupInfo
(
newExperiment
:
boolean
,
experimentId
:
string
,
basePort
:
number
,
logDir
?:
string
,
logLevel
?:
string
):
void
{
newExperiment
:
boolean
,
experimentId
:
string
,
basePort
:
number
,
logDir
?:
string
,
logLevel
?:
string
,
readonly
?:
boolean
):
void
{
component
.
get
<
ExperimentStartupInfo
>
(
ExperimentStartupInfo
)
component
.
get
<
ExperimentStartupInfo
>
(
ExperimentStartupInfo
)
.
setStartupInfo
(
newExperiment
,
experimentId
,
basePort
,
logDir
,
logLevel
);
.
setStartupInfo
(
newExperiment
,
experimentId
,
basePort
,
logDir
,
logLevel
,
readonly
);
}
/**
 * Module-level accessor for the readonly flag.
 * Looks up the ExperimentStartupInfo instance through the component container
 * and forwards to its isReadonly() method.
 * @returns whatever ExperimentStartupInfo.isReadonly() returns
 */
function isReadonly(): boolean {
    const startupInfo: ExperimentStartupInfo = component.get<ExperimentStartupInfo>(ExperimentStartupInfo);

    return startupInfo.isReadonly();
}
}
export
{
ExperimentStartupInfo
,
getBasePort
,
getExperimentId
,
isNewExperiment
,
getExperimentStartupInfo
,
export
{
ExperimentStartupInfo
,
getBasePort
,
getExperimentId
,
isNewExperiment
,
getExperimentStartupInfo
,
setExperimentStartupInfo
,
setInitTrialSequenceId
,
getInitTrialSequenceId
};
setExperimentStartupInfo
,
setInitTrialSequenceId
,
getInitTrialSequenceId
,
isReadonly
};
src/nni_manager/common/log.ts
View file @
99f7d79c
...
@@ -26,7 +26,7 @@ import { Writable } from 'stream';
...
@@ -26,7 +26,7 @@ import { Writable } from 'stream';
import
{
WritableStreamBuffer
}
from
'
stream-buffers
'
;
import
{
WritableStreamBuffer
}
from
'
stream-buffers
'
;
import
{
format
}
from
'
util
'
;
import
{
format
}
from
'
util
'
;
import
*
as
component
from
'
../common/component
'
;
import
*
as
component
from
'
../common/component
'
;
import
{
getExperimentStartupInfo
}
from
'
./experimentStartupInfo
'
;
import
{
getExperimentStartupInfo
,
isReadonly
}
from
'
./experimentStartupInfo
'
;
import
{
getLogDir
}
from
'
./utils
'
;
import
{
getLogDir
}
from
'
./utils
'
;
const
FATAL
:
number
=
1
;
const
FATAL
:
number
=
1
;
...
@@ -76,6 +76,7 @@ class Logger {
...
@@ -76,6 +76,7 @@ class Logger {
private
level
:
number
=
INFO
;
private
level
:
number
=
INFO
;
private
bufferSerialEmitter
:
BufferSerialEmitter
;
private
bufferSerialEmitter
:
BufferSerialEmitter
;
private
writable
:
Writable
;
private
writable
:
Writable
;
private
readonly
:
boolean
=
false
;
constructor
(
fileName
?:
string
)
{
constructor
(
fileName
?:
string
)
{
let
logFile
:
string
|
undefined
=
fileName
;
let
logFile
:
string
|
undefined
=
fileName
;
...
@@ -95,6 +96,8 @@ class Logger {
...
@@ -95,6 +96,8 @@ class Logger {
if
(
logLevel
!==
undefined
)
{
if
(
logLevel
!==
undefined
)
{
this
.
level
=
logLevel
;
this
.
level
=
logLevel
;
}
}
this
.
readonly
=
isReadonly
();
}
}
public
close
()
{
public
close
()
{
...
@@ -135,7 +138,13 @@ class Logger {
...
@@ -135,7 +138,13 @@ class Logger {
this
.
log
(
'
FATAL
'
,
param
);
this
.
log
(
'
FATAL
'
,
param
);
}
}
/**
* if the experiment is not in readonly mode, write log content to stream
* @param level log level
* @param param the params to be written
*/
private
log
(
level
:
string
,
param
:
any
[]):
void
{
private
log
(
level
:
string
,
param
:
any
[]):
void
{
if
(
!
this
.
readonly
)
{
const
buffer
:
WritableStreamBuffer
=
new
WritableStreamBuffer
();
const
buffer
:
WritableStreamBuffer
=
new
WritableStreamBuffer
();
buffer
.
write
(
`[
${(
new
Date
()).
toLocaleString
()}
]
${
level
}
`
);
buffer
.
write
(
`[
${(
new
Date
()).
toLocaleString
()}
]
${
level
}
`
);
buffer
.
write
(
format
(
param
));
buffer
.
write
(
format
(
param
));
...
@@ -143,6 +152,7 @@ class Logger {
...
@@ -143,6 +152,7 @@ class Logger {
buffer
.
end
();
buffer
.
end
();
this
.
bufferSerialEmitter
.
feed
(
buffer
.
getContents
());
this
.
bufferSerialEmitter
.
feed
(
buffer
.
getContents
());
}
}
}
}
}
function
getLogger
(
fileName
?:
string
):
Logger
{
function
getLogger
(
fileName
?:
string
):
Logger
{
...
...
src/nni_manager/common/manager.ts
View file @
99f7d79c
...
@@ -24,6 +24,10 @@ import { TrialJobStatus } from './trainingService';
...
@@ -24,6 +24,10 @@ import { TrialJobStatus } from './trainingService';
type
ProfileUpdateType
=
'
TRIAL_CONCURRENCY
'
|
'
MAX_EXEC_DURATION
'
|
'
SEARCH_SPACE
'
|
'
MAX_TRIAL_NUM
'
;
type
ProfileUpdateType
=
'
TRIAL_CONCURRENCY
'
|
'
MAX_EXEC_DURATION
'
|
'
SEARCH_SPACE
'
|
'
MAX_TRIAL_NUM
'
;
type
ExperimentStatus
=
'
INITIALIZED
'
|
'
RUNNING
'
|
'
ERROR
'
|
'
STOPPING
'
|
'
STOPPED
'
|
'
DONE
'
|
'
NO_MORE_TRIAL
'
|
'
TUNER_NO_MORE_TRIAL
'
;
type
ExperimentStatus
=
'
INITIALIZED
'
|
'
RUNNING
'
|
'
ERROR
'
|
'
STOPPING
'
|
'
STOPPED
'
|
'
DONE
'
|
'
NO_MORE_TRIAL
'
|
'
TUNER_NO_MORE_TRIAL
'
;
/**
 * String constants naming the experiment start-up modes.
 * main.ts validates the `--start_mode` argument against these two values.
 */
namespace ExperimentStartUpMode {
    // Start a brand-new experiment.
    export const NEW = 'new';
    // Start from an existing experiment id.
    export const RESUME = 'resume';
}
interface
ExperimentParams
{
interface
ExperimentParams
{
authorName
:
string
;
authorName
:
string
;
...
@@ -95,7 +99,7 @@ interface NNIManagerStatus {
...
@@ -95,7 +99,7 @@ interface NNIManagerStatus {
abstract
class
Manager
{
abstract
class
Manager
{
public
abstract
startExperiment
(
experimentParams
:
ExperimentParams
):
Promise
<
string
>
;
public
abstract
startExperiment
(
experimentParams
:
ExperimentParams
):
Promise
<
string
>
;
public
abstract
resumeExperiment
():
Promise
<
void
>
;
public
abstract
resumeExperiment
(
readonly
:
boolean
):
Promise
<
void
>
;
public
abstract
stopExperiment
():
Promise
<
void
>
;
public
abstract
stopExperiment
():
Promise
<
void
>
;
public
abstract
getExperimentProfile
():
Promise
<
ExperimentProfile
>
;
public
abstract
getExperimentProfile
():
Promise
<
ExperimentProfile
>
;
public
abstract
updateExperimentProfile
(
experimentProfile
:
ExperimentProfile
,
updateType
:
ProfileUpdateType
):
Promise
<
void
>
;
public
abstract
updateExperimentProfile
(
experimentProfile
:
ExperimentProfile
,
updateType
:
ProfileUpdateType
):
Promise
<
void
>
;
...
@@ -115,4 +119,4 @@ abstract class Manager {
...
@@ -115,4 +119,4 @@ abstract class Manager {
public
abstract
getStatus
():
NNIManagerStatus
;
public
abstract
getStatus
():
NNIManagerStatus
;
}
}
export
{
Manager
,
ExperimentParams
,
ExperimentProfile
,
TrialJobStatistics
,
ProfileUpdateType
,
NNIManagerStatus
,
ExperimentStatus
};
export
{
Manager
,
ExperimentParams
,
ExperimentProfile
,
TrialJobStatistics
,
ProfileUpdateType
,
NNIManagerStatus
,
ExperimentStatus
,
ExperimentStartUpMode
};
src/nni_manager/core/nnimanager.ts
View file @
99f7d79c
...
@@ -59,6 +59,7 @@ class NNIManager implements Manager {
...
@@ -59,6 +59,7 @@ class NNIManager implements Manager {
private
waitingTrials
:
string
[];
private
waitingTrials
:
string
[];
private
trialJobs
:
Map
<
string
,
TrialJobDetail
>
;
private
trialJobs
:
Map
<
string
,
TrialJobDetail
>
;
private
trialDataForTuner
:
string
;
private
trialDataForTuner
:
string
;
private
readonly
:
boolean
;
private
trialJobMetricListener
:
(
metric
:
TrialJobMetric
)
=>
void
;
private
trialJobMetricListener
:
(
metric
:
TrialJobMetric
)
=>
void
;
...
@@ -72,6 +73,7 @@ class NNIManager implements Manager {
...
@@ -72,6 +73,7 @@ class NNIManager implements Manager {
this
.
waitingTrials
=
[];
this
.
waitingTrials
=
[];
this
.
trialJobs
=
new
Map
<
string
,
TrialJobDetail
>
();
this
.
trialJobs
=
new
Map
<
string
,
TrialJobDetail
>
();
this
.
trialDataForTuner
=
''
;
this
.
trialDataForTuner
=
''
;
this
.
readonly
=
false
;
this
.
log
=
getLogger
();
this
.
log
=
getLogger
();
this
.
dataStore
=
component
.
get
(
DataStore
);
this
.
dataStore
=
component
.
get
(
DataStore
);
...
@@ -88,6 +90,9 @@ class NNIManager implements Manager {
...
@@ -88,6 +90,9 @@ class NNIManager implements Manager {
}
}
public
updateExperimentProfile
(
experimentProfile
:
ExperimentProfile
,
updateType
:
ProfileUpdateType
):
Promise
<
void
>
{
public
updateExperimentProfile
(
experimentProfile
:
ExperimentProfile
,
updateType
:
ProfileUpdateType
):
Promise
<
void
>
{
if
(
this
.
readonly
)
{
return
Promise
.
reject
(
new
Error
(
'
Error: can not update experiment profile in readonly mode!
'
));
}
switch
(
updateType
)
{
switch
(
updateType
)
{
case
'
TRIAL_CONCURRENCY
'
:
case
'
TRIAL_CONCURRENCY
'
:
this
.
updateTrialConcurrency
(
experimentProfile
.
params
.
trialConcurrency
);
this
.
updateTrialConcurrency
(
experimentProfile
.
params
.
trialConcurrency
);
...
@@ -109,6 +114,9 @@ class NNIManager implements Manager {
...
@@ -109,6 +114,9 @@ class NNIManager implements Manager {
}
}
public
importData
(
data
:
string
):
Promise
<
void
>
{
public
importData
(
data
:
string
):
Promise
<
void
>
{
if
(
this
.
readonly
)
{
return
Promise
.
reject
(
new
Error
(
'
Error: can not import data in readonly mode!
'
));
}
if
(
this
.
dispatcher
===
undefined
)
{
if
(
this
.
dispatcher
===
undefined
)
{
return
Promise
.
reject
(
return
Promise
.
reject
(
new
Error
(
'
tuner has not been setup
'
)
new
Error
(
'
tuner has not been setup
'
)
...
@@ -124,6 +132,9 @@ class NNIManager implements Manager {
...
@@ -124,6 +132,9 @@ class NNIManager implements Manager {
}
}
public
addCustomizedTrialJob
(
hyperParams
:
string
):
Promise
<
void
>
{
public
addCustomizedTrialJob
(
hyperParams
:
string
):
Promise
<
void
>
{
if
(
this
.
readonly
)
{
return
Promise
.
reject
(
new
Error
(
'
Error: can not add customized trial job in readonly mode!
'
));
}
if
(
this
.
currSubmittedTrialNum
>=
this
.
experimentProfile
.
params
.
maxTrialNum
)
{
if
(
this
.
currSubmittedTrialNum
>=
this
.
experimentProfile
.
params
.
maxTrialNum
)
{
return
Promise
.
reject
(
return
Promise
.
reject
(
new
Error
(
'
reach maxTrialNum
'
)
new
Error
(
'
reach maxTrialNum
'
)
...
@@ -136,6 +147,9 @@ class NNIManager implements Manager {
...
@@ -136,6 +147,9 @@ class NNIManager implements Manager {
}
}
public
async
cancelTrialJobByUser
(
trialJobId
:
string
):
Promise
<
void
>
{
public
async
cancelTrialJobByUser
(
trialJobId
:
string
):
Promise
<
void
>
{
if
(
this
.
readonly
)
{
return
Promise
.
reject
(
new
Error
(
'
Error: can not cancel trial job in readonly mode!
'
));
}
this
.
log
.
info
(
`User cancelTrialJob:
${
trialJobId
}
`
);
this
.
log
.
info
(
`User cancelTrialJob:
${
trialJobId
}
`
);
await
this
.
trainingService
.
cancelTrialJob
(
trialJobId
);
await
this
.
trainingService
.
cancelTrialJob
(
trialJobId
);
await
this
.
dataStore
.
storeTrialJobEvent
(
'
USER_TO_CANCEL
'
,
trialJobId
,
''
);
await
this
.
dataStore
.
storeTrialJobEvent
(
'
USER_TO_CANCEL
'
,
trialJobId
,
''
);
...
@@ -180,13 +194,16 @@ class NNIManager implements Manager {
...
@@ -180,13 +194,16 @@ class NNIManager implements Manager {
return
this
.
experimentProfile
.
id
;
return
this
.
experimentProfile
.
id
;
}
}
public
async
resumeExperiment
():
Promise
<
void
>
{
public
async
resumeExperiment
(
readonly
:
boolean
):
Promise
<
void
>
{
this
.
log
.
info
(
`Resuming experiment:
${
this
.
experimentProfile
.
id
}
`
);
this
.
log
.
info
(
`Resuming experiment:
${
this
.
experimentProfile
.
id
}
`
);
//Fetch back the experiment profile
//Fetch back the experiment profile
const
experimentId
:
string
=
getExperimentId
();
const
experimentId
:
string
=
getExperimentId
();
this
.
experimentProfile
=
await
this
.
dataStore
.
getExperimentProfile
(
experimentId
);
this
.
experimentProfile
=
await
this
.
dataStore
.
getExperimentProfile
(
experimentId
);
this
.
readonly
=
readonly
;
if
(
readonly
)
{
return
Promise
.
resolve
();
}
const
expParams
:
ExperimentParams
=
this
.
experimentProfile
.
params
;
const
expParams
:
ExperimentParams
=
this
.
experimentProfile
.
params
;
setInitTrialSequenceId
(
this
.
experimentProfile
.
maxSequenceId
+
1
);
setInitTrialSequenceId
(
this
.
experimentProfile
.
maxSequenceId
+
1
);
// Set up multiphase config
// Set up multiphase config
...
@@ -196,7 +213,7 @@ class NNIManager implements Manager {
...
@@ -196,7 +213,7 @@ class NNIManager implements Manager {
// Set up versionCheck config
// Set up versionCheck config
if
(
expParams
.
versionCheck
!==
undefined
)
{
if
(
expParams
.
versionCheck
!==
undefined
)
{
this
.
trainingService
.
setClusterMetadata
(
'
version
C
heck
'
,
expParams
.
versionCheck
.
toString
());
this
.
trainingService
.
setClusterMetadata
(
'
version
_c
heck
'
,
expParams
.
versionCheck
.
toString
());
}
}
const
dispatcherCommand
:
string
=
getMsgDispatcherCommand
(
expParams
.
tuner
,
expParams
.
assessor
,
expParams
.
advisor
,
const
dispatcherCommand
:
string
=
getMsgDispatcherCommand
(
expParams
.
tuner
,
expParams
.
assessor
,
expParams
.
advisor
,
...
@@ -247,6 +264,9 @@ class NNIManager implements Manager {
...
@@ -247,6 +264,9 @@ class NNIManager implements Manager {
}
}
public
async
setClusterMetadata
(
key
:
string
,
value
:
string
):
Promise
<
void
>
{
public
async
setClusterMetadata
(
key
:
string
,
value
:
string
):
Promise
<
void
>
{
if
(
this
.
readonly
)
{
return
Promise
.
reject
(
new
Error
(
'
Error: can not set cluster metadata in readonly mode!
'
));
}
this
.
log
.
info
(
`NNIManager setClusterMetadata, key:
${
key
}
, value:
${
value
}
`
);
this
.
log
.
info
(
`NNIManager setClusterMetadata, key:
${
key
}
, value:
${
value
}
`
);
let
timeoutId
:
NodeJS
.
Timer
;
let
timeoutId
:
NodeJS
.
Timer
;
// TO DO: move timeout value to constants file
// TO DO: move timeout value to constants file
...
...
src/nni_manager/main.ts
View file @
99f7d79c
...
@@ -26,7 +26,7 @@ import * as component from './common/component';
...
@@ -26,7 +26,7 @@ import * as component from './common/component';
import
{
Database
,
DataStore
}
from
'
./common/datastore
'
;
import
{
Database
,
DataStore
}
from
'
./common/datastore
'
;
import
{
setExperimentStartupInfo
}
from
'
./common/experimentStartupInfo
'
;
import
{
setExperimentStartupInfo
}
from
'
./common/experimentStartupInfo
'
;
import
{
getLogger
,
Logger
,
logLevelNameMap
}
from
'
./common/log
'
;
import
{
getLogger
,
Logger
,
logLevelNameMap
}
from
'
./common/log
'
;
import
{
Manager
}
from
'
./common/manager
'
;
import
{
Manager
,
ExperimentStartUpMode
}
from
'
./common/manager
'
;
import
{
TrainingService
}
from
'
./common/trainingService
'
;
import
{
TrainingService
}
from
'
./common/trainingService
'
;
import
{
getLogDir
,
mkDirP
,
parseArg
,
uniqueString
}
from
'
./common/utils
'
;
import
{
getLogDir
,
mkDirP
,
parseArg
,
uniqueString
}
from
'
./common/utils
'
;
import
{
NNIDataStore
}
from
'
./core/nniDataStore
'
;
import
{
NNIDataStore
}
from
'
./core/nniDataStore
'
;
...
@@ -43,10 +43,10 @@ import {
...
@@ -43,10 +43,10 @@ import {
function
initStartupInfo
(
function
initStartupInfo
(
startExpMode
:
string
,
resumeExperimentId
:
string
,
basePort
:
number
,
startExpMode
:
string
,
resumeExperimentId
:
string
,
basePort
:
number
,
logDirectory
:
string
,
experimentLogLevel
:
string
):
void
{
logDirectory
:
string
,
experimentLogLevel
:
string
,
readonly
:
boolean
):
void
{
const
createNew
:
boolean
=
(
startExpMode
===
'
new
'
);
const
createNew
:
boolean
=
(
startExpMode
===
ExperimentStartUpMode
.
NEW
);
const
expId
:
string
=
createNew
?
uniqueString
(
8
)
:
resumeExperimentId
;
const
expId
:
string
=
createNew
?
uniqueString
(
8
)
:
resumeExperimentId
;
setExperimentStartupInfo
(
createNew
,
expId
,
basePort
,
logDirectory
,
experimentLogLevel
);
setExperimentStartupInfo
(
createNew
,
expId
,
basePort
,
logDirectory
,
experimentLogLevel
,
readonly
);
}
}
async
function
initContainer
(
platformMode
:
string
):
Promise
<
void
>
{
async
function
initContainer
(
platformMode
:
string
):
Promise
<
void
>
{
...
@@ -108,15 +108,15 @@ if (!['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller'].includes(mode
...
@@ -108,15 +108,15 @@ if (!['local', 'remote', 'pai', 'kubeflow', 'frameworkcontroller'].includes(mode
}
}
const
startMode
:
string
=
parseArg
([
'
--start_mode
'
,
'
-s
'
]);
const
startMode
:
string
=
parseArg
([
'
--start_mode
'
,
'
-s
'
]);
if
(
!
[
'
new
'
,
'
resume
'
].
includes
(
startMode
))
{
if
(
!
[
ExperimentStartUpMode
.
NEW
,
ExperimentStartUpMode
.
RESUME
].
includes
(
startMode
))
{
console
.
log
(
`FATAL: unknown start_mode:
${
startMode
}
`
);
console
.
log
(
`FATAL: unknown start_mode:
${
startMode
}
`
);
usage
();
usage
();
process
.
exit
(
1
);
process
.
exit
(
1
);
}
}
const
experimentId
:
string
=
parseArg
([
'
--experiment_id
'
,
'
-id
'
]);
const
experimentId
:
string
=
parseArg
([
'
--experiment_id
'
,
'
-id
'
]);
if
(
startMode
===
'
resume
'
&&
experimentId
.
trim
().
length
<
1
)
{
if
(
(
startMode
===
ExperimentStartUpMode
.
RESUME
)
&&
experimentId
.
trim
().
length
<
1
)
{
console
.
log
(
`FATAL: cannot resume experiment, invalid experiment_id:
${
experimentId
}
`
);
console
.
log
(
`FATAL: cannot resume
the
experiment, invalid experiment_id:
${
experimentId
}
`
);
usage
();
usage
();
process
.
exit
(
1
);
process
.
exit
(
1
);
}
}
...
@@ -133,7 +133,15 @@ if (logLevel.length > 0 && !logLevelNameMap.has(logLevel)) {
...
@@ -133,7 +133,15 @@ if (logLevel.length > 0 && !logLevelNameMap.has(logLevel)) {
console
.
log
(
`FATAL: invalid log_level:
${
logLevel
}
`
);
console
.
log
(
`FATAL: invalid log_level:
${
logLevel
}
`
);
}
}
initStartupInfo
(
startMode
,
experimentId
,
port
,
logDir
,
logLevel
);
// Parse the optional --readonly / -r flag; only 'true' or 'false' (case-insensitive) are valid.
// NOTE(review): the empty string is also accepted so that omitting the flag keeps working;
// this assumes parseArg returns '' when the flag is absent - confirm against parseArg.
const readonlyArg: string = parseArg(['--readonly', '-r']);
const readonlyValue: string = readonlyArg.toLowerCase();
// The previous check, ('true' || 'false').includes(...), always reduced to 'true'.includes(...):
// a substring test that rejected the valid value 'false' and accepted fragments such as 'ru'.
if (!['', 'true', 'false'].includes(readonlyValue)) {
    console.log(`FATAL: readonly property should only be true or false`);
    usage();
    process.exit(1);
}
// Anything other than an explicit 'true' means a normal (writable) experiment.
const readonly: boolean = readonlyValue === 'true';
initStartupInfo(startMode, experimentId, port, logDir, logLevel, readonly);
mkDirP
(
getLogDir
())
mkDirP
(
getLogDir
())
.
then
(
async
()
=>
{
.
then
(
async
()
=>
{
...
...
src/nni_manager/rest_server/restHandler.ts
View file @
99f7d79c
...
@@ -25,9 +25,9 @@ import * as path from 'path';
...
@@ -25,9 +25,9 @@ import * as path from 'path';
import
*
as
component
from
'
../common/component
'
;
import
*
as
component
from
'
../common/component
'
;
import
{
DataStore
,
MetricDataRecord
,
TrialJobInfo
}
from
'
../common/datastore
'
;
import
{
DataStore
,
MetricDataRecord
,
TrialJobInfo
}
from
'
../common/datastore
'
;
import
{
NNIError
,
NNIErrorNames
}
from
'
../common/errors
'
;
import
{
NNIError
,
NNIErrorNames
}
from
'
../common/errors
'
;
import
{
isNewExperiment
}
from
'
../common/experimentStartupInfo
'
;
import
{
isNewExperiment
,
isReadonly
}
from
'
../common/experimentStartupInfo
'
;
import
{
getLogger
,
Logger
}
from
'
../common/log
'
;
import
{
getLogger
,
Logger
}
from
'
../common/log
'
;
import
{
ExperimentProfile
,
Manager
,
TrialJobStatistics
}
from
'
../common/manager
'
;
import
{
ExperimentProfile
,
Manager
,
TrialJobStatistics
,
ExperimentStartUpMode
}
from
'
../common/manager
'
;
import
{
ValidationSchemas
}
from
'
./restValidationSchemas
'
;
import
{
ValidationSchemas
}
from
'
./restValidationSchemas
'
;
import
{
NNIRestServer
}
from
'
./nniRestServer
'
;
import
{
NNIRestServer
}
from
'
./nniRestServer
'
;
import
{
getVersion
}
from
'
../common/utils
'
;
import
{
getVersion
}
from
'
../common/utils
'
;
...
@@ -86,11 +86,11 @@ class NNIRestHandler {
...
@@ -86,11 +86,11 @@ class NNIRestHandler {
return
router
;
return
router
;
}
}
private
handle_error
(
err
:
Error
,
res
:
Response
,
isFatal
:
boolean
=
false
):
void
{
private
handle_error
(
err
:
Error
,
res
:
Response
,
isFatal
:
boolean
=
false
,
errorCode
:
number
=
500
):
void
{
if
(
err
instanceof
NNIError
&&
err
.
name
===
NNIErrorNames
.
NOT_FOUND
)
{
if
(
err
instanceof
NNIError
&&
err
.
name
===
NNIErrorNames
.
NOT_FOUND
)
{
res
.
status
(
404
);
res
.
status
(
404
);
}
else
{
}
else
{
res
.
status
(
500
);
res
.
status
(
errorCode
);
}
}
res
.
send
({
res
.
send
({
error
:
err
.
message
error
:
err
.
message
...
@@ -169,7 +169,7 @@ class NNIRestHandler {
...
@@ -169,7 +169,7 @@ class NNIRestHandler {
this
.
handle_error
(
err
,
res
);
this
.
handle_error
(
err
,
res
);
});
});
}
else
{
}
else
{
this
.
nniManager
.
resumeExperiment
().
then
(()
=>
{
this
.
nniManager
.
resumeExperiment
(
isReadonly
()
).
then
(()
=>
{
res
.
send
();
res
.
send
();
}).
catch
((
err
:
Error
)
=>
{
}).
catch
((
err
:
Error
)
=>
{
// Resume experiment is a step of initialization, so any exception thrown is a fatal
// Resume experiment is a step of initialization, so any exception thrown is a fatal
...
...
tools/nni_cmd/launcher.py
View file @
99f7d79c
...
@@ -118,12 +118,17 @@ def start_rest_server(port, platform, mode, config_file_name, experiment_id=None
...
@@ -118,12 +118,17 @@ def start_rest_server(port, platform, mode, config_file_name, experiment_id=None
node_command
=
'node'
node_command
=
'node'
if
sys
.
platform
==
'win32'
:
if
sys
.
platform
==
'win32'
:
node_command
=
os
.
path
.
join
(
entry_dir
[:
-
3
],
'Scripts'
,
'node.exe'
)
node_command
=
os
.
path
.
join
(
entry_dir
[:
-
3
],
'Scripts'
,
'node.exe'
)
cmds
=
[
node_command
,
entry_file
,
'--port'
,
str
(
port
),
'--mode'
,
platform
,
'--start_mode'
,
mode
]
cmds
=
[
node_command
,
entry_file
,
'--port'
,
str
(
port
),
'--mode'
,
platform
]
if
mode
==
'view'
:
cmds
+=
[
'--start_mode'
,
'resume'
]
cmds
+=
[
'--readonly'
,
'true'
]
else
:
cmds
+=
[
'--start_mode'
,
mode
]
if
log_dir
is
not
None
:
if
log_dir
is
not
None
:
cmds
+=
[
'--log_dir'
,
log_dir
]
cmds
+=
[
'--log_dir'
,
log_dir
]
if
log_level
is
not
None
:
if
log_level
is
not
None
:
cmds
+=
[
'--log_level'
,
log_level
]
cmds
+=
[
'--log_level'
,
log_level
]
if
mode
==
'resume'
:
if
mode
in
[
'resume'
,
'view'
]
:
cmds
+=
[
'--experiment_id'
,
experiment_id
]
cmds
+=
[
'--experiment_id'
,
experiment_id
]
stdout_full_path
,
stderr_full_path
=
get_log_path
(
config_file_name
)
stdout_full_path
,
stderr_full_path
=
get_log_path
(
config_file_name
)
with
open
(
stdout_full_path
,
'a+'
)
as
stdout_file
,
open
(
stderr_full_path
,
'a+'
)
as
stderr_file
:
with
open
(
stdout_full_path
,
'a+'
)
as
stdout_file
,
open
(
stderr_full_path
,
'a+'
)
as
stderr_file
:
...
@@ -156,7 +161,6 @@ def set_trial_config(experiment_config, port, config_file_name):
...
@@ -156,7 +161,6 @@ def set_trial_config(experiment_config, port, config_file_name):
def
set_local_config
(
experiment_config
,
port
,
config_file_name
):
def
set_local_config
(
experiment_config
,
port
,
config_file_name
):
'''set local configuration'''
'''set local configuration'''
#set machine_list
request_data
=
dict
()
request_data
=
dict
()
if
experiment_config
.
get
(
'localConfig'
):
if
experiment_config
.
get
(
'localConfig'
):
request_data
[
'local_config'
]
=
experiment_config
[
'localConfig'
]
request_data
[
'local_config'
]
=
experiment_config
[
'localConfig'
]
...
@@ -177,7 +181,7 @@ def set_local_config(experiment_config, port, config_file_name):
...
@@ -177,7 +181,7 @@ def set_local_config(experiment_config, port, config_file_name):
fout
.
write
(
json
.
dumps
(
json
.
loads
(
err_message
),
indent
=
4
,
sort_keys
=
True
,
separators
=
(
','
,
':'
)))
fout
.
write
(
json
.
dumps
(
json
.
loads
(
err_message
),
indent
=
4
,
sort_keys
=
True
,
separators
=
(
','
,
':'
)))
return
False
,
err_message
return
False
,
err_message
return
set_trial_config
(
experiment_config
,
port
,
config_file_name
)
return
set_trial_config
(
experiment_config
,
port
,
config_file_name
)
,
None
def
set_remote_config
(
experiment_config
,
port
,
config_file_name
):
def
set_remote_config
(
experiment_config
,
port
,
config_file_name
):
'''Call setClusterMetadata to pass trial'''
'''Call setClusterMetadata to pass trial'''
...
@@ -345,7 +349,6 @@ def set_experiment(experiment_config, mode, port, config_file_name):
...
@@ -345,7 +349,6 @@ def set_experiment(experiment_config, mode, port, config_file_name):
{
'key'
:
'frameworkcontroller_config'
,
'value'
:
experiment_config
[
'frameworkcontrollerConfig'
]})
{
'key'
:
'frameworkcontroller_config'
,
'value'
:
experiment_config
[
'frameworkcontrollerConfig'
]})
request_data
[
'clusterMetaData'
].
append
(
request_data
[
'clusterMetaData'
].
append
(
{
'key'
:
'trial_config'
,
'value'
:
experiment_config
[
'trial'
]})
{
'key'
:
'trial_config'
,
'value'
:
experiment_config
[
'trial'
]})
response
=
rest_post
(
experiment_url
(
port
),
json
.
dumps
(
request_data
),
REST_TIME_OUT
,
show_error
=
True
)
response
=
rest_post
(
experiment_url
(
port
),
json
.
dumps
(
request_data
),
REST_TIME_OUT
,
show_error
=
True
)
if
check_response
(
response
):
if
check_response
(
response
):
return
response
return
response
...
@@ -357,6 +360,33 @@ def set_experiment(experiment_config, mode, port, config_file_name):
...
@@ -357,6 +360,33 @@ def set_experiment(experiment_config, mode, port, config_file_name):
print_error
(
'Setting experiment error, error message is {}'
.
format
(
response
.
text
))
print_error
(
'Setting experiment error, error message is {}'
.
format
(
response
.
text
))
return
None
return
None
def set_platform_config(platform, experiment_config, port, config_file_name, rest_process):
    '''Push the platform-specific configuration to the rest server.

    Dispatches to the set_<platform>_config helper that matches ``platform``
    ('local', 'remote', 'pai', 'kubeflow' or 'frameworkcontroller'); each helper
    is called with (experiment_config, port, config_file_name) and its result is
    unpacked as (config_result, err_msg).

    Raises Exception for an unsupported platform. When the helper reports
    failure, the error is printed, the rest server process is killed and the
    program exits with status 1; if killing the process itself fails, an
    Exception is raised instead.
    '''
    print_normal('Setting {0} config...'.format(platform))
    config_setters = {
        'local': set_local_config,
        'remote': set_remote_config,
        'pai': set_pai_config,
        'kubeflow': set_kubeflow_config,
        'frameworkcontroller': set_frameworkcontroller_config,
    }
    if platform not in config_setters:
        raise Exception(ERROR_INFO % 'Unsupported platform!')
    config_result, err_msg = config_setters[platform](experiment_config, port, config_file_name)
    if config_result:
        print_normal('Successfully set {0} config!'.format(platform))
        return
    # Configuration failed: report it, stop the rest server, then abort.
    print_error('Failed! Error is: {}'.format(err_msg))
    try:
        kill_command(rest_process.pid)
    except Exception:
        raise Exception(ERROR_INFO % 'Rest server stopped!')
    exit(1)
def
launch_experiment
(
args
,
experiment_config
,
mode
,
config_file_name
,
experiment_id
=
None
):
def
launch_experiment
(
args
,
experiment_config
,
mode
,
config_file_name
,
experiment_id
=
None
):
'''follow steps to start rest server and start experiment'''
'''follow steps to start rest server and start experiment'''
nni_config
=
Config
(
config_file_name
)
nni_config
=
Config
(
config_file_name
)
...
@@ -381,6 +411,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
...
@@ -381,6 +411,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
exit
(
1
)
exit
(
1
)
log_dir
=
experiment_config
[
'logDir'
]
if
experiment_config
.
get
(
'logDir'
)
else
None
log_dir
=
experiment_config
[
'logDir'
]
if
experiment_config
.
get
(
'logDir'
)
else
None
log_level
=
experiment_config
[
'logLevel'
]
if
experiment_config
.
get
(
'logLevel'
)
else
None
log_level
=
experiment_config
[
'logLevel'
]
if
experiment_config
.
get
(
'logLevel'
)
else
None
# View mode does not need the debug function: when viewing an experiment, no new logs are created.
if
mode
!=
'view'
:
if
log_level
not
in
[
'trace'
,
'debug'
]
and
(
args
.
debug
or
experiment_config
.
get
(
'debug'
)
is
True
):
if
log_level
not
in
[
'trace'
,
'debug'
]
and
(
args
.
debug
or
experiment_config
.
get
(
'debug'
)
is
True
):
log_level
=
'debug'
log_level
=
'debug'
# start rest server
# start rest server
...
@@ -416,83 +448,14 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
...
@@ -416,83 +448,14 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
except
Exception
:
except
Exception
:
raise
Exception
(
ERROR_INFO
%
'Rest server stopped!'
)
raise
Exception
(
ERROR_INFO
%
'Rest server stopped!'
)
exit
(
1
)
exit
(
1
)
if
mode
!=
'view'
:
# set remote config
# set platform configuration
if
experiment_config
[
'trainingServicePlatform'
]
==
'remote'
:
set_platform_config
(
experiment_config
[
'trainingServicePlatform'
],
experiment_config
,
args
.
port
,
config_file_name
,
rest_process
)
print_normal
(
'Setting remote config...'
)
config_result
,
err_msg
=
set_remote_config
(
experiment_config
,
args
.
port
,
config_file_name
)
if
config_result
:
print_normal
(
'Successfully set remote config!'
)
else
:
print_error
(
'Failed! Error is: {}'
.
format
(
err_msg
))
try
:
kill_command
(
rest_process
.
pid
)
except
Exception
:
raise
Exception
(
ERROR_INFO
%
'Rest server stopped!'
)
exit
(
1
)
# set local config
if
experiment_config
[
'trainingServicePlatform'
]
==
'local'
:
print_normal
(
'Setting local config...'
)
if
set_local_config
(
experiment_config
,
args
.
port
,
config_file_name
):
print_normal
(
'Successfully set local config!'
)
else
:
print_error
(
'Set local config failed!'
)
try
:
kill_command
(
rest_process
.
pid
)
except
Exception
:
raise
Exception
(
ERROR_INFO
%
'Rest server stopped!'
)
exit
(
1
)
#set pai config
if
experiment_config
[
'trainingServicePlatform'
]
==
'pai'
:
print_normal
(
'Setting pai config...'
)
config_result
,
err_msg
=
set_pai_config
(
experiment_config
,
args
.
port
,
config_file_name
)
if
config_result
:
print_normal
(
'Successfully set pai config!'
)
else
:
if
err_msg
:
print_error
(
'Failed! Error is: {}'
.
format
(
err_msg
))
try
:
kill_command
(
rest_process
.
pid
)
except
Exception
:
raise
Exception
(
ERROR_INFO
%
'Restful server stopped!'
)
exit
(
1
)
#set kubeflow config
if
experiment_config
[
'trainingServicePlatform'
]
==
'kubeflow'
:
print_normal
(
'Setting kubeflow config...'
)
config_result
,
err_msg
=
set_kubeflow_config
(
experiment_config
,
args
.
port
,
config_file_name
)
if
config_result
:
print_normal
(
'Successfully set kubeflow config!'
)
else
:
if
err_msg
:
print_error
(
'Failed! Error is: {}'
.
format
(
err_msg
))
try
:
kill_command
(
rest_process
.
pid
)
except
Exception
:
raise
Exception
(
ERROR_INFO
%
'Restful server stopped!'
)
exit
(
1
)
#set frameworkcontroller config
if
experiment_config
[
'trainingServicePlatform'
]
==
'frameworkcontroller'
:
print_normal
(
'Setting frameworkcontroller config...'
)
config_result
,
err_msg
=
set_frameworkcontroller_config
(
experiment_config
,
args
.
port
,
config_file_name
)
if
config_result
:
print_normal
(
'Successfully set frameworkcontroller config!'
)
else
:
if
err_msg
:
print_error
(
'Failed! Error is: {}'
.
format
(
err_msg
))
try
:
kill_command
(
rest_process
.
pid
)
except
Exception
:
raise
Exception
(
ERROR_INFO
%
'Restful server stopped!'
)
exit
(
1
)
# start a new experiment
# start a new experiment
print_normal
(
'Starting experiment...'
)
print_normal
(
'Starting experiment...'
)
# set debug configuration
# set debug configuration
if
experiment_config
.
get
(
'debug'
)
is
None
:
if
mode
!=
'view'
and
experiment_config
.
get
(
'debug'
)
is
None
:
experiment_config
[
'debug'
]
=
args
.
debug
experiment_config
[
'debug'
]
=
args
.
debug
response
=
set_experiment
(
experiment_config
,
mode
,
args
.
port
,
config_file_name
)
response
=
set_experiment
(
experiment_config
,
mode
,
args
.
port
,
config_file_name
)
if
response
:
if
response
:
...
@@ -519,8 +482,23 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
...
@@ -519,8 +482,23 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
print_normal
(
EXPERIMENT_SUCCESS_INFO
%
(
experiment_id
,
' '
.
join
(
web_ui_url_list
)))
print_normal
(
EXPERIMENT_SUCCESS_INFO
%
(
experiment_id
,
' '
.
join
(
web_ui_url_list
)))
def
resume_experiment
(
args
):
def
create_experiment
(
args
):
'''resume an experiment'''
'''start a new experiment'''
config_file_name
=
''
.
join
(
random
.
sample
(
string
.
ascii_letters
+
string
.
digits
,
8
))
nni_config
=
Config
(
config_file_name
)
config_path
=
os
.
path
.
abspath
(
args
.
config
)
if
not
os
.
path
.
exists
(
config_path
):
print_error
(
'Please set correct config path!'
)
exit
(
1
)
experiment_config
=
get_yml_content
(
config_path
)
validate_all_content
(
experiment_config
,
config_path
)
nni_config
.
set_config
(
'experimentConfig'
,
experiment_config
)
launch_experiment
(
args
,
experiment_config
,
'new'
,
config_file_name
)
nni_config
.
set_config
(
'restServerPort'
,
args
.
port
)
def
manage_stopped_experiment
(
args
,
mode
):
'''view a stopped experiment'''
update_experiment
()
update_experiment
()
experiment_config
=
Experiments
()
experiment_config
=
Experiments
()
experiment_dict
=
experiment_config
.
get_all_experiments
()
experiment_dict
=
experiment_config
.
get_all_experiments
()
...
@@ -528,38 +506,31 @@ def resume_experiment(args):
...
@@ -528,38 +506,31 @@ def resume_experiment(args):
experiment_endTime
=
None
experiment_endTime
=
None
#find the latest stopped experiment
#find the latest stopped experiment
if
not
args
.
id
:
if
not
args
.
id
:
print_error
(
'Please set experiment id!
\n
You could use
\'
nnictl
resume
{id}
\'
to
resume
a stopped experiment!
\n
'
\
print_error
(
'Please set experiment id!
\n
You could use
\'
nnictl
{0}
{id}
\'
to
{0}
a stopped experiment!
\n
'
\
'You could use
\'
nnictl experiment list --all
\'
to show all experiments!'
)
'You could use
\'
nnictl experiment list --all
\'
to show all experiments!'
.
format
(
mode
)
)
exit
(
1
)
exit
(
1
)
else
:
else
:
if
experiment_dict
.
get
(
args
.
id
)
is
None
:
if
experiment_dict
.
get
(
args
.
id
)
is
None
:
print_error
(
'Id %s not exist!'
%
args
.
id
)
print_error
(
'Id %s not exist!'
%
args
.
id
)
exit
(
1
)
exit
(
1
)
if
experiment_dict
[
args
.
id
][
'status'
]
!=
'STOPPED'
:
if
experiment_dict
[
args
.
id
][
'status'
]
!=
'STOPPED'
:
print_error
(
'Only stopped experiments can be
resumed!'
)
print_error
(
'Only stopped experiments can be
{0}ed!'
.
format
(
mode
)
)
exit
(
1
)
exit
(
1
)
experiment_id
=
args
.
id
experiment_id
=
args
.
id
print_normal
(
'
Resuming
experiment
%s
...'
%
experiment_id
)
print_normal
(
'
{0}
experiment
{1}
...'
.
format
(
mode
,
experiment_id
)
)
nni_config
=
Config
(
experiment_dict
[
experiment_id
][
'fileName'
])
nni_config
=
Config
(
experiment_dict
[
experiment_id
][
'fileName'
])
experiment_config
=
nni_config
.
get_config
(
'experimentConfig'
)
experiment_config
=
nni_config
.
get_config
(
'experimentConfig'
)
experiment_id
=
nni_config
.
get_config
(
'experimentId'
)
experiment_id
=
nni_config
.
get_config
(
'experimentId'
)
new_config_file_name
=
''
.
join
(
random
.
sample
(
string
.
ascii_letters
+
string
.
digits
,
8
))
new_config_file_name
=
''
.
join
(
random
.
sample
(
string
.
ascii_letters
+
string
.
digits
,
8
))
new_nni_config
=
Config
(
new_config_file_name
)
new_nni_config
=
Config
(
new_config_file_name
)
new_nni_config
.
set_config
(
'experimentConfig'
,
experiment_config
)
new_nni_config
.
set_config
(
'experimentConfig'
,
experiment_config
)
launch_experiment
(
args
,
experiment_config
,
'resume'
,
new_config_file_name
,
experiment_id
)
launch_experiment
(
args
,
experiment_config
,
mode
,
new_config_file_name
,
experiment_id
)
new_nni_config
.
set_config
(
'restServerPort'
,
args
.
port
)
new_nni_config
.
set_config
(
'restServerPort'
,
args
.
port
)
def
create_experiment
(
args
):
def
view_experiment
(
args
):
'''start a new experiment'''
'''view a stopped experiment'''
config_file_name
=
''
.
join
(
random
.
sample
(
string
.
ascii_letters
+
string
.
digits
,
8
))
manage_stopped_experiment
(
args
,
'view'
)
nni_config
=
Config
(
config_file_name
)
config_path
=
os
.
path
.
abspath
(
args
.
config
)
if
not
os
.
path
.
exists
(
config_path
):
print_error
(
'Please set correct config path!'
)
exit
(
1
)
experiment_config
=
get_yml_content
(
config_path
)
validate_all_content
(
experiment_config
,
config_path
)
nni_config
.
set_config
(
'experimentConfig'
,
experiment
_config
)
def
resume_
experiment
(
args
):
launch_experiment
(
args
,
experiment_config
,
'new'
,
config_file_name
)
'''resume an experiment'''
nni_config
.
set_config
(
'restServerPort'
,
args
.
port
)
manage_stopped_experiment
(
args
,
'resume'
)
\ No newline at end of file
tools/nni_cmd/nnictl.py
View file @
99f7d79c
...
@@ -21,7 +21,7 @@
...
@@ -21,7 +21,7 @@
import
argparse
import
argparse
import
pkg_resources
import
pkg_resources
from
.launcher
import
create_experiment
,
resume_experiment
from
.launcher
import
create_experiment
,
resume_experiment
,
view_experiment
from
.updater
import
update_searchspace
,
update_concurrency
,
update_duration
,
update_trialnum
,
import_data
from
.updater
import
update_searchspace
,
update_concurrency
,
update_duration
,
update_trialnum
,
import_data
from
.nnictl_utils
import
*
from
.nnictl_utils
import
*
from
.package_management
import
*
from
.package_management
import
*
...
@@ -66,6 +66,12 @@ def parse_args():
...
@@ -66,6 +66,12 @@ def parse_args():
parser_resume
.
add_argument
(
'--debug'
,
'-d'
,
action
=
'store_true'
,
help
=
' set debug mode'
)
parser_resume
.
add_argument
(
'--debug'
,
'-d'
,
action
=
'store_true'
,
help
=
' set debug mode'
)
parser_resume
.
set_defaults
(
func
=
resume_experiment
)
parser_resume
.
set_defaults
(
func
=
resume_experiment
)
# parse view command
parser_resume
=
subparsers
.
add_parser
(
'view'
,
help
=
'view a stopped experiment'
)
parser_resume
.
add_argument
(
'id'
,
nargs
=
'?'
,
help
=
'The id of the experiment you want to view'
)
parser_resume
.
add_argument
(
'--port'
,
'-p'
,
default
=
DEFAULT_REST_PORT
,
dest
=
'port'
,
help
=
'the port of restful server'
)
parser_resume
.
set_defaults
(
func
=
view_experiment
)
# parse update command
# parse update command
parser_updater
=
subparsers
.
add_parser
(
'update'
,
help
=
'update the experiment'
)
parser_updater
=
subparsers
.
add_parser
(
'update'
,
help
=
'update the experiment'
)
#add subparsers for parser_updater
#add subparsers for parser_updater
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment