Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
817ec68b
Unverified
Commit
817ec68b
authored
Apr 09, 2021
by
liuzhe-lz
Committed by
GitHub
Apr 09, 2021
Browse files
Add native support for v2 config (#3466)
parent
6aaca5f7
Changes
69
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
633 additions
and
645 deletions
+633
-645
nni/__main__.py
nni/__main__.py
+13
-15
nni/experiment/config/__init__.py
nni/experiment/config/__init__.py
+1
-0
nni/experiment/config/base.py
nni/experiment/config/base.py
+2
-0
nni/experiment/config/common.py
nni/experiment/config/common.py
+22
-8
nni/experiment/config/convert.py
nni/experiment/config/convert.py
+246
-315
nni/experiment/config/kubeflow.py
nni/experiment/config/kubeflow.py
+1
-1
nni/experiment/config/openpai.py
nni/experiment/config/openpai.py
+1
-1
nni/experiment/config/remote.py
nni/experiment/config/remote.py
+1
-1
nni/experiment/config/shared_storage.py
nni/experiment/config/shared_storage.py
+23
-0
nni/experiment/config/util.py
nni/experiment/config/util.py
+1
-1
nni/experiment/launcher.py
nni/experiment/launcher.py
+5
-9
nni/tools/nnictl/algo_management.py
nni/tools/nnictl/algo_management.py
+11
-5
nni/tools/nnictl/launcher.py
nni/tools/nnictl/launcher.py
+57
-217
nni/tools/nnictl/launcher_utils.py
nni/tools/nnictl/launcher_utils.py
+2
-1
nni/tools/package_utils/__init__.py
nni/tools/package_utils/__init__.py
+9
-10
test/config/pr_tests.yml
test/config/pr_tests.yml
+0
-7
ts/nni_manager/.eslintrc
ts/nni_manager/.eslintrc
+2
-1
ts/nni_manager/common/experimentConfig.ts
ts/nni_manager/common/experimentConfig.ts
+222
-0
ts/nni_manager/common/log.ts
ts/nni_manager/common/log.ts
+8
-2
ts/nni_manager/common/manager.ts
ts/nni_manager/common/manager.ts
+6
-51
No files found.
nni/__main__.py
View file @
817ec68b
...
...
@@ -7,7 +7,7 @@ import logging
import
json
import
base64
from
.runtime.common
import
enable_multi_thread
,
enable_multi_phase
from
.runtime.common
import
enable_multi_thread
from
.runtime.msg_dispatcher
import
MsgDispatcher
from
.tools.package_utils
import
create_builtin_class_instance
,
create_customized_class_instance
...
...
@@ -29,10 +29,8 @@ def main():
exp_params
=
json
.
loads
(
exp_params_decode
)
logger
.
debug
(
'exp_params json obj: [%s]'
,
json
.
dumps
(
exp_params
,
indent
=
4
))
if
exp_params
.
get
(
'multiThread'
):
if
exp_params
.
get
(
'
deprecated'
,
{}).
get
(
'
multiThread'
):
enable_multi_thread
()
if
exp_params
.
get
(
'multiPhase'
):
enable_multi_phase
()
if
exp_params
.
get
(
'advisor'
)
is
not
None
:
# advisor is enabled and starts to run
...
...
@@ -61,10 +59,10 @@ def main():
def
_run_advisor
(
exp_params
):
if
exp_params
.
get
(
'advisor'
).
get
(
'
builtinAdvisorN
ame'
):
if
exp_params
.
get
(
'advisor'
).
get
(
'
n
ame'
):
dispatcher
=
create_builtin_class_instance
(
exp_params
.
get
(
'advisor'
).
get
(
'builtinAdvisorN
ame'
)
,
exp_params
.
get
(
'advisor'
)
.
get
(
'classArgs'
),
exp_params
[
'advisor'
][
'n
ame'
]
,
exp_params
[
'advisor'
]
.
get
(
'classArgs'
),
'advisors'
)
else
:
dispatcher
=
create_customized_class_instance
(
exp_params
.
get
(
'advisor'
))
...
...
@@ -78,26 +76,26 @@ def _run_advisor(exp_params):
def
_create_tuner
(
exp_params
):
if
exp_params
.
get
(
'tuner'
)
.
get
(
'
builtinTunerN
ame'
):
if
exp_params
[
'tuner'
]
.
get
(
'
n
ame'
):
tuner
=
create_builtin_class_instance
(
exp_params
.
get
(
'tuner'
).
get
(
'builtinTunerN
ame'
)
,
exp_params
.
get
(
'tuner'
)
.
get
(
'classArgs'
),
exp_params
[
'tuner'
][
'n
ame'
]
,
exp_params
[
'tuner'
]
.
get
(
'classArgs'
),
'tuners'
)
else
:
tuner
=
create_customized_class_instance
(
exp_params
.
get
(
'tuner'
)
)
tuner
=
create_customized_class_instance
(
exp_params
[
'tuner'
]
)
if
tuner
is
None
:
raise
AssertionError
(
'Failed to create Tuner instance'
)
return
tuner
def
_create_assessor
(
exp_params
):
if
exp_params
.
get
(
'assessor'
)
.
get
(
'
builtinAssessorN
ame'
):
if
exp_params
[
'assessor'
]
.
get
(
'
n
ame'
):
assessor
=
create_builtin_class_instance
(
exp_params
.
get
(
'assessor'
).
get
(
'builtinAssessorN
ame'
)
,
exp_params
.
get
(
'assessor'
)
.
get
(
'classArgs'
),
exp_params
[
'assessor'
][
'n
ame'
]
,
exp_params
[
'assessor'
]
.
get
(
'classArgs'
),
'assessors'
)
else
:
assessor
=
create_customized_class_instance
(
exp_params
.
get
(
'assessor'
)
)
assessor
=
create_customized_class_instance
(
exp_params
[
'assessor'
]
)
if
assessor
is
None
:
raise
AssertionError
(
'Failed to create Assessor instance'
)
return
assessor
...
...
nni/experiment/config/__init__.py
View file @
817ec68b
...
...
@@ -9,3 +9,4 @@ from .aml import *
from
.kubeflow
import
*
from
.frameworkcontroller
import
*
from
.adl
import
*
from
.shared_storage
import
*
nni/experiment/config/base.py
View file @
817ec68b
...
...
@@ -101,6 +101,8 @@ class ConfigBase:
elif
isinstance
(
value
,
ConfigBase
):
setattr
(
ret
,
key
,
value
.
canonical
())
# value will be copied twice, should not be a performance issue anyway
elif
isinstance
(
value
,
Path
):
setattr
(
ret
,
key
,
str
(
value
))
return
ret
def
validate
(
self
)
->
None
:
...
...
nni/experiment/config/common.py
View file @
817ec68b
...
...
@@ -5,6 +5,8 @@ from dataclasses import dataclass
from
pathlib
import
Path
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Union
from
ruamel.yaml
import
YAML
from
.base
import
ConfigBase
,
PathLike
from
.
import
util
...
...
@@ -27,23 +29,27 @@ class _AlgorithmConfig(ConfigBase):
super
().
validate
()
_validate_algo
(
self
)
@
dataclass
(
init
=
False
)
class
AlgorithmConfig
(
_AlgorithmConfig
):
name
:
str
class_args
:
Optional
[
Dict
[
str
,
Any
]]
=
None
@
dataclass
(
init
=
False
)
class
CustomAlgorithmConfig
(
_AlgorithmConfig
):
class_name
:
str
class_directory
:
Optional
[
PathLike
]
=
None
class_directory
:
Optional
[
PathLike
]
=
'.'
class_args
:
Optional
[
Dict
[
str
,
Any
]]
=
None
class
TrainingServiceConfig
(
ConfigBase
):
platform
:
str
class
SharedStorageConfig
(
ConfigBase
):
storage_type
:
str
local_mount_point
:
str
remote_mount_point
:
str
local_mounted
:
str
@
dataclass
(
init
=
False
)
class
ExperimentConfig
(
ConfigBase
):
...
...
@@ -53,19 +59,21 @@ class ExperimentConfig(ConfigBase):
trial_command
:
str
trial_code_directory
:
PathLike
=
'.'
trial_concurrency
:
int
trial_gpu_number
:
Optional
[
int
]
=
None
trial_gpu_number
:
Optional
[
int
]
=
None
# TODO: in openpai cannot be None
max_experiment_duration
:
Optional
[
str
]
=
None
max_trial_number
:
Optional
[
int
]
=
None
nni_manager_ip
:
Optional
[
str
]
=
None
use_annotation
:
bool
=
False
debug
:
bool
=
False
log_level
:
Optional
[
str
]
=
None
experiment_working_directory
:
Optional
[
PathLike
]
=
None
experiment_working_directory
:
PathLike
=
'~/nni-experiments'
tuner_gpu_indices
:
Optional
[
Union
[
List
[
int
],
str
]]
=
None
tuner
:
Optional
[
_AlgorithmConfig
]
=
None
assessor
:
Optional
[
_AlgorithmConfig
]
=
None
advisor
:
Optional
[
_AlgorithmConfig
]
=
None
training_service
:
Union
[
TrainingServiceConfig
,
List
[
TrainingServiceConfig
]]
shared_storage
:
Optional
[
SharedStorageConfig
]
=
None
_deprecated
:
Optional
[
Dict
[
str
,
Any
]]
=
None
def
__init__
(
self
,
training_service_platform
:
Optional
[
Union
[
str
,
List
[
str
]]]
=
None
,
**
kwargs
):
base_path
=
kwargs
.
pop
(
'_base_path'
,
None
)
...
...
@@ -100,6 +108,12 @@ class ExperimentConfig(ConfigBase):
if
self
.
training_service
.
use_active_gpu
is
None
:
raise
ValueError
(
'Please set "use_active_gpu"'
)
def
json
(
self
)
->
Dict
[
str
,
Any
]:
obj
=
super
().
json
()
if
obj
.
get
(
'searchSpaceFile'
):
obj
[
'searchSpace'
]
=
YAML
().
load
(
open
(
obj
.
pop
(
'searchSpaceFile'
)))
return
obj
## End of public API ##
@
property
...
...
@@ -117,9 +131,9 @@ _canonical_rules = {
'max_experiment_duration'
:
lambda
value
:
f
'
{
util
.
parse_time
(
value
)
}
s'
if
value
is
not
None
else
None
,
'experiment_working_directory'
:
util
.
canonical_path
,
'tuner_gpu_indices'
:
lambda
value
:
[
int
(
idx
)
for
idx
in
value
.
split
(
','
)]
if
isinstance
(
value
,
str
)
else
value
,
'tuner'
:
lambda
config
:
None
if
config
is
None
or
config
.
name
==
'_none_'
else
config
,
'assessor'
:
lambda
config
:
None
if
config
is
None
or
config
.
name
==
'_none_'
else
config
,
'advisor'
:
lambda
config
:
None
if
config
is
None
or
config
.
name
==
'_none_'
else
config
,
'tuner'
:
lambda
config
:
None
if
config
is
None
or
config
.
name
==
'_none_'
else
config
.
canonical
()
,
'assessor'
:
lambda
config
:
None
if
config
is
None
or
config
.
name
==
'_none_'
else
config
.
canonical
()
,
'advisor'
:
lambda
config
:
None
if
config
is
None
or
config
.
name
==
'_none_'
else
config
.
canonical
()
,
}
_validation_rules
=
{
...
...
nni/experiment/config/convert.py
View file @
817ec68b
This diff is collapsed.
Click to expand it.
nni/experiment/config/kubeflow.py
View file @
817ec68b
...
...
@@ -56,7 +56,7 @@ class KubeflowConfig(TrainingServiceConfig):
parameter_server
:
Optional
[
KubeflowRoleConfig
]
=
None
def
__init__
(
self
,
**
kwargs
):
kwargs
=
util
.
case_insensitve
(
kwargs
)
kwargs
=
util
.
case_insensit
i
ve
(
kwargs
)
kwargs
[
'storage'
]
=
util
.
load_config
(
_KubeflowStorageConfig
,
kwargs
.
get
(
'storage'
))
kwargs
[
'worker'
]
=
util
.
load_config
(
KubeflowRoleConfig
,
kwargs
.
get
(
'worker'
))
kwargs
[
'parameterserver'
]
=
util
.
load_config
(
KubeflowRoleConfig
,
kwargs
.
get
(
'parameterserver'
))
...
...
nni/experiment/config/openpai.py
View file @
817ec68b
...
...
@@ -23,7 +23,7 @@ class OpenpaiConfig(TrainingServiceConfig):
docker_image
:
str
=
'msranni/nni:latest'
local_storage_mount_point
:
PathLike
container_storage_mount_point
:
str
reuse_mode
:
bool
=
Fals
e
reuse_mode
:
bool
=
Tru
e
openpai_config
:
Optional
[
Dict
[
str
,
Any
]]
=
None
openpai_config_file
:
Optional
[
PathLike
]
=
None
...
...
nni/experiment/config/remote.py
View file @
817ec68b
...
...
@@ -46,7 +46,7 @@ class RemoteMachineConfig(ConfigBase):
@
dataclass
(
init
=
False
)
class
RemoteConfig
(
TrainingServiceConfig
):
platform
:
str
=
'remote'
reuse_mode
:
bool
=
Fals
e
reuse_mode
:
bool
=
Tru
e
machine_list
:
List
[
RemoteMachineConfig
]
def
__init__
(
self
,
**
kwargs
):
...
...
nni/experiment/config/shared_storage.py
0 → 100644
View file @
817ec68b
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from
dataclasses
import
dataclass
from
typing
import
Optional
from
.common
import
SharedStorageConfig
__all__
=
[
'NfsConfig'
,
'AzureBlobConfig'
]
@dataclass(init=False)
class NfsConfig(SharedStorageConfig):
    """Shared storage configuration whose ``storage_type`` defaults to ``'NFS'``.

    Declared with ``init=False``: the dataclass machinery does not generate
    ``__init__`` here, so construction relies on an inherited constructor.
    """
    # Value identifying this shared storage kind.
    storage_type: str = 'NFS'
    # Presumably the address of the NFS server -- TODO confirm expected format.
    nfs_server: str
    # Presumably the directory exported by that server -- TODO confirm.
    exported_directory: str
@dataclass(init=False)
class AzureBlobConfig(SharedStorageConfig):
    """Shared storage configuration whose ``storage_type`` defaults to ``'AzureBlob'``.

    Declared with ``init=False``: the dataclass machinery does not generate
    ``__init__`` here, so construction relies on an inherited constructor.
    """
    # Value identifying this shared storage kind.
    storage_type: str = 'AzureBlob'
    storage_account_name: str
    # Optional; defaults to None.
    # NOTE(review): looks like either the key or the resource group is needed
    # to reach the account -- confirm against the consumer of this config.
    storage_account_key: Optional[str] = None
    resource_group_name: Optional[str] = None
    container_name: str
nni/experiment/config/util.py
View file @
817ec68b
...
...
@@ -19,7 +19,7 @@ def case_insensitive(key_or_kwargs: Union[str, Dict[str, Any]]) -> Union[str, Di
return
{
key
.
lower
().
replace
(
'_'
,
''
):
value
for
key
,
value
in
key_or_kwargs
.
items
()}
def
camel_case
(
key
:
str
)
->
str
:
words
=
key
.
split
(
'_'
)
words
=
key
.
strip
(
'_'
).
split
(
'_'
)
return
words
[
0
]
+
''
.
join
(
word
.
title
()
for
word
in
words
[
1
:])
def
canonical_path
(
path
:
Optional
[
PathLike
])
->
Optional
[
str
]:
...
...
nni/experiment/launcher.py
View file @
817ec68b
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import
contextlib
import
logging
from
pathlib
import
Path
...
...
@@ -13,7 +16,6 @@ import nni_node # pylint: disable=import-error
import
nni.runtime.protocol
from
.config
import
ExperimentConfig
from
.config
import
convert
from
.pipe
import
Pipe
from
.
import
rest
from
..tools.nnictl.config_utils
import
Experiments
...
...
@@ -40,7 +42,7 @@ def start_experiment(exp_id: str, config: ExperimentConfig, port: int, debug: bo
_save_experiment_information
(
exp_id
,
port
,
start_time
,
platform
,
config
.
experiment_name
,
proc
.
pid
,
config
.
experiment_working_directory
)
_logger
.
info
(
'Setting up...'
)
_init_
experiment
(
config
,
port
,
debug
)
rest
.
post
(
port
,
'/
experiment
'
,
config
.
json
()
)
return
proc
except
Exception
as
e
:
...
...
@@ -75,7 +77,7 @@ def start_experiment_retiarii(exp_id: str, config: ExperimentConfig, port: int,
_save_experiment_information
(
exp_id
,
port
,
start_time
,
platform
,
config
.
experiment_name
,
proc
.
pid
,
config
.
experiment_working_directory
)
_logger
.
info
(
'Setting up...'
)
_init_
experiment
(
config
,
port
,
debug
)
rest
.
post
(
port
,
'/
experiment
'
,
config
.
json
()
)
return
proc
,
pipe
except
Exception
as
e
:
...
...
@@ -145,12 +147,6 @@ def _check_rest_server(port: int, retry: int = 3) -> None:
rest
.
get
(
port
,
'/check-status'
)
def
_init_experiment
(
config
:
ExperimentConfig
,
port
:
int
,
debug
:
bool
)
->
None
:
for
cluster_metadata
in
convert
.
to_cluster_metadata
(
config
):
rest
.
put
(
port
,
'/experiment/cluster-metadata'
,
cluster_metadata
)
rest
.
post
(
port
,
'/experiment'
,
convert
.
to_rest_json
(
config
))
def _save_experiment_information(experiment_id: str, port: int, start_time: int, platform: str, name: str, pid: int, logDir: str) -> None:
    """Register an experiment through ``Experiments.add_experiment``.

    A fresh ``Experiments`` object is created on every call. The first five
    arguments are forwarded positionally; ``pid`` and ``logDir`` are forwarded
    as keyword arguments.
    """
    experiments_config = Experiments()
    experiments_config.add_experiment(experiment_id, port, start_time, platform, name, pid=pid, logDir=logDir)
nni/tools/nnictl/algo_management.py
View file @
817ec68b
...
...
@@ -35,11 +35,17 @@ def verify_algo_import(meta):
def
algo_reg
(
args
):
meta_list
=
read_reg_meta_list
(
args
.
meta_path
)
for
meta
in
meta_list
:
if
get_registered_algo_meta
(
meta
[
'builtinName'
])
is
not
None
:
print_error
(
'builtinName {} already registered'
.
format
(
meta
[
'builtinName'
]))
return
verify_algo_import
(
meta
)
save_algo_meta_data
(
meta
)
old
=
get_registered_algo_meta
(
meta
[
'builtinName'
])
if
old
is
None
:
verify_algo_import
(
meta
)
save_algo_meta_data
(
meta
)
elif
old
[
'source'
]
!=
'nni'
:
verify_algo_import
(
meta
)
print_green
(
f
'Updating exist algorithm'
)
remove_algo_meta_data
(
meta
[
'builtinName'
])
save_algo_meta_data
(
meta
)
else
:
print_error
(
f
'Cannot overwrite builtin algorithm'
)
print_green
(
'{} registered sucessfully!'
.
format
(
meta
[
'builtinName'
]))
def
algo_unreg
(
args
):
...
...
nni/tools/nnictl/launcher.py
View file @
817ec68b
This diff is collapsed.
Click to expand it.
nni/tools/nnictl/launcher_utils.py
View file @
817ec68b
...
...
@@ -124,4 +124,5 @@ def validate_all_content(experiment_config, config_path):
NNIConfigSchema
().
validate
(
experiment_config
)
experiment_config
[
'maxExecDuration'
]
=
parse_time
(
experiment_config
[
'maxExecDuration'
])
if
'maxExecDuration'
in
experiment_config
:
experiment_config
[
'maxExecDuration'
]
=
parse_time
(
experiment_config
[
'maxExecDuration'
])
nni/tools/package_utils/__init__.py
View file @
817ec68b
...
...
@@ -178,25 +178,24 @@ def create_customized_class_instance(class_params):
----------
class_params: dict
class_params should contain the following keys:
codeDir: code directory
classFileName: python file name of the class
className: class name
classDirectory: code directory
className: qualified class name
classArgs (optional): kwargs pass to class constructor
Returns: object
-------
Returns customized class instance.
"""
code_dir
=
class_params
.
get
(
'codeDir'
)
class_filename
=
class_params
.
get
(
'classFileName'
)
class_name
=
class_params
.
get
(
'className'
)
code_dir
=
class_params
.
get
(
'classDirectory'
)
qualified_class_name
=
class_params
.
get
(
'className'
)
class_args
=
class_params
.
get
(
'classArgs'
)
if
not
os
.
path
.
is
file
(
os
.
path
.
join
(
code_dir
,
class_filename
)
):
raise
ValueError
(
'Class file
not found: {
}'
.
format
(
os
.
path
.
join
(
code_dir
,
class_filename
)))
if
code_dir
and
not
os
.
path
.
is
dir
(
code_dir
):
raise
ValueError
(
f
'Directory
not found:
{
code_dir
}
'
)
sys
.
path
.
append
(
code_dir
)
module_name
=
os
.
path
.
splitext
(
class_filename
)[
0
]
module_name
,
class_name
=
qualified_class_name
.
rsplit
(
'.'
,
1
)
class_module
=
importlib
.
import_module
(
module_name
)
class_constructor
=
getattr
(
class_module
,
class_name
)
...
...
test/config/pr_tests.yml
View file @
817ec68b
...
...
@@ -45,13 +45,6 @@ testCases:
-
name
:
multi-thread
configFile
:
test/config/multi_thread/config.yml
-
name
:
multi-phase-batch
configFile
:
test/config/multi_phase/batch.yml
config
:
# for batch tuner, maxTrialNum can not exceed length of search space
maxTrialNum
:
2
trialConcurrency
:
2
#########################################################################
# nni assessor test
#########################################################################
...
...
ts/nni_manager/.eslintrc
View file @
817ec68b
...
...
@@ -30,7 +30,8 @@
"argsIgnorePattern": "^_"
}
],
"@typescript-eslint/no-var-requires": 0
"@typescript-eslint/no-var-requires": 0,
"@typescript-eslint/no-non-null-assertion": 0
},
"ignorePatterns": [
"node_modules/",
...
...
ts/nni_manager/common/experimentConfig.ts
0 → 100644
View file @
817ec68b
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'
use strict
'
;
import
*
as
assert
from
'
assert
'
;
export
interface
TrainingServiceConfig
{
platform
:
string
;
}
/* Local */
export
interface
LocalConfig
extends
TrainingServiceConfig
{
platform
:
'
local
'
;
useActiveGpu
?:
boolean
;
maxTrialNumberPerGpu
:
number
;
gpuIndices
?:
number
[];
}
/* Remote */
export
interface
RemoteMachineConfig
{
host
:
string
;
port
:
number
;
user
:
string
;
password
?:
string
;
sshKeyFile
:
string
;
sshPassphrase
?:
string
;
useActiveGpu
:
boolean
;
maxTrialNumberPerGpu
:
number
;
gpuIndices
?:
number
[];
pythonPath
?:
string
;
}
export
interface
RemoteConfig
extends
TrainingServiceConfig
{
platform
:
'
remote
'
;
reuseMode
:
boolean
;
machineList
:
RemoteMachineConfig
[];
}
/* OpenPAI */
export
interface
OpenpaiConfig
extends
TrainingServiceConfig
{
platform
:
'
openpai
'
;
host
:
string
;
username
:
string
;
token
:
string
;
trialCpuNumber
:
number
;
trialMemorySize
:
string
;
storageConfigName
:
string
;
dockerImage
:
string
;
localStorageMountPoint
:
string
;
containerStorageMountPoint
:
string
;
reuseMode
:
boolean
;
openpaiConfig
?:
object
;
}
/* AML */
export
interface
AmlConfig
extends
TrainingServiceConfig
{
platform
:
'
aml
'
;
subscriptionId
:
string
;
resourceGroup
:
string
;
workspaceName
:
string
;
computeTarget
:
string
;
dockerImage
:
string
;
}
/* Kubeflow */
// FIXME: merge with shared storage config
export
interface
KubeflowStorageConfig
{
storage
:
string
;
server
?:
string
;
path
?:
string
;
azureAccount
?:
string
;
azureShare
?:
string
;
keyVault
?:
string
;
keyVaultSecret
?:
string
;
}
export
interface
KubeflowRoleConfig
{
replicas
:
number
;
command
:
string
;
gpuNumber
:
number
;
cpuNumber
:
number
;
memorySize
:
string
;
dockerImage
:
string
;
}
export
interface
KubeflowConfig
extends
TrainingServiceConfig
{
platform
:
'
kubeflow
'
;
operator
:
string
;
apiVersion
:
string
;
storage
:
KubeflowStorageConfig
;
worker
:
KubeflowRoleConfig
;
parameterServer
?:
KubeflowRoleConfig
;
}
/* FrameworkController */
type
FrameworkControllerStorageConfig
=
KubeflowStorageConfig
;
export
interface
FrameworkControllerRoleConfig
{
name
:
string
;
dockerImage
:
string
;
taskNumber
:
number
;
command
:
string
;
gpuNumber
:
number
;
cpuNumber
:
number
;
memorySize
:
string
;
attemptCompletionMinFailedTasks
:
number
;
attemptCompletionMinSucceededTasks
:
number
;
}
export
interface
FrameworkControllerConfig
extends
TrainingServiceConfig
{
platform
:
'
frameworkcontroller
'
;
serviceAccountName
:
string
;
storage
:
FrameworkControllerStorageConfig
;
taskRoles
:
FrameworkControllerRoleConfig
[];
}
/* shared storage */
export
interface
SharedStorageConfig
{
storageType
:
string
;
localMountPoint
:
string
;
remoteMountPoint
:
string
;
localMounted
:
string
;
}
export
interface
NfsConfig
extends
SharedStorageConfig
{
storageType
:
'
NFS
'
;
nfsServer
:
string
;
exportedDirectory
:
string
;
}
export
interface
AzureBlobConfig
extends
SharedStorageConfig
{
storageAccountName
:
string
;
storageAccountKey
?:
string
;
resourceGroupName
?:
string
;
containerName
:
string
;
}
/* common */
export
interface
AlgorithmConfig
{
name
?:
string
;
className
?:
string
;
codeDirectory
?:
string
;
classArgs
?:
object
;
}
/**
 * Top-level experiment configuration object.
 *
 * Fields marked with `?` may be absent. `trainingService` is either a single
 * training service config or an array of them.
 */
export interface ExperimentConfig {
    experimentName?: string;
    searchSpace: any;
    trialCommand: string;
    trialCodeDirectory: string;
    trialConcurrency: number;
    trialGpuNumber?: number;
    maxExperimentDuration?: string;
    maxTrialNumber?: number;
    nniManagerIp?: string;
    //useAnnotation: boolean;  // handled inside nnictl
    debug: boolean;
    logLevel?: string;
    experimentWorkingDirectory?: string;
    tunerGpuIndices?: number[];
    tuner?: AlgorithmConfig;
    assessor?: AlgorithmConfig;
    advisor?: AlgorithmConfig;
    trainingService: TrainingServiceConfig | TrainingServiceConfig[];
    sharedStorage?: SharedStorageConfig;
    deprecated?: any;  // configs that are not yet natively supported by v2 (workaround)
}
/* util functions */
/** Number of seconds represented by each supported time-unit suffix. */
const timeUnits = { d: 24 * 3600, h: 3600, m: 60, s: 1 };

/**
 * Convert a duration string such as "90s", "15m", "2h" or "1d" to seconds.
 *
 * The last character selects the unit (case-insensitive); everything before
 * it must be a number.
 *
 * @param time number followed by one of the suffixes d / h / m / s
 * @returns the duration in seconds
 * @throws Error if the suffix is unknown, or the numeric part is empty or
 *         not a number
 */
export function toSeconds(time: string): number {
    for (const [unit, factor] of Object.entries(timeUnits)) {
        if (time.toLowerCase().endsWith(unit)) {
            const digits = time.slice(0, -1);
            const value = Number(digits);
            // Number('') is 0 and Number('abc') is NaN; neither is a real
            // duration, so fall through to the error instead of returning it.
            if (digits.trim() === '' || Number.isNaN(value)) {
                break;
            }
            return value * factor;
        }
    }
    throw new Error(`Bad time string "${time}"`);
}
/**
 * Size of each supported unit suffix, expressed in megabytes.
 * 1 GB is 1024 MB and 1 TB is 1024 * 1024 MB.
 */
const sizeUnits = { tb: 1024 * 1024, gb: 1024, mb: 1, kb: 1 / 1024 };

/**
 * Convert a size string such as "512mb", "8gb" or "1tb" to whole megabytes.
 *
 * The last two characters select the unit (case-insensitive); everything
 * before them must be a number. Fractional results are rounded down.
 *
 * @param size number followed by one of the suffixes tb / gb / mb / kb
 * @returns the size in megabytes, floored to an integer
 * @throws Error if the suffix is unknown, or the numeric part is empty or
 *         not a number
 */
export function toMegaBytes(size: string): number {
    for (const [unit, factor] of Object.entries(sizeUnits)) {
        if (size.toLowerCase().endsWith(unit)) {
            const digits = size.slice(0, -2);
            const value = Number(digits);
            // Number('') is 0 and Number('abc') is NaN; reject both rather
            // than returning a meaningless size.
            if (digits.trim() === '' || Number.isNaN(value)) {
                break;
            }
            return Math.floor(value * factor);
        }
    }
    throw new Error(`Bad size string "${size}"`);
}
/**
 * Render a list of GPU indices as a comma-separated string.
 *
 * @param gpuIndices GPU indices; may be omitted
 * @returns the indices joined with ',' or an empty string when the argument
 *          is undefined
 */
export function toCudaVisibleDevices(gpuIndices?: number[]): string {
    if (gpuIndices === undefined) {
        return '';
    }
    return gpuIndices.join(',');
}
/**
 * Merge the experiment-level fields and the fields of the training service
 * matching `platform` into one flat object.
 *
 * Experiment-level fields are copied first, so training service fields with
 * the same name overwrite them. When `trainingService` is an array, every
 * entry whose `platform` equals the requested one is merged in order. When
 * it is a single object, its platform is asserted to match.
 *
 * @param config experiment configuration to flatten
 * @param platform training service platform to select
 * @returns the merged object, cast to `T`
 */
export function flattenConfig<T>(config: ExperimentConfig, platform: string): T {
    const merged = { };
    Object.assign(merged, config);

    const service = config.trainingService;
    if (!Array.isArray(service)) {
        assert(service.platform === platform);
        Object.assign(merged, service);
        return merged as T;
    }

    for (const candidate of service) {
        if (candidate.platform === platform) {
            Object.assign(merged, candidate);
        }
    }
    return merged as T;
}
ts/nni_manager/common/log.ts
View file @
817ec68b
...
...
@@ -17,8 +17,14 @@ const INFO: number = 4;
const
DEBUG
:
number
=
5
;
const
TRACE
:
number
=
6
;
const
logLevelNameMap
:
Map
<
string
,
number
>
=
new
Map
([[
'
fatal
'
,
FATAL
],
[
'
error
'
,
ERROR
],
[
'
warning
'
,
WARNING
],
[
'
info
'
,
INFO
],
[
'
debug
'
,
DEBUG
],
[
'
trace
'
,
TRACE
]]);
const
logLevelNameMap
:
Map
<
string
,
number
>
=
new
Map
([
[
'
fatal
'
,
FATAL
],
[
'
error
'
,
ERROR
],
[
'
warning
'
,
WARNING
],
[
'
info
'
,
INFO
],
[
'
debug
'
,
DEBUG
],
[
'
trace
'
,
TRACE
]
]);
class
BufferSerialEmitter
{
private
buffer
:
Buffer
;
...
...
ts/nni_manager/common/manager.ts
View file @
817ec68b
...
...
@@ -5,6 +5,7 @@
import
{
MetricDataRecord
,
MetricType
,
TrialJobInfo
}
from
'
./datastore
'
;
import
{
TrialJobStatus
,
LogType
}
from
'
./trainingService
'
;
import
{
ExperimentConfig
}
from
'
./experimentConfig
'
;
type
ProfileUpdateType
=
'
TRIAL_CONCURRENCY
'
|
'
MAX_EXEC_DURATION
'
|
'
SEARCH_SPACE
'
|
'
MAX_TRIAL_NUM
'
;
type
ExperimentStatus
=
'
INITIALIZED
'
|
'
RUNNING
'
|
'
ERROR
'
|
'
STOPPING
'
|
'
STOPPED
'
|
'
DONE
'
|
'
NO_MORE_TRIAL
'
|
'
TUNER_NO_MORE_TRIAL
'
;
...
...
@@ -13,58 +14,12 @@ namespace ExperimentStartUpMode {
export
const
RESUME
=
'
resume
'
;
}
interface
ExperimentParams
{
authorName
:
string
;
experimentName
:
string
;
description
?:
string
;
trialConcurrency
:
number
;
maxExecDuration
:
number
;
//seconds
maxTrialNum
:
number
;
searchSpace
:
string
;
trainingServicePlatform
:
string
;
multiPhase
?:
boolean
;
multiThread
?:
boolean
;
versionCheck
?:
boolean
;
logCollection
?:
string
;
tuner
?:
{
className
?:
string
;
builtinTunerName
?:
string
;
codeDir
?:
string
;
classArgs
?:
any
;
classFileName
?:
string
;
checkpointDir
:
string
;
includeIntermediateResults
?:
boolean
;
gpuIndices
?:
string
;
};
assessor
?:
{
className
?:
string
;
builtinAssessorName
?:
string
;
codeDir
?:
string
;
classArgs
?:
any
;
classFileName
?:
string
;
checkpointDir
:
string
;
};
advisor
?:
{
className
?:
string
;
builtinAdvisorName
?:
string
;
codeDir
?:
string
;
classArgs
?:
any
;
classFileName
?:
string
;
checkpointDir
:
string
;
gpuIndices
?:
string
;
};
clusterMetaData
?:
{
key
:
string
;
value
:
string
;
}[];
}
interface
ExperimentProfile
{
params
:
Experiment
Params
;
params
:
Experiment
Config
;
id
:
string
;
execDuration
:
number
;
logDir
?
:
string
;
startTime
?
:
number
;
logDir
:
string
;
startTime
:
number
;
endTime
?:
number
;
nextSequenceId
:
number
;
revision
:
number
;
...
...
@@ -81,7 +36,7 @@ interface NNIManagerStatus {
}
abstract
class
Manager
{
public
abstract
startExperiment
(
experiment
Params
:
Experiment
Params
):
Promise
<
string
>
;
public
abstract
startExperiment
(
experiment
Config
:
Experiment
Config
):
Promise
<
string
>
;
public
abstract
resumeExperiment
(
readonly
:
boolean
):
Promise
<
void
>
;
public
abstract
stopExperiment
():
Promise
<
void
>
;
public
abstract
stopExperimentTopHalf
():
Promise
<
void
>
;
...
...
@@ -113,4 +68,4 @@ abstract class Manager {
public
abstract
fetchTrialOutput
(
trialJobId
:
string
,
subpath
:
string
):
Promise
<
void
>
;
}
export
{
Manager
,
Experiment
Params
,
ExperimentProfile
,
TrialJobStatistics
,
ProfileUpdateType
,
NNIManagerStatus
,
ExperimentStatus
,
ExperimentStartUpMode
};
export
{
Manager
,
Experiment
Config
,
ExperimentProfile
,
TrialJobStatistics
,
ProfileUpdateType
,
NNIManagerStatus
,
ExperimentStatus
,
ExperimentStartUpMode
};
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment