Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
df6145a2
Commit
df6145a2
authored
Dec 16, 2020
by
Yuge Zhang
Browse files
Merge branch 'master' of
https://github.com/microsoft/nni
into dev-retiarii
parents
0f0c6288
f8424a9f
Changes
205
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
114 additions
and
28 deletions
+114
-28
nni/algorithms/hpo/pbt_tuner/__init__.py
nni/algorithms/hpo/pbt_tuner/__init__.py
+0
-0
nni/algorithms/hpo/ppo_tuner/__init__.py
nni/algorithms/hpo/ppo_tuner/__init__.py
+1
-1
nni/algorithms/hpo/ppo_tuner/requirements.txt
nni/algorithms/hpo/ppo_tuner/requirements.txt
+0
-2
nni/algorithms/hpo/regularized_evolution_tuner.py
nni/algorithms/hpo/regularized_evolution_tuner.py
+0
-0
nni/algorithms/hpo/regularized_evolution_tuner/__init__.py
nni/algorithms/hpo/regularized_evolution_tuner/__init__.py
+0
-1
nni/algorithms/hpo/smac_tuner/__init__.py
nni/algorithms/hpo/smac_tuner/__init__.py
+1
-1
nni/algorithms/hpo/smac_tuner/requirements.txt
nni/algorithms/hpo/smac_tuner/requirements.txt
+0
-2
nni/algorithms/nas/pytorch/cdarts/__init__.py
nni/algorithms/nas/pytorch/cdarts/__init__.py
+1
-1
nni/algorithms/nas/pytorch/random/__init__.py
nni/algorithms/nas/pytorch/random/__init__.py
+1
-1
nni/compression/pytorch/compressor.py
nni/compression/pytorch/compressor.py
+1
-1
nni/experiment/config/base.py
nni/experiment/config/base.py
+1
-1
nni/experiment/launcher.py
nni/experiment/launcher.py
+1
-1
nni/nas/pytorch/__init__.py
nni/nas/pytorch/__init__.py
+6
-0
nni/runtime/env_vars.py
nni/runtime/env_vars.py
+2
-1
nni/runtime/log.py
nni/runtime/log.py
+1
-1
nni/runtime/platform/__init__.py
nni/runtime/platform/__init__.py
+1
-1
nni/runtime/platform/local.py
nni/runtime/platform/local.py
+2
-1
nni/tools/nnictl/config_schema.py
nni/tools/nnictl/config_schema.py
+43
-4
nni/tools/nnictl/config_utils.py
nni/tools/nnictl/config_utils.py
+4
-1
nni/tools/nnictl/launcher.py
nni/tools/nnictl/launcher.py
+48
-7
No files found.
nni/algorithms/hpo/pbt_tuner/__init__.py
deleted
100644 → 0
View file @
0f0c6288
nni/algorithms/hpo/ppo_tuner/__init__.py
View file @
df6145a2
from
.ppo_tuner
import
PPOTuner
from
.ppo_tuner
import
PPOTuner
,
PPOClassArgsValidator
nni/algorithms/hpo/ppo_tuner/requirements.txt
deleted
100644 → 0
View file @
0f0c6288
enum34
gym
nni/algorithms/hpo/regularized_evolution_tuner
/regularized_evolution_tuner
.py
→
nni/algorithms/hpo/regularized_evolution_tuner.py
View file @
df6145a2
File moved
nni/algorithms/hpo/regularized_evolution_tuner/__init__.py
deleted
100644 → 0
View file @
0f0c6288
from
.regularized_evolution_tuner
import
RegularizedEvolutionTuner
nni/algorithms/hpo/smac_tuner/__init__.py
View file @
df6145a2
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from
.smac_tuner
import
SMACTuner
from
.smac_tuner
import
SMACTuner
,
SMACClassArgsValidator
nni/algorithms/hpo/smac_tuner/requirements.txt
deleted
100644 → 0
View file @
0f0c6288
git+https://github.com/QuanluZhang/ConfigSpace.git
git+https://github.com/QuanluZhang/SMAC3.git
nni/algorithms/nas/pytorch/cdarts/__init__.py
View file @
df6145a2
...
...
@@ -2,4 +2,4 @@
# Licensed under the MIT license.
from
.mutator
import
RegularizedDartsMutator
,
RegularizedMutatorParallel
,
DartsDiscreteMutator
from
.trainer
import
CdartsTrainer
\ No newline at end of file
from
.trainer
import
CdartsTrainer
nni/algorithms/nas/pytorch/random/__init__.py
View file @
df6145a2
from
.mutator
import
RandomMutator
\ No newline at end of file
from
.mutator
import
RandomMutator
nni/compression/pytorch/compressor.py
View file @
df6145a2
...
...
@@ -662,7 +662,7 @@ class QuantGrad(torch.autograd.Function):
if
quant_type
==
QuantType
.
QUANT_INPUT
:
output
=
wrapper
.
quantizer
.
quantize_input
(
tensor
,
wrapper
,
**
kwargs
)
elif
quant_type
==
QuantType
.
QUANT_WEIGHT
:
output
=
wrapper
.
quantizer
.
quantize_weight
(
wrapper
,
**
kwargs
)
output
=
wrapper
.
quantizer
.
quantize_weight
(
wrapper
,
**
kwargs
)
elif
quant_type
==
QuantType
.
QUANT_OUTPUT
:
output
=
wrapper
.
quantizer
.
quantize_output
(
tensor
,
wrapper
,
**
kwargs
)
else
:
...
...
nni/experiment/config/base.py
View file @
df6145a2
...
...
@@ -87,7 +87,7 @@ class ConfigBase:
"""
return
dataclasses
.
asdict
(
self
.
canonical
(),
dict_factory
=
lambda
items
:
dict
((
util
.
camel_case
(
k
),
v
)
for
k
,
v
in
items
if
v
is
not
None
)
dict_factory
=
lambda
items
:
dict
((
util
.
camel_case
(
k
),
v
)
for
k
,
v
in
items
if
v
is
not
None
)
)
def
canonical
(
self
:
T
)
->
T
:
...
...
nni/experiment/launcher.py
View file @
df6145a2
...
...
@@ -32,7 +32,7 @@ def start_experiment(config: ExperimentConfig, port: int, debug: bool) -> Tuple[
exp_id
=
management
.
generate_experiment_id
()
try
:
_logger
.
info
(
f
'Creating experiment
{
colorama
.
Fore
.
CYAN
}{
exp_id
}
'
)
_logger
.
info
(
'Creating experiment
%s%s'
,
colorama
.
Fore
.
CYAN
,
exp_id
)
pipe
=
Pipe
(
exp_id
)
proc
=
_start_rest_server
(
config
,
port
,
debug
,
exp_id
,
pipe
.
path
)
_logger
.
info
(
'Connecting IPC pipe...'
)
...
...
nni/nas/pytorch/__init__.py
View file @
df6145a2
from
.base_mutator
import
BaseMutator
from
.base_trainer
import
BaseTrainer
from
.fixed
import
apply_fixed_architecture
from
.mutables
import
Mutable
,
LayerChoice
,
InputChoice
from
.mutator
import
Mutator
from
.trainer
import
Trainer
nni/runtime/env_vars.py
View file @
df6145a2
...
...
@@ -12,7 +12,8 @@ _trial_env_var_names = [
'NNI_SYS_DIR'
,
'NNI_OUTPUT_DIR'
,
'NNI_TRIAL_SEQ_ID'
,
'MULTI_PHASE'
'MULTI_PHASE'
,
'REUSE_MODE'
]
_dispatcher_env_var_names
=
[
...
...
nni/runtime/log.py
View file @
df6145a2
...
...
@@ -31,7 +31,7 @@ def init_logger() -> None:
if
trial_platform
==
'unittest'
:
return
if
trial_platform
:
if
trial_platform
and
not
trial_env_vars
.
REUSE_MODE
:
_init_logger_trial
()
return
...
...
nni/runtime/platform/__init__.py
View file @
df6145a2
...
...
@@ -9,7 +9,7 @@ if trial_env_vars.NNI_PLATFORM is None:
from
.standalone
import
*
elif
trial_env_vars
.
NNI_PLATFORM
==
'unittest'
:
from
.test
import
*
elif
trial_env_vars
.
NNI_PLATFORM
in
(
'adl'
,
'local'
,
'remote'
,
'pai'
,
'kubeflow'
,
'frameworkcontroller'
,
'paiYarn'
,
'dlts'
,
'aml'
):
elif
trial_env_vars
.
NNI_PLATFORM
in
(
'local'
,
'remote'
,
'pai'
,
'kubeflow'
,
'frameworkcontroller'
,
'paiYarn'
,
'dlts'
,
'aml'
,
'adl'
,
'heterogeneous'
):
from
.local
import
*
else
:
raise
RuntimeError
(
'Unknown platform %s'
%
trial_env_vars
.
NNI_PLATFORM
)
nni/runtime/platform/local.py
View file @
df6145a2
...
...
@@ -19,6 +19,7 @@ _outputdir = trial_env_vars.NNI_OUTPUT_DIR
if
not
os
.
path
.
exists
(
_outputdir
):
os
.
makedirs
(
_outputdir
)
_reuse_mode
=
trial_env_vars
.
REUSE_MODE
_nni_platform
=
trial_env_vars
.
NNI_PLATFORM
_multiphase
=
trial_env_vars
.
MULTI_PHASE
...
...
@@ -58,7 +59,7 @@ def get_next_parameter():
return
params
def
send_metric
(
string
):
if
_nni_platform
!=
'local'
:
if
_nni_platform
!=
'local'
or
_reuse_mode
in
(
'true'
,
'True'
)
:
assert
len
(
string
)
<
1000000
,
'Metric too long'
print
(
"NNISDK_MEb'%s'"
%
(
string
),
flush
=
True
)
else
:
...
...
nni/tools/nnictl/config_schema.py
View file @
df6145a2
...
...
@@ -124,7 +124,7 @@ common_schema = {
Optional
(
'maxExecDuration'
):
And
(
Regex
(
r
'^[1-9][0-9]*[s|m|h|d]$'
,
error
=
'ERROR: maxExecDuration format is [digit]{s,m,h,d}'
)),
Optional
(
'maxTrialNum'
):
setNumberRange
(
'maxTrialNum'
,
int
,
1
,
99999
),
'trainingServicePlatform'
:
setChoice
(
'trainingServicePlatform'
,
'adl'
,
'remote'
,
'local'
,
'pai'
,
'kubeflow'
,
'frameworkcontroller'
,
'paiYarn'
,
'dlts'
,
'aml'
),
'trainingServicePlatform'
,
'remote'
,
'local'
,
'pai'
,
'kubeflow'
,
'frameworkcontroller'
,
'paiYarn'
,
'dlts'
,
'aml'
,
'adl'
,
'heterogeneous'
),
Optional
(
'searchSpacePath'
):
And
(
os
.
path
.
exists
,
error
=
SCHEMA_PATH_ERROR
%
'searchSpacePath'
),
Optional
(
'multiPhase'
):
setType
(
'multiPhase'
,
bool
),
Optional
(
'multiThread'
):
setType
(
'multiThread'
,
bool
),
...
...
@@ -208,7 +208,7 @@ pai_trial_schema = {
}
pai_config_schema
=
{
'paiConfig'
:
{
Optional
(
'paiConfig'
)
:
{
'userName'
:
setType
(
'userName'
,
str
),
Or
(
'passWord'
,
'token'
,
only_one
=
True
):
str
,
'host'
:
setType
(
'host'
,
str
),
...
...
@@ -252,7 +252,7 @@ aml_trial_schema = {
}
aml_config_schema
=
{
'amlConfig'
:
{
Optional
(
'amlConfig'
)
:
{
'subscriptionId'
:
setType
(
'subscriptionId'
,
str
),
'resourceGroup'
:
setType
(
'resourceGroup'
,
str
),
'workspaceName'
:
setType
(
'workspaceName'
,
str
),
...
...
@@ -262,6 +262,29 @@ aml_config_schema = {
}
}
heterogeneous_trial_schema
=
{
'trial'
:
{
'codeDir'
:
setPathCheck
(
'codeDir'
),
Optional
(
'nniManagerNFSMountPath'
):
setPathCheck
(
'nniManagerNFSMountPath'
),
Optional
(
'containerNFSMountPath'
):
setType
(
'containerNFSMountPath'
,
str
),
Optional
(
'nasMode'
):
setChoice
(
'nasMode'
,
'classic_mode'
,
'enas_mode'
,
'oneshot_mode'
,
'darts_mode'
),
'command'
:
setType
(
'command'
,
str
),
Optional
(
'gpuNum'
):
setNumberRange
(
'gpuNum'
,
int
,
0
,
99999
),
Optional
(
'cpuNum'
):
setNumberRange
(
'cpuNum'
,
int
,
0
,
99999
),
Optional
(
'memoryMB'
):
setType
(
'memoryMB'
,
int
),
Optional
(
'image'
):
setType
(
'image'
,
str
),
Optional
(
'virtualCluster'
):
setType
(
'virtualCluster'
,
str
),
Optional
(
'paiStorageConfigName'
):
setType
(
'paiStorageConfigName'
,
str
),
Optional
(
'paiConfigPath'
):
And
(
os
.
path
.
exists
,
error
=
SCHEMA_PATH_ERROR
%
'paiConfigPath'
)
}
}
heterogeneous_config_schema
=
{
'heterogeneousConfig'
:
{
'trainingServicePlatforms'
:
[
'local'
,
'remote'
,
'pai'
,
'aml'
]
}
}
adl_trial_schema
=
{
'trial'
:{
'codeDir'
:
setType
(
'codeDir'
,
str
),
...
...
@@ -404,7 +427,7 @@ remote_config_schema = {
}
machine_list_schema
=
{
'machineList'
:
[
Or
(
Optional
(
'machineList'
)
:
[
Or
(
{
'ip'
:
setType
(
'ip'
,
str
),
Optional
(
'port'
):
setNumberRange
(
'port'
,
int
,
1
,
65535
),
...
...
@@ -438,6 +461,8 @@ training_service_schema_dict = {
'frameworkcontroller'
:
Schema
({
**
common_schema
,
**
frameworkcontroller_trial_schema
,
**
frameworkcontroller_config_schema
}),
'aml'
:
Schema
({
**
common_schema
,
**
aml_trial_schema
,
**
aml_config_schema
}),
'dlts'
:
Schema
({
**
common_schema
,
**
dlts_trial_schema
,
**
dlts_config_schema
}),
'heterogeneous'
:
Schema
({
**
common_schema
,
**
heterogeneous_trial_schema
,
**
heterogeneous_config_schema
,
**
machine_list_schema
,
**
pai_config_schema
,
**
aml_config_schema
,
**
remote_config_schema
}),
}
...
...
@@ -454,6 +479,7 @@ class NNIConfigSchema:
self
.
validate_pai_trial_conifg
(
experiment_config
)
self
.
validate_kubeflow_operators
(
experiment_config
)
self
.
validate_eth0_device
(
experiment_config
)
self
.
validate_heterogeneous_platforms
(
experiment_config
)
def
validate_tuner_adivosr_assessor
(
self
,
experiment_config
):
if
experiment_config
.
get
(
'advisor'
):
...
...
@@ -563,3 +589,16 @@ class NNIConfigSchema:
and
not
experiment_config
.
get
(
'nniManagerIp'
)
\
and
'eth0'
not
in
netifaces
.
interfaces
():
raise
SchemaError
(
'This machine does not contain eth0 network device, please set nniManagerIp in config file!'
)
def
validate_heterogeneous_platforms
(
self
,
experiment_config
):
required_config_name_map
=
{
'remote'
:
'machineList'
,
'aml'
:
'amlConfig'
,
'pai'
:
'paiConfig'
}
if
experiment_config
.
get
(
'trainingServicePlatform'
)
==
'heterogeneous'
:
for
platform
in
experiment_config
[
'heterogeneousConfig'
][
'trainingServicePlatforms'
]:
config_name
=
required_config_name_map
.
get
(
platform
)
if
config_name
and
not
experiment_config
.
get
(
config_name
):
raise
SchemaError
(
'Need to set {0} for {1} in heterogeneous mode!'
.
format
(
config_name
,
platform
))
\ No newline at end of file
nni/tools/nnictl/config_utils.py
View file @
df6145a2
...
...
@@ -85,7 +85,10 @@ class Experiments:
self
.
experiments
=
self
.
read_file
()
if
expId
not
in
self
.
experiments
:
return
False
self
.
experiments
[
expId
][
key
]
=
value
if
value
is
None
:
self
.
experiments
[
expId
].
pop
(
key
,
None
)
else
:
self
.
experiments
[
expId
][
key
]
=
value
self
.
write_file
()
return
True
...
...
nni/tools/nnictl/launcher.py
View file @
df6145a2
...
...
@@ -118,13 +118,6 @@ def set_local_config(experiment_config, port, config_file_name):
request_data
=
dict
()
if
experiment_config
.
get
(
'localConfig'
):
request_data
[
'local_config'
]
=
experiment_config
[
'localConfig'
]
if
request_data
[
'local_config'
]:
if
request_data
[
'local_config'
].
get
(
'gpuIndices'
)
and
isinstance
(
request_data
[
'local_config'
].
get
(
'gpuIndices'
),
int
):
request_data
[
'local_config'
][
'gpuIndices'
]
=
str
(
request_data
[
'local_config'
].
get
(
'gpuIndices'
))
if
request_data
[
'local_config'
].
get
(
'maxTrialNumOnEachGpu'
):
request_data
[
'local_config'
][
'maxTrialNumOnEachGpu'
]
=
request_data
[
'local_config'
].
get
(
'maxTrialNumOnEachGpu'
)
if
request_data
[
'local_config'
].
get
(
'useActiveGpu'
):
request_data
[
'local_config'
][
'useActiveGpu'
]
=
request_data
[
'local_config'
].
get
(
'useActiveGpu'
)
response
=
rest_put
(
cluster_metadata_url
(
port
),
json
.
dumps
(
request_data
),
REST_TIME_OUT
)
err_message
=
''
if
not
response
or
not
check_response
(
response
):
...
...
@@ -306,6 +299,37 @@ def set_aml_config(experiment_config, port, config_file_name):
#set trial_config
return
set_trial_config
(
experiment_config
,
port
,
config_file_name
),
err_message
def
set_heterogeneous_config
(
experiment_config
,
port
,
config_file_name
):
'''set heterogeneous configuration'''
heterogeneous_config_data
=
dict
()
heterogeneous_config_data
[
'heterogeneous_config'
]
=
experiment_config
[
'heterogeneousConfig'
]
platform_list
=
experiment_config
[
'heterogeneousConfig'
][
'trainingServicePlatforms'
]
for
platform
in
platform_list
:
if
platform
==
'aml'
:
heterogeneous_config_data
[
'aml_config'
]
=
experiment_config
[
'amlConfig'
]
elif
platform
==
'remote'
:
if
experiment_config
.
get
(
'remoteConfig'
):
heterogeneous_config_data
[
'remote_config'
]
=
experiment_config
[
'remoteConfig'
]
heterogeneous_config_data
[
'machine_list'
]
=
experiment_config
[
'machineList'
]
elif
platform
==
'local'
and
experiment_config
.
get
(
'localConfig'
):
heterogeneous_config_data
[
'local_config'
]
=
experiment_config
[
'localConfig'
]
elif
platform
==
'pai'
:
heterogeneous_config_data
[
'pai_config'
]
=
experiment_config
[
'paiConfig'
]
response
=
rest_put
(
cluster_metadata_url
(
port
),
json
.
dumps
(
heterogeneous_config_data
),
REST_TIME_OUT
)
err_message
=
None
if
not
response
or
not
response
.
status_code
==
200
:
if
response
is
not
None
:
err_message
=
response
.
text
_
,
stderr_full_path
=
get_log_path
(
config_file_name
)
with
open
(
stderr_full_path
,
'a+'
)
as
fout
:
fout
.
write
(
json
.
dumps
(
json
.
loads
(
err_message
),
indent
=
4
,
sort_keys
=
True
,
separators
=
(
','
,
':'
)))
return
False
,
err_message
result
,
message
=
setNNIManagerIp
(
experiment_config
,
port
,
config_file_name
)
if
not
result
:
return
result
,
message
#set trial_config
return
set_trial_config
(
experiment_config
,
port
,
config_file_name
),
err_message
def
set_experiment
(
experiment_config
,
mode
,
port
,
config_file_name
):
'''Call startExperiment (rest POST /experiment) with yaml file content'''
request_data
=
dict
()
...
...
@@ -387,6 +411,21 @@ def set_experiment(experiment_config, mode, port, config_file_name):
{
'key'
:
'aml_config'
,
'value'
:
experiment_config
[
'amlConfig'
]})
request_data
[
'clusterMetaData'
].
append
(
{
'key'
:
'trial_config'
,
'value'
:
experiment_config
[
'trial'
]})
elif
experiment_config
[
'trainingServicePlatform'
]
==
'heterogeneous'
:
request_data
[
'clusterMetaData'
].
append
(
{
'key'
:
'heterogeneous_config'
,
'value'
:
experiment_config
[
'heterogeneousConfig'
]})
platform_list
=
experiment_config
[
'heterogeneousConfig'
][
'trainingServicePlatforms'
]
request_dict
=
{
'aml'
:
{
'key'
:
'aml_config'
,
'value'
:
experiment_config
.
get
(
'amlConfig'
)},
'remote'
:
{
'key'
:
'machine_list'
,
'value'
:
experiment_config
.
get
(
'machineList'
)},
'pai'
:
{
'key'
:
'pai_config'
,
'value'
:
experiment_config
.
get
(
'paiConfig'
)},
'local'
:
{
'key'
:
'local_config'
,
'value'
:
experiment_config
.
get
(
'localConfig'
)}
}
for
platform
in
platform_list
:
if
request_dict
.
get
(
platform
):
request_data
[
'clusterMetaData'
].
append
(
request_dict
[
platform
])
request_data
[
'clusterMetaData'
].
append
(
{
'key'
:
'trial_config'
,
'value'
:
experiment_config
[
'trial'
]})
response
=
rest_post
(
experiment_url
(
port
),
json
.
dumps
(
request_data
),
REST_TIME_OUT
,
show_error
=
True
)
if
check_response
(
response
):
return
response
...
...
@@ -420,6 +459,8 @@ def set_platform_config(platform, experiment_config, port, config_file_name, res
config_result
,
err_msg
=
set_dlts_config
(
experiment_config
,
port
,
config_file_name
)
elif
platform
==
'aml'
:
config_result
,
err_msg
=
set_aml_config
(
experiment_config
,
port
,
config_file_name
)
elif
platform
==
'heterogeneous'
:
config_result
,
err_msg
=
set_heterogeneous_config
(
experiment_config
,
port
,
config_file_name
)
else
:
raise
Exception
(
ERROR_INFO
%
'Unsupported platform!'
)
exit
(
1
)
...
...
Prev
1
…
5
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment