Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
f04d423a
Unverified
Commit
f04d423a
authored
May 24, 2021
by
SparkSnail
Committed by
GitHub
May 24, 2021
Browse files
Support hybrid and V2 config pipeline (#3648)
parent
35c3d169
Changes
25
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
67 additions
and
8 deletions
+67
-8
test/config/training_service.yml
test/config/training_service.yml
+20
-0
test/config/training_service_v2.yml
test/config/training_service_v2.yml
+9
-0
test/config/tuners/regularized_evolution_tuner-v2.yml
test/config/tuners/regularized_evolution_tuner-v2.yml
+14
-0
test/nni_test/nnitest/generate_ts_config.py
test/nni_test/nnitest/generate_ts_config.py
+13
-3
test/nni_test/nnitest/run_tests.py
test/nni_test/nnitest/run_tests.py
+11
-5
No files found.
test/config/training_service.yml
View file @
f04d423a
...
...
@@ -87,6 +87,26 @@ remote:
port
:
username
:
trainingServicePlatform
:
remote
hybrid
:
maxExecDuration
:
15m
nniManagerIp
:
maxTrialNum
:
2
trialConcurrency
:
2
trial
:
gpuNum
:
0
trainingServicePlatform
:
hybrid
hybridConfig
:
# TODO: Add more platforms
trainingServicePlatforms
:
-
remote
-
local
machineList
:
-
ip
:
passwd
:
port
:
username
:
remoteConfig
:
reuse
:
true
adl
:
maxExecDuration
:
15m
nniManagerIp
:
...
...
test/config/training_service_v2.yml
0 → 100644
View file @
f04d423a
hybrid
:
trainingService
:
-
platform
:
remote
machineList
:
-
host
:
user
:
password
:
port
:
-
platform
:
local
\ No newline at end of file
test/config/tuners/regularized_evolution_tuner-v2.yml
0 → 100644
View file @
f04d423a
experimentName
:
default_test
searchSpaceFile
:
seach_space_classic_nas.json
trialCommand
:
python3 mnist.py --epochs
1
trialCodeDirectory
:
../../../examples/nas/legacy/classic_nas
trialGpuNumber
:
0
trialConcurrency
:
1
maxExperimentDuration
:
15m
maxTrialNumber
:
1
tuner
:
name
:
RegularizedEvolutionTuner
classArgs
:
optimize_mode
:
maximize
trainingService
:
platform
:
local
test/nni_test/nnitest/generate_ts_config.py
View file @
f04d423a
...
...
@@ -8,10 +8,11 @@ import argparse
from
utils
import
get_yml_content
,
dump_yml_content
TRAINING_SERVICE_FILE
=
os
.
path
.
join
(
'config'
,
'training_service.yml'
)
TRAINING_SERVICE_FILE_V2
=
os
.
path
.
join
(
'config'
,
'training_service_v2.yml'
)
def
update_training_service_config
(
args
):
config
=
get_yml_content
(
TRAINING_SERVICE_FILE
)
if
args
.
nni_manager_ip
is
not
None
:
if
args
.
nni_manager_ip
is
not
None
and
args
.
config_version
==
'v1'
:
config
[
args
.
ts
][
'nniManagerIp'
]
=
args
.
nni_manager_ip
if
args
.
ts
==
'pai'
:
if
args
.
pai_user
is
not
None
:
...
...
@@ -99,13 +100,22 @@ def update_training_service_config(args):
config
[
args
.
ts
][
'amlConfig'
][
'workspaceName'
]
=
args
.
workspace_name
if
args
.
compute_target
is
not
None
:
config
[
args
.
ts
][
'amlConfig'
][
'computeTarget'
]
=
args
.
compute_target
dump_yml_content
(
TRAINING_SERVICE_FILE
,
config
)
if
args
.
ts
==
'hybrid'
:
config
=
get_yml_content
(
TRAINING_SERVICE_FILE_V2
)
config
[
args
.
ts
][
'trainingService'
][
0
][
'machineList'
][
0
][
'user'
]
=
args
.
remote_user
config
[
args
.
ts
][
'trainingService'
][
0
][
'machineList'
][
0
][
'host'
]
=
args
.
remote_host
config
[
args
.
ts
][
'trainingService'
][
0
][
'machineList'
][
0
][
'password'
]
=
args
.
remote_pwd
config
[
args
.
ts
][
'trainingService'
][
0
][
'machineList'
][
0
][
'port'
]
=
args
.
remote_port
config
[
args
.
ts
][
'nni_manager_ip'
]
=
args
.
nni_manager_ip
dump_yml_content
(
TRAINING_SERVICE_FILE_V2
,
config
)
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'pai'
,
'kubeflow'
,
'remote'
,
'local'
,
'frameworkcontroller'
,
'adl'
,
'aml'
],
default
=
'pai'
)
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'pai'
,
'kubeflow'
,
'remote'
,
'local'
,
'frameworkcontroller'
,
'adl'
,
'aml'
,
'hybrid'
],
default
=
'pai'
)
parser
.
add_argument
(
"--config_version"
,
type
=
str
,
choices
=
[
'v1'
,
'v2'
],
default
=
'v1'
)
parser
.
add_argument
(
"--nni_docker_image"
,
type
=
str
)
parser
.
add_argument
(
"--nni_manager_ip"
,
type
=
str
)
# args for PAI
...
...
test/nni_test/nnitest/run_tests.py
View file @
f04d423a
...
...
@@ -53,6 +53,9 @@ def update_training_service_config(config, training_service, config_file_path):
it_ts_config
[
training_service
][
'trial'
][
'codeDir'
]
=
containerCodeDir
it_ts_config
[
training_service
][
'trial'
][
'command'
]
=
'cd {0} && {1}'
.
format
(
containerCodeDir
,
config
[
'trial'
][
'command'
])
if
training_service
==
'hybrid'
:
it_ts_config
=
get_yml_content
(
os
.
path
.
join
(
'config'
,
'training_service_v2.yml'
))
else
:
deep_update
(
config
,
it_ts_config
[
'all'
])
deep_update
(
config
,
it_ts_config
[
training_service
])
...
...
@@ -123,7 +126,10 @@ def invoke_validator(test_case_config, nni_source_dir, training_service):
def
get_max_values
(
config_file
):
experiment_config
=
get_yml_content
(
config_file
)
if
experiment_config
.
get
(
'maxExecDuration'
):
return
parse_max_duration_time
(
experiment_config
[
'maxExecDuration'
]),
experiment_config
[
'maxTrialNum'
]
else
:
return
parse_max_duration_time
(
experiment_config
[
'maxExperimentDuration'
]),
experiment_config
[
'maxTrialNumber'
]
def
get_command
(
test_case_config
,
commandKey
):
...
...
@@ -259,7 +265,7 @@ def run(args):
name
,
args
.
ts
,
test_case_config
[
'trainingService'
]))
continue
# remote mode need more time to cleanup
if
args
.
ts
==
'remote'
:
if
args
.
ts
==
'remote'
or
args
.
ts
==
'hybrid'
:
wait_for_port_available
(
8080
,
240
)
else
:
wait_for_port_available
(
8080
,
60
)
...
...
@@ -281,7 +287,7 @@ if __name__ == '__main__':
parser
.
add_argument
(
"--cases"
,
type
=
str
,
default
=
None
)
parser
.
add_argument
(
"--exclude"
,
type
=
str
,
default
=
None
)
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'local'
,
'remote'
,
'pai'
,
'kubeflow'
,
'frameworkcontroller'
,
'adl'
,
'aml'
],
default
=
'local'
)
'kubeflow'
,
'frameworkcontroller'
,
'adl'
,
'aml'
,
'hybrid'
],
default
=
'local'
)
args
=
parser
.
parse_args
()
run
(
args
)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment