Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
f04d423a
"docs/archive_en_US/Tutorial/AnnotationSpec.md" did not exist on "a441558c7b79fa0feaf4868b4b8fa1d66b4120c1"
Unverified
Commit
f04d423a
authored
May 24, 2021
by
SparkSnail
Committed by
GitHub
May 24, 2021
Browse files
Support hybrid and V2 config pipeline (#3648)
parent
35c3d169
Changes
25
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
67 additions
and
8 deletions
+67
-8
test/config/training_service.yml
test/config/training_service.yml
+20
-0
test/config/training_service_v2.yml
test/config/training_service_v2.yml
+9
-0
test/config/tuners/regularized_evolution_tuner-v2.yml
test/config/tuners/regularized_evolution_tuner-v2.yml
+14
-0
test/nni_test/nnitest/generate_ts_config.py
test/nni_test/nnitest/generate_ts_config.py
+13
-3
test/nni_test/nnitest/run_tests.py
test/nni_test/nnitest/run_tests.py
+11
-5
No files found.
test/config/training_service.yml
View file @
f04d423a
...
@@ -87,6 +87,26 @@ remote:
...
@@ -87,6 +87,26 @@ remote:
port
:
port
:
username
:
username
:
trainingServicePlatform
:
remote
trainingServicePlatform
:
remote
hybrid
:
maxExecDuration
:
15m
nniManagerIp
:
maxTrialNum
:
2
trialConcurrency
:
2
trial
:
gpuNum
:
0
trainingServicePlatform
:
hybrid
hybridConfig
:
# TODO: Add more platforms
trainingServicePlatforms
:
-
remote
-
local
machineList
:
-
ip
:
passwd
:
port
:
username
:
remoteConfig
:
reuse
:
true
adl
:
adl
:
maxExecDuration
:
15m
maxExecDuration
:
15m
nniManagerIp
:
nniManagerIp
:
...
...
test/config/training_service_v2.yml
0 → 100644
View file @
f04d423a
hybrid
:
trainingService
:
-
platform
:
remote
machineList
:
-
host
:
user
:
password
:
port
:
-
platform
:
local
\ No newline at end of file
test/config/tuners/regularized_evolution_tuner-v2.yml
0 → 100644
View file @
f04d423a
experimentName
:
default_test
searchSpaceFile
:
seach_space_classic_nas.json
trialCommand
:
python3 mnist.py --epochs
1
trialCodeDirectory
:
../../../examples/nas/legacy/classic_nas
trialGpuNumber
:
0
trialConcurrency
:
1
maxExperimentDuration
:
15m
maxTrialNumber
:
1
tuner
:
name
:
RegularizedEvolutionTuner
classArgs
:
optimize_mode
:
maximize
trainingService
:
platform
:
local
test/nni_test/nnitest/generate_ts_config.py
View file @
f04d423a
...
@@ -8,10 +8,11 @@ import argparse
...
@@ -8,10 +8,11 @@ import argparse
from
utils
import
get_yml_content
,
dump_yml_content
from
utils
import
get_yml_content
,
dump_yml_content
TRAINING_SERVICE_FILE
=
os
.
path
.
join
(
'config'
,
'training_service.yml'
)
TRAINING_SERVICE_FILE
=
os
.
path
.
join
(
'config'
,
'training_service.yml'
)
TRAINING_SERVICE_FILE_V2
=
os
.
path
.
join
(
'config'
,
'training_service_v2.yml'
)
def
update_training_service_config
(
args
):
def
update_training_service_config
(
args
):
config
=
get_yml_content
(
TRAINING_SERVICE_FILE
)
config
=
get_yml_content
(
TRAINING_SERVICE_FILE
)
if
args
.
nni_manager_ip
is
not
None
:
if
args
.
nni_manager_ip
is
not
None
and
args
.
config_version
==
'v1'
:
config
[
args
.
ts
][
'nniManagerIp'
]
=
args
.
nni_manager_ip
config
[
args
.
ts
][
'nniManagerIp'
]
=
args
.
nni_manager_ip
if
args
.
ts
==
'pai'
:
if
args
.
ts
==
'pai'
:
if
args
.
pai_user
is
not
None
:
if
args
.
pai_user
is
not
None
:
...
@@ -99,13 +100,22 @@ def update_training_service_config(args):
...
@@ -99,13 +100,22 @@ def update_training_service_config(args):
config
[
args
.
ts
][
'amlConfig'
][
'workspaceName'
]
=
args
.
workspace_name
config
[
args
.
ts
][
'amlConfig'
][
'workspaceName'
]
=
args
.
workspace_name
if
args
.
compute_target
is
not
None
:
if
args
.
compute_target
is
not
None
:
config
[
args
.
ts
][
'amlConfig'
][
'computeTarget'
]
=
args
.
compute_target
config
[
args
.
ts
][
'amlConfig'
][
'computeTarget'
]
=
args
.
compute_target
dump_yml_content
(
TRAINING_SERVICE_FILE
,
config
)
dump_yml_content
(
TRAINING_SERVICE_FILE
,
config
)
if
args
.
ts
==
'hybrid'
:
config
=
get_yml_content
(
TRAINING_SERVICE_FILE_V2
)
config
[
args
.
ts
][
'trainingService'
][
0
][
'machineList'
][
0
][
'user'
]
=
args
.
remote_user
config
[
args
.
ts
][
'trainingService'
][
0
][
'machineList'
][
0
][
'host'
]
=
args
.
remote_host
config
[
args
.
ts
][
'trainingService'
][
0
][
'machineList'
][
0
][
'password'
]
=
args
.
remote_pwd
config
[
args
.
ts
][
'trainingService'
][
0
][
'machineList'
][
0
][
'port'
]
=
args
.
remote_port
config
[
args
.
ts
][
'nni_manager_ip'
]
=
args
.
nni_manager_ip
dump_yml_content
(
TRAINING_SERVICE_FILE_V2
,
config
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'pai'
,
'kubeflow'
,
'remote'
,
'local'
,
'frameworkcontroller'
,
'adl'
,
'aml'
],
default
=
'pai'
)
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'pai'
,
'kubeflow'
,
'remote'
,
'local'
,
'frameworkcontroller'
,
'adl'
,
'aml'
,
'hybrid'
],
default
=
'pai'
)
parser
.
add_argument
(
"--config_version"
,
type
=
str
,
choices
=
[
'v1'
,
'v2'
],
default
=
'v1'
)
parser
.
add_argument
(
"--nni_docker_image"
,
type
=
str
)
parser
.
add_argument
(
"--nni_docker_image"
,
type
=
str
)
parser
.
add_argument
(
"--nni_manager_ip"
,
type
=
str
)
parser
.
add_argument
(
"--nni_manager_ip"
,
type
=
str
)
# args for PAI
# args for PAI
...
...
test/nni_test/nnitest/run_tests.py
View file @
f04d423a
...
@@ -53,6 +53,9 @@ def update_training_service_config(config, training_service, config_file_path):
...
@@ -53,6 +53,9 @@ def update_training_service_config(config, training_service, config_file_path):
it_ts_config
[
training_service
][
'trial'
][
'codeDir'
]
=
containerCodeDir
it_ts_config
[
training_service
][
'trial'
][
'codeDir'
]
=
containerCodeDir
it_ts_config
[
training_service
][
'trial'
][
'command'
]
=
'cd {0} && {1}'
.
format
(
containerCodeDir
,
config
[
'trial'
][
'command'
])
it_ts_config
[
training_service
][
'trial'
][
'command'
]
=
'cd {0} && {1}'
.
format
(
containerCodeDir
,
config
[
'trial'
][
'command'
])
if
training_service
==
'hybrid'
:
it_ts_config
=
get_yml_content
(
os
.
path
.
join
(
'config'
,
'training_service_v2.yml'
))
else
:
deep_update
(
config
,
it_ts_config
[
'all'
])
deep_update
(
config
,
it_ts_config
[
'all'
])
deep_update
(
config
,
it_ts_config
[
training_service
])
deep_update
(
config
,
it_ts_config
[
training_service
])
...
@@ -123,7 +126,10 @@ def invoke_validator(test_case_config, nni_source_dir, training_service):
...
@@ -123,7 +126,10 @@ def invoke_validator(test_case_config, nni_source_dir, training_service):
def
get_max_values
(
config_file
):
def
get_max_values
(
config_file
):
experiment_config
=
get_yml_content
(
config_file
)
experiment_config
=
get_yml_content
(
config_file
)
if
experiment_config
.
get
(
'maxExecDuration'
):
return
parse_max_duration_time
(
experiment_config
[
'maxExecDuration'
]),
experiment_config
[
'maxTrialNum'
]
return
parse_max_duration_time
(
experiment_config
[
'maxExecDuration'
]),
experiment_config
[
'maxTrialNum'
]
else
:
return
parse_max_duration_time
(
experiment_config
[
'maxExperimentDuration'
]),
experiment_config
[
'maxTrialNumber'
]
def
get_command
(
test_case_config
,
commandKey
):
def
get_command
(
test_case_config
,
commandKey
):
...
@@ -259,7 +265,7 @@ def run(args):
...
@@ -259,7 +265,7 @@ def run(args):
name
,
args
.
ts
,
test_case_config
[
'trainingService'
]))
name
,
args
.
ts
,
test_case_config
[
'trainingService'
]))
continue
continue
# remote mode need more time to cleanup
# remote mode need more time to cleanup
if
args
.
ts
==
'remote'
:
if
args
.
ts
==
'remote'
or
args
.
ts
==
'hybrid'
:
wait_for_port_available
(
8080
,
240
)
wait_for_port_available
(
8080
,
240
)
else
:
else
:
wait_for_port_available
(
8080
,
60
)
wait_for_port_available
(
8080
,
60
)
...
@@ -281,7 +287,7 @@ if __name__ == '__main__':
...
@@ -281,7 +287,7 @@ if __name__ == '__main__':
parser
.
add_argument
(
"--cases"
,
type
=
str
,
default
=
None
)
parser
.
add_argument
(
"--cases"
,
type
=
str
,
default
=
None
)
parser
.
add_argument
(
"--exclude"
,
type
=
str
,
default
=
None
)
parser
.
add_argument
(
"--exclude"
,
type
=
str
,
default
=
None
)
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'local'
,
'remote'
,
'pai'
,
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'local'
,
'remote'
,
'pai'
,
'kubeflow'
,
'frameworkcontroller'
,
'adl'
,
'aml'
],
default
=
'local'
)
'kubeflow'
,
'frameworkcontroller'
,
'adl'
,
'aml'
,
'hybrid'
],
default
=
'local'
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
run
(
args
)
run
(
args
)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment