Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
8e7025ba
"src/vscode:/vscode.git/clone" did not exist on "3885c9bc739cc8687eac5766a4b929462310e376"
Unverified
Commit
8e7025ba
authored
Jan 05, 2021
by
SparkSnail
Committed by
GitHub
Jan 05, 2021
Browse files
Fix hybrid remote connection timeout error (#3262)
parent
9e26e354
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
6 deletions
+10
-6
nni/tools/nnictl/launcher.py
nni/tools/nnictl/launcher.py
+7
-5
nni/tools/nnictl/launcher_utils.py
nni/tools/nnictl/launcher_utils.py
+3
-1
No files found.
nni/tools/nnictl/launcher.py
View file @
8e7025ba
...
...
@@ -47,10 +47,10 @@ def start_rest_server(port, platform, mode, experiment_id, foreground=False, log
'You could use
\'
nnictl create --help
\'
to get help information'
%
port
)
exit
(
1
)
if
(
platform
!=
'local'
)
and
detect_port
(
int
(
port
)
+
1
):
print_error
(
'
PAI
mode need an additional adjacent port %d, and the port %d is used by another process!
\n
'
\
if
(
platform
not
in
[
'local'
,
'aml'
]
)
and
detect_port
(
int
(
port
)
+
1
):
print_error
(
'
%s
mode need an additional adjacent port %d, and the port %d is used by another process!
\n
'
\
'You could set another port to start experiment!
\n
'
\
'You could use
\'
nnictl create --help
\'
to get help information'
%
((
int
(
port
)
+
1
),
(
int
(
port
)
+
1
)))
'You could use
\'
nnictl create --help
\'
to get help information'
%
(
platform
,
(
int
(
port
)
+
1
),
(
int
(
port
)
+
1
)))
exit
(
1
)
print_normal
(
'Starting restful server...'
)
...
...
@@ -316,7 +316,9 @@ def set_hybrid_config(experiment_config, port, config_file_name):
hybrid_config_data
[
'local_config'
]
=
experiment_config
[
'localConfig'
]
elif
platform
==
'pai'
:
hybrid_config_data
[
'pai_config'
]
=
experiment_config
[
'paiConfig'
]
response
=
rest_put
(
cluster_metadata_url
(
port
),
json
.
dumps
(
hybrid_config_data
),
REST_TIME_OUT
)
# It needs to connect all remote machines, set longer timeout here to wait for restful server connection response.
time_out
=
60
if
'remote'
in
platform_list
else
REST_TIME_OUT
response
=
rest_put
(
cluster_metadata_url
(
port
),
json
.
dumps
(
hybrid_config_data
),
time_out
)
err_message
=
None
if
not
response
or
not
response
.
status_code
==
200
:
if
response
is
not
None
:
...
...
@@ -567,7 +569,7 @@ def launch_experiment(args, experiment_config, mode, experiment_id):
raise
Exception
(
ERROR_INFO
%
'Restful server stopped!'
)
exit
(
1
)
if
experiment_config
.
get
(
'nniManagerIp'
):
web_ui_url_list
=
[
'{0}:{1}'
.
format
(
experiment_config
[
'nniManagerIp'
],
str
(
args
.
port
))]
web_ui_url_list
=
[
'
http://
{0}:{1}'
.
format
(
experiment_config
[
'nniManagerIp'
],
str
(
args
.
port
))]
else
:
web_ui_url_list
=
get_local_urls
(
args
.
port
)
nni_config
.
set_config
(
'webuiUrl'
,
web_ui_url_list
)
...
...
nni/tools/nnictl/launcher_utils.py
View file @
8e7025ba
...
...
@@ -105,7 +105,9 @@ def set_default_values(experiment_config):
experiment_config
[
'maxExecDuration'
]
=
'999d'
if
experiment_config
.
get
(
'maxTrialNum'
)
is
None
:
experiment_config
[
'maxTrialNum'
]
=
99999
if
experiment_config
[
'trainingServicePlatform'
]
==
'remote'
:
if
experiment_config
[
'trainingServicePlatform'
]
==
'remote'
or
\
experiment_config
[
'trainingServicePlatform'
]
==
'hybrid'
and
\
'remote'
in
experiment_config
[
'hybridConfig'
][
'trainingServicePlatforms'
]:
for
index
in
range
(
len
(
experiment_config
[
'machineList'
])):
if
experiment_config
[
'machineList'
][
index
].
get
(
'port'
)
is
None
:
experiment_config
[
'machineList'
][
index
][
'port'
]
=
22
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment