Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
ae72aec8
Unverified
Commit
ae72aec8
authored
Apr 17, 2020
by
chicm-ms
Committed by
GitHub
Apr 17, 2020
Browse files
Show more log info for failed test cases (#2321)
parent
649eabc0
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
45 additions
and
25 deletions
+45
-25
test/nni_test/nnitest/run_tests.py
test/nni_test/nnitest/run_tests.py
+31
-19
test/nni_test/nnitest/utils.py
test/nni_test/nnitest/utils.py
+14
-6
No files found.
test/nni_test/nnitest/run_tests.py
View file @
ae72aec8
...
@@ -70,7 +70,7 @@ def run_test_case(test_case_config, it_config, args):
...
@@ -70,7 +70,7 @@ def run_test_case(test_case_config, it_config, args):
try
:
try
:
launch_test
(
new_config_file
,
args
.
ts
,
test_case_config
)
launch_test
(
new_config_file
,
args
.
ts
,
test_case_config
)
invoke_validator
(
test_case_config
,
args
.
nni_source_dir
)
invoke_validator
(
test_case_config
,
args
.
nni_source_dir
,
args
.
ts
)
finally
:
finally
:
stop_command
=
get_command
(
test_case_config
,
'stopCommand'
)
stop_command
=
get_command
(
test_case_config
,
'stopCommand'
)
print
(
'Stop command:'
,
stop_command
,
flush
=
True
)
print
(
'Stop command:'
,
stop_command
,
flush
=
True
)
...
@@ -80,7 +80,7 @@ def run_test_case(test_case_config, it_config, args):
...
@@ -80,7 +80,7 @@ def run_test_case(test_case_config, it_config, args):
if
os
.
path
.
exists
(
new_config_file
):
if
os
.
path
.
exists
(
new_config_file
):
os
.
remove
(
new_config_file
)
os
.
remove
(
new_config_file
)
def
invoke_validator
(
test_case_config
,
nni_source_dir
):
def
invoke_validator
(
test_case_config
,
nni_source_dir
,
training_service
):
validator_config
=
test_case_config
.
get
(
'validator'
)
validator_config
=
test_case_config
.
get
(
'validator'
)
if
validator_config
is
None
or
validator_config
.
get
(
'class'
)
is
None
:
if
validator_config
is
None
or
validator_config
.
get
(
'class'
)
is
None
:
return
return
...
@@ -88,7 +88,13 @@ def invoke_validator(test_case_config, nni_source_dir):
...
@@ -88,7 +88,13 @@ def invoke_validator(test_case_config, nni_source_dir):
validator
=
validators
.
__dict__
[
validator_config
.
get
(
'class'
)]()
validator
=
validators
.
__dict__
[
validator_config
.
get
(
'class'
)]()
kwargs
=
validator_config
.
get
(
'kwargs'
,
{})
kwargs
=
validator_config
.
get
(
'kwargs'
,
{})
print
(
'kwargs:'
,
kwargs
)
print
(
'kwargs:'
,
kwargs
)
validator
(
REST_ENDPOINT
,
get_experiment_dir
(
EXPERIMENT_URL
),
nni_source_dir
,
**
kwargs
)
experiment_id
=
get_experiment_id
(
EXPERIMENT_URL
)
try
:
validator
(
REST_ENDPOINT
,
get_experiment_dir
(
EXPERIMENT_URL
),
nni_source_dir
,
**
kwargs
)
except
:
print_experiment_log
(
experiment_id
=
experiment_id
)
print_trial_job_log
(
training_service
,
TRIAL_JOBS_URL
)
raise
def
get_max_values
(
config_file
):
def
get_max_values
(
config_file
):
experiment_config
=
get_yml_content
(
config_file
)
experiment_config
=
get_yml_content
(
config_file
)
...
@@ -117,7 +123,7 @@ def launch_test(config_file, training_service, test_case_config):
...
@@ -117,7 +123,7 @@ def launch_test(config_file, training_service, test_case_config):
proc
=
subprocess
.
run
(
shlex
.
split
(
launch_command
))
proc
=
subprocess
.
run
(
shlex
.
split
(
launch_command
))
assert
proc
.
returncode
==
0
,
'
`nnictl create`
failed with code %d'
%
proc
.
returncode
assert
proc
.
returncode
==
0
,
'
launch command
failed with code %d'
%
proc
.
returncode
# set experiment ID into variable
# set experiment ID into variable
exp_var_name
=
test_case_config
.
get
(
'setExperimentIdtoVar'
)
exp_var_name
=
test_case_config
.
get
(
'setExperimentIdtoVar'
)
...
@@ -134,24 +140,30 @@ def launch_test(config_file, training_service, test_case_config):
...
@@ -134,24 +140,30 @@ def launch_test(config_file, training_service, test_case_config):
bg_time
=
time
.
time
()
bg_time
=
time
.
time
()
print
(
str
(
datetime
.
datetime
.
now
()),
' waiting ...'
,
flush
=
True
)
print
(
str
(
datetime
.
datetime
.
now
()),
' waiting ...'
,
flush
=
True
)
while
True
:
try
:
# wait restful server to be ready
time
.
sleep
(
3
)
time
.
sleep
(
3
)
waited_time
=
time
.
time
()
-
bg_time
experiment_id
=
get_experiment_id
(
EXPERIMENT_URL
)
if
waited_time
>
max_duration
+
10
:
while
True
:
print
(
'waited: {}, max_duration: {}'
.
format
(
waited_time
,
max_duration
))
waited_time
=
time
.
time
()
-
bg_time
break
if
waited_time
>
max_duration
+
10
:
status
=
get_experiment_status
(
STATUS_URL
)
print
(
'waited: {}, max_duration: {}'
.
format
(
waited_time
,
max_duration
))
if
status
in
[
'DONE'
,
'ERROR'
]:
break
print
(
'experiment status:'
,
status
)
status
=
get_experiment_status
(
STATUS_URL
)
break
if
status
in
[
'DONE'
,
'ERROR'
]:
num_failed
=
len
(
get_failed_trial_jobs
(
TRIAL_JOBS_URL
))
print
(
'experiment status:'
,
status
)
if
num_failed
>
0
:
break
print
(
'failed jobs: '
,
num_failed
)
num_failed
=
len
(
get_failed_trial_jobs
(
TRIAL_JOBS_URL
))
break
if
num_failed
>
0
:
print
(
'failed jobs: '
,
num_failed
)
break
time
.
sleep
(
3
)
except
:
print_experiment_log
(
experiment_id
=
experiment_id
)
raise
print
(
str
(
datetime
.
datetime
.
now
()),
' waiting done'
,
flush
=
True
)
print
(
str
(
datetime
.
datetime
.
now
()),
' waiting done'
,
flush
=
True
)
if
get_experiment_status
(
STATUS_URL
)
==
'ERROR'
:
if
get_experiment_status
(
STATUS_URL
)
==
'ERROR'
:
print_experiment_log
(
EXPERIMENT_URL
)
print_experiment_log
(
experiment_id
=
experiment_id
)
trial_stats
=
get_trial_stats
(
TRIAL_JOBS_URL
)
trial_stats
=
get_trial_stats
(
TRIAL_JOBS_URL
)
print
(
json
.
dumps
(
trial_stats
,
indent
=
4
),
flush
=
True
)
print
(
json
.
dumps
(
trial_stats
,
indent
=
4
),
flush
=
True
)
...
...
test/nni_test/nnitest/utils.py
View file @
ae72aec8
...
@@ -10,6 +10,7 @@ import subprocess
...
@@ -10,6 +10,7 @@ import subprocess
import
requests
import
requests
import
time
import
time
import
ruamel.yaml
as
yaml
import
ruamel.yaml
as
yaml
import
shlex
EXPERIMENT_DONE_SIGNAL
=
'Experiment done'
EXPERIMENT_DONE_SIGNAL
=
'Experiment done'
...
@@ -65,14 +66,16 @@ def get_experiment_id(experiment_url):
...
@@ -65,14 +66,16 @@ def get_experiment_id(experiment_url):
experiment_id
=
requests
.
get
(
experiment_url
).
json
()[
'id'
]
experiment_id
=
requests
.
get
(
experiment_url
).
json
()[
'id'
]
return
experiment_id
return
experiment_id
def
get_experiment_dir
(
experiment_url
):
def
get_experiment_dir
(
experiment_url
=
None
,
experiment_id
=
None
):
'''get experiment root directory'''
'''get experiment root directory'''
experiment_id
=
get_experiment_id
(
experiment_url
)
assert
any
([
experiment_url
,
experiment_id
])
if
experiment_id
is
None
:
experiment_id
=
get_experiment_id
(
experiment_url
)
return
os
.
path
.
join
(
os
.
path
.
expanduser
(
'~'
),
'nni'
,
'experiments'
,
experiment_id
)
return
os
.
path
.
join
(
os
.
path
.
expanduser
(
'~'
),
'nni'
,
'experiments'
,
experiment_id
)
def
get_nni_log_dir
(
experiment_url
):
def
get_nni_log_dir
(
experiment_url
=
None
,
experiment_id
=
None
):
'''get nni's log directory from nni's experiment url'''
'''get nni's log directory from nni's experiment url'''
return
os
.
path
.
join
(
get_experiment_dir
(
experiment_url
),
'log'
)
return
os
.
path
.
join
(
get_experiment_dir
(
experiment_url
,
experiment_id
),
'log'
)
def
get_nni_log_path
(
experiment_url
):
def
get_nni_log_path
(
experiment_url
):
'''get nni's log path from nni's experiment url'''
'''get nni's log path from nni's experiment url'''
...
@@ -125,12 +128,17 @@ def print_trial_job_log(training_service, trial_jobs_url):
...
@@ -125,12 +128,17 @@ def print_trial_job_log(training_service, trial_jobs_url):
for
log_file
in
log_files
:
for
log_file
in
log_files
:
print_file_content
(
os
.
path
.
join
(
trial_log_dir
,
log_file
))
print_file_content
(
os
.
path
.
join
(
trial_log_dir
,
log_file
))
def
print_experiment_log
(
experiment_
url
):
def
print_experiment_log
(
experiment_
id
):
log_dir
=
get_nni_log_dir
(
experiment_
url
)
log_dir
=
get_nni_log_dir
(
experiment_
id
=
experiment_id
)
for
log_file
in
[
'dispatcher.log'
,
'nnimanager.log'
]:
for
log_file
in
[
'dispatcher.log'
,
'nnimanager.log'
]:
filepath
=
os
.
path
.
join
(
log_dir
,
log_file
)
filepath
=
os
.
path
.
join
(
log_dir
,
log_file
)
print_file_content
(
filepath
)
print_file_content
(
filepath
)
print
(
'nnictl log stderr:'
)
subprocess
.
run
(
shlex
.
split
(
'nnictl log stderr {}'
.
format
(
experiment_id
)))
print
(
'nnictl log stdout:'
)
subprocess
.
run
(
shlex
.
split
(
'nnictl log stdout {}'
.
format
(
experiment_id
)))
def
parse_max_duration_time
(
max_exec_duration
):
def
parse_max_duration_time
(
max_exec_duration
):
unit
=
max_exec_duration
[
-
1
]
unit
=
max_exec_duration
[
-
1
]
time
=
max_exec_duration
[:
-
1
]
time
=
max_exec_duration
[:
-
1
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment