Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
f548d82f
Unverified
Commit
f548d82f
authored
May 20, 2020
by
SparkSnail
Committed by
GitHub
May 20, 2020
Browse files
Merge pull request #250 from microsoft/master
merge master
parents
0a742aff
69cae211
Changes
34
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
104 additions
and
42 deletions
+104
-42
test/config/examples/mnist-annotation.yml
test/config/examples/mnist-annotation.yml
+0
-1
test/config/examples/mnist-keras.yml
test/config/examples/mnist-keras.yml
+0
-1
test/config/examples/mnist-nested-search-space.yml
test/config/examples/mnist-nested-search-space.yml
+0
-1
test/config/examples/mnist-pytorch.yml
test/config/examples/mnist-pytorch.yml
+0
-1
test/config/examples/mnist-tfv1.yml
test/config/examples/mnist-tfv1.yml
+0
-1
test/config/integration_tests.yml
test/config/integration_tests.yml
+11
-2
test/nni_test/nnitest/naive_test.py
test/nni_test/nnitest/naive_test.py
+6
-5
test/nni_test/nnitest/run_tests.py
test/nni_test/nnitest/run_tests.py
+2
-12
test/nni_test/nnitest/utils.py
test/nni_test/nnitest/utils.py
+10
-3
test/pipelines/pipelines-it-remote-linux-to-windows.yml
test/pipelines/pipelines-it-remote-linux-to-windows.yml
+48
-0
tools/nni_cmd/launcher.py
tools/nni_cmd/launcher.py
+3
-1
tools/nni_cmd/nnictl_utils.py
tools/nni_cmd/nnictl_utils.py
+1
-1
tools/nni_trial_tool/constants.py
tools/nni_trial_tool/constants.py
+0
-2
tools/nni_trial_tool/trial_keeper.py
tools/nni_trial_tool/trial_keeper.py
+23
-11
No files found.
test/config/examples/mnist-annotation.yml
View file @
f548d82f
...
@@ -13,7 +13,6 @@ assessor:
...
@@ -13,7 +13,6 @@ assessor:
trial
:
trial
:
codeDir
:
../../../examples/trials/mnist-annotation
codeDir
:
../../../examples/trials/mnist-annotation
command
:
python3 mnist.py --batch_num
10
command
:
python3 mnist.py --batch_num
10
gpuNum
:
0
useAnnotation
:
true
useAnnotation
:
true
multiPhase
:
false
multiPhase
:
false
...
...
test/config/examples/mnist-keras.yml
View file @
f548d82f
...
@@ -14,7 +14,6 @@ assessor:
...
@@ -14,7 +14,6 @@ assessor:
trial
:
trial
:
codeDir
:
../../../examples/trials/mnist-keras
codeDir
:
../../../examples/trials/mnist-keras
command
:
python3 mnist-keras.py --num_train 200 --epochs
1
command
:
python3 mnist-keras.py --num_train 200 --epochs
1
gpuNum
:
0
useAnnotation
:
false
useAnnotation
:
false
multiPhase
:
false
multiPhase
:
false
...
...
test/config/examples/mnist-nested-search-space.yml
View file @
f548d82f
...
@@ -15,7 +15,6 @@ assessor:
...
@@ -15,7 +15,6 @@ assessor:
trial
:
trial
:
codeDir
:
../../../examples/trials/mnist-nested-search-space
codeDir
:
../../../examples/trials/mnist-nested-search-space
command
:
python3 mnist.py --batch_num
10
command
:
python3 mnist.py --batch_num
10
gpuNum
:
0
useAnnotation
:
false
useAnnotation
:
false
multiPhase
:
false
multiPhase
:
false
...
...
test/config/examples/mnist-pytorch.yml
View file @
f548d82f
...
@@ -14,7 +14,6 @@ assessor:
...
@@ -14,7 +14,6 @@ assessor:
trial
:
trial
:
codeDir
:
../../../examples/trials/mnist-pytorch
codeDir
:
../../../examples/trials/mnist-pytorch
command
:
python3 mnist.py --epochs 1 --batch_num
10
command
:
python3 mnist.py --epochs 1 --batch_num
10
gpuNum
:
0
useAnnotation
:
false
useAnnotation
:
false
multiPhase
:
false
multiPhase
:
false
...
...
test/config/examples/mnist-tfv1.yml
View file @
f548d82f
...
@@ -14,7 +14,6 @@ assessor:
...
@@ -14,7 +14,6 @@ assessor:
trial
:
trial
:
codeDir
:
../../../examples/trials/mnist-tfv1
codeDir
:
../../../examples/trials/mnist-tfv1
command
:
python3 mnist.py --batch_num
10
command
:
python3 mnist.py --batch_num
10
gpuNum
:
0
useAnnotation
:
false
useAnnotation
:
false
multiPhase
:
false
multiPhase
:
false
...
...
test/config/integration_tests.yml
View file @
f548d82f
defaultTestCaseConfig
:
defaultTestCaseConfig
:
launchCommand
:
nnictl create --config $configFile
launchCommand
:
nnictl create --config $configFile
--debug
stopCommand
:
nnictl stop
stopCommand
:
nnictl stop
experimentStatusCheck
:
True
experimentStatusCheck
:
True
platform
:
linux darwin win32
platform
:
linux darwin win32
...
@@ -22,7 +22,7 @@ testCases:
...
@@ -22,7 +22,7 @@ testCases:
validator
:
validator
:
# launch command, default launch command is 'nnictl create --config $configFile'
# launch command, default launch command is 'nnictl create --config $configFile'
launchCommand
:
nnictl create --config $configFile
launchCommand
:
nnictl create --config $configFile
--debug
# stop command, default stop command is 'nnictl stop', empty means no stop command
# stop command, default stop command is 'nnictl stop', empty means no stop command
stopCommand
:
nnictl stop
stopCommand
:
nnictl stop
...
@@ -38,15 +38,24 @@ testCases:
...
@@ -38,15 +38,24 @@ testCases:
-
name
:
mnist-tfv1
-
name
:
mnist-tfv1
configFile
:
test/config/examples/mnist-tfv1.yml
configFile
:
test/config/examples/mnist-tfv1.yml
config
:
maxTrialNum
:
1
trialConcurrency
:
1
-
name
:
mnist-keras
-
name
:
mnist-keras
configFile
:
test/config/examples/mnist-keras.yml
configFile
:
test/config/examples/mnist-keras.yml
config
:
maxTrialNum
:
2
trialConcurrency
:
1
-
name
:
mnist-pytorch
-
name
:
mnist-pytorch
configFile
:
test/config/examples/mnist-pytorch.yml
configFile
:
test/config/examples/mnist-pytorch.yml
-
name
:
mnist-annotation
-
name
:
mnist-annotation
configFile
:
test/config/examples/mnist-annotation.yml
configFile
:
test/config/examples/mnist-annotation.yml
config
:
maxTrialNum
:
1
trialConcurrency
:
1
-
name
:
cifar10-pytorch
-
name
:
cifar10-pytorch
configFile
:
test/config/examples/cifar10-pytorch.yml
configFile
:
test/config/examples/cifar10-pytorch.yml
...
...
test/nni_test/nnitest/naive_test.py
View file @
f548d82f
...
@@ -10,7 +10,7 @@ import sys
...
@@ -10,7 +10,7 @@ import sys
import
time
import
time
import
traceback
import
traceback
from
utils
import
is_experiment_done
,
get_experiment_id
,
get_nni_log_path
,
read_last_line
,
remove_files
,
setup_experiment
,
detect_port
,
snooz
e
from
utils
import
is_experiment_done
,
get_experiment_id
,
get_nni_log_path
,
read_last_line
,
remove_files
,
setup_experiment
,
detect_port
,
wait_for_port_availabl
e
from
utils
import
GREEN
,
RED
,
CLEAR
,
EXPERIMENT_URL
from
utils
import
GREEN
,
RED
,
CLEAR
,
EXPERIMENT_URL
NNI_SOURCE_DIR
=
'..'
NNI_SOURCE_DIR
=
'..'
...
@@ -71,7 +71,7 @@ def naive_test(args):
...
@@ -71,7 +71,7 @@ def naive_test(args):
assert
assessor_result
==
expected
,
'Bad assessor result'
assert
assessor_result
==
expected
,
'Bad assessor result'
subprocess
.
run
([
'nnictl'
,
'stop'
])
subprocess
.
run
([
'nnictl'
,
'stop'
])
snooze
(
)
wait_for_port_available
(
8080
,
10
)
def
stop_experiment_test
(
args
):
def
stop_experiment_test
(
args
):
config_file
=
args
.
config
config_file
=
args
.
config
...
@@ -86,19 +86,20 @@ def stop_experiment_test(args):
...
@@ -86,19 +86,20 @@ def stop_experiment_test(args):
experiment_id
=
get_experiment_id
(
EXPERIMENT_URL
)
experiment_id
=
get_experiment_id
(
EXPERIMENT_URL
)
proc
=
subprocess
.
run
([
'nnictl'
,
'stop'
,
experiment_id
])
proc
=
subprocess
.
run
([
'nnictl'
,
'stop'
,
experiment_id
])
assert
proc
.
returncode
==
0
,
'`nnictl stop %s` failed with code %d'
%
(
experiment_id
,
proc
.
returncode
)
assert
proc
.
returncode
==
0
,
'`nnictl stop %s` failed with code %d'
%
(
experiment_id
,
proc
.
returncode
)
snooze
(
)
wait_for_port_available
(
8080
,
10
)
assert
not
detect_port
(
8080
),
'`nnictl stop %s` failed to stop experiments'
%
experiment_id
assert
not
detect_port
(
8080
),
'`nnictl stop %s` failed to stop experiments'
%
experiment_id
# test cmd `nnictl stop --port`
# test cmd `nnictl stop --port`
proc
=
subprocess
.
run
([
'nnictl'
,
'stop'
,
'--port'
,
'8990'
])
proc
=
subprocess
.
run
([
'nnictl'
,
'stop'
,
'--port'
,
'8990'
])
assert
proc
.
returncode
==
0
,
'`nnictl stop %s` failed with code %d'
%
(
experiment_id
,
proc
.
returncode
)
assert
proc
.
returncode
==
0
,
'`nnictl stop %s` failed with code %d'
%
(
experiment_id
,
proc
.
returncode
)
snooze
(
)
wait_for_port_available
(
8990
,
10
)
assert
not
detect_port
(
8990
),
'`nnictl stop %s` failed to stop experiments'
%
experiment_id
assert
not
detect_port
(
8990
),
'`nnictl stop %s` failed to stop experiments'
%
experiment_id
# test cmd `nnictl stop --all`
# test cmd `nnictl stop --all`
proc
=
subprocess
.
run
([
'nnictl'
,
'stop'
,
'--all'
])
proc
=
subprocess
.
run
([
'nnictl'
,
'stop'
,
'--all'
])
assert
proc
.
returncode
==
0
,
'`nnictl stop --all` failed with code %d'
%
proc
.
returncode
assert
proc
.
returncode
==
0
,
'`nnictl stop --all` failed with code %d'
%
proc
.
returncode
snooze
()
wait_for_port_available
(
8888
,
10
)
wait_for_port_available
(
8989
,
10
)
assert
not
detect_port
(
8888
)
and
not
detect_port
(
8989
),
'`nnictl stop --all` failed to stop experiments'
assert
not
detect_port
(
8888
)
and
not
detect_port
(
8989
),
'`nnictl stop --all` failed to stop experiments'
...
...
test/nni_test/nnitest/run_tests.py
View file @
f548d82f
...
@@ -15,7 +15,7 @@ import ruamel.yaml as yaml
...
@@ -15,7 +15,7 @@ import ruamel.yaml as yaml
from
utils
import
get_experiment_status
,
get_yml_content
,
dump_yml_content
,
get_experiment_id
,
\
from
utils
import
get_experiment_status
,
get_yml_content
,
dump_yml_content
,
get_experiment_id
,
\
parse_max_duration_time
,
get_trial_stats
,
deep_update
,
print_trial_job_log
,
get_failed_trial_jobs
,
\
parse_max_duration_time
,
get_trial_stats
,
deep_update
,
print_trial_job_log
,
get_failed_trial_jobs
,
\
get_experiment_dir
,
print_experiment_log
get_experiment_dir
,
print_experiment_log
from
utils
import
GREEN
,
RED
,
CLEAR
,
STATUS_URL
,
TRIAL_JOBS_URL
,
EXPERIMENT_URL
,
REST_ENDPOINT
,
detect_port
from
utils
import
GREEN
,
RED
,
CLEAR
,
STATUS_URL
,
TRIAL_JOBS_URL
,
EXPERIMENT_URL
,
REST_ENDPOINT
,
wait_for_port_available
import
validators
import
validators
it_variables
=
{}
it_variables
=
{}
...
@@ -157,7 +157,7 @@ def launch_test(config_file, training_service, test_case_config):
...
@@ -157,7 +157,7 @@ def launch_test(config_file, training_service, test_case_config):
if
num_failed
>
0
:
if
num_failed
>
0
:
print
(
'failed jobs: '
,
num_failed
)
print
(
'failed jobs: '
,
num_failed
)
break
break
time
.
sleep
(
3
)
time
.
sleep
(
1
)
except
:
except
:
print_experiment_log
(
experiment_id
=
experiment_id
)
print_experiment_log
(
experiment_id
=
experiment_id
)
raise
raise
...
@@ -189,16 +189,6 @@ def case_included(name, cases):
...
@@ -189,16 +189,6 @@ def case_included(name, cases):
return
True
return
True
return
False
return
False
def
wait_for_port_available
(
port
,
timeout
):
begin_time
=
time
.
time
()
while
True
:
if
not
detect_port
(
port
):
return
if
time
.
time
()
-
begin_time
>
timeout
:
msg
=
'port {} is not available in {} seconds.'
.
format
(
port
,
timeout
)
raise
RuntimeError
(
msg
)
time
.
sleep
(
5
)
def
match_platform
(
test_case_config
):
def
match_platform
(
test_case_config
):
return
sys
.
platform
in
test_case_config
[
'platform'
].
split
(
' '
)
return
sys
.
platform
in
test_case_config
[
'platform'
].
split
(
' '
)
...
...
test/nni_test/nnitest/utils.py
View file @
f548d82f
...
@@ -168,6 +168,13 @@ def detect_port(port):
...
@@ -168,6 +168,13 @@ def detect_port(port):
except
:
except
:
return
False
return
False
def
snooze
():
'''Sleep to make sure previous stopped exp has enough time to exit'''
def
wait_for_port_available
(
port
,
timeout
):
time
.
sleep
(
6
)
begin_time
=
time
.
time
()
while
True
:
if
not
detect_port
(
port
):
return
if
time
.
time
()
-
begin_time
>
timeout
:
msg
=
'port {} is not available in {} seconds.'
.
format
(
port
,
timeout
)
raise
RuntimeError
(
msg
)
time
.
sleep
(
1
)
test/pipelines/pipelines-it-remote-linux-to-windows.yml
0 → 100644
View file @
f548d82f
jobs
:
-
job
:
"
integration_test_remote_linux_to_windows"
timeoutInMinutes
:
120
steps
:
-
script
:
make clean
displayName
:
"
clean
nni
source
code"
-
task
:
CopyFilesOverSSH@0
inputs
:
sshEndpoint
:
$(end_point)
contents
:
|
**
!**/dist/**
!**/node_modules/**
targetFolder
:
/tmp/nnitest/$(Build.BuildId)
overwrite
:
true
displayName
:
"
Copy
all
files
to
remote
machine"
timeoutInMinutes
:
10
-
task
:
SSH@0
inputs
:
sshEndpoint
:
$(end_point)
runOptions
:
commands
commands
:
cd "\tmp\nnitest\$(Build.BuildId)" && powershell.exe -command "conda activate l2w | .\uninstall.ps1 | .\install.ps1"
failOnStdErr
:
false
displayName
:
"
install
on
remote
windows"
-
script
:
python3 -m pip install --upgrade pip setuptools --user
displayName
:
"
Install
python
tools"
-
script
:
make easy-install
displayName
:
"
Install
nni
via
source
code"
-
script
:
|
sudo apt-get install swig -y
PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC
PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB
displayName
:
"
Install
dependencies
for
integration
tests
in
remote
mode"
-
script
:
|
set -e
cd test
python3 nni_test/nnitest/generate_ts_config.py --ts remote --remote_user $(remote_user) --remote_host $(remote_host) \
--remote_port $(remote_port) --remote_pwd $(remote_pwd) --nni_manager_ip $(nni_manager_ip)
cat config/training_service.yml
PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts remote
displayName
:
"
integration
test"
-
task
:
SSH@0
inputs
:
sshEndpoint
:
$(end_point)
runOptions
:
commands
commands
:
rmdir /s /q "\\?\c:\tmp\nnitest\$(Build.BuildId)"
condition
:
always()
displayName
:
"
clean
up
on
remote
server"
tools/nni_cmd/launcher.py
View file @
f548d82f
...
@@ -139,7 +139,9 @@ def set_remote_config(experiment_config, port, config_file_name):
...
@@ -139,7 +139,9 @@ def set_remote_config(experiment_config, port, config_file_name):
for
i
in
range
(
len
(
request_data
[
'machine_list'
])):
for
i
in
range
(
len
(
request_data
[
'machine_list'
])):
if
isinstance
(
request_data
[
'machine_list'
][
i
].
get
(
'gpuIndices'
),
int
):
if
isinstance
(
request_data
[
'machine_list'
][
i
].
get
(
'gpuIndices'
),
int
):
request_data
[
'machine_list'
][
i
][
'gpuIndices'
]
=
str
(
request_data
[
'machine_list'
][
i
].
get
(
'gpuIndices'
))
request_data
[
'machine_list'
][
i
][
'gpuIndices'
]
=
str
(
request_data
[
'machine_list'
][
i
].
get
(
'gpuIndices'
))
response
=
rest_put
(
cluster_metadata_url
(
port
),
json
.
dumps
(
request_data
),
REST_TIME_OUT
)
# It needs to connect to all remote machines, and the connection timeout is 30 seconds.
# So the timeout here should be longer.
response
=
rest_put
(
cluster_metadata_url
(
port
),
json
.
dumps
(
request_data
),
60
,
True
)
err_message
=
''
err_message
=
''
if
not
response
or
not
check_response
(
response
):
if
not
response
or
not
check_response
(
response
):
if
response
is
not
None
:
if
response
is
not
None
:
...
...
tools/nni_cmd/nnictl_utils.py
View file @
f548d82f
...
@@ -227,7 +227,7 @@ def stop_experiment(args):
...
@@ -227,7 +227,7 @@ def stop_experiment(args):
experiment_config
=
Experiments
()
experiment_config
=
Experiments
()
experiment_dict
=
experiment_config
.
get_all_experiments
()
experiment_dict
=
experiment_config
.
get_all_experiments
()
for
experiment_id
in
experiment_id_list
:
for
experiment_id
in
experiment_id_list
:
print_normal
(
'Stoping experiment %s'
%
experiment_id
)
print_normal
(
'Stop
p
ing experiment %s'
%
experiment_id
)
nni_config
=
Config
(
experiment_dict
[
experiment_id
][
'fileName'
])
nni_config
=
Config
(
experiment_dict
[
experiment_id
][
'fileName'
])
rest_pid
=
nni_config
.
get_config
(
'restServerPid'
)
rest_pid
=
nni_config
.
get_config
(
'restServerPid'
)
if
rest_pid
:
if
rest_pid
:
...
...
tools/nni_trial_tool/constants.py
View file @
f548d82f
...
@@ -7,8 +7,6 @@ API_ROOT_URL = '/api/v1/nni-pai'
...
@@ -7,8 +7,6 @@ API_ROOT_URL = '/api/v1/nni-pai'
BASE_URL
=
'http://{}'
BASE_URL
=
'http://{}'
HOME_DIR
=
os
.
path
.
join
(
os
.
environ
[
'HOME'
],
'nni'
)
LOG_DIR
=
os
.
environ
[
'NNI_OUTPUT_DIR'
]
LOG_DIR
=
os
.
environ
[
'NNI_OUTPUT_DIR'
]
NNI_PLATFORM
=
os
.
environ
[
'NNI_PLATFORM'
]
NNI_PLATFORM
=
os
.
environ
[
'NNI_PLATFORM'
]
...
...
tools/nni_trial_tool/trial_keeper.py
View file @
f548d82f
...
@@ -2,23 +2,27 @@
...
@@ -2,23 +2,27 @@
# Licensed under the MIT license.
# Licensed under the MIT license.
import
argparse
import
argparse
import
os
import
ctypes
from
subprocess
import
Popen
import
json
import
time
import
logging
import
logging
import
s
hlex
import
o
s
import
re
import
re
import
shlex
import
sys
import
sys
import
json
import
threading
import
threading
from
pyhdfs
import
HdfsClient
import
time
from
subprocess
import
Popen
import
pkg_resources
import
pkg_resources
from
.rest_utils
import
rest_post
,
rest_get
from
pyhdfs
import
HdfsClient
from
.url_utils
import
gen_send_version_url
,
gen_parameter_meta_url
from
.constants
import
LOG_DIR
,
NNI_PLATFORM
,
MULTI_PHASE
,
NNI_TRIAL_JOB_ID
,
NNI_SYS_DIR
,
NNI_EXP_ID
from
.constants
import
(
LOG_DIR
,
MULTI_PHASE
,
NNI_EXP_ID
,
NNI_PLATFORM
,
from
.hdfsClientUtility
import
copyDirectoryToHdfs
,
copyHdfsDirectoryToLocal
,
copyHdfsFileToLocal
NNI_SYS_DIR
,
NNI_TRIAL_JOB_ID
)
from
.log_utils
import
LogType
,
nni_log
,
RemoteLogger
,
StdOutputType
from
.hdfsClientUtility
import
(
copyDirectoryToHdfs
,
copyHdfsDirectoryToLocal
,
copyHdfsFileToLocal
)
from
.log_utils
import
LogType
,
RemoteLogger
,
StdOutputType
,
nni_log
from
.rest_utils
import
rest_get
,
rest_post
from
.url_utils
import
gen_parameter_meta_url
,
gen_send_version_url
logger
=
logging
.
getLogger
(
'trial_keeper'
)
logger
=
logging
.
getLogger
(
'trial_keeper'
)
regular
=
re
.
compile
(
'v?(?P<version>[0-9](\.[0-9]){0,1}).*'
)
regular
=
re
.
compile
(
'v?(?P<version>[0-9](\.[0-9]){0,1}).*'
)
...
@@ -80,6 +84,10 @@ def main_loop(args):
...
@@ -80,6 +84,10 @@ def main_loop(args):
if
hdfs_client
is
not
None
:
if
hdfs_client
is
not
None
:
copyHdfsDirectoryToLocal
(
args
.
nni_hdfs_exp_dir
,
os
.
getcwd
(),
hdfs_client
)
copyHdfsDirectoryToLocal
(
args
.
nni_hdfs_exp_dir
,
os
.
getcwd
(),
hdfs_client
)
if
args
.
job_id_file
:
with
open
(
args
.
job_id_file
,
'w'
)
as
job_file
:
job_file
.
write
(
"%d"
%
os
.
getpid
())
# Notice: We don't appoint env, which means subprocess will inherit current environment and that is expected behavior
# Notice: We don't appoint env, which means subprocess will inherit current environment and that is expected behavior
log_pipe_stdout
=
trial_syslogger_stdout
.
get_pipelog_reader
()
log_pipe_stdout
=
trial_syslogger_stdout
.
get_pipelog_reader
()
process
=
Popen
(
args
.
trial_command
,
shell
=
True
,
stdout
=
log_pipe_stdout
,
stderr
=
log_pipe_stdout
)
process
=
Popen
(
args
.
trial_command
,
shell
=
True
,
stdout
=
log_pipe_stdout
,
stderr
=
log_pipe_stdout
)
...
@@ -91,6 +99,9 @@ def main_loop(args):
...
@@ -91,6 +99,9 @@ def main_loop(args):
retCode
=
process
.
poll
()
retCode
=
process
.
poll
()
# child worker process exits and all stdout data is read
# child worker process exits and all stdout data is read
if
retCode
is
not
None
and
log_pipe_stdout
.
set_process_exit
()
and
log_pipe_stdout
.
is_read_completed
==
True
:
if
retCode
is
not
None
and
log_pipe_stdout
.
set_process_exit
()
and
log_pipe_stdout
.
is_read_completed
==
True
:
# On Windows, the retCode -1 is 4294967295. It's larger than c_long, and raises OverflowError.
# So convert it to int32.
retCode
=
ctypes
.
c_long
(
retCode
).
value
nni_log
(
LogType
.
Info
,
'subprocess terminated. Exit code is {}. Quit'
.
format
(
retCode
))
nni_log
(
LogType
.
Info
,
'subprocess terminated. Exit code is {}. Quit'
.
format
(
retCode
))
if
hdfs_output_dir
is
not
None
:
if
hdfs_output_dir
is
not
None
:
# Copy local directory to hdfs for OpenPAI
# Copy local directory to hdfs for OpenPAI
...
@@ -218,6 +229,7 @@ if __name__ == '__main__':
...
@@ -218,6 +229,7 @@ if __name__ == '__main__':
PARSER
.
add_argument
(
'--webhdfs_path'
,
type
=
str
,
help
=
'the webhdfs path used in webhdfs URL'
)
PARSER
.
add_argument
(
'--webhdfs_path'
,
type
=
str
,
help
=
'the webhdfs path used in webhdfs URL'
)
PARSER
.
add_argument
(
'--nni_manager_version'
,
type
=
str
,
help
=
'the nni version transmitted from nniManager'
)
PARSER
.
add_argument
(
'--nni_manager_version'
,
type
=
str
,
help
=
'the nni version transmitted from nniManager'
)
PARSER
.
add_argument
(
'--log_collection'
,
type
=
str
,
help
=
'set the way to collect log in trialkeeper'
)
PARSER
.
add_argument
(
'--log_collection'
,
type
=
str
,
help
=
'set the way to collect log in trialkeeper'
)
PARSER
.
add_argument
(
'--job_id_file'
,
type
=
str
,
help
=
'set job id file for operating and monitoring job.'
)
args
,
unknown
=
PARSER
.
parse_known_args
()
args
,
unknown
=
PARSER
.
parse_known_args
()
if
args
.
trial_command
is
None
:
if
args
.
trial_command
is
None
:
exit
(
1
)
exit
(
1
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment