Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
649a9c38
Unverified
Commit
649a9c38
authored
Feb 07, 2020
by
SparkSnail
Committed by
GitHub
Feb 07, 2020
Browse files
Add frameworkcontroller pipeline (#1971)
merging to master
parent
b49b38f8
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
105 additions
and
3 deletions
+105
-3
test/config_test.py
test/config_test.py
+7
-1
test/generate_ts_config.py
test/generate_ts_config.py
+17
-2
test/pipelines-it-frameworkcontroller.yml
test/pipelines-it-frameworkcontroller.yml
+55
-0
test/training_service.yml
test/training_service.yml
+26
-0
No files found.
test/config_test.py
View file @
649a9c38
...
...
@@ -29,6 +29,12 @@ def gen_new_config(config_file, training_service='local'):
config
[
'trial'
].
pop
(
'command'
)
if
'gpuNum'
in
config
[
'trial'
]:
config
[
'trial'
].
pop
(
'gpuNum'
)
if
training_service
==
'frameworkcontroller'
:
it_config
[
training_service
][
'trial'
][
'taskRoles'
][
0
][
'command'
]
=
config
[
'trial'
][
'command'
]
config
[
'trial'
].
pop
(
'command'
)
if
'gpuNum'
in
config
[
'trial'
]:
config
[
'trial'
].
pop
(
'gpuNum'
)
deep_update
(
config
,
it_config
[
'all'
])
deep_update
(
config
,
it_config
[
training_service
])
...
...
@@ -106,7 +112,7 @@ if __name__ == '__main__':
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--config"
,
type
=
str
,
default
=
None
)
parser
.
add_argument
(
"--exclude"
,
type
=
str
,
default
=
None
)
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'local'
,
'remote'
,
'pai'
,
'kubeflow'
],
default
=
'local'
)
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'local'
,
'remote'
,
'pai'
,
'kubeflow'
,
'frameworkcontroller'
],
default
=
'local'
)
parser
.
add_argument
(
"--local_gpu"
,
action
=
'store_true'
)
parser
.
add_argument
(
"--preinstall"
,
action
=
'store_true'
)
args
=
parser
.
parse_args
()
...
...
test/generate_ts_config.py
View file @
649a9c38
...
...
@@ -42,6 +42,21 @@ def update_training_service_config(args):
config
[
args
.
ts
][
'kubeflowConfig'
][
'azureStorage'
][
'azureShare'
]
=
args
.
azs_share
if
args
.
nni_docker_image
is
not
None
:
config
[
args
.
ts
][
'trial'
][
'worker'
][
'image'
]
=
args
.
nni_docker_image
elif
args
.
ts
==
'frameworkcontroller'
:
if
args
.
nfs_server
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'nfs'
][
'server'
]
=
args
.
nfs_server
if
args
.
nfs_path
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'nfs'
][
'path'
]
=
args
.
nfs_path
if
args
.
keyvault_vaultname
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'keyVault'
][
'vaultName'
]
=
args
.
keyvault_vaultname
if
args
.
keyvault_name
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'keyVault'
][
'name'
]
=
args
.
keyvault_name
if
args
.
azs_account
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'azureStorage'
][
'accountName'
]
=
args
.
azs_account
if
args
.
azs_share
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'azureStorage'
][
'azureShare'
]
=
args
.
azs_share
if
args
.
nni_docker_image
is
not
None
:
config
[
args
.
ts
][
'trial'
][
'taskRoles'
][
0
][
'image'
]
=
args
.
nni_docker_image
elif
args
.
ts
==
'remote'
:
if
args
.
remote_user
is
not
None
:
config
[
args
.
ts
][
'machineList'
][
0
][
'username'
]
=
args
.
remote_user
...
...
@@ -69,7 +84,7 @@ def convert_command():
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'pai'
,
'kubeflow'
,
'remote'
,
'local'
],
default
=
'pai'
)
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'pai'
,
'kubeflow'
,
'remote'
,
'local'
,
'frameworkcontroller'
],
default
=
'pai'
)
parser
.
add_argument
(
"--nni_docker_image"
,
type
=
str
)
parser
.
add_argument
(
"--nni_manager_ip"
,
type
=
str
)
# args for PAI
...
...
@@ -79,7 +94,7 @@ if __name__ == '__main__':
parser
.
add_argument
(
"--data_dir"
,
type
=
str
)
parser
.
add_argument
(
"--output_dir"
,
type
=
str
)
parser
.
add_argument
(
"--vc"
,
type
=
str
)
# args for kubeflow
# args for kubeflow
and frameworkController
parser
.
add_argument
(
"--nfs_server"
,
type
=
str
)
parser
.
add_argument
(
"--nfs_path"
,
type
=
str
)
parser
.
add_argument
(
"--keyvault_vaultname"
,
type
=
str
)
...
...
test/pipelines-it-frameworkcontroller.yml
0 → 100644
View file @
649a9c38
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
jobs
:
-
job
:
'
integration_test_frameworkController'
timeoutInMinutes
:
0
steps
:
-
script
:
python3 -m pip install --upgrade pip setuptools --user
displayName
:
'
Install
python
tools'
-
script
:
|
cd deployment/pypi
echo 'building prerelease package...'
make build
ls $(Build.SourcesDirectory)/deployment/pypi/dist/
condition
:
eq( variables['build_docker_img'], 'true' )
displayName
:
'
build
nni
bdist_wheel'
-
script
:
|
source install.sh
displayName
:
'
Install
nni
toolkit
via
source
code'
-
script
:
|
sudo apt-get install swig -y
PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC
PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB
displayName
:
'
Install
dependencies
for
integration
tests
in
frameworkcontroller
mode'
-
script
:
|
if [ $(build_docker_img) = 'true' ]
then
cd deployment/pypi
docker login -u $(docker_hub_user) -p $(docker_hub_pwd)
echo 'updating docker file for installing nni from local...'
# update Dockerfile to install NNI in docker image from whl file built in last step
sed -ie 's/RUN python3 -m pip --no-cache-dir install nni/COPY .\/dist\/* .\nRUN python3 -m pip install nni-*.whl/' ../docker/Dockerfile
cat ../docker/Dockerfile
export IMG_TAG=`date -u +%y%m%d%H%M`
docker build -f ../docker/Dockerfile -t $(test_docker_img_name):$IMG_TAG .
docker push $(test_docker_img_name):$IMG_TAG
export TEST_IMG=$(test_docker_img_name):$IMG_TAG
cd ../../
else
export TEST_IMG=$(existing_docker_img)
fi
echo "TEST_IMG:$TEST_IMG"
cd test
python3 generate_ts_config.py --ts frameworkcontroller --keyvault_vaultname $(keyVault_vaultName) --keyvault_name $(keyVault_name) \
--azs_account $(azureStorage_accountName) --azs_share $(azureStorage_azureShare) --nni_docker_image $TEST_IMG --nni_manager_ip $(nni_manager_ip)
cat training_service.yml
PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts frameworkcontroller --exclude multi_phase
displayName
:
'
integration
test'
test/training_service.yml
View file @
649a9c38
...
...
@@ -24,6 +24,32 @@ kubeflow:
image
:
trainingServicePlatform
:
kubeflow
frameworkcontroller
:
maxExecDuration
:
15m
nniManagerIp
:
frameworkcontrollerConfig
:
serviceAccountName
:
frameworkbarrier
storage
:
azureStorage
keyVault
:
vaultName
:
name
:
azureStorage
:
accountName
:
azureShare
:
trial
:
taskRoles
:
-
name
:
worker
taskNum
:
1
command
:
gpuNum
:
1
cpuNum
:
1
memoryMB
:
8192
image
:
frameworkAttemptCompletionPolicy
:
minFailedTaskCount
:
1
minSucceededTaskCount
:
1
trainingServicePlatform
:
frameworkcontroller
local
:
trainingServicePlatform
:
local
pai
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment