Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
649a9c38
Unverified
Commit
649a9c38
authored
Feb 07, 2020
by
SparkSnail
Committed by
GitHub
Feb 07, 2020
Browse files
Add frameworkcontroller pipeline (#1971)
merging to master
parent
b49b38f8
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
105 additions
and
3 deletions
+105
-3
test/config_test.py
test/config_test.py
+7
-1
test/generate_ts_config.py
test/generate_ts_config.py
+17
-2
test/pipelines-it-frameworkcontroller.yml
test/pipelines-it-frameworkcontroller.yml
+55
-0
test/training_service.yml
test/training_service.yml
+26
-0
No files found.
test/config_test.py
View file @
649a9c38
...
...
@@ -30,6 +30,12 @@ def gen_new_config(config_file, training_service='local'):
if
'gpuNum'
in
config
[
'trial'
]:
config
[
'trial'
].
pop
(
'gpuNum'
)
if
training_service
==
'frameworkcontroller'
:
it_config
[
training_service
][
'trial'
][
'taskRoles'
][
0
][
'command'
]
=
config
[
'trial'
][
'command'
]
config
[
'trial'
].
pop
(
'command'
)
if
'gpuNum'
in
config
[
'trial'
]:
config
[
'trial'
].
pop
(
'gpuNum'
)
deep_update
(
config
,
it_config
[
'all'
])
deep_update
(
config
,
it_config
[
training_service
])
...
...
@@ -106,7 +112,7 @@ if __name__ == '__main__':
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--config"
,
type
=
str
,
default
=
None
)
parser
.
add_argument
(
"--exclude"
,
type
=
str
,
default
=
None
)
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'local'
,
'remote'
,
'pai'
,
'kubeflow'
],
default
=
'local'
)
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'local'
,
'remote'
,
'pai'
,
'kubeflow'
,
'frameworkcontroller'
],
default
=
'local'
)
parser
.
add_argument
(
"--local_gpu"
,
action
=
'store_true'
)
parser
.
add_argument
(
"--preinstall"
,
action
=
'store_true'
)
args
=
parser
.
parse_args
()
...
...
test/generate_ts_config.py
View file @
649a9c38
...
...
@@ -42,6 +42,21 @@ def update_training_service_config(args):
config
[
args
.
ts
][
'kubeflowConfig'
][
'azureStorage'
][
'azureShare'
]
=
args
.
azs_share
if
args
.
nni_docker_image
is
not
None
:
config
[
args
.
ts
][
'trial'
][
'worker'
][
'image'
]
=
args
.
nni_docker_image
elif
args
.
ts
==
'frameworkcontroller'
:
if
args
.
nfs_server
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'nfs'
][
'server'
]
=
args
.
nfs_server
if
args
.
nfs_path
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'nfs'
][
'path'
]
=
args
.
nfs_path
if
args
.
keyvault_vaultname
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'keyVault'
][
'vaultName'
]
=
args
.
keyvault_vaultname
if
args
.
keyvault_name
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'keyVault'
][
'name'
]
=
args
.
keyvault_name
if
args
.
azs_account
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'azureStorage'
][
'accountName'
]
=
args
.
azs_account
if
args
.
azs_share
is
not
None
:
config
[
args
.
ts
][
'frameworkcontrollerConfig'
][
'azureStorage'
][
'azureShare'
]
=
args
.
azs_share
if
args
.
nni_docker_image
is
not
None
:
config
[
args
.
ts
][
'trial'
][
'taskRoles'
][
0
][
'image'
]
=
args
.
nni_docker_image
elif
args
.
ts
==
'remote'
:
if
args
.
remote_user
is
not
None
:
config
[
args
.
ts
][
'machineList'
][
0
][
'username'
]
=
args
.
remote_user
...
...
@@ -69,7 +84,7 @@ def convert_command():
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'pai'
,
'kubeflow'
,
'remote'
,
'local'
],
default
=
'pai'
)
parser
.
add_argument
(
"--ts"
,
type
=
str
,
choices
=
[
'pai'
,
'kubeflow'
,
'remote'
,
'local'
,
'frameworkcontroller'
],
default
=
'pai'
)
parser
.
add_argument
(
"--nni_docker_image"
,
type
=
str
)
parser
.
add_argument
(
"--nni_manager_ip"
,
type
=
str
)
# args for PAI
...
...
@@ -79,7 +94,7 @@ if __name__ == '__main__':
parser
.
add_argument
(
"--data_dir"
,
type
=
str
)
parser
.
add_argument
(
"--output_dir"
,
type
=
str
)
parser
.
add_argument
(
"--vc"
,
type
=
str
)
# args for kubeflow
# args for kubeflow
and frameworkController
parser
.
add_argument
(
"--nfs_server"
,
type
=
str
)
parser
.
add_argument
(
"--nfs_path"
,
type
=
str
)
parser
.
add_argument
(
"--keyvault_vaultname"
,
type
=
str
)
...
...
test/pipelines-it-frameworkcontroller.yml
0 → 100644
View file @
649a9c38
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
jobs
:
-
job
:
'
integration_test_frameworkController'
timeoutInMinutes
:
0
steps
:
-
script
:
python3 -m pip install --upgrade pip setuptools --user
displayName
:
'
Install
python
tools'
-
script
:
|
cd deployment/pypi
echo 'building prerelease package...'
make build
ls $(Build.SourcesDirectory)/deployment/pypi/dist/
condition
:
eq( variables['build_docker_img'], 'true' )
displayName
:
'
build
nni
bdsit_wheel'
-
script
:
|
source install.sh
displayName
:
'
Install
nni
toolkit
via
source
code'
-
script
:
|
sudo apt-get install swig -y
PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC
PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB
displayName
:
'
Install
dependencies
for
integration
tests
in
frameworkcontroller
mode'
-
script
:
|
if [ $(build_docker_img) = 'true' ]
then
cd deployment/pypi
docker login -u $(docker_hub_user) -p $(docker_hub_pwd)
echo 'updating docker file for installing nni from local...'
# update Dockerfile to install NNI in docker image from whl file built in last step
sed -ie 's/RUN python3 -m pip --no-cache-dir install nni/COPY .\/dist\/* .\nRUN python3 -m pip install nni-*.whl/' ../docker/Dockerfile
cat ../docker/Dockerfile
export IMG_TAG=`date -u +%y%m%d%H%M`
docker build -f ../docker/Dockerfile -t $(test_docker_img_name):$IMG_TAG .
docker push $(test_docker_img_name):$IMG_TAG
export TEST_IMG=$(test_docker_img_name):$IMG_TAG
cd ../../
else
export TEST_IMG=$(existing_docker_img)
fi
echo "TEST_IMG:$TEST_IMG"
cd test
python3 generate_ts_config.py --ts frameworkcontroller --keyvault_vaultname $(keyVault_vaultName) --keyvault_name $(keyVault_name) \
--azs_account $(azureStorage_accountName) --azs_share $(azureStorage_azureShare) --nni_docker_image $TEST_IMG --nni_manager_ip $(nni_manager_ip)
cat training_service.yml
PATH=$HOME/.local/bin:$PATH python3 config_test.py --ts frameworkcontroller --exclude multi_phase
displayName
:
'
integration
test'
test/training_service.yml
View file @
649a9c38
...
...
@@ -24,6 +24,32 @@ kubeflow:
image
:
trainingServicePlatform
:
kubeflow
frameworkcontroller
:
maxExecDuration
:
15m
nniManagerIp
:
frameworkcontrollerConfig
:
serviceAccountName
:
frameworkbarrier
storage
:
azureStorage
keyVault
:
vaultName
:
name
:
azureStorage
:
accountName
:
azureShare
:
trial
:
taskRoles
:
-
name
:
worker
taskNum
:
1
command
:
gpuNum
:
1
cpuNum
:
1
memoryMB
:
8192
image
:
frameworkAttemptCompletionPolicy
:
minFailedTaskCount
:
1
minSucceededTaskCount
:
1
trainingServicePlatform
:
frameworkcontroller
local
:
trainingServicePlatform
:
local
pai
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment