Unverified Commit e8d253c0 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Fix pai examples (#1996)

parent 649a9c38
......@@ -37,6 +37,7 @@ trial:
virtualCluster: default
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
# Configuration to access OpenPAI Cluster
paiConfig:
userName: your_pai_nni_user
......
......@@ -6,7 +6,7 @@ The original `pai` mode is modificated to `paiYarn` mode, which is a distributed
Install NNI, follow the install guide [here](../Tutorial/QuickStart.md).
## Run an experiment
Use `examples/trials/mnist-annotation` as an example. The NNI config YAML file's content is like:
Use `examples/trials/mnist-tfv1` as an example. The NNI config YAML file's content is like:
```yaml
authorName: your_name
......@@ -22,14 +22,14 @@ trainingServicePlatform: paiYarn
# search space file
searchSpacePath: search_space.json
# choice: true, false
useAnnotation: true
useAnnotation: false
tuner:
builtinTunerName: TPE
classArgs:
optimize_mode: maximize
trial:
command: python3 mnist.py
codeDir: ~/nni/examples/trials/mnist-annotation
codeDir: ~/nni/examples/trials/mnist-tfv1
gpuNum: 0
cpuNum: 1
memoryMB: 8196
......
......@@ -23,10 +23,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_auto-gbdt
trialConcurrency: 1
maxExecDuration: 10h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: minimize
trial:
command: python3 main.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
......@@ -23,10 +23,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
authorName: default
experimentName: example_pytorch_cifar10
trialConcurrency: 1
maxExecDuration: 100h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 main.py
codeDir: .
gpuNum: 1
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
......@@ -21,8 +21,11 @@ trial:
gpuNum: 1
virtualCluster: nni
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
nniManagerIp: <nni_manager_ip>
paiConfig:
userName: <username>
passWord: <password>
token: <token>
host: <host>
authorName: unknown
experimentName: example_efficient_net
trialConcurrency: 8
maxExecDuration: 48h
maxTrialNum: 100
trainingServicePlatform: paiYarn
searchSpacePath: search_net.json
useAnnotation: false
tuner:
codeDir: .
classFileName: tuner.py
className: FixedProductTuner
classArgs:
product: 2
trial:
codeDir: EfficientNet-PyTorch
command: sh train_imagenet.sh
cpuNum: 4
memoryMB: 25000
shmMB: 25000
gpuNum: 1
virtualCluster: nni
image: msranni/nni:latest
nniManagerIp: <nni_manager_ip>
paiYarnConfig:
userName: <username>
passWord: <password>
host: <host>
......@@ -23,10 +23,13 @@ trial:
memoryMB: 32869
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
authorName: default
experimentName: example_ga_squad
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
#choice: true, false
useAnnotation: false
#Your nni_manager ip
nniManagerIp: 10.10.10.10
tuner:
codeDir: ../../tuners/ga_customer_tuner
classFileName: customer_tuner.py
className: CustomerTuner
classArgs:
optimize_mode: maximize
trial:
command: chmod +x ./download.sh && ./download.sh && python3 trial.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 32869
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
......@@ -27,10 +27,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
authorName: default
experimentName: example_mnist_hyperband
maxExecDuration: 1h
maxTrialNum: 10000
trialConcurrency: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
advisor:
#choice: Hyperband, BOHB
#(BOHB should be installed through nnictl)
builtinAdvisorName: Hyperband
classArgs:
#R: the maximum trial budget
R: 100
#eta: proportion of discarded trials
eta: 3
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
......@@ -22,10 +22,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
#choice: true, false
useAnnotation: true
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
......@@ -20,10 +20,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
authorName: default
experimentName: example_mnist-keras
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: BatchTuner
trial:
command: python3 mnist-keras.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
......@@ -23,10 +23,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_mnist-keras
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist-keras.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
......@@ -23,10 +23,13 @@ trial:
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
nniManagerNFSMountPath: /home/user/mnt
containerNFSMountPath: /mnt/data/user
paiStoragePlugin: team_wise
paiConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The token to login pai
token: token
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
authorName: default
experimentName: example_mnist_pytorch
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: paiYarn
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
cpuNum: 1
memoryMB: 8196
#The docker image to run nni job on pai
image: msranni/nni:latest
paiYarnConfig:
#The username to login pai
userName: username
#The password to login pai
passWord: password
#The host of restful server of pai
host: 10.10.10.10
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment