Port trial examples' config file to v2 (#3721)

Co-authored-by: liuzhe <zhe.liu@microsoft.com>

Port trial examples' config file to v2 (#3721)
Co-authored-by: liuzhe <zhe.liu@microsoft.com>
eb65bc32 · liuzhe-lz · GitHub · c4d449c5 · c4d449c5 · c4d449c5
Unverified Commit eb65bc32 authored Jun 08, 2021 by liuzhe-lz Committed by GitHub Jun 08, 2021
20 changed files
--- a/examples/trials/mnist-annotation/config_gpu.yml
+++ b/examples/trials/mnist-annotation/config_gpu.yml
-authorName: default
-experimentName: example_mnist
-trialConcurrency: 4
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: local
-#choice: true, false
-useAnnotation: true
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 1
--- a/examples/trials/mnist-annotation/config_kubeflow.yml
+++ b/examples/trials/mnist-annotation/config_kubeflow.yml
-authorName: default
-experimentName: example_dist
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 1
-#choice: local, remote, pai, kubeflow
-trainingServicePlatform: kubeflow
-#choice: true, false
-useAnnotation: true
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  codeDir: .
-  worker:
-    replicas: 1
-    command: python3 mnist.py
-    gpuNum: 0
-    cpuNum: 1
-    memoryMB: 8192
-    image: msranni/nni:latest
-kubeflowConfig:
-  operator: tf-operator
-  apiVersion: v1alpha2
-  storage: nfs
-  nfs:
-    server: 10.10.10.10
-    path: /var/nfs/general
\ No newline at end of file
--- a/examples/trials/mnist-annotation/config_pai.yml
+++ b/examples/trials/mnist-annotation/config_pai.yml
-authorName: default
-experimentName: example_mnist
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: pai
-#choice: true, false
-useAnnotation: true
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-  nniManagerNFSMountPath: /home/user/mnt
-  containerNFSMountPath: /mnt/data/user
-  paiStorageConfigName: confignfs-data
-paiConfig:
-  #The username to login pai
-  userName: username
-  #The token to login pai
-  token: token
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
--- a/examples/trials/mnist-annotation/config_remote.yml
+++ b/examples/trials/mnist-annotation/config_remote.yml
-authorName: default
-experimentName: example_mnist
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: remote
-#choice: true, false
-useAnnotation: true
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 0
-#machineList can be empty if the platform is local
-machineList:
-  - ip: 10.1.1.1
-    username: bob
-    passwd: bob123
-    #port can be skip if using default ssh port 22
-    #port: 22
-  - ip: 10.1.1.2
-    username: bob
-    passwd: bob123
-  - ip: 10.1.1.3
-    username: bob
-    passwd: bob123
--- a/examples/trials/mnist-batch-tune-keras/config.yml
+++ b/examples/trials/mnist-batch-tune-keras/config.yml
-authorName: default
+searchSpaceFile: search_space.json
-experimentName: example_mnist-keras
+trialCommand: python3 mnist-keras.py
+trialGpuNumber: 0
 trialConcurrency: 1
-maxExecDuration: 1h
+maxTrialNumber: 10
-maxTrialNum: 10
+maxExperimentDuration: 1h
-#choice: local, remote, pai
-trainingServicePlatform: local
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
 tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
+  name: BatchTuner
-  #SMAC (SMAC should be installed through nnictl)
+trainingService:  # For other platforms, check mnist-pytorch example
-  builtinTunerName: BatchTuner
+  platform: local
-trial:
-  command: python3 mnist-keras.py
-  codeDir: .
-  gpuNum: 0
--- a/examples/trials/mnist-batch-tune-keras/config_pai.yml
+++ b/examples/trials/mnist-batch-tune-keras/config_pai.yml
-authorName: default
-experimentName: example_mnist-keras
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: pai
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: BatchTuner
-trial:
-  command: python3 mnist-keras.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-  nniManagerNFSMountPath: {replace_to_your_nfs_mount_path}
-  containerNFSMountPath: {replace_to_your_container_mount_path}
-  paiStorageConfigName: {replace_to_your_storage_config_name}
-paiConfig:
-  #The username to login pai
-  userName: username
-  #The token to login pai
-  token: token
-  #The host of restful server of pai
-  host: 10.10.10.10
--- a/examples/trials/mnist-distributed/config_kubeflow.yml
+++ b/examples/trials/mnist-distributed/config_kubeflow.yml
--- a/examples/trials/mnist-distributed/dist_mnist.py
+++ b/examples/trials/mnist-distributed/dist_mnist.py
--- a/examples/trials/mnist-distributed/search_space.json
+++ b/examples/trials/mnist-distributed/search_space.json
--- a/examples/trials/mnist-nested-search-space/config.yml
+++ b/examples/trials/mnist-nested-search-space/config.yml
-authorName: default
+searchSpaceFile: search_space.json
-experimentName: mnist-nested-search-space
+trialCommand: python3 mnist.py
+trialGpuNumber: 0
 trialConcurrency: 2
-maxExecDuration: 1h
+maxTrialNumber: 100
-maxTrialNum: 100
+maxExperimentDuration: 1h
-#choice: local, remote
-trainingServicePlatform: local
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
 tuner:
-  #choice: TPE, Random, Anneal, Evolution
+  name: TPE
-  builtinTunerName: TPE
  classArgs:
-    #choice: maximize, minimize
    optimize_mode: maximize
-trial:
+trainingService:  # For other platforms, check mnist-pytorch example
-  command: python3 mnist.py
+  platform: local
-  codeDir: .
+  useActiveGpu: false  # NOTE: Use "true" if you are using an OS with a graphical interface (e.g. Windows 10, Ubuntu desktop)
-  gpuNum: 0
+                       # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu
--- a/examples/trials/mnist-nested-search-space/search_space.json
+++ b/examples/trials/mnist-nested-search-space/search_space.json
--- a/examples/trials/mnist-pbt-tuner-pytorch/config.yml
+++ b/examples/trials/mnist-pbt-tuner-pytorch/config.yml
-authorName: default
+searchSpaceFile: search_space.json
-experimentName: example_mnist_pbt_tuner_pytorch
+trialCommand: python3 mnist.py
+trialGpuNumber: 1
 trialConcurrency: 3
-maxExecDuration: 2h
+maxTrialNumber: 100
-maxTrialNum: 100
+maxExperimentDuration: 2h
-#choice: local, remote, pai
-trainingServicePlatform: local
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
 tuner:
-#  codeDir: ~/nni/src/sdk/pynni/nni/pbt_tuner
+  name: PBTTuner
-#  classFileName: pbt_tuner.py
-#  className: PBTTuner
-  builtinTunerName: PBTTuner
  classArgs:
-    #choice: maximize, minimize
    optimize_mode: maximize
-trial:
+trainingService:  # For other platforms, check mnist-pytorch example
-  command: python3 mnist.py
+  platform: local
-  codeDir: .
+  useActiveGpu: false  # NOTE: Use "true" if you are using an OS with a graphical interface (e.g. Windows 10, Ubuntu desktop)
-  gpuNum: 1
+                       # Check the doc for details: https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu
--- a/examples/trials/mnist-pytorch/.gitignore
+++ b/examples/trials/mnist-pytorch/.gitignore
+data
--- a/examples/trials/mnist-pytorch/config.yml
+++ b/examples/trials/mnist-pytorch/config.yml
-authorName: default
+# This is the minimal config file for an NNI experiment.
-experimentName: example_mnist_pytorch
+# Use "nnictl create --config config.yml" to launch this experiment.
+# Afterwards, you can check "config_detailed.yml" for more explanation.
+searchSpaceFile: search_space.json
+trialCommand: python3 mnist.py  # NOTE: change "python3" to "python" if you are using Windows
+trialGpuNumber: 0
 trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: local
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
 tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
+  name: TPE
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
  classArgs:
-    #choice: maximize, minimize
    optimize_mode: maximize
-trial:
+trainingService:
-  command: python3 mnist.py
+  platform: local
-  codeDir: .
-  gpuNum: 0
--- a/examples/trials/mnist-pytorch/config_aml.yml
+++ b/examples/trials/mnist-pytorch/config_aml.yml
-authorName: default
+searchSpaceFile: search_space.json
-experimentName: example_mnist_pytorch
+trialCommand: python3 mnist.py
 trialConcurrency: 1
-maxExecDuration: 1h
+maxTrialNumber: 10
-maxTrialNum: 10
-trainingServicePlatform: aml
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
 tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
+  name: TPE
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
  classArgs:
-    #choice: maximize, minimize
    optimize_mode: maximize
-trial:
+trainingService:
-  command: python3 mnist.py
+  platform: aml
-  codeDir: .
+  dockerImage: msranni/nni
-  image: msranni/nni
+  subscriptionId: ${your subscription ID}
-amlConfig:
+  resourceGroup: ${your resource group}
-  subscriptionId: ${replace_to_your_subscriptionId}
+  workspaceName: ${your workspace name}
-  resourceGroup: ${replace_to_your_resourceGroup}
+  computeTarget: ${your compute target}
-  workspaceName: ${replace_to_your_workspaceName}
-  computeTarget: ${replace_to_your_computeTarget}
--- a/examples/trials/mnist-pytorch/config_detailed.yml
+++ b/examples/trials/mnist-pytorch/config_detailed.yml
+# This example shows more configurable fields compared to the minimal "config.yml".
+# You can use "nnictl create --config config_detailed.yml" to launch this experiment.
+# If you see an error message saying "port 8080 is used", use "nnictl stop --all" to stop previous experiments.
+name: MNIST                     # An optional name to help you distinguish experiments.
+# Hyper-parameter search space can either be configured here or in a separate file.
+# "config.yml" shows how to specify a separate search space file.
+# The common schema of search space is documented here:
+#   https://nni.readthedocs.io/en/stable/Tutorial/SearchSpaceSpec.html
+searchSpace:
+  batch_size:
+    _type: choice
+    _value: [16, 32, 64, 128]
+  hidden_size:
+    _type: choice
+    _value: [128, 256, 512, 1024]
+  lr:
+    _type: choice
+    _value: [0.0001, 0.001, 0.01, 0.1]
+  momentum:
+    _type: uniform
+    _value: [0, 1]
+trialCommand: python3 mnist.py  # The command to launch a trial. NOTE: change "python3" to "python" if you are using Windows.
+trialCodeDirectory: .           # The path of trial code. By default it's ".", which means the same directory as this config file.
+trialGpuNumber: 1               # How many GPUs should each trial use. CUDA is required when it's greater than zero.
+trialConcurrency: 4             # Run 4 trials concurrently.
+maxTrialNumber: 10              # Generate at most 10 trials.
+maxExperimentDuration: 1h       # Stop generating trials after 1 hour.
+tuner:                          # Configure the tuning algorithm.
+  name: TPE                     # Supported algorithms: TPE, Random, Anneal, Evolution, GridSearch, GPTuner, PBTTuner, etc.
+                                #   Full list:  https://nni.readthedocs.io/en/latest/Tuner/BuiltinTuner.html
+  classArgs:                    # Algorithm specific arguments. See the tuner's doc for details.
+    optimize_mode: maximize     #   "minimize" or "maximize"
+# Configure the training platform.
+# Supported platforms: local, remote, openpai, aml, kubeflow, kubernetes, adl.
+trainingService:
+  platform: local
+  useActiveGpu: false           # NOTE: Use "true" if you are using an OS with a graphical interface (e.g. Windows 10, Ubuntu desktop)
+                                #   Reason and details:  https://nni.readthedocs.io/en/latest/reference/experiment_config.html#useactivegpu
--- a/examples/trials/mnist-pytorch/config_hybrid.yml
+++ b/examples/trials/mnist-pytorch/config_hybrid.yml
+searchSpaceFile: search_space.json
+trialCommand: python3 mnist.py
+trialGpuNumber: 0
+trialConcurrency: 5
+maxTrialNumber: 20
+tuner:
+  name: TPE
+  classArgs:
+    optimize_mode: maximize
+# For local, remote, openpai, and aml, NNI can use multiple training services at one time
+trainingService:
+  - platform: local
+  - platform: remote
+    machineList:
+      - host: ${your server's IP or domain name}
+        user: ${your user name}
+        ssh_key_file: ~/.ssh/id_rsa
+  - platform: aml
+    dockerImage: msranni/nni
+    subscriptionId: ${your subscription ID}
+    resourceGroup: ${your resource group}
+    workspaceName: ${your workspace name}
+    computeTarget: ${your compute target}
--- a/examples/trials/mnist-pytorch/config_openpai.yml
+++ b/examples/trials/mnist-pytorch/config_openpai.yml
+searchSpaceFile: search_space.json
+trialCommand: python3 mnist.py
+trialGpuNumber: 0
+trialConcurrency: 1
+maxTrialNumber: 10
+tuner:
+  name: TPE
+  classArgs:
+    optimize_mode: maximize
+trainingService:
+  platform: openpai
+  host: http://123.123.123.123
+  username: ${your user name}
+  token: ${your token}
+  dockerImage: msranni/nni
+  trialCpuNumber: 1
+  trialMemorySize: 8GB
+  storageConfigName: ${your storage config name}
+  localStorageMountPoint: ${NFS mount point on local machine}
+  containerStorageMountPoint: ${NFS mount point inside Docker container}
--- a/examples/trials/mnist-pytorch/config_pai.yml
+++ b/examples/trials/mnist-pytorch/config_pai.yml
-authorName: default
-experimentName: example_mnist_pytorch
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: pai
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-  nniManagerNFSMountPath: {replace_to_your_nfs_mount_path}
-  containerNFSMountPath: {replace_to_your_container_mount_path}
-  paiStorageConfigName: {replace_to_your_storage_config_name}
-paiConfig:
-  #The username to login pai
-  userName: username
-  #The token to login pai
-  token: token
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
--- a/examples/trials/mnist-pytorch/config_remote.yml
+++ b/examples/trials/mnist-pytorch/config_remote.yml
+searchSpaceFile: search_space.json
+trialCommand: python3 mnist.py
+trialGpuNumber: 0
+trialConcurrency: 4
+maxTrialNumber: 20
+tuner:
+  name: TPE
+  classArgs:
+    optimize_mode: maximize
+trainingService:
+  platform: remote
+  machineList:
+    - host: ${your server's IP or domain name}
+      user: ${your user name}
+      ssh_key_file: ~/.ssh/id_rsa  # We recommend a public key over a password; it's more secure and convenient.
+    # You can specify more than one SSH server:
+    - host: 123.123.123.123
+      port: 10022
+      user: nniuser
+      password: 12345
+      pythonPath: /usr/bin  # Other examples:
+                            #   /opt/python3.9/bin
+                            #   C:/Python39
+                            #   C:/Users/USERNAME/.conda/envs/ENVNAME;C:/Users/USERNAME/.conda/envs/ENVNAME/Scripts;C:/Users/USERNAME/.conda/envs/ENVNAME/Library/bin