Merge pull request #3302 from microsoft/v2.0-merge

Merge branch v2.0 into master (no squash)

Merge pull request #3302 from microsoft/v2.0-merge
Merge branch v2.0 into master (no squash)
4784cc6c · liuzhe-lz · GitHub · 25db55ca · 349ead41 · 4784cc6c
Unverified commit 4784cc6c, authored Jan 14, 2021 by liuzhe-lz; committed by GitHub on Jan 14, 2021
20 changed files
--- a/examples/nas/search_space_zoo/nasbench201.py
+++ b/examples/nas/search_space_zoo/nasbench201.py
@@ -10,13 +10,13 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import DataLoader
-from nni.nas.pytorch import enas
+from nni.algorithms.nas.pytorch.darts import DartsTrainer
+from nni.algorithms.nas.pytorch import enas
 from nni.nas.pytorch.utils import AverageMeterGroup
 from nni.nas.pytorch.nasbench201 import NASBench201Cell
 from nni.nas.pytorch.fixed import apply_fixed_architecture
 from nni.nas.benchmarks.nasbench201 import query_nb201_trial_stats
 from nni.nas.pytorch.callbacks import ArchitectureCheckpoint, LRSchedulerCallback
-from nni.nas.pytorch.darts import DartsTrainer
 from utils import accuracy, reward_accuracy
 import datasets

--- a/examples/nas/spos/utils.py
+++ b/examples/nas/spos/utils.py
@@ -36,6 +36,6 @@ def accuracy(output, target, topk=(1, 5)):
    res = dict()
    for k in topk:
-        correct_k = correct[:k].view(-1).float().sum(0)
+        correct_k = correct[:k].reshape(-1).float().sum(0)
        res["acc{}".format(k)] = correct_k.mul_(1.0 / batch_size).item()
    return res
--- a/examples/nas/textnas/run_retrain.sh
+++ b/examples/nas/textnas/run_retrain.sh
@@ -2,7 +2,6 @@
 # Licensed under the MIT license.
 export PYTHONPATH="$(pwd)"
-export CUDA_VISIBLE_DEVICES=0
 python3 -u retrain.py \
  --train_ratio=1.0 \

--- a/examples/nas/textnas/utils.py
+++ b/examples/nas/textnas/utils.py
@@ -14,7 +14,7 @@ logger = logging.getLogger("nni.textnas")
 def get_length(mask):
    length = torch.sum(mask, 1)
-    length = length.long()
+    length = length.long().cpu()
    return length

--- a/examples/trials/auto-gbdt/config_paiYarn.yml
+++ b/examples/trials/auto-gbdt/config_paiYarn.yml
-authorName: default
-experimentName: example_auto-gbdt
-trialConcurrency: 1
-maxExecDuration: 10h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: paiYarn
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: minimize
-trial:
-  command: python3 main.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-paiYarnConfig:
-  #The username to login pai
-  userName: username
-  #The password to login pai
-  passWord: password
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
--- a/examples/trials/cifar10_pytorch/config_paiYarn.yml
+++ b/examples/trials/cifar10_pytorch/config_paiYarn.yml
-authorName: default
-experimentName: example_pytorch_cifar10
-trialConcurrency: 1
-maxExecDuration: 100h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: paiYarn
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 main.py
-  codeDir: .
-  gpuNum: 1
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-paiYarnConfig:
-  #The username to login pai
-  userName: username
-  #The password to login pai
-  passWord: password
-  #The host of restful server of pai
-  host: 10.10.10.10
--- a/examples/trials/efficientnet/config_paiYarn.yml
+++ b/examples/trials/efficientnet/config_paiYarn.yml
-authorName: unknown
-experimentName: example_efficient_net
-trialConcurrency: 8
-maxExecDuration: 48h
-maxTrialNum: 100
-trainingServicePlatform: paiYarn
-searchSpacePath: search_net.json
-useAnnotation: false
-tuner:
-  codeDir: .
-  classFileName: tuner.py
-  className: FixedProductTuner
-  classArgs:
-    product: 2
-trial:
-  codeDir: EfficientNet-PyTorch
-  command: sh train_imagenet.sh
-  cpuNum: 4
-  memoryMB: 25000
-  shmMB: 25000
-  gpuNum: 1
-  virtualCluster: nni
-  image: msranni/nni:latest
-nniManagerIp: <nni_manager_ip>
-paiYarnConfig:
-  userName: <username>
-  passWord: <password>
-  host: <host>
--- a/examples/trials/ga_squad/config_paiYarn.yml
+++ b/examples/trials/ga_squad/config_paiYarn.yml
-authorName: default
-experimentName: example_ga_squad
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: paiYarn
-#choice: true, false
-useAnnotation: false
-#Your nni_manager ip
-nniManagerIp: 10.10.10.10
-tuner:
-  codeDir: ../../tuners/ga_customer_tuner
-  classFileName: customer_tuner.py
-  className: CustomerTuner
-  classArgs:
-    optimize_mode: maximize
-trial:
-  command: chmod +x ./download.sh && ./download.sh && python3 trial.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 32869
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-paiYarnConfig:
-  #The username to login pai
-  userName: username
-  #The password to login pai
-  passWord: password
-  #The host of restful server of pai
-  host: 10.10.10.10
--- a/examples/trials/mnist-advisor/config_paiYarn.yml
+++ b/examples/trials/mnist-advisor/config_paiYarn.yml
-authorName: default
-experimentName: example_mnist_hyperband
-maxExecDuration: 1h
-maxTrialNum: 10000
-trialConcurrency: 10
-#choice: local, remote, pai
-trainingServicePlatform: paiYarn
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-advisor:
-  #choice: Hyperband, BOHB
-  #(BOHB should be installed through nnictl)
-  builtinAdvisorName: Hyperband
-  classArgs:
-    #R: the maximum trial budget
-    R: 100
-    #eta: proportion of discarded trials
-    eta: 3
-    #choice: maximize, minimize
-    optimize_mode: maximize
-    #choice: serial, parallelism
-    exec_mode: parallelism
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-paiYarnConfig:
-  #The username to login pai
-  userName: username
-  #The password to login pai
-  passWord: password
-  #The host of restful server of pai
-  host: 10.10.10.10
--- a/examples/trials/mnist-annotation/config_paiYarn.yml
+++ b/examples/trials/mnist-annotation/config_paiYarn.yml
-authorName: default
-experimentName: example_mnist
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: paiYarn
-#choice: true, false
-useAnnotation: true
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-paiYarnConfig:
-  #The username to login pai
-  userName: username
-  #The password to login pai
-  passWord: password
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
--- a/examples/trials/mnist-batch-tune-keras/config_paiYarn.yml
+++ b/examples/trials/mnist-batch-tune-keras/config_paiYarn.yml
-authorName: default
-experimentName: example_mnist-keras
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: paiYarn
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: BatchTuner
-trial:
-  command: python3 mnist-keras.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-paiYarnConfig:
-  #The username to login pai
-  userName: username
-  #The password to login pai
-  passWord: password
-  #The host of restful server of pai
-  host: 10.10.10.10
--- a/examples/trials/mnist-keras/config_paiYarn.yml
+++ b/examples/trials/mnist-keras/config_paiYarn.yml
-authorName: default
-experimentName: example_mnist-keras
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: paiYarn
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 mnist-keras.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-paiYarnConfig:
-  #The username to login pai
-  userName: username
-  #The password to login pai
-  passWord: password
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
--- a/examples/trials/mnist-pytorch/config_paiYarn.yml
+++ b/examples/trials/mnist-pytorch/config_paiYarn.yml
-authorName: default
-experimentName: example_mnist_pytorch
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: paiYarn
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-paiYarnConfig:
-  #The username to login pai
-  userName: username
-  #The password to login pai
-  passWord: password
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
--- a/examples/trials/mnist-pytorch/config_v2.yml
+++ b/examples/trials/mnist-pytorch/config_v2.yml
+searchSpace:
+  momentum:
+    _type: uniform
+    _value: [0, 1]
+  hidden_size:
+    _type: choice
+    _value: [128, 256, 512, 1024]
+  batch_size:
+    _type: choice
+    _value: [16, 32, 64, 128]
+  lr:
+    _type: choice
+    _value: [0.0001, 0.001, 0.01, 0.1]
+trainingService:
+  platform: local
+trialCodeDirectory: .
+trialCommand: python3 mnist.py
+trialConcurrency: 1
+trialGpuNumber: 0
+tuner:
+  name: TPE
+  classArgs:
+    optimize_mode: maximize
--- a/examples/trials/mnist-tfv1/config_heterogeneous.yml
+++ b/examples/trials/mnist-tfv1/config_heterogeneous.yml
@@ -3,7 +3,7 @@ experimentName: example_mnist
 trialConcurrency: 3
 maxExecDuration: 1h
 maxTrialNum: 10
-trainingServicePlatform: heterogeneous
+trainingServicePlatform: hybrid
 searchSpacePath: search_space.json
 #choice: true, false
 useAnnotation: false
@@ -18,7 +18,7 @@ trial:
  command: python3 mnist.py
  codeDir: .
  gpuNum: 0
-heterogeneousConfig:
+hybridConfig:
  trainingServicePlatforms:
    - local
    - remote
@@ -26,7 +26,6 @@ remoteConfig:
  reuse: true
 machineList:
  - ip: 10.1.1.1
-    username: bob
+    username: xxx
-    passwd: bob123
+    passwd: xxx
-    #port can be skip if using default ssh port 22
+    port: 22
-    #port: 22
\ No newline at end of file
--- a/examples/trials/mnist-tfv1/config_hybrid_v2.yml
+++ b/examples/trials/mnist-tfv1/config_hybrid_v2.yml
+experimentName: example_mnist
+trialConcurrency: 3
+maxExperimentDuration: 1h
+maxTrialNumber: 10
+searchSpaceFile: search_space.json
+trialCodeDirectory: .
+trialCommand: python3 mnist.py
+trialGpuNumber: 0
+tuner:
+  name: TPE
+  classArgs:
+    optimize_mode: maximize
+trainingService:
+  - platform: local
+  - platform: remote
+    reuseMode: true
+    machineList:
+      - host: 10.1.1.1
+        user: xxx
+        password: xxx
+        #port can be skipped if using the default ssh port 22
+        port: 22
--- a/examples/trials/mnist-tfv1/config_paiYarn.yml
+++ b/examples/trials/mnist-tfv1/config_paiYarn.yml
-authorName: default
-experimentName: example_mnist
-trialConcurrency: 1
-maxExecDuration: 1h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: paiYarn
-searchSpacePath: search_space.json
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: TPE
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-trial:
-  command: python3 mnist.py
-  codeDir: .
-  gpuNum: 0
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-paiYarnConfig:
-  #The username to login pai
-  userName: username
-  #The password to login pai
-  passWord: password
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file
--- a/examples/trials/mnist-tfv1/launch_hybrid.py
+++ b/examples/trials/mnist-tfv1/launch_hybrid.py
+# FIXME: For demonstration only. It should not be here
+from pathlib import Path
+from nni.experiment import Experiment
+from nni.experiment import RemoteMachineConfig
+from nni.algorithms.hpo.hyperopt_tuner import HyperoptTuner
+tuner = HyperoptTuner('tpe')
+search_space = {
+    "dropout_rate": { "_type": "uniform", "_value": [0.5, 0.9] },
+    "conv_size": { "_type": "choice", "_value": [2, 3, 5, 7] },
+    "hidden_size": { "_type": "choice", "_value": [124, 512, 1024] },
+    "batch_size": { "_type": "choice", "_value": [16, 32] },
+    "learning_rate": { "_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1] }
+}
+experiment = Experiment(tuner, ['local', 'remote'])
+experiment.config.experiment_name = 'test'
+experiment.config.trial_concurrency = 3
+experiment.config.max_trial_number = 10
+experiment.config.search_space = search_space
+experiment.config.trial_command = 'python3 mnist.py'
+experiment.config.trial_code_directory = Path(__file__).parent
+experiment.config.training_service[0].use_active_gpu = True
+experiment.config.training_service[1].reuse_mode = True
+rm_conf = RemoteMachineConfig()
+rm_conf.host = '10.1.1.1'
+rm_conf.user = 'xxx'
+rm_conf.password = 'xxx'
+rm_conf.port = 22
+experiment.config.training_service[1].machine_list = [rm_conf]
+experiment.run(26780, debug=True)
--- a/examples/trials/mnist-tfv2/config_v2.yml
+++ b/examples/trials/mnist-tfv2/config_v2.yml
+searchSpace:
+  dropout_rate:
+    _type: uniform
+    _value: [0.5, 0.9]
+  conv_size:
+    _type: choice
+    _value: [2, 3, 5, 7]
+  hidden_size:
+    _type: choice
+    _value: [128, 512, 1024]
+  batch_size:
+    _type: choice
+    _value: [16, 32]
+  learning_rate:
+    _type: choice
+    _value: [0.0001, 0.001, 0.01, 0.1]
+trainingService:
+  platform: local
+trialCodeDirectory: .
+trialCommand: python3 mnist.py
+trialConcurrency: 1
+trialGpuNumber: 0
+tuner:
+  name: TPE
+  classArgs:
+    optimize_mode: maximize
--- a/examples/trials/network_morphism/FashionMNIST/config_paiYarn.yml
+++ b/examples/trials/network_morphism/FashionMNIST/config_paiYarn.yml
-authorName: default
-experimentName: example_FashionMNIST-network-morphism
-trialConcurrency: 1
-maxExecDuration: 24h
-maxTrialNum: 10
-#choice: local, remote, pai
-trainingServicePlatform: paiYarn
-#choice: true, false
-useAnnotation: false
-tuner:
-  #choice: TPE, Random, Anneal, Evolution, BatchTuner, NetworkMorphism
-  #SMAC (SMAC should be installed through nnictl)
-  builtinTunerName: NetworkMorphism
-  classArgs:
-    #choice: maximize, minimize
-    optimize_mode: maximize
-    # for now, this tuner only supports cv domain
-    task: cv
-    #input image width
-    input_width: 28
-    #input image channel
-    input_channel: 1
-    #number of classes
-    n_output_node: 10
-trial:
-  command: python3 FashionMNIST_keras.py
-  codeDir: .
-  gpuNum: 1
-  cpuNum: 1
-  memoryMB: 8196
-  #The docker image to run nni job on pai
-  image: msranni/nni:latest
-paiYarnConfig:
-  #The username to login pai
-  userName: username
-  #The password to login pai
-  passWord: password
-  #The host of restful server of pai
-  host: 10.10.10.10
\ No newline at end of file