Unverified commit cd3a912a, authored by SparkSnail, committed by GitHub

Merge pull request #218 from microsoft/master

merge master

parents a0846f2a e9cba778
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

from nni.nas.pytorch.mutables import LayerChoice, InputChoice
from nni.nas.pytorch.darts import DartsTrainer


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # each LayerChoice asks the NAS algorithm to pick between a 3x3 and a 5x5 convolution
        self.conv1 = LayerChoice([nn.Conv2d(3, 6, 3, padding=1), nn.Conv2d(3, 6, 5, padding=2)])
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = LayerChoice([nn.Conv2d(6, 16, 3, padding=1), nn.Conv2d(6, 16, 5, padding=2)])
        self.conv3 = nn.Conv2d(16, 16, 1)
        # InputChoice decides whether the skip connection is taken
        self.skipconnect = InputChoice(n_candidates=1)
        self.bn = nn.BatchNorm2d(16)
        self.gap = nn.AdaptiveAvgPool2d(4)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        bs = x.size(0)
        x = self.pool(F.relu(self.conv1(x)))
        x0 = F.relu(self.conv2(x))
        x1 = F.relu(self.conv3(x0))
        # returns x0 if the skip connection is chosen, otherwise None
        x0 = self.skipconnect([x0])
        if x0 is not None:
            x1 += x0
        x = self.pool(self.bn(x1))
        x = self.gap(x).view(bs, -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def accuracy(output, target):
    batch_size = target.size(0)
    _, predicted = torch.max(output.data, 1)
    return {"acc1": (predicted == target).sum().item() / batch_size}


if __name__ == "__main__":
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    dataset_train = torchvision.datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
    dataset_valid = torchvision.datasets.CIFAR10(root="./data", train=False, download=True, transform=transform)

    net = Net()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    trainer = DartsTrainer(net,
                           loss=criterion,
                           metrics=accuracy,
                           optimizer=optimizer,
                           num_epochs=2,
                           dataset_train=dataset_train,
                           dataset_valid=dataset_valid,
                           batch_size=64,
                           log_frequency=10)
    trainer.train()
    trainer.export("checkpoint.json")
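    # A hedged sketch of the retraining step (not part of this example): NNI 1.x
    # can replay the exported "checkpoint.json" onto a fresh model, after which
    # it trains like an ordinary PyTorch network.
    #
    #     from nni.nas.pytorch.fixed import apply_fixed_architecture
    #     fixed_net = Net()
    #     apply_fixed_architecture(fixed_net, "checkpoint.json")
    #     # ...then run a standard training loop over fixed_net...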

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import logging
import sys
from argparse import ArgumentParser

import torch
import torch.nn as nn

from nni.nas.pytorch.callbacks import ArchitectureCheckpoint
from nni.nas.pytorch.pdarts import PdartsTrainer

# the `if True:` guard keeps this block together so that automatic import
# sorters do not reorder the imports above the sys.path change
if True:
    sys.path.append('../darts')
    from utils import accuracy
    from model import CNN
    import datasets

logger = logging.getLogger('nni')


if __name__ == "__main__":
    parser = ArgumentParser("pdarts")
    parser.add_argument('--add_layers', action='append',
                        default=[0, 6, 12], help='add layers')
    parser.add_argument("--nodes", default=4, type=int)
    parser.add_argument("--layers", default=5, type=int)
    parser.add_argument("--batch-size", default=64, type=int)
    parser.add_argument("--log-frequency", default=1, type=int)
    parser.add_argument("--epochs", default=50, type=int)
    args = parser.parse_args()

    logger.info("loading data")
    dataset_train, dataset_valid = datasets.get_dataset("cifar10")

    def model_creator(layers):
        model = CNN(32, 3, 16, 10, layers, n_nodes=args.nodes)
        criterion = nn.CrossEntropyLoss()
        optim = torch.optim.SGD(model.parameters(), 0.025, momentum=0.9, weight_decay=3.0E-4)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, args.epochs, eta_min=0.001)
        return model, criterion, optim, lr_scheduler

    logger.info("initializing trainer")
    trainer = PdartsTrainer(model_creator,
                            layers=args.layers,
                            metrics=lambda output, target: accuracy(output, target, topk=(1,)),
                            pdarts_num_layers=[0, 6, 12],
                            pdarts_num_to_drop=[3, 2, 2],
                            num_epochs=args.epochs,
                            dataset_train=dataset_train,
                            dataset_valid=dataset_valid,
                            batch_size=args.batch_size,
                            log_frequency=args.log_frequency,
                            callbacks=[ArchitectureCheckpoint("./checkpoints")])
    logger.info("training")
    trainer.train()
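    # Note on the staged search (an assumption based on the parameters above):
    # each of the three stages rebuilds the model via model_creator with a deeper
    # network (the base `layers` grown by the matching entry of pdarts_num_layers),
    # and at the end of a stage pdarts_num_to_drop prunes that many low-weight
    # candidate operations per connection; this is the "progressive" part of P-DARTS.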

authorName: NNI Example
experimentName: MNIST TF v2.x
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
trainingServicePlatform: local  # choices: local, remote, pai
searchSpacePath: search_space.json
useAnnotation: false
tuner:
  builtinTunerName: TPE  # choices: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner,
                         # GPTuner, SMAC (SMAC should be installed through nnictl)
  classArgs:
    optimize_mode: maximize  # choices: maximize, minimize
trial:
  command: python3 mnist.py
  codeDir: .
  gpuNum: 0
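
# An experiment described by a config like this is typically launched with the
# NNI CLI, e.g. `nnictl create --config config.yml` (the file name config.yml
# is an assumption; substitute whatever name this file is saved under).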

authorName: NNI Example
experimentName: MNIST TF v2.x with assessor
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 50
#choice: local, remote
trainingServicePlatform: local
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
  #SMAC (SMAC should be installed through nnictl)
  builtinTunerName: TPE
  classArgs:
    #choice: maximize, minimize
    optimize_mode: maximize
assessor:
  #choice: Medianstop, Curvefitting
  builtinAssessorName: Curvefitting
  classArgs:
    #choice: maximize, minimize
    optimize_mode: maximize
    epoch_num: 20
    threshold: 0.9
trial:
  command: python3 mnist.py
  codeDir: .
  gpuNum: 0

authorName: NNI Example
experimentName: MNIST TF v2.x
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: local
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner
  #SMAC (SMAC should be installed through nnictl)
  builtinTunerName: TPE
  classArgs:
    #choice: maximize, minimize
    optimize_mode: maximize
trial:
  command: python3 mnist.py
  codeDir: .
  gpuNum: 0

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
NNI example trial code.

- Experiment type: Hyper-parameter Optimization
- Trial framework: TensorFlow v2.x (Keras API)
- Model: LeNet-5
- Dataset: MNIST
"""

import logging

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPool2D
from tensorflow.keras.optimizers import Adam

import nni

_logger = logging.getLogger('mnist_example')
_logger.setLevel(logging.INFO)


class MnistModel(Model):
    """
    LeNet-5 model with customizable hyper-parameters.
    """
    def __init__(self, conv_size, hidden_size, dropout_rate):
        """
        Initialize hyper-parameters.

        Parameters
        ----------
        conv_size : int
            Kernel size of the convolutional layers.
        hidden_size : int
            Dimensionality of the last hidden layer.
        dropout_rate : float
            Dropout rate between the two fully connected (dense) layers, to prevent co-adaptation.
        """
        super().__init__()
        self.conv1 = Conv2D(filters=32, kernel_size=conv_size, activation='relu')
        self.pool1 = MaxPool2D(pool_size=2)
        self.conv2 = Conv2D(filters=64, kernel_size=conv_size, activation='relu')
        self.pool2 = MaxPool2D(pool_size=2)
        self.flatten = Flatten()
        self.fc1 = Dense(units=hidden_size, activation='relu')
        self.dropout = Dropout(rate=dropout_rate)
        self.fc2 = Dense(units=10, activation='softmax')

    def call(self, x):
        """Override ``Model.call`` to build the LeNet-5 forward pass."""
        x = self.conv1(x)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.dropout(x)
        return self.fc2(x)


class ReportIntermediates(Callback):
    """
    Callback class for reporting intermediate accuracy metrics.

    This callback sends the validation accuracy to the NNI framework at the end
    of each epoch, so you can view the learning curve in the web UI.
    If an assessor is configured in the experiment's YAML file,
    it will use these metrics for early stopping.
    """
    def on_epoch_end(self, epoch, logs=None):
        """Report intermediate accuracy to the NNI framework."""
        # the TensorFlow 2.0 API reference claims the key is `val_acc`,
        # but in practice it is `val_accuracy`; handle both
        if 'val_acc' in logs:
            nni.report_intermediate_result(logs['val_acc'])
        else:
            nni.report_intermediate_result(logs['val_accuracy'])


def load_dataset():
    """Download and reformat the MNIST dataset."""
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # scale pixel values to [0, 1] and add a trailing channel dimension
    x_train, x_test = x_train / 255.0, x_test / 255.0
    x_train = x_train[..., tf.newaxis]
    x_test = x_test[..., tf.newaxis]
    return (x_train, y_train), (x_test, y_test)


def main(params):
    """
    Main program:
    - Build the network
    - Prepare the dataset
    - Train the model
    - Report accuracy to the tuner
    """
    model = MnistModel(
        conv_size=params['conv_size'],
        hidden_size=params['hidden_size'],
        dropout_rate=params['dropout_rate']
    )
    optimizer = Adam(learning_rate=params['learning_rate'])
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    _logger.info('Model built')

    (x_train, y_train), (x_test, y_test) = load_dataset()
    _logger.info('Dataset loaded')

    model.fit(
        x_train,
        y_train,
        batch_size=params['batch_size'],
        epochs=10,
        verbose=0,
        callbacks=[ReportIntermediates()],
        validation_data=(x_test, y_test)
    )
    _logger.info('Training completed')

    loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
    nni.report_final_result(accuracy)  # send final accuracy to the NNI tuner and web UI
    _logger.info('Final accuracy reported: %s', accuracy)


if __name__ == '__main__':
    params = {
        'dropout_rate': 0.5,
        'conv_size': 5,
        'hidden_size': 1024,
        'batch_size': 32,
        'learning_rate': 1e-4,
    }

    # fetch hyper-parameters from the HPO tuner;
    # comment out the following two lines to run the code without the NNI framework
    tuned_params = nni.get_next_parameter()
    params.update(tuned_params)

    _logger.info('Hyper-parameters: %s', params)
    main(params)
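    # For reference, nni.get_next_parameter() returns one sample drawn from the
    # search space JSON shown next; an illustrative (made-up) sample could be
    #     {'dropout_rate': 0.62, 'conv_size': 3, 'hidden_size': 512,
    #      'batch_size': 16, 'learning_rate': 0.001}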
{
"dropout_rate": { "_type": "uniform", "_value": [0.5, 0.9] },
"conv_size": { "_type": "choice", "_value": [2, 3, 5, 7] },
"hidden_size": { "_type": "choice", "_value": [124, 512, 1024] },
"batch_size": { "_type": "choice", "_value": [16, 32] },
"learning_rate": { "_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1] }
}
@@ -6,36 +6,27 @@ from nni.tuner import Tuner
 def random_archi_generator(nas_ss, random_state):
     '''random
     '''
-    chosen_archi = {}
-    for block_name, block_value in nas_ss.items():
-        assert block_value['_type'] == "mutable_layer", \
-            "Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
-        block = block_value['_value']
-        tmp_block = {}
-        for layer_name, layer in block.items():
-            tmp_layer = {}
-            for key, value in layer.items():
-                if key == 'layer_choice':
-                    index = random_state.randint(len(value))
-                    tmp_layer['chosen_layer'] = value[index]
-                elif key == 'optional_inputs':
-                    tmp_layer['chosen_inputs'] = []
-                    if layer['optional_inputs']:
-                        if isinstance(layer['optional_input_size'], int):
-                            choice_num = layer['optional_input_size']
-                        else:
-                            choice_range = layer['optional_input_size']
-                            choice_num = random_state.randint(choice_range[0], choice_range[1] + 1)
-                        for _ in range(choice_num):
-                            index = random_state.randint(len(layer['optional_inputs']))
-                            tmp_layer['chosen_inputs'].append(layer['optional_inputs'][index])
-                elif key == 'optional_input_size':
-                    pass
-                else:
-                    raise ValueError('Unknown field %s in layer %s of block %s' % (key, layer_name, block_name))
-            tmp_block[layer_name] = tmp_layer
-        chosen_archi[block_name] = tmp_block
-    return chosen_archi
+    chosen_arch = {}
+    for key, val in nas_ss.items():
+        assert val['_type'] in ['layer_choice', 'input_choice'], \
+            "Random NAS Tuner only receives NAS search space whose _type is 'layer_choice' or 'input_choice'"
+        if val['_type'] == 'layer_choice':
+            choices = val['_value']
+            index = random_state.randint(len(choices))
+            chosen_arch[key] = {'_value': choices[index], '_idx': index}
+        elif val['_type'] == 'input_choice':
+            choices = val['_value']['candidates']
+            n_chosen = val['_value']['n_chosen']
+            chosen = []
+            idxs = []
+            for _ in range(n_chosen):
+                index = random_state.randint(len(choices))
+                chosen.append(choices[index])
+                idxs.append(index)
+            chosen_arch[key] = {'_value': chosen, '_idx': idxs}
+        else:
+            raise ValueError('Unknown key %s and value %s' % (key, val))
+    return chosen_arch
 
 class RandomNASTuner(Tuner):
     ...
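For illustration only (not part of the diff): given a flattened search space entry such as {"conv1": {"_type": "layer_choice", "_value": ["conv3x3", "conv5x5"]}}, the rewritten generator returns something like {"conv1": {"_value": "conv5x5", "_idx": 1}}; for an "input_choice" entry, both "_value" and "_idx" become lists of length n_chosen.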