Add section of how to load best model for Network Morphism (#921)

* update structure * Fix parameter size calculation error * add how to save and load model * Update networkmorphismTuner.md

Add section of how to load best model for Network Morphism (#921)
* update structure * Fix parameter size calculation error * add how to save and load model * Update networkmorphismTuner.md
7ba8922b · Lee · xuehui · 9e109471 · 7ba8922b · 7ba8922b
Commit 7ba8922b authored Apr 03, 2019 by Lee Committed by xuehui Apr 03, 2019
7 changed files
--- a/docs/en_US/networkmorphismTuner.md
+++ b/docs/en_US/networkmorphismTuner.md
@@ -54,6 +54,52 @@ net = build_graph_from_json(RCV_CONFIG)
 nni.report_final_result(best_acc)
 ```

+If you want to save and **load the best model**, the following methods are recommended. 
+
+```python
+# 1. Use NNI API
+## You can get the best model ID from WebUI 
+## or `nni/experiments/experiment_id/log/model_path/best_model.txt`
+
+## read the json string from model file and load it with NNI API
+with open("best-model.json") as json_file:
+    json_of_model = json_file.read()
+model = build_graph_from_json(json_of_model)
+
+# 2. Use Framework API (Related to Framework) 
+## 2.1 Keras API
+
+## Save the model with Keras API in the trial code
+## it's better to save model with id in nni local mode
+model_id = nni.get_sequence_id()
+## serialize model to JSON
+model_json = model.to_json()
+with open("model-{}.json".format(model_id), "w") as json_file:
+    json_file.write(model_json)
+## serialize weights to HDF5
+model.save_weights("model-{}.h5".format(model_id))
+
+## Load the model with Keras API if you want to reuse the model
+## load json and create model
+model_id = "" # id of the model you want to reuse
+with open('model-{}.json'.format(model_id), 'r') as json_file:
+    loaded_model_json = json_file.read()
+loaded_model = model_from_json(loaded_model_json)
+## load weights into new model
+loaded_model.load_weights("model-{}.h5".format(model_id))
+
+## 2.2 PyTorch API
+
+## Save the model with PyTorch API in the trial code
+model_id = nni.get_sequence_id()
+torch.save(model, "model-{}.pt".format(model_id))
+
+## Load the model with PyTorch API if you want to reuse the model
+model_id = "" # id of the model you want to reuse
+loaded_model = torch.load("model-{}.pt".format(model_id))
+
+```
+
 ## 3. File Structure

 The tuner has a lot of different files, functions and classes. Here we will only give most of those files a brief introduction:

--- a/examples/trials/network_morphism/FashionMNIST/FashionMNIST_pytorch.py
+++ b/examples/trials/network_morphism/FashionMNIST/FashionMNIST_pytorch.py
@@ -27,8 +27,7 @@ import torch.nn as nn
 import torch.optim as optim
 import torchvision

-sys.path.append("../")
-from network_morphism import utils
+import utils


 # set the logger format

--- a/examples/trials/network_morphism/utils.py
+++ b/examples/trials/network_morphism/utils.py
--- a/examples/trials/network_morphism/cifar10/cifar10_pytorch.py
+++ b/examples/trials/network_morphism/cifar10/cifar10_pytorch.py
@@ -27,8 +27,7 @@ import torch.nn as nn
 import torch.optim as optim
 import torchvision

-sys.path.append("../")
-from network_morphism import utils
+import utils

 # set the logger format
 log_format = "%(asctime)s %(message)s"

--- a/examples/trials/network_morphism/cifar10/utils.py
+++ b/examples/trials/network_morphism/cifar10/utils.py
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.init as init
+import torchvision.transforms as transforms
+
+
+class EarlyStopping:
+    """ EarlyStopping class to keep NN from overfitting
+
+    Tracks the best metric seen so far and signals a stop once the metric has
+    failed to improve for ``patience`` consecutive calls to ``step``.
+
+    Arguments:
+        mode {str} -- "min" when a smaller metric is better, "max" when a larger one is
+        min_delta {float} -- margin by which a metric must beat the best value to
+            count as an improvement (a percentage of the best value when
+            ``percentage`` is true)
+        patience {int} -- number of consecutive non-improving steps that triggers
+            a stop; 0 disables early stopping
+        percentage {bool} -- interpret ``min_delta`` as a percentage of the best value
+
+    Raises:
+        ValueError -- if ``mode`` is neither "min" nor "max"
+    """
+
+    # pylint: disable=E0202
+    def __init__(self, mode="min", min_delta=0, patience=10, percentage=False):
+        self.mode = mode
+        self.min_delta = min_delta
+        self.patience = patience
+        self.best = None
+        self.num_bad_epochs = 0
+        self.is_better = None
+        self._init_is_better(mode, min_delta, percentage)
+
+        if patience == 0:
+            # patience 0 turns the object into a no-op: the instance attribute
+            # shadows the ``step`` method so it never requests a stop.
+            self.is_better = lambda a, b: True
+            self.step = lambda a: False
+
+    def step(self, metrics):
+        """ EarlyStopping step on each epoch
+        Arguments:
+            metrics {float} -- metric value
+        Returns:
+            bool -- True when training should stop, False otherwise
+        """
+
+        # Check for NaN before anything else: a NaN metric must stop training
+        # immediately and must never be stored as the best value, because every
+        # later comparison against NaN evaluates to False.
+        if np.isnan(metrics):
+            return True
+
+        if self.best is None:
+            self.best = metrics
+            return False
+
+        if self.is_better(metrics, self.best):
+            self.num_bad_epochs = 0
+            self.best = metrics
+        else:
+            self.num_bad_epochs += 1
+
+        return self.num_bad_epochs >= self.patience
+
+    def _init_is_better(self, mode, min_delta, percentage):
+        """ Install the comparison function matching ``mode`` and ``percentage``
+        """
+        if mode not in {"min", "max"}:
+            raise ValueError("mode " + mode + " is unknown!")
+        if not percentage:
+            if mode == "min":
+                self.is_better = lambda a, best: a < best - min_delta
+            if mode == "max":
+                self.is_better = lambda a, best: a > best + min_delta
+        else:
+            if mode == "min":
+                self.is_better = lambda a, best: a < best - (best * min_delta / 100)
+            if mode == "max":
+                self.is_better = lambda a, best: a > best + (best * min_delta / 100)
+
+
+class Cutout:
+    """Zero out one randomly placed square patch of an image tensor.
+    Args:
+        length (int): The side length (in pixels) of the square patch.
+    """
+
+    def __init__(self, length):
+        self.length = length
+
+    def __call__(self, img):
+        """
+        Args:
+            img (Tensor): Tensor image of size (C, H, W).
+        Returns:
+            Tensor: The same tensor, modified in place, with one patch of at
+            most length x length pixels set to zero (the patch is clipped at
+            the image border).
+        """
+        height, width = img.size(1), img.size(2)
+        half = self.length // 2
+
+        # The row is drawn before the column; keep this order so the random
+        # stream is consumed exactly as before.
+        center_y = np.random.randint(height)
+        center_x = np.random.randint(width)
+
+        # Clamp the patch bounds to the image extent.
+        top = min(max(center_y - half, 0), height)
+        bottom = min(max(center_y + half, 0), height)
+        left = min(max(center_x - half, 0), width)
+        right = min(max(center_x + half, 0), width)
+
+        keep = torch.ones(height, width, dtype=torch.float32)
+        keep[top:bottom, left:right] = 0.0
+        img *= keep.expand_as(img)
+        return img
+
+
+def data_transforms_cifar10(args):
+    """ Build the train / validation transform pipelines for cifar10
+
+    Arguments:
+        args -- object exposing ``cutout`` (truthy to enable Cutout) and
+            ``cutout_length`` (patch size passed to Cutout)
+    Returns:
+        tuple -- (train_transform, valid_transform)
+    """
+
+    cifar_mean = [0.49139968, 0.48215827, 0.44653124]
+    cifar_std = [0.24703233, 0.24348505, 0.26158768]
+
+    # Training: augmentation, then tensor conversion and normalization.
+    train_steps = [
+        transforms.RandomCrop(32, padding=4),
+        transforms.RandomHorizontalFlip(),
+        transforms.ToTensor(),
+        transforms.Normalize(cifar_mean, cifar_std),
+    ]
+    train_transform = transforms.Compose(train_steps)
+    if args.cutout:
+        # Cutout operates on tensors, so it goes after ToTensor / Normalize.
+        train_transform.transforms.append(Cutout(args.cutout_length))
+
+    # Validation: no augmentation.
+    valid_steps = [
+        transforms.ToTensor(),
+        transforms.Normalize(cifar_mean, cifar_std),
+    ]
+    valid_transform = transforms.Compose(valid_steps)
+
+    return train_transform, valid_transform
+
+
+def data_transforms_mnist(args, mnist_mean=None, mnist_std=None):
+    """ Build the train / validation transform pipelines for mnist
+
+    Arguments:
+        args -- object exposing ``cutout`` (truthy to enable Cutout) and
+            ``cutout_length`` (patch size passed to Cutout)
+        mnist_mean {list} -- per-channel mean for normalization, [0.5] when None
+        mnist_std {list} -- per-channel std for normalization, [0.5] when None
+    Returns:
+        tuple -- (train_transform, valid_transform)
+    """
+    mnist_mean = [0.5] if mnist_mean is None else mnist_mean
+    mnist_std = [0.5] if mnist_std is None else mnist_std
+
+    # Training: augmentation, then tensor conversion and normalization.
+    train_steps = [
+        transforms.RandomCrop(28, padding=4),
+        transforms.RandomHorizontalFlip(),
+        transforms.ToTensor(),
+        transforms.Normalize(mnist_mean, mnist_std),
+    ]
+    train_transform = transforms.Compose(train_steps)
+    if args.cutout:
+        # Cutout operates on tensors, so it goes after ToTensor / Normalize.
+        train_transform.transforms.append(Cutout(args.cutout_length))
+
+    # Validation: no augmentation.
+    valid_steps = [
+        transforms.ToTensor(),
+        transforms.Normalize(mnist_mean, mnist_std),
+    ]
+    valid_transform = transforms.Compose(valid_steps)
+
+    return train_transform, valid_transform
+
+
+def get_mean_and_std(dataset):
+    """Compute the per-channel mean and std value of dataset.
+
+    Each sample is loaded on its own (batch size 1); the per-sample channel
+    mean and std are summed and then divided by ``len(dataset)``. The returned
+    std is therefore the average of per-sample stds.
+
+    Arguments:
+        dataset -- dataset yielding ``(input, target)`` pairs, where a batched
+            input is indexed as (N, C, H, W)
+    Returns:
+        tuple -- (mean, std), two 1-D tensors with one entry per channel
+    """
+    dataloader = torch.utils.data.DataLoader(
+        dataset, batch_size=1, shuffle=True, num_workers=2
+    )
+    mean = None
+    std = None
+    print("==> Computing mean and std..")
+    for inputs, _ in dataloader:
+        if mean is None:
+            # Size the accumulators from the data instead of assuming RGB, so
+            # single-channel (grayscale) datasets no longer raise IndexError.
+            channels = inputs.size(1)
+            mean = torch.zeros(channels)
+            std = torch.zeros(channels)
+        for i in range(mean.numel()):
+            mean[i] += inputs[:, i, :, :].mean()
+            std[i] += inputs[:, i, :, :].std()
+    if mean is None:
+        # Nothing was iterated: fall back to the historical 3-channel shape.
+        mean = torch.zeros(3)
+        std = torch.zeros(3)
+    mean.div_(len(dataset))
+    std.div_(len(dataset))
+    return mean, std
+
+
+def init_params(net):
+    """Init layer parameters.
+
+    Walks every sub-module of ``net`` and re-initializes it in place:
+    Conv2d weights with Kaiming-normal (fan_out), BatchNorm2d weight / bias
+    with 1 / 0, Linear weights with a normal distribution (std 1e-3), and any
+    present Conv2d / Linear bias with 0.
+
+    Arguments:
+        net {nn.Module} -- network whose parameters are initialized in place
+    """
+    for module in net.modules():
+        if isinstance(module, nn.Conv2d):
+            init.kaiming_normal_(module.weight, mode="fan_out")
+            # bias is None for layers built with bias=False; testing the tensor
+            # itself for truth raises on multi-element tensors.
+            if module.bias is not None:
+                init.constant_(module.bias, 0)
+        elif isinstance(module, nn.BatchNorm2d):
+            # weight / bias are None when the layer was built with affine=False
+            if module.weight is not None:
+                init.constant_(module.weight, 1)
+            if module.bias is not None:
+                init.constant_(module.bias, 0)
+        elif isinstance(module, nn.Linear):
+            init.normal_(module.weight, std=1e-3)
+            if module.bias is not None:
+                init.constant_(module.bias, 0)
--- a/examples/trials/network_morphism/requirements.txt
+++ b/examples/trials/network_morphism/requirements.txt
@@ -2,5 +2,4 @@ numpy==1.14.2
 tensorflow==1.12.0
 torchvision==0.2.1
 Keras==2.2.2
-nni==0.4.1
 torch==0.4.1
--- a/src/sdk/pynni/nni/networkmorphism_tuner/layers.py
+++ b/src/sdk/pynni/nni/networkmorphism_tuner/layers.py
@@ -254,7 +254,7 @@ class StubConv(StubWeightBiasLayer):
        keras_layer.set_weights((self.weights[0].T, self.weights[1]))

    def size(self):
-        return self.filters * self.kernel_size * self.kernel_size + self.filters
+        return (self.input_channel * self.kernel_size * self.kernel_size + 1) * self.filters

    @abstractmethod
    def to_real_layer(self):