Improve Classic NAS mutator (#1865)

7a558113 · Yuge Zhang · GitHub · 56be400c · 7a558113 · 7a558113
Unverified Commit 7a558113 authored Dec 23, 2019 by Yuge Zhang Committed by GitHub Dec 23, 2019
6 changed files
--- a/src/sdk/pynni/nni/nas/pytorch/callbacks.py
+++ b/src/sdk/pynni/nni/nas/pytorch/callbacks.py
@@ -4,6 +4,9 @@
 import logging
 import os

+import torch
+import torch.nn as nn
+
 _logger = logging.getLogger(__name__)


@@ -44,11 +47,28 @@ class LRSchedulerCallback(Callback):


 class ArchitectureCheckpoint(Callback):
-    def __init__(self, checkpoint_dir, every="epoch"):
+    def __init__(self, checkpoint_dir):
+        super().__init__()
+        self.checkpoint_dir = checkpoint_dir
+        os.makedirs(self.checkpoint_dir, exist_ok=True)
+
+    def on_epoch_end(self, epoch):
+        dest_path = os.path.join(self.checkpoint_dir, "epoch_{}.json".format(epoch))
+        _logger.info("Saving architecture to %s", dest_path)
+        self.trainer.export(dest_path)
+
+
+class ModelCheckpoint(Callback):
+    def __init__(self, checkpoint_dir):
        super().__init__()
-        assert every == "epoch"
        self.checkpoint_dir = checkpoint_dir
        os.makedirs(self.checkpoint_dir, exist_ok=True)

    def on_epoch_end(self, epoch):
-        self.trainer.export(os.path.join(self.checkpoint_dir, "epoch_{}.json".format(epoch)))
+        if isinstance(self.model, nn.DataParallel):
+            state_dict = self.model.module.state_dict()
+        else:
+            state_dict = self.model.state_dict()
+        dest_path = os.path.join(self.checkpoint_dir, "epoch_{}.pth.tar".format(epoch))
+        _logger.info("Saving model to %s", dest_path)
+        torch.save(state_dict, dest_path)
--- a/src/sdk/pynni/nni/nas/pytorch/classic_nas/__init__.py
+++ b/src/sdk/pynni/nni/nas/pytorch/classic_nas/__init__.py
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
 from .mutator import get_and_apply_next_architecture
--- a/src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py
+++ b/src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py
-import os
-import sys
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
 import json
 import logging
+import os
+import sys
+
 import torch
+
 import nni
 from nni.env_vars import trial_env_vars
-from nni.nas.pytorch.base_mutator import BaseMutator
 from nni.nas.pytorch.mutables import LayerChoice, InputChoice
+from nni.nas.pytorch.mutator import Mutator

 logger = logging.getLogger(__name__)

+NNI_GEN_SEARCH_SPACE = "NNI_GEN_SEARCH_SPACE"
+LAYER_CHOICE = "layer_choice"
+INPUT_CHOICE = "input_choice"
+
+
 def get_and_apply_next_architecture(model):
    """
    Wrapper of ClassicMutator to make it more meaningful,
    similar to ```get_next_parameter``` for HPO.
-
    Parameters
    ----------
    model : pytorch model
@@ -22,12 +31,14 @@ def get_and_apply_next_architecture(model):
    """
    ClassicMutator(model)

-class ClassicMutator(BaseMutator):
+
+class ClassicMutator(Mutator):
    """
    This mutator is to apply the architecture chosen from tuner.
    It implements the forward function of LayerChoice and InputChoice,
    to only activate the chosen ones
    """
+
    def __init__(self, model):
        """
        Generate search space based on ```model```.
@@ -37,70 +48,131 @@ class ClassicMutator(BaseMutator):
        use ```nnictl``` to start an experiment. The other is standalone mode
        where users directly run the trial command, this mode chooses the first
        one(s) for each LayerChoice and InputChoice.
-
        Parameters
        ----------
-        model : pytorch model
+        model : PyTorch model
            user's model with search space (e.g., LayerChoice, InputChoice) embedded in it
        """
        super(ClassicMutator, self).__init__(model)
-        self.chosen_arch = {}
-        self.search_space = self._generate_search_space()
-        if 'NNI_GEN_SEARCH_SPACE' in os.environ:
+        self._chosen_arch = {}
+        self._search_space = self._generate_search_space()
+        if NNI_GEN_SEARCH_SPACE in os.environ:
            # dry run for only generating search space
-            self._dump_search_space(self.search_space, os.environ.get('NNI_GEN_SEARCH_SPACE'))
+            self._dump_search_space(os.environ[NNI_GEN_SEARCH_SPACE])
            sys.exit(0)
-        # get chosen arch from tuner
-        self.chosen_arch = nni.get_next_parameter()
-        if not self.chosen_arch and trial_env_vars.NNI_PLATFORM is None:
-            logger.warning('This is in standalone mode, the chosen are the first one(s)')
-            self.chosen_arch = self._standalone_generate_chosen()
-        self._validate_chosen_arch()

-    def _validate_chosen_arch(self):
-        pass
+        if trial_env_vars.NNI_PLATFORM is None:
+            logger.warning("This is in standalone mode, the chosen are the first one(s).")
+            self._chosen_arch = self._standalone_generate_chosen()
+        else:
+            # get chosen arch from tuner
+            self._chosen_arch = nni.get_next_parameter()
+        self.reset()

-    def _standalone_generate_chosen(self):
+    def _sample_layer_choice(self, mutable, idx, value, search_space_item):
        """
-        Generate the chosen architecture for standalone mode,
-        i.e., choose the first one(s) for LayerChoice and InputChoice
+        Convert layer choice to tensor representation.

-        { key_name: {'_value': "conv1",
-                     '_idx': 0} }
+        Parameters
+        ----------
+        mutable : Mutable
+        idx : int
+            Index of the item in the list that will be selected.
+        value : str
+            The verbose representation of the selected value.
+        search_space_item : list
+            The list for corresponding search space.
+        """
+        # doesn't support multihot for layer choice yet
+        onehot_list = [False] * mutable.length
+        assert 0 <= idx < mutable.length and search_space_item[idx] == value, \
+            "Index '{}' in search space '{}' is not '{}'".format(idx, search_space_item, value)
+        onehot_list[idx] = True
+        return torch.tensor(onehot_list, dtype=torch.bool)  # pylint: disable=not-callable
+
+    def _sample_input_choice(self, mutable, idx, value, search_space_item):
+        """
+        Convert input choice to tensor representation.

-        { key_name: {'_value': ["in1"],
-                     '_idx': [0]} }
+        Parameters
+        ----------
+        mutable : Mutable
+        idx : list of int
+            Indices of the items in the list that will be selected.
+        value : list of str
+            The verbose representations of the selected values, in the same order as `idx`.
+        search_space_item : list
+            The list for corresponding search space.
+        """
+        multihot_list = [False] * mutable.n_candidates
+        for i, v in zip(idx, value):
+            assert 0 <= i < mutable.n_candidates and search_space_item[i] == v, \
+                "Index '{}' in search space '{}' is not '{}'".format(i, search_space_item, v)
+            assert not multihot_list[i], "'{}' is selected twice in '{}', which is not allowed.".format(i, idx)
+            multihot_list[i] = True
+        return torch.tensor(multihot_list, dtype=torch.bool)  # pylint: disable=not-callable
+
+    def sample_search(self):
+        return self.sample_final()
+
+    def sample_final(self):
+        assert set(self._chosen_arch.keys()) == set(self._search_space.keys()), \
+            "Unmatched keys, expected keys '{}' from search space, found '{}'.".format(self._search_space.keys(),
+                                                                                       self._chosen_arch.keys())
+        result = dict()
+        for mutable in self.mutables:
+            assert mutable.key in self._chosen_arch, "Expected '{}' in chosen arch, but not found.".format(mutable.key)
+            data = self._chosen_arch[mutable.key]
+            assert isinstance(data, dict) and "_value" in data and "_idx" in data, \
+                "'{}' is not a valid choice.".format(data)
+            value = data["_value"]
+            idx = data["_idx"]
+            search_space_item = self._search_space[mutable.key]["_value"]
+            if isinstance(mutable, LayerChoice):
+                result[mutable.key] = self._sample_layer_choice(mutable, idx, value, search_space_item)
+            elif isinstance(mutable, InputChoice):
+                result[mutable.key] = self._sample_input_choice(mutable, idx, value, search_space_item)
+            else:
+                raise TypeError("Unsupported mutable type: '%s'." % type(mutable))
+        return result

+    def _standalone_generate_chosen(self):
+        """
+        Generate the chosen architecture for standalone mode,
+        i.e., choose the first one(s) for LayerChoice and InputChoice.
+        ::
+            { key_name: {"_value": "conv1",
+                         "_idx": 0} }
+            { key_name: {"_value": ["in1"],
+                         "_idx": [0]} }
        Returns
        -------
        dict
            the chosen architecture
        """
        chosen_arch = {}
-        for key, val in self.search_space.items():
-            if val['_type'] == 'layer_choice':
-                choices = val['_value']
-                chosen_arch[key] = {'_value': choices[0], '_idx': 0}
-            elif val['_type'] == 'input_choice':
-                choices = val['_value']['candidates']
-                n_chosen = val['_value']['n_chosen']
-                chosen_arch[key] = {'_value': choices[:n_chosen], '_idx': list(range(n_chosen))}
+        for key, val in self._search_space.items():
+            if val["_type"] == LAYER_CHOICE:
+                choices = val["_value"]
+                chosen_arch[key] = {"_value": choices[0], "_idx": 0}
+            elif val["_type"] == INPUT_CHOICE:
+                choices = val["_value"]["candidates"]
+                n_chosen = val["_value"]["n_chosen"]
+                chosen_arch[key] = {"_value": choices[:n_chosen], "_idx": list(range(n_chosen))}
            else:
-                raise ValueError('Unknown key %s and value %s' % (key, val))
+                raise ValueError("Unknown key '%s' and value '%s'." % (key, val))
        return chosen_arch

    def _generate_search_space(self):
        """
        Generate search space from mutables.
        Here is the search space format:
-
-        { key_name: {'_type': 'layer_choice',
-                     '_value': ["conv1", "conv2"]} }
-
-        { key_name: {'_type': 'input_choice',
-                     '_value': {'candidates': ["in1", "in2"],
-                                'n_chosen': 1}} }
-
+        ::
+            { key_name: {"_type": "layer_choice",
+                         "_value": ["conv1", "conv2"]} }
+            { key_name: {"_type": "input_choice",
+                         "_value": {"candidates": ["in1", "in2"],
+                                    "n_chosen": 1}} }
        Returns
        -------
        dict
@@ -112,81 +184,16 @@ class ClassicMutator(BaseMutator):
            if isinstance(mutable, LayerChoice):
                key = mutable.key
                val = [repr(choice) for choice in mutable.choices]
-                search_space[key] = {"_type": "layer_choice", "_value": val}
+                search_space[key] = {"_type": LAYER_CHOICE, "_value": val}
            elif isinstance(mutable, InputChoice):
                key = mutable.key
-                search_space[key] = {"_type": "input_choice",
+                search_space[key] = {"_type": INPUT_CHOICE,
                                     "_value": {"candidates": mutable.choose_from,
                                                "n_chosen": mutable.n_chosen}}
            else:
-                raise TypeError('Unsupported mutable type: %s.' % type(mutable))
+                raise TypeError("Unsupported mutable type: '%s'." % type(mutable))
        return search_space

-    def _dump_search_space(self, search_space, file_path):
-        with open(file_path, 'w') as ss_file:
-            json.dump(search_space, ss_file)
-
-    def _tensor_reduction(self, reduction_type, tensor_list):
-        if tensor_list == "none":
-            return tensor_list
-        if not tensor_list:
-            return None  # empty. return None for now
-        if len(tensor_list) == 1:
-            return tensor_list[0]
-        if reduction_type == "sum":
-            return sum(tensor_list)
-        if reduction_type == "mean":
-            return sum(tensor_list) / len(tensor_list)
-        if reduction_type == "concat":
-            return torch.cat(tensor_list, dim=1)
-        raise ValueError("Unrecognized reduction policy: \"{}\"".format(reduction_type))
-
-    def on_forward_layer_choice(self, mutable, *inputs):
-        """
-        Implement the forward of LayerChoice
-
-        Parameters
-        ----------
-        mutable: LayerChoice
-        inputs: list of torch.Tensor
-
-        Returns
-        -------
-        tuple
-            return of the chosen op, the index of the chosen op
-
-        """
-        assert mutable.key in self.chosen_arch
-        val = self.chosen_arch[mutable.key]
-        assert isinstance(val, dict)
-        idx = val['_idx']
-        assert self.search_space[mutable.key]['_value'][idx] == val['_value']
-        return mutable.choices[idx](*inputs), idx
-
-    def on_forward_input_choice(self, mutable, tensor_list):
-        """
-        Implement the forward of InputChoice
-
-        Parameters
-        ----------
-        mutable: InputChoice
-        tensor_list: list of torch.Tensor
-        tags: list of string
-
-        Returns
-        -------
-        tuple of torch.Tensor and list
-            reduced tensor, mask list
-
-        """
-        assert mutable.key in self.chosen_arch
-        val = self.chosen_arch[mutable.key]
-        assert isinstance(val, dict)
-        mask = [0 for _ in range(mutable.n_candidates)]
-        out = []
-        for i, idx in enumerate(val['_idx']):
-            # check whether idx matches the chosen candidate name
-            assert self.search_space[mutable.key]['_value']['candidates'][idx] == val['_value'][i]
-            out.append(tensor_list[idx])
-            mask[idx] = 1
-        return self._tensor_reduction(mutable.reduction, out), mask
+    def _dump_search_space(self, file_path):
+        with open(file_path, "w") as ss_file:
+            json.dump(self._search_space, ss_file, sort_keys=True, indent=2)
--- a/src/sdk/pynni/nni/nas/pytorch/mutator.py
+++ b/src/sdk/pynni/nni/nas/pytorch/mutator.py
@@ -41,7 +41,8 @@ class Mutator(BaseMutator):

    def reset(self):
        """
-        Reset the mutator by call the `sample_search` to resample (for search).
+        Reset the mutator by calling `sample_search` to resample (for search). Stores the result in a local
+        variable so that `on_forward_layer_choice` and `on_forward_input_choice` can use the decision directly.

        Returns
        -------

--- a/src/sdk/pynni/nni/nas/pytorch/utils.py
+++ b/src/sdk/pynni/nni/nas/pytorch/utils.py
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

+import logging
 from collections import OrderedDict

 _counter = 0

+_logger = logging.getLogger(__name__)
+

 def global_mutable_counting():
    global _counter
@@ -23,6 +26,12 @@ class AverageMeterGroup:
                self.meters[k] = AverageMeter(k, ":4f")
            self.meters[k].update(v)

+    def __getattr__(self, item):
+        return self.meters[item]
+
+    def __getitem__(self, item):
+        return self.meters[item]
+
    def __str__(self):
        return "  ".join(str(v) for _, v in self.meters.items())

@@ -52,6 +61,8 @@ class AverageMeter:
        self.count = 0

    def update(self, val, n=1):
+        if not isinstance(val, float) and not isinstance(val, int):
+            _logger.warning("Values passed to AverageMeter must be number, not %s.", type(val))
        self.val = val
        self.sum += val * n
        self.count += n

--- a/tools/nni_cmd/nnictl_utils.py
+++ b/tools/nni_cmd/nnictl_utils.py
@@ -682,10 +682,13 @@ def search_space_auto_gen(args):
    trial_dir = os.path.expanduser(args.trial_dir)
    file_path = os.path.expanduser(args.file)
    if not os.path.isabs(file_path):
-        abs_file_path = os.path.join(os.getcwd(), file_path)
+        file_path = os.path.join(os.getcwd(), file_path)
    assert os.path.exists(trial_dir)
-    if os.path.exists(abs_file_path):
-        print_warning('%s already exits, will be over written' % abs_file_path)
+    if os.path.exists(file_path):
+        print_warning('%s already exists, will be overwritten.' % file_path)
    print_normal('Dry run to generate search space...')
-    Popen(args.trial_command, cwd=trial_dir, env=dict(os.environ, NNI_GEN_SEARCH_SPACE=abs_file_path), shell=True).wait()
-    print_normal('Dry run to generate search space, Done')
\ No newline at end of file
+    Popen(args.trial_command, cwd=trial_dir, env=dict(os.environ, NNI_GEN_SEARCH_SPACE=file_path), shell=True).wait()
+    if not os.path.exists(file_path):
+        print_warning('Expected search space file \'{}\' generated, but not found.'.format(file_path))
+    else:
+        print_normal('Generate search space done: \'{}\'.'.format(file_path))