Unverified commit 755e313f authored by chicm-ms, committed by GitHub

Merge pull request #2664 from microsoft/v1.7

V1.7 merge back to master
parents 51aebf18 a38df504
#!/bin/bash
set -e
mkdir -p /outputs /tmp
echo "Installing dependencies..."
apt update && apt install -y wget
pip uninstall -y enum34 # https://github.com/iterative/dvc/issues/1995
pip install --no-cache-dir gdown tqdm peewee
echo "Installing NNI..."
cd /nni && echo "y" | source install.sh
cd /tmp
if [ -z "${NASBENCHMARK_DIR}" ]; then
NASBENCHMARK_DIR=~/.nni/nasbenchmark
fi
echo "Downloading NAS-Bench-201..."
gdown https://drive.google.com/uc\?id\=1OOfVPpt-lA4u2HJrXbgrRd42IbfvJMyE -O a.pth
if [ -f "a.pth" ]; then
echo "a.pth found. Skip download."
else
gdown https://drive.google.com/uc\?id\=1OOfVPpt-lA4u2HJrXbgrRd42IbfvJMyE -O a.pth
fi
echo "Generating database..."
rm -f /outputs/nasbench201.db /outputs/nasbench201.db-journal
NASBENCHMARK_DIR=/outputs python -m nni.nas.benchmarks.nasbench201.db_gen a.pth
rm -f ${NASBENCHMARK_DIR}/nasbench201.db ${NASBENCHMARK_DIR}/nasbench201.db-journal
mkdir -p ${NASBENCHMARK_DIR}
python -m nni.nas.benchmarks.nasbench201.db_gen a.pth
rm -f a.pth
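The generated nasbench201.db is a plain SQLite file (the db_gen module writes it via peewee), so it can be inspected directly. A minimal sketch, assuming the default output location used above:

import os
import sqlite3

# default location used by the script above; adjust if NASBENCHMARK_DIR was set
db_path = os.path.expanduser('~/.nni/nasbenchmark/nasbench201.db')
with sqlite3.connect(db_path) as conn:
    tables = [row[0] for row in conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table'")]
    print('tables:', tables)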
#!/bin/bash
set -e
mkdir -p /outputs /tmp
echo "Installing dependencies..."
apt update && apt install -y wget zip
pip install --no-cache-dir tqdm peewee
echo "Installing NNI..."
cd /nni && echo "y" | source install.sh
cd /tmp
if [ -z "${NASBENCHMARK_DIR}" ]; then
NASBENCHMARK_DIR=~/.nni/nasbenchmark
fi
echo "Downloading NDS..."
wget https://dl.fbaipublicfiles.com/nds/data.zip -O data.zip
if [ -f "data.zip" ]; then
echo "data.zip found. Skip download."
else
wget https://dl.fbaipublicfiles.com/nds/data.zip -O data.zip
fi
unzip data.zip
echo "Generating database..."
rm -f /outputs/nds.db /outputs/nds.db-journal
NASBENCHMARK_DIR=/outputs python -m nni.nas.benchmarks.nds.db_gen nds_data
rm -f ${NASBENCHMARK_DIR}/nds.db ${NASBENCHMARK_DIR}/nds.db-journal
mkdir -p ${NASBENCHMARK_DIR}
python -m nni.nas.benchmarks.nds.db_gen nds_data
rm -rf data.zip nds_data
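Both scripts default to ~/.nni/nasbenchmark when NASBENCHMARK_DIR is unset. A small sketch to confirm the generated databases landed where expected; the check itself is only an illustration and mirrors the paths used by the scripts:

import os

bench_dir = os.environ.get('NASBENCHMARK_DIR',
                           os.path.expanduser('~/.nni/nasbenchmark'))
for db in ('nasbench201.db', 'nds.db'):
    path = os.path.join(bench_dir, db)
    print(path, 'found' if os.path.isfile(path) else 'MISSING')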
File mode changed from 100644 to 100755
......@@ -28,10 +28,7 @@ if __name__ == "__main__":
compute_target = ComputeTarget(workspace=ws, name=args.compute_target)
experiment = Experiment(ws, args.experiment_name)
run_config = RunConfiguration()
dependencies = CondaDependencies()
dependencies.add_pip_package("azureml-sdk")
dependencies.add_pip_package("azureml")
run_config.environment.python.conda_dependencies = dependencies
run_config.environment.python.user_managed_dependencies = True
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = args.docker_image
run_config.target = compute_target
......
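The change above drops the per-run CondaDependencies in favor of user-managed dependencies baked into the Docker base image. A minimal sketch of how such a run_config might then be used to submit a run, assuming the azureml-core ScriptRunConfig API; the script name is a placeholder, not part of this PR:

from azureml.core import ScriptRunConfig

# 'trial_runner.py' is hypothetical; run_config follows the RunConfiguration built above
src = ScriptRunConfig(source_directory='.', script='trial_runner.py',
                      run_config=run_config)
run = experiment.submit(src)
run.wait_for_completion(show_output=True)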
......@@ -36,4 +36,5 @@ export class AMLTrialConfig extends TrialConfig {
export class AMLEnvironmentInformation extends EnvironmentInformation {
public amlClient?: AMLClient;
public currentMessageIndex: number = -1;
}
......@@ -14,7 +14,6 @@ class AMLRunnerConnection extends RunnerConnection {
export class AMLCommandChannel extends CommandChannel {
private stopping: boolean = false;
private currentMessageIndex: number = -1;
private sendQueues: [EnvironmentInformation, string][] = [];
private readonly NNI_METRICS_PATTERN: string = `NNISDK_MEb'(?<metrics>.*?)'`;
......@@ -89,7 +88,9 @@ export class AMLCommandChannel extends CommandChannel {
const runnerConnections = [...this.runnerConnections.values()] as AMLRunnerConnection[];
for (const runnerConnection of runnerConnections) {
// loop over all commands for this runner connection
const amlClient = (runnerConnection.environment as AMLEnvironmentInformation).amlClient;
const amlEnvironmentInformation: AMLEnvironmentInformation = runnerConnection.environment as AMLEnvironmentInformation;
const amlClient = amlEnvironmentInformation.amlClient;
let currentMessageIndex = amlEnvironmentInformation.currentMessageIndex;
if (!amlClient) {
throw new Error('AML client not initialized!');
}
......@@ -97,15 +98,16 @@ export class AMLCommandChannel extends CommandChannel {
if (command && Object.prototype.hasOwnProperty.call(command, "trial_runner")) {
const messages = command['trial_runner'];
if (messages) {
if (messages instanceof Object && this.currentMessageIndex < messages.length - 1) {
for (let index = this.currentMessageIndex + 1; index < messages.length; index ++) {
if (messages instanceof Object && currentMessageIndex < messages.length - 1) {
for (let index = currentMessageIndex + 1; index < messages.length; index ++) {
this.handleCommand(runnerConnection.environment, messages[index]);
}
this.currentMessageIndex = messages.length - 1;
} else if (this.currentMessageIndex === -1){
currentMessageIndex = messages.length - 1;
} else if (currentMessageIndex === -1){
this.handleCommand(runnerConnection.environment, messages);
this.currentMessageIndex += 1;
currentMessageIndex += 1;
}
amlEnvironmentInformation.currentMessageIndex = currentMessageIndex;
}
}
}
......
......@@ -12,7 +12,7 @@ import * as component from '../../common/component';
import { getBasePort, getExperimentId, getPlatform } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log';
import { NNIManagerIpConfig, TrainingService, TrialJobApplicationForm, TrialJobMetric, TrialJobStatus } from '../../common/trainingService';
import { delay, getExperimentRootDir, getLogLevel, getVersion, mkDirPSync, uniqueString } from '../../common/utils';
import { delay, getExperimentRootDir, getLogLevel, getVersion, mkDirPSync, uniqueString, getIPV4Address } from '../../common/utils';
import { GPU_INFO, INITIALIZED, KILL_TRIAL_JOB, NEW_TRIAL_JOB, REPORT_METRIC_DATA, SEND_TRIAL_JOB_PARAMETER, STDOUT, TRIAL_END, VERSION_CHECK } from '../../core/commands';
import { GPUSummary } from '../../training_service/common/gpuData';
import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../common/containerJobData';
......@@ -164,6 +164,9 @@ class TrialDispatcher implements TrainingService {
this.commandChannel = environmentService.getCommandChannel(this.commandEmitter);
// TODO: the web channel is hard-coded here; this needs to be improved.
if (this.runnerSettings.nniManagerIP === "" || this.runnerSettings.nniManagerIP === null) {
this.runnerSettings.nniManagerIP = getIPV4Address();
}
this.runnerSettings.nniManagerPort = getBasePort() + 1;
this.runnerSettings.commandChannel = this.commandChannel.channelName;
......
......@@ -263,11 +263,12 @@ class ActivationAPoZRankFilterPrunerMasker(ActivationFilterPrunerMasker):
base_mask['weight_mask'][idx] = 0.
if base_mask['bias_mask'] is not None:
base_mask['bias_mask'][idx] = 0.
if len(activations) >= self.statistics_batch_num and self.pruner.hook_id in self.pruner._fwd_hook_handles:
    self.pruner.remove_activation_collector(self.pruner.hook_id)
return base_mask
def _calc_apoz(self, activations):
"""
Calculate APoZ (average percentage of zeros) of activations.
......
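For reference, APoZ here is the per-channel fraction of zero (post-activation) values across the collected batches. A rough sketch of the statistic as defined, not the exact implementation of _calc_apoz:

import torch

def apoz_per_channel(activations):
    # activations: list of tensors shaped (batch, channels, H, W), e.g. collected by forward hooks
    acts = torch.cat(activations, dim=0)
    # fraction of exactly-zero activations for each output channel
    return (acts == 0).float().mean(dim=(0, 2, 3))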
......@@ -240,7 +240,8 @@ infer_from_inshape = {
'aten::add': lambda module_mask, mask: add_inshape(module_mask, mask),
'aten::cat': lambda module_mask, mask, cat_info, last_visited: cat_inshape(module_mask, mask, cat_info, last_visited),
'aten::mean': lambda module_masks, mask, shape: mean_inshape(module_masks, mask, shape),
'Dropout': lambda module_masks, mask: dropout_inshape(module_masks, mask)
'Dropout': lambda module_masks, mask: dropout_inshape(module_masks, mask),
'Dropout2d': lambda module_masks, mask: dropout_inshape(module_masks, mask)
}
"""
......
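The added 'Dropout2d' entry makes shape/mask inference treat nn.Dropout2d like plain Dropout, passing the input channel mask straight through. A toy module where this matters during speedup; the module is only an illustration, and the ModelSpeedup call in the comment follows the NNI 1.x docs and is an assumption here:

import torch
import torch.nn as nn

class ToyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 3)
        self.drop = nn.Dropout2d(0.5)   # pruned channel masks must propagate through this layer
        self.conv2 = nn.Conv2d(16, 32, 3)

    def forward(self, x):
        return self.conv2(self.drop(torch.relu(self.conv1(x))))

# e.g. ModelSpeedup(ToyNet(), torch.randn(1, 3, 32, 32), 'mask.pth').speedup_model()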
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import logging
import torch
import torch.nn as nn
from nni.compression.torch.compressor import PrunerModuleWrapper
_logger = logging.getLogger(__name__)
try:
    from thop import profile
except ImportError:
    _logger.warning('Please install thop using command: pip install thop')
def count_flops_params(model: nn.Module, input_size, verbose=True):
......@@ -61,8 +58,16 @@ def count_flops_params(model: nn.Module, input_size, verbose=True):
flops, params = profile(model, inputs=(inputs, ), custom_ops=custom_ops, verbose=verbose)
for m in hook_module_list:
m._buffers.pop("weight_mask")
# Remove the registered buffers from the model; this fixes the following issue:
# https://github.com/Lyken17/pytorch-OpCounter/issues/96
for m in model.modules():
if 'total_ops' in m._buffers:
m._buffers.pop("total_ops")
if 'total_params' in m._buffers:
m._buffers.pop("total_params")
return flops, params
......
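A hedged usage sketch for count_flops_params as defined above; the torchvision model is only an illustration and is not part of this change:

import torchvision.models as models

model = models.resnet18()
# input_size is used to build the dummy input passed to thop's profile
flops, params = count_flops_params(model, (1, 3, 224, 224), verbose=False)
print(f'FLOPs: {flops}, parameters: {params}')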
......@@ -259,7 +259,9 @@ class ChannelMaskConflict(MaskFix):
_logger.debug('Layer: %s ', name)
_logger.debug('Original pruned filters: %s', str(all_zeros))
# Update the masks for the layers in the dependency set
if fine_grained:
if fine_grained or out_channels is None:
# skip if the fine-grained pruner is used or none of the layers
# in this dependency set are pruned
continue
if not all_pruned:
# if some layers in the set are not pruned at all
......
......@@ -77,9 +77,9 @@
"typescript": "3.4.5"
},
"scripts": {
"start": "node --max-old-space-size=4096 scripts/start.js",
"build": "node --max-old-space-size=4096 scripts/build.js",
"test": "node --max-old-space-size=4096 scripts/test.js",
"start": "node --max-old-space-size=3072 scripts/start.js",
"build": "node --max-old-space-size=3072 scripts/build.js",
"test": "node --max-old-space-size=3072 scripts/test.js",
"eslint": "npx eslint ./ --ext .tsx,.ts"
},
"eslintConfig": {
......
......@@ -414,17 +414,19 @@ class TableList extends React.Component<TableListProps, TableListState> {
const tableSource: Array<TableRecord> = JSON.parse(JSON.stringify(this.props.tableSource));
// parameters as table columns
const parameterStr: string[] = [];
if (tableSource.length > 0) {
const trialMess = TRIALS.getTrial(tableSource[0].id);
const trial = trialMess.description.parameters;
const parameterColumn: string[] = Object.keys(trial);
parameterColumn.forEach(value => {
parameterStr.push(`${value} (search space)`);
});
if (!EXPERIMENT.isNestedExp()) {
if (tableSource.length > 0) {
const trialMess = TRIALS.getTrial(tableSource[0].id);
const trial = trialMess.description.parameters;
const parameterColumn: string[] = Object.keys(trial);
parameterColumn.forEach(value => {
parameterStr.push(`${value} (search space)`);
});
}
}
// concatenate all trial final keys, drop the duplicate "default" value, and return the list
return (COLUMNPro.concat(parameterStr)).concat(Array.from(new Set(TRIALS.finalKeys())));
const finalKeysList = TRIALS.finalKeys().filter(item => item !== 'default');
return (COLUMNPro.concat(parameterStr)).concat(finalKeysList);
}
// get IColumn[]
......
......@@ -14,6 +14,7 @@ function compareProfiles(profile1?: ExperimentProfile, profile2?: ExperimentProf
class Experiment {
private profileField?: ExperimentProfile = undefined;
private statusField?: NNIManagerStatus = undefined;
private isNestedExperiment: boolean = false;
private isexperimentError: boolean = false;
private experimentErrorMessage: string = '';
private isStatusError: boolean = false;
......@@ -31,6 +32,10 @@ class Experiment {
}
}
public isNestedExp(): boolean {
return this.isNestedExperiment;
}
public experimentError(): boolean {
return this.isexperimentError;
}
......@@ -114,7 +119,14 @@ class Experiment {
}
get searchSpace(): object {
return JSON.parse(this.profile.params.searchSpace);
const result = JSON.parse(this.profile.params.searchSpace);
for (const item in result) {
if (result[item]._value && typeof result[item]._value[0] === 'object') {
this.isNestedExperiment = true;
break;
}
}
return result;
}
get logCollectionEnabled(): boolean {
......
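For context on isNestedExp: the getter above flags an experiment as nested when a parameter's _value list contains objects rather than plain values, as in this illustrative (hypothetical) search space, written here as a Python dict:

# hypothetical nested search space: the _value list of "layer0" holds objects,
# which is exactly what the searchSpace getter detects
nested_search_space = {
    "layer0": {
        "_type": "choice",
        "_value": [
            {"_name": "Empty"},
            {"_name": "Conv", "kernel_size": {"_type": "choice", "_value": [1, 3, 5]}},
        ],
    }
}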
......@@ -15,6 +15,7 @@ jobs:
python3 -m pip install torch==1.3.1 --user
python3 -m pip install keras==2.1.6 --user
python3 -m pip install tensorflow-gpu==1.15.2 tensorflow-estimator==1.15.1 --force --user
python3 -m pip install thop --user
sudo apt-get install swig -y
PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC
PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB
......