Unverified commit 755e313f authored by chicm-ms, committed by GitHub

Merge pull request #2664 from microsoft/v1.7

V1.7 merge back to master
parents 51aebf18 a38df504
#!/bin/bash
set -e
mkdir -p /outputs /tmp
echo "Installing dependencies..."
apt update && apt install -y wget
pip uninstall -y enum34 # https://github.com/iterative/dvc/issues/1995
pip install --no-cache-dir gdown tqdm peewee
echo "Installing NNI..."
cd /nni && echo "y" | source install.sh
cd /tmp
if [ -z "${NASBENCHMARK_DIR}" ]; then
NASBENCHMARK_DIR=~/.nni/nasbenchmark
fi
echo "Downloading NAS-Bench-201..."
gdown https://drive.google.com/uc\?id\=1OOfVPpt-lA4u2HJrXbgrRd42IbfvJMyE -O a.pth
if [ -f "a.pth" ]; then
echo "a.pth found. Skip download."
else
gdown https://drive.google.com/uc\?id\=1OOfVPpt-lA4u2HJrXbgrRd42IbfvJMyE -O a.pth
fi
echo "Generating database..."
rm -f /outputs/nasbench201.db /outputs/nasbench201.db-journal
NASBENCHMARK_DIR=/outputs python -m nni.nas.benchmarks.nasbench201.db_gen a.pth
rm -f ${NASBENCHMARK_DIR}/nasbench201.db ${NASBENCHMARK_DIR}/nasbench201.db-journal
mkdir -p ${NASBENCHMARK_DIR}
python -m nni.nas.benchmarks.nasbench201.db_gen a.pth
rm -f a.pth
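The generated nasbench201.db is a plain SQLite file (the db_gen module writes it via peewee), so it can be inspected directly. A minimal sketch, assuming the default output location used above:

import os
import sqlite3

# default location used by the script above; adjust if NASBENCHMARK_DIR was set
db_path = os.path.expanduser('~/.nni/nasbenchmark/nasbench201.db')
with sqlite3.connect(db_path) as conn:
    tables = [row[0] for row in conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table'")]
    print('tables:', tables)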
#!/bin/bash
set -e
mkdir -p /outputs /tmp
echo "Installing dependencies..."
apt update && apt install -y wget zip
pip install --no-cache-dir tqdm peewee
echo "Installing NNI..."
cd /nni && echo "y" | source install.sh
cd /tmp
if [ -z "${NASBENCHMARK_DIR}" ]; then
NASBENCHMARK_DIR=~/.nni/nasbenchmark
fi
echo "Downloading NDS..."
wget https://dl.fbaipublicfiles.com/nds/data.zip -O data.zip
if [ -f "data.zip" ]; then
echo "data.zip found. Skip download."
else
wget https://dl.fbaipublicfiles.com/nds/data.zip -O data.zip
fi
unzip data.zip
echo "Generating database..."
rm -f /outputs/nds.db /outputs/nds.db-journal
NASBENCHMARK_DIR=/outputs python -m nni.nas.benchmarks.nds.db_gen nds_data
rm -f ${NASBENCHMARK_DIR}/nds.db ${NASBENCHMARK_DIR}/nds.db-journal
mkdir -p ${NASBENCHMARK_DIR}
python -m nni.nas.benchmarks.nds.db_gen nds_data
rm -rf data.zip nds_data
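Both scripts default to ~/.nni/nasbenchmark when NASBENCHMARK_DIR is unset. A small sketch to confirm the generated databases landed where expected; the check itself is only an illustration and mirrors the paths used by the scripts:

import os

bench_dir = os.environ.get('NASBENCHMARK_DIR',
                           os.path.expanduser('~/.nni/nasbenchmark'))
for db in ('nasbench201.db', 'nds.db'):
    path = os.path.join(bench_dir, db)
    print(path, 'found' if os.path.isfile(path) else 'MISSING')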
File mode changed from 100644 to 100755
......@@ -28,10 +28,7 @@ if __name__ == "__main__":
compute_target = ComputeTarget(workspace=ws, name=args.compute_target)
experiment = Experiment(ws, args.experiment_name)
run_config = RunConfiguration()
dependencies = CondaDependencies()
dependencies.add_pip_package("azureml-sdk")
dependencies.add_pip_package("azureml")
run_config.environment.python.conda_dependencies = dependencies
run_config.environment.python.user_managed_dependencies = True
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = args.docker_image
run_config.target = compute_target
......
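The change above drops the per-run CondaDependencies in favor of user-managed dependencies baked into the Docker base image. A minimal sketch of how such a run_config might then be used to submit a run, assuming the azureml-core ScriptRunConfig API; the script name is a placeholder, not part of this PR:

from azureml.core import ScriptRunConfig

# 'trial_runner.py' is hypothetical; run_config follows the RunConfiguration built above
src = ScriptRunConfig(source_directory='.', script='trial_runner.py',
                      run_config=run_config)
run = experiment.submit(src)
run.wait_for_completion(show_output=True)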
......@@ -36,4 +36,5 @@ export class AMLTrialConfig extends TrialConfig {
export class AMLEnvironmentInformation extends EnvironmentInformation {
public amlClient?: AMLClient;
public currentMessageIndex: number = -1;
}
......@@ -14,7 +14,6 @@ class AMLRunnerConnection extends RunnerConnection {
export class AMLCommandChannel extends CommandChannel {
private stopping: boolean = false;
private currentMessageIndex: number = -1;
private sendQueues: [EnvironmentInformation, string][] = [];
private readonly NNI_METRICS_PATTERN: string = `NNISDK_MEb'(?<metrics>.*?)'`;
......@@ -89,7 +88,9 @@ export class AMLCommandChannel extends CommandChannel {
const runnerConnections = [...this.runnerConnections.values()] as AMLRunnerConnection[];
for (const runnerConnection of runnerConnections) {
// loop over all commands for this runner connection
const amlClient = (runnerConnection.environment as AMLEnvironmentInformation).amlClient;
const amlEnvironmentInformation: AMLEnvironmentInformation = runnerConnection.environment as AMLEnvironmentInformation;
const amlClient = amlEnvironmentInformation.amlClient;
let currentMessageIndex = amlEnvironmentInformation.currentMessageIndex;
if (!amlClient) {
throw new Error('AML client not initialized!');
}
......@@ -97,15 +98,16 @@ export class AMLCommandChannel extends CommandChannel {
if (command && Object.prototype.hasOwnProperty.call(command, "trial_runner")) {
const messages = command['trial_runner'];
if (messages) {
if (messages instanceof Object && this.currentMessageIndex < messages.length - 1) {
for (let index = this.currentMessageIndex + 1; index < messages.length; index ++) {
if (messages instanceof Object && currentMessageIndex < messages.length - 1) {
for (let index = currentMessageIndex + 1; index < messages.length; index ++) {
this.handleCommand(runnerConnection.environment, messages[index]);
}
this.currentMessageIndex = messages.length - 1;
} else if (this.currentMessageIndex === -1){
currentMessageIndex = messages.length - 1;
} else if (currentMessageIndex === -1){
this.handleCommand(runnerConnection.environment, messages);
this.currentMessageIndex += 1;
currentMessageIndex += 1;
}
amlEnvironmentInformation.currentMessageIndex = currentMessageIndex;
}
}
}
......
......@@ -12,7 +12,7 @@ import * as component from '../../common/component';
import { getBasePort, getExperimentId, getPlatform } from '../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../common/log';
import { NNIManagerIpConfig, TrainingService, TrialJobApplicationForm, TrialJobMetric, TrialJobStatus } from '../../common/trainingService';
import { delay, getExperimentRootDir, getLogLevel, getVersion, mkDirPSync, uniqueString } from '../../common/utils';
import { delay, getExperimentRootDir, getLogLevel, getVersion, mkDirPSync, uniqueString, getIPV4Address } from '../../common/utils';
import { GPU_INFO, INITIALIZED, KILL_TRIAL_JOB, NEW_TRIAL_JOB, REPORT_METRIC_DATA, SEND_TRIAL_JOB_PARAMETER, STDOUT, TRIAL_END, VERSION_CHECK } from '../../core/commands';
import { GPUSummary } from '../../training_service/common/gpuData';
import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../common/containerJobData';
......@@ -164,6 +164,9 @@ class TrialDispatcher implements TrainingService {
this.commandChannel = environmentService.getCommandChannel(this.commandEmitter);
// TODO: the web channel is hard-coded here; this needs to be improved.
if (this.runnerSettings.nniManagerIP === "" || this.runnerSettings.nniManagerIP === null) {
this.runnerSettings.nniManagerIP = getIPV4Address();
}
this.runnerSettings.nniManagerPort = getBasePort() + 1;
this.runnerSettings.commandChannel = this.commandChannel.channelName;
......
......@@ -263,11 +263,12 @@ class ActivationAPoZRankFilterPrunerMasker(ActivationFilterPrunerMasker):
base_mask['weight_mask'][idx] = 0.
if base_mask['bias_mask'] is not None:
base_mask['bias_mask'][idx] = 0.
if len(activations) >= self.statistics_batch_num and self.pruner.hook_id in self.pruner._fwd_hook_handles:
    self.pruner.remove_activation_collector(self.pruner.hook_id)
return base_mask
def _calc_apoz(self, activations):
"""
Calculate APoZ (average percentage of zeros) of activations.
......
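For reference, APoZ here is the per-channel fraction of zero (post-activation) values across the collected batches. A rough sketch of the statistic as defined, not the exact implementation of _calc_apoz:

import torch

def apoz_per_channel(activations):
    # activations: list of tensors shaped (batch, channels, H, W), e.g. collected by forward hooks
    acts = torch.cat(activations, dim=0)
    # fraction of exactly-zero activations for each output channel
    return (acts == 0).float().mean(dim=(0, 2, 3))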
......@@ -240,7 +240,8 @@ infer_from_inshape = {
'aten::add': lambda module_mask, mask: add_inshape(module_mask, mask),
'aten::cat': lambda module_mask, mask, cat_info, last_visited: cat_inshape(module_mask, mask, cat_info, last_visited),
'aten::mean': lambda module_masks, mask, shape: mean_inshape(module_masks, mask, shape),
'Dropout': lambda module_masks, mask: dropout_inshape(module_masks, mask)
'Dropout': lambda module_masks, mask: dropout_inshape(module_masks, mask),
'Dropout2d': lambda module_masks, mask: dropout_inshape(module_masks, mask)
}
"""
......
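The added 'Dropout2d' entry makes shape/mask inference treat nn.Dropout2d like plain Dropout, passing the input channel mask straight through. A toy module where this matters during speedup; the module is only an illustration, and the ModelSpeedup call in the comment follows the NNI 1.x docs and is an assumption here:

import torch
import torch.nn as nn

class ToyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 3)
        self.drop = nn.Dropout2d(0.5)   # pruned channel masks must propagate through this layer
        self.conv2 = nn.Conv2d(16, 32, 3)

    def forward(self, x):
        return self.conv2(self.drop(torch.relu(self.conv1(x))))

# e.g. ModelSpeedup(ToyNet(), torch.randn(1, 3, 32, 32), 'mask.pth').speedup_model()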
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import logging
import torch
import torch.nn as nn
from nni.compression.torch.compressor import PrunerModuleWrapper
_logger = logging.getLogger(__name__)
try:
    from thop import profile
except ImportError:
    _logger.warning('Please install thop using command: pip install thop')
def count_flops_params(model: nn.Module, input_size, verbose=True):
......@@ -61,8 +58,16 @@ def count_flops_params(model: nn.Module, input_size, verbose=True):
flops, params = profile(model, inputs=(inputs, ), custom_ops=custom_ops, verbose=verbose)
for m in hook_module_list:
m._buffers.pop("weight_mask")
# Remove the registered buffers from the model; this fixes the following issue:
# https://github.com/Lyken17/pytorch-OpCounter/issues/96
for m in model.modules():
if 'total_ops' in m._buffers:
m._buffers.pop("total_ops")
if 'total_params' in m._buffers:
m._buffers.pop("total_params")
return flops, params
......
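A hedged usage sketch for count_flops_params as defined above; the torchvision model is only an illustration and is not part of this change:

import torchvision.models as models

model = models.resnet18()
# input_size is used to build the dummy input passed to thop's profile
flops, params = count_flops_params(model, (1, 3, 224, 224), verbose=False)
print(f'FLOPs: {flops}, parameters: {params}')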
......@@ -259,7 +259,9 @@ class ChannelMaskConflict(MaskFix):
_logger.debug('Layer: %s ', name)
_logger.debug('Original pruned filters: %s', str(all_zeros))
# Update the masks for the layers in the dependency set
if fine_grained:
if fine_grained or out_channels is None:
# skip if the fine-grained pruner is used or none of the layers
# in this dependency set are pruned
continue
if not all_pruned:
# if some layers in the set are not pruned at all
......
......@@ -77,9 +77,9 @@
"typescript": "3.4.5"
},
"scripts": {
"start": "node --max-old-space-size=4096 scripts/start.js",
"build": "node --max-old-space-size=4096 scripts/build.js",
"test": "node --max-old-space-size=4096 scripts/test.js",
"start": "node --max-old-space-size=3072 scripts/start.js",
"build": "node --max-old-space-size=3072 scripts/build.js",
"test": "node --max-old-space-size=3072 scripts/test.js",
"eslint": "npx eslint ./ --ext .tsx,.ts"
},
"eslintConfig": {
......
......@@ -414,17 +414,19 @@ class TableList extends React.Component<TableListProps, TableListState> {
const tableSource: Array<TableRecord> = JSON.parse(JSON.stringify(this.props.tableSource));
// parameters as table columns
const parameterStr: string[] = [];
if (tableSource.length > 0) {
const trialMess = TRIALS.getTrial(tableSource[0].id);
const trial = trialMess.description.parameters;
const parameterColumn: string[] = Object.keys(trial);
parameterColumn.forEach(value => {
parameterStr.push(`${value} (search space)`);
});
if (!EXPERIMENT.isNestedExp()) {
if (tableSource.length > 0) {
const trialMess = TRIALS.getTrial(tableSource[0].id);
const trial = trialMess.description.parameters;
const parameterColumn: string[] = Object.keys(trial);
parameterColumn.forEach(value => {
parameterStr.push(`${value} (search space)`);
});
}
}
// concatenate all trial final keys, drop the duplicate "default" value, and return the list
return (COLUMNPro.concat(parameterStr)).concat(Array.from(new Set(TRIALS.finalKeys())));
const finalKeysList = TRIALS.finalKeys().filter(item => item !== 'default');
return (COLUMNPro.concat(parameterStr)).concat(finalKeysList);
}
// get IColumn[]
......
......@@ -14,6 +14,7 @@ function compareProfiles(profile1?: ExperimentProfile, profile2?: ExperimentProf
class Experiment {
private profileField?: ExperimentProfile = undefined;
private statusField?: NNIManagerStatus = undefined;
private isNestedExperiment: boolean = false;
private isexperimentError: boolean = false;
private experimentErrorMessage: string = '';
private isStatusError: boolean = false;
......@@ -31,6 +32,10 @@ class Experiment {
}
}
public isNestedExp(): boolean {
return this.isNestedExperiment;
}
public experimentError(): boolean {
return this.isexperimentError;
}
......@@ -114,7 +119,14 @@ class Experiment {
}
get searchSpace(): object {
return JSON.parse(this.profile.params.searchSpace);
const result = JSON.parse(this.profile.params.searchSpace);
for (const item in result) {
if (result[item]._value && typeof result[item]._value[0] === 'object') {
this.isNestedExperiment = true;
break;
}
}
return result;
}
get logCollectionEnabled(): boolean {
......
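For context on isNestedExp: the getter above flags an experiment as nested when a parameter's _value list contains objects rather than plain values, as in this illustrative (hypothetical) search space, written here as a Python dict:

# hypothetical nested search space: the _value list of "layer0" holds objects,
# which is exactly what the searchSpace getter detects
nested_search_space = {
    "layer0": {
        "_type": "choice",
        "_value": [
            {"_name": "Empty"},
            {"_name": "Conv", "kernel_size": {"_type": "choice", "_value": [1, 3, 5]}},
        ],
    }
}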
......@@ -15,6 +15,7 @@ jobs:
python3 -m pip install torch==1.3.1 --user
python3 -m pip install keras==2.1.6 --user
python3 -m pip install tensorflow-gpu==1.15.2 tensorflow-estimator==1.15.1 --force --user
python3 -m pip install thop --user
sudo apt-get install swig -y
PATH=$HOME/.local/bin:$PATH nnictl package install --name=SMAC
PATH=$HOME/.local/bin:$PATH nnictl package install --name=BOHB
......