Unverified Commit b63b5246 authored by Yan Ni's avatar Yan Ni Committed by GitHub
Browse files

Merge pull request #1911 from leckie-chn/v1.3-rc0

merge v1.3 (conflict resolved) back to master
parents 1b219414 87dc3cdc
...@@ -25,7 +25,7 @@ export class PAIJobInfoCollector { ...@@ -25,7 +25,7 @@ export class PAIJobInfoCollector {
this.finalStatuses = ['SUCCEEDED', 'FAILED', 'USER_CANCELED', 'SYS_CANCELED', 'EARLY_STOPPED']; this.finalStatuses = ['SUCCEEDED', 'FAILED', 'USER_CANCELED', 'SYS_CANCELED', 'EARLY_STOPPED'];
} }
public async retrieveTrialStatus(token? : string, paiBaseClusterConfig?: PAIClusterConfig): Promise<void> { public async retrieveTrialStatus(protocol: string, token? : string, paiBaseClusterConfig?: PAIClusterConfig): Promise<void> {
if (paiBaseClusterConfig === undefined || token === undefined) { if (paiBaseClusterConfig === undefined || token === undefined) {
return Promise.resolve(); return Promise.resolve();
} }
...@@ -35,13 +35,13 @@ export class PAIJobInfoCollector { ...@@ -35,13 +35,13 @@ export class PAIJobInfoCollector {
if (paiTrialJob === undefined) { if (paiTrialJob === undefined) {
throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`); throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`);
} }
updatePaiTrialJobs.push(this.getSinglePAITrialJobInfo(paiTrialJob, token, paiBaseClusterConfig)); updatePaiTrialJobs.push(this.getSinglePAITrialJobInfo(protocol, paiTrialJob, token, paiBaseClusterConfig));
} }
await Promise.all(updatePaiTrialJobs); await Promise.all(updatePaiTrialJobs);
} }
private getSinglePAITrialJobInfo(paiTrialJob: PAITrialJobDetail, paiToken: string, paiClusterConfig: PAIClusterConfig): Promise<void> { private getSinglePAITrialJobInfo(protocol: string, paiTrialJob: PAITrialJobDetail, paiToken: string, paiClusterConfig: PAIClusterConfig): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>(); const deferred: Deferred<void> = new Deferred<void>();
if (!this.statusesNeedToCheck.includes(paiTrialJob.status)) { if (!this.statusesNeedToCheck.includes(paiTrialJob.status)) {
deferred.resolve(); deferred.resolve();
...@@ -52,7 +52,7 @@ export class PAIJobInfoCollector { ...@@ -52,7 +52,7 @@ export class PAIJobInfoCollector {
// Rest call to get PAI job info and update status // Rest call to get PAI job info and update status
// Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API // Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API
const getJobInfoRequest: request.Options = { const getJobInfoRequest: request.Options = {
uri: `http://${paiClusterConfig.host}/rest-server/api/v1/user/${paiClusterConfig.userName}/jobs/${paiTrialJob.paiJobName}`, uri: `${protocol}://${paiClusterConfig.host}/rest-server/api/v1/user/${paiClusterConfig.userName}/jobs/${paiTrialJob.paiJobName}`,
method: 'GET', method: 'GET',
json: true, json: true,
headers: { headers: {
...@@ -81,7 +81,11 @@ export class PAIJobInfoCollector { ...@@ -81,7 +81,11 @@ export class PAIJobInfoCollector {
paiTrialJob.startTime = response.body.jobStatus.appLaunchedTime; paiTrialJob.startTime = response.body.jobStatus.appLaunchedTime;
} }
if (paiTrialJob.url === undefined) { if (paiTrialJob.url === undefined) {
if (response.body.jobStatus.appTrackingUrl) {
paiTrialJob.url = response.body.jobStatus.appTrackingUrl; paiTrialJob.url = response.body.jobStatus.appTrackingUrl;
} else {
paiTrialJob.url = paiTrialJob.logPath;
}
} }
break; break;
case 'SUCCEEDED': case 'SUCCEEDED':
...@@ -114,7 +118,7 @@ export class PAIJobInfoCollector { ...@@ -114,7 +118,7 @@ export class PAIJobInfoCollector {
} }
// Set pai trial job's url to WebHDFS output path // Set pai trial job's url to WebHDFS output path
if (paiTrialJob.logPath !== undefined) { if (paiTrialJob.logPath !== undefined) {
if (paiTrialJob.url) { if (paiTrialJob.url && paiTrialJob.url !== paiTrialJob.logPath) {
paiTrialJob.url += `,${paiTrialJob.logPath}`; paiTrialJob.url += `,${paiTrialJob.logPath}`;
} else { } else {
paiTrialJob.url = `${paiTrialJob.logPath}`; paiTrialJob.url = `${paiTrialJob.logPath}`;
......
...@@ -62,6 +62,7 @@ class PAIK8STrainingService extends PAITrainingService { ...@@ -62,6 +62,7 @@ class PAIK8STrainingService extends PAITrainingService {
case TrialConfigMetadataKey.PAI_CLUSTER_CONFIG: case TrialConfigMetadataKey.PAI_CLUSTER_CONFIG:
this.paiJobRestServer = new PAIJobRestServer(component.get(PAIK8STrainingService)); this.paiJobRestServer = new PAIJobRestServer(component.get(PAIK8STrainingService));
this.paiClusterConfig = <PAIClusterConfig>JSON.parse(value); this.paiClusterConfig = <PAIClusterConfig>JSON.parse(value);
this.paiClusterConfig.host = this.formatPAIHost(this.paiClusterConfig.host);
if(this.paiClusterConfig.passWord) { if(this.paiClusterConfig.passWord) {
// Get PAI authentication token // Get PAI authentication token
await this.updatePaiToken(); await this.updatePaiToken();
...@@ -257,7 +258,7 @@ class PAIK8STrainingService extends PAITrainingService { ...@@ -257,7 +258,7 @@ class PAIK8STrainingService extends PAITrainingService {
// Step 3. Submit PAI job via Rest call // Step 3. Submit PAI job via Rest call
// Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API // Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API
const submitJobRequest: request.Options = { const submitJobRequest: request.Options = {
uri: `http://${this.paiClusterConfig.host}/rest-server/api/v2/jobs`, uri: `${this.protocol}://${this.paiClusterConfig.host}/rest-server/api/v2/jobs`,
method: 'POST', method: 'POST',
body: paiJobConfig, body: paiJobConfig,
headers: { headers: {
......
...@@ -52,6 +52,7 @@ abstract class PAITrainingService implements TrainingService { ...@@ -52,6 +52,7 @@ abstract class PAITrainingService implements TrainingService {
protected authFileHdfsPath: string | undefined = undefined; protected authFileHdfsPath: string | undefined = undefined;
protected portList?: string | undefined; protected portList?: string | undefined;
protected paiJobRestServer?: PAIJobRestServer; protected paiJobRestServer?: PAIJobRestServer;
protected protocol: string = 'http';
constructor() { constructor() {
this.log = getLogger(); this.log = getLogger();
...@@ -165,7 +166,7 @@ abstract class PAITrainingService implements TrainingService { ...@@ -165,7 +166,7 @@ abstract class PAITrainingService implements TrainingService {
} }
const stopJobRequest: request.Options = { const stopJobRequest: request.Options = {
uri: `http://${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}\ uri: `${this.protocol}://${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}\
/jobs/${trialJobDetail.paiJobName}/executionType`, /jobs/${trialJobDetail.paiJobName}/executionType`,
method: 'PUT', method: 'PUT',
json: true, json: true,
...@@ -216,6 +217,20 @@ abstract class PAITrainingService implements TrainingService { ...@@ -216,6 +217,20 @@ abstract class PAITrainingService implements TrainingService {
return this.metricsEmitter; return this.metricsEmitter;
} }
protected formatPAIHost(host: string): string {
    // Normalize the user-supplied PAI host: if it carries an explicit
    // scheme prefix, remember the scheme in `this.protocol` and return
    // the host with the prefix stripped; otherwise return it unchanged
    // (this.protocol keeps its current value, which defaults to 'http').
    const schemes: Array<[string, string]> = [
        ['http', 'http://'],
        ['https', 'https://']
    ];
    for (const [scheme, prefix] of schemes) {
        if (host.startsWith(prefix)) {
            this.protocol = scheme;
            return host.slice(prefix.length);
        }
    }
    return host;
}
protected async statusCheckingLoop(): Promise<void> { protected async statusCheckingLoop(): Promise<void> {
while (!this.stopping) { while (!this.stopping) {
if(this.paiClusterConfig && this.paiClusterConfig.passWord) { if(this.paiClusterConfig && this.paiClusterConfig.passWord) {
...@@ -229,7 +244,7 @@ abstract class PAITrainingService implements TrainingService { ...@@ -229,7 +244,7 @@ abstract class PAITrainingService implements TrainingService {
} }
} }
} }
await this.paiJobCollector.retrieveTrialStatus(this.paiToken, this.paiClusterConfig); await this.paiJobCollector.retrieveTrialStatus(this.protocol, this.paiToken, this.paiClusterConfig);
if (this.paiJobRestServer === undefined) { if (this.paiJobRestServer === undefined) {
throw new Error('paiBaseJobRestServer not implemented!'); throw new Error('paiBaseJobRestServer not implemented!');
} }
...@@ -259,7 +274,7 @@ abstract class PAITrainingService implements TrainingService { ...@@ -259,7 +274,7 @@ abstract class PAITrainingService implements TrainingService {
} }
const authenticationReq: request.Options = { const authenticationReq: request.Options = {
uri: `http://${this.paiClusterConfig.host}/rest-server/api/v1/token`, uri: `${this.protocol}://${this.paiClusterConfig.host}/rest-server/api/v1/token`,
method: 'POST', method: 'POST',
json: true, json: true,
body: { body: {
......
...@@ -91,6 +91,7 @@ class PAIYarnTrainingService extends PAITrainingService { ...@@ -91,6 +91,7 @@ class PAIYarnTrainingService extends PAITrainingService {
case TrialConfigMetadataKey.PAI_YARN_CLUSTER_CONFIG: case TrialConfigMetadataKey.PAI_YARN_CLUSTER_CONFIG:
this.paiJobRestServer = new PAIJobRestServer(component.get(PAIYarnTrainingService)); this.paiJobRestServer = new PAIJobRestServer(component.get(PAIYarnTrainingService));
this.paiClusterConfig = <PAIClusterConfig>JSON.parse(value); this.paiClusterConfig = <PAIClusterConfig>JSON.parse(value);
this.paiClusterConfig.host = this.formatPAIHost(this.paiClusterConfig.host);
this.hdfsClient = WebHDFS.createClient({ this.hdfsClient = WebHDFS.createClient({
user: this.paiClusterConfig.userName, user: this.paiClusterConfig.userName,
...@@ -98,7 +99,9 @@ class PAIYarnTrainingService extends PAITrainingService { ...@@ -98,7 +99,9 @@ class PAIYarnTrainingService extends PAITrainingService {
port: 80, port: 80,
path: '/webhdfs/api/v1', path: '/webhdfs/api/v1',
host: this.paiClusterConfig.host host: this.paiClusterConfig.host
}); });
this.paiClusterConfig.host = this.formatPAIHost(this.paiClusterConfig.host);
if(this.paiClusterConfig.passWord) { if(this.paiClusterConfig.passWord) {
// Get PAI authentication token // Get PAI authentication token
await this.updatePaiToken(); await this.updatePaiToken();
...@@ -107,7 +110,6 @@ class PAIYarnTrainingService extends PAITrainingService { ...@@ -107,7 +110,6 @@ class PAIYarnTrainingService extends PAITrainingService {
} else { } else {
throw new Error('pai cluster config format error, please set password or token!'); throw new Error('pai cluster config format error, please set password or token!');
} }
break; break;
case TrialConfigMetadataKey.TRIAL_CONFIG: case TrialConfigMetadataKey.TRIAL_CONFIG:
...@@ -272,7 +274,7 @@ class PAIYarnTrainingService extends PAITrainingService { ...@@ -272,7 +274,7 @@ class PAIYarnTrainingService extends PAITrainingService {
// Step 3. Submit PAI job via Rest call // Step 3. Submit PAI job via Rest call
// Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API // Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API
const submitJobRequest: request.Options = { const submitJobRequest: request.Options = {
uri: `http://${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}/jobs`, uri: `${this.protocol}://${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}/jobs`,
method: 'POST', method: 'POST',
json: true, json: true,
body: paiJobConfig, body: paiJobConfig,
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# Licensed under the MIT license. # Licensed under the MIT license.
from .compressor import LayerInfo, Compressor, Pruner, Quantizer from .compressor import LayerInfo, Compressor, Pruner, Quantizer
from .builtin_pruners import * from .pruners import *
from .builtin_quantizers import * from .weight_rank_filter_pruners import *
from .lottery_ticket import LotteryTicketPruner from .activation_rank_filter_pruners import *
from .quantizers import *
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import logging
import torch
from .compressor import Pruner
__all__ = ['ActivationAPoZRankFilterPruner', 'ActivationMeanRankFilterPruner']
logger = logging.getLogger('torch activation rank filter pruners')
class ActivationRankFilterPruner(Pruner):
    """
    A structured pruning base class that prunes the filters with the smallest
    importance criterion in convolution layers (using activation values)
    to achieve a preset level of network sparsity.

    Subclasses implement ``get_mask`` with the concrete ranking criterion;
    activations are gathered through forward hooks registered in ``compress``.
    """
    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
        """
        Parameters
        ----------
        model : torch.nn.module
            Model to be pruned
        config_list : list
            support key for each list item:
                - sparsity: percentage of convolutional filters to be pruned.
        activation : str
            Activation function ('relu' or 'relu6')
        statistics_batch_num : int
            Num of batches for activation statistics
        """
        super().__init__(model, config_list)
        # Ops whose mask has already been computed and cached in mask_dict.
        self.mask_calculated_ops = set()
        self.statistics_batch_num = statistics_batch_num
        # Per-layer-name list of recorded (post-activation) outputs.
        self.collected_activation = {}
        self.hooks = {}
        assert activation in ['relu', 'relu6']
        if activation == 'relu':
            self.activation = torch.nn.functional.relu
        elif activation == 'relu6':
            self.activation = torch.nn.functional.relu6
        else:
            # Unreachable given the assert above; kept defensively.
            self.activation = None

    def compress(self):
        """
        Compress the model, register a hook for collecting activations.
        """
        modules_to_compress = self.detect_modules_to_compress()
        for layer, config in modules_to_compress:
            self._instrument_layer(layer, config)
            self.collected_activation[layer.name] = []

            # `name` is bound per-layer via the default argument so each
            # hook appends into its own layer's bucket; recording stops
            # once statistics_batch_num batches have been seen.
            def _hook(module_, input_, output, name=layer.name):
                if len(self.collected_activation[name]) < self.statistics_batch_num:
                    self.collected_activation[name].append(self.activation(output.detach().cpu()))
            layer.module.register_forward_hook(_hook)
        return self.bound_model

    def get_mask(self, base_mask, activations, num_prune):
        # Concrete ranking criterion supplied by subclasses.
        raise NotImplementedError('{} get_mask is not implemented'.format(self.__class__.__name__))

    def calc_mask(self, layer, config):
        """
        Calculate the mask of given layer.
        Filters with the smallest importance criterion which is calculated from the activation are masked.

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the compression operation
        config : dict
            layer's pruning config

        Returns
        -------
        dict
            dictionary for storing masks
        """
        weight = layer.module.weight.data
        op_name = layer.name
        op_type = layer.type
        assert 0 <= config.get('sparsity') < 1, "sparsity must in the range [0, 1)"
        assert op_type in ['Conv2d'], "only support Conv2d"
        assert op_type in config.get('op_types')
        # Return the cached mask if this op was already processed.
        if op_name in self.mask_calculated_ops:
            assert op_name in self.mask_dict
            return self.mask_dict.get(op_name)
        mask_weight = torch.ones(weight.size()).type_as(weight).detach()
        if hasattr(layer.module, 'bias') and layer.module.bias is not None:
            mask_bias = torch.ones(layer.module.bias.size()).type_as(layer.module.bias).detach()
        else:
            mask_bias = None
        mask = {'weight': mask_weight, 'bias': mask_bias}
        try:
            filters = weight.size(0)
            num_prune = int(filters * config.get('sparsity'))
            # Keep the all-ones mask while statistics are still being
            # collected, or when there is nothing sensible to prune.
            if filters < 2 or num_prune < 1 or len(self.collected_activation[layer.name]) < self.statistics_batch_num:
                return mask
            mask = self.get_mask(mask, self.collected_activation[layer.name], num_prune)
        finally:
            # Runs on the early return above too: cache the mask only once
            # enough activation batches have been observed.
            if len(self.collected_activation[layer.name]) == self.statistics_batch_num:
                self.mask_dict.update({op_name: mask})
                self.mask_calculated_ops.add(op_name)
        return mask
class ActivationAPoZRankFilterPruner(ActivationRankFilterPruner):
    """
    Structured pruner that masks the filters whose output activations have
    the largest APoZ (Average Percentage of Zeros).

    Hengyuan Hu, Rui Peng, Yu-Wing Tai and Chi-Keung Tang,
    "Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures", ICLR 2016.
    https://arxiv.org/abs/1607.03250
    """
    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
        """
        Parameters
        ----------
        model : torch.nn.module
            Model to be pruned
        config_list : list
            support key for each list item:
                - sparsity: percentage of convolutional filters to be pruned.
        activation : str
            Activation function
        statistics_batch_num : int
            Num of batches for activation statistics
        """
        super().__init__(model, config_list, activation, statistics_batch_num)

    def get_mask(self, base_mask, activations, num_prune):
        """
        Mask the ``num_prune`` filters with the highest APoZ of their
        output activations.

        Parameters
        ----------
        base_mask : dict
            The basic mask with the same shape of weight, all item in the basic mask is 1.
        activations : list
            Layer's output activations
        num_prune : int
            Num of filters to prune

        Returns
        -------
        dict
            dictionary for storing masks
        """
        apoz_scores = self._calc_apoz(activations)
        # Highest APoZ first; those filters are the pruning victims.
        victims = torch.argsort(apoz_scores, descending=True)[:num_prune]
        for filter_idx in victims:
            base_mask['weight'][filter_idx] = 0.
            if base_mask['bias'] is not None:
                base_mask['bias'][filter_idx] = 0.
        return base_mask

    def _calc_apoz(self, activations):
        """
        Compute per-filter APoZ (average percentage of zeros) over all
        collected activation batches.

        Parameters
        ----------
        activations : list
            Layer's output activations

        Returns
        -------
        torch.Tensor
            Filter's APoZ(average percentage of zeros) of the activations
        """
        stacked = torch.cat(activations, 0)
        zero_mask = torch.eq(stacked, torch.zeros_like(stacked))
        # Fraction of zero entries per filter; channel dim is 1, so the
        # denominator is the element count of one channel slice.
        per_filter_elems = torch.numel(zero_mask[:, 0, :, :])
        return torch.sum(zero_mask, dim=(0, 2, 3)) / per_filter_elems
class ActivationMeanRankFilterPruner(ActivationRankFilterPruner):
    """
    Structured pruner that masks the filters whose output activations have
    the smallest mean value.

    Pavlo Molchanov, Stephen Tyree, Tero Karras, Timo Aila and Jan Kautz,
    "Pruning Convolutional Neural Networks for Resource Efficient Inference", ICLR 2017.
    https://arxiv.org/abs/1611.06440
    """
    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
        """
        Parameters
        ----------
        model : torch.nn.module
            Model to be pruned
        config_list : list
            support key for each list item:
                - sparsity: percentage of convolutional filters to be pruned.
        activation : str
            Activation function
        statistics_batch_num : int
            Num of batches for activation statistics
        """
        super().__init__(model, config_list, activation, statistics_batch_num)

    def get_mask(self, base_mask, activations, num_prune):
        """
        Mask the ``num_prune`` filters with the smallest mean value of
        their output activations.

        Parameters
        ----------
        base_mask : dict
            The basic mask with the same shape of weight, all item in the basic mask is 1.
        activations : list
            Layer's output activations
        num_prune : int
            Num of filters to prune

        Returns
        -------
        dict
            dictionary for storing masks
        """
        means = self._cal_mean_activation(activations)
        # Smallest mean activation first; those filters are pruned.
        victims = torch.argsort(means)[:num_prune]
        for filter_idx in victims:
            base_mask['weight'][filter_idx] = 0.
            if base_mask['bias'] is not None:
                base_mask['bias'][filter_idx] = 0.
        return base_mask

    def _cal_mean_activation(self, activations):
        """
        Compute each filter's mean activation over all collected batches.

        Parameters
        ----------
        activations : list
            Layer's output activations

        Returns
        -------
        torch.Tensor
            Filter's mean value of the output activations
        """
        stacked = torch.cat(activations, 0)
        return torch.mean(stacked, dim=(0, 2, 3))
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import copy import copy
import logging import logging
import torch import torch
from .compressor import Pruner from .compressor import Pruner
_logger = logging.getLogger(__name__) __all__ = ['LevelPruner', 'AGP_Pruner', 'SlimPruner', 'LotteryTicketPruner']
logger = logging.getLogger('torch pruner')
class LevelPruner(Pruner):
    """
    Prune to an exact pruning level specification: the `sparsity` fraction
    of smallest-magnitude weights in each instrumented layer is masked.
    """
    def __init__(self, model, config_list):
        """
        Parameters
        ----------
        model : torch.nn.module
            Model to be pruned
        config_list : list
            List on pruning configs
        """
        super().__init__(model, config_list)
        # Ops whose mask has already been computed and cached.
        self.mask_calculated_ops = set()

    def calc_mask(self, layer, config):
        """
        Calculate the mask of given layer

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the compression operation
        config : dict
            layer's pruning config

        Returns
        -------
        dict
            dictionary for storing masks
        """
        weight = layer.module.weight.data
        op_name = layer.name
        if op_name not in self.mask_calculated_ops:
            w_abs = weight.abs()
            k = int(weight.numel() * config['sparsity'])
            if k == 0:
                # BUG FIX: this path previously returned a bare tensor while
                # every other path returns a {'weight': tensor} dict; callers
                # consume the dict form, so wrap the all-ones mask too.
                return {'weight': torch.ones(weight.shape).type_as(weight)}
            # Threshold is the k-th smallest magnitude; weights at or below
            # it are masked out.
            threshold = torch.topk(w_abs.view(-1), k, largest=False)[0].max()
            mask_weight = torch.gt(w_abs, threshold).type_as(weight)
            mask = {'weight': mask_weight}
            self.mask_dict.update({op_name: mask})
            self.mask_calculated_ops.add(op_name)
        else:
            assert op_name in self.mask_dict, "op_name not in the mask_dict"
            mask = self.mask_dict[op_name]
        return mask
class AGP_Pruner(Pruner):
    """
    An automated gradual pruning algorithm that prunes the smallest magnitude
    weights to achieve a preset level of network sparsity.

    Michael Zhu and Suyog Gupta, "To prune, or not to prune: exploring the
    efficacy of pruning for model compression", 2017 NIPS Workshop on Machine
    Learning of Phones and other Consumer Devices,
    https://arxiv.org/pdf/1710.01878.pdf
    """
    def __init__(self, model, config_list):
        """
        Parameters
        ----------
        model : torch.nn.module
            Model to be pruned
        config_list : list
            List on pruning configs
        """
        super().__init__(model, config_list)
        self.now_epoch = 0
        # Per-op flag: True means the mask should be (re)computed this epoch.
        self.if_init_list = {}

    def calc_mask(self, layer, config):
        """
        Calculate the mask of given layer

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the compression operation
        config : dict
            layer's pruning config

        Returns
        -------
        dict
            dictionary for storing masks
        """
        weight = layer.module.weight.data
        op_name = layer.name
        start_epoch = config.get('start_epoch', 0)
        freq = config.get('frequency', 1)
        # Recompute the mask only on scheduled epochs (every `freq` epochs
        # from `start_epoch`) and at most once per epoch per op.
        if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) \
                and (self.now_epoch - start_epoch) % freq == 0:
            mask = self.mask_dict.get(op_name, {'weight': torch.ones(weight.shape).type_as(weight)})
            target_sparsity = self.compute_target_sparsity(config)
            k = int(weight.numel() * target_sparsity)
            if k == 0 or target_sparsity >= 1 or target_sparsity <= 0:
                return mask
            # BUG FIX: `mask` is a {'weight': tensor} dict; multiplying the
            # weight tensor by the dict itself raised a TypeError. Use the
            # mask tensor so already-pruned weights stay pruned when ranking.
            w_abs = weight.abs() * mask['weight']
            threshold = torch.topk(w_abs.view(-1), k, largest=False)[0].max()
            new_mask = {'weight': torch.gt(w_abs, threshold).type_as(weight)}
            self.mask_dict.update({op_name: new_mask})
            self.if_init_list.update({op_name: False})
        else:
            new_mask = self.mask_dict.get(op_name, {'weight': torch.ones(weight.shape).type_as(weight)})
        return new_mask

    def compute_target_sparsity(self, config):
        """
        Calculate the sparsity for pruning

        Parameters
        ----------
        config : dict
            Layer's pruning config

        Returns
        -------
        float
            Target sparsity to be pruned
        """
        end_epoch = config.get('end_epoch', 1)
        start_epoch = config.get('start_epoch', 0)
        freq = config.get('frequency', 1)
        final_sparsity = config.get('final_sparsity', 0)
        initial_sparsity = config.get('initial_sparsity', 0)
        if end_epoch <= start_epoch or initial_sparsity >= final_sparsity:
            logger.warning('your end epoch <= start epoch or initial_sparsity >= final_sparsity')
            return final_sparsity
        if end_epoch <= self.now_epoch:
            return final_sparsity
        # Cubic schedule from the AGP paper: sparsity ramps from
        # initial_sparsity to final_sparsity over `span` epochs.
        span = ((end_epoch - start_epoch - 1) // freq) * freq
        assert span > 0
        target_sparsity = (final_sparsity +
                           (initial_sparsity - final_sparsity) *
                           (1.0 - ((self.now_epoch - start_epoch) / span)) ** 3)
        return target_sparsity

    def update_epoch(self, epoch):
        """
        Update epoch

        Parameters
        ----------
        epoch : int
            current training epoch
        """
        if epoch > 0:
            self.now_epoch = epoch
            # Allow every op's mask to be recomputed in the new epoch.
            for k in self.if_init_list.keys():
                self.if_init_list[k] = True
class SlimPruner(Pruner):
    """
    A structured pruning algorithm that prunes channels by pruning the weights of BN layers.

    Zhuang Liu, Jianguo Li, Zhiqiang Shen, Gao Huang, Shoumeng Yan and Changshui Zhang
    "Learning Efficient Convolutional Networks through Network Slimming", 2017 ICCV
    https://arxiv.org/pdf/1708.06519.pdf
    """
    def __init__(self, model, config_list):
        """
        Parameters
        ----------
        model : torch.nn.module
            Model to be pruned
        config_list : list
            support key for each list item:
                - sparsity: percentage of convolutional filters to be pruned.
        """
        super().__init__(model, config_list)
        self.mask_calculated_ops = set()
        weight_list = []
        if len(config_list) > 1:
            logger.warning('Slim pruner only supports 1 configuration')
        config = config_list[0]
        # NOTE(review): the loop below rebinds `config`; with a single
        # configuration (the only supported case) this is harmless, but
        # with several configs the sparsity used for the global threshold
        # would come from the last compressed layer — confirm intended.
        for (layer, config) in self.detect_modules_to_compress():
            assert layer.type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning'
            weight_list.append(layer.module.weight.data.abs().clone())
        # Global threshold: the k-th smallest BN scale factor across ALL
        # instrumented BN layers.
        all_bn_weights = torch.cat(weight_list)
        k = int(all_bn_weights.shape[0] * config['sparsity'])
        self.global_threshold = torch.topk(all_bn_weights.view(-1), k, largest=False)[0].max()

    def calc_mask(self, layer, config):
        """
        Calculate the mask of given layer.
        Scale factors with the smallest absolute value in the BN layer are masked.

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the compression operation
        config : dict
            layer's pruning config

        Returns
        -------
        dict
            dictionary for storing masks
        """
        weight = layer.module.weight.data
        op_name = layer.name
        op_type = layer.type
        assert op_type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning'
        # Return the cached mask when this op was already processed.
        if op_name in self.mask_calculated_ops:
            assert op_name in self.mask_dict
            return self.mask_dict.get(op_name)
        base_mask = torch.ones(weight.size()).type_as(weight).detach()
        mask = {'weight': base_mask.detach(), 'bias': base_mask.clone().detach()}
        try:
            filters = weight.size(0)
            num_prune = int(filters * config.get('sparsity'))
            # Too few filters or nothing to prune: keep the all-ones mask.
            if filters < 2 or num_prune < 1:
                return mask
            w_abs = weight.abs()
            # BN scales at or below the global threshold are zeroed; the
            # bias mask mirrors the weight mask channel-for-channel.
            mask_weight = torch.gt(w_abs, self.global_threshold).type_as(weight)
            mask_bias = mask_weight.clone()
            mask = {'weight': mask_weight.detach(), 'bias': mask_bias.detach()}
        finally:
            # Runs on the early return above as well: cache whichever mask
            # is being returned.
            self.mask_dict.update({layer.name: mask})
            self.mask_calculated_ops.add(layer.name)
        return mask
class LotteryTicketPruner(Pruner): class LotteryTicketPruner(Pruner):
""" """
......
...@@ -5,7 +5,7 @@ import logging ...@@ -5,7 +5,7 @@ import logging
import torch import torch
from .compressor import Quantizer, QuantGrad, QuantType from .compressor import Quantizer, QuantGrad, QuantType
__all__ = ['NaiveQuantizer', 'QAT_Quantizer', 'DoReFaQuantizer'] __all__ = ['NaiveQuantizer', 'QAT_Quantizer', 'DoReFaQuantizer', 'BNNQuantizer']
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
......
...@@ -5,240 +5,9 @@ import logging ...@@ -5,240 +5,9 @@ import logging
import torch import torch
from .compressor import Pruner from .compressor import Pruner
__all__ = ['LevelPruner', 'AGP_Pruner', 'SlimPruner', 'L1FilterPruner', 'L2FilterPruner', 'FPGMPruner', __all__ = ['L1FilterPruner', 'L2FilterPruner', 'FPGMPruner']
'ActivationAPoZRankFilterPruner', 'ActivationMeanRankFilterPruner']
logger = logging.getLogger('torch pruner')
class LevelPruner(Pruner):
"""
Prune to an exact pruning level specification
"""
def __init__(self, model, config_list):
"""
Parameters
----------
model : torch.nn.module
Model to be pruned
config_list : list
List on pruning configs
"""
super().__init__(model, config_list)
self.mask_calculated_ops = set()
def calc_mask(self, layer, config):
"""
Calculate the mask of given layer
Parameters
----------
layer : LayerInfo
the layer to instrument the compression operation
config : dict
layer's pruning config
Returns
-------
dict
dictionary for storing masks
"""
weight = layer.module.weight.data
op_name = layer.name
if op_name not in self.mask_calculated_ops:
w_abs = weight.abs()
k = int(weight.numel() * config['sparsity'])
if k == 0:
return torch.ones(weight.shape).type_as(weight)
threshold = torch.topk(w_abs.view(-1), k, largest=False)[0].max()
mask_weight = torch.gt(w_abs, threshold).type_as(weight)
mask = {'weight': mask_weight}
self.mask_dict.update({op_name: mask})
self.mask_calculated_ops.add(op_name)
else:
assert op_name in self.mask_dict, "op_name not in the mask_dict"
mask = self.mask_dict[op_name]
return mask
class AGP_Pruner(Pruner):
"""
An automated gradual pruning algorithm that prunes the smallest magnitude
weights to achieve a preset level of network sparsity.
Michael Zhu and Suyog Gupta, "To prune, or not to prune: exploring the
efficacy of pruning for model compression", 2017 NIPS Workshop on Machine
Learning of Phones and other Consumer Devices,
https://arxiv.org/pdf/1710.01878.pdf
"""
def __init__(self, model, config_list):
"""
Parameters
----------
model : torch.nn.module
Model to be pruned
config_list : list
List on pruning configs
"""
super().__init__(model, config_list)
self.now_epoch = 0
self.if_init_list = {}
def calc_mask(self, layer, config):
"""
Calculate the mask of given layer
Parameters
----------
layer : LayerInfo
the layer to instrument the compression operation
config : dict
layer's pruning config
Returns
-------
dict
dictionary for storing masks
"""
weight = layer.module.weight.data
op_name = layer.name
start_epoch = config.get('start_epoch', 0)
freq = config.get('frequency', 1)
if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) \
and (self.now_epoch - start_epoch) % freq == 0:
mask = self.mask_dict.get(op_name, {'weight': torch.ones(weight.shape).type_as(weight)})
target_sparsity = self.compute_target_sparsity(config)
k = int(weight.numel() * target_sparsity)
if k == 0 or target_sparsity >= 1 or target_sparsity <= 0:
return mask
# if we want to generate new mask, we should update weigth first
w_abs = weight.abs() * mask
threshold = torch.topk(w_abs.view(-1), k, largest=False)[0].max()
new_mask = {'weight': torch.gt(w_abs, threshold).type_as(weight)}
self.mask_dict.update({op_name: new_mask})
self.if_init_list.update({op_name: False})
else:
new_mask = self.mask_dict.get(op_name, {'weight': torch.ones(weight.shape).type_as(weight)})
return new_mask
def compute_target_sparsity(self, config):
"""
Calculate the sparsity for pruning
Parameters
----------
config : dict
Layer's pruning config
Returns
-------
float
Target sparsity to be pruned
"""
end_epoch = config.get('end_epoch', 1)
start_epoch = config.get('start_epoch', 0)
freq = config.get('frequency', 1)
final_sparsity = config.get('final_sparsity', 0)
initial_sparsity = config.get('initial_sparsity', 0)
if end_epoch <= start_epoch or initial_sparsity >= final_sparsity:
logger.warning('your end epoch <= start epoch or initial_sparsity >= final_sparsity')
return final_sparsity
if end_epoch <= self.now_epoch:
return final_sparsity
span = ((end_epoch - start_epoch - 1) // freq) * freq
assert span > 0
target_sparsity = (final_sparsity +
(initial_sparsity - final_sparsity) *
(1.0 - ((self.now_epoch - start_epoch) / span)) ** 3)
return target_sparsity
def update_epoch(self, epoch):
"""
Update epoch
Parameters
----------
epoch : int
current training epoch
"""
if epoch > 0:
self.now_epoch = epoch
for k in self.if_init_list.keys():
self.if_init_list[k] = True
class SlimPruner(Pruner):
    """
    A structured pruning algorithm that prunes channels by pruning the weights of BN layers.

    Zhuang Liu, Jianguo Li, Zhiqiang Shen, Gao Huang, Shoumeng Yan and Changshui Zhang
    "Learning Efficient Convolutional Networks through Network Slimming", 2017 ICCV
    https://arxiv.org/pdf/1708.06519.pdf
    """

    def __init__(self, model, config_list):
        """
        Parameters
        ----------
        model : torch.nn.Module
            Model to be pruned
        config_list : list
            support key for each list item:
                - sparsity: percentage of convolutional filters to be pruned.
        """
        super().__init__(model, config_list)
        # Layers whose mask has already been computed; calc_mask becomes a cache lookup for them.
        self.mask_calculated_ops = set()
        weight_list = []
        if len(config_list) > 1:
            logger.warning('Slim pruner only supports 1 configuration')
        config = config_list[0]
        # NOTE(review): the loop variable below shadows `config`; after the loop,
        # `config['sparsity']` refers to the *last* matched layer's config, which is
        # only equivalent to config_list[0] when a single configuration is used.
        for (layer, config) in self.detect_modules_to_compress():
            assert layer.type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning'
            weight_list.append(layer.module.weight.data.abs().clone())
        # Global (cross-layer) threshold: the max of the k smallest |BN scale factors|
        # across all BN layers, i.e. the k-th smallest value overall.
        all_bn_weights = torch.cat(weight_list)
        k = int(all_bn_weights.shape[0] * config['sparsity'])
        self.global_threshold = torch.topk(all_bn_weights.view(-1), k, largest=False)[0].max()

    def calc_mask(self, layer, config):
        """
        Calculate the mask of given layer.
        Scale factors with the smallest absolute value in the BN layer are masked.

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the compression operation
        config : dict
            layer's pruning config

        Returns
        -------
        dict
            dictionary for storing masks, with 'weight' and 'bias' entries
        """
        weight = layer.module.weight.data
        op_name = layer.name
        op_type = layer.type
        assert op_type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning'
        # Masks are computed once per op; later calls return the cached mask.
        if op_name in self.mask_calculated_ops:
            assert op_name in self.mask_dict
            return self.mask_dict.get(op_name)
        base_mask = torch.ones(weight.size()).type_as(weight).detach()
        mask = {'weight': base_mask.detach(), 'bias': base_mask.clone().detach()}
        try:
            filters = weight.size(0)
            num_prune = int(filters * config.get('sparsity'))
            # Degenerate cases: nothing would be pruned -> keep the all-ones mask.
            if filters < 2 or num_prune < 1:
                return mask
            w_abs = weight.abs()
            # Keep scale factors strictly above the global threshold; BN bias shares the mask.
            mask_weight = torch.gt(w_abs, self.global_threshold).type_as(weight)
            mask_bias = mask_weight.clone()
            mask = {'weight': mask_weight.detach(), 'bias': mask_bias.detach()}
        finally:
            # Record the mask even on the early return above, so repeated calls are cheap.
            self.mask_dict.update({layer.name: mask})
            self.mask_calculated_ops.add(layer.name)
        return mask
# Module-level logger shared by the filter pruners defined below.
logger = logging.getLogger('torch weight rank filter pruners')
class WeightRankFilterPruner(Pruner): class WeightRankFilterPruner(Pruner):
""" """
...@@ -260,8 +29,8 @@ class WeightRankFilterPruner(Pruner): ...@@ -260,8 +29,8 @@ class WeightRankFilterPruner(Pruner):
super().__init__(model, config_list) super().__init__(model, config_list)
self.mask_calculated_ops = set() # operations whose mask has been calculated self.mask_calculated_ops = set() # operations whose mask has been calculated
def _get_mask(self, base_mask, weight, num_prune): def get_mask(self, base_mask, weight, num_prune):
return {'weight': None, 'bias': None} raise NotImplementedError('{} get_mask is not implemented'.format(self.__class__.__name__))
def calc_mask(self, layer, config): def calc_mask(self, layer, config):
""" """
...@@ -299,7 +68,7 @@ class WeightRankFilterPruner(Pruner): ...@@ -299,7 +68,7 @@ class WeightRankFilterPruner(Pruner):
num_prune = int(filters * config.get('sparsity')) num_prune = int(filters * config.get('sparsity'))
if filters < 2 or num_prune < 1: if filters < 2 or num_prune < 1:
return mask return mask
mask = self._get_mask(mask, weight, num_prune) mask = self.get_mask(mask, weight, num_prune)
finally: finally:
self.mask_dict.update({op_name: mask}) self.mask_dict.update({op_name: mask})
self.mask_calculated_ops.add(op_name) self.mask_calculated_ops.add(op_name)
...@@ -328,7 +97,7 @@ class L1FilterPruner(WeightRankFilterPruner): ...@@ -328,7 +97,7 @@ class L1FilterPruner(WeightRankFilterPruner):
super().__init__(model, config_list) super().__init__(model, config_list)
def _get_mask(self, base_mask, weight, num_prune): def get_mask(self, base_mask, weight, num_prune):
""" """
Calculate the mask of given layer. Calculate the mask of given layer.
Filters with the smallest sum of its absolute kernel weights are masked. Filters with the smallest sum of its absolute kernel weights are masked.
...@@ -376,7 +145,7 @@ class L2FilterPruner(WeightRankFilterPruner): ...@@ -376,7 +145,7 @@ class L2FilterPruner(WeightRankFilterPruner):
super().__init__(model, config_list) super().__init__(model, config_list)
def _get_mask(self, base_mask, weight, num_prune): def get_mask(self, base_mask, weight, num_prune):
""" """
Calculate the mask of given layer. Calculate the mask of given layer.
Filters with the smallest L2 norm of the absolute kernel weights are masked. Filters with the smallest L2 norm of the absolute kernel weights are masked.
...@@ -422,7 +191,7 @@ class FPGMPruner(WeightRankFilterPruner): ...@@ -422,7 +191,7 @@ class FPGMPruner(WeightRankFilterPruner):
""" """
super().__init__(model, config_list) super().__init__(model, config_list)
def _get_mask(self, base_mask, weight, num_prune): def get_mask(self, base_mask, weight, num_prune):
""" """
Calculate the mask of given layer. Calculate the mask of given layer.
Filters with the smallest sum of its absolute kernel weights are masked. Filters with the smallest sum of its absolute kernel weights are masked.
...@@ -491,251 +260,3 @@ class FPGMPruner(WeightRankFilterPruner): ...@@ -491,251 +260,3 @@ class FPGMPruner(WeightRankFilterPruner):
def update_epoch(self, epoch): def update_epoch(self, epoch):
self.mask_calculated_ops = set() self.mask_calculated_ops = set()
class ActivationRankFilterPruner(Pruner):
    """
    A structured pruning base class that prunes the filters with the smallest
    importance criterion in convolution layers to achieve a preset level of network sparsity.

    Hengyuan Hu, Rui Peng, Yu-Wing Tai and Chi-Keung Tang,
    "Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures", ICLR 2016.
    https://arxiv.org/abs/1607.03250
    Pavlo Molchanov, Stephen Tyree, Tero Karras, Timo Aila and Jan Kautz,
    "Pruning Convolutional Neural Networks for Resource Efficient Inference", ICLR 2017.
    https://arxiv.org/abs/1611.06440
    """

    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
        """
        Parameters
        ----------
        model : torch.nn.module
            Model to be pruned
        config_list : list
            support key for each list item:
                - sparsity: percentage of convolutional filters to be pruned.
        activation : str
            Activation function, one of 'relu' or 'relu6'
        statistics_batch_num : int
            Num of batches for activation statistics
        """
        super().__init__(model, config_list)
        self.mask_calculated_ops = set()
        self.statistics_batch_num = statistics_batch_num
        # layer name -> list of activation tensors captured by the forward hooks
        self.collected_activation = {}
        self.hooks = {}
        # The assert rejects unsupported activations up front, so the previously
        # unreachable `self.activation = None` fallback branch has been removed.
        assert activation in ['relu', 'relu6']
        if activation == 'relu':
            self.activation = torch.nn.functional.relu
        else:
            self.activation = torch.nn.functional.relu6

    def compress(self):
        """
        Compress the model, register a hook for collecting activations.
        """
        modules_to_compress = self.detect_modules_to_compress()
        for layer, config in modules_to_compress:
            self._instrument_layer(layer, config)
            self.collected_activation[layer.name] = []

            # `name` is bound as a default argument so each hook captures its own layer name.
            def _hook(module_, input_, output, name=layer.name):
                # Stop collecting once enough batches have been observed.
                if len(self.collected_activation[name]) < self.statistics_batch_num:
                    self.collected_activation[name].append(self.activation(output.detach().cpu()))
            layer.module.register_forward_hook(_hook)
        return self.bound_model

    def _get_mask(self, base_mask, activations, num_prune):
        # Base implementation: subclasses rank filters by activation statistics
        # and zero out `num_prune` of them in `base_mask`.
        return {'weight': None, 'bias': None}

    def calc_mask(self, layer, config):
        """
        Calculate the mask of given layer.
        Filters with the smallest importance criterion which is calculated from the activation are masked.

        Parameters
        ----------
        layer : LayerInfo
            the layer to instrument the compression operation
        config : dict
            layer's pruning config

        Returns
        -------
        dict
            dictionary for storing masks
        """
        weight = layer.module.weight.data
        op_name = layer.name
        op_type = layer.type
        assert 0 <= config.get('sparsity') < 1, "sparsity must in the range [0, 1)"
        assert op_type in ['Conv2d'], "only support Conv2d"
        assert op_type in config.get('op_types')
        # Masks are cached once computed with full statistics; return the cached one.
        if op_name in self.mask_calculated_ops:
            assert op_name in self.mask_dict
            return self.mask_dict.get(op_name)
        mask_weight = torch.ones(weight.size()).type_as(weight).detach()
        if hasattr(layer.module, 'bias') and layer.module.bias is not None:
            mask_bias = torch.ones(layer.module.bias.size()).type_as(layer.module.bias).detach()
        else:
            mask_bias = None
        mask = {'weight': mask_weight, 'bias': mask_bias}
        try:
            filters = weight.size(0)
            num_prune = int(filters * config.get('sparsity'))
            # Keep the all-ones mask while there is nothing to prune or
            # activation statistics are still incomplete.
            if filters < 2 or num_prune < 1 or len(self.collected_activation[layer.name]) < self.statistics_batch_num:
                return mask
            mask = self._get_mask(mask, self.collected_activation[layer.name], num_prune)
        finally:
            # Cache only once the required number of activation batches has been collected,
            # so calls made before that can still produce a refined mask later.
            if len(self.collected_activation[layer.name]) == self.statistics_batch_num:
                self.mask_dict.update({op_name: mask})
                self.mask_calculated_ops.add(op_name)
        return mask
class ActivationAPoZRankFilterPruner(ActivationRankFilterPruner):
    """
    A structured pruning algorithm that prunes the filters with the
    smallest APoZ(average percentage of zeros) of output activations.

    Hengyuan Hu, Rui Peng, Yu-Wing Tai and Chi-Keung Tang,
    "Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures", ICLR 2016.
    https://arxiv.org/abs/1607.03250
    """

    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
        """
        Parameters
        ----------
        model : torch.nn.module
            Model to be pruned
        config_list : list
            support key for each list item:
                - sparsity: percentage of convolutional filters to be pruned.
        activation : str
            Activation function
        statistics_batch_num : int
            Num of batches for activation statistics
        """
        super().__init__(model, config_list, activation, statistics_batch_num)

    def _get_mask(self, base_mask, activations, num_prune):
        """
        Zero out the ``num_prune`` filters whose output activations show the
        highest APoZ (average percentage of zeros).

        Parameters
        ----------
        base_mask : dict
            The basic mask with the same shape of weight, all item in the basic mask is 1.
        activations : list
            Layer's output activations
        num_prune : int
            Num of filters to prune

        Returns
        -------
        dict
            dictionary for storing masks
        """
        zero_ratio = self._calc_apoz(activations)
        # Highest APoZ first: those filters are the least informative.
        victim_indices = torch.argsort(zero_ratio, descending=True)[:num_prune]
        for victim in victim_indices:
            base_mask['weight'][victim] = 0.
            if base_mask['bias'] is not None:
                base_mask['bias'][victim] = 0.
        return base_mask

    def _calc_apoz(self, activations):
        """
        Calculate APoZ(average percentage of zeros) of activations.

        Parameters
        ----------
        activations : list
            Layer's output activations

        Returns
        -------
        torch.Tensor
            Filter's APoZ(average percentage of zeros) of the activations
        """
        stacked = torch.cat(activations, 0)
        # Per-filter fraction of zero entries over the batch and spatial dimensions
        # (equivalent to summing the zero indicator and dividing by N*H*W).
        zero_indicator = torch.eq(stacked, torch.zeros_like(stacked)).float()
        return zero_indicator.mean(dim=(0, 2, 3))
class ActivationMeanRankFilterPruner(ActivationRankFilterPruner):
    """
    A structured pruning algorithm that prunes the filters with the
    smallest mean value of output activations.

    Pavlo Molchanov, Stephen Tyree, Tero Karras, Timo Aila and Jan Kautz,
    "Pruning Convolutional Neural Networks for Resource Efficient Inference", ICLR 2017.
    https://arxiv.org/abs/1611.06440
    """

    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
        """
        Parameters
        ----------
        model : torch.nn.module
            Model to be pruned
        config_list : list
            support key for each list item:
                - sparsity: percentage of convolutional filters to be pruned.
        activation : str
            Activation function
        statistics_batch_num : int
            Num of batches for activation statistics
        """
        super().__init__(model, config_list, activation, statistics_batch_num)

    def _get_mask(self, base_mask, activations, num_prune):
        """
        Zero out the ``num_prune`` filters whose mean output activation is smallest.

        Parameters
        ----------
        base_mask : dict
            The basic mask with the same shape of weight, all item in the basic mask is 1.
        activations : list
            Layer's output activations
        num_prune : int
            Num of filters to prune

        Returns
        -------
        dict
            dictionary for storing masks
        """
        avg_activation = self._cal_mean_activation(activations)
        # Ascending sort: the least-active filters come first and are pruned.
        victim_indices = torch.argsort(avg_activation)[:num_prune]
        for victim in victim_indices:
            base_mask['weight'][victim] = 0.
            if base_mask['bias'] is not None:
                base_mask['bias'][victim] = 0.
        return base_mask

    def _cal_mean_activation(self, activations):
        """
        Calculate mean value of activations.

        Parameters
        ----------
        activations : list
            Layer's output activations

        Returns
        -------
        torch.Tensor
            Filter's mean value of the output activations
        """
        stacked = torch.cat(activations, 0)
        # Average over batch and spatial dimensions, leaving one value per filter.
        return stacked.mean(dim=(0, 2, 3))
...@@ -10,7 +10,7 @@ import torch ...@@ -10,7 +10,7 @@ import torch
import nni import nni
from nni.env_vars import trial_env_vars from nni.env_vars import trial_env_vars
from nni.nas.pytorch.mutables import LayerChoice, InputChoice from nni.nas.pytorch.mutables import LayerChoice, InputChoice, MutableScope
from nni.nas.pytorch.mutator import Mutator from nni.nas.pytorch.mutator import Mutator
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -104,10 +104,11 @@ class ClassicMutator(Mutator): ...@@ -104,10 +104,11 @@ class ClassicMutator(Mutator):
search_space_item : list search_space_item : list
The list for corresponding search space. The list for corresponding search space.
""" """
candidate_repr = search_space_item["candidates"]
multihot_list = [False] * mutable.n_candidates multihot_list = [False] * mutable.n_candidates
for i, v in zip(idx, value): for i, v in zip(idx, value):
assert 0 <= i < mutable.n_candidates and search_space_item[i] == v, \ assert 0 <= i < mutable.n_candidates and candidate_repr[i] == v, \
"Index '{}' in search space '{}' is not '{}'".format(i, search_space_item, v) "Index '{}' in search space '{}' is not '{}'".format(i, candidate_repr, v)
assert not multihot_list[i], "'{}' is selected twice in '{}', which is not allowed.".format(i, idx) assert not multihot_list[i], "'{}' is selected twice in '{}', which is not allowed.".format(i, idx)
multihot_list[i] = True multihot_list[i] = True
return torch.tensor(multihot_list, dtype=torch.bool) # pylint: disable=not-callable return torch.tensor(multihot_list, dtype=torch.bool) # pylint: disable=not-callable
...@@ -121,17 +122,20 @@ class ClassicMutator(Mutator): ...@@ -121,17 +122,20 @@ class ClassicMutator(Mutator):
self._chosen_arch.keys()) self._chosen_arch.keys())
result = dict() result = dict()
for mutable in self.mutables: for mutable in self.mutables:
assert mutable.key in self._chosen_arch, "Expected '{}' in chosen arch, but not found.".format(mutable.key) if isinstance(mutable, (LayerChoice, InputChoice)):
assert mutable.key in self._chosen_arch, \
"Expected '{}' in chosen arch, but not found.".format(mutable.key)
data = self._chosen_arch[mutable.key] data = self._chosen_arch[mutable.key]
assert isinstance(data, dict) and "_value" in data and "_idx" in data, \ assert isinstance(data, dict) and "_value" in data and "_idx" in data, \
"'{}' is not a valid choice.".format(data) "'{}' is not a valid choice.".format(data)
value = data["_value"]
idx = data["_idx"]
search_space_item = self._search_space[mutable.key]["_value"]
if isinstance(mutable, LayerChoice): if isinstance(mutable, LayerChoice):
result[mutable.key] = self._sample_layer_choice(mutable, idx, value, search_space_item) result[mutable.key] = self._sample_layer_choice(mutable, data["_idx"], data["_value"],
self._search_space[mutable.key]["_value"])
elif isinstance(mutable, InputChoice): elif isinstance(mutable, InputChoice):
result[mutable.key] = self._sample_input_choice(mutable, idx, value, search_space_item) result[mutable.key] = self._sample_input_choice(mutable, data["_idx"], data["_value"],
self._search_space[mutable.key]["_value"])
elif isinstance(mutable, MutableScope):
logger.info("Mutable scope '%s' is skipped during parsing choices.", mutable.key)
else: else:
raise TypeError("Unsupported mutable type: '%s'." % type(mutable)) raise TypeError("Unsupported mutable type: '%s'." % type(mutable))
return result return result
...@@ -190,6 +194,8 @@ class ClassicMutator(Mutator): ...@@ -190,6 +194,8 @@ class ClassicMutator(Mutator):
search_space[key] = {"_type": INPUT_CHOICE, search_space[key] = {"_type": INPUT_CHOICE,
"_value": {"candidates": mutable.choose_from, "_value": {"candidates": mutable.choose_from,
"n_chosen": mutable.n_chosen}} "n_chosen": mutable.n_chosen}}
elif isinstance(mutable, MutableScope):
logger.info("Mutable scope '%s' is skipped during generating search space.", mutable.key)
else: else:
raise TypeError("Unsupported mutable type: '%s'." % type(mutable)) raise TypeError("Unsupported mutable type: '%s'." % type(mutable))
return search_space return search_space
......
...@@ -14,6 +14,26 @@ _logger = logging.getLogger(__name__) ...@@ -14,6 +14,26 @@ _logger = logging.getLogger(__name__)
class DartsMutator(Mutator): class DartsMutator(Mutator):
"""
Connects the model in a DARTS (differentiable) way.
An extra connection is automatically inserted for each LayerChoice, when this connection is selected, there is no
op on this LayerChoice (namely a ``ZeroOp``), in which case, every element in the exported choice list is ``false``
(not chosen).
All input choice will be fully connected in the search phase. On exporting, the input choice will choose inputs based
on keys in ``choose_from``. If the keys were to be keys of LayerChoices, the top logit of the corresponding LayerChoice
will join the competition of input choice to compete against other logits. Otherwise, the logit will be assumed 0.
It's possible to cut branches by setting parameter ``choices`` in a particular position to ``-inf``. After softmax, the
value would be 0. Framework will ignore 0 values and not connect. Note that the gradient on the ``-inf`` location will
be 0. Since manipulations with ``-inf`` will be ``nan``, you need to handle the gradient update phase carefully.
Attributes
----------
choices: ParameterDict
dict that maps keys of LayerChoices to weighted-connection float tensors.
"""
def __init__(self, model): def __init__(self, model):
super().__init__(model) super().__init__(model)
self.choices = nn.ParameterDict() self.choices = nn.ParameterDict()
......
...@@ -19,6 +19,42 @@ class DartsTrainer(Trainer): ...@@ -19,6 +19,42 @@ class DartsTrainer(Trainer):
optimizer, num_epochs, dataset_train, dataset_valid, optimizer, num_epochs, dataset_train, dataset_valid,
mutator=None, batch_size=64, workers=4, device=None, log_frequency=None, mutator=None, batch_size=64, workers=4, device=None, log_frequency=None,
callbacks=None, arc_learning_rate=3.0E-4, unrolled=False): callbacks=None, arc_learning_rate=3.0E-4, unrolled=False):
"""
Initialize a DartsTrainer.
Parameters
----------
model : nn.Module
PyTorch model to be trained.
loss : callable
Receives logits and ground truth label, return a loss tensor.
metrics : callable
Receives logits and ground truth label, return a dict of metrics.
optimizer : Optimizer
The optimizer used for optimizing the model.
num_epochs : int
Number of epochs planned for training.
dataset_train : Dataset
Dataset for training. Will be split for training weights and architecture weights.
dataset_valid : Dataset
Dataset for testing.
mutator : DartsMutator
Use in case of customizing your own DartsMutator. By default will instantiate a DartsMutator.
batch_size : int
Batch size.
workers : int
Workers for data loading.
device : torch.device
``torch.device("cpu")`` or ``torch.device("cuda")``.
log_frequency : int
Step count per logging.
callbacks : list of Callback
list of callbacks to trigger at events.
arc_learning_rate : float
Learning rate of architecture parameters.
unrolled : float
``True`` if using second order optimization, else first order optimization.
"""
super().__init__(model, mutator if mutator is not None else DartsMutator(model), super().__init__(model, mutator if mutator is not None else DartsMutator(model),
loss, metrics, optimizer, num_epochs, dataset_train, dataset_valid, loss, metrics, optimizer, num_epochs, dataset_train, dataset_valid,
batch_size, workers, device, log_frequency, callbacks) batch_size, workers, device, log_frequency, callbacks)
......
...@@ -31,6 +31,31 @@ class EnasMutator(Mutator): ...@@ -31,6 +31,31 @@ class EnasMutator(Mutator):
def __init__(self, model, lstm_size=64, lstm_num_layers=1, tanh_constant=1.5, cell_exit_extra_step=False, def __init__(self, model, lstm_size=64, lstm_num_layers=1, tanh_constant=1.5, cell_exit_extra_step=False,
skip_target=0.4, branch_bias=0.25): skip_target=0.4, branch_bias=0.25):
"""
Initialize a EnasMutator.
Parameters
----------
model : nn.Module
PyTorch model.
lstm_size : int
Controller LSTM hidden units.
lstm_num_layers : int
Number of layers for stacked LSTM.
tanh_constant : float
Logits will be equal to ``tanh_constant * tanh(logits)``. Don't use ``tanh`` if this value is ``None``.
cell_exit_extra_step : bool
If true, RL controller will perform an extra step at the exit of each MutableScope, dump the hidden state
and mark it as the hidden state of this MutableScope. This is to align with the original implementation of paper.
skip_target : float
Target probability that skipconnect will appear.
branch_bias : float
Manual bias applied to make some operations more likely to be chosen.
Currently this is implemented with a hardcoded match rule that aligns with original repo.
If a mutable has a ``reduce`` in its key, all its op choices
that contains `conv` in their typename will receive a bias of ``+self.branch_bias`` initially; while others
receive a bias of ``-self.branch_bias``.
"""
super().__init__(model) super().__init__(model)
self.lstm_size = lstm_size self.lstm_size = lstm_size
self.lstm_num_layers = lstm_num_layers self.lstm_num_layers = lstm_num_layers
......
...@@ -18,6 +18,54 @@ class EnasTrainer(Trainer): ...@@ -18,6 +18,54 @@ class EnasTrainer(Trainer):
mutator=None, batch_size=64, workers=4, device=None, log_frequency=None, callbacks=None, mutator=None, batch_size=64, workers=4, device=None, log_frequency=None, callbacks=None,
entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999, entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999,
mutator_lr=0.00035, mutator_steps_aggregate=20, mutator_steps=50, aux_weight=0.4): mutator_lr=0.00035, mutator_steps_aggregate=20, mutator_steps=50, aux_weight=0.4):
"""
Initialize an EnasTrainer.
Parameters
----------
model : nn.Module
PyTorch model to be trained.
loss : callable
Receives logits and ground truth label, return a loss tensor.
metrics : callable
Receives logits and ground truth label, return a dict of metrics.
reward_function : callable
Receives logits and ground truth label, return a tensor, which will be feeded to RL controller as reward.
optimizer : Optimizer
The optimizer used for optimizing the model.
num_epochs : int
Number of epochs planned for training.
dataset_train : Dataset
Dataset for training. Will be split for training weights and architecture weights.
dataset_valid : Dataset
Dataset for testing.
mutator : EnasMutator
Use when customizing your own mutator or a mutator with customized parameters.
batch_size : int
Batch size.
workers : int
Workers for data loading.
device : torch.device
``torch.device("cpu")`` or ``torch.device("cuda")``.
log_frequency : int
Step count per logging.
callbacks : list of Callback
list of callbacks to trigger at events.
entropy_weight : float
Weight of sample entropy loss.
skip_weight : float
Weight of skip penalty loss.
baseline_decay : float
Decay factor of baseline. New baseline will be equal to ``baseline_decay * baseline_old + reward * (1 - baseline_decay)``.
mutator_lr : float
Learning rate for RL controller.
mutator_steps_aggregate : int
Number of steps that will be aggregated into one mini-batch for RL controller.
mutator_steps : int
Number of mini-batches for each epoch of RL controller learning.
aux_weight : float
Weight of auxiliary head loss. ``aux_weight * aux_loss`` will be added to total loss.
"""
super().__init__(model, mutator if mutator is not None else EnasMutator(model), super().__init__(model, mutator if mutator is not None else EnasMutator(model),
loss, metrics, optimizer, num_epochs, dataset_train, dataset_valid, loss, metrics, optimizer, num_epochs, dataset_train, dataset_valid,
batch_size, workers, device, log_frequency, callbacks) batch_size, workers, device, log_frequency, callbacks)
......
...@@ -41,18 +41,18 @@ class FixedArchitecture(Mutator): ...@@ -41,18 +41,18 @@ class FixedArchitecture(Mutator):
return self._fixed_arc return self._fixed_arc
def _encode_tensor(data, device): def _encode_tensor(data):
if isinstance(data, list): if isinstance(data, list):
if all(map(lambda o: isinstance(o, bool), data)): if all(map(lambda o: isinstance(o, bool), data)):
return torch.tensor(data, dtype=torch.bool, device=device) # pylint: disable=not-callable return torch.tensor(data, dtype=torch.bool) # pylint: disable=not-callable
else: else:
return torch.tensor(data, dtype=torch.float, device=device) # pylint: disable=not-callable return torch.tensor(data, dtype=torch.float) # pylint: disable=not-callable
if isinstance(data, dict): if isinstance(data, dict):
return {k: _encode_tensor(v, device) for k, v in data.items()} return {k: _encode_tensor(v) for k, v in data.items()}
return data return data
def apply_fixed_architecture(model, fixed_arc_path, device=None): def apply_fixed_architecture(model, fixed_arc_path):
""" """
Load architecture from `fixed_arc_path` and apply to model. Load architecture from `fixed_arc_path` and apply to model.
...@@ -62,21 +62,16 @@ def apply_fixed_architecture(model, fixed_arc_path, device=None): ...@@ -62,21 +62,16 @@ def apply_fixed_architecture(model, fixed_arc_path, device=None):
Model with mutables. Model with mutables.
fixed_arc_path : str fixed_arc_path : str
Path to the JSON that stores the architecture. Path to the JSON that stores the architecture.
device : torch.device
Architecture weights will be transfered to `device`.
Returns Returns
------- -------
FixedArchitecture FixedArchitecture
""" """
if device is None:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if isinstance(fixed_arc_path, str): if isinstance(fixed_arc_path, str):
with open(fixed_arc_path, "r") as f: with open(fixed_arc_path, "r") as f:
fixed_arc = json.load(f) fixed_arc = json.load(f)
fixed_arc = _encode_tensor(fixed_arc, device) fixed_arc = _encode_tensor(fixed_arc)
architecture = FixedArchitecture(model, fixed_arc) architecture = FixedArchitecture(model, fixed_arc)
architecture.to(device)
architecture.reset() architecture.reset()
return architecture return architecture
...@@ -211,6 +211,7 @@ class SPOSEvolution(Tuner): ...@@ -211,6 +211,7 @@ class SPOSEvolution(Tuner):
Parameters Parameters
---------- ----------
result : dict result : dict
Chosen architectures to be exported.
""" """
os.makedirs("checkpoints", exist_ok=True) os.makedirs("checkpoints", exist_ok=True)
for i, cand in enumerate(result): for i, cand in enumerate(result):
......
...@@ -17,6 +17,7 @@ class SPOSSupernetTrainingMutator(RandomMutator): ...@@ -17,6 +17,7 @@ class SPOSSupernetTrainingMutator(RandomMutator):
Parameters Parameters
---------- ----------
model : nn.Module model : nn.Module
PyTorch model.
flops_func : callable flops_func : callable
Callable that takes a candidate from `sample_search` and returns its candidate. When `flops_func` Callable that takes a candidate from `sample_search` and returns its candidate. When `flops_func`
is None, functions related to flops will be deactivated. is None, functions related to flops will be deactivated.
......
...@@ -21,6 +21,37 @@ class SPOSSupernetTrainer(Trainer): ...@@ -21,6 +21,37 @@ class SPOSSupernetTrainer(Trainer):
optimizer, num_epochs, train_loader, valid_loader, optimizer, num_epochs, train_loader, valid_loader,
mutator=None, batch_size=64, workers=4, device=None, log_frequency=None, mutator=None, batch_size=64, workers=4, device=None, log_frequency=None,
callbacks=None): callbacks=None):
"""
Parameters
----------
model : nn.Module
Model with mutables.
mutator : Mutator
A mutator object that has been initialized with the model.
loss : callable
Called with logits and targets. Returns a loss tensor.
metrics : callable
Returns a dict that maps metrics keys to metrics data.
optimizer : Optimizer
Optimizer that optimizes the model.
num_epochs : int
Number of epochs of training.
train_loader : iterable
Data loader of training. Raise ``StopIteration`` when one epoch is exhausted.
dataset_valid : iterable
Data loader of validation. Raise ``StopIteration`` when one epoch is exhausted.
batch_size : int
Batch size.
workers: int
Number of threads for data preprocessing. Not used for this trainer. Maybe removed in future.
device : torch.device
Device object. Either ``torch.device("cuda")`` or ``torch.device("cpu")``. When ``None``, trainer will
automatic detects GPU and selects GPU first.
log_frequency : int
Number of mini-batches to log metrics.
callbacks : list of Callback
Callbacks to plug into the trainer. See Callbacks.
"""
assert torch.cuda.is_available() assert torch.cuda.is_available()
super().__init__(model, mutator if mutator is not None else SPOSSupernetTrainingMutator(model), super().__init__(model, mutator if mutator is not None else SPOSSupernetTrainingMutator(model),
loss, metrics, optimizer, num_epochs, None, None, loss, metrics, optimizer, num_epochs, None, None,
......
...@@ -52,7 +52,7 @@ class Trainer(BaseTrainer): ...@@ -52,7 +52,7 @@ class Trainer(BaseTrainer):
workers : int workers : int
Number of workers used in data preprocessing. Number of workers used in data preprocessing.
device : torch.device device : torch.device
Device object. Either `torch.device("cuda")` or torch.device("cpu")`. When `None`, trainer will Device object. Either ``torch.device("cuda")`` or ``torch.device("cpu")``. When ``None``, trainer will
automatic detects GPU and selects GPU first. automatic detects GPU and selects GPU first.
log_frequency : int log_frequency : int
Number of mini-batches to log metrics. Number of mini-batches to log metrics.
......
...@@ -91,7 +91,8 @@ class Compare extends React.Component<CompareProps, {}> { ...@@ -91,7 +91,8 @@ class Compare extends React.Component<CompareProps, {}> {
}, },
yAxis: { yAxis: {
type: 'value', type: 'value',
name: 'Metric' name: 'Metric',
scale: true
}, },
series: trialIntermediate series: trialIntermediate
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment