Commit b40e3db7 authored by quzha

Merge branch 'master' of github.com:Microsoft/nni into dev-retiarii

parents efa4e31c 95f731e4
authorName: default
experimentName: auto_rocksdb_SMAC
trialConcurrency: 1
maxExecDuration: 12h
maxTrialNum: 256
#choice: local, remote, pai
trainingServicePlatform: local
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
  #SMAC (SMAC should be installed through nnictl)
  builtinTunerName: SMAC
  classArgs:
    #choice: maximize, minimize
    optimize_mode: maximize
trial:
  command: python3 main.py
  codeDir: .
  gpuNum: 0
authorName: default
experimentName: auto_rocksdb_TPE
trialConcurrency: 1
maxExecDuration: 12h
maxTrialNum: 256
#choice: local, remote, pai
trainingServicePlatform: local
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
  #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
  #SMAC (SMAC should be installed through nnictl)
  builtinTunerName: TPE
  classArgs:
    #choice: maximize, minimize
    optimize_mode: maximize
trial:
  command: python3 main.py
  codeDir: .
  gpuNum: 0
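Both configurations run the same trial (`command: python3 main.py`) against `search_space.json`; an experiment is started by pointing `nnictl create --config <config file>` at either of them. As a rough sketch of what a trial script wired to this setup does with the NNI trial API (illustrative only; the example's real main.py presumably drives a RocksDB fill benchmark and reports its throughput):

    import nni

    def run_trial(params):
        # Placeholder benchmark: a real trial would feed the tuned RocksDB
        # options (write_buffer_size, ...) into a fillrandom benchmark and
        # return the measured throughput.
        return float(params['write_buffer_size']) / 1048576

    if __name__ == '__main__':
        # Hyperparameters sampled by the tuner from search_space.json.
        params = nni.get_next_parameter()
        throughput = run_trial(params)
        # This reported value is what optimize_mode: maximize acts on.
        nni.report_final_result(throughput)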
{
    "write_buffer_size": {
        "_type": "quniform",
        "_value": [2097152, 16777216, 1048576]
    },
    "min_write_buffer_number_to_merge": {
        "_type": "quniform",
        "_value": [2, 16, 1]
    },
    "level0_file_num_compaction_trigger": {
        "_type": "quniform",
        "_value": [2, 16, 1]
    }
}
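In NNI's search-space format, `quniform` takes `[low, high, q]`: a value is sampled uniformly from [low, high], rounded to the nearest multiple of q, and clipped back into the range. Here `write_buffer_size` therefore moves in 1 MiB (1048576-byte) steps between 2 MiB and 16 MiB, and the other two knobs step by 1 between 2 and 16. A minimal sketch of that sampling rule, following the documented semantics (not code from this commit):

    import random

    def sample_quniform(low, high, q):
        # clip(round(uniform(low, high) / q) * q, low, high)
        value = round(random.uniform(low, high) / q) * q
        return min(max(value, low), high)

    print(sample_quniform(2097152, 16777216, 1048576))  # write_buffer_size, 1 MiB steps
    print(sample_quniform(2, 16, 1))                    # min_write_buffer_number_to_merge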
@@ -219,8 +219,7 @@ def run_epoch(batches, answer_net, is_training):
             loss, _, = sess.run(
                 [answer_net.loss, answer_net.train_op], feed_dict=feed_dict)
             if count % 100 == 0:
-                logger.debug('%d %g except:%g, loss:%g' %
-                             (count, used, used / count * len(batches), loss))
+                logger.debug('%d %g except:%g, loss:%g', count, used, used / count * len(batches), loss)
             loss_sum += loss
         else:
             feed_dict = {answer_net.query_word: query,
@@ -240,8 +239,7 @@ def run_epoch(batches, answer_net, is_training):
             contexts += context
             ids = np.concatenate((ids, sample_id))
             if count % 100 == 0:
-                logger.debug('%d %g except:%g' %
-                             (count, used, used / count * len(batches)))
+                logger.debug('%d %g except:%g', count, used, used / count * len(batches))
     loss = loss_sum / len(batches)
     if is_training:
         return loss
@@ -333,7 +331,7 @@ def train_with_graph(p_graph, qp_pairs, dev_qp_pairs):
             train_batches = data.get_batches(qp_pairs, cfg.batch_size)
             train_loss = run_epoch(train_batches, train_model, True)
             logger.debug('epoch ' + str(epoch) +
-                         ' loss: ' + str(train_loss))
+                         ' loss: ', str(train_loss))
             dev_batches = list(data.get_batches(
                 dev_qp_pairs, cfg.batch_size))
             _, position1, position2, ids, contexts = run_epoch(
@@ -369,8 +367,7 @@ def train_with_graph(p_graph, qp_pairs, dev_qp_pairs):
                 with open(os.path.join(save_path, 'epoch%d.score' % epoch), 'wb') as file:
                     pickle.dump(
                         (position1, position2, ids, contexts), file)
-                logger.debug('epoch %d acc %g bestacc %g' %
-                             (epoch, acc, bestacc))
+                logger.debug('epoch %d acc %g bestacc %g', epoch, acc, bestacc)
                 if patience <= iter:
                     break
     logger.debug('save done.')
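The hunks above replace eager %-formatting with logging's deferred formatting, so the message string is only built if a handler actually emits the DEBUG record. A minimal illustration of the difference (not part of this commit):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    count, loss = 100, 0.42
    # Eager: the string is built even though DEBUG is disabled here.
    logger.debug('%d loss:%g' % (count, loss))
    # Deferred: arguments are only interpolated if the record is emitted.
    logger.debug('%d loss:%g', count, loss)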
@@ -4,6 +4,7 @@
 import logging
 import torch
 from schema import And, Optional
+import copy
 from nni.compression.pytorch.utils.config_validation import CompressorSchema
 from .constants import MASKER_DICT
@@ -53,7 +54,7 @@ class ADMMPruner(OneshotPruner):
     row : float
         Penalty parameters for ADMM training.
     base_algo : str
-        Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops,
+        Base pruning algorithm. `level`, `l1`, `l2` or `fpgm`, by default `l1`. Given the sparsity distribution among the ops,
         the assigned `base_algo` is used to decide which filters/channels/weights to prune.
     """
@@ -87,7 +88,7 @@ class ADMMPruner(OneshotPruner):
                 Optional('op_types'): [str],
                 Optional('op_names'): [str],
             }], model, _logger)
-        elif self._base_algo in ['l1', 'l2']:
+        elif self._base_algo in ['l1', 'l2', 'fpgm']:
             schema = CompressorSchema([{
                 'sparsity': And(float, lambda n: 0 < n < 1),
                 'op_types': ['Conv2d'],
@@ -96,7 +97,7 @@ class ADMMPruner(OneshotPruner):
         schema.validate(config_list)

-    def _projection(self, weight, sparsity):
+    def _projection(self, weight, sparsity, wrapper):
         '''
         Return the Euclidean projection of the weight matrix according to the pruning mode.
@@ -106,31 +107,17 @@ class ADMMPruner(OneshotPruner):
             original matrix
         sparsity : float
            the ratio of parameters which need to be set to zero
+        wrapper: PrunerModuleWrapper
+            layer wrapper of this layer

         Returns
         -------
         tensor
             the projected matrix
         '''
-        w_abs = weight.abs()
-        if self._base_algo == 'level':
-            k = int(weight.numel() * sparsity)
-            if k == 0:
-                mask_weight = torch.ones(weight.shape).type_as(weight)
-            else:
-                threshold = torch.topk(w_abs.view(-1), k, largest=False)[0].max()
-                mask_weight = torch.gt(w_abs, threshold).type_as(weight)
-        elif self._base_algo in ['l1', 'l2']:
-            filters = weight.size(0)
-            num_prune = int(filters * sparsity)
-            if filters < 2 or num_prune < 1:
-                mask_weight = torch.ones(weight.size()).type_as(weight).detach()
-            else:
-                w_abs_structured = w_abs.view(filters, -1).sum(dim=1)
-                threshold = torch.topk(w_abs_structured.view(-1), num_prune, largest=False)[0].max()
-                mask_weight = torch.gt(w_abs_structured, threshold)[:, None, None, None].expand_as(weight).type_as(weight)
-        return weight.data.mul(mask_weight)
+        wrapper_copy = copy.deepcopy(wrapper)
+        wrapper_copy.module.weight.data = weight
+        return weight.data.mul(self.masker.calc_mask(sparsity, wrapper_copy)['weight_mask'])

     def compress(self):
         """
@@ -179,7 +166,7 @@ class ADMMPruner(OneshotPruner):
             # U_i^{k+1} = U^k + W_i^{k+1} - Z_i^{k+1}
             for i, wrapper in enumerate(self.get_modules_wrapper()):
                 z = wrapper.module.weight.data + U[i]
-                Z[i] = self._projection(z, wrapper.config['sparsity'])
+                Z[i] = self._projection(z, wrapper.config['sparsity'], wrapper)
                 U[i] = U[i] + wrapper.module.weight.data - Z[i]

             # apply prune
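The rewritten `_projection` above no longer hard-codes the `level` and `l1`/`l2` masking logic; it deep-copies the layer wrapper, substitutes the ADMM auxiliary variable as the weight, and lets the configured masker (now including `fpgm`) compute the mask. For intuition, here is a standalone sketch of the Euclidean projection onto a `level` sparsity constraint, i.e. what the removed inline branch computed (an illustration, not repository code):

    import torch

    def project_level_sparsity(weight, sparsity):
        """Zero out the smallest-magnitude entries so that `sparsity` of them become 0."""
        w_abs = weight.abs()
        k = int(weight.numel() * sparsity)
        if k == 0:
            return weight.clone()
        # magnitude of the k-th smallest entry becomes the pruning threshold
        threshold = torch.topk(w_abs.view(-1), k, largest=False)[0].max()
        return weight * (w_abs > threshold).type_as(weight)

    w = torch.randn(4, 4)
    print(project_level_sparsity(w, 0.5))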
@@ -80,7 +80,7 @@ class AutoCompressPruner(Pruner):
     optimize_mode : str
         optimize mode, `maximize` or `minimize`, by default `maximize`.
     base_algo : str
-        Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops,
+        Base pruning algorithm. `level`, `l1`, `l2` or `fpgm`, by default `l1`. Given the sparsity distribution among the ops,
         the assigned `base_algo` is used to decide which filters/channels/weights to prune.
     start_temperature : float
         Start temperature of the simulated annealing process.
@@ -151,7 +151,7 @@ class AutoCompressPruner(Pruner):
                 Optional('op_types'): [str],
                 Optional('op_names'): [str],
             }], model, _logger)
-        elif self._base_algo in ['l1', 'l2']:
+        elif self._base_algo in ['l1', 'l2', 'fpgm']:
             schema = CompressorSchema([{
                 'sparsity': And(float, lambda n: 0 < n < 1),
                 'op_types': ['Conv2d'],
@@ -2,10 +2,11 @@
 # Licensed under the MIT license.

-from .one_shot import LevelPruner, L1FilterPruner, L2FilterPruner
+from .one_shot import LevelPruner, L1FilterPruner, L2FilterPruner, FPGMPruner

 PRUNER_DICT = {
     'level': LevelPruner,
     'l1': L1FilterPruner,
-    'l2': L2FilterPruner
+    'l2': L2FilterPruner,
+    'fpgm': FPGMPruner
 }
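`FPGMPruner`, newly exposed here under the `fpgm` key, ranks filters by their total distance to the other filters in the same layer: filters closest to the geometric median are considered the most redundant and are pruned first, rather than the smallest-norm ones. A rough, self-contained sketch of that ranking (an illustration of the idea, not the NNI implementation):

    import torch

    def fpgm_filter_scores(weight):
        # weight: Conv2d weight of shape (out_channels, in_channels, kH, kW).
        # Score each filter by its summed Euclidean distance to all other filters;
        # the smallest scores sit closest to the geometric median and are pruned first.
        flat = weight.view(weight.size(0), -1)
        dist = torch.cdist(flat, flat, p=2)
        return dist.sum(dim=1)

    w = torch.randn(8, 4, 3, 3)
    prune_order = torch.argsort(fpgm_filter_scores(w))  # most redundant filters first
    print(prune_order)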
@@ -73,7 +73,7 @@ class NetAdaptPruner(Pruner):
     optimize_mode : str
         optimize mode, `maximize` or `minimize`, by default `maximize`.
     base_algo : str
-        Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops,
+        Base pruning algorithm. `level`, `l1`, `l2` or `fpgm`, by default `l1`. Given the sparsity distribution among the ops,
         the assigned `base_algo` is used to decide which filters/channels/weights to prune.
     sparsity_per_iteration : float
         sparsity to prune in each iteration.
@@ -125,7 +125,7 @@ class NetAdaptPruner(Pruner):
                 Optional('op_types'): [str],
                 Optional('op_names'): [str],
             }], model, _logger)
-        elif self._base_algo in ['l1', 'l2']:
+        elif self._base_algo in ['l1', 'l2', 'fpgm']:
             schema = CompressorSchema([{
                 'sparsity': And(float, lambda n: 0 < n < 1),
                 'op_types': ['Conv2d'],
@@ -149,7 +149,7 @@ class NetAdaptPruner(Pruner):
                 return config_list_updated

         # if op_name is not in self._config_list_generated, create a new json item
-        if self._base_algo in ['l1', 'l2']:
+        if self._base_algo in ['l1', 'l2', 'fpgm']:
            config_list_updated.append(
                {'sparsity': sparsity, 'op_types': ['Conv2d'], 'op_names': [op_name]})
         elif self._base_algo == 'level':
@@ -68,7 +68,7 @@ class SensitivityPruner(Pruner):
            >>> loss.backward()
            >>> optimizer.step()
     base_algo: str
-        base pruning algorithm. `level`, `l1` or `l2`, by default `l1`.
+        base pruning algorithm. `level`, `l1`, `l2` or `fpgm`, by default `l1`.
     sparsity_proportion_calc: function
         This function generate the sparsity proportion between the conv layers according to the
         sensitivity analysis results. We provide a default function to quantify the sparsity
@@ -150,7 +150,7 @@ class SensitivityPruner(Pruner):
                 Optional('op_types'): [str],
                 Optional('op_names'): [str],
             }], model, _logger)
-        elif self.base_algo in ['l1', 'l2']:
+        elif self.base_algo in ['l1', 'l2', 'fpgm']:
             schema = CompressorSchema([{
                 'sparsity': And(float, lambda n: 0 < n < 1),
                 'op_types': ['Conv2d'],
@@ -54,7 +54,7 @@ class SimulatedAnnealingPruner(Pruner):
     optimize_mode : str
         Optimize mode, `maximize` or `minimize`, by default `maximize`.
     base_algo : str
-        Base pruning algorithm. `level`, `l1` or `l2`, by default `l1`. Given the sparsity distribution among the ops,
+        Base pruning algorithm. `level`, `l1`, `l2` or `fpgm`, by default `l1`. Given the sparsity distribution among the ops,
         the assigned `base_algo` is used to decide which filters/channels/weights to prune.
     start_temperature : float
         Start temperature of the simulated annealing process.
@@ -120,7 +120,7 @@ class SimulatedAnnealingPruner(Pruner):
                 Optional('op_types'): [str],
                 Optional('op_names'): [str],
             }], model, _logger)
-        elif self._base_algo in ['l1', 'l2']:
+        elif self._base_algo in ['l1', 'l2', 'fpgm']:
             schema = CompressorSchema([{
                 'sparsity': And(float, lambda n: 0 < n < 1),
                 'op_types': ['Conv2d'],
@@ -152,7 +152,7 @@ class SimulatedAnnealingPruner(Pruner):
         # a layer with more weights will have no less pruning rate
         for idx, wrapper in enumerate(self.get_modules_wrapper()):
             # L1Filter Pruner requires to specify op_types
-            if self._base_algo in ['l1', 'l2']:
+            if self._base_algo in ['l1', 'l2', 'fpgm']:
                 config_list.append(
                     {'sparsity': sparsities[idx], 'op_types': ['Conv2d'], 'op_names': [wrapper.name]})
             elif self._base_algo == 'level':
@@ -73,9 +73,9 @@ def update_quantization_param(bits, rmin, rmax):
     ----------
     bits : int
         quantization bits length
-    rmin : float
+    rmin : Tensor
         min value of real value
-    rmax : float
+    rmax : Tensor
         max value of real value

     Returns
@@ -85,12 +85,17 @@ def update_quantization_param(bits, rmin, rmax):
     # extend the [min, max] interval to ensure that it contains 0.
     # Otherwise, we would not meet the requirement that 0 be an exactly
     # representable value.
-    rmin = min(rmin, 0)
-    rmax = max(rmax, 0)
+    if rmin.is_cuda:
+        rmin = torch.min(rmin, torch.Tensor([0]).cuda())
+        rmax = torch.max(rmax, torch.Tensor([0]).cuda())
+        qmin = torch.Tensor([0]).cuda()
+        qmax = torch.Tensor([(1 << bits) - 1]).cuda()
+    else:
+        rmin = torch.min(rmin, torch.Tensor([0]))
+        rmax = torch.max(rmax, torch.Tensor([0]))
+        qmin = torch.Tensor([0])
+        qmax = torch.Tensor([(1 << bits) - 1])
-    # the min and max quantized values, as floating-point values
-    qmin = 0
-    qmax = (1 << bits) - 1

     # First determine the scale.
     scale = (rmax - rmin) / (qmax - qmin)
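The parameters computed here follow the usual asymmetric affine scheme: `scale` maps the zero-containing real range [rmin, rmax] onto the integer range [qmin, qmax], and the zero point is the integer that real 0.0 maps to. A small numeric sketch of that derivation (an illustration, not the NNI function, which continues past this hunk):

    import torch

    def affine_quant_params(bits, rmin, rmax):
        # Force 0.0 into the representable range, as the comments above require.
        rmin = torch.min(rmin, torch.tensor([0.0]))
        rmax = torch.max(rmax, torch.tensor([0.0]))
        qmin, qmax = 0.0, float((1 << bits) - 1)
        scale = (rmax - rmin) / (qmax - qmin)
        zero_point = torch.clamp(torch.round(qmin - rmin / scale), qmin, qmax)
        return scale, zero_point

    scale, zp = affine_quant_params(8, torch.tensor([-1.0]), torch.tensor([3.0]))
    print(scale, zp)  # scale ~ 4/255 ~ 0.0157, zero_point = 64 (real 0.0 -> integer 64)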
@@ -143,11 +148,11 @@ class QAT_Quantizer(Quantizer):
             types of nn.module you want to apply quantization, eg. 'Conv2d'
         """
         super().__init__(model, config_list, optimizer)
-        self.steps = 1
         modules_to_compress = self.get_modules_to_compress()
+        self.bound_model.register_buffer("steps", torch.Tensor([1]))
         for layer, config in modules_to_compress:
-            layer.module.register_buffer("zero_point", None)
-            layer.module.register_buffer("scale", None)
+            layer.module.register_buffer("zero_point", torch.Tensor([0.0]))
+            layer.module.register_buffer("scale", torch.Tensor([1.0]))
             if "output" in config.get("quant_types", []):
                 layer.module.register_buffer('ema_decay', torch.Tensor([0.99]))
                 layer.module.register_buffer('tracked_min_biased', torch.zeros(1))
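Storing `steps`, `scale` and `zero_point` as registered buffers with concrete tensor defaults (instead of a plain attribute and `None` placeholders) means they are part of `state_dict()` and follow `.to(device)`/`.cuda()` together with the module. A minimal sketch of that behaviour (illustrative, not NNI code):

    import torch
    import torch.nn as nn

    class Counter(nn.Module):
        def __init__(self):
            super().__init__()
            # A buffer is saved in state_dict and moved by .to()/.cuda(),
            # but it is not a trainable parameter.
            self.register_buffer("steps", torch.tensor([1.0]))

    m = Counter()
    m.steps += 1
    print(m.state_dict())  # contains 'steps' -> tensor([2.])
    if torch.cuda.is_available():
        print(m.cuda().steps.device)  # the buffer moved to the GPU with the module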
@@ -187,13 +192,17 @@ class QAT_Quantizer(Quantizer):
             quantization bits length
         op : torch.nn.Module
             target module
-        real_val : float
+        real_val : Tensor
             real value to be quantized

         Returns
         -------
-        float
+        Tensor
         """
+        if real_val.is_cuda:
+            op.zero_point = op.zero_point.cuda()
+            op.scale = op.scale.cuda()
+
         transformed_val = op.zero_point + real_val / op.scale
         qmin = 0
         qmax = (1 << bits) - 1
@@ -229,7 +238,8 @@ class QAT_Quantizer(Quantizer):
         quant_start_step = config.get('quant_start_step', 0)
         assert weight_bits >= 1, "quant bits length should be at least 1"

-        if quant_start_step > self.steps:
+        # we dont update weight in evaluation stage
+        if quant_start_step > self.bound_model.steps or not wrapper.training:
             return weight

         # if bias exists, quantize bias to uint32
@@ -258,15 +268,17 @@ class QAT_Quantizer(Quantizer):
         quant_start_step = config.get('quant_start_step', 0)
         assert output_bits >= 1, "quant bits length should be at least 1"

-        if quant_start_step > self.steps:
+        if quant_start_step > self.bound_model.steps:
             return output

-        current_min, current_max = torch.min(output), torch.max(output)
-        module.tracked_min_biased, module.tracked_min = update_ema(module.tracked_min_biased, current_min,
-                                                                   module.ema_decay, self.steps)
-        module.tracked_max_biased, module.tracked_max = update_ema(module.tracked_max_biased, current_max,
-                                                                   module.ema_decay, self.steps)
-        module.scale, module.zero_point = update_quantization_param(output_bits, module.tracked_min, module.tracked_max)
+        # we dont update output quantization parameters in evaluation stage
+        if wrapper.training:
+            current_min, current_max = torch.min(output), torch.max(output)
+            module.tracked_min_biased, module.tracked_min = update_ema(module.tracked_min_biased, current_min,
+                                                                       module.ema_decay, self.bound_model.steps)
+            module.tracked_max_biased, module.tracked_max = update_ema(module.tracked_max_biased, current_max,
+                                                                       module.ema_decay, self.bound_model.steps)
+            module.scale, module.zero_point = update_quantization_param(output_bits, module.tracked_min, module.tracked_max)
         out = self._quantize(output_bits, module, output)
         out = self._dequantize(module, out)
         return out
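`quantize_output` tracks the activation range with an exponential moving average whose bias toward the zero initialisation is corrected by the step count, and it now only updates those statistics while the wrapper is in training mode. A small sketch of what an `update_ema` with this call shape typically computes, assuming the usual biased-EMA-plus-correction formula (not taken from the NNI source):

    import torch

    def update_ema(biased_ema, value, decay, step):
        # Running average of `value`; dividing by (1 - decay**step) removes
        # the bias toward the zero initialisation during the first steps.
        biased_ema = biased_ema * decay + (1 - decay) * value
        unbiased_ema = biased_ema / (1 - decay ** step)
        return biased_ema, unbiased_ema

    tracked_min_biased = torch.zeros(1)
    for step in range(1, 4):
        tracked_min_biased, tracked_min = update_ema(
            tracked_min_biased, torch.tensor([-0.5]), 0.99, step)
        print(step, tracked_min)  # recovers -0.5 immediately despite the zero init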
@@ -279,7 +291,7 @@ class QAT_Quantizer(Quantizer):
         """
         override `compressor` `step` method, quantization only happens after certain number of steps
         """
-        self.steps += 1
+        self.bound_model.steps +=1


 class DoReFaQuantizer(Quantizer):