Commit 9f73153f authored by zhanggzh's avatar zhanggzh
Browse files

add dtk24.04 code

parent eb77376e
{#- Global table-of-contents sidebar (Material theme).
    Renders the Sphinx toctree as a nested nav list; only emitted when
    'globaltoc.html' is an enabled sidebar for the current page. -#}
{% set toctree = toctree(maxdepth=theme_globaltoc_depth|toint, collapse=theme_globaltoc_collapse|tobool, includehidden=theme_globaltoc_includehidden|tobool, titles_only=True) %}
{% if toctree and sidebars and 'globaltoc.html' in sidebars %}
{#- derender_toc converts the rendered toctree HTML into a traversable node tree -#}
{% set toctree_nodes = derender_toc(toctree, False) %}
<ul class="md-nav__list">
  <li class="md-nav__item md-nav__overview">
    <a href="{{ pathto('index')|e }}" class="md-nav__link{% if pagename == 'index' %} md-nav__link--active{% endif %}">{{ _('Overview') }}</a>
  </li>
  {%- for item in toctree_nodes recursive %}
  <li class="md-nav__item{% if item.children %} md-nav__expand{% endif %}">
    {% if "caption" in item %}
    {#- Caption entries are plain section headings, not links -#}
    <span class="md-nav__link caption">{{ item.caption }}</span>
    {% else %}
    {% if item.current %}
    {#- Current page: checkbox toggle reveals the in-page (local) TOC -#}
    <input class="md-toggle md-nav__toggle" data-md-toggle="toc" type="checkbox" id="__toc">
    <label class="md-nav__link md-nav__link--active" for="__toc"> {{ item.contents }} </label>
    {% endif %}
    <a href="{{ item.href|e }}" class="md-nav__link{% if item.current %} md-nav__link--active{% endif %}">
      {% if item.children %}
      <span class="material-icons md-nav__tocarrow">&#xE5C5;</span>
      {% endif %}
      {{ item.contents }}
    </a>
    {% if item.current %}
    {#- Embed the local TOC for the active page; suppress its search box -#}
    {%- set sphinx_material_include_searchbox=False %}
    {% include "localtoc.html" %}
    {% endif %}
    {%- set sphinx_material_include_searchbox=True %}
    {%- if item.children -%}
    <ul class="md-nav__list"> {{ loop(item.children) }}</ul>
    {%- endif %}
    {% endif %}
  </li>
  {%- endfor %}
</ul>
{# TODO: Fallback to toc? #}
{% endif %}
{#- Site header bar: logo, drawer/search toggles, page title, placeholder
    cells for version/language dropdowns, and an optional repository link. -#}
<header class="md-header" data-md-component="header">
  <nav class="md-header-nav md-grid">
    <div class="md-flex navheader">
      <div class="md-flex__cell md-flex__cell--shrink">
        {#- Logo: theme icon if configured, else the project logo image,
            else an empty placeholder -#}
        <a href="{{ pathto(master_doc)|e }}" title="{{ docstitle|e }}"
           class="md-header-nav__button md-logo">
          {% if theme_logo_icon|e %}
          <i class="md-icon">{{ theme_logo_icon }}</i>
          {% elif logo %}
          <img src="{{ pathto('_static/' ~ logo, 1) }}" height="26"
               alt="{{ shorttitle|striptags|e }} logo">
          {% else %}
          &nbsp;
          {% endif %}
        </a>
      </div>
      <div class="md-flex__cell md-flex__cell--shrink">
        {#- Hamburger toggle for the navigation drawer -#}
        <label class="md-icon md-icon--menu md-header-nav__button" for="__drawer"></label>
      </div>
      <div class="md-flex__cell md-flex__cell--stretch">
        <div class="md-flex__ellipsis md-header-nav__title" data-md-component="title">
          <span class="md-header-nav__topic">{{ theme_nav_title or shorttitle }}</span>
          <span class="md-header-nav__topic"> {{ title|striptags|e }} </span>
        </div>
      </div>
      <div class="md-flex__cell md-flex__cell--shrink">
        <label class="md-icon md-icon--search md-header-nav__button" for="__search"></label>
        {% include "searchbox.html" %}
      </div>
      {#- Empty cells populated client-side with version/language dropdowns -#}
      <div class="md-flex__cell md-flex__cell--shrink drop version"></div>
      <div class="md-flex__cell md-flex__cell--shrink drop language"></div>
      <div class="md-flex__cell md-flex__cell--shrink">
        {% if theme_repo_url %}
        <div class="md-header-nav__source">
          {% include "repo.html" %}
        </div>
        {% endif %}
      </div>
    </div>
  </nav>
</header>
{#- Optional per-page hero banner: theme_heroes maps pagename -> HTML snippet. -#}
{% if pagename in theme_heroes %}
{% set hero = theme_heroes[pagename] %}
<div class="md-hero{% if nav_bar_tabs %} md-hero--expand{% endif %}" data-md-component="hero">
  <div class="md-hero__inner md-grid">
    <p>{{ hero }}</p>
  </div>
  <div class="md-hero__background">
    <img src="{{ pathto('_static/img/hero-background.svg', 1) }}" />
  </div>
</div>
{% endif %}
{#- Theme layout override: custom fonts, globals for ReadTheDocs versions and
    gallery links, custom colors, and a footer hook that retargets the
    md-source component. -#}
{% extends "!layout.html" %}

{#- SPECIFY PARTICULAR FONTS FOR HEADERS #}
{% block font %}
<link href="https://fonts.gstatic.com/" rel="preconnect" crossorigin>
<link href="https://fonts.googleapis.com/css?family=Roboto+Mono:400,500,700|Roboto:400,400i,500,700|Google+Sans:400,500,600|Material+Icons&display=fallback" rel="stylesheet">
{% endblock %}

{#- TO INJECT INFORMATION FROM READTHEDOCS HERE #}
{% block scripts %}
{{ super() }}
{#- CUSTOM THEME #}
<style>
  :root {
    --custom-color-primary: #1381cf;
    --custom-color-accent: #0e619d;
  }
</style>
{% if versions %}
<script type="text/javascript">
  READTHEDOCS_VERSIONS = {{ versions | tojson }}
</script>
{% endif %}
<script type="text/javascript">
  // for gallery links
  GIT_COMMIT_ID = "{{ git_commit_id }}";
  PAGENAME = "{{ pagename }}";
  GALLERY_LINKS = {
    colab: "{{ pathto('_static/img/gallery-colab.svg', 1) }}",
    notebook: "{{ pathto('_static/img/gallery-download.svg', 1) }}",
    github: "{{ pathto('_static/img/gallery-github.svg', 1) }}"
  }
</script>
{% endblock %}

{#- REPLACE ATTRIBUTES INSTANTLY TO DISABLE SOME HOOKS #}
{% block footer_scripts %}
<script type="text/javascript">
  // try to disable original hook for md-source.
  $('*[data-md-source="github"]').attr("data-md-source", "nni");
</script>
{{ super() }}
{% endblock %}
{#- Top navigation tabs: breadcrumb trail root -> parents -> current page.
    Hidden on the index page. -#}
{%- if nav_bar_tabs and pagename != 'index' %}
<nav class="md-tabs" data-md-component="tabs">
  <div class="md-tabs__inner md-grid">
    <ul class="md-tabs__list">
      {%- block rootrellink %}
      {%- if theme_master_doc %}
      <li class="md-tabs__item">
        <a href="{{ pathto(master_doc)|e }}" class="md-tabs__link">
          {{ shorttitle }}
          <span class="material-icons md-tabs__arrow">&#xEAC9;</span>
        </a>
      </li>
      {%- endif %}
      {%- endblock %}
      {#- DO NOT SUPPORT THEME NAV LINKS HERE #}
      {%- for parent in parents %}
      <li class="md-tabs__item">
        <a href="{{ parent.link|e }}" class="md-tabs__link">
          {{ parent.title }}
          <span class="material-icons md-tabs__arrow">&#xEAC9;</span>
        </a>
      </li>
      {%- endfor %}
      {#- Current page is shown as a non-navigating tab -#}
      <li class="md-tabs__item"><a href="#" class="md-tabs__link">{{ title|striptags|e }}</a></li>
    </ul>
  </div>
</nav>
{%- endif %}
"""
This is to keep Chinese doc update to English doc. Should be run regularly.
There is no sane way to check the contents though. PR review should enforce contributors to update the corresponding translation.
See https://github.com/microsoft/nni/issues/4298 for discussion.
Under docs, run
python tools/chineselink.py
"""
import hashlib
import shutil
import sys
from pathlib import Path
def iterate_dir(path):
    """Yield every file (recursively, depth-first) under *path*.

    Directories themselves are never yielded, only the files they contain.
    """
    for entry in Path(path).iterdir():
        if entry.is_dir():
            yield from iterate_dir(entry)
        else:
            yield entry
# File types whose Chinese counterparts are tracked by this script.
suffix_list = [
    '.html',
    '.md',
    '.rst',
    '.ipynb',
]

# "check" mode (used in CI): only report stale/missing files, never modify them.
pipeline_mode = len(sys.argv) > 1 and sys.argv[1] == 'check'

# Accumulates problem descriptions, e.g. '(missing) ...', '(out-of-date) ...'.
failed_files = []

# in case I need to change `_zh` to something else
# files = list(filter(lambda d: d.name.endswith('zh_CN.rst'), iterate_dir('source')))
# for file in files:
#     os.rename(file, file.parent / (file.name[:-7] + file.name[-4:]))
def need_to_translate(source, target):
    """Ensure *target* (the Chinese copy of *source*) exists and is current.

    The first line of a tracked translation stores a truncated sha256 of its
    English source as ``.. <checksum>``.  When the stored checksum no longer
    matches, the file is recorded in the module-level ``failed_files`` list
    and, outside of pipeline (check-only) mode, the header is refreshed in
    place so the translator can diff against the new source.
    """
    if not target.exists():
        failed_files.append('(missing) ' + target.as_posix())
        if pipeline_mode:
            return
        # Seed the translation with a copy of the English source.
        shutil.copyfile(source, target)
    if target.suffix == '.html':
        return  # FIXME I don't know how to process html
    # Checksum of the English source file.  Fixed: this previously read the
    # module-level loop variable ``path`` instead of the ``source`` argument,
    # and leaked the file handle.
    with source.open('rb') as f:
        target_checksum = hashlib.sha256(f.read()).hexdigest()[:32]
    # Stored checksum lives after the '.. ' prefix on the first line.
    with target.open('r') as f:
        checksum = f.readline().strip()[3:]
    if checksum != target_checksum:
        failed_files.append('(out-of-date) ' + target.as_posix())
        if pipeline_mode:
            return
        with target.open('r') as f:
            contents = f.readlines()
        firstline = '.. ' + target_checksum + '\n'
        if contents[0].startswith('.. '):
            # Replace the stale checksum header.
            contents = [firstline] + contents[1:]
        else:
            # No header yet: prepend one, keeping a blank separator line.
            contents = [firstline, '\n'] + contents
        with target.open('w') as f:
            f.writelines(contents)
# Walk every doc source file and keep the Chinese copies ('<stem>_zh.<ext>')
# in sync with their English counterparts; '_build' output is skipped.
for path in iterate_dir(Path('source')):
    relative_path = path.relative_to('source')
    if relative_path.as_posix().startswith('_build'):
        continue
    if path.suffix in suffix_list:
        if '_zh.' not in path.name:
            # English file: refresh its translation if one exists.
            target_path = path.parent / (path.stem + '_zh' + path.suffix)
            if target_path.exists():
                # whitelist files. should be translated
                need_to_translate(path, target_path)
                print(f'Skipped linking for {path} as it is in whitelist.')
        else:
            # Chinese file: its English source must still exist.
            source_path = path.parent / (path.stem[:-3] + path.suffix)
            if not source_path.exists():
                # delete redundant files
                # NOTE(review): the message records source_path (the missing
                # English file) while the file actually deleted is the
                # orphaned translation itself (path) -- confirm the message
                # is intentional.
                failed_files.append('(redundant) ' + source_path.as_posix())
                if not pipeline_mode:
                    print(f'Deleting {source_path}')
                    path.unlink()
# In check mode any stale/missing translation fails the run with the full
# list of offending files; otherwise the files were already fixed in place
# above and are simply reported.
if pipeline_mode and failed_files:
    raise ValueError(
        'The following files are not up-to-date. Please run "python3 tools/chineselink.py" under docs folder '
        'to refresh them and update their corresponding translation.\n' + '\n'.join([' ' + line for line in failed_files]))
if failed_files:
    print('Updated files:', failed_files)
# One-off migration: move every file under archive_en_US/ to the mirrored
# location under en_US/, replacing a generated .rst of the same stem.
import os
import shutil
from pathlib import Path

for root, dirs, files in os.walk('archive_en_US'):
    root = Path(root)
    for file in files:
        moved_root = Path('en_US') / root.relative_to('archive_en_US')
        shutil.move(root / file, moved_root / file)
        # NOTE(review): deletes '<stem>.rst' next to the moved file --
        # presumably the page the archived copy supersedes; confirm against
        # the docs build before reusing this script.
        os.remove(moved_root / (Path(file).stem + '.rst'))
"""
Fix a troublsome translation in sphinx.
Related PR: https://github.com/sphinx-doc/sphinx/pull/10303
"""
import subprocess
from pathlib import Path
import sphinx
sphinx_path = Path(sphinx.__path__[0]) / 'locale/zh_CN/LC_MESSAGES'
po_content = (sphinx_path / 'sphinx.po').read_text()
po_content = po_content.replace('%s的别名', '%s 的别名')
(sphinx_path / 'sphinx.po').write_text(po_content)
# build po -> mo
subprocess.run(['msgfmt', '-c', str(sphinx_path / 'sphinx.po'), '-o', str(sphinx_path / 'sphinx.mo')], check=True)
......@@ -22,7 +22,7 @@ import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from nni.feature_engineering.gbdt_selector import GBDTSelector
from nni.algorithms.feature_engineering.gbdt_selector import GBDTSelector
url_zip_train = 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/rcv1_train.binary.bz2'
urllib.request.urlretrieve(url_zip_train, filename='train.bz2')
......
......@@ -33,7 +33,7 @@ from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel
from nni.feature_engineering.gradient_selector import FeatureGradientSelector
from nni.algorithms.feature_engineering.gradient_selector import FeatureGradientSelector
class Benchmark():
......
......@@ -28,7 +28,7 @@ from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel
from nni.feature_engineering.gradient_selector import FeatureGradientSelector
from nni.algorithms.feature_engineering.gradient_selector import FeatureGradientSelector
def test():
......@@ -54,4 +54,4 @@ def test():
print("Pipeline Score: ", pipeline.score(X_train, y_train))
if __name__ == "__main__":
test()
\ No newline at end of file
test()
.pth
.tar.gz
data/
MNIST/
cifar-10-batches-py/
experiment_data/
pruning/models
pruning/pruning_log
\ No newline at end of file
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# Auto-compression example module: defines the MNIST model and the training /
# evaluation callbacks consumed by NNI's AbstractAutoCompressionModule below.

from typing import Callable, Optional, Iterable

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms

from nni.algorithms.compression.pytorch.auto_compress import AbstractAutoCompressionModule

# Fixed seed keeps the example reproducible across runs.
torch.manual_seed(1)
class LeNet(nn.Module):
    """LeNet-style MNIST classifier: two conv layers, dropout, two FC layers,
    log-softmax output (pairs with an NLL loss)."""

    def __init__(self):
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Regularisation.
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        # Classifier head (9216 = 64 channels x 12 x 12 after pooling).
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        features = self.dropout1(F.max_pool2d(features, 2))
        flat = torch.flatten(features, 1)
        hidden = self.dropout2(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)
# Device / data-loader configuration for the example.
_use_cuda = torch.cuda.is_available()

_train_kwargs = {'batch_size': 64}
_test_kwargs = {'batch_size': 1000}
if _use_cuda:
    _cuda_kwargs = {'num_workers': 1,
                    'pin_memory': True,
                    'shuffle': True}
    _train_kwargs.update(_cuda_kwargs)
    _test_kwargs.update(_cuda_kwargs)

# Standard MNIST mean/std normalisation.
_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

_device = torch.device("cuda" if _use_cuda else "cpu")

# Loaders are created lazily on first use by _train()/_test().
_train_loader = None
_test_loader = None
def _train(model, optimizer, criterion, epoch):
    """Run one MNIST training epoch; the loader is built lazily on first call.

    ``epoch`` is accepted for interface compatibility but not used here.
    """
    global _train_loader
    if _train_loader is None:
        mnist_train = datasets.MNIST('./data', train=True, download=True, transform=_transform)
        _train_loader = torch.utils.data.DataLoader(mnist_train, **_train_kwargs)
    model.train()
    for batch, labels in _train_loader:
        batch, labels = batch.to(_device), labels.to(_device)
        optimizer.zero_grad()
        loss = criterion(model(batch), labels)
        loss.backward()
        optimizer.step()
def _test(model):
    """Evaluate *model* on the MNIST test split; returns accuracy in percent."""
    global _test_loader
    if _test_loader is None:
        mnist_test = datasets.MNIST('./data', train=False, transform=_transform)
        _test_loader = torch.utils.data.DataLoader(mnist_test, **_test_kwargs)
    model.eval()
    total_loss = 0
    hits = 0
    with torch.no_grad():
        for batch, labels in _test_loader:
            batch, labels = batch.to(_device), labels.to(_device)
            log_probs = model(batch)
            total_loss += F.nll_loss(log_probs, labels, reduction='sum').item()
            predictions = log_probs.argmax(dim=1, keepdim=True)
            hits += predictions.eq(labels.view_as(predictions)).sum().item()
    total_loss /= len(_test_loader.dataset)
    acc = 100 * hits / len(_test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        total_loss, hits, len(_test_loader.dataset), acc))
    return acc
# Pre-trained weights are required; produce mnist_pretrain_lenet.pth with a
# plain training run before launching the auto-compression experiment.
_model = LeNet().to(_device)
_model.load_state_dict(torch.load('mnist_pretrain_lenet.pth'))
class AutoCompressionModule(AbstractAutoCompressionModule):
    """Callback bundle for NNI auto-compression: exposes the pre-trained
    model, evaluator, optimizer factory, criterion and trainers."""

    @classmethod
    def model(cls) -> nn.Module:
        # Pre-trained LeNet defined above.
        return _model

    @classmethod
    def evaluator(cls) -> Callable[[nn.Module], float]:
        # _test returns accuracy in percent.
        return _test

    @classmethod
    def optimizer_factory(cls) -> Optional[Callable[[Iterable], optim.Optimizer]]:
        def _optimizer_factory(params: Iterable):
            return torch.optim.SGD(params, lr=0.01)
        return _optimizer_factory

    @classmethod
    def criterion(cls) -> Optional[Callable]:
        # Matches the model's log-softmax output.
        return F.nll_loss

    @classmethod
    def sparsifying_trainer(cls, compress_algorithm_name: str) -> Optional[Callable[[nn.Module, optim.Optimizer, Callable, int], None]]:
        # The same training loop is used regardless of the algorithm.
        return _train

    @classmethod
    def post_compress_finetuning_trainer(cls, compress_algorithm_name: str) -> Optional[Callable[[nn.Module, optim.Optimizer, Callable, int], None]]:
        return _train

    @classmethod
    def post_compress_finetuning_epochs(cls, compress_algorithm_name: str) -> int:
        # Fine-tune for two epochs after each compression step.
        return 2
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# Launch an NNI auto-compression experiment searching over level/L1 pruning
# sparsities plus a fixed 8-bit QAT quantization config, using the callbacks
# defined in auto_compress_module.

from pathlib import Path

from nni.algorithms.compression.pytorch.auto_compress import AutoCompressionExperiment, AutoCompressionSearchSpaceGenerator

from auto_compress_module import AutoCompressionModule

# Sparsity is sampled uniformly in (0.01, 0.99) for both pruners.
generator = AutoCompressionSearchSpaceGenerator()
generator.add_config('level', [
    {
        "sparsity": {
            "_type": "uniform",
            "_value": [0.01, 0.99]
        },
        'op_types': ['default']
    }
])
generator.add_config('l1', [
    {
        "sparsity": {
            "_type": "uniform",
            "_value": [0.01, 0.99]
        },
        'op_types': ['Conv2d']
    }
])
generator.add_config('qat', [
    {
        'quant_types': ['weight', 'output'],
        'quant_bits': {
            'weight': 8,
            'output': 8
        },
        'op_types': ['Conv2d', 'Linear']
    }])
search_space = generator.dumps()

experiment = AutoCompressionExperiment(AutoCompressionModule, 'local')
experiment.config.experiment_name = 'auto compression torch example'
experiment.config.trial_concurrency = 1
experiment.config.max_trial_number = 10
experiment.config.search_space = search_space
experiment.config.trial_code_directory = Path(__file__).parent
experiment.config.tuner.name = 'TPE'
experiment.config.tuner.class_args['optimize_mode'] = 'maximize'
experiment.config.training_service.use_active_gpu = True

# Blocks and serves the NNI web UI on this port.
experiment.run(8088)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
NNI example for combined pruning and quantization to compress a model.
In this example, we show the compression process to first prune a model, then quantize the pruned model.
"""
import argparse
import os
import time
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets, transforms
from nni.compression.pytorch.utils import count_flops_params
from nni.compression.pytorch import ModelSpeedup
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner
from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
from models.mnist.naive import NaiveModel
from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT
def get_model_time_cost(model, dummy_input):
    """Return the average forward-pass latency (seconds) of *model*.

    Runs 100 forward passes on *dummy_input* and averages the last 90,
    discarding the first 10 as warm-up.  CUDA synchronization is only
    requested when CUDA is actually available, so the helper also works on
    CPU-only machines (the original crashed there).
    """
    model.eval()
    n_times = 100
    sync = torch.cuda.is_available()
    time_list = []
    for _ in range(n_times):
        if sync:
            torch.cuda.synchronize()
        tic = time.time()
        _ = model(dummy_input)
        if sync:
            # Wait for the async kernel to finish so the timing is accurate.
            torch.cuda.synchronize()
        time_list.append(time.time() - tic)
    time_list = time_list[10:]  # drop warm-up iterations
    return sum(time_list) / len(time_list)
def train(args, model, device, train_loader, criterion, optimizer, epoch):
    """Run one training epoch, logging every ``args.log_interval`` batches.

    With ``args.dry_run`` set, stops after the first logged batch.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
        if step % args.log_interval != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(inputs), len(train_loader.dataset),
            100. * step / len(train_loader), loss.item()))
        if args.dry_run:
            break
def test(args, model, device, criterion, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += criterion(output, target).item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
acc = 100 * correct / len(test_loader.dataset)
print('Test Loss: {:.6f} Accuracy: {}%\n'.format(
test_loss, acc))
return acc
def test_trt(engine, test_loader):
test_loss = 0
correct = 0
time_elasped = 0
for data, target in test_loader:
output, time = engine.inference(data)
test_loss += F.nll_loss(output, target, reduction='sum').item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
time_elasped += time
test_loss /= len(test_loader.dataset)
print('Loss: {} Accuracy: {}%'.format(
test_loss, 100 * correct / len(test_loader.dataset)))
print("Inference elapsed_time (whole dataset): {}s".format(time_elasped))
def main(args):
    """End-to-end compression demo: pretrain -> prune (L1) -> speedup ->
    finetune -> quantize (QAT) -> TensorRT speedup and evaluation."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    os.makedirs(args.experiment_data_dir, exist_ok=True)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=True, download=True, transform=transform),
        batch_size=64,)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False, transform=transform),
        batch_size=1000)

    # Step1. Model Pretraining
    model = NaiveModel().to(device)
    criterion = torch.nn.NLLLoss()
    optimizer = optim.Adadelta(model.parameters(), lr=args.pretrain_lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
    flops, params, _ = count_flops_params(model, (1, 1, 28, 28), verbose=False)

    if args.pretrained_model_dir is None:
        # No checkpoint given: train from scratch and keep the best epoch.
        args.pretrained_model_dir = os.path.join(args.experiment_data_dir, f'pretrained.pth')
        best_acc = 0
        for epoch in range(args.pretrain_epochs):
            train(args, model, device, train_loader, criterion, optimizer, epoch)
            scheduler.step()
            acc = test(args, model, device, criterion, test_loader)
            if acc > best_acc:
                best_acc = acc
                state_dict = model.state_dict()
        model.load_state_dict(state_dict)
        torch.save(state_dict, args.pretrained_model_dir)
        print(f'Model saved to {args.pretrained_model_dir}')
    else:
        # Resume from the provided checkpoint.
        state_dict = torch.load(args.pretrained_model_dir)
        model.load_state_dict(state_dict)
        best_acc = test(args, model, device, criterion, test_loader)

    dummy_input = torch.randn([1000, 1, 28, 28]).to(device)
    time_cost = get_model_time_cost(model, dummy_input)

    # 125.49 M, 0.85M, 93.29, 1.1012
    print(f'Pretrained model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_acc: .2f}, Time Cost: {time_cost}')

    # Step2. Model Pruning
    config_list = [{
        'sparsity': args.sparsity,
        'op_types': ['Conv2d']
    }]

    kw_args = {}
    if args.dependency_aware:
        dummy_input = torch.randn([1000, 1, 28, 28]).to(device)
        print('Enable the dependency_aware mode')
        # note that, not all pruners support the dependency_aware mode
        kw_args['dependency_aware'] = True
        kw_args['dummy_input'] = dummy_input

    pruner = L1FilterPruner(model, config_list, **kw_args)
    model = pruner.compress()
    pruner.get_pruned_weights()

    mask_path = os.path.join(args.experiment_data_dir, 'mask.pth')
    model_path = os.path.join(args.experiment_data_dir, 'pruned.pth')
    pruner.export_model(model_path=model_path, mask_path=mask_path)
    pruner._unwrap_model()  # unwrap all modules to normal state

    # Step3. Model Speedup
    # Pruners only mask weights; speedup rebuilds the smaller network.
    m_speedup = ModelSpeedup(model, dummy_input, mask_path, device)
    m_speedup.speedup_model()
    print('model after speedup', model)

    flops, params, _ = count_flops_params(model, dummy_input, verbose=False)
    acc = test(args, model, device, criterion, test_loader)
    time_cost = get_model_time_cost(model, dummy_input)
    print(f'Pruned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {acc: .2f}, Time Cost: {time_cost}')

    # Step4. Model Finetuning
    optimizer = optim.Adadelta(model.parameters(), lr=args.pretrain_lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
    best_acc = 0
    for epoch in range(args.finetune_epochs):
        train(args, model, device, train_loader, criterion, optimizer, epoch)
        scheduler.step()
        acc = test(args, model, device, criterion, test_loader)
        if acc > best_acc:
            best_acc = acc
            state_dict = model.state_dict()
    model.load_state_dict(state_dict)
    save_path = os.path.join(args.experiment_data_dir, f'finetuned.pth')
    torch.save(state_dict, save_path)
    flops, params, _ = count_flops_params(model, dummy_input, verbose=True)
    time_cost = get_model_time_cost(model, dummy_input)

    # FLOPs 28.48 M, #Params: 0.18M, Accuracy: 89.03, Time Cost: 1.03
    print(f'Finetuned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_acc: .2f}, Time Cost: {time_cost}')
    print(f'Model saved to {save_path}')

    # Step5. Model Quantization via QAT
    config_list = [{
        'quant_types': ['weight', 'output'],
        'quant_bits': {'weight': 8, 'output': 8},
        'op_names': ['conv1']
    }, {
        'quant_types': ['output'],
        'quant_bits': {'output':8},
        'op_names': ['relu1']
    }, {
        'quant_types': ['weight', 'output'],
        'quant_bits': {'weight': 8, 'output': 8},
        'op_names': ['conv2']
    }, {
        'quant_types': ['output'],
        'quant_bits': {'output': 8},
        'op_names': ['relu2']
    }]

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
    quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input)
    quantizer.compress()

    # Step6. Quantization Aware Training
    best_acc = 0
    for epoch in range(1):
        train(args, model, device, train_loader, criterion, optimizer, epoch)
        scheduler.step()
        acc = test(args, model, device, criterion, test_loader)
        if acc > best_acc:
            best_acc = acc
            state_dict = model.state_dict()

    calibration_path = os.path.join(args.experiment_data_dir, 'calibration.pth')
    # NOTE(review): reuses ``model_path`` from the pruning step, so the QAT
    # export overwrites 'pruned.pth' -- confirm this is intentional.
    calibration_config = quantizer.export_model(model_path, calibration_path)
    print("calibration_config: ", calibration_config)

    # Step7. Model Speedup
    batch_size = 32
    input_shape = (batch_size, 1, 28, 28)
    engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config, batchsize=32)
    engine.compress()

    test_trt(engine, test_loader)
# CLI entry point; unused options for other datasets/pruners are kept
# commented out for reference.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model comporession')

    # dataset and model
    # parser.add_argument('--dataset', type=str, default='mnist',
    #                     help='dataset to use, mnist, cifar10 or imagenet')
    # parser.add_argument('--data-dir', type=str, default='./data/',
    #                     help='dataset directory')
    parser.add_argument('--pretrained-model-dir', type=str, default=None,
                        help='path to pretrained model')
    parser.add_argument('--pretrain-epochs', type=int, default=10,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--pretrain-lr', type=float, default=1.0,
                        help='learning rate to pretrain the model')
    parser.add_argument('--experiment-data-dir', type=str, default='./experiment_data',
                        help='For saving output checkpoints')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    # parser.add_argument('--multi-gpu', action='store_true', default=False,
    #                     help='run on mulitple gpus')
    # parser.add_argument('--test-only', action='store_true', default=False,
    #                     help='run test only')

    # pruner
    # parser.add_argument('--pruner', type=str, default='l1filter',
    #                     choices=['level', 'l1filter', 'l2filter', 'slim', 'agp',
    #                              'fpgm', 'mean_activation', 'apoz', 'admm'],
    #                     help='pruner to use')
    parser.add_argument('--sparsity', type=float, default=0.5,
                        help='target overall target sparsity')
    parser.add_argument('--dependency-aware', action='store_true', default=False,
                        help='toggle dependency-aware mode')

    # finetuning
    parser.add_argument('--finetune-epochs', type=int, default=5,
                        help='epochs to fine tune')
    # parser.add_argument('--kd', action='store_true', default=False,
    #                     help='quickly check a single pass')
    # parser.add_argument('--kd_T', type=float, default=4,
    #                     help='temperature for KD distillation')
    # parser.add_argument('--finetune-lr', type=float, default=0.5,
    #                     help='learning rate to finetune the model')

    # speedup
    # parser.add_argument('--speedup', action='store_true', default=False,
    #                     help='whether to speedup the pruned model')
    # parser.add_argument('--nni', action='store_true', default=False,
    #                     help="whether to tune the pruners using NNi tuners")

    args = parser.parse_args()
    main(args)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# Compression experiment example: TaylorFO weight pruning of a LeNet under
# flops/params constraints, with callbacks imported from vessel.py.

from pathlib import Path

import torch
from torch.optim import Adam

import nni
from nni.compression.experiment.experiment import CompressionExperiment
from nni.compression.experiment.config import CompressionExperimentConfig, TaylorFOWeightPrunerConfig
from vessel import LeNet, finetuner, evaluator, trainer, criterion, device

model = LeNet().to(device)

# pre-training model
finetuner(model)

# The optimizer class is wrapped with nni.trace so the experiment can
# re-create it inside trials (same requirement as the pruner examples).
optimizer = nni.trace(Adam)(model.parameters())
dummy_input = torch.rand(16, 1, 28, 28).to(device)

# normal experiment setting, no need to set search_space and trial_command
config = CompressionExperimentConfig('local')
config.experiment_name = 'auto compression torch example'
config.trial_concurrency = 1
config.max_trial_number = 10
config.trial_code_directory = Path(__file__).parent
config.tuner.name = 'TPE'
config.tuner.class_args['optimize_mode'] = 'maximize'

# compression experiment specific setting
# single float value means the expected remaining ratio upper limit for flops & params, lower limit for metric
config.compression_setting.flops = 0.2
config.compression_setting.params = 0.5
config.compression_setting.module_types = ['Conv2d', 'Linear']
config.compression_setting.exclude_module_names = ['fc2']
config.compression_setting.pruners = [TaylorFOWeightPrunerConfig()]

experiment = CompressionExperiment(config, model, finetuner, evaluator, dummy_input, trainer, optimizer, criterion, device)
experiment.run(8080)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torchvision import datasets, transforms
import nni
# NOTE(review): the class is wrapped with nni.trace -- presumably so the
# experiment can record and replay the constructor call; confirm against the
# nni documentation.
@nni.trace
class LeNet(nn.Module):
    """LeNet-style MNIST classifier: two conv layers, dropout, two FC layers,
    log-softmax output (pairs with F.nll_loss below)."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        # 9216 = 64 channels x 12 x 12 after the max-pool.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output
# Use CUDA only when it is actually present.  The original hard-coded
# ``_use_cuda = True``, which crashed on CPU-only machines and diverged from
# the sibling example module (which already probes availability).
_use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if _use_cuda else "cpu")

_train_kwargs = {'batch_size': 64}
_test_kwargs = {'batch_size': 1000}
if _use_cuda:
    _cuda_kwargs = {'num_workers': 1,
                    'pin_memory': True,
                    'shuffle': True}
    _train_kwargs.update(_cuda_kwargs)
    _test_kwargs.update(_cuda_kwargs)

# Standard MNIST mean/std normalisation.
_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Loaders are created lazily by trainer()/evaluator().
_train_loader = None
_test_loader = None
def trainer(model, optimizer, criterion):
    """Run one MNIST training epoch; the train loader is built lazily."""
    global _train_loader
    if _train_loader is None:
        mnist_train = datasets.MNIST('./data', train=True, download=True, transform=_transform)
        _train_loader = torch.utils.data.DataLoader(mnist_train, **_train_kwargs)
    model.train()
    for inputs, labels in _train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()
def evaluator(model):
    """Return MNIST test-set accuracy in percent; also prints a summary."""
    global _test_loader
    if _test_loader is None:
        mnist_test = datasets.MNIST('./data', train=False, transform=_transform, download=True)
        _test_loader = torch.utils.data.DataLoader(mnist_test, **_test_kwargs)
    model.eval()
    loss_total = 0
    hits = 0
    with torch.no_grad():
        for inputs, labels in _test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            log_probs = model(inputs)
            loss_total += F.nll_loss(log_probs, labels, reduction='sum').item()
            predictions = log_probs.argmax(dim=1, keepdim=True)
            hits += predictions.eq(labels.view_as(predictions)).sum().item()
    loss_total /= len(_test_loader.dataset)
    acc = 100 * hits / len(_test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        loss_total, hits, len(_test_loader.dataset), acc))
    return acc
# Negative log-likelihood matches the model's log-softmax output.
criterion = F.nll_loss

def finetuner(model: nn.Module):
    """Quick (pre-)training pass: three epochs with a fresh Adam optimizer."""
    optimizer = Adam(model.parameters())
    for _ in range(3):
        trainer(model, optimizer, criterion)
import torch
import torch.nn as nn
import torch.nn.functional as F
from functools import reduce
class NaiveModel(torch.nn.Module):
    """Small LeNet-like MNIST network with explicit ReLU6/MaxPool modules.

    Activations and pooling are named sub-modules (rather than functional
    calls) so compression configs can target them by name (e.g. 'relu1').
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 20, 5, 1)
        self.conv2 = torch.nn.Conv2d(20, 50, 5, 1)
        self.fc1 = torch.nn.Linear(4 * 4 * 50, 500)
        self.fc2 = torch.nn.Linear(500, 10)
        self.relu1 = torch.nn.ReLU6()
        self.relu2 = torch.nn.ReLU6()
        self.relu3 = torch.nn.ReLU6()
        self.max_pool1 = torch.nn.MaxPool2d(2, 2)
        self.max_pool2 = torch.nn.MaxPool2d(2, 2)

    def forward(self, x):
        out = self.max_pool1(self.relu1(self.conv1(x)))
        out = self.max_pool2(self.relu2(self.conv2(out)))
        # Flatten everything but the batch dimension.
        out = out.view(-1, out.size()[1:].numel())
        out = self.fc2(self.relu3(self.fc1(out)))
        return F.log_softmax(out, dim=1)
\ No newline at end of file
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
'''
NNI example for supported ActivationAPoZRank and ActivationMeanRank pruning algorithms.
In this example, we show the end-to-end pruning process: pre-training -> pruning -> fine-tuning.
Note that pruners use masks to simulate the real pruning. In order to obtain a real compressed model, model speedup is required.
'''
import argparse
import sys
import torch
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import MultiStepLR
import nni
from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils import count_flops_params
from nni.compression.pytorch.pruning import ActivationAPoZRankPruner, ActivationMeanRankPruner
from pathlib import Path
sys.path.append(str(Path(__file__).absolute().parents[1] / 'models'))
from cifar10.vgg import VGG
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# CIFAR-10 per-channel mean/std.
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

# Global epoch counter, shared with trainer() for its log messages.
g_epoch = 0

# Training split with standard augmentation (random flip + padded crop).
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=True, transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, 4),
        transforms.ToTensor(),
        normalize,
    ]), download=True),
    batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('./data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])),
    batch_size=128, shuffle=False)
def trainer(model, optimizer, criterion):
    """Train one CIFAR-10 epoch; bumps the module-level ``g_epoch`` counter."""
    global g_epoch
    model.train()
    for step, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
        # Log every 100 batches, skipping batch 0.
        if step and step % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                g_epoch, step * len(images), len(train_loader.dataset),
                100. * step / len(train_loader), loss.item()))
    g_epoch += 1
def evaluator(model):
    """Return top-1 accuracy (percent) of *model* on the CIFAR-10 test set."""
    model.eval()
    hits = 0.0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            predictions = model(images).argmax(dim=1, keepdim=True)
            hits += predictions.eq(labels.view_as(predictions)).sum().item()
    acc = 100 * hits / len(test_loader.dataset)
    print('Accuracy: {}%\n'.format(acc))
    return acc
def optimizer_scheduler_generator(model, _lr=0.1, _momentum=0.9, _weight_decay=5e-4, total_epoch=160):
    """Return (SGD optimizer, MultiStepLR scheduler) for *model*.

    The learning rate decays by x0.1 at 50% and 75% of *total_epoch*.
    """
    sgd = torch.optim.SGD(model.parameters(), lr=_lr, momentum=_momentum, weight_decay=_weight_decay)
    milestones = [int(total_epoch * 0.5), int(total_epoch * 0.75)]
    lr_scheduler = MultiStepLR(sgd, milestones=milestones, gamma=0.1)
    return sgd, lr_scheduler
# Entry point: pretrain VGG on CIFAR-10, prune with an activation-based
# pruner, speed up, then fine-tune; reports FLOPs/params/accuracy before
# and after.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='PyTorch Example for model comporession')
    parser.add_argument('--pruner', type=str, default='apoz',
                        choices=['apoz', 'mean'],
                        help='pruner to use')
    parser.add_argument('--pretrain-epochs', type=int, default=20,
                        help='number of epochs to pretrain the model')
    parser.add_argument('--fine-tune-epochs', type=int, default=20,
                        help='number of epochs to fine tune the model')
    args = parser.parse_args()

    print('\n' + '=' * 50 + ' START TO TRAIN THE MODEL ' + '=' * 50)
    model = VGG().to(device)
    optimizer, scheduler = optimizer_scheduler_generator(model, total_epoch=args.pretrain_epochs)
    criterion = torch.nn.CrossEntropyLoss()
    pre_best_acc = 0.0
    best_state_dict = None

    # Pretrain and keep the best-accuracy checkpoint.
    for i in range(args.pretrain_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        acc = evaluator(model)
        if acc > pre_best_acc:
            pre_best_acc = acc
            best_state_dict = model.state_dict()
    print("Best accuracy: {}".format(pre_best_acc))
    model.load_state_dict(best_state_dict)
    pre_flops, pre_params, _ = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    g_epoch = 0

    # Start to prune and speedup
    print('\n' + '=' * 50 + ' START TO PRUNE THE BEST ACCURACY PRETRAINED MODEL ' + '=' * 50)
    config_list = [{
        'total_sparsity': 0.5,
        'op_types': ['Conv2d'],
    }]

    # make sure you have used nni.trace to wrap the optimizer class before initialize
    traced_optimizer = nni.trace(torch.optim.SGD)(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
    if 'apoz' in args.pruner:
        pruner = ActivationAPoZRankPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
    else:
        pruner = ActivationMeanRankPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)
    _, masks = pruner.compress()
    pruner.show_pruned_weights()
    pruner._unwrap_model()
    # Masks only simulate pruning; speedup rebuilds the smaller network.
    ModelSpeedup(model, dummy_input=torch.rand([10, 3, 32, 32]).to(device), masks_file=masks).speedup_model()
    print('\n' + '=' * 50 + ' EVALUATE THE MODEL AFTER SPEEDUP ' + '=' * 50)
    evaluator(model)

    # Optimizer used in the pruner might be patched, so recommend to new an optimizer for fine-tuning stage.
    print('\n' + '=' * 50 + ' START TO FINE TUNE THE MODEL ' + '=' * 50)
    optimizer, scheduler = optimizer_scheduler_generator(model, _lr=0.01, total_epoch=args.fine_tune_epochs)
    best_acc = 0.0
    g_epoch = 0
    for i in range(args.fine_tune_epochs):
        trainer(model, optimizer, criterion)
        scheduler.step()
        best_acc = max(evaluator(model), best_acc)
    flops, params, results = count_flops_params(model, torch.randn([128, 3, 32, 32]).to(device))
    print(f'Pretrained model FLOPs {pre_flops/1e6:.2f} M, #Params: {pre_params/1e6:.2f}M, Accuracy: {pre_best_acc: .2f}%')
    print(f'Finetuned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_acc: .2f}%')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment