Unverified commit fe02b808, authored by J-shang, committed by GitHub

[Doc] split index to overview & toctree compression part (#4749)

parent b8d029b1
@@ -69,25 +69,21 @@ from scripts.compression_mnist_model import TorchModel, device, trainer, evaluator
 config_list = [{
     'quant_types': ['input', 'weight'],
     'quant_bits': {'input': 8, 'weight': 8},
-    'op_names': ['conv1']
+    'op_types': ['Conv2d']
 }, {
     'quant_types': ['output'],
     'quant_bits': {'output': 8},
-    'op_names': ['relu1']
+    'op_types': ['ReLU']
 }, {
     'quant_types': ['input', 'weight'],
     'quant_bits': {'input': 8, 'weight': 8},
-    'op_names': ['conv2']
-}, {
-    'quant_types': ['output'],
-    'quant_bits': {'output': 8},
-    'op_names': ['relu2']
+    'op_names': ['fc1', 'fc2']
 }]
 model = TorchModel().to(device)
 optimizer = SGD(model.parameters(), lr=0.01, momentum=0.5)
 criterion = F.nll_loss
-dummy_input = torch.rand(32, 1, 28,28).to(device)
+dummy_input = torch.rand(32, 1, 28, 28).to(device)
 from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
 quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input)
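The config change above swaps per-layer `op_names` for class-based `op_types`: in NNI's compression config format, `op_types` matches every submodule of a given PyTorch class, while `op_names` targets specific named submodules. A minimal sketch of the matching rule (illustrative only, assuming the updated `TorchModel` from `scripts/compression_mnist_model.py` is importable):

```python
import torch.nn as nn
from scripts.compression_mnist_model import TorchModel  # assumed importable

model = TorchModel()
# 'op_types': ['Conv2d'] selects submodules by class ...
conv_targets = [name for name, m in model.named_modules()
                if isinstance(m, nn.Conv2d)]
# ... while 'op_names': ['fc1', 'fc2'] selects them by attribute path.
fc_targets = [name for name, m in model.named_modules()
              if name in ('fc1', 'fc2')]
print(conv_targets)  # ['conv1', 'conv2']
print(fc_targets)    # ['fc1', 'fc2']
```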
@@ -101,6 +97,8 @@ for epoch in range(3):
 # %%
 # export model and get calibration_config
+import os
+os.makedirs('log', exist_ok=True)
 model_path = "./log/mnist_model.pth"
 calibration_path = "./log/mnist_calibration.pth"
 calibration_config = quantizer.export_model(model_path, calibration_path)
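`export_model` writes the quantized weights to `model_path` and produces the calibration data (per-layer bit widths and the activation ranges tracked during quantization-aware training) that the TensorRT step below consumes. A quick way to inspect it; the exact field names depend on the quantizer version, so treat the comment as illustrative:

```python
# calibration_config maps layer names to the quantization parameters
# recorded during training, e.g. bit widths and tracked input/output
# ranges (exact keys depend on the quantizer version).
for layer_name, params in calibration_config.items():
    print(layer_name, params)
```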
@@ -110,11 +108,11 @@ print("calibration_config: ", calibration_config)
 # %%
 # build tensorRT engine to make a real speedup
-# from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT
-# input_shape = (32, 1, 28, 28)
-# engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config, batchsize=32)
-# engine.compress()
-# test_trt(engine)
+from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT
+input_shape = (32, 1, 28, 28)
+engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config, batchsize=32)
+engine.compress()
+test_trt(engine)
 # %%
 # Note that NNI also supports post-training quantization directly, please refer to complete examples for detail.
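`test_trt` comes from the example's helper script; roughly, it runs the MNIST test set through the TensorRT engine and reports accuracy and latency. A hypothetical sketch of such a helper, assuming `engine.inference` returns the output batch and the inference time (this is not the repo's exact implementation):

```python
import torch
from torchvision import datasets, transforms

def test_trt_sketch(engine, batch_size=32):
    """Hypothetical stand-in for test_trt: measure accuracy and latency."""
    test_data = datasets.MNIST('data', train=False, download=True,
                               transform=transforms.ToTensor())
    loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                         drop_last=True)  # engine expects a fixed batch size
    correct, elapsed = 0, 0.0
    for data, target in loader:
        output, time_span = engine.inference(data)  # assumed API: returns (output, seconds)
        elapsed += time_span
        correct += (output.argmax(dim=1).cpu() == target).sum().item()
    total = len(loader) * batch_size
    print(f'accuracy: {correct / total:.4f}, inference time: {elapsed:.4f}s')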
scripts/compression_mnist_model.py
@@ -15,13 +15,19 @@ class TorchModel(nn.Module):
         self.fc1 = nn.Linear(16 * 4 * 4, 120)
         self.fc2 = nn.Linear(120, 84)
         self.fc3 = nn.Linear(84, 10)
+        self.relu1 = nn.ReLU()
+        self.relu2 = nn.ReLU()
+        self.relu3 = nn.ReLU()
+        self.relu4 = nn.ReLU()
+        self.pool1 = nn.MaxPool2d((2, 2))
+        self.pool2 = nn.MaxPool2d((2, 2))
 
     def forward(self, x):
-        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
-        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
+        x = self.pool1(self.relu1(self.conv1(x)))
+        x = self.pool2(self.relu2(self.conv2(x)))
         x = torch.flatten(x, 1)
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
+        x = self.relu3(self.fc1(x))
+        x = self.relu4(self.fc2(x))
         x = self.fc3(x)
         return F.log_softmax(x, dim=1)
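The rewrite above replaces functional `F.relu` / `F.max_pool2d` calls with dedicated `nn.ReLU` / `nn.MaxPool2d` submodules. Module-based compression tools such as NNI's quantizers wrap named submodules, so functional calls inside `forward` are invisible to a config that selects by `op_types` or `op_names`; after this change the activations become addressable modules. A quick illustration, again assuming the updated class is importable:

```python
import torch.nn as nn
from scripts.compression_mnist_model import TorchModel  # assumed importable

model = TorchModel()
# With functional F.relu there would be no ReLU entries here; with the
# explicit submodules they can be matched by 'op_types': ['ReLU'].
relu_names = [name for name, m in model.named_modules() if isinstance(m, nn.ReLU)]
print(relu_names)  # ['relu1', 'relu2', 'relu3', 'relu4']
```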