Unverified commit fe02b808 authored by J-shang, committed by GitHub

[Doc] split index to overview & toctree compression part (#4749)

parent b8d029b1
@@ -69,25 +69,21 @@ from scripts.compression_mnist_model import TorchModel, device, trainer, evaluat
 config_list = [{
     'quant_types': ['input', 'weight'],
     'quant_bits': {'input': 8, 'weight': 8},
-    'op_names': ['conv1']
+    'op_types': ['Conv2d']
 }, {
     'quant_types': ['output'],
     'quant_bits': {'output': 8},
-    'op_names': ['relu1']
+    'op_types': ['ReLU']
 }, {
     'quant_types': ['input', 'weight'],
     'quant_bits': {'input': 8, 'weight': 8},
-    'op_names': ['conv2']
-}, {
-    'quant_types': ['output'],
-    'quant_bits': {'output': 8},
-    'op_names': ['relu2']
+    'op_names': ['fc1', 'fc2']
 }]
 
 model = TorchModel().to(device)
 optimizer = SGD(model.parameters(), lr=0.01, momentum=0.5)
 criterion = F.nll_loss
-dummy_input = torch.rand(32, 1, 28,28).to(device)
+dummy_input = torch.rand(32, 1, 28, 28).to(device)
 
 from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
 quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input)
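The updated config_list selects the convolution and ReLU layers by 'op_types' ('Conv2d', 'ReLU') instead of naming each instance, and quantizes the fully connected layers by 'op_names' ('fc1', 'fc2'). A minimal sketch of how such entries map onto submodules, using plain PyTorch only (the matched_modules helper below is hypothetical and illustrative, not NNI's own matching logic, which is more involved):

import torch.nn as nn

def matched_modules(model: nn.Module, entry: dict):
    # Yield (name, class name) for every submodule this config entry selects,
    # matching either the registered module name or the module class name.
    names = entry.get('op_names', [])
    types = entry.get('op_types', [])
    for name, module in model.named_modules():
        if name in names or type(module).__name__ in types:
            yield name, type(module).__name__

for entry in config_list:
    print(list(matched_modules(model, entry)))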
@@ -101,6 +97,8 @@ for epoch in range(3):
 
 # %%
 # export model and get calibration_config
+import os
+os.makedirs('log', exist_ok=True)
 model_path = "./log/mnist_model.pth"
 calibration_path = "./log/mnist_calibration.pth"
 calibration_config = quantizer.export_model(model_path, calibration_path)
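The added os.makedirs call ensures the log directory exists before export_model writes to it. Assuming the exported files are ordinary torch.save artifacts, which matches the quantizer's export behavior here but is worth verifying against the NNI source for your version, they can be reloaded for inspection:

import torch

state_dict = torch.load(model_path)         # quantized model weights
calibration = torch.load(calibration_path)  # per-layer calibration parameters
print(sorted(calibration.keys()))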
@@ -110,11 +108,11 @@ print("calibration_config: ", calibration_config)
 
 # %%
 # build a TensorRT engine to get a real speedup
-# from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT
-# input_shape = (32, 1, 28, 28)
-# engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config, batchsize=32)
-# engine.compress()
-# test_trt(engine)
+from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT
+input_shape = (32, 1, 28, 28)
+engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config, batchsize=32)
+engine.compress()
+test_trt(engine)
 
 # %%
 # Note that NNI also supports post-training quantization directly; please refer to the complete examples for details.
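test_trt is a helper imported from the example's scripts module. A rough sketch of what such a check can look like, assuming engine.inference(batch) returns an (output, inference_time) pair as in NNI's quantization speedup tutorial, and assuming a standard MNIST test_loader; both are assumptions to verify against your NNI version:

def test_trt_sketch(engine, test_loader):
    correct = total = 0
    for data, target in test_loader:
        output, latency = engine.inference(data)  # assumed (output, time) return value
        pred = output.argmax(dim=1)
        correct += (pred.cpu() == target).sum().item()
        total += target.size(0)
    print('TensorRT engine accuracy: {:.4f}'.format(correct / total))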
......
@@ -15,13 +15,19 @@ class TorchModel(nn.Module):
         self.fc1 = nn.Linear(16 * 4 * 4, 120)
         self.fc2 = nn.Linear(120, 84)
         self.fc3 = nn.Linear(84, 10)
+        self.relu1 = nn.ReLU()
+        self.relu2 = nn.ReLU()
+        self.relu3 = nn.ReLU()
+        self.relu4 = nn.ReLU()
+        self.pool1 = nn.MaxPool2d((2, 2))
+        self.pool2 = nn.MaxPool2d((2, 2))
 
     def forward(self, x):
-        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
-        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
+        x = self.pool1(self.relu1(self.conv1(x)))
+        x = self.pool2(self.relu2(self.conv2(x)))
         x = torch.flatten(x, 1)
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
+        x = self.relu3(self.fc1(x))
+        x = self.relu4(self.fc2(x))
         x = self.fc3(x)
         return F.log_softmax(x, dim=1)
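Registering the ReLU and pooling layers as named submodules, each used exactly once, is what makes them addressable by the config_list above: module-level quantizers wrap nn.Module instances, so purely functional calls like F.relu leave nothing for 'op_types' or 'op_names' to match. A quick check with plain PyTorch:

import torch.nn as nn

model = TorchModel()
print([name for name, m in model.named_modules()
       if isinstance(m, (nn.ReLU, nn.MaxPool2d))])
# ['relu1', 'relu2', 'relu3', 'relu4', 'pool1', 'pool2']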