Unverified commit fe02b808 authored by J-shang, committed by GitHub

[Doc] split index to overview & toctree compression part (#4749)

parent b8d029b1
@@ -69,25 +69,21 @@ from scripts.compression_mnist_model import TorchModel, device, trainer, evaluat
 config_list = [{
     'quant_types': ['input', 'weight'],
     'quant_bits': {'input': 8, 'weight': 8},
-    'op_names': ['conv1']
+    'op_types': ['Conv2d']
 }, {
     'quant_types': ['output'],
     'quant_bits': {'output': 8},
-    'op_names': ['relu1']
+    'op_types': ['ReLU']
 }, {
     'quant_types': ['input', 'weight'],
     'quant_bits': {'input': 8, 'weight': 8},
-    'op_names': ['conv2']
-}, {
-    'quant_types': ['output'],
-    'quant_bits': {'output': 8},
-    'op_names': ['relu2']
+    'op_names': ['fc1', 'fc2']
 }]
 
 model = TorchModel().to(device)
 optimizer = SGD(model.parameters(), lr=0.01, momentum=0.5)
 criterion = F.nll_loss
-dummy_input = torch.rand(32, 1, 28,28).to(device)
+dummy_input = torch.rand(32, 1, 28, 28).to(device)
 
 from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
 quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input)
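The updated config_list selects the convolution and ReLU layers by 'op_types' ('Conv2d', 'ReLU') instead of naming each instance, and quantizes the fully connected layers by 'op_names' ('fc1', 'fc2'). A minimal sketch of how such entries map onto submodules, using plain PyTorch only (the matched_modules helper below is hypothetical and illustrative, not NNI's own matching logic, which is more involved):

import torch.nn as nn

def matched_modules(model: nn.Module, entry: dict):
    # Yield (name, class name) for every submodule this config entry selects,
    # matching either the registered module name or the module class name.
    names = entry.get('op_names', [])
    types = entry.get('op_types', [])
    for name, module in model.named_modules():
        if name in names or type(module).__name__ in types:
            yield name, type(module).__name__

for entry in config_list:
    print(list(matched_modules(model, entry)))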
@@ -101,6 +97,8 @@ for epoch in range(3):
 
 # %%
 # export model and get calibration_config
+import os
+os.makedirs('log', exist_ok=True)
 model_path = "./log/mnist_model.pth"
 calibration_path = "./log/mnist_calibration.pth"
 calibration_config = quantizer.export_model(model_path, calibration_path)
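The added os.makedirs call ensures the log directory exists before export_model writes to it. Assuming the exported files are ordinary torch.save artifacts, which matches the quantizer's export behavior here but is worth verifying against the NNI source for your version, they can be reloaded for inspection:

import torch

state_dict = torch.load(model_path)         # quantized model weights
calibration = torch.load(calibration_path)  # per-layer calibration parameters
print(sorted(calibration.keys()))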
@@ -110,11 +108,11 @@ print("calibration_config: ", calibration_config)
 
 # %%
 # build a TensorRT engine to get a real speedup
-# from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT
-# input_shape = (32, 1, 28, 28)
-# engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config, batchsize=32)
-# engine.compress()
-# test_trt(engine)
+from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT
+input_shape = (32, 1, 28, 28)
+engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config, batchsize=32)
+engine.compress()
+test_trt(engine)
 
 # %%
 # Note that NNI also supports post-training quantization directly; please refer to the complete examples for details.
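test_trt is a helper imported from the example's scripts module. A rough sketch of what such a check can look like, assuming engine.inference(batch) returns an (output, inference_time) pair as in NNI's quantization speedup tutorial, and assuming a standard MNIST test_loader; both are assumptions to verify against your NNI version:

def test_trt_sketch(engine, test_loader):
    correct = total = 0
    for data, target in test_loader:
        output, latency = engine.inference(data)  # assumed (output, time) return value
        pred = output.argmax(dim=1)
        correct += (pred.cpu() == target).sum().item()
        total += target.size(0)
    print('TensorRT engine accuracy: {:.4f}'.format(correct / total))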
......
@@ -15,13 +15,19 @@ class TorchModel(nn.Module):
         self.fc1 = nn.Linear(16 * 4 * 4, 120)
         self.fc2 = nn.Linear(120, 84)
         self.fc3 = nn.Linear(84, 10)
+        self.relu1 = nn.ReLU()
+        self.relu2 = nn.ReLU()
+        self.relu3 = nn.ReLU()
+        self.relu4 = nn.ReLU()
+        self.pool1 = nn.MaxPool2d((2, 2))
+        self.pool2 = nn.MaxPool2d((2, 2))
 
     def forward(self, x):
-        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
-        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
+        x = self.pool1(self.relu1(self.conv1(x)))
+        x = self.pool2(self.relu2(self.conv2(x)))
         x = torch.flatten(x, 1)
-        x = F.relu(self.fc1(x))
-        x = F.relu(self.fc2(x))
+        x = self.relu3(self.fc1(x))
+        x = self.relu4(self.fc2(x))
         x = self.fc3(x)
         return F.log_softmax(x, dim=1)
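Registering the ReLU and pooling layers as named submodules, each used exactly once, is what makes them addressable by the config_list above: module-level quantizers wrap nn.Module instances, so purely functional calls like F.relu leave nothing for 'op_types' or 'op_names' to match. A quick check with plain PyTorch:

import torch.nn as nn

model = TorchModel()
print([name for name, m in model.named_modules()
       if isinstance(m, (nn.ReLU, nn.MaxPool2d))])
# ['relu1', 'relu2', 'relu3', 'relu4', 'pool1', 'pool2']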