Added dimensions in the comments of googlenet (#788)

* Added dimensions in the comments: the update provides the dimensions of the processed data, following the style of the inceptionV3 implementation.
* Changed docs and comments: updated the docs with the `transform_input` argument and modified the comments to match the inceptionV3 style.

Added dimensions in the comments of googlenet (#788)
* Added dimensions in the comments: the update provides the dimensions of the processed data, following the style of the inceptionV3 implementation.
* Changed docs and comments: updated the docs with the `transform_input` argument and modified the comments to match the inceptionV3 style.
ef6e9afb · ekka · Francisco Massa · 6acae74a · ef6e9afb
Commit ef6e9afb authored Mar 09, 2019 by ekka Committed by Francisco Massa Mar 09, 2019
Hide whitespace changes
Inline Side-by-side

Showing with 29 additions and 1 deletion

torchvision/models/googlenet.py torchvision/models/googlenet.py +29 -1

No files found.
--- a/torchvision/models/googlenet.py
+++ b/torchvision/models/googlenet.py
@@ -16,6 +16,8 @@ def googlenet(pretrained=False, **kwargs):
    `"Going Deeper with Convolutions" <http://arxiv.org/abs/1409.4842>`_.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        transform_input (bool): If True, preprocesses the input according to the method with which it
+        was trained on ImageNet. Default: *False*
    """
    if pretrained:
        if 'transform_input' not in kwargs:
@@ -84,34 +86,54 @@ class GoogLeNet(nn.Module):
            x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
            x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
+        # N x 3 x 224 x 224
        x = self.conv1(x)
+        # N x 64 x 112 x 112
        x = self.maxpool1(x)
+        # N x 64 x 56 x 56
        x = self.conv2(x)
+        # N x 64 x 56 x 56
        x = self.conv3(x)
+        # N x 192 x 56 x 56
        x = self.maxpool2(x)
+        # N x 192 x 28 x 28
        x = self.inception3a(x)
+        # N x 256 x 28 x 28
        x = self.inception3b(x)
+        # N x 480 x 28 x 28
        x = self.maxpool3(x)
+        # N x 480 x 14 x 14
        x = self.inception4a(x)
+        # N x 512 x 14 x 14
        if self.training and self.aux_logits:
            aux1 = self.aux1(x)
        x = self.inception4b(x)
+        # N x 512 x 14 x 14
        x = self.inception4c(x)
+        # N x 512 x 14 x 14
        x = self.inception4d(x)
+        # N x 528 x 14 x 14
        if self.training and self.aux_logits:
            aux2 = self.aux2(x)
        x = self.inception4e(x)
+        # N x 832 x 14 x 14
        x = self.maxpool4(x)
+        # N x 832 x 7 x 7
        x = self.inception5a(x)
+        # N x 832 x 7 x 7
        x = self.inception5b(x)
+        # N x 1024 x 7 x 7
        x = self.avgpool(x)
+        # N x 1024 x 1 x 1
        x = x.view(x.size(0), -1)
+        # N x 1024
        x = self.dropout(x)
        x = self.fc(x)
+        # N x 1000 (num_classes)
        if self.training and self.aux_logits:
            return aux1, aux2, x
        return x
@@ -159,13 +181,19 @@ class InceptionAux(nn.Module):
        self.fc2 = nn.Linear(1024, num_classes)
    def forward(self, x):
+        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = F.adaptive_avg_pool2d(x, (4, 4))
+        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        x = self.conv(x)
+        # N x 128 x 4 x 4
        x = x.view(x.size(0), -1)
+        # N x 2048
        x = F.relu(self.fc1(x), inplace=True)
+        # N x 1024
        x = F.dropout(x, 0.7, training=self.training)
+        # N x 1024
        x = self.fc2(x)
+        # N x 1000 (num_classes)
        return x