git init

3c15726c · yangzhong · 3c15726c · 3c15726c · 3c15726c · 3c15726c
Commit 3c15726c authored Nov 01, 2025 by yangzhong
20 changed files
--- a/automotive/3d-object-detection/model/pointpillars_core.py
+++ b/automotive/3d-object-detection/model/pointpillars_core.py
--- a/automotive/3d-object-detection/model/segmentation/__init__.py
+++ b/automotive/3d-object-detection/model/segmentation/__init__.py
+from .modeling import *
+from ._deeplab import convert_to_separable_conv
--- a/automotive/3d-object-detection/model/segmentation/_deeplab.py
+++ b/automotive/3d-object-detection/model/segmentation/_deeplab.py
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from .utils import _SimpleSegmentationModel
+
+
+# Only DeepLabV3 is exported by a wildcard import of this module; the head
+# classes and helpers defined below have to be imported by name.
+__all__ = ["DeepLabV3"]
+
+
+class DeepLabV3(_SimpleSegmentationModel):
+    """
+    Implements DeepLabV3 model from
+    `"Rethinking Atrous Convolution for Semantic Image Segmentation"
+    <https://arxiv.org/abs/1706.05587>`_.
+
+    The class adds no behaviour of its own; construction and ``forward`` are
+    inherited unchanged from ``_SimpleSegmentationModel``.
+
+    Arguments:
+        backbone (nn.Module): the network used to compute the features for the model.
+            The backbone should return an OrderedDict[Tensor], with the key being
+            "out" for the last feature map used, and "aux" if an auxiliary classifier
+            is used.
+        classifier (nn.Module): module that takes the "out" element returned from
+            the backbone and returns a dense prediction.
+        aux_classifier (nn.Module, optional): auxiliary classifier used during training
+            NOTE(review): the base-class constructor in utils.py takes only
+            ``backbone`` and ``classifier`` -- confirm whether this argument
+            is still supported.
+    """
+    pass
+
+
+class DeepLabHeadV3Plus(nn.Module):
+    def __init__(self, in_channels, low_level_channels,
+                 num_classes, aspp_dilate=[12, 24, 36]):
+        super(DeepLabHeadV3Plus, self).__init__()
+        self.project = nn.Sequential(
+            nn.Conv2d(low_level_channels, 48, 1, bias=False),
+            nn.BatchNorm2d(48),
+            nn.ReLU(inplace=True),
+        )
+
+        self.aspp = ASPP(in_channels, aspp_dilate)
+
+        self.classifier = nn.Sequential(
+            nn.Conv2d(304, 256, 3, padding=1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, num_classes, 1)
+        )
+        self._init_weight()
+
+    def forward(self, feature):
+        low_level_feature = self.project(feature['low_level'])
+        output_feature = self.aspp(feature['out'])
+        output_feature = F.interpolate(output_feature,
+                                       size=low_level_feature.shape[2:],
+                                       mode='bilinear',
+                                       align_corners=False)
+        return self.classifier(
+            torch.cat([low_level_feature, output_feature], dim=1))
+
+    def _init_weight(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight)
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+
+class DeepLabHead(nn.Module):
+    def __init__(self, in_channels, num_classes, aspp_dilate=[12, 24, 36]):
+        super(DeepLabHead, self).__init__()
+
+        self.classifier = nn.Sequential(
+            ASPP(in_channels, aspp_dilate),
+            nn.Conv2d(256, 256, 3, padding=1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, num_classes, 1)
+        )
+        self._init_weight()
+
+    def forward(self, feature):
+        return self.classifier(feature['out'])
+
+    def _init_weight(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight)
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+
+class AtrousSeparableConvolution(nn.Module):
+    """ Atrous Separable Convolution
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size,
+                 stride=1, padding=0, dilation=1, bias=True):
+        super(AtrousSeparableConvolution, self).__init__()
+        self.body = nn.Sequential(
+            # Separable Conv
+            nn.Conv2d(
+                in_channels,
+                in_channels,
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=padding,
+                dilation=dilation,
+                bias=bias,
+                groups=in_channels),
+            # PointWise Conv
+            nn.Conv2d(
+                in_channels,
+                out_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                bias=bias),
+        )
+
+        self._init_weight()
+
+    def forward(self, x):
+        return self.body(x)
+
+    def _init_weight(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight)
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+
+class ASPPConv(nn.Sequential):
+    def __init__(self, in_channels, out_channels, dilation):
+        modules = [
+            nn.Conv2d(
+                in_channels,
+                out_channels,
+                3,
+                padding=dilation,
+                dilation=dilation,
+                bias=False),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True)
+        ]
+        super(ASPPConv, self).__init__(*modules)
+
+
+class ASPPPooling(nn.Sequential):
+    def __init__(self, in_channels, out_channels):
+        super(ASPPPooling, self).__init__(
+            nn.AdaptiveAvgPool2d(1),
+            nn.Conv2d(in_channels, out_channels, 1, bias=False),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True))
+
+    def forward(self, x):
+        size = x.shape[-2:]
+        x = super(ASPPPooling, self).forward(x)
+        return F.interpolate(
+            x, size=size, mode='bilinear', align_corners=False)
+
+
+class ASPP(nn.Module):
+    def __init__(self, in_channels, atrous_rates):
+        super(ASPP, self).__init__()
+        out_channels = 256
+        modules = []
+        modules.append(nn.Sequential(
+            nn.Conv2d(in_channels, out_channels, 1, bias=False),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True)))
+
+        rate1, rate2, rate3 = tuple(atrous_rates)
+        modules.append(ASPPConv(in_channels, out_channels, rate1))
+        modules.append(ASPPConv(in_channels, out_channels, rate2))
+        modules.append(ASPPConv(in_channels, out_channels, rate3))
+        modules.append(ASPPPooling(in_channels, out_channels))
+
+        self.convs = nn.ModuleList(modules)
+
+        self.project = nn.Sequential(
+            nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True),
+            nn.Dropout(0.1),)
+
+    def forward(self, x):
+        res = []
+        for conv in self.convs:
+            res.append(conv(x))
+        res = torch.cat(res, dim=1)
+        return self.project(res)
+
+
+def convert_to_separable_conv(module):
+    new_module = module
+    if isinstance(module, nn.Conv2d) and module.kernel_size[0] > 1:
+        new_module = AtrousSeparableConvolution(module.in_channels,
+                                                module.out_channels,
+                                                module.kernel_size,
+                                                module.stride,
+                                                module.padding,
+                                                module.dilation,
+                                                module.bias)
+    for name, child in module.named_children():
+        new_module.add_module(name, convert_to_separable_conv(child))
+    return new_module
--- a/automotive/3d-object-detection/model/segmentation/backbone/__init__.py
+++ b/automotive/3d-object-detection/model/segmentation/backbone/__init__.py
+from . import resnet
--- a/automotive/3d-object-detection/model/segmentation/backbone/resnet.py
+++ b/automotive/3d-object-detection/model/segmentation/backbone/resnet.py
+import torch
+import torch.nn as nn
+try:  # for torchvision<0.4
+    from torchvision.models.utils import load_state_dict_from_url
+except BaseException:  # for torchvision>=0.4
+    from torch.hub import load_state_dict_from_url
+
+
+# Public names of this module: the ResNet class and its factory functions.
+__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
+           'wide_resnet50_2', 'wide_resnet101_2']
+
+
+# Checkpoint download URLs, keyed by the architecture name that the factory
+# functions pass to ``_resnet`` as ``arch``.
+model_urls = {
+    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
+    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
+    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
+    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
+}
+
+
+def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+    """3x3 convolution with padding"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                     padding=dilation, groups=groups, bias=False, dilation=dilation)
+
+
+def conv1x1(in_planes, out_planes, stride=1):
+    """1x1 convolution"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=1,
+                     stride=stride, bias=False)
+
+
+class BasicBlock(nn.Module):
+    expansion = 1
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+                 base_width=64, dilation=1, norm_layer=None):
+        super(BasicBlock, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        if groups != 1 or base_width != 64:
+            raise ValueError(
+                'BasicBlock only supports groups=1 and base_width=64')
+        if dilation > 1:
+            raise NotImplementedError(
+                "Dilation > 1 not supported in BasicBlock")
+        # Both self.conv1 and self.downsample layers downsample the input when
+        # stride != 1
+        self.conv1 = conv3x3(inplanes, planes, stride)
+        self.bn1 = norm_layer(planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = norm_layer(planes)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        identity = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+
+        if self.downsample is not None:
+            identity = self.downsample(x)
+
+        out += identity
+        out = self.relu(out)
+
+        return out
+
+
+class Bottleneck(nn.Module):
+    expansion = 4
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+                 base_width=64, dilation=1, norm_layer=None):
+        super(Bottleneck, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        width = int(planes * (base_width / 64.)) * groups
+        # Both self.conv2 and self.downsample layers downsample the input when
+        # stride != 1
+        self.conv1 = conv1x1(inplanes, width)
+        self.bn1 = norm_layer(width)
+        self.conv2 = conv3x3(width, width, stride, groups, dilation)
+        self.bn2 = norm_layer(width)
+        self.conv3 = conv1x1(width, planes * self.expansion)
+        self.bn3 = norm_layer(planes * self.expansion)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        identity = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+
+        out = self.conv3(out)
+        out = self.bn3(out)
+
+        if self.downsample is not None:
+            identity = self.downsample(x)
+
+        out += identity
+        out = self.relu(out)
+
+        return out
+
+
+class ResNet(nn.Module):
+    """ResNet-style classifier: stem, four residual stages, pooling and fc.
+
+    Arguments:
+        block: residual block class (``BasicBlock`` or ``Bottleneck`` in this
+            file); it must expose an ``expansion`` attribute.
+        layers: indexable with four entries giving the number of blocks in
+            ``layer1`` .. ``layer4``.
+        num_classes (int): output size of the final linear layer.
+        zero_init_residual (bool): if True, zero the weight of the last norm
+            layer of every ``Bottleneck``/``BasicBlock``.
+        groups (int): forwarded to every block as ``groups``.
+        width_per_group (int): forwarded to every block as ``base_width``.
+        replace_stride_with_dilation: ``None`` or a 3-element sequence; entry
+            ``i`` selects dilation instead of stride for ``layer2+i``.
+        norm_layer: callable building a norm layer from a channel count;
+            defaults to ``nn.BatchNorm2d``.
+
+    Raises:
+        ValueError: if ``replace_stride_with_dilation`` does not have exactly
+            three entries.
+    """
+
+    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
+                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
+                 norm_layer=None):
+        super(ResNet, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        self._norm_layer = norm_layer
+
+        # Running state consumed and updated by _make_layer: the channel
+        # count entering the next stage and the current dilation.
+        self.inplanes = 64
+        self.dilation = 1
+        if replace_stride_with_dilation is None:
+            # each element in the tuple indicates if we should replace
+            # the 2x2 stride with a dilated convolution instead
+            replace_stride_with_dilation = [False, False, False]
+        if len(replace_stride_with_dilation) != 3:
+            raise ValueError("replace_stride_with_dilation should be None "
+                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
+        self.groups = groups
+        self.base_width = width_per_group
+        # Stem: 7x7 stride-2 conv on a 3-channel input, norm, ReLU, 3x3
+        # stride-2 max pool.
+        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = norm_layer(self.inplanes)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        # Four residual stages; layer2-4 use stride 2 unless dilation is
+        # requested for them.
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
+                                       dilate=replace_stride_with_dilation[0])
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
+                                       dilate=replace_stride_with_dilation[1])
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
+                                       dilate=replace_stride_with_dilation[2])
+        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+        self.fc = nn.Linear(512 * block.expansion, num_classes)
+
+        # Re-initialise every conv (Kaiming normal, fan_out) and norm layer
+        # (weight 1, bias 0) created above.
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(
+                    m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+        # Zero-initialize the last BN in each residual branch,
+        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+        # This improves the model by 0.2~0.3% according to
+        # https://arxiv.org/abs/1706.02677
+        if zero_init_residual:
+            for m in self.modules():
+                if isinstance(m, Bottleneck):
+                    nn.init.constant_(m.bn3.weight, 0)
+                elif isinstance(m, BasicBlock):
+                    nn.init.constant_(m.bn2.weight, 0)
+
+    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+        """Build one residual stage of ``blocks`` blocks as an nn.Sequential.
+
+        Updates ``self.inplanes`` (and ``self.dilation`` when ``dilate`` is
+        set) so that the next stage continues from this stage's output.
+        """
+        norm_layer = self._norm_layer
+        downsample = None
+        previous_dilation = self.dilation
+        if dilate:
+            # Trade the stride for dilation: resolution is kept and the
+            # dilation grows by the stride factor instead.
+            self.dilation *= stride
+            stride = 1
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            # The shortcut needs a projection whenever the resolution or the
+            # channel count changes across the first block.
+            downsample = nn.Sequential(
+                conv1x1(self.inplanes, planes * block.expansion, stride),
+                norm_layer(planes * block.expansion),
+            )
+
+        layers = []
+        # The first block carries the stride/projection and still uses the
+        # dilation of the previous stage.
+        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
+                            self.base_width, previous_dilation, norm_layer))
+        self.inplanes = planes * block.expansion
+        # The remaining blocks keep the resolution and use the updated dilation.
+        for _ in range(1, blocks):
+            layers.append(block(self.inplanes, planes, groups=self.groups,
+                                base_width=self.base_width, dilation=self.dilation,
+                                norm_layer=norm_layer))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Run stem, the four stages, global pooling and the linear layer."""
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+
+        x = self.avgpool(x)
+        # Flatten everything after the first dimension before the linear layer.
+        x = torch.flatten(x, 1)
+        x = self.fc(x)
+
+        return x
+
+
+def _resnet(arch, block, layers, pretrained, progress, **kwargs):
+    model = ResNet(block, layers, **kwargs)
+    if pretrained:
+        state_dict = load_state_dict_from_url(model_urls[arch],
+                                              progress=progress)
+        model.load_state_dict(state_dict)
+    return model
+
+
+def resnet18(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-18 model from
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
+                   **kwargs)
+
+
+def resnet34(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-34 model from
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet50(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-50 model from
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet101(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-101 model from
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet152(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-152 model from
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
+    r"""ResNeXt-50 32x4d model from
+    `"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    kwargs['groups'] = 32
+    kwargs['width_per_group'] = 4
+    return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
+                   pretrained, progress, **kwargs)
+
+
+def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
+    r"""ResNeXt-101 32x8d model from
+    `"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    kwargs['groups'] = 32
+    kwargs['width_per_group'] = 8
+    return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
+                   pretrained, progress, **kwargs)
+
+
+def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
+    r"""Wide ResNet-50-2 model from
+    `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
+
+    The model is the same as ResNet except for the bottleneck number of channels
+    which is twice larger in every block. The number of channels in outer 1x1
+    convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
+    channels, and in Wide ResNet-50-2 has 2048-1024-2048.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    kwargs['width_per_group'] = 64 * 2
+    return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
+                   pretrained, progress, **kwargs)
+
+
+def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
+    r"""Wide ResNet-101-2 model from
+    `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
+
+    The model is the same as ResNet except for the bottleneck number of channels
+    which is twice larger in every block. The number of channels in outer 1x1
+    convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
+    channels, and in Wide ResNet-50-2 has 2048-1024-2048.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    kwargs['width_per_group'] = 64 * 2
+    return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
+                   pretrained, progress, **kwargs)
--- a/automotive/3d-object-detection/model/segmentation/modeling.py
+++ b/automotive/3d-object-detection/model/segmentation/modeling.py
+from .utils import IntermediateLayerGetter
+from ._deeplab import DeepLabHead, DeepLabHeadV3Plus, DeepLabV3
+from .backbone import (resnet)
+
+
+def _segm_resnet(name, backbone_name, num_classes,
+                 output_stride, pretrained_backbone):
+
+    if output_stride == 8:
+        replace_stride_with_dilation = [False, True, True]
+        aspp_dilate = [12, 24, 36]
+    else:
+        replace_stride_with_dilation = [False, False, True]
+        aspp_dilate = [6, 12, 18]
+
+    backbone = resnet.__dict__[backbone_name](
+        pretrained=pretrained_backbone,
+        replace_stride_with_dilation=replace_stride_with_dilation)
+
+    inplanes = 2048
+    low_level_planes = 256
+
+    if name == 'deeplabv3plus':
+        return_layers = {'layer4': 'out', 'layer1': 'low_level'}
+        classifier = DeepLabHeadV3Plus(
+            inplanes, low_level_planes, num_classes, aspp_dilate)
+    elif name == 'deeplabv3':
+        return_layers = {'layer4': 'out'}
+        classifier = DeepLabHead(inplanes, num_classes, aspp_dilate)
+    backbone = IntermediateLayerGetter(backbone, return_layers=return_layers)
+
+    model = DeepLabV3(backbone, classifier)
+    return model
+
+
+def _load_model(arch_type, backbone, num_classes,
+                output_stride, pretrained_backbone):
+
+    if backbone.startswith('resnet'):
+        model = _segm_resnet(
+            arch_type,
+            backbone,
+            num_classes,
+            output_stride=output_stride,
+            pretrained_backbone=pretrained_backbone)
+    else:
+        raise NotImplementedError
+    return model
+
+
+# Deeplab v3
+def deeplabv3_resnet50(num_classes=21, output_stride=8,
+                       pretrained_backbone=True):
+    """Constructs a DeepLabV3 model with a ResNet-50 backbone.
+
+    Args:
+        num_classes (int): number of classes.
+        output_stride (int): output stride for deeplab.
+        pretrained_backbone (bool): If True, use the pretrained backbone.
+    """
+    return _load_model('deeplabv3', 'resnet50', num_classes,
+                       output_stride=output_stride, pretrained_backbone=pretrained_backbone)
+
+
+def deeplabv3_resnet101(num_classes=21, output_stride=8,
+                        pretrained_backbone=True):
+    """Constructs a DeepLabV3 model with a ResNet-101 backbone.
+
+    Args:
+        num_classes (int): number of classes.
+        output_stride (int): output stride for deeplab.
+        pretrained_backbone (bool): If True, use the pretrained backbone.
+    """
+    return _load_model('deeplabv3', 'resnet101', num_classes,
+                       output_stride=output_stride, pretrained_backbone=pretrained_backbone)
+
+# Deeplab v3+
+
+
+def deeplabv3plus_resnet50(
+        num_classes=21, output_stride=8, pretrained_backbone=True):
+    """Constructs a DeepLabV3+ model with a ResNet-50 backbone.
+
+    Args:
+        num_classes (int): number of classes.
+        output_stride (int): output stride for deeplab.
+        pretrained_backbone (bool): If True, use the pretrained backbone.
+    """
+    return _load_model('deeplabv3plus', 'resnet50', num_classes,
+                       output_stride=output_stride, pretrained_backbone=pretrained_backbone)
+
+
+def deeplabv3plus_resnet101(
+        num_classes=21, output_stride=8, pretrained_backbone=True):
+    """Constructs a DeepLabV3+ model with a ResNet-101 backbone.
+
+    Args:
+        num_classes (int): number of classes.
+        output_stride (int): output stride for deeplab.
+        pretrained_backbone (bool): If True, use the pretrained backbone.
+    """
+    return _load_model('deeplabv3plus', 'resnet101', num_classes,
+                       output_stride=output_stride, pretrained_backbone=pretrained_backbone)
--- a/automotive/3d-object-detection/model/segmentation/utils.py
+++ b/automotive/3d-object-detection/model/segmentation/utils.py
+import torch
+import torch.nn as nn
+import numpy as np
+import torch.nn.functional as F
+from collections import OrderedDict
+
+
+class _SimpleSegmentationModel(nn.Module):
+    def __init__(self, backbone, classifier):
+        super(_SimpleSegmentationModel, self).__init__()
+        self.backbone = backbone
+        self.classifier = classifier
+
+    def forward(self, x):
+        input_shape = x.shape[-2:]
+        features = self.backbone(x)
+        x = self.classifier(features)
+        x = F.interpolate(
+            x,
+            size=input_shape,
+            mode='bilinear',
+            align_corners=False)
+        return x
+
+
+class IntermediateLayerGetter(nn.ModuleDict):
+    """
+    Module wrapper that returns intermediate layers from a model
+
+    It has a strong assumption that the modules have been registered
+    into the model in the same order as they are used.
+    This means that one should **not** reuse the same nn.Module
+    twice in the forward if you want this to work.
+
+    Additionally, it is only able to query submodules that are directly
+    assigned to the model. So if `model` is passed, `model.feature1` can
+    be returned, but not `model.feature1.layer2`.
+
+    Arguments:
+        model (nn.Module): model on which we will extract the features
+        return_layers (Dict[name, new_name]): a dict containing the names
+            of the modules for which the activations will be returned as
+            the key of the dict, and the value of the dict is the name
+            of the returned activation (which the user can specify).
+        hrnet_flag (bool): enables the multi-stream handling in ``forward``
+            for children named ``transition*`` and ``stage4``.
+
+    Raises:
+        ValueError: if a key of ``return_layers`` is not a direct child of
+            ``model``.
+
+    Examples::
+
+        >>> m = torchvision.models.resnet18(pretrained=True)
+        >>> # extract layer1 and layer3, giving as names `feat1` and `feat2`
+        >>> new_m = torchvision.models._utils.IntermediateLayerGetter(m,
+        >>>     {'layer1': 'feat1', 'layer3': 'feat2'})
+        >>> out = new_m(torch.rand(1, 3, 224, 224))
+        >>> print([(k, v.shape) for k, v in out.items()])
+        >>>     [('feat1', torch.Size([1, 64, 56, 56])),
+        >>>      ('feat2', torch.Size([1, 256, 14, 14]))]
+    """
+
+    def __init__(self, model, return_layers, hrnet_flag=False):
+        if not set(return_layers).issubset(
+                [name for name, _ in model.named_children()]):
+            raise ValueError("return_layers are not present in model")
+
+        # NOTE(review): assigned before nn.ModuleDict.__init__ has run;
+        # confirm this ordering is intentional.
+        self.hrnet_flag = hrnet_flag
+
+        orig_return_layers = return_layers
+        # Work on a copy so the caller's dict is not emptied below.
+        return_layers = {k: v for k, v in return_layers.items()}
+        layers = OrderedDict()
+        # Keep children in registration order up to and including the last
+        # requested layer; everything after it is dropped.
+        for name, module in model.named_children():
+            layers[name] = module
+            if name in return_layers:
+                del return_layers[name]
+            if not return_layers:
+                break
+
+        super(IntermediateLayerGetter, self).__init__(layers)
+        self.return_layers = orig_return_layers
+
+    def forward(self, x):
+        """Run the kept children in order and collect the requested outputs.
+
+        Returns an OrderedDict mapping each new name from ``return_layers``
+        to the output of the corresponding child.
+        """
+        out = OrderedDict()
+        for name, module in self.named_children():
+            if self.hrnet_flag and name.startswith(
+                    'transition'):  # if using hrnet, you need to take care of transition
+                if name == 'transition1':  # in transition1, you need to split the module to two streams first
+                    x = [trans(x) for trans in module]
+                else:  # all other transition is just an extra one stream split
+                    x.append(module(x[-1]))
+            # other models (ex:resnet,mobilenet) are convolutions in series.
+            else:
+                x = module(x)
+
+            if name in self.return_layers:
+                out_name = self.return_layers[name]
+                if name == 'stage4' and self.hrnet_flag:  # In HRNetV2, we upsample and concat all outputs streams together
+                    # Upsample to size of highest resolution stream
+                    # (x is indexed as four streams here; x[0] sets the size)
+                    output_h, output_w = x[0].size(2), x[0].size(3)
+                    x1 = F.interpolate(
+                        x[1],
+                        size=(
+                            output_h,
+                            output_w),
+                        mode='bilinear',
+                        align_corners=False)
+                    x2 = F.interpolate(
+                        x[2],
+                        size=(
+                            output_h,
+                            output_w),
+                        mode='bilinear',
+                        align_corners=False)
+                    x3 = F.interpolate(
+                        x[3],
+                        size=(
+                            output_h,
+                            output_w),
+                        mode='bilinear',
+                        align_corners=False)
+                    # Concatenate the four streams along the channel axis.
+                    x = torch.cat([x[0], x1, x2, x3], dim=1)
+                    out[out_name] = x
+                else:
+                    out[out_name] = x
+        return out
--- a/automotive/3d-object-detection/ops/__init__.py
+++ b/automotive/3d-object-detection/ops/__init__.py
+from .voxel_module import Voxelization
--- a/automotive/3d-object-detection/ops/iou3d/iou3d.cpp
+++ b/automotive/3d-object-detection/ops/iou3d/iou3d.cpp
+// Modified from
+// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp
+
+/*
+3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
+Written by Shaoshuai Shi
+All Rights Reserved 2019-2020.
+*/
+
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <torch/extension.h>
+#include <torch/serialize/tensor.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+
+#define CHECK_CUDA(x)                                                          \
+  TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x)                                                    \
+  TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_INPUT(x)                                                         \
+  CHECK_CUDA(x);                                                               \
+  CHECK_CONTIGUOUS(x)
+
+#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
+
+#define CHECK_ERROR(ans)                                                       \
+  { gpuAssert((ans), __FILE__, __LINE__); }
+inline void gpuAssert(cudaError_t code, const char *file, int line,
+                      bool abort = true) {
+  if (code != cudaSuccess) {
+    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
+            line);
+    if (abort)
+      exit(code);
+  }
+}
+
+const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; // bits in one mask word
+/* Launchers declared here but not defined in this file; each host wrapper
+ * below forwards raw float pointers and counts to the launcher it names.
+ * nms_gpu / nms_normal_gpu pass the NMS launchers a device buffer of
+ * boxes_num * DIVUP(boxes_num, THREADS_PER_BLOCK_NMS) mask words and copy it
+ * back to the host afterwards.
+ * NOTE(review): presumably implemented in iou3d_kernel.cu -- confirm. */
+void boxesoverlapLauncher(const int num_a, const float *boxes_a,
+                          const int num_b, const float *boxes_b,
+                          float *ans_overlap);
+void boxesioubevLauncher(const int num_a, const float *boxes_a, const int num_b,
+                         const float *boxes_b, float *ans_iou);
+void nmsLauncher(const float *boxes, unsigned long long *mask, int boxes_num,
+                 float nms_overlap_thresh);
+void nmsNormalLauncher(const float *boxes, unsigned long long *mask,
+                       int boxes_num, float nms_overlap_thresh);
+
+int boxes_overlap_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b,
+                          at::Tensor ans_overlap) {
+  // Host wrapper around boxesoverlapLauncher.
+  //   boxes_a     : (N, 5) [x1, y1, x2, y2, ry]
+  //   boxes_b     : (M, 5)
+  //   ans_overlap : (N, M), handed to the launcher as the output buffer
+  // Every tensor must be a contiguous CUDA tensor. Always returns 1.
+
+  CHECK_INPUT(boxes_a);
+  CHECK_INPUT(boxes_b);
+  CHECK_INPUT(ans_overlap);
+
+  // Row counts of the two box sets.
+  const int rows_a = boxes_a.size(0);
+  const int rows_b = boxes_b.size(0);
+
+  // data_ptr<float>() also enforces that each tensor holds float32.
+  const float *ptr_a = boxes_a.data_ptr<float>();
+  const float *ptr_b = boxes_b.data_ptr<float>();
+  float *ptr_out = ans_overlap.data_ptr<float>();
+
+  boxesoverlapLauncher(rows_a, ptr_a, rows_b, ptr_b, ptr_out);
+
+  return 1;
+}
+
+int boxes_iou_bev_gpu(at::Tensor boxes_a, at::Tensor boxes_b,
+                      at::Tensor ans_iou) {
+  // Host wrapper around boxesioubevLauncher.
+  //   boxes_a : (N, 5) [x1, y1, x2, y2, ry]
+  //   boxes_b : (M, 5)
+  //   ans_iou : (N, M), handed to the launcher as the output buffer
+  // Every tensor must be a contiguous CUDA tensor. Always returns 1.
+
+  CHECK_INPUT(boxes_a);
+  CHECK_INPUT(boxes_b);
+  CHECK_INPUT(ans_iou);
+
+  // Row counts of the two box sets.
+  const int rows_a = boxes_a.size(0);
+  const int rows_b = boxes_b.size(0);
+
+  // data_ptr<float>() also enforces that each tensor holds float32.
+  const float *ptr_a = boxes_a.data_ptr<float>();
+  const float *ptr_b = boxes_b.data_ptr<float>();
+  float *ptr_out = ans_iou.data_ptr<float>();
+
+  boxesioubevLauncher(rows_a, ptr_a, rows_b, ptr_b, ptr_out);
+
+  return 1;
+}
+
+// Signature shared by nmsLauncher and nmsNormalLauncher.
+typedef void (*NmsMaskLauncher)(const float *boxes, unsigned long long *mask,
+                                int boxes_num, float nms_overlap_thresh);
+
+// Shared implementation of nms_gpu and nms_normal_gpu; the two differ only in
+// the launcher that fills the suppression mask.
+//
+//   boxes              - contiguous float CUDA tensor, one box per row
+//   keep               - contiguous int64 CPU tensor with at least
+//                        boxes.size(0) elements; receives the kept indices
+//   nms_overlap_thresh - forwarded unchanged to the launcher
+//   device_id          - CUDA device selected before any allocation
+//   launch_mask        - launcher that fills the device mask
+//
+// Returns the number of indices written to the front of `keep`.
+//
+// The mask has one row of col_blocks 64-bit words per box. The host loop
+// visits boxes in index order: a box whose bit is not yet set in the running
+// `removed` bitmap is kept and its mask row is OR-ed into the bitmap, so lower
+// indices take priority.
+// NOTE(review): callers presumably pass boxes sorted by descending score --
+// confirm against the Python wrapper.
+static int nms_with_launcher(const at::Tensor &boxes, const at::Tensor &keep,
+                             float nms_overlap_thresh, int device_id,
+                             NmsMaskLauncher launch_mask) {
+  CHECK_INPUT(boxes);
+  CHECK_CONTIGUOUS(keep);
+  // keep is filled through a raw host pointer below, so it must live on the
+  // CPU; a CUDA tensor here would be dereferenced on the host.
+  TORCH_CHECK(!keep.device().is_cuda(), "keep must be a CPU tensor ");
+  cudaSetDevice(device_id);
+
+  const int boxes_num = boxes.size(0);
+  // Up to boxes_num indices may be written into keep.
+  TORCH_CHECK(keep.numel() >= boxes_num, "keep must hold at least ", boxes_num,
+              " elements ");
+
+  const float *boxes_data = boxes.data_ptr<float>();
+  int64_t *keep_data = keep.data_ptr<int64_t>();
+
+  // Nothing to suppress: return before the zero-sized allocation, the
+  // launcher call and the access to an empty host buffer.
+  if (boxes_num == 0)
+    return 0;
+
+  const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS);
+
+  // Widen before multiplying: boxes_num * col_blocks grows quadratically with
+  // the box count and can overflow int.
+  const std::size_t mask_words =
+      static_cast<std::size_t>(boxes_num) * col_blocks;
+  const std::size_t mask_bytes = mask_words * sizeof(unsigned long long);
+
+  // Host buffer first, so a failed host allocation cannot leak device memory.
+  std::vector<unsigned long long> mask_cpu(mask_words);
+
+  unsigned long long *mask_data = NULL;
+  CHECK_ERROR(cudaMalloc((void **)&mask_data, mask_bytes));
+  launch_mask(boxes_data, mask_data, boxes_num, nms_overlap_thresh);
+
+  CHECK_ERROR(cudaMemcpy(mask_cpu.data(), mask_data, mask_bytes,
+                         cudaMemcpyDeviceToHost));
+
+  cudaFree(mask_data);
+
+  // Running bitmap of suppressed boxes, one bit per box, zero-initialised.
+  std::vector<unsigned long long> removed(col_blocks, 0ULL);
+
+  int num_to_keep = 0;
+
+  for (int i = 0; i < boxes_num; i++) {
+    const int nblock = i / THREADS_PER_BLOCK_NMS;
+    const int inblock = i % THREADS_PER_BLOCK_NMS;
+
+    if (!(removed[nblock] & (1ULL << inblock))) {
+      keep_data[num_to_keep++] = i;
+      const unsigned long long *row =
+          mask_cpu.data() + static_cast<std::size_t>(i) * col_blocks;
+      // Words before nblock only describe boxes already visited.
+      for (int j = nblock; j < col_blocks; j++) {
+        removed[j] |= row[j];
+      }
+    }
+  }
+
+  if (cudaSuccess != cudaGetLastError())
+    printf("Error!\n");
+
+  return num_to_keep;
+}
+
+int nms_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh,
+            int device_id) {
+  // params boxes: (N, 5) [x1, y1, x2, y2, ry]
+  // params keep: (N)
+  // Suppression mask comes from nmsLauncher (registered as "oriented nms
+  // gpu"). Returns the number of indices written to keep.
+  return nms_with_launcher(boxes, keep, nms_overlap_thresh, device_id,
+                           &nmsLauncher);
+}
+
+int nms_normal_gpu(at::Tensor boxes, at::Tensor keep, float nms_overlap_thresh,
+                   int device_id) {
+  // params boxes: (N, 5) [x1, y1, x2, y2, ry]
+  // params keep: (N)
+  // Suppression mask comes from nmsNormalLauncher (registered as "nms gpu").
+  // Returns the number of indices written to keep.
+  return nms_with_launcher(boxes, keep, nms_overlap_thresh, device_id,
+                           &nmsNormalLauncher);
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { // Python bindings; each wrapper keeps its C++ name
+  m.def("boxes_overlap_bev_gpu", &boxes_overlap_bev_gpu,
+        "oriented boxes overlap"); // always returns 1
+  m.def("boxes_iou_bev_gpu", &boxes_iou_bev_gpu, "oriented boxes iou"); // always returns 1
+  m.def("nms_gpu", &nms_gpu, "oriented nms gpu"); // returns the number of kept boxes
+  m.def("nms_normal_gpu", &nms_normal_gpu, "nms gpu"); // returns the number of kept boxes
+}
--- a/automotive/3d-object-detection/ops/iou3d/iou3d_kernel.cu
+++ b/automotive/3d-object-detection/ops/iou3d/iou3d_kernel.cu
--- a/automotive/3d-object-detection/ops/voxel_module.py
+++ b/automotive/3d-object-detection/ops/voxel_module.py
--- a/automotive/3d-object-detection/output/mlperf_log_accuracy.json
+++ b/automotive/3d-object-detection/output/mlperf_log_accuracy.json
+[
\ No newline at end of file
--- a/automotive/3d-object-detection/output/mlperf_log_detail.txt
+++ b/automotive/3d-object-detection/output/mlperf_log_detail.txt
--- a/automotive/3d-object-detection/output/mlperf_log_summary.txt
+++ b/automotive/3d-object-detection/output/mlperf_log_summary.txt
--- a/automotive/3d-object-detection/output/mlperf_log_trace.json
+++ b/automotive/3d-object-detection/output/mlperf_log_trace.json
--- a/automotive/3d-object-detection/requirements.txt
+++ b/automotive/3d-object-detection/requirements.txt
+# TODO: Add requirements
--- a/automotive/3d-object-detection/tools/download_dataset.py
+++ b/automotive/3d-object-detection/tools/download_dataset.py
+# TODO: script to download dataset
--- a/automotive/3d-object-detection/tools/evaluate.py
+++ b/automotive/3d-object-detection/tools/evaluate.py
--- a/automotive/3d-object-detection/tools/process.py
+++ b/automotive/3d-object-detection/tools/process.py
--- a/automotive/3d-object-detection/user.conf
+++ b/automotive/3d-object-detection/user.conf
+# The format of this config file is 'key = value'.
+# The key has the format 'model.scenario.key'. Value is mostly int64_t.
+# Model may be '*' as a wildcard, in which case the value applies to all models.
+# All times are in milliseconds.