Commit a8863510 authored by Yizhou Wang

v1.0: first commit

parent 16d8dda7
import numpy as np
import torch
import re
# torch._six was removed in recent PyTorch releases; use the stdlib equivalents.
import collections.abc as container_abcs
string_classes = (str, bytes)
int_classes = int
np_str_obj_array_pattern = re.compile(r'[SaUO]')
default_collate_err_msg_format = (
"default_collate: batch must contain tensors, numpy arrays, numbers, "
"dicts or lists; found {}")
def cr_collate(batch):
r"""Puts each data field into a tensor with outer dimension batch size"""
elem = batch[0]
elem_type = type(elem)
if elem is None:
return None
elif isinstance(elem, torch.Tensor):
out = None
if torch.utils.data.get_worker_info() is not None:
# If we're in a background process, concatenate directly into a
# shared memory tensor to avoid an extra copy
numel = sum([x.numel() for x in batch])
storage = elem.storage()._new_shared(numel)
out = elem.new(storage)
return torch.stack(batch, 0, out=out)
elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
and elem_type.__name__ != 'string_':
if elem_type.__name__ == 'ndarray':
# array of string classes and object
if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
raise TypeError(default_collate_err_msg_format.format(elem.dtype))
return cr_collate([torch.as_tensor(b) for b in batch])
elif elem.shape == (): # scalars
return torch.as_tensor(batch)
elif isinstance(elem, float):
return torch.tensor(batch, dtype=torch.float64)
elif isinstance(elem, bool):
return all(batch)
elif isinstance(elem, int_classes):
return torch.tensor(batch)
elif isinstance(elem, string_classes):
return batch
elif isinstance(elem, container_abcs.Mapping):
return {key: cr_collate([d[key] for d in batch]) for key in elem}
elif isinstance(elem, tuple) and hasattr(elem, '_fields'): # namedtuple
return elem_type(*(cr_collate(samples) for samples in zip(*batch)))
elif isinstance(elem, container_abcs.Sequence):
# transposed = zip(*batch)
# return [cr_collate(samples) for samples in transposed]
return batch
raise TypeError(default_collate_err_msg_format.format(elem_type))
def _cr_collate_npy(batch):
r"""Puts each data field into a tensor with outer dimension batch size"""
elem = batch[0]
elem_type = type(elem)
if elem_type.__name__ == 'ndarray':
return np.stack(batch, 0)
elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
and elem_type.__name__ != 'string_':
        # ndarrays are already stacked above, so only numpy scalars reach here
        if elem.shape == ():  # scalars
            return batch
elif isinstance(elem, float):
return batch
elif isinstance(elem, bool):
return all(batch)
elif isinstance(elem, int_classes):
return batch
elif isinstance(elem, string_classes):
return batch
elif isinstance(elem, container_abcs.Mapping):
return {key: _cr_collate_npy([d[key] for d in batch]) for key in elem}
elif isinstance(elem, tuple) and hasattr(elem, '_fields'): # namedtuple
return elem_type(*(_cr_collate_npy(samples) for samples in zip(*batch)))
elif isinstance(elem, container_abcs.Sequence):
# transposed = zip(*batch)
# return [cr_collate(samples) for samples in transposed]
return batch
raise TypeError(default_collate_err_msg_format.format(elem_type))
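# A minimal smoke test (not part of the original commit): cr_collate mirrors
# torch's default_collate but keeps None fields as None, and-reduces bool
# fields, and returns generic sequences untransposed; _cr_collate_npy keeps
# the batch in numpy instead of converting to tensors.
if __name__ == '__main__':
    batch = [
        {'radar': np.zeros((2, 4, 8, 8), dtype=np.float32), 'valid': True, 'name': 'seq0'},
        {'radar': np.ones((2, 4, 8, 8), dtype=np.float32), 'valid': True, 'name': 'seq1'},
    ]
    out = cr_collate(batch)
    assert out['radar'].shape == (2, 2, 4, 8, 8)  # stacked along a new batch dim
    assert out['valid'] is True and out['name'] == ['seq0', 'seq1']
    assert _cr_collate_npy([b['radar'] for b in batch]).shape == (2, 2, 4, 8, 8)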
from .parse_pkl import list_pkl_filenames
from .read_rod_results import load_rodnet_res, load_vgg_res
import os
def list_pkl_filenames(dataset_configs, split):
data_root = dataset_configs['data_root']
seqs = dataset_configs[split]['seqs']
seqs_pkl_names = [name + '.pkl' for name in seqs]
return seqs_pkl_names
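# Note: dataset_configs[split]['seqs'] is assumed to hold sequence names without
# an extension, so this simply appends '.pkl' to each, e.g.
# list_pkl_filenames(cfg, 'train') -> ['<seq_name>.pkl', ...].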
from .rodnet_cdc import RODNetCDC
from .rodnet_hg import RODNetHG
from .rodnet_hgwi import RODNetHGwI
import torch.nn as nn
class RODEncode(nn.Module):
def __init__(self, in_channels=2):
super(RODEncode, self).__init__()
self.conv1a = nn.Conv3d(in_channels=in_channels, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv1b = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.conv2a = nn.Conv3d(in_channels=64, out_channels=128,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv2b = nn.Conv3d(in_channels=128, out_channels=128,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.conv3a = nn.Conv3d(in_channels=128, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv3b = nn.Conv3d(in_channels=256, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 2, 2), padding=(4, 2, 2))
self.bn1a = nn.BatchNorm3d(num_features=64)
self.bn1b = nn.BatchNorm3d(num_features=64)
self.bn2a = nn.BatchNorm3d(num_features=128)
self.bn2b = nn.BatchNorm3d(num_features=128)
self.bn3a = nn.BatchNorm3d(num_features=256)
self.bn3b = nn.BatchNorm3d(num_features=256)
self.relu = nn.ReLU()
def forward(self, x):
x = self.relu(self.bn1a(self.conv1a(x))) # (B, 2, W, 128, 128) -> (B, 64, W, 128, 128)
x = self.relu(self.bn1b(self.conv1b(x))) # (B, 64, W, 128, 128) -> (B, 64, W/2, 64, 64)
x = self.relu(self.bn2a(self.conv2a(x))) # (B, 64, W/2, 64, 64) -> (B, 128, W/2, 64, 64)
x = self.relu(self.bn2b(self.conv2b(x))) # (B, 128, W/2, 64, 64) -> (B, 128, W/4, 32, 32)
x = self.relu(self.bn3a(self.conv3a(x))) # (B, 128, W/4, 32, 32) -> (B, 256, W/4, 32, 32)
x = self.relu(self.bn3b(self.conv3b(x))) # (B, 256, W/4, 32, 32) -> (B, 256, W/4, 16, 16)
return x
class RODDecode(nn.Module):
def __init__(self, n_class):
super(RODDecode, self).__init__()
self.convt1 = nn.ConvTranspose3d(in_channels=256, out_channels=128,
kernel_size=(4, 6, 6), stride=(2, 2, 2), padding=(1, 2, 2))
self.convt2 = nn.ConvTranspose3d(in_channels=128, out_channels=64,
kernel_size=(4, 6, 6), stride=(2, 2, 2), padding=(1, 2, 2))
self.convt3 = nn.ConvTranspose3d(in_channels=64, out_channels=n_class,
kernel_size=(3, 6, 6), stride=(1, 2, 2), padding=(1, 2, 2))
self.prelu = nn.PReLU()
self.sigmoid = nn.Sigmoid()
# self.upsample = nn.Upsample(size=(rodnet_configs['win_size'], radar_configs['ramap_rsize'],
# radar_configs['ramap_asize']), mode='nearest')
def forward(self, x):
x = self.prelu(self.convt1(x)) # (B, 256, W/4, 16, 16) -> (B, 128, W/2, 32, 32)
x = self.prelu(self.convt2(x)) # (B, 128, W/2, 32, 32) -> (B, 64, W, 64, 64)
        x = self.convt3(x)  # (B, 64, W, 64, 64) -> (B, n_class, W, 128, 128)
return x
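# A minimal shape check (an editorial addition, not in the original file): the
# CDC encoder downsamples time by 4 and space by 8, and the decoder's transposed
# convolutions restore the input resolution with n_class confidence maps.
if __name__ == '__main__':
    import torch
    enc, dec = RODEncode(in_channels=2), RODDecode(n_class=3)
    y = dec(enc(torch.zeros(1, 2, 16, 128, 128)))
    assert y.shape == (1, 3, 16, 128, 128)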
import torch.nn as nn
class RadarStackedHourglass(nn.Module):
def __init__(self, n_class, stacked_num=1, in_channels=2):
super(RadarStackedHourglass, self).__init__()
self.stacked_num = stacked_num
self.conv1a = nn.Conv3d(in_channels=in_channels, out_channels=32,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv1b = nn.Conv3d(in_channels=32, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.hourglass = []
for i in range(stacked_num):
self.hourglass.append(nn.ModuleList([RODEncode(), RODDecode(),
nn.Conv3d(in_channels=64, out_channels=n_class,
kernel_size=(9, 5, 5), stride=(1, 1, 1),
padding=(4, 2, 2)),
nn.Conv3d(in_channels=n_class, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1),
padding=(4, 2, 2))]))
self.hourglass = nn.ModuleList(self.hourglass)
self.relu = nn.ReLU()
self.bn1a = nn.BatchNorm3d(num_features=32)
self.bn1b = nn.BatchNorm3d(num_features=64)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
x = self.relu(self.bn1a(self.conv1a(x)))
x = self.relu(self.bn1b(self.conv1b(x)))
out = []
for i in range(self.stacked_num):
x, x1, x2, x3 = self.hourglass[i][0](x)
x = self.hourglass[i][1](x, x1, x2, x3)
confmap = self.hourglass[i][2](x)
out.append(self.sigmoid(confmap))
if i < self.stacked_num - 1:
confmap_ = self.hourglass[i][3](confmap)
x = x + confmap_
return out
class RODEncode(nn.Module):
def __init__(self):
super(RODEncode, self).__init__()
self.conv1a = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv1b = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.conv2a = nn.Conv3d(in_channels=64, out_channels=128,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv2b = nn.Conv3d(in_channels=128, out_channels=128,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.conv3a = nn.Conv3d(in_channels=128, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv3b = nn.Conv3d(in_channels=256, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 2, 2), padding=(4, 2, 2))
self.skipconv1a = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.skipconv1b = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.skipconv2a = nn.Conv3d(in_channels=64, out_channels=128,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.skipconv2b = nn.Conv3d(in_channels=128, out_channels=128,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.skipconv3a = nn.Conv3d(in_channels=128, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.skipconv3b = nn.Conv3d(in_channels=256, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 2, 2), padding=(4, 2, 2))
self.bn1a = nn.BatchNorm3d(num_features=64)
self.bn1b = nn.BatchNorm3d(num_features=64)
self.bn2a = nn.BatchNorm3d(num_features=128)
self.bn2b = nn.BatchNorm3d(num_features=128)
self.bn3a = nn.BatchNorm3d(num_features=256)
self.bn3b = nn.BatchNorm3d(num_features=256)
self.skipbn1a = nn.BatchNorm3d(num_features=64)
self.skipbn1b = nn.BatchNorm3d(num_features=64)
self.skipbn2a = nn.BatchNorm3d(num_features=128)
self.skipbn2b = nn.BatchNorm3d(num_features=128)
self.skipbn3a = nn.BatchNorm3d(num_features=256)
self.skipbn3b = nn.BatchNorm3d(num_features=256)
self.relu = nn.ReLU()
def forward(self, x):
x1 = self.relu(self.skipbn1a(self.skipconv1a(x)))
x1 = self.relu(self.skipbn1b(self.skipconv1b(x1)))
        x = self.relu(self.bn1a(self.conv1a(x)))  # (B, 64, W, 128, 128) -> (B, 64, W, 128, 128)
x = self.relu(self.bn1b(self.conv1b(x))) # (B, 64, W, 128, 128) -> (B, 64, W/2, 64, 64)
x2 = self.relu(self.skipbn2a(self.skipconv2a(x)))
x2 = self.relu(self.skipbn2b(self.skipconv2b(x2)))
x = self.relu(self.bn2a(self.conv2a(x))) # (B, 64, W/2, 64, 64) -> (B, 128, W/2, 64, 64)
x = self.relu(self.bn2b(self.conv2b(x))) # (B, 128, W/2, 64, 64) -> (B, 128, W/4, 32, 32)
x3 = self.relu(self.skipbn3a(self.skipconv3a(x)))
x3 = self.relu(self.skipbn3b(self.skipconv3b(x3)))
x = self.relu(self.bn3a(self.conv3a(x))) # (B, 128, W/4, 32, 32) -> (B, 256, W/4, 32, 32)
x = self.relu(self.bn3b(self.conv3b(x))) # (B, 256, W/4, 32, 32) -> (B, 256, W/4, 16, 16)
return x, x1, x2, x3
class RODDecode(nn.Module):
def __init__(self):
super(RODDecode, self).__init__()
self.convt1 = nn.ConvTranspose3d(in_channels=256, out_channels=128,
kernel_size=(3, 6, 6), stride=(1, 2, 2), padding=(1, 2, 2))
self.convt2 = nn.ConvTranspose3d(in_channels=128, out_channels=64,
kernel_size=(4, 6, 6), stride=(2, 2, 2), padding=(1, 2, 2))
self.convt3 = nn.ConvTranspose3d(in_channels=64, out_channels=64,
kernel_size=(4, 6, 6), stride=(2, 2, 2), padding=(1, 2, 2))
self.prelu = nn.PReLU()
self.sigmoid = nn.Sigmoid()
# self.upsample = nn.Upsample(size=(rodnet_configs['win_size'], radar_configs['ramap_rsize'],
# radar_configs['ramap_asize']), mode='nearest')
def forward(self, x, x1, x2, x3):
        x = self.prelu(self.convt1(x + x3))  # (B, 256, W/4, 16, 16) -> (B, 128, W/4, 32, 32)
        x = self.prelu(self.convt2(x + x2))  # (B, 128, W/4, 32, 32) -> (B, 64, W/2, 64, 64)
        x = self.convt3(x + x1)  # (B, 64, W/2, 64, 64) -> (B, 64, W, 128, 128)
return x
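# A minimal smoke test (an editorial addition): each hourglass stack emits a
# sigmoid confidence map at the input resolution, so the network returns a list
# of stacked_num tensors.
if __name__ == '__main__':
    import torch
    net = RadarStackedHourglass(n_class=3, stacked_num=2)
    outs = net(torch.zeros(1, 2, 16, 128, 128))
    assert len(outs) == 2 and outs[0].shape == (1, 3, 16, 128, 128)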
import torch
import torch.nn as nn
class RadarStackedHourglass(nn.Module):
def __init__(self, n_class, stacked_num=1):
super(RadarStackedHourglass, self).__init__()
self.stacked_num = stacked_num
self.conv1a = nn.Conv3d(in_channels=2, out_channels=32,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv1b = nn.Conv3d(in_channels=32, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv1c = nn.Conv3d(in_channels=64, out_channels=160,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.hourglass = []
for i in range(stacked_num):
self.hourglass.append(nn.ModuleList([RODEncode(), RODDecode(),
nn.Conv3d(in_channels=160, out_channels=n_class,
kernel_size=(9, 5, 5), stride=(1, 1, 1),
padding=(4, 2, 2)),
nn.Conv3d(in_channels=n_class, out_channels=160,
kernel_size=(9, 5, 5), stride=(1, 1, 1),
padding=(4, 2, 2))]))
self.hourglass = nn.ModuleList(self.hourglass)
self.relu = nn.ReLU()
self.bn1a = nn.BatchNorm3d(num_features=32)
self.bn1b = nn.BatchNorm3d(num_features=64)
self.bn1c = nn.BatchNorm3d(num_features=160)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
x = self.relu(self.bn1a(self.conv1a(x)))
x = self.relu(self.bn1b(self.conv1b(x)))
x = self.relu(self.bn1c(self.conv1c(x)))
out = []
for i in range(self.stacked_num):
x, x1, x2, x3 = self.hourglass[i][0](x)
x = self.hourglass[i][1](x, x1, x2, x3)
confmap = self.hourglass[i][2](x)
out.append(self.sigmoid(confmap))
if i < self.stacked_num - 1:
confmap_ = self.hourglass[i][3](confmap)
x = x + confmap_
return out
class InceptionLayerConcat(nn.Module):
"""
Kernal size: for 2d kernal size, since the kernal size in temporal domain will be fixed
"""
def __init__(self, kernal_size, in_channel, stride):
super(InceptionLayerConcat, self).__init__()
paddingX = kernal_size[0] // 2
paddingY = kernal_size[1] // 2
self.branch1 = nn.Conv3d(in_channels=in_channel, out_channels=32,
kernel_size=(5, kernal_size[0], kernal_size[1]), stride=stride,
padding=(2, paddingX, paddingY))
self.branch2a = nn.Conv3d(in_channels=in_channel, out_channels=64,
kernel_size=(5, kernal_size[0], kernal_size[1]), stride=(1, 1, 1),
padding=(2, paddingX, paddingY))
self.branch2b = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, kernal_size[0], kernal_size[1]), stride=stride,
padding=(4, paddingX, paddingY))
self.branch3a = nn.Conv3d(in_channels=in_channel, out_channels=64,
kernel_size=(5, kernal_size[0], kernal_size[1]), stride=(1, 1, 1),
padding=(2, paddingX, paddingY))
self.branch3b = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(13, kernal_size[0], kernal_size[1]), stride=stride,
padding=(6, paddingX, paddingY))
def forward(self, x):
branch1 = self.branch1(x)
branch2 = self.branch2a(x)
branch2 = self.branch2b(branch2)
branch3 = self.branch3a(x)
branch3 = self.branch3b(branch3)
return torch.cat((branch1, branch2, branch3), 1)
class RODEncode(nn.Module):
def __init__(self):
super(RODEncode, self).__init__()
        self.inception1 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
        self.inception2 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
        self.inception3 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
        self.skip_inception1 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
        self.skip_inception2 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
        self.skip_inception3 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
# self.conv4a = nn.Conv3d(in_channels=64, out_channels=64,
# kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
# self.conv4b = nn.Conv3d(in_channels=64, out_channels=64,
# kernel_size=(9, 5, 5), stride=(1, 2, 2), padding=(4, 2, 2))
# self.conv5a = nn.Conv3d(in_channels=64, out_channels=64,
# kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
# self.conv5b = nn.Conv3d(in_channels=64, out_channels=64,
# kernel_size=(9, 5, 5), stride=(1, 2, 2), padding=(4, 2, 2))
self.bn1 = nn.BatchNorm3d(num_features=160)
self.bn2 = nn.BatchNorm3d(num_features=160)
self.bn3 = nn.BatchNorm3d(num_features=160)
self.skip_bn1 = nn.BatchNorm3d(num_features=160)
self.skip_bn2 = nn.BatchNorm3d(num_features=160)
self.skip_bn3 = nn.BatchNorm3d(num_features=160)
# self.bn4a = nn.BatchNorm3d(num_features=64)
# self.bn4b = nn.BatchNorm3d(num_features=64)
# self.bn5a = nn.BatchNorm3d(num_features=64)
# self.bn5b = nn.BatchNorm3d(num_features=64)
self.relu = nn.ReLU()
def forward(self, x):
x1 = self.relu(self.skip_bn1(self.skip_inception1(x)))
        x = self.relu(self.bn1(self.inception1(x)))  # (B, 160, W, 128, 128) -> (B, 160, W, 64, 64)
        x2 = self.relu(self.skip_bn2(self.skip_inception2(x)))
        x = self.relu(self.bn2(self.inception2(x)))  # (B, 160, W, 64, 64) -> (B, 160, W, 32, 32)
        x3 = self.relu(self.skip_bn3(self.skip_inception3(x)))
        x = self.relu(self.bn3(self.inception3(x)))  # (B, 160, W, 32, 32) -> (B, 160, W, 16, 16)
return x, x1, x2, x3
class RODDecode(nn.Module):
def __init__(self):
super(RODDecode, self).__init__()
self.convt1 = nn.ConvTranspose3d(in_channels=160, out_channels=160,
kernel_size=(3, 6, 6), stride=(1, 2, 2), padding=(1, 2, 2))
self.convt2 = nn.ConvTranspose3d(in_channels=160, out_channels=160,
kernel_size=(3, 6, 6), stride=(1, 2, 2), padding=(1, 2, 2))
self.convt3 = nn.ConvTranspose3d(in_channels=160, out_channels=160,
kernel_size=(3, 6, 6), stride=(1, 2, 2), padding=(1, 2, 2))
self.conv1 = nn.Conv3d(in_channels=160, out_channels=160,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv2 = nn.Conv3d(in_channels=160, out_channels=160,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv3 = nn.Conv3d(in_channels=160, out_channels=160,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.prelu = nn.PReLU()
self.sigmoid = nn.Sigmoid()
# self.upsample = nn.Upsample(size=(rodnet_configs['win_size'], radar_configs['ramap_rsize'],
# radar_configs['ramap_asize']), mode='nearest')
def forward(self, x, x1, x2, x3):
        x = self.prelu(self.convt1(x + x3))  # (B, 160, W, 16, 16) -> (B, 160, W, 32, 32)
        x = self.prelu(self.conv1(x))
        x = self.prelu(self.convt2(x + x2))  # (B, 160, W, 32, 32) -> (B, 160, W, 64, 64)
        x = self.prelu(self.conv2(x))
        x = self.prelu(self.convt3(x + x1))  # (B, 160, W, 64, 64) -> (B, 160, W, 128, 128)
        x = self.prelu(self.conv3(x))
return x
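# A minimal smoke test (an editorial addition): the three inception branches
# concatenate to 32 + 64 + 64 = 160 channels, so every HGwI hourglass stage is
# 160-wide and each stack again outputs maps at the input resolution.
if __name__ == '__main__':
    net = RadarStackedHourglass(n_class=3, stacked_num=1)
    outs = net(torch.zeros(1, 2, 16, 128, 128))
    assert len(outs) == 1 and outs[0].shape == (1, 3, 16, 128, 128)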
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
def one_hot_embedding(labels, num_classes):
"""Embedding labels to one-hot form.
Args:
labels: (LongTensor) class labels, sized [N,].
num_classes: (int) number of classes.
Returns:
(tensor) encoded labels, sized [N,#classes].
"""
y = torch.eye(num_classes) # [D,D]
return y[labels] # [N,D]
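# Example: this indexes rows of an identity matrix, e.g.
# one_hot_embedding(torch.tensor([0, 2]), 3) -> [[1., 0., 0.], [0., 0., 1.]].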
# def _neg_loss(pred, gt):
# ''' Modified focal loss. Exactly the same as CornerNet.
# Runs faster and costs a little bit more memory
# Arguments:
# pred (batch x c x h x w)
# gt_regr (batch x c x h x w)
# '''
# pos_inds = gt.eq(1).float()
# neg_inds = gt.lt(1).float()
#
# neg_weights = torch.pow(1 - gt, 4)
#
# loss = 0
#
# pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
# neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds
#
# num_pos = pos_inds.float().sum()
# pos_loss = pos_loss.sum()
# neg_loss = neg_loss.sum()
#
# if num_pos == 0:
# loss = loss - neg_loss
# else:
# loss = loss - (pos_loss + neg_loss) / num_pos
# return loss
#
#
# class FocalLoss(nn.Module):
# '''nn.Module warpper for focal loss'''
#
# def __init__(self):
# super(FocalLoss, self).__init__()
# self.neg_loss = _neg_loss
#
# def forward(self, out, target):
# return self.neg_loss(out, target)
#
#
# class FocalLoss(nn.Module):
#
# def __init__(self, focusing_param=2, balance_param=0.25):
# super(FocalLoss, self).__init__()
#
# self.focusing_param = focusing_param
# self.balance_param = balance_param
#
# def forward(self, output, target):
# cross_entropy = F.cross_entropy(output, target)
# cross_entropy_log = torch.log(cross_entropy)
# logpt = - F.cross_entropy(output, target)
# pt = torch.exp(logpt)
#
# focal_loss = -((1 - pt) ** self.focusing_param) * logpt
#
# balanced_focal_loss = self.balance_param * focal_loss
#
# return balanced_focal_loss
class FocalLoss(nn.Module):
def __init__(self, num_classes=20):
super(FocalLoss, self).__init__()
self.num_classes = num_classes
def focal_loss(self, x, y):
"""Focal loss.
Args:
x: (tensor) sized [N,D].
y: (tensor) sized [N,].
Return:
(tensor) focal loss.
"""
alpha = 0.25
gamma = 2
        t = one_hot_embedding(y.data.cpu(), 1 + self.num_classes)  # [N,21]
        t = t[:, 1:]  # exclude background
        t = t.to(x.device)  # [N,20]; Variable wrappers are no longer needed
        p = x.sigmoid()
        pt = p * t + (1 - p) * (1 - t)  # pt = p if t > 0 else 1-p
        w = alpha * t + (1 - alpha) * (1 - t)  # w = alpha if t > 0 else 1-alpha
        w = w * (1 - pt).pow(gamma)
        return F.binary_cross_entropy_with_logits(x, t, w, reduction='sum')
def focal_loss_alt(self, x, y):
"""Focal loss alternative.
Args:
x: (tensor) sized [N,D].
y: (tensor) sized [N,].
Return:
(tensor) focal loss.
"""
alpha = 0.25
        t = one_hot_embedding(y.data.cpu(), 1 + self.num_classes)
        t = t[:, 1:]
        t = t.to(x.device)  # Variable wrappers are no longer needed
xt = x * (2 * t - 1) # xt = x if t > 0 else -x
pt = (2 * xt + 1).sigmoid()
w = alpha * t + (1 - alpha) * (1 - t)
loss = -w * pt.log() / 2
return loss.sum()
def forward(self, loc_preds, loc_targets, cls_preds, cls_targets):
"""Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets).
Args:
loc_preds: (tensor) predicted locations, sized [batch_size, #anchors, 4].
loc_targets: (tensor) encoded target locations, sized [batch_size, #anchors, 4].
cls_preds: (tensor) predicted class confidences, sized [batch_size, #anchors, #classes].
cls_targets: (tensor) encoded target labels, sized [batch_size, #anchors].
loss:
(tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + FocalLoss(cls_preds, cls_targets).
"""
batch_size, num_boxes = cls_targets.size()
pos = cls_targets > 0 # [N,#anchors]
        num_pos = pos.long().sum().item()
################################################################
# loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
################################################################
mask = pos.unsqueeze(2).expand_as(loc_preds) # [N,#anchors,4]
masked_loc_preds = loc_preds[mask].view(-1, 4) # [#pos,4]
masked_loc_targets = loc_targets[mask].view(-1, 4) # [#pos,4]
        loc_loss = F.smooth_l1_loss(masked_loc_preds, masked_loc_targets, reduction='sum')
################################################################
# cls_loss = FocalLoss(loc_preds, loc_targets)
################################################################
pos_neg = cls_targets > -1 # exclude ignored anchors
mask = pos_neg.unsqueeze(2).expand_as(cls_preds)
masked_cls_preds = cls_preds[mask].view(-1, self.num_classes)
cls_loss = self.focal_loss_alt(masked_cls_preds, cls_targets[pos_neg])
        print('loc_loss: %.3f | cls_loss: %.3f' % (loc_loss.item() / num_pos, cls_loss.item() / num_pos), end=' | ')
loss = (loc_loss + cls_loss) / num_pos
return loss
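# A usage sketch (an editorial addition; shapes follow the forward() docstring,
# with label -1 marking ignored anchors and 0 marking background). With the
# deprecation fixes above this runs on CPU.
if __name__ == '__main__':
    crit = FocalLoss(num_classes=20)
    loc_preds, loc_targets = torch.randn(2, 8, 4), torch.randn(2, 8, 4)
    cls_preds = torch.randn(2, 8, 20)
    cls_targets = torch.ones(2, 8, dtype=torch.long)  # every anchor assigned class 1
    cls_targets[0, 0] = -1  # one ignored anchor, excluded from the cls loss
    loss = crit(loc_preds, loc_targets, cls_preds, cls_targets)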
import torch.nn as nn
from .backbones.cdc import RODEncode, RODDecode
class RODNetCDC(nn.Module):
def __init__(self, n_class):
super(RODNetCDC, self).__init__()
self.c3d_encode = RODEncode()
self.c3d_decode = RODDecode(n_class)
def forward(self, x):
x = self.c3d_encode(x)
dets = self.c3d_decode(x)
return dets
import torch.nn as nn
from .backbones.hg import RadarStackedHourglass
class RODNetHG(nn.Module):
def __init__(self, n_class, stacked_num=2):
super(RODNetHG, self).__init__()
self.stacked_hourglass = RadarStackedHourglass(n_class, stacked_num=stacked_num)
def forward(self, x):
out = self.stacked_hourglass(x)
return out
import torch
import torch.nn as nn
from .backbones.hgwi import RadarStackedHourglass
class RODNetHGwI(nn.Module):
def __init__(self, n_class, stacked_num=1):
super(RODNetHGwI, self).__init__()
self.stacked_hourglass = RadarStackedHourglass(n_class, stacked_num=stacked_num)
def forward(self, x):
out = self.stacked_hourglass(x)
return out
if __name__ == '__main__':
    testModel = RODNetHGwI(n_class=3).cuda()  # n_class is required; 3 matches the shape comments above
x = torch.zeros((1, 2, 16, 128, 128)).cuda()
testModel(x)
import os
import sys
from importlib import import_module
def load_configs_from_file(config_path):
module_name = os.path.basename(config_path)[:-3]
if '.' in module_name:
raise ValueError('Dots are not allowed in config file path.')
config_dir = os.path.dirname(config_path)
sys.path.insert(0, config_dir)
mod = import_module(module_name)
sys.path.pop(0)
cfg_dict = {
name: value
for name, value in mod.__dict__.items()
if not name.startswith('__')
}
return cfg_dict
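# A usage sketch (the path and key below are hypothetical): every top-level
# assignment in the config .py file becomes a dict entry, names starting with
# double underscores excluded.
# cfg = load_configs_from_file('./configs/my_config.py')
# data_root = cfg['dataset_configs']['data_root']  # assuming the config defines dataset_configs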
import os
import time
def create_dir_for_new_model(name, train_model_path):
model_name = name + '-' + time.strftime("%Y%m%d-%H%M%S")
model_dir = os.path.join(train_model_path, model_name)
if not os.path.exists(model_dir):
os.makedirs(model_dir)
return model_dir, model_name
def create_random_model_name(name, checkpoint_path=None):
if checkpoint_path is None:
model_name = name + '-rand-' + time.strftime("%Y%m%d-%H%M%S")
else:
folder_name = checkpoint_path.split('/')[-2]
if folder_name.startswith(name):
model_name = folder_name
else:
model_name = name + '-rand-' + time.strftime("%Y%m%d-%H%M%S")
return model_name
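# Example (hypothetical arguments): a fresh training run gets a timestamped
# directory, e.g. create_dir_for_new_model('rodnet-cdc', './checkpoints')
# -> ('./checkpoints/rodnet-cdc-20200101-120000', 'rodnet-cdc-20200101-120000').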
from .basic import heatmap2rgb
from .confmap import visualize_confmap, visualize_confmaps_cr, visualize_postprocessing
from .demo import visualize_train_img, visualize_test_img, visualize_test_img_wo_gt
from .postprocessing import visualize_ols_hist
from .ramap import visualize_radar_chirp, visualize_radar_chirps
import numpy as np
import matplotlib.pyplot as plt
def heatmap2rgb(heatmap):
cmap = plt.get_cmap('jet')
rgba_img = cmap(heatmap)
rgb_img = np.delete(rgba_img, 3, 2)
rgb_img = np.transpose(rgb_img, (2, 0, 1))
return rgb_img
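# A quick check (an editorial addition): the 2-D heatmap is mapped through
# matplotlib's 'jet' colormap and returned as a CHW float array, alpha dropped.
if __name__ == '__main__':
    assert heatmap2rgb(np.random.rand(128, 128)).shape == (3, 128, 128)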
import numpy as np
import matplotlib.pyplot as plt
def visualize_confmap(confmap, pps=[]):
if len(confmap.shape) == 2:
plt.imshow(confmap, origin='lower', aspect='auto')
for pp in pps:
plt.scatter(pp[1], pp[0], s=5, c='white')
plt.show()
return
else:
n_channel, _, _ = confmap.shape
if n_channel == 3:
confmap_viz = np.transpose(confmap, (1, 2, 0))
elif n_channel > 3:
confmap_viz = np.transpose(confmap[:3, :, :], (1, 2, 0))
if n_channel == 4:
confmap_noise = confmap[3, :, :]
plt.imshow(confmap_noise, origin='lower', aspect='auto')
plt.show()
else:
print("Warning: wrong shape of confmap!")
return
plt.imshow(confmap_viz, origin='lower', aspect='auto')
for pp in pps:
plt.scatter(pp[1], pp[0], s=5, c='white')
plt.show()
def visualize_confmaps_cr(confmapc, confmapr, confmapcr, ppsc=[], ppsr=[], ppres=[], figname=None):
fig = plt.figure(figsize=(8, 8))
n_channel, nr, na = confmapc.shape
fig_id = 1
for class_id in range(n_channel):
fig.add_subplot(n_channel, 3, fig_id)
fig_id += 1
plt.imshow(confmapc[class_id], origin='lower', aspect='auto')
for pp in ppsc[class_id]:
plt.scatter(pp[1], pp[0], s=5, c='white')
plt.xlim(0, na)
plt.ylim(0, nr)
fig.add_subplot(n_channel, 3, fig_id)
fig_id += 1
plt.imshow(confmapr, origin='lower', aspect='auto')
for pp in ppsr:
plt.scatter(pp[1], pp[0], s=5, c='white')
plt.xlim(0, na)
plt.ylim(0, nr)
fig.add_subplot(n_channel, 3, fig_id)
fig_id += 1
plt.imshow(confmapcr[class_id], origin='lower', aspect='auto', vmin=0, vmax=1)
for pp in ppres[class_id]:
plt.scatter(pp[1], pp[0], s=5, c='white')
plt.xlim(0, na)
plt.ylim(0, nr)
if figname is None:
plt.show()
else:
plt.savefig(figname)
plt.close(fig)
def visualize_postprocessing(confmaps, det_results, classes):
    # 'classes' replaces the previously undefined globals rodnet_configs and
    # class_table; det_results is assumed to be (max_dets, 4) rows of
    # [class_id, row, col, confidence].
    confmap_pred = np.transpose(confmaps, (1, 2, 0))
    plt.imshow(confmap_pred, vmin=0, vmax=1, origin='lower', aspect='auto')
    for d in range(det_results.shape[0]):
        cla_id = int(det_results[d, 0])
        if cla_id == -1:
            continue
        row_id = det_results[d, 1]
        col_id = det_results[d, 2]
        conf = det_results[d, 3]
        cla_str = classes[cla_id]
        plt.scatter(col_id, row_id, s=50, c='white')
        plt.text(col_id + 5, row_id, cla_str + '\n%.2f' % conf, color='white', fontsize=10, fontweight='black')
    plt.axis('off')
    plt.title("RODNet Detection")
    plt.show()
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from rodnet.core.object_class import get_class_name
from .fig_configs import fig, fp, symbols
def visualize_train_img_old(fig_name, input_radar, output_confmap, confmap_gt):
fig = plt.figure(figsize=(8, 4))
img = input_radar
fig.add_subplot(1, 3, 1)
plt.imshow(img, vmin=0, vmax=1, origin='lower', aspect='auto')
img = output_confmap
fig.add_subplot(1, 3, 2)
plt.imshow(img, vmin=0, vmax=1, origin='lower', aspect='auto')
img = confmap_gt
fig.add_subplot(1, 3, 3)
plt.imshow(img, vmin=0, vmax=1, origin='lower', aspect='auto')
plt.savefig(fig_name)
plt.close(fig)
def visualize_train_img(fig_name, img_path, input_radar, output_confmap, confmap_gt):
fig = plt.figure(figsize=(8, 8))
img_data = mpimg.imread(img_path)
fig.add_subplot(2, 2, 1)
plt.imshow(img_data.astype(np.uint8))
fig.add_subplot(2, 2, 2)
plt.imshow(input_radar, origin='lower', aspect='auto')
fig.add_subplot(2, 2, 3)
output_confmap = np.transpose(output_confmap, (1, 2, 0))
output_confmap[output_confmap < 0] = 0
plt.imshow(output_confmap, vmin=0, vmax=1, origin='lower', aspect='auto')
fig.add_subplot(2, 2, 4)
confmap_gt = np.transpose(confmap_gt, (1, 2, 0))
plt.imshow(confmap_gt, vmin=0, vmax=1, origin='lower', aspect='auto')
plt.savefig(fig_name)
plt.close(fig)
def visualize_test_img(fig_name, img_path, input_radar, confmap_pred, confmap_gt, res_final, dataset, viz=False,
sybl=False):
max_dets, _ = res_final.shape
classes = dataset.object_cfg.classes
img_data = mpimg.imread(img_path)
if img_data.shape[0] > 864:
img_data = img_data[:img_data.shape[0] // 5 * 4, :, :]
fig.add_subplot(2, 2, 1)
plt.imshow(img_data.astype(np.uint8))
plt.axis('off')
plt.title("Image")
fig.add_subplot(2, 2, 2)
plt.imshow(input_radar, origin='lower', aspect='auto')
plt.axis('off')
plt.title("RA Heatmap")
fig.add_subplot(2, 2, 3)
confmap_pred = np.transpose(confmap_pred, (1, 2, 0))
confmap_pred[confmap_pred < 0] = 0
confmap_pred[confmap_pred > 1] = 1
plt.imshow(confmap_pred, vmin=0, vmax=1, origin='lower', aspect='auto')
for d in range(max_dets):
cla_id = int(res_final[d, 0])
if cla_id == -1:
continue
row_id = res_final[d, 1]
col_id = res_final[d, 2]
conf = res_final[d, 3]
conf = 1.0 if conf > 1 else conf
cla_str = get_class_name(cla_id, classes)
if sybl:
text = symbols[cla_str]
plt.text(col_id, row_id + 3, text, fontproperties=fp, color='white', size=20, ha="center")
else:
plt.scatter(col_id, row_id, s=10, c='white')
text = cla_str + '\n%.2f' % conf
plt.text(col_id + 5, row_id, text, color='white', fontsize=10)
plt.axis('off')
plt.title("RODNet Detection")
fig.add_subplot(2, 2, 4)
confmap_gt = np.transpose(confmap_gt, (1, 2, 0))
plt.imshow(confmap_gt, vmin=0, vmax=1, origin='lower', aspect='auto')
plt.axis('off')
plt.title("Ground Truth")
plt.savefig(fig_name)
if viz:
plt.pause(0.1)
plt.clf()
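# Note: res_final rows are assumed to be [class_id, row, col, confidence], with
# class_id == -1 marking unused detection slots; the same layout is assumed by
# visualize_test_img_wo_gt below.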
def visualize_test_img_wo_gt(fig_name, img_path, input_radar, confmap_pred, res_final, dataset, viz=False,
sybl=False):
max_dets, _ = res_final.shape
classes = dataset.object_cfg.classes
fig.set_size_inches(12, 4)
img_data = mpimg.imread(img_path)
if img_data.shape[0] > 864:
img_data = img_data[:img_data.shape[0] // 5 * 4, :, :]
fig.add_subplot(1, 3, 1)
plt.imshow(img_data.astype(np.uint8))
plt.axis('off')
plt.title("RGB Image")
fig.add_subplot(1, 3, 2)
input_radar[input_radar < 0] = 0
input_radar[input_radar > 1] = 1
plt.imshow(input_radar, vmin=0, vmax=1, origin='lower', aspect='auto')
plt.axis('off')
plt.title("RF Image")
fig.add_subplot(1, 3, 3)
confmap_pred = np.transpose(confmap_pred, (1, 2, 0))
confmap_pred[confmap_pred < 0] = 0
confmap_pred[confmap_pred > 1] = 1
plt.imshow(confmap_pred, vmin=0, vmax=1, origin='lower', aspect='auto')
for d in range(max_dets):
cla_id = int(res_final[d, 0])
if cla_id == -1:
continue
row_id = res_final[d, 1]
col_id = res_final[d, 2]
conf = res_final[d, 3]
conf = 1.0 if conf > 1 else conf
cla_str = get_class_name(cla_id, classes)
if sybl:
text = symbols[cla_str]
plt.text(col_id - 3, row_id + 2, text, fontproperties=fp, color='white', size=20)
else:
plt.scatter(col_id, row_id, s=10, c='white')
text = cla_str + '\n%.2f' % conf
plt.text(col_id + 5, row_id, text, color='white', fontsize=10)
plt.axis('off')
plt.title("RODNet Detections")
plt.savefig(fig_name)
if viz:
plt.pause(0.1)
plt.clf()