Commit a8863510 authored by Yizhou Wang

v1.0: first commit

parent 16d8dda7
import numpy as np
import torch
import re
# torch._six was removed in recent PyTorch releases; use the stdlib equivalents.
import collections.abc as container_abcs
string_classes = (str, bytes)
int_classes = int
np_str_obj_array_pattern = re.compile(r'[SaUO]')
default_collate_err_msg_format = (
"default_collate: batch must contain tensors, numpy arrays, numbers, "
"dicts or lists; found {}")
def cr_collate(batch):
r"""Puts each data field into a tensor with outer dimension batch size"""
elem = batch[0]
elem_type = type(elem)
if elem is None:
return None
elif isinstance(elem, torch.Tensor):
out = None
if torch.utils.data.get_worker_info() is not None:
# If we're in a background process, concatenate directly into a
# shared memory tensor to avoid an extra copy
numel = sum([x.numel() for x in batch])
storage = elem.storage()._new_shared(numel)
out = elem.new(storage)
return torch.stack(batch, 0, out=out)
elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
and elem_type.__name__ != 'string_':
if elem_type.__name__ == 'ndarray':
# array of string classes and object
if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
raise TypeError(default_collate_err_msg_format.format(elem.dtype))
return cr_collate([torch.as_tensor(b) for b in batch])
elif elem.shape == (): # scalars
return torch.as_tensor(batch)
elif isinstance(elem, float):
return torch.tensor(batch, dtype=torch.float64)
elif isinstance(elem, bool):
return all(batch)
elif isinstance(elem, int_classes):
return torch.tensor(batch)
elif isinstance(elem, string_classes):
return batch
elif isinstance(elem, container_abcs.Mapping):
return {key: cr_collate([d[key] for d in batch]) for key in elem}
elif isinstance(elem, tuple) and hasattr(elem, '_fields'): # namedtuple
return elem_type(*(cr_collate(samples) for samples in zip(*batch)))
elif isinstance(elem, container_abcs.Sequence):
# transposed = zip(*batch)
# return [cr_collate(samples) for samples in transposed]
return batch
raise TypeError(default_collate_err_msg_format.format(elem_type))
def _cr_collate_npy(batch):
r"""Puts each data field into a tensor with outer dimension batch size"""
elem = batch[0]
elem_type = type(elem)
if elem_type.__name__ == 'ndarray':
return np.stack(batch, 0)
elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
and elem_type.__name__ != 'string_':
        # ndarrays are already stacked above, so only numpy scalars reach here
        if elem.shape == ():  # scalars
            return batch
elif isinstance(elem, float):
return batch
elif isinstance(elem, bool):
return all(batch)
elif isinstance(elem, int_classes):
return batch
elif isinstance(elem, string_classes):
return batch
elif isinstance(elem, container_abcs.Mapping):
return {key: _cr_collate_npy([d[key] for d in batch]) for key in elem}
elif isinstance(elem, tuple) and hasattr(elem, '_fields'): # namedtuple
return elem_type(*(_cr_collate_npy(samples) for samples in zip(*batch)))
elif isinstance(elem, container_abcs.Sequence):
# transposed = zip(*batch)
# return [cr_collate(samples) for samples in transposed]
return batch
raise TypeError(default_collate_err_msg_format.format(elem_type))
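# A minimal smoke test (not part of the original commit): cr_collate mirrors
# torch's default_collate but keeps None fields as None, and-reduces bool
# fields, and returns generic sequences untransposed; _cr_collate_npy keeps
# the batch in numpy instead of converting to tensors.
if __name__ == '__main__':
    batch = [
        {'radar': np.zeros((2, 4, 8, 8), dtype=np.float32), 'valid': True, 'name': 'seq0'},
        {'radar': np.ones((2, 4, 8, 8), dtype=np.float32), 'valid': True, 'name': 'seq1'},
    ]
    out = cr_collate(batch)
    assert out['radar'].shape == (2, 2, 4, 8, 8)  # stacked along a new batch dim
    assert out['valid'] is True and out['name'] == ['seq0', 'seq1']
    assert _cr_collate_npy([b['radar'] for b in batch]).shape == (2, 2, 4, 8, 8)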
from .parse_pkl import list_pkl_filenames
from .read_rod_results import load_rodnet_res, load_vgg_res
import os
def list_pkl_filenames(dataset_configs, split):
data_root = dataset_configs['data_root']
seqs = dataset_configs[split]['seqs']
seqs_pkl_names = [name + '.pkl' for name in seqs]
return seqs_pkl_names
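# Note: dataset_configs[split]['seqs'] is assumed to hold sequence names without
# an extension, so this simply appends '.pkl' to each, e.g.
# list_pkl_filenames(cfg, 'train') -> ['<seq_name>.pkl', ...].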
from .rodnet_cdc import RODNetCDC
from .rodnet_hg import RODNetHG
from .rodnet_hgwi import RODNetHGwI
import torch.nn as nn
class RODEncode(nn.Module):
def __init__(self, in_channels=2):
super(RODEncode, self).__init__()
self.conv1a = nn.Conv3d(in_channels=in_channels, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv1b = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.conv2a = nn.Conv3d(in_channels=64, out_channels=128,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv2b = nn.Conv3d(in_channels=128, out_channels=128,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.conv3a = nn.Conv3d(in_channels=128, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv3b = nn.Conv3d(in_channels=256, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 2, 2), padding=(4, 2, 2))
self.bn1a = nn.BatchNorm3d(num_features=64)
self.bn1b = nn.BatchNorm3d(num_features=64)
self.bn2a = nn.BatchNorm3d(num_features=128)
self.bn2b = nn.BatchNorm3d(num_features=128)
self.bn3a = nn.BatchNorm3d(num_features=256)
self.bn3b = nn.BatchNorm3d(num_features=256)
self.relu = nn.ReLU()
def forward(self, x):
x = self.relu(self.bn1a(self.conv1a(x))) # (B, 2, W, 128, 128) -> (B, 64, W, 128, 128)
x = self.relu(self.bn1b(self.conv1b(x))) # (B, 64, W, 128, 128) -> (B, 64, W/2, 64, 64)
x = self.relu(self.bn2a(self.conv2a(x))) # (B, 64, W/2, 64, 64) -> (B, 128, W/2, 64, 64)
x = self.relu(self.bn2b(self.conv2b(x))) # (B, 128, W/2, 64, 64) -> (B, 128, W/4, 32, 32)
x = self.relu(self.bn3a(self.conv3a(x))) # (B, 128, W/4, 32, 32) -> (B, 256, W/4, 32, 32)
x = self.relu(self.bn3b(self.conv3b(x))) # (B, 256, W/4, 32, 32) -> (B, 256, W/4, 16, 16)
return x
class RODDecode(nn.Module):
def __init__(self, n_class):
super(RODDecode, self).__init__()
self.convt1 = nn.ConvTranspose3d(in_channels=256, out_channels=128,
kernel_size=(4, 6, 6), stride=(2, 2, 2), padding=(1, 2, 2))
self.convt2 = nn.ConvTranspose3d(in_channels=128, out_channels=64,
kernel_size=(4, 6, 6), stride=(2, 2, 2), padding=(1, 2, 2))
self.convt3 = nn.ConvTranspose3d(in_channels=64, out_channels=n_class,
kernel_size=(3, 6, 6), stride=(1, 2, 2), padding=(1, 2, 2))
self.prelu = nn.PReLU()
self.sigmoid = nn.Sigmoid()
# self.upsample = nn.Upsample(size=(rodnet_configs['win_size'], radar_configs['ramap_rsize'],
# radar_configs['ramap_asize']), mode='nearest')
def forward(self, x):
x = self.prelu(self.convt1(x)) # (B, 256, W/4, 16, 16) -> (B, 128, W/2, 32, 32)
x = self.prelu(self.convt2(x)) # (B, 128, W/2, 32, 32) -> (B, 64, W, 64, 64)
        x = self.convt3(x)  # (B, 64, W, 64, 64) -> (B, n_class, W, 128, 128)
return x
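# A minimal shape check (an editorial addition, not in the original file): the
# CDC encoder downsamples time by 4 and space by 8, and the decoder's transposed
# convolutions restore the input resolution with n_class confidence maps.
if __name__ == '__main__':
    import torch
    enc, dec = RODEncode(in_channels=2), RODDecode(n_class=3)
    y = dec(enc(torch.zeros(1, 2, 16, 128, 128)))
    assert y.shape == (1, 3, 16, 128, 128)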
import torch.nn as nn
class RadarStackedHourglass(nn.Module):
def __init__(self, n_class, stacked_num=1, in_channels=2):
super(RadarStackedHourglass, self).__init__()
self.stacked_num = stacked_num
self.conv1a = nn.Conv3d(in_channels=in_channels, out_channels=32,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv1b = nn.Conv3d(in_channels=32, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.hourglass = []
for i in range(stacked_num):
self.hourglass.append(nn.ModuleList([RODEncode(), RODDecode(),
nn.Conv3d(in_channels=64, out_channels=n_class,
kernel_size=(9, 5, 5), stride=(1, 1, 1),
padding=(4, 2, 2)),
nn.Conv3d(in_channels=n_class, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1),
padding=(4, 2, 2))]))
self.hourglass = nn.ModuleList(self.hourglass)
self.relu = nn.ReLU()
self.bn1a = nn.BatchNorm3d(num_features=32)
self.bn1b = nn.BatchNorm3d(num_features=64)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
x = self.relu(self.bn1a(self.conv1a(x)))
x = self.relu(self.bn1b(self.conv1b(x)))
out = []
for i in range(self.stacked_num):
x, x1, x2, x3 = self.hourglass[i][0](x)
x = self.hourglass[i][1](x, x1, x2, x3)
confmap = self.hourglass[i][2](x)
out.append(self.sigmoid(confmap))
if i < self.stacked_num - 1:
confmap_ = self.hourglass[i][3](confmap)
x = x + confmap_
return out
class RODEncode(nn.Module):
def __init__(self):
super(RODEncode, self).__init__()
self.conv1a = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv1b = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.conv2a = nn.Conv3d(in_channels=64, out_channels=128,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv2b = nn.Conv3d(in_channels=128, out_channels=128,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.conv3a = nn.Conv3d(in_channels=128, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv3b = nn.Conv3d(in_channels=256, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 2, 2), padding=(4, 2, 2))
self.skipconv1a = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.skipconv1b = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.skipconv2a = nn.Conv3d(in_channels=64, out_channels=128,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.skipconv2b = nn.Conv3d(in_channels=128, out_channels=128,
kernel_size=(9, 5, 5), stride=(2, 2, 2), padding=(4, 2, 2))
self.skipconv3a = nn.Conv3d(in_channels=128, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.skipconv3b = nn.Conv3d(in_channels=256, out_channels=256,
kernel_size=(9, 5, 5), stride=(1, 2, 2), padding=(4, 2, 2))
self.bn1a = nn.BatchNorm3d(num_features=64)
self.bn1b = nn.BatchNorm3d(num_features=64)
self.bn2a = nn.BatchNorm3d(num_features=128)
self.bn2b = nn.BatchNorm3d(num_features=128)
self.bn3a = nn.BatchNorm3d(num_features=256)
self.bn3b = nn.BatchNorm3d(num_features=256)
self.skipbn1a = nn.BatchNorm3d(num_features=64)
self.skipbn1b = nn.BatchNorm3d(num_features=64)
self.skipbn2a = nn.BatchNorm3d(num_features=128)
self.skipbn2b = nn.BatchNorm3d(num_features=128)
self.skipbn3a = nn.BatchNorm3d(num_features=256)
self.skipbn3b = nn.BatchNorm3d(num_features=256)
self.relu = nn.ReLU()
def forward(self, x):
x1 = self.relu(self.skipbn1a(self.skipconv1a(x)))
x1 = self.relu(self.skipbn1b(self.skipconv1b(x1)))
        x = self.relu(self.bn1a(self.conv1a(x)))  # (B, 64, W, 128, 128) -> (B, 64, W, 128, 128)
x = self.relu(self.bn1b(self.conv1b(x))) # (B, 64, W, 128, 128) -> (B, 64, W/2, 64, 64)
x2 = self.relu(self.skipbn2a(self.skipconv2a(x)))
x2 = self.relu(self.skipbn2b(self.skipconv2b(x2)))
x = self.relu(self.bn2a(self.conv2a(x))) # (B, 64, W/2, 64, 64) -> (B, 128, W/2, 64, 64)
x = self.relu(self.bn2b(self.conv2b(x))) # (B, 128, W/2, 64, 64) -> (B, 128, W/4, 32, 32)
x3 = self.relu(self.skipbn3a(self.skipconv3a(x)))
x3 = self.relu(self.skipbn3b(self.skipconv3b(x3)))
x = self.relu(self.bn3a(self.conv3a(x))) # (B, 128, W/4, 32, 32) -> (B, 256, W/4, 32, 32)
x = self.relu(self.bn3b(self.conv3b(x))) # (B, 256, W/4, 32, 32) -> (B, 256, W/4, 16, 16)
return x, x1, x2, x3
class RODDecode(nn.Module):
def __init__(self):
super(RODDecode, self).__init__()
self.convt1 = nn.ConvTranspose3d(in_channels=256, out_channels=128,
kernel_size=(3, 6, 6), stride=(1, 2, 2), padding=(1, 2, 2))
self.convt2 = nn.ConvTranspose3d(in_channels=128, out_channels=64,
kernel_size=(4, 6, 6), stride=(2, 2, 2), padding=(1, 2, 2))
self.convt3 = nn.ConvTranspose3d(in_channels=64, out_channels=64,
kernel_size=(4, 6, 6), stride=(2, 2, 2), padding=(1, 2, 2))
self.prelu = nn.PReLU()
self.sigmoid = nn.Sigmoid()
# self.upsample = nn.Upsample(size=(rodnet_configs['win_size'], radar_configs['ramap_rsize'],
# radar_configs['ramap_asize']), mode='nearest')
def forward(self, x, x1, x2, x3):
        x = self.prelu(self.convt1(x + x3))  # (B, 256, W/4, 16, 16) -> (B, 128, W/4, 32, 32)
        x = self.prelu(self.convt2(x + x2))  # (B, 128, W/4, 32, 32) -> (B, 64, W/2, 64, 64)
        x = self.convt3(x + x1)  # (B, 64, W/2, 64, 64) -> (B, 64, W, 128, 128)
return x
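# A minimal smoke test (an editorial addition): each hourglass stack emits a
# sigmoid confidence map at the input resolution, so the network returns a list
# of stacked_num tensors.
if __name__ == '__main__':
    import torch
    net = RadarStackedHourglass(n_class=3, stacked_num=2)
    outs = net(torch.zeros(1, 2, 16, 128, 128))
    assert len(outs) == 2 and outs[0].shape == (1, 3, 16, 128, 128)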
import torch
import torch.nn as nn
class RadarStackedHourglass(nn.Module):
def __init__(self, n_class, stacked_num=1):
super(RadarStackedHourglass, self).__init__()
self.stacked_num = stacked_num
self.conv1a = nn.Conv3d(in_channels=2, out_channels=32,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv1b = nn.Conv3d(in_channels=32, out_channels=64,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv1c = nn.Conv3d(in_channels=64, out_channels=160,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.hourglass = []
for i in range(stacked_num):
self.hourglass.append(nn.ModuleList([RODEncode(), RODDecode(),
nn.Conv3d(in_channels=160, out_channels=n_class,
kernel_size=(9, 5, 5), stride=(1, 1, 1),
padding=(4, 2, 2)),
nn.Conv3d(in_channels=n_class, out_channels=160,
kernel_size=(9, 5, 5), stride=(1, 1, 1),
padding=(4, 2, 2))]))
self.hourglass = nn.ModuleList(self.hourglass)
self.relu = nn.ReLU()
self.bn1a = nn.BatchNorm3d(num_features=32)
self.bn1b = nn.BatchNorm3d(num_features=64)
self.bn1c = nn.BatchNorm3d(num_features=160)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
x = self.relu(self.bn1a(self.conv1a(x)))
x = self.relu(self.bn1b(self.conv1b(x)))
x = self.relu(self.bn1c(self.conv1c(x)))
out = []
for i in range(self.stacked_num):
x, x1, x2, x3 = self.hourglass[i][0](x)
x = self.hourglass[i][1](x, x1, x2, x3)
confmap = self.hourglass[i][2](x)
out.append(self.sigmoid(confmap))
if i < self.stacked_num - 1:
confmap_ = self.hourglass[i][3](confmap)
x = x + confmap_
return out
class InceptionLayerConcat(nn.Module):
"""
Kernal size: for 2d kernal size, since the kernal size in temporal domain will be fixed
"""
def __init__(self, kernal_size, in_channel, stride):
super(InceptionLayerConcat, self).__init__()
paddingX = kernal_size[0] // 2
paddingY = kernal_size[1] // 2
self.branch1 = nn.Conv3d(in_channels=in_channel, out_channels=32,
kernel_size=(5, kernal_size[0], kernal_size[1]), stride=stride,
padding=(2, paddingX, paddingY))
self.branch2a = nn.Conv3d(in_channels=in_channel, out_channels=64,
kernel_size=(5, kernal_size[0], kernal_size[1]), stride=(1, 1, 1),
padding=(2, paddingX, paddingY))
self.branch2b = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(9, kernal_size[0], kernal_size[1]), stride=stride,
padding=(4, paddingX, paddingY))
self.branch3a = nn.Conv3d(in_channels=in_channel, out_channels=64,
kernel_size=(5, kernal_size[0], kernal_size[1]), stride=(1, 1, 1),
padding=(2, paddingX, paddingY))
self.branch3b = nn.Conv3d(in_channels=64, out_channels=64,
kernel_size=(13, kernal_size[0], kernal_size[1]), stride=stride,
padding=(6, paddingX, paddingY))
def forward(self, x):
branch1 = self.branch1(x)
branch2 = self.branch2a(x)
branch2 = self.branch2b(branch2)
branch3 = self.branch3a(x)
branch3 = self.branch3b(branch3)
return torch.cat((branch1, branch2, branch3), 1)
class RODEncode(nn.Module):
def __init__(self):
super(RODEncode, self).__init__()
        self.inception1 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
        self.inception2 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
        self.inception3 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
        self.skip_inception1 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
        self.skip_inception2 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
        self.skip_inception3 = InceptionLayerConcat(kernel_size=(5, 5), in_channel=160, stride=(1, 2, 2))
# self.conv4a = nn.Conv3d(in_channels=64, out_channels=64,
# kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
# self.conv4b = nn.Conv3d(in_channels=64, out_channels=64,
# kernel_size=(9, 5, 5), stride=(1, 2, 2), padding=(4, 2, 2))
# self.conv5a = nn.Conv3d(in_channels=64, out_channels=64,
# kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
# self.conv5b = nn.Conv3d(in_channels=64, out_channels=64,
# kernel_size=(9, 5, 5), stride=(1, 2, 2), padding=(4, 2, 2))
self.bn1 = nn.BatchNorm3d(num_features=160)
self.bn2 = nn.BatchNorm3d(num_features=160)
self.bn3 = nn.BatchNorm3d(num_features=160)
self.skip_bn1 = nn.BatchNorm3d(num_features=160)
self.skip_bn2 = nn.BatchNorm3d(num_features=160)
self.skip_bn3 = nn.BatchNorm3d(num_features=160)
# self.bn4a = nn.BatchNorm3d(num_features=64)
# self.bn4b = nn.BatchNorm3d(num_features=64)
# self.bn5a = nn.BatchNorm3d(num_features=64)
# self.bn5b = nn.BatchNorm3d(num_features=64)
self.relu = nn.ReLU()
def forward(self, x):
x1 = self.relu(self.skip_bn1(self.skip_inception1(x)))
        x = self.relu(self.bn1(self.inception1(x)))  # (B, 160, W, 128, 128) -> (B, 160, W, 64, 64)
        x2 = self.relu(self.skip_bn2(self.skip_inception2(x)))
        x = self.relu(self.bn2(self.inception2(x)))  # (B, 160, W, 64, 64) -> (B, 160, W, 32, 32)
        x3 = self.relu(self.skip_bn3(self.skip_inception3(x)))
        x = self.relu(self.bn3(self.inception3(x)))  # (B, 160, W, 32, 32) -> (B, 160, W, 16, 16)
return x, x1, x2, x3
class RODDecode(nn.Module):
def __init__(self):
super(RODDecode, self).__init__()
self.convt1 = nn.ConvTranspose3d(in_channels=160, out_channels=160,
kernel_size=(3, 6, 6), stride=(1, 2, 2), padding=(1, 2, 2))
self.convt2 = nn.ConvTranspose3d(in_channels=160, out_channels=160,
kernel_size=(3, 6, 6), stride=(1, 2, 2), padding=(1, 2, 2))
self.convt3 = nn.ConvTranspose3d(in_channels=160, out_channels=160,
kernel_size=(3, 6, 6), stride=(1, 2, 2), padding=(1, 2, 2))
self.conv1 = nn.Conv3d(in_channels=160, out_channels=160,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv2 = nn.Conv3d(in_channels=160, out_channels=160,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.conv3 = nn.Conv3d(in_channels=160, out_channels=160,
kernel_size=(9, 5, 5), stride=(1, 1, 1), padding=(4, 2, 2))
self.prelu = nn.PReLU()
self.sigmoid = nn.Sigmoid()
# self.upsample = nn.Upsample(size=(rodnet_configs['win_size'], radar_configs['ramap_rsize'],
# radar_configs['ramap_asize']), mode='nearest')
def forward(self, x, x1, x2, x3):
        x = self.prelu(self.convt1(x + x3))  # (B, 160, W, 16, 16) -> (B, 160, W, 32, 32)
        x = self.prelu(self.conv1(x))
        x = self.prelu(self.convt2(x + x2))  # (B, 160, W, 32, 32) -> (B, 160, W, 64, 64)
        x = self.prelu(self.conv2(x))
        x = self.prelu(self.convt3(x + x1))  # (B, 160, W, 64, 64) -> (B, 160, W, 128, 128)
        x = self.prelu(self.conv3(x))
return x
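# A minimal smoke test (an editorial addition): the three inception branches
# concatenate to 32 + 64 + 64 = 160 channels, so every HGwI hourglass stage is
# 160-wide and each stack again outputs maps at the input resolution.
if __name__ == '__main__':
    net = RadarStackedHourglass(n_class=3, stacked_num=1)
    outs = net(torch.zeros(1, 2, 16, 128, 128))
    assert len(outs) == 1 and outs[0].shape == (1, 3, 16, 128, 128)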
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
def one_hot_embedding(labels, num_classes):
"""Embedding labels to one-hot form.
Args:
labels: (LongTensor) class labels, sized [N,].
num_classes: (int) number of classes.
Returns:
(tensor) encoded labels, sized [N,#classes].
"""
y = torch.eye(num_classes) # [D,D]
return y[labels] # [N,D]
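# Example: this indexes rows of an identity matrix, e.g.
# one_hot_embedding(torch.tensor([0, 2]), 3) -> [[1., 0., 0.], [0., 0., 1.]].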
# def _neg_loss(pred, gt):
# ''' Modified focal loss. Exactly the same as CornerNet.
# Runs faster and costs a little bit more memory
# Arguments:
# pred (batch x c x h x w)
# gt_regr (batch x c x h x w)
# '''
# pos_inds = gt.eq(1).float()
# neg_inds = gt.lt(1).float()
#
# neg_weights = torch.pow(1 - gt, 4)
#
# loss = 0
#
# pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
# neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds
#
# num_pos = pos_inds.float().sum()
# pos_loss = pos_loss.sum()
# neg_loss = neg_loss.sum()
#
# if num_pos == 0:
# loss = loss - neg_loss
# else:
# loss = loss - (pos_loss + neg_loss) / num_pos
# return loss
#
#
# class FocalLoss(nn.Module):
# '''nn.Module warpper for focal loss'''
#
# def __init__(self):
# super(FocalLoss, self).__init__()
# self.neg_loss = _neg_loss
#
# def forward(self, out, target):
# return self.neg_loss(out, target)
#
#
# class FocalLoss(nn.Module):
#
# def __init__(self, focusing_param=2, balance_param=0.25):
# super(FocalLoss, self).__init__()
#
# self.focusing_param = focusing_param
# self.balance_param = balance_param
#
# def forward(self, output, target):
# cross_entropy = F.cross_entropy(output, target)
# cross_entropy_log = torch.log(cross_entropy)
# logpt = - F.cross_entropy(output, target)
# pt = torch.exp(logpt)
#
# focal_loss = -((1 - pt) ** self.focusing_param) * logpt
#
# balanced_focal_loss = self.balance_param * focal_loss
#
# return balanced_focal_loss
class FocalLoss(nn.Module):
def __init__(self, num_classes=20):
super(FocalLoss, self).__init__()
self.num_classes = num_classes
def focal_loss(self, x, y):
"""Focal loss.
Args:
x: (tensor) sized [N,D].
y: (tensor) sized [N,].
Return:
(tensor) focal loss.
"""
alpha = 0.25
gamma = 2
        t = one_hot_embedding(y.data.cpu(), 1 + self.num_classes)  # [N,21]
        t = t[:, 1:]  # exclude background
        t = t.to(x.device)  # [N,20]; Variable wrappers are no longer needed
        p = x.sigmoid()
        pt = p * t + (1 - p) * (1 - t)  # pt = p if t > 0 else 1-p
        w = alpha * t + (1 - alpha) * (1 - t)  # w = alpha if t > 0 else 1-alpha
        w = w * (1 - pt).pow(gamma)
        return F.binary_cross_entropy_with_logits(x, t, w, reduction='sum')
def focal_loss_alt(self, x, y):
"""Focal loss alternative.
Args:
x: (tensor) sized [N,D].
y: (tensor) sized [N,].
Return:
(tensor) focal loss.
"""
alpha = 0.25
        t = one_hot_embedding(y.data.cpu(), 1 + self.num_classes)
        t = t[:, 1:]
        t = t.to(x.device)  # Variable wrappers are no longer needed
xt = x * (2 * t - 1) # xt = x if t > 0 else -x
pt = (2 * xt + 1).sigmoid()
w = alpha * t + (1 - alpha) * (1 - t)
loss = -w * pt.log() / 2
return loss.sum()
def forward(self, loc_preds, loc_targets, cls_preds, cls_targets):
"""Compute loss between (loc_preds, loc_targets) and (cls_preds, cls_targets).
Args:
loc_preds: (tensor) predicted locations, sized [batch_size, #anchors, 4].
loc_targets: (tensor) encoded target locations, sized [batch_size, #anchors, 4].
cls_preds: (tensor) predicted class confidences, sized [batch_size, #anchors, #classes].
cls_targets: (tensor) encoded target labels, sized [batch_size, #anchors].
loss:
(tensor) loss = SmoothL1Loss(loc_preds, loc_targets) + FocalLoss(cls_preds, cls_targets).
"""
batch_size, num_boxes = cls_targets.size()
pos = cls_targets > 0 # [N,#anchors]
        num_pos = pos.long().sum().item()
################################################################
# loc_loss = SmoothL1Loss(pos_loc_preds, pos_loc_targets)
################################################################
mask = pos.unsqueeze(2).expand_as(loc_preds) # [N,#anchors,4]
masked_loc_preds = loc_preds[mask].view(-1, 4) # [#pos,4]
masked_loc_targets = loc_targets[mask].view(-1, 4) # [#pos,4]
        loc_loss = F.smooth_l1_loss(masked_loc_preds, masked_loc_targets, reduction='sum')
################################################################
# cls_loss = FocalLoss(loc_preds, loc_targets)
################################################################
pos_neg = cls_targets > -1 # exclude ignored anchors
mask = pos_neg.unsqueeze(2).expand_as(cls_preds)
masked_cls_preds = cls_preds[mask].view(-1, self.num_classes)
cls_loss = self.focal_loss_alt(masked_cls_preds, cls_targets[pos_neg])
        print('loc_loss: %.3f | cls_loss: %.3f' % (loc_loss.item() / num_pos, cls_loss.item() / num_pos), end=' | ')
loss = (loc_loss + cls_loss) / num_pos
return loss
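# A usage sketch (an editorial addition; shapes follow the forward() docstring,
# with label -1 marking ignored anchors and 0 marking background). With the
# deprecation fixes above this runs on CPU.
if __name__ == '__main__':
    crit = FocalLoss(num_classes=20)
    loc_preds, loc_targets = torch.randn(2, 8, 4), torch.randn(2, 8, 4)
    cls_preds = torch.randn(2, 8, 20)
    cls_targets = torch.ones(2, 8, dtype=torch.long)  # every anchor assigned class 1
    cls_targets[0, 0] = -1  # one ignored anchor, excluded from the cls loss
    loss = crit(loc_preds, loc_targets, cls_preds, cls_targets)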
import torch.nn as nn
from .backbones.cdc import RODEncode, RODDecode
class RODNetCDC(nn.Module):
def __init__(self, n_class):
super(RODNetCDC, self).__init__()
self.c3d_encode = RODEncode()
self.c3d_decode = RODDecode(n_class)
def forward(self, x):
x = self.c3d_encode(x)
dets = self.c3d_decode(x)
return dets
import torch.nn as nn
from .backbones.hg import RadarStackedHourglass
class RODNetHG(nn.Module):
def __init__(self, n_class, stacked_num=2):
super(RODNetHG, self).__init__()
self.stacked_hourglass = RadarStackedHourglass(n_class, stacked_num=stacked_num)
def forward(self, x):
out = self.stacked_hourglass(x)
return out
import torch
import torch.nn as nn
from .backbones.hgwi import RadarStackedHourglass
class RODNetHGwI(nn.Module):
def __init__(self, n_class, stacked_num=1):
super(RODNetHGwI, self).__init__()
self.stacked_hourglass = RadarStackedHourglass(n_class, stacked_num=stacked_num)
def forward(self, x):
out = self.stacked_hourglass(x)
return out
if __name__ == '__main__':
    testModel = RODNetHGwI(n_class=3).cuda()  # n_class is required; 3 matches the shape comments above
x = torch.zeros((1, 2, 16, 128, 128)).cuda()
testModel(x)
import os
import sys
from importlib import import_module
def load_configs_from_file(config_path):
module_name = os.path.basename(config_path)[:-3]
if '.' in module_name:
raise ValueError('Dots are not allowed in config file path.')
config_dir = os.path.dirname(config_path)
sys.path.insert(0, config_dir)
mod = import_module(module_name)
sys.path.pop(0)
cfg_dict = {
name: value
for name, value in mod.__dict__.items()
if not name.startswith('__')
}
return cfg_dict
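# A usage sketch (the path and key below are hypothetical): every top-level
# assignment in the config .py file becomes a dict entry, names starting with
# double underscores excluded.
# cfg = load_configs_from_file('./configs/my_config.py')
# data_root = cfg['dataset_configs']['data_root']  # assuming the config defines dataset_configs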
import os
import time
def create_dir_for_new_model(name, train_model_path):
model_name = name + '-' + time.strftime("%Y%m%d-%H%M%S")
model_dir = os.path.join(train_model_path, model_name)
if not os.path.exists(model_dir):
os.makedirs(model_dir)
return model_dir, model_name
def create_random_model_name(name, checkpoint_path=None):
if checkpoint_path is None:
model_name = name + '-rand-' + time.strftime("%Y%m%d-%H%M%S")
else:
folder_name = checkpoint_path.split('/')[-2]
if folder_name.startswith(name):
model_name = folder_name
else:
model_name = name + '-rand-' + time.strftime("%Y%m%d-%H%M%S")
return model_name
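# Example (hypothetical arguments): a fresh training run gets a timestamped
# directory, e.g. create_dir_for_new_model('rodnet-cdc', './checkpoints')
# -> ('./checkpoints/rodnet-cdc-20200101-120000', 'rodnet-cdc-20200101-120000').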
from .basic import heatmap2rgb
from .confmap import visualize_confmap, visualize_confmaps_cr, visualize_postprocessing
from .demo import visualize_train_img, visualize_test_img, visualize_test_img_wo_gt
from .postprocessing import visualize_ols_hist
from .ramap import visualize_radar_chirp, visualize_radar_chirps
import numpy as np
import matplotlib.pyplot as plt
def heatmap2rgb(heatmap):
cmap = plt.get_cmap('jet')
rgba_img = cmap(heatmap)
rgb_img = np.delete(rgba_img, 3, 2)
rgb_img = np.transpose(rgb_img, (2, 0, 1))
return rgb_img
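# A quick check (an editorial addition): the 2-D heatmap is mapped through
# matplotlib's 'jet' colormap and returned as a CHW float array, alpha dropped.
if __name__ == '__main__':
    assert heatmap2rgb(np.random.rand(128, 128)).shape == (3, 128, 128)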
import numpy as np
import matplotlib.pyplot as plt
def visualize_confmap(confmap, pps=[]):
if len(confmap.shape) == 2:
plt.imshow(confmap, origin='lower', aspect='auto')
for pp in pps:
plt.scatter(pp[1], pp[0], s=5, c='white')
plt.show()
return
else:
n_channel, _, _ = confmap.shape
if n_channel == 3:
confmap_viz = np.transpose(confmap, (1, 2, 0))
elif n_channel > 3:
confmap_viz = np.transpose(confmap[:3, :, :], (1, 2, 0))
if n_channel == 4:
confmap_noise = confmap[3, :, :]
plt.imshow(confmap_noise, origin='lower', aspect='auto')
plt.show()
else:
print("Warning: wrong shape of confmap!")
return
plt.imshow(confmap_viz, origin='lower', aspect='auto')
for pp in pps:
plt.scatter(pp[1], pp[0], s=5, c='white')
plt.show()
def visualize_confmaps_cr(confmapc, confmapr, confmapcr, ppsc=[], ppsr=[], ppres=[], figname=None):
fig = plt.figure(figsize=(8, 8))
n_channel, nr, na = confmapc.shape
fig_id = 1
for class_id in range(n_channel):
fig.add_subplot(n_channel, 3, fig_id)
fig_id += 1
plt.imshow(confmapc[class_id], origin='lower', aspect='auto')
for pp in ppsc[class_id]:
plt.scatter(pp[1], pp[0], s=5, c='white')
plt.xlim(0, na)
plt.ylim(0, nr)
fig.add_subplot(n_channel, 3, fig_id)
fig_id += 1
plt.imshow(confmapr, origin='lower', aspect='auto')
for pp in ppsr:
plt.scatter(pp[1], pp[0], s=5, c='white')
plt.xlim(0, na)
plt.ylim(0, nr)
fig.add_subplot(n_channel, 3, fig_id)
fig_id += 1
plt.imshow(confmapcr[class_id], origin='lower', aspect='auto', vmin=0, vmax=1)
for pp in ppres[class_id]:
plt.scatter(pp[1], pp[0], s=5, c='white')
plt.xlim(0, na)
plt.ylim(0, nr)
if figname is None:
plt.show()
else:
plt.savefig(figname)
plt.close(fig)
def visualize_postprocessing(confmaps, det_results, classes):
    # 'classes' replaces the previously undefined globals rodnet_configs and
    # class_table; det_results is assumed to be (max_dets, 4) rows of
    # [class_id, row, col, confidence].
    confmap_pred = np.transpose(confmaps, (1, 2, 0))
    plt.imshow(confmap_pred, vmin=0, vmax=1, origin='lower', aspect='auto')
    for d in range(det_results.shape[0]):
        cla_id = int(det_results[d, 0])
        if cla_id == -1:
            continue
        row_id = det_results[d, 1]
        col_id = det_results[d, 2]
        conf = det_results[d, 3]
        cla_str = classes[cla_id]
        plt.scatter(col_id, row_id, s=50, c='white')
        plt.text(col_id + 5, row_id, cla_str + '\n%.2f' % conf, color='white', fontsize=10, fontweight='black')
    plt.axis('off')
    plt.title("RODNet Detection")
    plt.show()
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from rodnet.core.object_class import get_class_name
from .fig_configs import fig, fp, symbols
def visualize_train_img_old(fig_name, input_radar, output_confmap, confmap_gt):
fig = plt.figure(figsize=(8, 4))
img = input_radar
fig.add_subplot(1, 3, 1)
plt.imshow(img, vmin=0, vmax=1, origin='lower', aspect='auto')
img = output_confmap
fig.add_subplot(1, 3, 2)
plt.imshow(img, vmin=0, vmax=1, origin='lower', aspect='auto')
img = confmap_gt
fig.add_subplot(1, 3, 3)
plt.imshow(img, vmin=0, vmax=1, origin='lower', aspect='auto')
plt.savefig(fig_name)
plt.close(fig)
def visualize_train_img(fig_name, img_path, input_radar, output_confmap, confmap_gt):
fig = plt.figure(figsize=(8, 8))
img_data = mpimg.imread(img_path)
fig.add_subplot(2, 2, 1)
plt.imshow(img_data.astype(np.uint8))
fig.add_subplot(2, 2, 2)
plt.imshow(input_radar, origin='lower', aspect='auto')
fig.add_subplot(2, 2, 3)
output_confmap = np.transpose(output_confmap, (1, 2, 0))
output_confmap[output_confmap < 0] = 0
plt.imshow(output_confmap, vmin=0, vmax=1, origin='lower', aspect='auto')
fig.add_subplot(2, 2, 4)
confmap_gt = np.transpose(confmap_gt, (1, 2, 0))
plt.imshow(confmap_gt, vmin=0, vmax=1, origin='lower', aspect='auto')
plt.savefig(fig_name)
plt.close(fig)
def visualize_test_img(fig_name, img_path, input_radar, confmap_pred, confmap_gt, res_final, dataset, viz=False,
sybl=False):
max_dets, _ = res_final.shape
classes = dataset.object_cfg.classes
img_data = mpimg.imread(img_path)
if img_data.shape[0] > 864:
img_data = img_data[:img_data.shape[0] // 5 * 4, :, :]
fig.add_subplot(2, 2, 1)
plt.imshow(img_data.astype(np.uint8))
plt.axis('off')
plt.title("Image")
fig.add_subplot(2, 2, 2)
plt.imshow(input_radar, origin='lower', aspect='auto')
plt.axis('off')
plt.title("RA Heatmap")
fig.add_subplot(2, 2, 3)
confmap_pred = np.transpose(confmap_pred, (1, 2, 0))
confmap_pred[confmap_pred < 0] = 0
confmap_pred[confmap_pred > 1] = 1
plt.imshow(confmap_pred, vmin=0, vmax=1, origin='lower', aspect='auto')
for d in range(max_dets):
cla_id = int(res_final[d, 0])
if cla_id == -1:
continue
row_id = res_final[d, 1]
col_id = res_final[d, 2]
conf = res_final[d, 3]
conf = 1.0 if conf > 1 else conf
cla_str = get_class_name(cla_id, classes)
if sybl:
text = symbols[cla_str]
plt.text(col_id, row_id + 3, text, fontproperties=fp, color='white', size=20, ha="center")
else:
plt.scatter(col_id, row_id, s=10, c='white')
text = cla_str + '\n%.2f' % conf
plt.text(col_id + 5, row_id, text, color='white', fontsize=10)
plt.axis('off')
plt.title("RODNet Detection")
fig.add_subplot(2, 2, 4)
confmap_gt = np.transpose(confmap_gt, (1, 2, 0))
plt.imshow(confmap_gt, vmin=0, vmax=1, origin='lower', aspect='auto')
plt.axis('off')
plt.title("Ground Truth")
plt.savefig(fig_name)
if viz:
plt.pause(0.1)
plt.clf()
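# Note: res_final rows are assumed to be [class_id, row, col, confidence], with
# class_id == -1 marking unused detection slots; the same layout is assumed by
# visualize_test_img_wo_gt below.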
def visualize_test_img_wo_gt(fig_name, img_path, input_radar, confmap_pred, res_final, dataset, viz=False,
sybl=False):
max_dets, _ = res_final.shape
classes = dataset.object_cfg.classes
fig.set_size_inches(12, 4)
img_data = mpimg.imread(img_path)
if img_data.shape[0] > 864:
img_data = img_data[:img_data.shape[0] // 5 * 4, :, :]
fig.add_subplot(1, 3, 1)
plt.imshow(img_data.astype(np.uint8))
plt.axis('off')
plt.title("RGB Image")
fig.add_subplot(1, 3, 2)
input_radar[input_radar < 0] = 0
input_radar[input_radar > 1] = 1
plt.imshow(input_radar, vmin=0, vmax=1, origin='lower', aspect='auto')
plt.axis('off')
plt.title("RF Image")
fig.add_subplot(1, 3, 3)
confmap_pred = np.transpose(confmap_pred, (1, 2, 0))
confmap_pred[confmap_pred < 0] = 0
confmap_pred[confmap_pred > 1] = 1
plt.imshow(confmap_pred, vmin=0, vmax=1, origin='lower', aspect='auto')
for d in range(max_dets):
cla_id = int(res_final[d, 0])
if cla_id == -1:
continue
row_id = res_final[d, 1]
col_id = res_final[d, 2]
conf = res_final[d, 3]
conf = 1.0 if conf > 1 else conf
cla_str = get_class_name(cla_id, classes)
if sybl:
text = symbols[cla_str]
plt.text(col_id - 3, row_id + 2, text, fontproperties=fp, color='white', size=20)
else:
plt.scatter(col_id, row_id, s=10, c='white')
text = cla_str + '\n%.2f' % conf
plt.text(col_id + 5, row_id, text, color='white', fontsize=10)
plt.axis('off')
plt.title("RODNet Detections")
plt.savefig(fig_name)
if viz:
plt.pause(0.1)
plt.clf()