Commit b952e97b authored by chenych's avatar chenych
Browse files

First Commit.

parents
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import time
import torch
from external.nms import soft_nms
from models.decode import ddd_decode
from models.utils import flip_tensor
from utils.image import get_affine_transform
from utils.post_process import ddd_post_process
from utils.debugger import Debugger
from utils.ddd_utils import compute_box_3d, project_to_image, alpha2rot_y
from utils.ddd_utils import draw_box_3d, unproject_2d_to_3d
from progress.bar import Bar
from .base_detector import BaseDetector
import numpy as np
class DddDetector(BaseDetector):
def __init__(self, opt):
super(DddDetector, self).__init__(opt)
self.calib = np.array([[707.0493, 0, 604.0814, 45.75831],
[0, 707.0493, 180.5066, -0.3454157],
[0, 0, 1., 0.004981016]], dtype=np.float32)
def pre_process(self, image, scale, calib=None):
height, width = image.shape[0:2]
inp_height, inp_width = self.opt.input_h, self.opt.input_w
c = np.array([width / 2, height / 2], dtype=np.float32)
if self.opt.keep_res:
s = np.array([inp_width, inp_height], dtype=np.int32)
else:
s = np.array([width, height], dtype=np.int32)
trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
resized_image = image # cv2.resize(image, (width, height))
inp_image = cv2.warpAffine(
resized_image, trans_input, (inp_width, inp_height),
flags=cv2.INTER_LINEAR)
inp_image = (inp_image.astype(np.float32) / 255.)
inp_image = (inp_image - self.mean) / self.std
images = inp_image.transpose(2, 0, 1)[np.newaxis, ...]
calib = np.array(calib, dtype=np.float32) if calib is not None \
else self.calib
images = torch.from_numpy(images)
meta = {'c': c, 's': s,
'out_height': inp_height // self.opt.down_ratio,
'out_width': inp_width // self.opt.down_ratio,
'calib': calib}
return images, meta
def process(self, images, return_time=False):
with torch.no_grad():
torch.cuda.synchronize()
output = self.model(images)[-1]
output['hm'] = output['hm'].sigmoid_()
output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1.
wh = output['wh'] if self.opt.reg_bbox else None
reg = output['reg'] if self.opt.reg_offset else None
torch.cuda.synchronize()
forward_time = time.time()
dets = ddd_decode(output['hm'], output['rot'], output['dep'],
output['dim'], wh=wh, reg=reg, K=self.opt.K)
if return_time:
return output, dets, forward_time
else:
return output, dets
def post_process(self, dets, meta, scale=1):
dets = dets.detach().cpu().numpy()
detections = ddd_post_process(
dets.copy(), [meta['c']], [meta['s']], [meta['calib']], self.opt)
self.this_calib = meta['calib']
return detections[0]
def merge_outputs(self, detections):
results = detections[0]
for j in range(1, self.num_classes + 1):
if len(results[j] > 0):
keep_inds = (results[j][:, -1] > self.opt.peak_thresh)
results[j] = results[j][keep_inds]
return results
def debug(self, debugger, images, dets, output, scale=1):
dets = dets.detach().cpu().numpy()
img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
img = ((img * self.std + self.mean) * 255).astype(np.uint8)
pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hm')
debugger.add_ct_detection(
img, dets[0], show_box=self.opt.reg_bbox,
center_thresh=self.opt.vis_thresh, img_id='det_pred')
def show_results(self, debugger, image, results):
debugger.add_3d_detection(
image, results, self.this_calib,
center_thresh=self.opt.vis_thresh, img_id='add_pred')
debugger.add_bird_view(
results, center_thresh=self.opt.vis_thresh, img_id='bird_pred')
debugger.show_all_imgs(pause=self.pause)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from .exdet import ExdetDetector
from .ddd import DddDetector
from .ctdet import CtdetDetector
from .multi_pose import MultiPoseDetector
detector_factory = {
'exdet': ExdetDetector,
'ddd': DddDetector,
'ctdet': CtdetDetector,
'multi_pose': MultiPoseDetector,
}
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import _init_paths
import os
import cv2
import time
import torch
from external.nms import soft_nms
from models.decode import exct_decode, agnex_ct_decode
from models.utils import flip_tensor
from utils.image import get_affine_transform, transform_preds
from utils.post_process import ctdet_post_process
from utils.debugger import Debugger
from progress.bar import Bar
from .base_detector import BaseDetector
import numpy as np
class ExdetDetector(BaseDetector):
def __init__(self, opt):
super(ExdetDetector, self).__init__(opt)
self.decode = agnex_ct_decode if opt.agnostic_ex else exct_decode
def process(self, images, return_time=False):
with torch.no_grad():
torch.cuda.synchronize()
output = self.model(images)[-1]
t_heat = output['hm_t'].sigmoid_()
l_heat = output['hm_l'].sigmoid_()
b_heat = output['hm_b'].sigmoid_()
r_heat = output['hm_r'].sigmoid_()
c_heat = output['hm_c'].sigmoid_()
torch.cuda.synchronize()
forward_time = time.time()
if self.opt.reg_offset:
dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat,
output['reg_t'], output['reg_l'],
output['reg_b'], output['reg_r'],
K=self.opt.K,
scores_thresh=self.opt.scores_thresh,
center_thresh=self.opt.center_thresh,
aggr_weight=self.opt.aggr_weight)
else:
dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat, K=self.opt.K,
scores_thresh=self.opt.scores_thresh,
center_thresh=self.opt.center_thresh,
aggr_weight=self.opt.aggr_weight)
if return_time:
return output, dets, forward_time
else:
return output, dets
def debug(self, debugger, images, dets, output, scale=1):
detection = dets.detach().cpu().numpy().copy()
detection[:, :, :4] *= self.opt.down_ratio
for i in range(1):
inp_height, inp_width = images.shape[2], images.shape[3]
pred_hm = np.zeros((inp_height, inp_width, 3), dtype=np.uint8)
img = images[i].detach().cpu().numpy().transpose(1, 2, 0)
img = ((img * self.std + self.mean) * 255).astype(np.uint8)
parts = ['t', 'l', 'b', 'r', 'c']
for p in parts:
tag = 'hm_{}'.format(p)
pred = debugger.gen_colormap(
output[tag][i].detach().cpu().numpy(), (inp_height, inp_width))
if p != 'c':
pred_hm = np.maximum(pred_hm, pred)
else:
debugger.add_blend_img(
img, pred, 'pred_{}_{:.1f}'.format(p, scale))
debugger.add_blend_img(img, pred_hm, 'pred_{:.1f}'.format(scale))
debugger.add_img(img, img_id='out_{:.1f}'.format(scale))
for k in range(len(detection[i])):
# print('detection', detection[i, k, 4], detection[i, k])
if detection[i, k, 4] > 0.01:
# print('detection', detection[i, k, 4], detection[i, k])
debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1],
detection[i, k, 4],
img_id='out_{:.1f}'.format(scale))
def post_process(self, dets, meta, scale=1):
out_width, out_height = meta['out_width'], meta['out_height']
dets = dets.detach().cpu().numpy().reshape(2, -1, 14)
dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
dets = dets.reshape(1, -1, 14)
dets[0, :, 0:2] = transform_preds(
dets[0, :, 0:2], meta['c'], meta['s'], (out_width, out_height))
dets[0, :, 2:4] = transform_preds(
dets[0, :, 2:4], meta['c'], meta['s'], (out_width, out_height))
dets[:, :, 0:4] /= scale
return dets[0]
def merge_outputs(self, detections):
detections = np.concatenate(
[detection for detection in detections], axis=0).astype(np.float32)
classes = detections[..., -1]
keep_inds = (detections[:, 4] > 0)
detections = detections[keep_inds]
classes = classes[keep_inds]
results = {}
for j in range(self.num_classes):
keep_inds = (classes == j)
results[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
soft_nms(results[j + 1], Nt=0.5, method=2)
results[j + 1] = results[j + 1][:, 0:5]
scores = np.hstack([
results[j][:, -1]
for j in range(1, self.num_classes + 1)
])
if len(scores) > self.max_per_image:
kth = len(scores) - self.max_per_image
thresh = np.partition(scores, kth)[kth]
for j in range(1, self.num_classes + 1):
keep_inds = (results[j][:, -1] >= thresh)
results[j] = results[j][keep_inds]
return results
def show_results(self, debugger, image, results):
debugger.add_img(image, img_id='exdet')
for j in range(1, self.num_classes + 1):
for bbox in results[j]:
if bbox[4] > self.opt.vis_thresh:
debugger.add_coco_bbox(
bbox[:4], j - 1, bbox[4], img_id='exdet')
debugger.show_all_imgs(pause=self.pause)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import time
import torch
from external.nms import soft_nms_39
from models.decode import multi_pose_decode, centerface_decode
from models.utils import flip_tensor, flip_lr_off, flip_lr
from utils.image import get_affine_transform
from utils.post_process import multi_pose_post_process
from utils.debugger import Debugger
from progress.bar import Bar
from .base_detector import BaseDetector
import numpy as np
class MultiPoseDetector(BaseDetector):
def __init__(self, opt):
super(MultiPoseDetector, self).__init__(opt)
self.flip_idx = opt.flip_idx
def process(self, images, return_time=False):
with torch.no_grad():
torch.cuda.synchronize()
output = self.model(images)[-1]
output['hm'] = output['hm']
# if self.opt.hm_hp and not self.opt.mse_loss:
# output['hm_hp'] = output['hm_hp'].sigmoid_()
reg = output['hm_offset'] if self.opt.reg_offset else None
# hm_hp = output['hm_hp'] if self.opt.hm_hp else None
# hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None
torch.cuda.synchronize()
forward_time = time.time()
if self.opt.flip_test:
output['hm'] = (output['hm'][0:1] +
flip_tensor(output['hm'][1:2])) / 2
output['wh'] = (output['wh'][0:1] +
flip_tensor(output['wh'][1:2])) / 2
output['hps'] = (output['hps'][0:1] +
flip_lr_off(output['hps'][1:2], self.flip_idx)) / 2
hm_hp = (hm_hp[0:1] + flip_lr(hm_hp[1:2], self.flip_idx)) / 2 \
if hm_hp is not None else None
reg = reg[0:1] if reg is not None else None
hp_offset = hp_offset[0:1] if hp_offset is not None else None
dets = centerface_decode(
output['hm'], output['wh'], output['landmarks'],
reg=reg, K=self.opt.K)
if return_time:
return output, dets, forward_time
else:
return output, dets
def post_process(self, dets, meta, scale=1):
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
dets = multi_pose_post_process(
dets.copy(), [meta['c']], [meta['s']],
meta['out_height'], meta['out_width'])
for j in range(1, self.num_classes + 1):
dets[0][j] = np.array(
dets[0][j], dtype=np.float32).reshape(-1, 15) # 关键点数+5=15
# import pdb; pdb.set_trace()
dets[0][j][:, :4] /= scale
dets[0][j][:, 5:] /= scale
return dets[0]
def merge_outputs(self, detections):
results = {}
results[1] = np.concatenate(
[detection[1] for detection in detections], axis=0).astype(np.float32)
if self.opt.nms or len(self.opt.test_scales) > 1:
soft_nms_39(results[1], Nt=0.5, method=2)
results[1] = results[1].tolist()
return results
def debug(self, debugger, images, dets, output, scale=1):
dets = dets.detach().cpu().numpy().copy()
dets[:, :, :4] *= self.opt.down_ratio
dets[:, :, 5:39] *= self.opt.down_ratio
img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
img = np.clip(((
img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8)
pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hm')
if self.opt.hm_hp:
pred = debugger.gen_colormap_hp(
output['hm_hp'][0].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hmhp')
def show_results(self, debugger, image, results):
debugger.add_img(image, img_id='multi_pose')
for bbox in results[1]:
if bbox[4] > self.opt.vis_thresh:
debugger.add_coco_bbox(
bbox[:4], 0, bbox[4], img_id='multi_pose')
debugger.add_coco_hp(bbox[5:39], img_id='multi_pose')
debugger.show_all_imgs(pause=self.pause)
def return_results(self, debugger, image, results):
debugger.add_img(image, img_id='multi_pose')
for bbox in results[1]:
if bbox[4] > self.opt.vis_thresh:
debugger.add_coco_bbox(
bbox[:4], 0, bbox[4], img_id='multi_pose')
debugger.add_coco_hp(bbox[5:39], img_id='multi_pose')
return debugger.return_img(img_id='multi_pose')
all:
python setup.py build_ext --inplace
rm -rf build
This diff is collapsed.
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
# ----------------------------------------------------------
# Soft-NMS: Improving Object Detection With One Line of Code
# Copyright (c) University of Maryland, College Park
# Licensed under The MIT License [see LICENSE for details]
# Written by Navaneeth Bodla and Bharat Singh
# ----------------------------------------------------------
import numpy as np
cimport numpy as np
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
return a if a >= b else b
cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
return a if a <= b else b
def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
cdef int ndets = dets.shape[0]
cdef np.ndarray[np.int_t, ndim=1] suppressed = \
np.zeros((ndets), dtype=np.int)
# nominal indices
cdef int _i, _j
# sorted indices
cdef int i, j
# temp variables for box i's (the box currently under consideration)
cdef np.float32_t ix1, iy1, ix2, iy2, iarea
# variables for computing overlap with box j (lower scoring box)
cdef np.float32_t xx1, yy1, xx2, yy2
cdef np.float32_t w, h
cdef np.float32_t inter, ovr
keep = []
for _i in range(ndets):
i = order[_i]
if suppressed[i] == 1:
continue
keep.append(i)
ix1 = x1[i]
iy1 = y1[i]
ix2 = x2[i]
iy2 = y2[i]
iarea = areas[i]
for _j in range(_i + 1, ndets):
j = order[_j]
if suppressed[j] == 1:
continue
xx1 = max(ix1, x1[j])
yy1 = max(iy1, y1[j])
xx2 = min(ix2, x2[j])
yy2 = min(iy2, y2[j])
w = max(0.0, xx2 - xx1 + 1)
h = max(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (iarea + areas[j] - inter)
if ovr >= thresh:
suppressed[j] = 1
return keep
def soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
cdef unsigned int N = boxes.shape[0]
cdef float iw, ih, box_area
cdef float ua
cdef int pos = 0
cdef float maxscore = 0
cdef int maxpos = 0
cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
for i in range(N):
maxscore = boxes[i, 4]
maxpos = i
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# get max box
while pos < N:
if maxscore < boxes[pos, 4]:
maxscore = boxes[pos, 4]
maxpos = pos
pos = pos + 1
# add max box as a detection
boxes[i,0] = boxes[maxpos,0]
boxes[i,1] = boxes[maxpos,1]
boxes[i,2] = boxes[maxpos,2]
boxes[i,3] = boxes[maxpos,3]
boxes[i,4] = boxes[maxpos,4]
# swap ith box with position of max box
boxes[maxpos,0] = tx1
boxes[maxpos,1] = ty1
boxes[maxpos,2] = tx2
boxes[maxpos,3] = ty2
boxes[maxpos,4] = ts
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# NMS iterations, note that N changes if detection boxes fall below threshold
while pos < N:
x1 = boxes[pos, 0]
y1 = boxes[pos, 1]
x2 = boxes[pos, 2]
y2 = boxes[pos, 3]
s = boxes[pos, 4]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
iw = (min(tx2, x2) - max(tx1, x1) + 1)
if iw > 0:
ih = (min(ty2, y2) - max(ty1, y1) + 1)
if ih > 0:
ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
ov = iw * ih / ua #iou between max box and detection box
if method == 1: # linear
if ov > Nt:
weight = 1 - ov
else:
weight = 1
elif method == 2: # gaussian
weight = np.exp(-(ov * ov)/sigma)
else: # original NMS
if ov > Nt:
weight = 0
else:
weight = 1
boxes[pos, 4] = weight*boxes[pos, 4]
# if box score falls below threshold, discard the box by swapping with last box
# update N
if boxes[pos, 4] < threshold:
boxes[pos,0] = boxes[N-1, 0]
boxes[pos,1] = boxes[N-1, 1]
boxes[pos,2] = boxes[N-1, 2]
boxes[pos,3] = boxes[N-1, 3]
boxes[pos,4] = boxes[N-1, 4]
N = N - 1
pos = pos - 1
pos = pos + 1
keep = [i for i in range(N)]
return keep
def soft_nms_39(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
cdef unsigned int N = boxes.shape[0]
cdef float iw, ih, box_area
cdef float ua
cdef int pos = 0
cdef float maxscore = 0
cdef int maxpos = 0
cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
cdef float tmp
for i in range(N):
maxscore = boxes[i, 4]
maxpos = i
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# get max box
while pos < N:
if maxscore < boxes[pos, 4]:
maxscore = boxes[pos, 4]
maxpos = pos
pos = pos + 1
# add max box as a detection
boxes[i,0] = boxes[maxpos,0]
boxes[i,1] = boxes[maxpos,1]
boxes[i,2] = boxes[maxpos,2]
boxes[i,3] = boxes[maxpos,3]
boxes[i,4] = boxes[maxpos,4]
# swap ith box with position of max box
boxes[maxpos,0] = tx1
boxes[maxpos,1] = ty1
boxes[maxpos,2] = tx2
boxes[maxpos,3] = ty2
boxes[maxpos,4] = ts
for j in range(5, 39):
tmp = boxes[i, j]
boxes[i, j] = boxes[maxpos, j]
boxes[maxpos, j] = tmp
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# NMS iterations, note that N changes if detection boxes fall below threshold
while pos < N:
x1 = boxes[pos, 0]
y1 = boxes[pos, 1]
x2 = boxes[pos, 2]
y2 = boxes[pos, 3]
s = boxes[pos, 4]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
iw = (min(tx2, x2) - max(tx1, x1) + 1)
if iw > 0:
ih = (min(ty2, y2) - max(ty1, y1) + 1)
if ih > 0:
ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
ov = iw * ih / ua #iou between max box and detection box
if method == 1: # linear
if ov > Nt:
weight = 1 - ov
else:
weight = 1
elif method == 2: # gaussian
weight = np.exp(-(ov * ov)/sigma)
else: # original NMS
if ov > Nt:
weight = 0
else:
weight = 1
boxes[pos, 4] = weight*boxes[pos, 4]
# if box score falls below threshold, discard the box by swapping with last box
# update N
if boxes[pos, 4] < threshold:
boxes[pos,0] = boxes[N-1, 0]
boxes[pos,1] = boxes[N-1, 1]
boxes[pos,2] = boxes[N-1, 2]
boxes[pos,3] = boxes[N-1, 3]
boxes[pos,4] = boxes[N-1, 4]
for j in range(5, 39):
tmp = boxes[pos, j]
boxes[pos, j] = boxes[N - 1, j]
boxes[N - 1, j] = tmp
N = N - 1
pos = pos - 1
pos = pos + 1
keep = [i for i in range(N)]
return keep
def soft_nms_merge(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0, float weight_exp=6):
cdef unsigned int N = boxes.shape[0]
cdef float iw, ih, box_area
cdef float ua
cdef int pos = 0
cdef float maxscore = 0
cdef int maxpos = 0
cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
cdef float mx1,mx2,my1,my2,mts,mbs,mw
for i in range(N):
maxscore = boxes[i, 4]
maxpos = i
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# get max box
while pos < N:
if maxscore < boxes[pos, 4]:
maxscore = boxes[pos, 4]
maxpos = pos
pos = pos + 1
# add max box as a detection
boxes[i,0] = boxes[maxpos,0]
boxes[i,1] = boxes[maxpos,1]
boxes[i,2] = boxes[maxpos,2]
boxes[i,3] = boxes[maxpos,3]
boxes[i,4] = boxes[maxpos,4]
mx1 = boxes[i, 0] * boxes[i, 5]
my1 = boxes[i, 1] * boxes[i, 5]
mx2 = boxes[i, 2] * boxes[i, 6]
my2 = boxes[i, 3] * boxes[i, 6]
mts = boxes[i, 5]
mbs = boxes[i, 6]
# swap ith box with position of max box
boxes[maxpos,0] = tx1
boxes[maxpos,1] = ty1
boxes[maxpos,2] = tx2
boxes[maxpos,3] = ty2
boxes[maxpos,4] = ts
tx1 = boxes[i,0]
ty1 = boxes[i,1]
tx2 = boxes[i,2]
ty2 = boxes[i,3]
ts = boxes[i,4]
pos = i + 1
# NMS iterations, note that N changes if detection boxes fall below threshold
while pos < N:
x1 = boxes[pos, 0]
y1 = boxes[pos, 1]
x2 = boxes[pos, 2]
y2 = boxes[pos, 3]
s = boxes[pos, 4]
area = (x2 - x1 + 1) * (y2 - y1 + 1)
iw = (min(tx2, x2) - max(tx1, x1) + 1)
if iw > 0:
ih = (min(ty2, y2) - max(ty1, y1) + 1)
if ih > 0:
ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
ov = iw * ih / ua #iou between max box and detection box
if method == 1: # linear
if ov > Nt:
weight = 1 - ov
else:
weight = 1
elif method == 2: # gaussian
weight = np.exp(-(ov * ov)/sigma)
else: # original NMS
if ov > Nt:
weight = 0
else:
weight = 1
mw = (1 - weight) ** weight_exp
mx1 = mx1 + boxes[pos, 0] * boxes[pos, 5] * mw
my1 = my1 + boxes[pos, 1] * boxes[pos, 5] * mw
mx2 = mx2 + boxes[pos, 2] * boxes[pos, 6] * mw
my2 = my2 + boxes[pos, 3] * boxes[pos, 6] * mw
mts = mts + boxes[pos, 5] * mw
mbs = mbs + boxes[pos, 6] * mw
boxes[pos, 4] = weight*boxes[pos, 4]
# if box score falls below threshold, discard the box by swapping with last box
# update N
if boxes[pos, 4] < threshold:
boxes[pos,0] = boxes[N-1, 0]
boxes[pos,1] = boxes[N-1, 1]
boxes[pos,2] = boxes[N-1, 2]
boxes[pos,3] = boxes[N-1, 3]
boxes[pos,4] = boxes[N-1, 4]
N = N - 1
pos = pos - 1
pos = pos + 1
boxes[i, 0] = mx1 / mts
boxes[i, 1] = my1 / mts
boxes[i, 2] = mx2 / mbs
boxes[i, 3] = my2 / mbs
keep = [i for i in range(N)]
return keep
import numpy
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize
extensions = [
Extension(
"nms",
["nms.pyx"],
extra_compile_args=["-Wno-cpp", "-Wno-unused-function"]
)
]
setup(
name="coco",
ext_modules=cythonize(extensions),
include_dirs=[numpy.get_include()]
)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
import os
import time
import sys
import torch
USE_TENSORBOARD = True
try:
import tensorboardX
print('Using tensorboardX')
except:
USE_TENSORBOARD = False
class Logger(object):
def __init__(self, opt):
"""Create a summary writer logging to log_dir."""
if not os.path.exists(opt.save_dir):
os.makedirs(opt.save_dir)
if not os.path.exists(opt.debug_dir):
os.makedirs(opt.debug_dir)
time_str = time.strftime('%Y-%m-%d-%H-%M')
args = dict((name, getattr(opt, name)) for name in dir(opt)
if not name.startswith('_'))
file_name = os.path.join(opt.save_dir, 'opt.txt')
with open(file_name, 'wt') as opt_file:
opt_file.write('==> torch version: {}\n'.format(torch.__version__))
opt_file.write('==> cudnn version: {}\n'.format(
torch.backends.cudnn.version()))
opt_file.write('==> Cmd:\n')
opt_file.write(str(sys.argv))
opt_file.write('\n==> Opt:\n')
for k, v in sorted(args.items()):
opt_file.write(' %s: %s\n' % (str(k), str(v)))
log_dir = opt.save_dir + '/logs_{}'.format(time_str)
if USE_TENSORBOARD:
self.writer = tensorboardX.SummaryWriter(logdir=log_dir)
else:
if not os.path.exists(os.path.dirname(log_dir)):
os.mkdir(os.path.dirname(log_dir))
if not os.path.exists(log_dir):
os.mkdir(log_dir)
self.log = open(log_dir + '/log.txt', 'w')
try:
os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir))
except:
pass
self.start_line = True
def write(self, txt):
if self.start_line:
time_str = time.strftime('%Y-%m-%d-%H-%M')
self.log.write('{}: {}'.format(time_str, txt))
else:
self.log.write(txt)
self.start_line = False
if '\n' in txt:
self.start_line = True
self.log.flush()
def close(self):
self.log.close()
def scalar_summary(self, tag, value, step):
"""Log a scalar variable."""
if USE_TENSORBOARD:
self.writer.add_scalar(tag, value, step)
from torch import nn
import torch.utils.model_zoo as model_zoo
from collections import OrderedDict
import math
__all__ = ['MobileNetV2']
model_urls = {
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNReLU(nn.Sequential):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
padding = (kernel_size - 1) // 2
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
nn.BatchNorm2d(out_planes),
nn.ReLU6(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self,width_mult=1.0,round_nearest=8,):
super(MobileNetV2, self).__init__()
block = InvertedResidual
input_channel = 32
inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1], # 0
[6, 24, 2, 2], # 1
[6, 32, 3, 2], # 2
[6, 64, 4, 2], # 3
[6, 96, 3, 1], # 4
[6, 160, 3, 2],# 5
[6, 320, 1, 1],# 6
]
self.feat_id = [1,2,4,6]
self.feat_channel = []
# only check the first element, assuming user knows t,c,n,s are required
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
raise ValueError("inverted_residual_setting should be non-empty "
"or a 4-element list, got {}".format(inverted_residual_setting))
# building first layer
input_channel = _make_divisible(input_channel * width_mult, round_nearest)
features = [ConvBNReLU(3, input_channel, stride=2)]
# building inverted residual blocks
for id,(t, c, n, s) in enumerate(inverted_residual_setting):
output_channel = _make_divisible(c * width_mult, round_nearest)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
input_channel = output_channel
if id in self.feat_id :
self.__setattr__("feature_%d"%id,nn.Sequential(*features))
self.feat_channel.append(output_channel)
features = []
# weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
def forward(self, x):
y = []
for id in self.feat_id:
x = self.__getattr__("feature_%d"%id)(x)
y.append(x)
return y
def load_model(model,state_dict):
new_model=model.state_dict()
new_keys = list(new_model.keys())
old_keys = list(state_dict.keys())
restore_dict = OrderedDict()
for id in range(len(new_keys)):
restore_dict[new_keys[id]] = state_dict[old_keys[id]]
model.load_state_dict(restore_dict)
def dict2list(func):
def wrap(*args, **kwargs):
self = args[0]
x = args[1]
ret_list = []
ret = func(self, x)
for k, v in ret[0].items():
ret_list.append(v)
return ret_list
return wrap
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, std=0.001)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
class IDAUp(nn.Module):
def __init__(self, out_dim, channel):
super(IDAUp, self).__init__()
self.out_dim = out_dim
self.up = nn.Sequential(
nn.ConvTranspose2d(
out_dim, out_dim, kernel_size=2, stride=2, padding=0,
output_padding=0, groups=out_dim, bias=False),
nn.BatchNorm2d(out_dim,eps=0.001,momentum=0.1),
nn.ReLU())
self.conv = nn.Sequential(
nn.Conv2d(channel, out_dim,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(out_dim,eps=0.001,momentum=0.1),
nn.ReLU(inplace=True))
def forward(self, layers):
layers = list(layers)
x = self.up(layers[0])
y = self.conv(layers[1])
out = x + y
return out
class MobileNetUp(nn.Module):
def __init__(self, channels, out_dim = 24):
super(MobileNetUp, self).__init__()
channels = channels[::-1]
self.conv = nn.Sequential(
nn.Conv2d(channels[0], out_dim,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(out_dim,eps=0.001,momentum=0.1),
nn.ReLU(inplace=True))
self.conv_last = nn.Sequential(
nn.Conv2d(out_dim,out_dim,
kernel_size=3, stride=1, padding=1 ,bias=False),
nn.BatchNorm2d(out_dim,eps=1e-5,momentum=0.01),
nn.ReLU(inplace=True))
for i,channel in enumerate(channels[1:]):
setattr(self,'up_%d'%(i),IDAUp(out_dim,channel))
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m,nn.ConvTranspose2d):
fill_up_weights(m)
def forward(self, layers):
layers = list(layers)
assert len(layers) > 1
x = self.conv(layers[-1])
for i in range(0,len(layers)-1):
up = getattr(self, 'up_{}'.format(i))
x = up([x,layers[len(layers)-2-i]])
x = self.conv_last(x)
return x
class MobileNetSeg(nn.Module):
def __init__(self, base_name,heads,head_conv=24, pretrained = True):
super(MobileNetSeg, self).__init__()
self.heads = heads
self.base = globals()[base_name](
pretrained=pretrained)
channels = self.base.feat_channel
self.dla_up = MobileNetUp(channels, out_dim=head_conv)
for head in self.heads:
classes = self.heads[head]
if head == 'hm':
fc = nn.Sequential(
nn.Conv2d(head_conv, classes,
kernel_size=1, stride=1,
padding=0, bias=True),
nn.Sigmoid()
)
else:
fc = nn.Conv2d(head_conv, classes,
kernel_size=1, stride=1,
padding=0, bias=True)
# if 'hm' in head:
# fc.bias.data.fill_(-2.19)
# else:
# nn.init.normal_(fc.weight, std=0.001)
# nn.init.constant_(fc.bias, 0)
self.__setattr__(head, fc)
# @dict2list # 转onnx的时候需要将输出由dict转成list模式
def forward(self, x):
x = self.base(x)
x = self.dla_up(x)
ret = {}
for head in self.heads:
ret[head] = self.__getattr__(head)(x)
return [ret]
def mobilenetv2_10(pretrained=True, **kwargs):
model = MobileNetV2(width_mult=1.0)
if pretrained:
state_dict = model_zoo.load_url(model_urls['mobilenet_v2'],
progress=True)
load_model(model,state_dict)
return model
def mobilenetv2_5(pretrained=False, **kwargs):
model = MobileNetV2(width_mult=0.5)
if pretrained:
print('This version does not have pretrain weights.')
return model
# num_layers : [10 , 5]
def get_mobile_net(num_layers, heads, head_conv=24):
model = MobileNetSeg('mobilenetv2_{}'.format(num_layers), heads,
pretrained=True,
head_conv=head_conv)
return model
if __name__ == '__main__':
import torch
input = torch.zeros([1,3,416,416])
model = get_mobile_net(10,{'hm':1, 'hm_offset':2, 'wh':2, 'landmarks':10},head_conv=24) # hm reference for the classes of objects//这个头文件只能做矩形框检测
res = model(input)
print(res.shape)
This diff is collapsed.
import math
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.functional as F
class BasicBlock(nn.Module):
def __init__(self, inplanes, planes):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes[0], kernel_size=1,
stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(planes[0])
self.relu1 = nn.LeakyReLU(0.1)
self.conv2 = nn.Conv2d(planes[0], planes[1], kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes[1])
self.relu2 = nn.LeakyReLU(0.1)
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu1(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu2(out)
out += residual
return out
class DarkNet(nn.Module):
def __init__(self, layers):
super(DarkNet, self).__init__()
self.inplanes = 32
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(self.inplanes)
self.relu1 = nn.LeakyReLU(0.1)
self.layer1 = self._make_layer([32, 64], layers[0])
self.layer2 = self._make_layer([64, 128], layers[1])
self.layer3 = self._make_layer([128, 256], layers[2])
#self.layer4 = self._make_layer([256, 512], layers[3])
#self.layer5 = self._make_layer([512, 1024], layers[4])
self.layers_out_filters = [64, 128, 256]
for m in self.modules():
if isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, planes, blocks):
layers = []
# downsample
layers.append(("ds_conv", nn.Conv2d(self.inplanes, planes[1], kernel_size=3,
stride=2, padding=1, bias=False)))
layers.append(("ds_bn", nn.BatchNorm2d(planes[1])))
layers.append(("ds_relu", nn.LeakyReLU(0.1)))
# blocks
self.inplanes = planes[1]
for i in range(0, blocks):
layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes)))
return nn.Sequential(OrderedDict(layers))
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = F.interpolate(x, size=(128, 128),
mode="bilinear", align_corners=True)
return x
def darknet21(cfg,is_train=True, **kwargs):
model = DarkNet([1, 1, 2, 2, 1])
if is_train and cfg.BACKBONE.INIT_WEIGHTS:
if isinstance(cfg.BACKBONE.PRETRAINED, str):
model.load_state_dict(torch.load(cfg.BACKBONE.PRETRAINED))
else:
raise Exception("darknet request a pretrained path. got [{}]".format(cfg.BACKBONE.PRETRAINED))
return model
def darknet53(num_layers, cfg):
model = DarkNet([1, 2, 8])
#if is_train and cfg.BACKBONE.INIT_WEIGHTS:
# if isinstance(cfg.BACKBONE.PRETRAINED, str):
# model.load_state_dict(torch.load(cfg.BACKBONE.PRETRAINED))
# else:
# raise Exception("darknet request a pretrained path. got [{}]".format(cfg.BACKBONE.PRETRAINED))
return model
This diff is collapsed.
from .efficientdet import EfficientDet
def get_efficientdet(num_layers, cfg):
model = EfficientDet(intermediate_channels=cfg.MODEL.INTERMEDIATE_CHANNEL)
return model
This diff is collapsed.
This diff is collapsed.
import torch
import torch.nn as nn
import math
from .efficientnet import EfficientNet
from .bifpn import BIFPN
from .retinahead import RetinaHead
from torchvision.ops import nms
import torch.nn.functional as F
MODEL_MAP = {
'efficientdet-d0': 'efficientnet-b0',
'efficientdet-d1': 'efficientnet-b1',
'efficientdet-d2': 'efficientnet-b2',
'efficientdet-d3': 'efficientnet-b3',
'efficientdet-d4': 'efficientnet-b4',
'efficientdet-d5': 'efficientnet-b5',
'efficientdet-d6': 'efficientnet-b6',
'efficientdet-d7': 'efficientnet-b6',
}
class EfficientDet(nn.Module):
def __init__(self,
intermediate_channels,
network = 'efficientdet-d1',
D_bifpn=3,
W_bifpn=32,
D_class=3,
scale_ratios = [0.5, 1, 2, 4, 8,16,32],
):
super(EfficientDet, self).__init__()
self.backbone = EfficientNet.from_pretrained(MODEL_MAP[network])
self.neck = BIFPN(in_channels=self.backbone.get_list_features(),
out_channels=W_bifpn,
stack=D_bifpn,
num_outs=7)
self.bbox_head = RetinaHead(num_classes = intermediate_channels,
in_channels = W_bifpn)
self.scale_ratios = scale_ratios
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
self.freeze_bn()
def forward(self, inputs):
x = self.extract_feat(inputs)
outs = self.bbox_head(x)
return outs[0][1]
def freeze_bn(self):
'''Freeze BatchNorm layers.'''
for layer in self.modules():
if isinstance(layer, nn.BatchNorm2d):
layer.eval()
def extract_feat(self, img):
"""
Directly extract features from the backbone+neck
"""
x = self.backbone(img)
x = self.neck(x)
return x
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment