First Commit.

b952e97b · chenych · b952e97b · b952e97b · b952e97b · b952e97b
Commit b952e97b authored Nov 03, 2023 by chenych
20 changed files
--- a/src/lib/detectors/ddd.py
+++ b/src/lib/detectors/ddd.py
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import cv2
+import time
+import torch
+
+from external.nms import soft_nms
+from models.decode import ddd_decode
+from models.utils import flip_tensor
+from utils.image import get_affine_transform
+from utils.post_process import ddd_post_process
+from utils.debugger import Debugger
+from utils.ddd_utils import compute_box_3d, project_to_image, alpha2rot_y
+from utils.ddd_utils import draw_box_3d, unproject_2d_to_3d
+from progress.bar import Bar
+from .base_detector import BaseDetector
+
+import numpy as np
+
+
+class DddDetector(BaseDetector):
+    def __init__(self, opt):
+        super(DddDetector, self).__init__(opt)
+        self.calib = np.array([[707.0493, 0, 604.0814, 45.75831],
+                               [0, 707.0493, 180.5066, -0.3454157],
+                               [0, 0, 1., 0.004981016]], dtype=np.float32)
+
+    def pre_process(self, image, scale, calib=None):
+        height, width = image.shape[0:2]
+
+        inp_height, inp_width = self.opt.input_h, self.opt.input_w
+        c = np.array([width / 2, height / 2], dtype=np.float32)
+        if self.opt.keep_res:
+            s = np.array([inp_width, inp_height], dtype=np.int32)
+        else:
+            s = np.array([width, height], dtype=np.int32)
+
+        trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
+        resized_image = image  # cv2.resize(image, (width, height))
+        inp_image = cv2.warpAffine(
+            resized_image, trans_input, (inp_width, inp_height),
+            flags=cv2.INTER_LINEAR)
+        inp_image = (inp_image.astype(np.float32) / 255.)
+        inp_image = (inp_image - self.mean) / self.std
+        images = inp_image.transpose(2, 0, 1)[np.newaxis, ...]
+        calib = np.array(calib, dtype=np.float32) if calib is not None \
+            else self.calib
+        images = torch.from_numpy(images)
+        meta = {'c': c, 's': s,
+                'out_height': inp_height // self.opt.down_ratio,
+                'out_width': inp_width // self.opt.down_ratio,
+                'calib': calib}
+        return images, meta
+
+    def process(self, images, return_time=False):
+        with torch.no_grad():
+            torch.cuda.synchronize()
+            output = self.model(images)[-1]
+            output['hm'] = output['hm'].sigmoid_()
+            output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1.
+            wh = output['wh'] if self.opt.reg_bbox else None
+            reg = output['reg'] if self.opt.reg_offset else None
+            torch.cuda.synchronize()
+            forward_time = time.time()
+
+            dets = ddd_decode(output['hm'], output['rot'], output['dep'],
+                              output['dim'], wh=wh, reg=reg, K=self.opt.K)
+        if return_time:
+            return output, dets, forward_time
+        else:
+            return output, dets
+
+    def post_process(self, dets, meta, scale=1):
+        dets = dets.detach().cpu().numpy()
+        detections = ddd_post_process(
+            dets.copy(), [meta['c']], [meta['s']], [meta['calib']], self.opt)
+        self.this_calib = meta['calib']
+        return detections[0]
+
+    def merge_outputs(self, detections):
+        results = detections[0]
+        for j in range(1, self.num_classes + 1):
+            if len(results[j] > 0):
+                keep_inds = (results[j][:, -1] > self.opt.peak_thresh)
+                results[j] = results[j][keep_inds]
+        return results
+
+    def debug(self, debugger, images, dets, output, scale=1):
+        dets = dets.detach().cpu().numpy()
+        img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
+        img = ((img * self.std + self.mean) * 255).astype(np.uint8)
+        pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
+        debugger.add_blend_img(img, pred, 'pred_hm')
+        debugger.add_ct_detection(
+            img, dets[0], show_box=self.opt.reg_bbox,
+            center_thresh=self.opt.vis_thresh, img_id='det_pred')
+
+    def show_results(self, debugger, image, results):
+        debugger.add_3d_detection(
+            image, results, self.this_calib,
+            center_thresh=self.opt.vis_thresh, img_id='add_pred')
+        debugger.add_bird_view(
+            results, center_thresh=self.opt.vis_thresh, img_id='bird_pred')
+        debugger.show_all_imgs(pause=self.pause)
--- a/src/lib/detectors/detector_factory.py
+++ b/src/lib/detectors/detector_factory.py
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from .exdet import ExdetDetector
+from .ddd import DddDetector
+from .ctdet import CtdetDetector
+from .multi_pose import MultiPoseDetector
+
+detector_factory = {
+    'exdet': ExdetDetector,
+    'ddd': DddDetector,
+    'ctdet': CtdetDetector,
+    'multi_pose': MultiPoseDetector,
+}
--- a/src/lib/detectors/exdet.py
+++ b/src/lib/detectors/exdet.py
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import _init_paths
+
+import os
+import cv2
+import time
+import torch
+
+from external.nms import soft_nms
+from models.decode import exct_decode, agnex_ct_decode
+from models.utils import flip_tensor
+from utils.image import get_affine_transform, transform_preds
+from utils.post_process import ctdet_post_process
+from utils.debugger import Debugger
+from progress.bar import Bar
+from .base_detector import BaseDetector
+
+import numpy as np
+
+
+class ExdetDetector(BaseDetector):
+    def __init__(self, opt):
+        super(ExdetDetector, self).__init__(opt)
+        self.decode = agnex_ct_decode if opt.agnostic_ex else exct_decode
+
+    def process(self, images, return_time=False):
+        with torch.no_grad():
+            torch.cuda.synchronize()
+            output = self.model(images)[-1]
+            t_heat = output['hm_t'].sigmoid_()
+            l_heat = output['hm_l'].sigmoid_()
+            b_heat = output['hm_b'].sigmoid_()
+            r_heat = output['hm_r'].sigmoid_()
+            c_heat = output['hm_c'].sigmoid_()
+            torch.cuda.synchronize()
+            forward_time = time.time()
+            if self.opt.reg_offset:
+                dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat,
+                                   output['reg_t'], output['reg_l'],
+                                   output['reg_b'], output['reg_r'],
+                                   K=self.opt.K,
+                                   scores_thresh=self.opt.scores_thresh,
+                                   center_thresh=self.opt.center_thresh,
+                                   aggr_weight=self.opt.aggr_weight)
+            else:
+                dets = self.decode(t_heat, l_heat, b_heat, r_heat, c_heat, K=self.opt.K,
+                                   scores_thresh=self.opt.scores_thresh,
+                                   center_thresh=self.opt.center_thresh,
+                                   aggr_weight=self.opt.aggr_weight)
+        if return_time:
+            return output, dets, forward_time
+        else:
+            return output, dets
+
+    def debug(self, debugger, images, dets, output, scale=1):
+        detection = dets.detach().cpu().numpy().copy()
+        detection[:, :, :4] *= self.opt.down_ratio
+        for i in range(1):
+            inp_height, inp_width = images.shape[2], images.shape[3]
+            pred_hm = np.zeros((inp_height, inp_width, 3), dtype=np.uint8)
+            img = images[i].detach().cpu().numpy().transpose(1, 2, 0)
+            img = ((img * self.std + self.mean) * 255).astype(np.uint8)
+            parts = ['t', 'l', 'b', 'r', 'c']
+            for p in parts:
+                tag = 'hm_{}'.format(p)
+                pred = debugger.gen_colormap(
+                    output[tag][i].detach().cpu().numpy(), (inp_height, inp_width))
+                if p != 'c':
+                    pred_hm = np.maximum(pred_hm, pred)
+                else:
+                    debugger.add_blend_img(
+                        img, pred, 'pred_{}_{:.1f}'.format(p, scale))
+            debugger.add_blend_img(img, pred_hm, 'pred_{:.1f}'.format(scale))
+            debugger.add_img(img, img_id='out_{:.1f}'.format(scale))
+            for k in range(len(detection[i])):
+                # print('detection', detection[i, k, 4], detection[i, k])
+                if detection[i, k, 4] > 0.01:
+                    # print('detection', detection[i, k, 4], detection[i, k])
+                    debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1],
+                                           detection[i, k, 4],
+                                           img_id='out_{:.1f}'.format(scale))
+
+    def post_process(self, dets, meta, scale=1):
+        out_width, out_height = meta['out_width'], meta['out_height']
+        dets = dets.detach().cpu().numpy().reshape(2, -1, 14)
+        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
+        dets = dets.reshape(1, -1, 14)
+        dets[0, :, 0:2] = transform_preds(
+            dets[0, :, 0:2], meta['c'], meta['s'], (out_width, out_height))
+        dets[0, :, 2:4] = transform_preds(
+            dets[0, :, 2:4], meta['c'], meta['s'], (out_width, out_height))
+        dets[:, :, 0:4] /= scale
+        return dets[0]
+
+    def merge_outputs(self, detections):
+        detections = np.concatenate(
+            [detection for detection in detections], axis=0).astype(np.float32)
+        classes = detections[..., -1]
+        keep_inds = (detections[:, 4] > 0)
+        detections = detections[keep_inds]
+        classes = classes[keep_inds]
+
+        results = {}
+        for j in range(self.num_classes):
+            keep_inds = (classes == j)
+            results[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
+            soft_nms(results[j + 1], Nt=0.5, method=2)
+            results[j + 1] = results[j + 1][:, 0:5]
+
+        scores = np.hstack([
+            results[j][:, -1]
+            for j in range(1, self.num_classes + 1)
+        ])
+        if len(scores) > self.max_per_image:
+            kth = len(scores) - self.max_per_image
+            thresh = np.partition(scores, kth)[kth]
+            for j in range(1, self.num_classes + 1):
+                keep_inds = (results[j][:, -1] >= thresh)
+                results[j] = results[j][keep_inds]
+        return results
+
+    def show_results(self, debugger, image, results):
+        debugger.add_img(image, img_id='exdet')
+        for j in range(1, self.num_classes + 1):
+            for bbox in results[j]:
+                if bbox[4] > self.opt.vis_thresh:
+                    debugger.add_coco_bbox(
+                        bbox[:4], j - 1, bbox[4], img_id='exdet')
+        debugger.show_all_imgs(pause=self.pause)
--- a/src/lib/detectors/multi_pose.py
+++ b/src/lib/detectors/multi_pose.py
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import cv2
+import time
+import torch
+
+from external.nms import soft_nms_39
+from models.decode import multi_pose_decode, centerface_decode
+from models.utils import flip_tensor, flip_lr_off, flip_lr
+from utils.image import get_affine_transform
+from utils.post_process import multi_pose_post_process
+from utils.debugger import Debugger
+from progress.bar import Bar
+from .base_detector import BaseDetector
+import numpy as np
+
+
+class MultiPoseDetector(BaseDetector):
+    def __init__(self, opt):
+        super(MultiPoseDetector, self).__init__(opt)
+        self.flip_idx = opt.flip_idx
+
+    def process(self, images, return_time=False):
+        with torch.no_grad():
+            torch.cuda.synchronize()
+            output = self.model(images)[-1]
+            output['hm'] = output['hm']
+            # if self.opt.hm_hp and not self.opt.mse_loss:
+            #   output['hm_hp'] = output['hm_hp'].sigmoid_()
+
+            reg = output['hm_offset'] if self.opt.reg_offset else None
+            # hm_hp = output['hm_hp'] if self.opt.hm_hp else None
+            # hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None
+            torch.cuda.synchronize()
+            forward_time = time.time()
+
+            if self.opt.flip_test:
+                output['hm'] = (output['hm'][0:1] +
+                                flip_tensor(output['hm'][1:2])) / 2
+                output['wh'] = (output['wh'][0:1] +
+                                flip_tensor(output['wh'][1:2])) / 2
+                output['hps'] = (output['hps'][0:1] +
+                                 flip_lr_off(output['hps'][1:2], self.flip_idx)) / 2
+                hm_hp = (hm_hp[0:1] + flip_lr(hm_hp[1:2], self.flip_idx)) / 2 \
+                    if hm_hp is not None else None
+                reg = reg[0:1] if reg is not None else None
+                hp_offset = hp_offset[0:1] if hp_offset is not None else None
+
+            dets = centerface_decode(
+                output['hm'], output['wh'], output['landmarks'],
+                reg=reg, K=self.opt.K)
+
+        if return_time:
+            return output, dets, forward_time
+        else:
+            return output, dets
+
+    def post_process(self, dets, meta, scale=1):
+        dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
+        dets = multi_pose_post_process(
+            dets.copy(), [meta['c']], [meta['s']],
+            meta['out_height'], meta['out_width'])
+        for j in range(1, self.num_classes + 1):
+            dets[0][j] = np.array(
+                dets[0][j], dtype=np.float32).reshape(-1, 15)             # 关键点数+5=15
+            # import pdb; pdb.set_trace()
+            dets[0][j][:, :4] /= scale
+            dets[0][j][:, 5:] /= scale
+        return dets[0]
+
+    def merge_outputs(self, detections):
+        results = {}
+        results[1] = np.concatenate(
+            [detection[1] for detection in detections], axis=0).astype(np.float32)
+        if self.opt.nms or len(self.opt.test_scales) > 1:
+            soft_nms_39(results[1], Nt=0.5, method=2)
+        results[1] = results[1].tolist()
+        return results
+
+    def debug(self, debugger, images, dets, output, scale=1):
+        dets = dets.detach().cpu().numpy().copy()
+        dets[:, :, :4] *= self.opt.down_ratio
+        dets[:, :, 5:39] *= self.opt.down_ratio
+        img = images[0].detach().cpu().numpy().transpose(1, 2, 0)
+        img = np.clip(((
+            img * self.std + self.mean) * 255.), 0, 255).astype(np.uint8)
+        pred = debugger.gen_colormap(output['hm'][0].detach().cpu().numpy())
+        debugger.add_blend_img(img, pred, 'pred_hm')
+        if self.opt.hm_hp:
+            pred = debugger.gen_colormap_hp(
+                output['hm_hp'][0].detach().cpu().numpy())
+            debugger.add_blend_img(img, pred, 'pred_hmhp')
+
+    def show_results(self, debugger, image, results):
+        debugger.add_img(image, img_id='multi_pose')
+        for bbox in results[1]:
+            if bbox[4] > self.opt.vis_thresh:
+                debugger.add_coco_bbox(
+                    bbox[:4], 0, bbox[4], img_id='multi_pose')
+                debugger.add_coco_hp(bbox[5:39], img_id='multi_pose')
+        debugger.show_all_imgs(pause=self.pause)
+
+    def return_results(self, debugger, image, results):
+        debugger.add_img(image, img_id='multi_pose')
+        for bbox in results[1]:
+            if bbox[4] > self.opt.vis_thresh:
+                debugger.add_coco_bbox(
+                    bbox[:4], 0, bbox[4], img_id='multi_pose')
+                debugger.add_coco_hp(bbox[5:39], img_id='multi_pose')
+        return debugger.return_img(img_id='multi_pose')
--- a/src/lib/external/Makefile
+++ b/src/lib/external/Makefile
+# Build the Cython extension in place, then remove the temporary build tree.
+all:
+	python setup.py build_ext --inplace
+	rm -rf build
--- a/src/lib/external/__init__.py
+++ b/src/lib/external/__init__.py
--- a/src/lib/external/nms.c
+++ b/src/lib/external/nms.c
--- a/src/lib/external/nms.pyx
+++ b/src/lib/external/nms.pyx
+# --------------------------------------------------------
+# Fast R-CNN
+# Copyright (c) 2015 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Ross Girshick
+# --------------------------------------------------------
+
+# ----------------------------------------------------------
+# Soft-NMS: Improving Object Detection With One Line of Code
+# Copyright (c) University of Maryland, College Park
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Navaneeth Bodla and Bharat Singh
+# ----------------------------------------------------------
+
+import numpy as np
+cimport numpy as np
+
+cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
+    # Larger of two float32 values; returns a when they are equal.
+    if a >= b:
+        return a
+    return b
+
+cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
+    # Smaller of two float32 values; returns a when they are equal.
+    if a <= b:
+        return a
+    return b
+
+def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
+    """Greedy non-maximum suppression.
+
+    ``dets`` is a float32 array whose columns 0-3 are x1, y1, x2, y2 and
+    whose column 4 is the score. Boxes are visited in descending score
+    order; a box is suppressed when its IoU with an already kept box is
+    at least ``thresh``. Returns the list of kept row indices.
+    """
+    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
+    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
+    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
+    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
+    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
+
+    # The +1 treats coordinates as inclusive pixel indices.
+    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
+
+    cdef int ndets = dets.shape[0]
+    # Fixed: the np.int alias was removed in NumPy 1.24; np.int_ is the
+    # dtype that matches the np.int_t buffer declaration.
+    cdef np.ndarray[np.int_t, ndim=1] suppressed = \
+            np.zeros(ndets, dtype=np.int_)
+
+    # nominal indices
+    cdef int _i, _j
+    # sorted indices
+    cdef int i, j
+    # temp variables for box i's (the box currently under consideration)
+    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
+    # variables for computing overlap with box j (lower scoring box)
+    cdef np.float32_t xx1, yy1, xx2, yy2
+    cdef np.float32_t w, h
+    cdef np.float32_t inter, ovr
+
+    keep = []
+    for _i in range(ndets):
+        i = order[_i]
+        if suppressed[i] == 1:
+            continue
+        keep.append(i)
+        ix1 = x1[i]
+        iy1 = y1[i]
+        ix2 = x2[i]
+        iy2 = y2[i]
+        iarea = areas[i]
+        # Only lower-scoring boxes can still be suppressed by box i.
+        for _j in range(_i + 1, ndets):
+            j = order[_j]
+            if suppressed[j] == 1:
+                continue
+            xx1 = max(ix1, x1[j])
+            yy1 = max(iy1, y1[j])
+            xx2 = min(ix2, x2[j])
+            yy2 = min(iy2, y2[j])
+            w = max(0.0, xx2 - xx1 + 1)
+            h = max(0.0, yy2 - yy1 + 1)
+            inter = w * h
+            ovr = inter / (iarea + areas[j] - inter)
+            if ovr >= thresh:
+                suppressed[j] = 1
+
+    return keep
+
+def soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
+    """Soft-NMS performed in place on ``boxes``.
+
+    Columns 0-3 of ``boxes`` are read as x1, y1, x2, y2 and column 4 as the
+    score. Rows are reordered and rescored in place; only columns 0-4 are
+    moved, any further columns stay where they are.
+
+    ``method`` selects the rescoring rule: 1 = linear, 2 = gaussian (uses
+    ``sigma``), anything else = hard NMS with IoU threshold ``Nt``.
+    Rows whose score drops below ``threshold`` are discarded.
+
+    Returns ``[0, 1, ..., N - 1]`` where N is the number of surviving rows,
+    which occupy the first N rows of ``boxes``.
+    """
+    cdef unsigned int N = boxes.shape[0]
+    cdef float iw, ih, box_area
+    cdef float ua
+    cdef int pos = 0
+    cdef float maxscore = 0
+    cdef int maxpos = 0
+    cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
+
+    for i in range(N):
+        maxscore = boxes[i, 4]
+        maxpos = i
+
+        # Remember row i so it can be written into the max row's slot below.
+        tx1 = boxes[i,0]
+        ty1 = boxes[i,1]
+        tx2 = boxes[i,2]
+        ty2 = boxes[i,3]
+        ts = boxes[i,4]
+
+        pos = i + 1
+        # get max box
+        while pos < N:
+            if maxscore < boxes[pos, 4]:
+                maxscore = boxes[pos, 4]
+                maxpos = pos
+            pos = pos + 1
+
+        # add max box as a detection
+        boxes[i,0] = boxes[maxpos,0]
+        boxes[i,1] = boxes[maxpos,1]
+        boxes[i,2] = boxes[maxpos,2]
+        boxes[i,3] = boxes[maxpos,3]
+        boxes[i,4] = boxes[maxpos,4]
+
+        # swap ith box with position of max box
+        boxes[maxpos,0] = tx1
+        boxes[maxpos,1] = ty1
+        boxes[maxpos,2] = tx2
+        boxes[maxpos,3] = ty2
+        boxes[maxpos,4] = ts
+
+        # Reload: row i now holds the highest-scoring remaining box.
+        tx1 = boxes[i,0]
+        ty1 = boxes[i,1]
+        tx2 = boxes[i,2]
+        ty2 = boxes[i,3]
+        ts = boxes[i,4]
+
+        pos = i + 1
+        # NMS iterations, note that N changes if detection boxes fall below threshold
+        while pos < N:
+            x1 = boxes[pos, 0]
+            y1 = boxes[pos, 1]
+            x2 = boxes[pos, 2]
+            y2 = boxes[pos, 3]
+            s = boxes[pos, 4]
+
+            area = (x2 - x1 + 1) * (y2 - y1 + 1)
+            iw = (min(tx2, x2) - max(tx1, x1) + 1)
+            if iw > 0:
+                ih = (min(ty2, y2) - max(ty1, y1) + 1)
+                if ih > 0:
+                    ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
+                    ov = iw * ih / ua #iou between max box and detection box
+
+                    if method == 1: # linear
+                        if ov > Nt: 
+                            weight = 1 - ov
+                        else:
+                            weight = 1
+                    elif method == 2: # gaussian
+                        weight = np.exp(-(ov * ov)/sigma)
+                    else: # original NMS
+                        if ov > Nt: 
+                            weight = 0
+                        else:
+                            weight = 1
+
+                    boxes[pos, 4] = weight*boxes[pos, 4]
+
+                    # if box score falls below threshold, discard the box by swapping with last box
+                    # update N
+                    if boxes[pos, 4] < threshold:
+                        boxes[pos,0] = boxes[N-1, 0]
+                        boxes[pos,1] = boxes[N-1, 1]
+                        boxes[pos,2] = boxes[N-1, 2]
+                        boxes[pos,3] = boxes[N-1, 3]
+                        boxes[pos,4] = boxes[N-1, 4]
+                        N = N - 1
+                        # Step back so the row just moved into pos is examined too.
+                        pos = pos - 1
+
+            pos = pos + 1
+
+    keep = [i for i in range(N)]
+    return keep
+
+def soft_nms_39(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
+    """Soft-NMS in place for rows that carry extra per-box values.
+
+    Columns 0-3 of ``boxes`` are read as x1, y1, x2, y2 and column 4 as the
+    score. Columns 5 up to (but excluding) column 39 are moved together with
+    their box whenever rows are swapped. Rows narrower than 39 columns are
+    accepted: the swap range is clamped to the actual column count.
+
+    ``method`` selects the rescoring rule: 1 = linear, 2 = gaussian (uses
+    ``sigma``), anything else = hard NMS with IoU threshold ``Nt``.
+    Rows whose score drops below ``threshold`` are discarded.
+
+    Returns ``[0, 1, ..., N - 1]`` where N is the number of surviving rows,
+    which occupy the first N rows of ``boxes``.
+    """
+    cdef unsigned int N = boxes.shape[0]
+    cdef float iw, ih
+    cdef float ua
+    cdef int pos = 0
+    cdef float maxscore = 0
+    cdef int maxpos = 0
+    cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
+    cdef float tmp
+    # Fixed: the extra-column range was hard-coded to 5..38, which indexes
+    # out of bounds for narrower rows. Clamp it to the real width; behaviour
+    # for rows with 39 or more columns is unchanged.
+    cdef int ncols = boxes.shape[1]
+    if ncols > 39:
+        ncols = 39
+
+    for i in range(N):
+        maxscore = boxes[i, 4]
+        maxpos = i
+
+        # Remember row i so it can be written into the max row's slot below.
+        tx1 = boxes[i,0]
+        ty1 = boxes[i,1]
+        tx2 = boxes[i,2]
+        ty2 = boxes[i,3]
+        ts = boxes[i,4]
+
+        pos = i + 1
+        # get max box
+        while pos < N:
+            if maxscore < boxes[pos, 4]:
+                maxscore = boxes[pos, 4]
+                maxpos = pos
+            pos = pos + 1
+
+        # add max box as a detection
+        boxes[i,0] = boxes[maxpos,0]
+        boxes[i,1] = boxes[maxpos,1]
+        boxes[i,2] = boxes[maxpos,2]
+        boxes[i,3] = boxes[maxpos,3]
+        boxes[i,4] = boxes[maxpos,4]
+
+        # swap ith box with position of max box
+        boxes[maxpos,0] = tx1
+        boxes[maxpos,1] = ty1
+        boxes[maxpos,2] = tx2
+        boxes[maxpos,3] = ty2
+        boxes[maxpos,4] = ts
+
+        # Move the extra columns along with their box.
+        for j in range(5, ncols):
+            tmp = boxes[i, j]
+            boxes[i, j] = boxes[maxpos, j]
+            boxes[maxpos, j] = tmp
+
+        # Reload: row i now holds the highest-scoring remaining box.
+        tx1 = boxes[i,0]
+        ty1 = boxes[i,1]
+        tx2 = boxes[i,2]
+        ty2 = boxes[i,3]
+        ts = boxes[i,4]
+
+        pos = i + 1
+        # NMS iterations, note that N changes if detection boxes fall below threshold
+        while pos < N:
+            x1 = boxes[pos, 0]
+            y1 = boxes[pos, 1]
+            x2 = boxes[pos, 2]
+            y2 = boxes[pos, 3]
+
+            area = (x2 - x1 + 1) * (y2 - y1 + 1)
+            iw = (min(tx2, x2) - max(tx1, x1) + 1)
+            if iw > 0:
+                ih = (min(ty2, y2) - max(ty1, y1) + 1)
+                if ih > 0:
+                    ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
+                    ov = iw * ih / ua #iou between max box and detection box
+
+                    if method == 1: # linear
+                        if ov > Nt:
+                            weight = 1 - ov
+                        else:
+                            weight = 1
+                    elif method == 2: # gaussian
+                        weight = np.exp(-(ov * ov)/sigma)
+                    else: # original NMS
+                        if ov > Nt:
+                            weight = 0
+                        else:
+                            weight = 1
+
+                    boxes[pos, 4] = weight*boxes[pos, 4]
+
+                    # if box score falls below threshold, discard the box by swapping with last box
+                    # update N
+                    if boxes[pos, 4] < threshold:
+                        boxes[pos,0] = boxes[N-1, 0]
+                        boxes[pos,1] = boxes[N-1, 1]
+                        boxes[pos,2] = boxes[N-1, 2]
+                        boxes[pos,3] = boxes[N-1, 3]
+                        boxes[pos,4] = boxes[N-1, 4]
+                        for j in range(5, ncols):
+                            tmp = boxes[pos, j]
+                            boxes[pos, j] = boxes[N - 1, j]
+                            boxes[N - 1, j] = tmp
+                        N = N - 1
+                        # Step back so the row just moved into pos is examined too.
+                        pos = pos - 1
+
+            pos = pos + 1
+
+    keep = [i for i in range(N)]
+    return keep
+
+def soft_nms_merge(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0, float weight_exp=6):
+    """Soft-NMS in place that also merges overlapping boxes into the kept one.
+
+    Columns 0-3 of ``boxes`` are read as x1, y1, x2, y2 and column 4 as the
+    score. Column 5 weights the (x1, y1) corner and column 6 weights the
+    (x2, y2) corner when coordinates are averaged. Each overlapping box
+    contributes with the extra factor ``(1 - weight) ** weight_exp``, where
+    ``weight`` is the soft-NMS rescoring factor for that box.
+
+    ``method`` selects the rescoring rule: 1 = linear, 2 = gaussian (uses
+    ``sigma``), anything else = hard NMS with IoU threshold ``Nt``.
+    Rows whose score drops below ``threshold`` are discarded.
+
+    Returns ``[0, 1, ..., N - 1]`` where N is the number of surviving rows,
+    which occupy the first N rows of ``boxes``.
+    """
+    cdef unsigned int N = boxes.shape[0]
+    cdef float iw, ih, box_area
+    cdef float ua
+    cdef int pos = 0
+    cdef float maxscore = 0
+    cdef int maxpos = 0
+    cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
+    cdef float mx1,mx2,my1,my2,mts,mbs,mw
+
+    for i in range(N):
+        maxscore = boxes[i, 4]
+        maxpos = i
+
+        # Remember row i so it can be written into the max row's slot below.
+        tx1 = boxes[i,0]
+        ty1 = boxes[i,1]
+        tx2 = boxes[i,2]
+        ty2 = boxes[i,3]
+        ts = boxes[i,4]
+
+        pos = i + 1
+        # get max box
+        while pos < N:
+            if maxscore < boxes[pos, 4]:
+                maxscore = boxes[pos, 4]
+                maxpos = pos
+            pos = pos + 1
+
+        # add max box as a detection
+        boxes[i,0] = boxes[maxpos,0]
+        boxes[i,1] = boxes[maxpos,1]
+        boxes[i,2] = boxes[maxpos,2]
+        boxes[i,3] = boxes[maxpos,3]
+        boxes[i,4] = boxes[maxpos,4]
+
+        # Start the weighted sums with the kept box itself.
+        # NOTE(review): only columns 0-4 were copied from maxpos above, so
+        # columns 5 and 6 read here still belong to the original row i --
+        # confirm this is intended.
+        mx1 = boxes[i, 0] * boxes[i, 5]
+        my1 = boxes[i, 1] * boxes[i, 5]
+        mx2 = boxes[i, 2] * boxes[i, 6]
+        my2 = boxes[i, 3] * boxes[i, 6]
+        mts = boxes[i, 5]
+        mbs = boxes[i, 6]
+
+        # swap ith box with position of max box
+        boxes[maxpos,0] = tx1
+        boxes[maxpos,1] = ty1
+        boxes[maxpos,2] = tx2
+        boxes[maxpos,3] = ty2
+        boxes[maxpos,4] = ts
+
+        # Reload: row i now holds the highest-scoring remaining box.
+        tx1 = boxes[i,0]
+        ty1 = boxes[i,1]
+        tx2 = boxes[i,2]
+        ty2 = boxes[i,3]
+        ts = boxes[i,4]
+
+        pos = i + 1
+        # NMS iterations, note that N changes if detection boxes fall below threshold
+        while pos < N:
+            x1 = boxes[pos, 0]
+            y1 = boxes[pos, 1]
+            x2 = boxes[pos, 2]
+            y2 = boxes[pos, 3]
+            s = boxes[pos, 4]
+
+            area = (x2 - x1 + 1) * (y2 - y1 + 1)
+            iw = (min(tx2, x2) - max(tx1, x1) + 1)
+            if iw > 0:
+                ih = (min(ty2, y2) - max(ty1, y1) + 1)
+                if ih > 0:
+                    ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
+                    ov = iw * ih / ua #iou between max box and detection box
+
+                    if method == 1: # linear
+                        if ov > Nt: 
+                            weight = 1 - ov
+                        else:
+                            weight = 1
+                    elif method == 2: # gaussian
+                        weight = np.exp(-(ov * ov)/sigma)
+                    else: # original NMS
+                        if ov > Nt: 
+                            weight = 0
+                        else:
+                            weight = 1
+
+                    # Boxes that get suppressed harder (small weight)
+                    # contribute more to the merged coordinates.
+                    mw  = (1 - weight) ** weight_exp
+                    mx1 = mx1 + boxes[pos, 0] * boxes[pos, 5] * mw
+                    my1 = my1 + boxes[pos, 1] * boxes[pos, 5] * mw
+                    mx2 = mx2 + boxes[pos, 2] * boxes[pos, 6] * mw
+                    my2 = my2 + boxes[pos, 3] * boxes[pos, 6] * mw
+                    mts = mts + boxes[pos, 5] * mw
+                    mbs = mbs + boxes[pos, 6] * mw
+
+                    boxes[pos, 4] = weight*boxes[pos, 4]
+
+                    # if box score falls below threshold, discard the box by swapping with last box
+                    # update N
+                    if boxes[pos, 4] < threshold:
+                        boxes[pos,0] = boxes[N-1, 0]
+                        boxes[pos,1] = boxes[N-1, 1]
+                        boxes[pos,2] = boxes[N-1, 2]
+                        boxes[pos,3] = boxes[N-1, 3]
+                        boxes[pos,4] = boxes[N-1, 4]
+                        N = N - 1
+                        # Step back so the row just moved into pos is examined too.
+                        pos = pos - 1
+
+            pos = pos + 1
+
+        # Replace the kept box's corners with the weighted averages.
+        boxes[i, 0] = mx1 / mts
+        boxes[i, 1] = my1 / mts
+        boxes[i, 2] = mx2 / mbs
+        boxes[i, 3] = my2 / mbs
+
+    keep = [i for i in range(N)]
+    return keep
--- a/src/lib/external/setup.py
+++ b/src/lib/external/setup.py
+import numpy
+from distutils.core import setup
+from distutils.extension import Extension
+from Cython.Build import cythonize
+
+extensions = [
+    Extension(
+        "nms", 
+        ["nms.pyx"],
+        extra_compile_args=["-Wno-cpp", "-Wno-unused-function"]
+    )
+]
+
+setup(
+    name="coco",
+    ext_modules=cythonize(extensions),
+    include_dirs=[numpy.get_include()]
+)
--- a/src/lib/logger.py
+++ b/src/lib/logger.py
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
+import os
+import time
+import sys
+import torch
+USE_TENSORBOARD = True
+try:
+    import tensorboardX
+    print('Using tensorboardX')
+except:
+    USE_TENSORBOARD = False
+
+
+class Logger(object):
+    """Plain-text run logger with optional TensorBoard scalar summaries.
+
+    On construction it dumps the run options to ``<save_dir>/opt.txt``,
+    creates a time-stamped ``logs_<time>`` directory under ``save_dir`` and
+    opens ``log.txt`` inside it. When ``USE_TENSORBOARD`` is true a
+    ``tensorboardX.SummaryWriter`` is created on the same directory.
+    """
+
+    def __init__(self, opt):
+        """Create the output directories, dump the options and open the log.
+
+        Args:
+            opt: options object. ``save_dir`` and ``debug_dir`` are read
+                directly; every attribute whose name does not start with
+                ``_`` is written to ``opt.txt``.
+        """
+        # Function-scope import: keeps this class self-contained.
+        import shutil
+
+        if not os.path.exists(opt.save_dir):
+            os.makedirs(opt.save_dir)
+        if not os.path.exists(opt.debug_dir):
+            os.makedirs(opt.debug_dir)
+
+        time_str = time.strftime('%Y-%m-%d-%H-%M')
+
+        args = dict((name, getattr(opt, name)) for name in dir(opt)
+                    if not name.startswith('_'))
+        file_name = os.path.join(opt.save_dir, 'opt.txt')
+        with open(file_name, 'wt') as opt_file:
+            opt_file.write('==> torch version: {}\n'.format(torch.__version__))
+            opt_file.write('==> cudnn version: {}\n'.format(
+                torch.backends.cudnn.version()))
+            opt_file.write('==> Cmd:\n')
+            opt_file.write(str(sys.argv))
+            opt_file.write('\n==> Opt:\n')
+            for k, v in sorted(args.items()):
+                opt_file.write('  %s: %s\n' % (str(k), str(v)))
+
+        log_dir = opt.save_dir + '/logs_{}'.format(time_str)
+        if USE_TENSORBOARD:
+            self.writer = tensorboardX.SummaryWriter(logdir=log_dir)
+        else:
+            # Without a SummaryWriter the directory has to be created here.
+            if not os.path.exists(os.path.dirname(log_dir)):
+                os.mkdir(os.path.dirname(log_dir))
+            if not os.path.exists(log_dir):
+                os.mkdir(log_dir)
+        self.log = open(log_dir + '/log.txt', 'w')
+        try:
+            # Keep a copy of the options next to the log. shutil.copy avoids
+            # the shell, so paths with spaces or metacharacters are safe.
+            shutil.copy(file_name, log_dir)
+        except (IOError, OSError):
+            # Best effort only: a failed copy must not abort the run.
+            pass
+        # True when the next write() starts a new line and needs a timestamp.
+        self.start_line = True
+
+    def write(self, txt):
+        """Append ``txt`` to the log, time-stamping the start of each line.
+
+        The file is flushed whenever ``txt`` contains a newline.
+        """
+        if self.start_line:
+            time_str = time.strftime('%Y-%m-%d-%H-%M')
+            self.log.write('{}: {}'.format(time_str, txt))
+        else:
+            self.log.write(txt)
+        self.start_line = False
+        if '\n' in txt:
+            self.start_line = True
+            self.log.flush()
+
+    def close(self):
+        """Close the text log and, if present, the TensorBoard writer."""
+        self.log.close()
+        writer = getattr(self, 'writer', None)
+        if writer is not None:
+            # Flushes pending events; previously the writer was never closed.
+            writer.close()
+
+    def scalar_summary(self, tag, value, step):
+        """Log a scalar variable (no-op when TensorBoard is unavailable)."""
+        if USE_TENSORBOARD:
+            self.writer.add_scalar(tag, value, step)
--- a/src/lib/models/Backbone/centerface_mobilenet_v2.py
+++ b/src/lib/models/Backbone/centerface_mobilenet_v2.py
+from torch import nn
+import torch.utils.model_zoo as model_zoo
+from collections import OrderedDict
+import math
+
+
+# Names exported by ``from <module> import *``.
+__all__ = ['MobileNetV2']
+
+
+# Download URLs of pretrained checkpoints, keyed by model name.
+model_urls = {
+    'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
+}
+
+
+def _make_divisible(v, divisor, min_value=None):
+    """Round ``v`` to the nearest multiple of ``divisor``.
+
+    The result is never below ``min_value`` (which defaults to ``divisor``),
+    and it is bumped up by one ``divisor`` if rounding would otherwise lose
+    more than 10% of ``v``.
+
+    :param v: value to round (typically a channel count).
+    :param divisor: the result is a multiple of this number.
+    :param min_value: lower bound for the result; ``divisor`` when ``None``.
+    :return: the adjusted value.
+    """
+    lower_bound = divisor if min_value is None else min_value
+    nearest = int(v + divisor / 2) // divisor * divisor
+    rounded = max(lower_bound, nearest)
+    # Guard against rounding down by more than 10%.
+    lost_too_much = rounded < 0.9 * v
+    return rounded + divisor if lost_too_much else rounded
+
+class ConvBNReLU(nn.Sequential):
+    """Bias-free Conv2d followed by BatchNorm2d and ReLU6, as one Sequential."""
+
+    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
+        # Symmetric padding of (k - 1) // 2 on each side.
+        pad = (kernel_size - 1) // 2
+        conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad,
+                         groups=groups, bias=False)
+        norm = nn.BatchNorm2d(out_planes)
+        act = nn.ReLU6(inplace=True)
+        super(ConvBNReLU, self).__init__(conv, norm, act)
+
+
+class InvertedResidual(nn.Module):
+    """Inverted residual block: optional 1x1 expansion, 3x3 depthwise
+    convolution, then a linear 1x1 projection with batch norm.
+
+    The input is added to the output only when ``stride == 1`` and the
+    input and output channel counts match.
+    """
+
+    def __init__(self, inp, oup, stride, expand_ratio):
+        super(InvertedResidual, self).__init__()
+        assert stride in [1, 2]
+        self.stride = stride
+
+        hidden_dim = int(round(inp * expand_ratio))
+        self.use_res_connect = (stride == 1) and (inp == oup)
+
+        stages = []
+        if expand_ratio != 1:
+            # Pointwise expansion; skipped when the ratio is 1.
+            stages.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
+        # Depthwise 3x3 convolution (one group per channel).
+        stages.append(ConvBNReLU(hidden_dim, hidden_dim,
+                                 stride=stride, groups=hidden_dim))
+        # Pointwise linear projection: no activation after the batch norm.
+        stages.append(nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False))
+        stages.append(nn.BatchNorm2d(oup))
+        self.conv = nn.Sequential(*stages)
+
+    def forward(self, x):
+        """Apply the block, adding the skip connection when enabled."""
+        out = self.conv(x)
+        return x + out if self.use_res_connect else out
+
+
+class MobileNetV2(nn.Module):
+    """MobileNetV2 feature extractor that returns four intermediate feature maps.
+
+    The inverted-residual stages are grouped into the sub-modules
+    ``feature_1``, ``feature_2``, ``feature_4`` and ``feature_6``. A stage
+    whose index is not in ``feat_id`` is folded into the next group, and the
+    stem convolution is part of ``feature_1``. ``feat_channel`` holds the
+    output channel count of each group.
+    """
+
+    def __init__(self, width_mult=1.0, round_nearest=8):
+        super(MobileNetV2, self).__init__()
+        # Per stage: expansion t, channels c, repeats n, first-block stride s.
+        stage_settings = [
+            [1, 16, 1, 1],   # 0
+            [6, 24, 2, 2],   # 1
+            [6, 32, 3, 2],   # 2
+            [6, 64, 4, 2],   # 3
+            [6, 96, 3, 1],   # 4
+            [6, 160, 3, 2],  # 5
+            [6, 320, 1, 1],  # 6
+        ]
+        self.feat_id = [1, 2, 4, 6]
+        self.feat_channel = []
+
+        # Only the first entry is validated.
+        if not stage_settings or len(stage_settings[0]) != 4:
+            raise ValueError("inverted_residual_setting should be non-empty "
+                             "or a 4-element list, got {}".format(stage_settings))
+
+        # Stem convolution.
+        in_ch = _make_divisible(32 * width_mult, round_nearest)
+        pending = [ConvBNReLU(3, in_ch, stride=2)]
+
+        # Inverted residual stages; layers accumulate in `pending` until a
+        # stage listed in feat_id closes the current group.
+        for stage_idx, (t, c, n, s) in enumerate(stage_settings):
+            out_ch = _make_divisible(c * width_mult, round_nearest)
+            for rep in range(n):
+                block_stride = s if rep == 0 else 1
+                pending.append(
+                    InvertedResidual(in_ch, out_ch, block_stride, expand_ratio=t))
+                in_ch = out_ch
+            if stage_idx in self.feat_id:
+                setattr(self, "feature_%d" % stage_idx, nn.Sequential(*pending))
+                self.feat_channel.append(out_ch)
+                pending = []
+
+        # Weight initialization.
+        for module in self.modules():
+            if isinstance(module, nn.Conv2d):
+                nn.init.kaiming_normal_(module.weight, mode='fan_out')
+                if module.bias is not None:
+                    nn.init.zeros_(module.bias)
+            elif isinstance(module, nn.BatchNorm2d):
+                nn.init.ones_(module.weight)
+                nn.init.zeros_(module.bias)
+
+    def forward(self, x):
+        """Run the groups in order and return the list of their outputs."""
+        outputs = []
+        for stage_idx in self.feat_id:
+            x = getattr(self, "feature_%d" % stage_idx)(x)
+            outputs.append(x)
+        return outputs
+
+def load_model(model,state_dict):
+    """Load ``state_dict`` into ``model`` by key position instead of key name.
+
+    The i-th key of the model's own state dict receives the value stored
+    under the i-th key of ``state_dict``; surplus trailing entries of
+    ``state_dict`` are ignored.
+    """
+    source_keys = list(state_dict.keys())
+    remapped = OrderedDict()
+    for position, target_key in enumerate(list(model.state_dict().keys())):
+        remapped[target_key] = state_dict[source_keys[position]]
+    model.load_state_dict(remapped)
+
+def dict2list(func):
+    """Decorator for a method that returns ``[dict]``: the wrapped method
+    returns the dict's values as a list, in the dict's iteration order.
+
+    Only the first two positional arguments (``self`` and ``x``) are passed
+    on to ``func``; any further arguments are ignored.
+    """
+    def wrap(*args, **kwargs):
+        owner, x = args[0], args[1]
+        head_outputs = func(owner, x)[0]
+        return [value for _, value in head_outputs.items()]
+    return wrap
+
+def fill_up_weights(up):
+    """Overwrite ``up.weight`` in place with a fixed separable triangular kernel.
+
+    The kernel is computed once for ``weight[0, 0]`` and then copied to
+    ``weight[c, 0]`` for every other index ``c`` along dimension 0. Only
+    index 0 of dimension 1 is written.
+    """
+    kernel = up.weight.data
+    rows, cols = kernel.size(2), kernel.size(3)
+    f = math.ceil(rows / 2)
+    center = (2 * f - 1 - f % 2) / (2. * f)
+
+    def tent(pos):
+        # 1-D triangular profile evaluated at integer position `pos`.
+        return 1 - math.fabs(pos / f - center)
+
+    for i in range(rows):
+        for j in range(cols):
+            kernel[0, 0, i, j] = tent(i) * tent(j)
+    for ch in range(1, kernel.size(0)):
+        kernel[ch, 0, :, :] = kernel[0, 0, :, :]
+
+def fill_fc_weights(layers):
+    """Re-initialize every Conv2d found in ``layers``: weights drawn from a
+    normal distribution with std 0.001, biases (when present) set to zero."""
+    conv_layers = (m for m in layers.modules() if isinstance(m, nn.Conv2d))
+    for conv in conv_layers:
+        nn.init.normal_(conv.weight, std=0.001)
+        if conv.bias is None:
+            continue
+        nn.init.constant_(conv.bias, 0)
+
+
+class IDAUp(nn.Module):
+    """Fuse two feature maps: upsample the first, project the second, add."""
+
+    def __init__(self, out_dim, channel):
+        super(IDAUp, self).__init__()
+        self.out_dim = out_dim
+
+        # Depthwise transposed convolution with stride 2 (x2 upsampling).
+        upsample = nn.ConvTranspose2d(
+            out_dim, out_dim, kernel_size=2, stride=2, padding=0,
+            output_padding=0, groups=out_dim, bias=False)
+        self.up = nn.Sequential(
+            upsample,
+            nn.BatchNorm2d(out_dim, eps=0.001, momentum=0.1),
+            nn.ReLU())
+
+        # 1x1 projection of the second input from `channel` to `out_dim`.
+        project = nn.Conv2d(channel, out_dim,
+                            kernel_size=1, stride=1, bias=False)
+        self.conv = nn.Sequential(
+            project,
+            nn.BatchNorm2d(out_dim, eps=0.001, momentum=0.1),
+            nn.ReLU(inplace=True))
+
+    def forward(self, layers):
+        """``layers[0]`` is upsampled, ``layers[1]`` is projected; returns their sum."""
+        feats = list(layers)
+        upsampled = self.up(feats[0])
+        projected = self.conv(feats[1])
+        return upsampled + projected
+
+class MobileNetUp(nn.Module):
+    """Decoder that merges a list of feature maps into a single one.
+
+    The last feature map is projected to ``out_dim`` channels and then
+    fused with each earlier map in turn (last-but-one first) through the
+    ``up_<i>`` IDAUp modules, followed by a final 3x3 convolution.
+    """
+
+    def __init__(self, channels, out_dim = 24):
+        super(MobileNetUp, self).__init__()
+        # Channel counts reordered so that the last feature map comes first.
+        ordered = channels[::-1]
+        self.conv = nn.Sequential(
+            nn.Conv2d(ordered[0], out_dim,
+                      kernel_size=1, stride=1, bias=False),
+            nn.BatchNorm2d(out_dim, eps=0.001, momentum=0.1),
+            nn.ReLU(inplace=True))
+        self.conv_last = nn.Sequential(
+            nn.Conv2d(out_dim, out_dim,
+                      kernel_size=3, stride=1, padding=1, bias=False),
+            nn.BatchNorm2d(out_dim, eps=1e-5, momentum=0.01),
+            nn.ReLU(inplace=True))
+
+        # One fusion module per remaining feature map.
+        for idx, skip_channels in enumerate(ordered[1:]):
+            setattr(self, 'up_%d' % (idx), IDAUp(out_dim, skip_channels))
+
+        for module in self.modules():
+            if isinstance(module, nn.Conv2d):
+                nn.init.kaiming_normal_(module.weight, mode='fan_out')
+                if module.bias is not None:
+                    nn.init.constant_(module.bias, 0)
+            elif isinstance(module, nn.BatchNorm2d):
+                nn.init.constant_(module.weight, 1)
+                nn.init.constant_(module.bias, 0)
+            elif isinstance(module, nn.ConvTranspose2d):
+                fill_up_weights(module)
+
+    def forward(self, layers):
+        """Fuse ``layers`` (at least two feature maps) into one tensor."""
+        feats = list(layers)
+        assert len(feats) > 1
+        x = self.conv(feats[-1])
+        # Walk the remaining maps from last-but-one down to the first.
+        for idx, skip in enumerate(reversed(feats[:-1])):
+            fuse = getattr(self, 'up_{}'.format(idx))
+            x = fuse([x, skip])
+        return self.conv_last(x)
+
+class MobileNetSeg(nn.Module):
+    """Backbone + upsampling decoder + one 1x1 convolution head per entry
+    of ``heads``.
+
+    ``heads`` maps a head name to its number of output channels. The head
+    named ``'hm'`` is followed by a sigmoid; all other heads are a bare
+    convolution.
+    """
+
+    def __init__(self, base_name,heads,head_conv=24, pretrained = True):
+        super(MobileNetSeg, self).__init__()
+        self.heads = heads
+        # The backbone factory is looked up by name in this module's globals.
+        backbone_factory = globals()[base_name]
+        self.base = backbone_factory(pretrained=pretrained)
+        self.dla_up = MobileNetUp(self.base.feat_channel, out_dim=head_conv)
+        for head in self.heads:
+            out_channels = self.heads[head]
+            head_layer = nn.Conv2d(head_conv, out_channels,
+                                   kernel_size=1, stride=1,
+                                   padding=0, bias=True)
+            if head == 'hm':
+                head_layer = nn.Sequential(head_layer, nn.Sigmoid())
+            # No custom initialization is applied to the heads; an earlier
+            # variant (hm bias filled with -2.19, other heads
+            # normal(std=0.001) weights and zero bias) is disabled.
+            setattr(self, head, head_layer)
+
+    # @dict2list  # when exporting to ONNX the output must be converted from dict to list
+    def forward(self, x):
+        """Return a one-element list holding a ``{head_name: tensor}`` dict."""
+        feat = self.dla_up(self.base(x))
+        outputs = {}
+        for head in self.heads:
+            outputs[head] = getattr(self, head)(feat)
+        return [outputs]
+
+
+def mobilenetv2_10(pretrained=True, **kwargs):
+    """Build MobileNetV2 with width multiplier 1.0.
+
+    When ``pretrained`` is true the ``'mobilenet_v2'`` checkpoint is
+    downloaded and loaded through ``load_model``. Extra keyword arguments
+    are accepted but ignored.
+    """
+    net = MobileNetV2(width_mult=1.0)
+    if not pretrained:
+        return net
+    weights = model_zoo.load_url(model_urls['mobilenet_v2'], progress=True)
+    load_model(net, weights)
+    return net
+
+def mobilenetv2_5(pretrained=False, **kwargs):
+    """Build MobileNetV2 with width multiplier 0.5.
+
+    No checkpoint is loaded; asking for ``pretrained`` only prints a notice.
+    Extra keyword arguments are accepted but ignored.
+    """
+    net = MobileNetV2(width_mult=0.5)
+    if not pretrained:
+        return net
+    print('This version does not have pretrain weights.')
+    return net
+
+# num_layers selects the backbone factory: 10 -> mobilenetv2_10, 5 -> mobilenetv2_5
+def get_mobile_net(num_layers, heads, head_conv=24):
+  """Build a MobileNetSeg on ``mobilenetv2_<num_layers>``, always passing
+  ``pretrained=True`` to the backbone factory."""
+  backbone_name = 'mobilenetv2_{}'.format(num_layers)
+  return MobileNetSeg(backbone_name, heads,
+                      head_conv=head_conv,
+                      pretrained=True)
+
+
+if __name__ == '__main__':
+    # Smoke test: build the width-1.0 model and push one blank image through.
+    import torch
+    dummy_input = torch.zeros([1,3,416,416])
+    # 'hm' refers to the object classes. (Original author's note, translated:
+    # this head file can only do rectangular box detection.)
+    model = get_mobile_net(10,{'hm':1, 'hm_offset':2, 'wh':2, 'landmarks':10},head_conv=24)
+    res = model(dummy_input)
+    # forward() returns a one-element list wrapping a {head: tensor} dict, so
+    # the former `res.shape` raised AttributeError; print each head instead.
+    for head_name, head_out in res[0].items():
+        print(head_name, head_out.shape)
--- a/src/lib/models/Backbone/centerface_mobilenet_v2_fpn.py
+++ b/src/lib/models/Backbone/centerface_mobilenet_v2_fpn.py
--- a/src/lib/models/Backbone/darknet.py
+++ b/src/lib/models/Backbone/darknet.py
+import math
+from collections import OrderedDict
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+       
+class BasicBlock(nn.Module):
+    """Residual block: 1x1 conv to ``planes[0]`` channels, 3x3 conv to
+    ``planes[1]`` channels (each with batch norm and LeakyReLU(0.1)), then
+    the block input is added to the result."""
+
+    def __init__(self, inplanes, planes):
+        super(BasicBlock, self).__init__()
+        squeeze_ch, expand_ch = planes[0], planes[1]
+
+        self.conv1 = nn.Conv2d(inplanes, squeeze_ch, kernel_size=1,
+                               stride=1, padding=0, bias=False)
+        self.bn1 = nn.BatchNorm2d(squeeze_ch)
+        self.relu1 = nn.LeakyReLU(0.1)
+
+        self.conv2 = nn.Conv2d(squeeze_ch, expand_ch, kernel_size=3,
+                               stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(expand_ch)
+        self.relu2 = nn.LeakyReLU(0.1)
+
+    def forward(self, x):
+        """Apply both conv stages and add ``x`` in place to the result."""
+        out = self.relu1(self.bn1(self.conv1(x)))
+        out = self.relu2(self.bn2(self.conv2(out)))
+        out += x
+        return out
+
+
+class DarkNet(nn.Module):
+    """Three-stage DarkNet-style backbone with a fixed-size resized output.
+
+    Args:
+        layers: number of residual blocks per stage; only ``layers[0]``,
+            ``layers[1]`` and ``layers[2]`` are used.
+        out_size: ``(height, width)`` the final feature map is bilinearly
+            resized to. Defaults to ``(128, 128)``, the value that used to
+            be hard-coded in ``forward``.
+    """
+
+    def __init__(self, layers, out_size=(128, 128)):
+        super(DarkNet, self).__init__()
+        self.out_size = out_size
+        self.inplanes = 32
+        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(self.inplanes)
+        self.relu1 = nn.LeakyReLU(0.1)
+
+        self.layer1 = self._make_layer([32, 64], layers[0])
+        self.layer2 = self._make_layer([64, 128], layers[1])
+        self.layer3 = self._make_layer([128, 256], layers[2])
+        # Deeper stages are disabled in this variant:
+        #self.layer4 = self._make_layer([256, 512], layers[3])
+        #self.layer5 = self._make_layer([512, 1024], layers[4])
+
+        # Output channel count of layer1, layer2 and layer3.
+        self.layers_out_filters = [64, 128, 256]
+
+        for m in self.modules():
+            if isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+    def _make_layer(self, planes, blocks):
+        """Build one stage: a stride-2 3x3 downsampling conv (with BN and
+        LeakyReLU) followed by ``blocks`` BasicBlock modules.
+
+        Updates ``self.inplanes`` to ``planes[1]`` as a side effect.
+        """
+        layers = []
+        #  downsample
+        layers.append(("ds_conv", nn.Conv2d(self.inplanes, planes[1], kernel_size=3,
+                                stride=2, padding=1, bias=False)))
+        layers.append(("ds_bn", nn.BatchNorm2d(planes[1])))
+        layers.append(("ds_relu", nn.LeakyReLU(0.1)))
+        #  blocks
+        self.inplanes = planes[1]
+        for i in range(0, blocks):
+            layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes)))
+        return nn.Sequential(OrderedDict(layers))
+
+    def forward(self, x):
+        """Run the stem and the three stages, then resize to ``out_size``."""
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu1(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = F.interpolate(x, size=self.out_size,
+            mode="bilinear", align_corners=True)
+
+        return x
+
+
+def darknet21(cfg,is_train=True, **kwargs):
+    """Build ``DarkNet([1, 1, 2, 2, 1])``.
+
+    When ``is_train`` and ``cfg.BACKBONE.INIT_WEIGHTS`` are both true the
+    weights are loaded from the path in ``cfg.BACKBONE.PRETRAINED``.
+
+    Raises:
+        Exception: if weights are requested but ``cfg.BACKBONE.PRETRAINED``
+            is not a string.
+    """
+    model = DarkNet([1, 1, 2, 2, 1])
+    if not (is_train and cfg.BACKBONE.INIT_WEIGHTS):
+        return model
+    if not isinstance(cfg.BACKBONE.PRETRAINED, str):
+        raise Exception("darknet request a pretrained path. got [{}]".format(cfg.BACKBONE.PRETRAINED))
+    checkpoint = torch.load(cfg.BACKBONE.PRETRAINED)
+    model.load_state_dict(checkpoint)
+    return model
+
+def darknet53(num_layers, cfg):
+    """Build ``DarkNet([1, 2, 8])``.
+
+    ``num_layers`` and ``cfg`` are accepted but not used. Loading
+    pretrained weights from ``cfg.BACKBONE.PRETRAINED`` (as ``darknet21``
+    does) is currently disabled.
+    """
+    return DarkNet([1, 2, 8])
--- a/src/lib/models/Backbone/dlav0.py
+++ b/src/lib/models/Backbone/dlav0.py
--- a/src/lib/models/Backbone/efficientdet/__init__.py
+++ b/src/lib/models/Backbone/efficientdet/__init__.py
+from .efficientdet import EfficientDet
+
+
+def get_efficientdet(num_layers, cfg):
+    """Build an EfficientDet whose ``intermediate_channels`` comes from
+    ``cfg.MODEL.INTERMEDIATE_CHANNEL``; ``num_layers`` is not used."""
+    channels = cfg.MODEL.INTERMEDIATE_CHANNEL
+    return EfficientDet(intermediate_channels=channels)
--- a/src/lib/models/Backbone/efficientdet/bifpn.py
+++ b/src/lib/models/Backbone/efficientdet/bifpn.py
--- a/src/lib/models/Backbone/efficientdet/conv_module.py
+++ b/src/lib/models/Backbone/efficientdet/conv_module.py
--- a/src/lib/models/Backbone/efficientdet/efficientdet.py
+++ b/src/lib/models/Backbone/efficientdet/efficientdet.py
+import torch
+import torch.nn as nn
+import math
+from .efficientnet import EfficientNet
+from .bifpn import BIFPN
+from .retinahead import RetinaHead
+from torchvision.ops import nms 
+import torch.nn.functional as F
+
+# Maps an EfficientDet variant name to the name of the EfficientNet backbone
+# it is built on.
+# NOTE(review): 'efficientdet-d7' maps to 'efficientnet-b6', the same
+# backbone as d6 -- presumably intentional; confirm.
+MODEL_MAP = {
+    'efficientdet-d0': 'efficientnet-b0',
+    'efficientdet-d1': 'efficientnet-b1',
+    'efficientdet-d2': 'efficientnet-b2',
+    'efficientdet-d3': 'efficientnet-b3',
+    'efficientdet-d4': 'efficientnet-b4',
+    'efficientdet-d5': 'efficientnet-b5',
+    'efficientdet-d6': 'efficientnet-b6',
+    'efficientdet-d7': 'efficientnet-b6',
+}
+class EfficientDet(nn.Module):
+    """EfficientNet backbone + BiFPN neck + RetinaHead.
+
+    Args:
+        intermediate_channels: passed to ``RetinaHead`` as ``num_classes``.
+        network: key of ``MODEL_MAP`` selecting the pretrained backbone.
+        D_bifpn: number of stacked BiFPN layers (``stack``).
+        W_bifpn: BiFPN output channels, also the head's input channels.
+        D_class: accepted but not used by this class.
+        scale_ratios: stored on ``self.scale_ratios``. ``None`` selects a
+            fresh ``[0.5, 1, 2, 4, 8, 16, 32]`` per instance, so instances
+            no longer share one mutable default list.
+    """
+
+    def __init__(self,
+                 intermediate_channels,
+                 network = 'efficientdet-d1',
+                 D_bifpn=3,
+                 W_bifpn=32,
+                 D_class=3,
+                 scale_ratios=None,
+                 ):
+        super(EfficientDet, self).__init__()
+        self.backbone = EfficientNet.from_pretrained(MODEL_MAP[network])
+        self.neck = BIFPN(in_channels=self.backbone.get_list_features(),
+                                out_channels=W_bifpn,
+                                stack=D_bifpn,
+                                num_outs=7)
+        self.bbox_head = RetinaHead(num_classes = intermediate_channels,
+                                    in_channels = W_bifpn)
+
+        if scale_ratios is None:
+            scale_ratios = [0.5, 1, 2, 4, 8, 16, 32]
+        self.scale_ratios = scale_ratios
+        # NOTE(review): this loop walks self.modules(), which includes the
+        # backbone, so any nn.Conv2d / nn.BatchNorm2d loaded by
+        # from_pretrained above is re-initialized here -- confirm intended.
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+        self.freeze_bn()
+
+    def forward(self, inputs):
+        """Run backbone, neck and head; return element ``[0][1]`` of the
+        head's output."""
+        x = self.extract_feat(inputs)
+        outs = self.bbox_head(x)
+
+        return outs[0][1]
+
+    def freeze_bn(self):
+        '''Put every BatchNorm2d layer into eval mode.
+
+        NOTE(review): a later ``model.train()`` call presumably switches
+        them back to training mode -- confirm callers re-apply this.
+        '''
+        for layer in self.modules():
+            if isinstance(layer, nn.BatchNorm2d):
+                layer.eval()
+
+    def extract_feat(self, img):
+        """
+            Directly extract features from the backbone+neck
+        """
+        x = self.backbone(img)
+        x = self.neck(x)
+        return x
+    
--- a/src/lib/models/Backbone/efficientdet/efficientnet.py
+++ b/src/lib/models/Backbone/efficientdet/efficientnet.py
--- a/src/lib/models/Backbone/efficientdet/module.py
+++ b/src/lib/models/Backbone/efficientdet/module.py