Commit b952e97b authored by chenych

First Commit.

image.png (23.7 KB)

import os.path as osp
import sys
def add_path(path):
if path not in sys.path:
sys.path.insert(0, path)
this_dir = osp.dirname(__file__)
# Add lib to PYTHONPATH
lib_path = osp.join(this_dir, 'lib')
add_path(lib_path)
import xml.etree.ElementTree as ET
import os
import json
coco = dict()
coco['images'] = []
coco['type'] = 'instances'
coco['annotations'] = []
coco['categories'] = []
category_set = dict()
image_set = set()
category_item_id = 0
image_id = 20180000000
annotation_id = 0
def addCatItem(name):
global category_item_id
category_item = dict()
category_item['supercategory'] = 'none'
category_item_id += 1
category_item['id'] = category_item_id
category_item['name'] = name
coco['categories'].append(category_item)
category_set[name] = category_item_id
return category_item_id
def addImgItem(file_name, size):
global image_id
if file_name is None:
raise Exception('Could not find filename tag in xml file.')
if size['width'] is None:
raise Exception('Could not find width tag in xml file.')
if size['height'] is None:
raise Exception('Could not find height tag in xml file.')
image_id += 1
image_item = dict()
image_item['id'] = image_id
image_item['file_name'] = file_name
image_item['width'] = size['width']
image_item['height'] = size['height']
coco['images'].append(image_item)
image_set.add(file_name)
return image_id
def addAnnoItem(object_name, image_id, category_id, bbox):
global annotation_id
annotation_item = dict()
annotation_item['segmentation'] = []
seg = []
#bbox[] is x,y,w,h
#left_top
seg.append(bbox[0])
seg.append(bbox[1])
#left_bottom
seg.append(bbox[0])
seg.append(bbox[1] + bbox[3])
#right_bottom
seg.append(bbox[0] + bbox[2])
seg.append(bbox[1] + bbox[3])
#right_top
seg.append(bbox[0] + bbox[2])
seg.append(bbox[1])
annotation_item['segmentation'].append(seg)
annotation_item['area'] = bbox[2] * bbox[3]
annotation_item['iscrowd'] = 0
annotation_item['ignore'] = 0
annotation_item['image_id'] = image_id
annotation_item['bbox'] = bbox
annotation_item['category_id'] = category_id
annotation_id += 1
annotation_item['id'] = annotation_id
coco['annotations'].append(annotation_item)
def parseXmlFiles(xml_path):
    # keep the last 20% of the sorted file list as the validation split;
    # sorting makes the split deterministic (os.listdir order is arbitrary)
    fs = sorted(os.listdir(xml_path))
    nums = int(0.8 * len(fs))
    for f in fs[nums:]:
if not f.endswith('.xml'):
continue
bndbox = dict()
size = dict()
current_image_id = None
current_category_id = None
file_name = None
size['width'] = None
size['height'] = None
size['depth'] = None
xml_file = os.path.join(xml_path, f)
print(xml_file)
tree = ET.parse(xml_file)
root = tree.getroot()
if root.tag != 'annotation':
raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
#elem is <folder>, <filename>, <size>, <object>
for elem in root:
current_parent = elem.tag
current_sub = None
object_name = None
if elem.tag == 'folder':
continue
if elem.tag == 'filename':
file_name = elem.text
if file_name in category_set:
raise Exception('file_name duplicated')
#add img item only after parse <size> tag
elif current_image_id is None and file_name is not None and size['width'] is not None:
if file_name not in image_set:
current_image_id = addImgItem(file_name, size)
print('add image with {} and {}'.format(file_name, size))
else:
raise Exception('duplicated image: {}'.format(file_name))
#subelem is <width>, <height>, <depth>, <name>, <bndbox>
for subelem in elem:
                bndbox['xmin'] = None
                bndbox['xmax'] = None
                bndbox['ymin'] = None
                bndbox['ymax'] = None
current_sub = subelem.tag
if current_parent == 'object' and subelem.tag == 'name':
object_name = subelem.text
if object_name not in category_set:
current_category_id = addCatItem(object_name)
else:
current_category_id = category_set[object_name]
elif current_parent == 'size':
if size[subelem.tag] is not None:
raise Exception('xml structure broken at size tag.')
size[subelem.tag] = int(subelem.text)
#option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
for option in subelem:
if current_sub == 'bndbox':
if bndbox[option.tag] is not None:
raise Exception('xml structure corrupted at bndbox tag.')
bndbox[option.tag] = int(option.text)
#only after parse the <object> tag
if bndbox['xmin'] is not None:
if object_name is None:
raise Exception('xml structure broken at bndbox tag')
if current_image_id is None:
raise Exception('xml structure broken at bndbox tag')
if current_category_id is None:
raise Exception('xml structure broken at bndbox tag')
bbox = []
#x
bbox.append(bndbox['xmin'])
#y
bbox.append(bndbox['ymin'])
#w
bbox.append(bndbox['xmax'] - bndbox['xmin'])
#h
bbox.append(bndbox['ymax'] - bndbox['ymin'])
print('add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id, bbox))
                    addAnnoItem(object_name, current_image_id, current_category_id, bbox)
if __name__ == '__main__':
xml_path = '/home/yangna/deepblue/32_face_detect/centerface/data/wider_face/voc_xml'
json_file = './data/wider_face/annotations/val_wider_face.json'
parseXmlFiles(xml_path)
json.dump(coco, open(json_file, 'w'))
# add annotation with face,20180012875,1,[848, 445, 20, 29]
# /home/yangna/deepblue/32_face_detect/centerface/data/wider_face/voc/35_Basketball_basketballgame_ball_35_79.xml
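# A minimal sanity check for the generated annotation file (a sketch; it
# assumes pycocotools is installed and reuses the json_file path above):
#   from pycocotools.coco import COCO
#   api = COCO('./data/wider_face/annotations/val_wider_face.json')
#   print(len(api.getImgIds()), 'images /', len(api.getAnnIds()), 'annotations')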
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import _init_paths
import os
import cv2
import time
from opts_pose import opts
from detectors.detector_factory import detector_factory
image_ext = ['jpg', 'jpeg', 'png', 'webp']
video_ext = ['mp4', 'mov', 'avi', 'mkv']
time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
def demo(opt):
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
Detector = detector_factory[opt.task]
detector = Detector(opt)
if opt.demo == 'webcam' or \
opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext:
cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo)
detector.pause = False
i = 0
start_time = time.time()
if opt.output_video:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # for .mp4 output the codec needs to be mp4v
im_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
im_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
write_cap = cv2.VideoWriter(
opt.output_video, fourcc, 25, (im_width, im_height))
while cam.grab():
i += 1
_, img = cam.retrieve()
cv2.imshow('input', img)
ret = detector.run(img)
time_str = ''
for stat in time_stats:
time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
if opt.output_video:
write_cap.write(ret['plot_img'])
print('fps:{:.3f}'.format(i/(time.time()-start_time)), time_str)
if cv2.waitKey(1) == 27:
return # esc to quit
else:
if os.path.isdir(opt.demo):
image_names = []
ls = os.listdir(opt.demo)
for file_name in sorted(ls):
ext = file_name[file_name.rfind('.') + 1:].lower()
if ext in image_ext:
image_names.append(os.path.join(opt.demo, file_name))
else:
image_names = [opt.demo]
        for image_name in image_names:
ret = detector.run(image_name)
time_str = ''
for stat in time_stats:
time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
print(time_str)
if __name__ == '__main__':
opt = opts().init()
demo(opt)
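# Example invocation (a sketch; flag names are assumed from the opt fields
# referenced above, following the CenterNet-style opts this script imports):
#   python demo.py multi_pose --demo /path/to/image_dir_or_video --load_model model.pth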
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
import numpy as np
import torch
import json
import os
import torch.utils.data as data
class FACE(data.Dataset):
num_classes = 1
default_resolution = [800, 800]
mean = np.array([0.485, 0.456, 0.406],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(FACE, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'wider_face')
self.img_dir = os.path.join(
            self.data_dir, 'image')  # used later when loading images
_ann_name = {'train': 'train', 'val': 'val'}
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'{}_wider_face.json').format(_ann_name[split])
self.max_objs = 50
self.class_name = ['__background__', "face"]
self._valid_ids = np.arange(1, 21, dtype=np.int32)
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
        print('==> initializing wider face {} data.'.format(_ann_name[split]))
self.coco = coco.COCO(self.annot_path)
self.images = sorted(self.coco.getImgIds())
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
detections = [[[] for __ in range(self.num_samples)]
for _ in range(self.num_classes + 1)]
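        # detections is indexed [class][image] -> list of [x1, y1, x2, y2, score]
        # entries, the VOC-style layout consumed by tools/reval.py in run_eval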
for i in range(self.num_samples):
img_id = self.images[i]
for j in range(1, self.num_classes + 1):
if isinstance(all_bboxes[img_id][j], np.ndarray):
detections[j][i] = all_bboxes[img_id][j].tolist()
else:
detections[j][i] = all_bboxes[img_id][j]
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
os.system('python tools/reval.py ' +
'{}/results.json'.format(save_dir))
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import numpy as np
import json
import os
import torch.utils.data as data
class FACEHP(data.Dataset):
num_classes = 1
num_joints = 5
default_resolution = [800, 800]
mean = np.array([0.40789654, 0.44719302, 0.47026115],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.28863828, 0.27408164, 0.27809835],
dtype=np.float32).reshape(1, 1, 3)
    flip_idx = [[0, 1], [3, 4]]  # keypoint index pairs swapped under horizontal flip
def __init__(self, opt, split):
super(FACEHP, self).__init__()
self.edges = [[0, 1], [0, 2], [1, 3], [2, 4],
[4, 6], [3, 5], [5, 6],
[5, 7], [7, 9], [6, 8], [8, 10],
[6, 12], [5, 11], [11, 12],
[12, 14], [14, 16], [11, 13], [13, 15]]
self.acc_idxs = [1, 2, 3, 4]
self.data_dir = opt.data_dir
self.img_dir = os.path.join(
            self.data_dir, 'WIDER_train/images')  # directory of the training images
        _ann_name = {'train': 'train', 'val': 'val'}
        # both splits resolve to the same pattern, so no branch is needed
        self.annot_path = os.path.join(
            self.data_dir, 'annotations',
            '{}_wider_face.json').format(_ann_name[split])
self.max_objs = 32
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
print('==> initializing centerface key point {} data.'.format(split))
self.coco = coco.COCO(self.annot_path)
image_ids = self.coco.getImgIds()
if split == 'train':
self.images = []
for img_id in image_ids:
idxs = self.coco.getAnnIds(imgIds=[img_id])
if len(idxs) > 0:
self.images.append(img_id)
else:
self.images = image_ids
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
# import pdb; pdb.set_trace()
detections = []
for image_id in all_bboxes:
for cls_ind in all_bboxes[image_id]:
category_id = 1
for dets in all_bboxes[image_id][cls_ind]:
bbox = dets[:4]
bbox[2] -= bbox[0]
bbox[3] -= bbox[1]
score = dets[4]
bbox_out = list(map(self._to_float, bbox))
                    # dets holds num_joints landmarks after the 4 box coords
                    # and the score; append visibility 1 to each point
                    # (the original hard-coded 17 joints, copied from coco_hp)
                    keypoints = np.concatenate([
                        np.array(dets[5:5 + self.num_joints * 2],
                                 dtype=np.float32).reshape(-1, 2),
                        np.ones((self.num_joints, 1), dtype=np.float32)],
                        axis=1).reshape(-1).tolist()
keypoints = list(map(self._to_float, keypoints))
detection = {
"image_id": int(image_id),
"category_id": int(category_id),
"bbox": bbox_out,
"score": float("{:.2f}".format(score)),
"keypoints": keypoints
}
detections.append(detection)
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(opt.save_dir, "results.json")
# detections = convert_eval_format(all_boxes)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
coco_eval = COCOeval(self.coco, coco_dets, "keypoints")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_eval = COCOeval(self.coco, coco_dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import numpy as np
import json
import os
import torch.utils.data as data
class COCO(data.Dataset):
num_classes = 80
default_resolution = [512, 512]
mean = np.array([0.40789654, 0.44719302, 0.47026115],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.28863828, 0.27408164, 0.27809835],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(COCO, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'coco')
self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
if split == 'test':
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'image_info_test-dev2017.json').format(split)
else:
if opt.task == 'exdet':
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'instances_extreme_{}2017.json').format(split)
else:
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'instances_{}2017.json').format(split)
self.max_objs = 128
self.class_name = [
'__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush']
self._valid_ids = [
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 84, 85, 86, 87, 88, 89, 90]
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
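        # the 80 official COCO category ids are non-contiguous (12, 26, ... are
        # unused), so cat_ids remaps them to contiguous training labels 0..79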
self.voc_color = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32)
for v in range(1, self.num_classes + 1)]
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
# self.mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3)
# self.std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3)
self.split = split
self.opt = opt
print('==> initializing coco 2017 {} data.'.format(split))
self.coco = coco.COCO(self.annot_path)
self.images = self.coco.getImgIds()
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
# import pdb; pdb.set_trace()
detections = []
for image_id in all_bboxes:
for cls_ind in all_bboxes[image_id]:
category_id = self._valid_ids[cls_ind - 1]
for bbox in all_bboxes[image_id][cls_ind]:
bbox[2] -= bbox[0]
bbox[3] -= bbox[1]
score = bbox[4]
bbox_out = list(map(self._to_float, bbox[0:4]))
detection = {
"image_id": int(image_id),
"category_id": int(category_id),
"bbox": bbox_out,
"score": float("{:.2f}".format(score))
}
if len(bbox) > 5:
extreme_points = list(map(self._to_float, bbox[5:13]))
detection["extreme_points"] = extreme_points
detections.append(detection)
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
coco_eval = COCOeval(self.coco, coco_dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import numpy as np
import json
import os
import torch.utils.data as data
class COCOHP(data.Dataset):
num_classes = 1
num_joints = 17
default_resolution = [512, 512]
mean = np.array([0.40789654, 0.44719302, 0.47026115],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.28863828, 0.27408164, 0.27809835],
dtype=np.float32).reshape(1, 1, 3)
flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
[11, 12], [13, 14], [15, 16]]
def __init__(self, opt, split):
super(COCOHP, self).__init__()
self.edges = [[0, 1], [0, 2], [1, 3], [2, 4],
[4, 6], [3, 5], [5, 6],
[5, 7], [7, 9], [6, 8], [8, 10],
[6, 12], [5, 11], [11, 12],
[12, 14], [14, 16], [11, 13], [13, 15]]
self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
self.data_dir = os.path.join(opt.data_dir, 'coco')
self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
if split == 'test':
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'image_info_test-dev2017.json').format(split)
else:
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'person_keypoints_{}2017.json').format(split)
self.max_objs = 32
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
print('==> initializing coco 2017 {} data.'.format(split))
self.coco = coco.COCO(self.annot_path)
image_ids = self.coco.getImgIds()
if split == 'train':
self.images = []
for img_id in image_ids:
idxs = self.coco.getAnnIds(imgIds=[img_id])
if len(idxs) > 0:
self.images.append(img_id)
else:
self.images = image_ids
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
# import pdb; pdb.set_trace()
detections = []
for image_id in all_bboxes:
for cls_ind in all_bboxes[image_id]:
category_id = 1
for dets in all_bboxes[image_id][cls_ind]:
bbox = dets[:4]
bbox[2] -= bbox[0]
bbox[3] -= bbox[1]
score = dets[4]
bbox_out = list(map(self._to_float, bbox))
keypoints = np.concatenate([
np.array(dets[5:39], dtype=np.float32).reshape(-1, 2),
np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist()
keypoints = list(map(self._to_float, keypoints))
detection = {
"image_id": int(image_id),
"category_id": int(category_id),
"bbox": bbox_out,
"score": float("{:.2f}".format(score)),
"keypoints": keypoints
}
detections.append(detection)
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(opt.save_dir, "results.json")
# detections = convert_eval_format(all_boxes)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
coco_eval = COCOeval(self.coco, coco_dets, "keypoints")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_eval = COCOeval(self.coco, coco_dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import torch
import json
import cv2
import numpy as np
import torch.utils.data as data
import pycocotools.coco as coco
class KITTI(data.Dataset):
num_classes = 3
default_resolution = [384, 1280]
mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(KITTI, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'kitti')
self.img_dir = os.path.join(self.data_dir, 'images', 'trainval')
if opt.trainval:
split = 'trainval' if split == 'train' else 'test'
self.img_dir = os.path.join(self.data_dir, 'images', split)
self.annot_path = os.path.join(
self.data_dir, 'annotations', 'kitti_{}.json').format(split)
else:
self.annot_path = os.path.join(self.data_dir,
'annotations', 'kitti_{}_{}.json').format(opt.kitti_split, split)
self.max_objs = 50
self.class_name = [
'__background__', 'Pedestrian', 'Car', 'Cyclist']
self.cat_ids = {1: 0, 2: 1, 3: 2, 4: -3,
5: -3, 6: -2, 7: -99, 8: -99, 9: -1}
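        # negative ids mark ignore labels: categories mapped to -99 are skipped
        # outright, while other negatives are rendered as ignore regions in the
        # heatmap (see the cls_id < 0 branch in sample/ddd.py)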
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
self.alpha_in_degree = False
print('==> initializing kitti {}, {} data.'.format(opt.kitti_split, split))
self.coco = coco.COCO(self.annot_path)
self.images = self.coco.getImgIds()
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def __len__(self):
return self.num_samples
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
pass
def save_results(self, results, save_dir):
results_dir = os.path.join(save_dir, 'results')
if not os.path.exists(results_dir):
os.mkdir(results_dir)
for img_id in results.keys():
out_path = os.path.join(results_dir, '{:06d}.txt'.format(img_id))
f = open(out_path, 'w')
for cls_ind in results[img_id]:
for j in range(len(results[img_id][cls_ind])):
class_name = self.class_name[cls_ind]
f.write('{} 0.0 0'.format(class_name))
for i in range(len(results[img_id][cls_ind][j])):
f.write(' {:.2f}'.format(
results[img_id][cls_ind][j][i]))
f.write('\n')
f.close()
def run_eval(self, results, save_dir):
self.save_results(results, save_dir)
os.system('./tools/kitti_eval/evaluate_object_3d_offline ' +
'../data/kitti/training/label_val ' +
'{}/results/'.format(save_dir))
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import torch
import json
import numpy as np
import torch.utils.data as data
import pycocotools.coco as coco
class PascalVOC(data.Dataset):
num_classes = 20
default_resolution = [384, 384]
mean = np.array([0.485, 0.456, 0.406],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(PascalVOC, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'voc')
self.img_dir = os.path.join(self.data_dir, 'images')
_ann_name = {'train': 'trainval0712', 'val': 'test2007'}
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'pascal_{}.json').format(_ann_name[split])
self.max_objs = 50
self.class_name = ['__background__', "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
"horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
"train", "tvmonitor"]
self._valid_ids = np.arange(1, 21, dtype=np.int32)
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
print('==> initializing pascal {} data.'.format(_ann_name[split]))
self.coco = coco.COCO(self.annot_path)
self.images = sorted(self.coco.getImgIds())
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
detections = [[[] for __ in range(self.num_samples)]
for _ in range(self.num_classes + 1)]
for i in range(self.num_samples):
img_id = self.images[i]
for j in range(1, self.num_classes + 1):
if isinstance(all_bboxes[img_id][j], np.ndarray):
detections[j][i] = all_bboxes[img_id][j].tolist()
else:
detections[j][i] = all_bboxes[img_id][j]
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
os.system('python tools/reval.py ' +
'{}/results.json'.format(save_dir))
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import torch
import json
import numpy as np
import torch.utils.data as data
import pycocotools.coco as coco
class PIG(data.Dataset):
num_classes = 1
default_resolution = [512, 512]
mean = np.array([0.485, 0.456, 0.406],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(PIG, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'pig')
self.img_dir = os.path.join(
            self.data_dir, 'image')  # used later when loading images
_ann_name = {'train': 'train', 'val': 'val'}
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'{}_pig.json').format(_ann_name[split])
self.max_objs = 50
self.class_name = ['__background__', "pig"]
self._valid_ids = np.arange(1, 21, dtype=np.int32)
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
        print('==> initializing pig {} data.'.format(_ann_name[split]))
self.coco = coco.COCO(self.annot_path)
self.images = sorted(self.coco.getImgIds())
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
detections = [[[] for __ in range(self.num_samples)]
for _ in range(self.num_classes + 1)]
for i in range(self.num_samples):
img_id = self.images[i]
for j in range(1, self.num_classes + 1):
if isinstance(all_bboxes[img_id][j], np.ndarray):
detections[j][i] = all_bboxes[img_id][j].tolist()
else:
detections[j][i] = all_bboxes[img_id][j]
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
os.system('python tools/reval.py ' +
'{}/results.json'.format(save_dir))
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import torch
import json
import torch.utils.data as data
import pycocotools.coco as coco
import numpy as np
class PIG2(data.Dataset):
num_classes = 2
default_resolution = [512, 512]
mean = np.array([0.485, 0.456, 0.406],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(PIG2, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'pig')
self.img_dir = os.path.join(
            self.data_dir, 'image')  # used later when loading images
_ann_name = {'train': 'train', 'val': 'val'}
self.annot_path = os.path.join(
self.data_dir, 'annotations',
            '{}_pig.json').format(_ann_name[split])  # default, overridden by opt.train_json / opt.val_json below
if split == 'train':
self.annot_path = opt.train_json
elif split == 'val':
self.annot_path = opt.val_json
self.max_objs = 50
self.class_name = ['__background__', "defect1", "defect2"]
self._valid_ids = np.arange(1, 21, dtype=np.int32)
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
        print('==> initializing pig2 {} data.'.format(_ann_name[split]))
self.coco = coco.COCO(self.annot_path)
self.images = sorted(self.coco.getImgIds())
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
detections = [[[] for __ in range(self.num_samples)]
for _ in range(self.num_classes + 1)]
for i in range(self.num_samples):
img_id = self.images[i]
for j in range(1, self.num_classes + 1):
if isinstance(all_bboxes[img_id][j], np.ndarray):
detections[j][i] = all_bboxes[img_id][j].tolist()
else:
detections[j][i] = all_bboxes[img_id][j]
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
os.system('python tools/reval.py ' +
'{}/results.json'.format(save_dir))
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from .sample.ddd import DddDataset
from .sample.exdet import EXDetDataset
from .sample.ctdet import CTDetDataset
from .sample.multi_pose import MultiPoseDataset
from .dataset.coco import COCO
from .dataset.pascal import PascalVOC
from .dataset.kitti import KITTI
from .dataset.coco_hp import COCOHP
from .dataset.pig import PIG
from .dataset.pig2 import PIG2
from .dataset.centerface import FACE
from .dataset.centerface_hp import FACEHP
dataset_factory = {
'coco': COCO,
'pascal': PascalVOC,
'kitti': KITTI,
'coco_hp': COCOHP,
'pig': PIG,
'pig2': PIG2,
'face': FACE,
'facehp': FACEHP
}
_sample_factory = {
'exdet': EXDetDataset,
'ctdet': CTDetDataset,
'ddd': DddDataset,
'multi_pose': MultiPoseDataset
}
def get_dataset(dataset, task):
    class Dataset(dataset_factory[dataset], _sample_factory[task]):  # dual inheritance: dataset metadata + task-specific sampling
pass
return Dataset
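# A minimal usage sketch (assumes an `opt` object built by the opts module,
# as in the training scripts):
#   Dataset = get_dataset('facehp', 'multi_pose')
#   train_set = Dataset(opt, 'train')  # FACEHP metadata + MultiPoseDataset.__getitem__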
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch.utils.data as data
import numpy as np
import torch
import json
import cv2
import os
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
from utils.image import draw_dense_reg
import math
import matplotlib.pyplot as plt
class CTDetDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _get_border(self, border, size):
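        # halve the margin until a crop center can be drawn from
        # [border // i, size - border // i); this keeps the np.random.randint
        # calls below from seeing an empty range on small images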
i = 1
while size - border // i <= border // i:
i *= 2
return border // i
def __getitem__(self, index):
img_id = self.images[index]
file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
img_path = os.path.join(self.img_dir, file_name)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = min(len(anns), self.max_objs)
img = cv2.imread(img_path)
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
if self.opt.keep_res:
input_h = (height | self.opt.pad) + 1
input_w = (width | self.opt.pad) + 1
s = np.array([input_w, input_h], dtype=np.float32)
else:
s = max(img.shape[0], img.shape[1]) * 1.0
input_h, input_w = self.opt.input_h, self.opt.input_w
flipped = False
if self.split == 'train':
if not self.opt.not_rand_crop:
s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
w_border = self._get_border(128, img.shape[1])
h_border = self._get_border(128, img.shape[0])
c[0] = np.random.randint(
low=w_border, high=img.shape[1] - w_border)
c[1] = np.random.randint(
low=h_border, high=img.shape[0] - h_border)
else:
sf = self.opt.scale
cf = self.opt.shift
c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
if np.random.random() < self.opt.flip:
flipped = True
img = img[:, ::-1, :]
c[0] = width - c[0] - 1
trans_input = get_affine_transform(
c, s, 0, [input_w, input_h])
inp = cv2.warpAffine(img, trans_input,
(input_w, input_h),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
if self.split == 'train' and not self.opt.no_color_aug:
color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
output_h = input_h // self.opt.down_ratio
output_w = input_w // self.opt.down_ratio
num_classes = self.num_classes
trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
wh = np.zeros((self.max_objs, 2), dtype=np.float32)
dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
reg = np.zeros((self.max_objs, 2), dtype=np.float32)
ind = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
cat_spec_wh = np.zeros(
(self.max_objs, num_classes * 2), dtype=np.float32)
cat_spec_mask = np.zeros(
(self.max_objs, num_classes * 2), dtype=np.uint8)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
gt_det = []
for k in range(num_objs):
ann = anns[k]
bbox = self._coco_box_to_bbox(ann['bbox'])
cls_id = int(self.cat_ids[ann['category_id']])
if flipped:
bbox[[0, 2]] = width - bbox[[2, 0]] - 1
bbox[:2] = affine_transform(bbox[:2], trans_output)
bbox[2:] = affine_transform(bbox[2:], trans_output)
bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
if h > 0 and w > 0:
radius = gaussian_radius(
                    (math.ceil(h), math.ceil(w)))  # Gaussian radius
radius = max(0, int(radius))
radius = self.opt.hm_gauss if self.opt.mse_loss else radius
ct = np.array(
                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)  # box center
ct_int = ct.astype(np.int32)
                # one heatmap channel per class
draw_gaussian(hm[cls_id], ct_int, radius)
                # object width/height
wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + \
                    ct_int[0]  # flat index: y * output_w + x
                # quantization error of the integer center
reg[k] = ct - ct_int
reg_mask[k] = 1
cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
if self.opt.dense_wh:
draw_dense_reg(dense_wh, hm.max(
axis=0), ct_int, wh[k], radius)
gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
ret = {'input': inp, 'hm': hm,
'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
if self.opt.dense_wh:
hm_a = hm.max(axis=0, keepdims=True)
dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
del ret['wh']
elif self.opt.cat_spec_wh:
ret.update({'cat_spec_wh': cat_spec_wh,
'cat_spec_mask': cat_spec_mask})
del ret['wh']
if self.opt.reg_offset:
ret.update({'reg': reg})
if self.opt.debug > 0 or not self.split == 'train':
gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
np.zeros((1, 6), dtype=np.float32)
meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
ret['meta'] = meta
return ret
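# Decoding sketch for the 'ind' encoding above (illustrative only): a flat
# index i on the output_w-wide feature map recovers the integer center via
#   y, x = i // output_w, i % output_w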
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import torch
import json
import cv2
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
import numpy as np
import pycocotools.coco as coco
import torch.utils.data as data
class DddDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _convert_alpha(self, alpha):
return math.radians(alpha + 45) if self.alpha_in_degree else alpha
def __getitem__(self, index):
img_id = self.images[index]
img_info = self.coco.loadImgs(ids=[img_id])[0]
img_path = os.path.join(self.img_dir, img_info['file_name'])
img = cv2.imread(img_path)
if 'calib' in img_info:
calib = np.array(img_info['calib'], dtype=np.float32)
else:
calib = self.calib
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
if self.opt.keep_res:
s = np.array([self.opt.input_w, self.opt.input_h], dtype=np.int32)
else:
s = np.array([width, height], dtype=np.int32)
aug = False
if self.split == 'train' and np.random.random() < self.opt.aug_ddd:
aug = True
sf = self.opt.scale
cf = self.opt.shift
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
c[0] += img.shape[1] * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += img.shape[0] * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
trans_input = get_affine_transform(
c, s, 0, [self.opt.input_w, self.opt.input_h])
inp = cv2.warpAffine(img, trans_input,
(self.opt.input_w, self.opt.input_h),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
# if self.split == 'train' and not self.opt.no_color_aug:
# color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
num_classes = self.opt.num_classes
trans_output = get_affine_transform(
c, s, 0, [self.opt.output_w, self.opt.output_h])
hm = np.zeros(
(num_classes, self.opt.output_h, self.opt.output_w), dtype=np.float32)
wh = np.zeros((self.max_objs, 2), dtype=np.float32)
reg = np.zeros((self.max_objs, 2), dtype=np.float32)
dep = np.zeros((self.max_objs, 1), dtype=np.float32)
rotbin = np.zeros((self.max_objs, 2), dtype=np.int64)
rotres = np.zeros((self.max_objs, 2), dtype=np.float32)
dim = np.zeros((self.max_objs, 3), dtype=np.float32)
ind = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
rot_mask = np.zeros((self.max_objs), dtype=np.uint8)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = min(len(anns), self.max_objs)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
gt_det = []
for k in range(num_objs):
ann = anns[k]
bbox = self._coco_box_to_bbox(ann['bbox'])
cls_id = int(self.cat_ids[ann['category_id']])
if cls_id <= -99:
continue
# if flipped:
# bbox[[0, 2]] = width - bbox[[2, 0]] - 1
bbox[:2] = affine_transform(bbox[:2], trans_output)
bbox[2:] = affine_transform(bbox[2:], trans_output)
bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.opt.output_w - 1)
bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.opt.output_h - 1)
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
if h > 0 and w > 0:
radius = gaussian_radius((h, w))
radius = max(0, int(radius))
ct = np.array(
[(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
ct_int = ct.astype(np.int32)
if cls_id < 0:
ignore_id = [_ for _ in range(num_classes)] \
if cls_id == - 1 else [- cls_id - 2]
if self.opt.rect_mask:
hm[ignore_id, int(bbox[1]): int(bbox[3]) + 1,
int(bbox[0]): int(bbox[2]) + 1] = 0.9999
else:
for cc in ignore_id:
draw_gaussian(hm[cc], ct, radius)
hm[ignore_id, ct_int[1], ct_int[0]] = 0.9999
continue
draw_gaussian(hm[cls_id], ct, radius)
wh[k] = 1. * w, 1. * h
gt_det.append([ct[0], ct[1], 1] +
self._alpha_to_8(self._convert_alpha(ann['alpha'])) +
[ann['depth']] + (np.array(ann['dim']) / 1).tolist() + [cls_id])
if self.opt.reg_bbox:
gt_det[-1] = gt_det[-1][:-1] + [w, h] + [gt_det[-1][-1]]
# if (not self.opt.car_only) or cls_id == 1: # Only estimate ADD for cars !!!
if 1:
alpha = self._convert_alpha(ann['alpha'])
# print('img_id cls_id alpha rot_y', img_path, cls_id, alpha, ann['rotation_y'])
if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
rotbin[k, 0] = 1
rotres[k, 0] = alpha - (-0.5 * np.pi)
if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
rotbin[k, 1] = 1
rotres[k, 1] = alpha - (0.5 * np.pi)
dep[k] = ann['depth']
dim[k] = ann['dim']
# print(' cat dim', cls_id, dim[k])
ind[k] = ct_int[1] * self.opt.output_w + ct_int[0]
reg[k] = ct - ct_int
reg_mask[k] = 1 if not aug else 0
rot_mask[k] = 1
# print('gt_det', gt_det)
# print('')
ret = {'input': inp, 'hm': hm, 'dep': dep, 'dim': dim, 'ind': ind,
'rotbin': rotbin, 'rotres': rotres, 'reg_mask': reg_mask,
'rot_mask': rot_mask}
if self.opt.reg_bbox:
ret.update({'wh': wh})
if self.opt.reg_offset:
ret.update({'reg': reg})
if self.opt.debug > 0 or not ('train' in self.split):
gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
np.zeros((1, 18), dtype=np.float32)
meta = {'c': c, 's': s, 'gt_det': gt_det, 'calib': calib,
'image_path': img_path, 'img_id': img_id}
ret['meta'] = meta
return ret
def _alpha_to_8(self, alpha):
# return [alpha, 0, 0, 0, 0, 0, 0, 0]
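        # 8-dim multi-bin layout, as read from the assignments below:
        # [bin1_cls0, bin1_cls1, sin(r1), cos(r1), bin2_cls0, bin2_cls1, sin(r2), cos(r2)]
        # with residuals r taken around -pi/2 (bin 1) and +pi/2 (bin 2)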
ret = [0, 0, 0, 1, 0, 0, 0, 1]
if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
r = alpha - (-0.5 * np.pi)
ret[1] = 1
ret[2], ret[3] = np.sin(r), np.cos(r)
if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
r = alpha - (0.5 * np.pi)
ret[5] = 1
ret[6], ret[7] = np.sin(r), np.cos(r)
return ret
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import torch
import json
import cv2
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
import numpy as np
import torch.utils.data as data
import pycocotools.coco as coco
class EXDetDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _get_border(self, border, size):
i = 1
while size - border // i <= border // i:
i *= 2
return border // i
def __getitem__(self, index):
img_id = self.images[index]
img_info = self.coco.loadImgs(ids=[img_id])[0]
img_path = os.path.join(self.img_dir, img_info['file_name'])
img = cv2.imread(img_path)
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
s = max(img.shape[0], img.shape[1]) * 1.0
flipped = False
if self.split == 'train':
if not self.opt.not_rand_crop:
s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
w_border = self._get_border(128, img.shape[1])
h_border = self._get_border(128, img.shape[0])
c[0] = np.random.randint(
low=w_border, high=img.shape[1] - w_border)
c[1] = np.random.randint(
low=h_border, high=img.shape[0] - h_border)
else:
sf = self.opt.scale
cf = self.opt.shift
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
c[0] += img.shape[1] * \
np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += img.shape[0] * \
np.clip(np.random.randn()*cf, -2*cf, 2*cf)
if np.random.random() < self.opt.flip:
flipped = True
img = img[:, ::-1, :]
trans_input = get_affine_transform(
c, s, 0, [self.opt.input_res, self.opt.input_res])
inp = cv2.warpAffine(img, trans_input,
(self.opt.input_res, self.opt.input_res),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
if self.split == 'train' and not self.opt.no_color_aug:
color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
output_res = self.opt.output_res
num_classes = self.opt.num_classes
trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
num_hm = 1 if self.opt.agnostic_ex else num_classes
hm_t = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_l = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_b = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_r = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_c = np.zeros((num_classes, output_res, output_res),
dtype=np.float32)
reg_t = np.zeros((self.max_objs, 2), dtype=np.float32)
reg_l = np.zeros((self.max_objs, 2), dtype=np.float32)
reg_b = np.zeros((self.max_objs, 2), dtype=np.float32)
reg_r = np.zeros((self.max_objs, 2), dtype=np.float32)
ind_t = np.zeros((self.max_objs), dtype=np.int64)
ind_l = np.zeros((self.max_objs), dtype=np.int64)
ind_b = np.zeros((self.max_objs), dtype=np.int64)
ind_r = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = min(len(anns), self.max_objs)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
for k in range(num_objs):
ann = anns[k]
# bbox = self._coco_box_to_bbox(ann['bbox'])
# tlbr
pts = np.array(ann['extreme_points'],
dtype=np.float32).reshape(4, 2)
# cls_id = int(self.cat_ids[ann['category_id']] - 1) # bug
cls_id = int(self.cat_ids[ann['category_id']])
hm_id = 0 if self.opt.agnostic_ex else cls_id
if flipped:
pts[:, 0] = width - pts[:, 0] - 1
pts[1], pts[3] = pts[3].copy(), pts[1].copy()
for j in range(4):
pts[j] = affine_transform(pts[j], trans_output)
pts = np.clip(pts, 0, self.opt.output_res - 1)
h, w = pts[2, 1] - pts[0, 1], pts[3, 0] - pts[1, 0]
if h > 0 and w > 0:
radius = gaussian_radius((math.ceil(h), math.ceil(w)))
radius = max(0, int(radius))
pt_int = pts.astype(np.int32)
draw_gaussian(hm_t[hm_id], pt_int[0], radius)
draw_gaussian(hm_l[hm_id], pt_int[1], radius)
draw_gaussian(hm_b[hm_id], pt_int[2], radius)
draw_gaussian(hm_r[hm_id], pt_int[3], radius)
reg_t[k] = pts[0] - pt_int[0]
reg_l[k] = pts[1] - pt_int[1]
reg_b[k] = pts[2] - pt_int[2]
reg_r[k] = pts[3] - pt_int[3]
ind_t[k] = pt_int[0, 1] * output_res + pt_int[0, 0]
ind_l[k] = pt_int[1, 1] * output_res + pt_int[1, 0]
ind_b[k] = pt_int[2, 1] * output_res + pt_int[2, 0]
ind_r[k] = pt_int[3, 1] * output_res + pt_int[3, 0]
ct = [int((pts[3, 0] + pts[1, 0]) / 2),
int((pts[0, 1] + pts[2, 1]) / 2)]
draw_gaussian(hm_c[cls_id], ct, radius)
reg_mask[k] = 1
ret = {'input': inp, 'hm_t': hm_t, 'hm_l': hm_l, 'hm_b': hm_b,
'hm_r': hm_r, 'hm_c': hm_c}
if self.opt.reg_offset:
ret.update({'reg_mask': reg_mask,
'reg_t': reg_t, 'reg_l': reg_l, 'reg_b': reg_b, 'reg_r': reg_r,
'ind_t': ind_t, 'ind_l': ind_l, 'ind_b': ind_b, 'ind_r': ind_r})
return ret
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import re
import math
import torch
import json
import cv2
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
from utils.image import draw_dense_reg
from utils.utils import Data_anchor_sample
from utils.Randaugmentations import Randaugment
from PIL import Image
import torch.utils.data as data
import numpy as np
TORCH_MAJOR = int(torch.__version__.split('.')[0])
TORCH_MINOR = int(torch.__version__.split('.')[1])
if TORCH_MAJOR == 1 and TORCH_MINOR < 8:
from torch._six import container_abcs, string_classes, int_classes
else:
string_classes = str
int_classes = int
import collections.abc as container_abcs
np_str_obj_array_pattern = re.compile(r'[SaUO]')
class MultiPoseDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _get_border(self, border, size):
i = 1
while size - border // i <= border // i:
i *= 2
return border // i
def __getitem__(self, index):
img_id = self.images[index]
file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
img_path = os.path.join(self.img_dir, file_name)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = len(anns)
# num_objs = min(len(anns), self.max_objs)
if num_objs > self.max_objs:
num_objs = self.max_objs
            anns = np.random.choice(anns, num_objs, replace=False)
img = cv2.imread(img_path)
img, anns = Data_anchor_sample(img, anns)
# # for test the keypoint order
# img1 = cv2.flip(img,1)
# for ann in anns:
# width = img1.shape[1]
# bbox = self._coco_box_to_bbox(ann['bbox'])
# bbox[[0, 2]] = width - bbox[[2, 0]] - 1
# pts = np.array(ann['keypoints'], np.float32).reshape(5, 3)
#
# # for flip
# pts[:, 0] = width - pts[:, 0] - 1
# for e in self.flip_idx:
# pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
#
# # for debug show
# def add_coco_bbox(image, bbox, conf=1):
# txt = '{}{:.1f}'.format('person', conf)
# font = cv2.FONT_HERSHEY_SIMPLEX
# cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 255), 2)
# cv2.putText(image, txt, (bbox[0], bbox[1] - 2),
# font, 0.5, (0, 255, 0), thickness=1, lineType=cv2.LINE_AA)
#
# def add_coco_hp(image, points, keypoints_prob=1):
# for j in range(5):
# if keypoints_prob > 0.5:
# if j == 0:
# cv2.circle(image, (points[j, 0], points[j, 1]), 2, (255, 255, 0), -1)
# elif j == 1:
# cv2.circle(image, (points[j, 0], points[j, 1]), 2, (255, 0, 0), -1)
# elif j == 2:
# cv2.circle(image, (points[j, 0], points[j, 1]), 2, (0, 255, 0), -1)
# elif j == 3:
# cv2.circle(image, (points[j, 0], points[j, 1]), 2, (0, 0, 255), -1)
# elif j == 4:
# cv2.circle(image, (points[j, 0], points[j, 1]), 2, (0, 0, 0), -1)
# return image
#
# bbox = [int(x) for x in bbox]
# add_coco_bbox(img1, bbox )
# add_coco_hp(img1, pts)
# cv2.imshow('mat', img1)
# cv2.waitKey(5000)
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
s = max(img.shape[0], img.shape[1]) * 1.0
rot = 0
flipped = False
if self.split == 'train':
if not self.opt.not_rand_crop:
# s = s * np.random.choice(np.arange(0.8, 1.1, 0.1))
# s = s * np.random.choice(np.arange(0.3, 1.2, 0.1))
s = s # not scale
# _border = np.random.randint(128*0.4, 128*1.4)
_border = s * np.random.choice([0.1, 0.2, 0.25])
w_border = self._get_border(_border, img.shape[1])
h_border = self._get_border(_border, img.shape[0])
c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
else:
sf = self.opt.scale
cf = self.opt.shift
c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
if np.random.random() < self.opt.aug_rot:
rf = self.opt.rotate
rot = np.clip(np.random.randn()*rf, -rf*2, rf*2)
if np.random.random() < self.opt.flip:
flipped = True
img = img[:, ::-1, :]
c[0] = width - c[0] - 1
trans_input = get_affine_transform(
c, s, rot, [self.opt.input_res, self.opt.input_res])
inp = cv2.warpAffine(img, trans_input,
(self.opt.input_res, self.opt.input_res),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
if self.split == 'train' and not self.opt.no_color_aug:
            # random color augmentation
color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
# inp = Randaugment(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
output_res = self.opt.output_res
num_joints = self.num_joints
trans_output_rot = get_affine_transform(c, s, rot, [output_res, output_res])
trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
hm = np.zeros((self.num_classes, output_res, output_res), dtype=np.float32)
hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
dense_kps = np.zeros((num_joints, 2, output_res, output_res),
dtype=np.float32)
dense_kps_mask = np.zeros((num_joints, output_res, output_res),
dtype=np.float32)
wh = np.zeros((self.max_objs, 2), dtype=np.float32)
kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
reg = np.zeros((self.max_objs, 2), dtype=np.float32)
ind = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
wight_mask = np.ones((self.max_objs), dtype=np.float32)
kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
gt_det = []
for k in range(num_objs):
ann = anns[k]
bbox = self._coco_box_to_bbox(ann['bbox'])
cls_id = int(ann['category_id']) - 1
pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
if flipped:
bbox[[0, 2]] = width - bbox[[2, 0]] - 1
pts[:, 0] = width - pts[:, 0] - 1
for e in self.flip_idx:
pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
bbox[:2] = affine_transform(bbox[:2], trans_output)
bbox[2:] = affine_transform(bbox[2:], trans_output)
bbox = np.clip(bbox, 0, output_res - 1)
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
if (h > 0 and w > 0) or (rot != 0):
radius = gaussian_radius((math.ceil(h), math.ceil(w)))
radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius))
ct = np.array(
                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)  # center of the face box
                ct_int = ct.astype(np.int32)  # quantize to the integer grid
                # wh[k] = 1. * w, 1. * h  # 2. the plain CenterNet encoding
                wh[k] = np.log(1. * w / 4), np.log(1. * h / 4)  # 2. log-encoded face box size, as in the CenterFace paper
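                # (inverse of the encoding above: on the output map,
                #  w = 4 * exp(wh[k][0]) and h = 4 * exp(wh[k][1]))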
                ind[k] = ct_int[1] * output_res + ct_int[0]  # flat index of the face center on the 1/4-resolution map
                reg[k] = ct - ct_int  # 3. quantization offset of the face-box center
                reg_mask[k] = 1  # this slot contributes to the loss
# if w*h <= 20:
# wight_mask[k] = 15
                num_kpts = pts[:, 2].sum()  # number of labeled keypoints (sum of visibility flags)
                if num_kpts == 0 or w*h <= 8:  # faces without keypoint labels, or tiny ones, count as hard samples
                    # print('no keypoint labels, or face bbox under 8 pixels')
hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
# reg_mask[k] = 0
hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
hp_radius = self.opt.hm_gauss \
if self.opt.mse_loss else max(0, int(hp_radius))
for j in range(num_joints):
if pts[j, 2] > 0:
pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
pts[j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int  # 4. keypoint offset from the face-box center
kps_mask[k, j * 2: j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)  # quantized keypoint
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int  # keypoint quantization offset
                            hp_ind[k * num_joints + j] = pt_int[1] * output_res + pt_int[0]  # flat index
                            hp_mask[k * num_joints + j] = 1  # mask for the keypoint loss
if self.opt.dense_hp:
# must be before draw center hm gaussian
draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
pts[j, :2] - ct_int, radius, is_offset=True)
draw_gaussian(dense_kps_mask[j], ct_int, radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)  # 1. keypoint Gaussian heatmap
                            if ann['bbox'][2]*ann['bbox'][3] <= 16.0:  # ignore keypoints of very small faces
kps_mask[k, j * 2: j * 2 + 2] = 0
draw_gaussian(hm[cls_id], ct_int, radius)
gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
ct[0] + w / 2, ct[1] + h / 2, 1] +
pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
if rot != 0:
hm = hm * 0 + 0.9999
reg_mask *= 0
kps_mask *= 0
ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
'landmarks': kps, 'hps_mask': kps_mask, 'wight_mask': wight_mask}
if self.opt.dense_hp:
dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
dense_kps_mask = dense_kps_mask.reshape(
num_joints, 1, output_res, output_res)
dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask], axis=1)
dense_kps_mask = dense_kps_mask.reshape(
num_joints * 2, output_res, output_res)
ret.update({'dense_hps': dense_kps, 'dense_hps_mask': dense_kps_mask})
del ret['landmarks'], ret['hps_mask'] # dense targets replace the sparse ones ('hps' was renamed 'landmarks' in ret above)
if self.opt.reg_offset:
ret.update({'hm_offset': reg}) # sub-pixel offset of the quantized face center
if self.opt.hm_hp:
ret.update({'hm_hp': hm_hp})
if self.opt.reg_hp_offset:
ret.update({'hp_offset': hp_offset, 'hp_ind': hp_ind, 'hp_mask': hp_mask})
if self.opt.debug > 0 or self.split != 'train':
gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
np.zeros((1, 40), dtype=np.float32)
meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
ret['meta'] = meta
return ret
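# --- Hedged sketch (not part of the original file): decoding the size target
# built above. wh[k] stores log(w/4) and log(h/4), so recovering the box size
# just inverts that encoding; decode_face_size / pred_wh are illustrative names.
def decode_face_size(pred_wh):
    # pred_wh: (N, 2) array of log-encoded sizes -> (N, 2) sizes in the same
    # units used when the target was built (output-resolution pixels here)
    return 4.0 * np.exp(pred_wh)
# e.g. a 64 x 32 box is stored as (log(16), log(8)) ~= (2.77, 2.08), and
# decode_face_size recovers (64.0, 32.0) up to float precision.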
# collation helpers; in the PyTorch versions this code targets, the names used
# below come from torch._six and re (added here in case they are not imported
# earlier in the file)
import re
from torch._six import container_abcs, string_classes, int_classes
np_str_obj_array_pattern = re.compile(r'[SaUO]')
_use_shared_memory = False
error_msg_fmt = "batch must contain tensors, numbers, dicts or lists; found {}"
numpy_type_map = {
'float64': torch.DoubleTensor,
'float32': torch.FloatTensor,
'float16': torch.HalfTensor,
'int64': torch.LongTensor,
'int32': torch.IntTensor,
'int16': torch.ShortTensor,
'int8': torch.CharTensor,
'uint8': torch.ByteTensor,
}
def default_collate(batch):
r"""Puts each data field into a tensor with outer dimension batch size"""
elem_type = type(batch[0])
if isinstance(batch[0], torch.Tensor):
out = None
if _use_shared_memory:
# If we're in a background process, concatenate directly into a
# shared memory tensor to avoid an extra copy
numel = sum([x.numel() for x in batch])
storage = batch[0].storage()._new_shared(numel)
out = batch[0].new(storage)
return torch.stack(batch, 0, out=out)
elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
and elem_type.__name__ != 'string_':
elem = batch[0]
if elem_type.__name__ == 'ndarray':
# array of string classes and object
if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
raise TypeError(error_msg_fmt.format(elem.dtype))
return default_collate([torch.from_numpy(b) for b in batch])
if elem.shape == (): # scalars
py_type = float if elem.dtype.name.startswith('float') else int
return numpy_type_map[elem.dtype.name](list(map(py_type, batch)))
elif isinstance(batch[0], float):
return torch.tensor(batch, dtype=torch.float64)
elif isinstance(batch[0], int_classes):
return torch.tensor(batch)
elif isinstance(batch[0], string_classes):
return batch
elif isinstance(batch[0], container_abcs.Mapping):
return {key: default_collate([d[key] for d in batch]) for key in batch[0]}
elif isinstance(batch[0], tuple) and hasattr(batch[0], '_fields'): # namedtuple
return type(batch[0])(*(default_collate(samples) for samples in zip(*batch)))
elif isinstance(batch[0], container_abcs.Sequence):
transposed = zip(*batch)
return [default_collate(samples) for samples in transposed]
raise TypeError((error_msg_fmt.format(type(batch[0]))))
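# Hedged example (not in the original source): default_collate stacks a list of
# per-sample numpy arrays into one batched tensor, e.g.
#   default_collate([np.zeros((3, 8, 8)), np.ones((3, 8, 8))]).shape
#   -> torch.Size([2, 3, 8, 8])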
def multipose_collate(batch):
objects_dims = [d.shape[0] for d in batch]
index = objects_dims.index(max(objects_dims))
# one_dim = True if len(batch[0].shape) == 1 else False
res = []
for i in range(len(batch)):
tres = np.zeros_like(batch[index], dtype=batch[index].dtype)
tres[:batch[i].shape[0]] = batch[i]
res.append(tres)
return res
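# Hedged illustration (not in the original source): multipose_collate zero-pads
# every array to the largest first dimension in the batch so they can be stacked,
# e.g. with a = np.ones((2, 4)) and b = np.ones((5, 4)),
# multipose_collate([a, b]) returns two (5, 4) arrays, a padded with zeros in rows 2..4.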
def Multiposebatch(batch):
sample_batch = {}
for key in batch[0]:
if key in ['hm', 'input']:
sample_batch[key] = default_collate([d[key] for d in batch])
else:
align_batch = multipose_collate([d[key] for d in batch])
sample_batch[key] = default_collate(align_batch)
return sample_batch
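# Hedged usage sketch (train_dataset and the batch size are illustrative; assumes
# a dataset whose __getitem__ returns the dict built above):
#   from torch.utils.data import DataLoader
#   loader = DataLoader(train_dataset, batch_size=8, shuffle=True,
#                       collate_fn=Multiposebatch)
# 'input' and 'hm' have fixed shapes and go straight through default_collate;
# the per-object targets (wh, landmarks, ind, ...) are zero-padded first.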
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import time
import torch
from progress.bar import Bar
from models.model import create_model, load_model
from utils.image import get_affine_transform
from utils.debugger import Debugger
import numpy as np
class BaseDetector(object):
def __init__(self, opt):
if opt.gpus[0] >= 0:
opt.device = torch.device('cuda')
else:
opt.device = torch.device('cpu')
print('Creating model...')
self.model = create_model(opt.arch, opt.heads, opt.head_conv)
self.model = load_model(self.model, opt.load_model)
self.model = self.model.to(opt.device)
self.model.eval()
self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3)
self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3)
self.max_per_image = 100
self.num_classes = opt.num_classes
self.scales = opt.test_scales
self.opt = opt
self.pause = False
def pre_process(self, image, scale, meta=None):
height, width = image.shape[0:2]
new_height = int(height * scale)
new_width = int(width * scale)
if self.opt.fix_res:
inp_height, inp_width = self.opt.input_h, self.opt.input_w
c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
s = max(height, width) * 1.0
else:
# inp_height = (new_height | self.opt.pad) + 1
# inp_width = (new_width | self.opt.pad) + 1
inp_height = int(np.ceil(new_height / 32) * 32)
inp_width = int(np.ceil(new_width / 32) * 32)
c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
s = np.array([inp_width, inp_height], dtype=np.float32)
trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
resized_image = cv2.resize(image, (new_width, new_height))
inp_image = cv2.warpAffine(
resized_image, trans_input, (inp_width, inp_height),
flags=cv2.INTER_LINEAR)
inp_image = ((inp_image / 255. - self.mean) /
self.std).astype(np.float32)
images = inp_image.transpose(2, 0, 1).reshape(
1, 3, inp_height, inp_width)
if self.opt.flip_test:
images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
images = torch.from_numpy(images)
meta = {'c': c, 's': s,
'out_height': inp_height // self.opt.down_ratio,
'out_width': inp_width // self.opt.down_ratio}
return images, meta
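# Hedged worked example (not in the original file): with fix_res disabled, a
# 500 x 375 image (width x height) at scale 1.0 is padded to the next multiple of 32:
#   inp_width  = ceil(500 / 32) * 32 = 512
#   inp_height = ceil(375 / 32) * 32 = 384
# so with down_ratio = 4 the output map is 128 x 96.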
def process(self, images, return_time=False):
raise NotImplementedError
def post_process(self, dets, meta, scale=1):
raise NotImplementedError
def merge_outputs(self, detections):
raise NotImplementedError
def debug(self, debugger, images, dets, output, scale=1):
raise NotImplementedError
def show_results(self, debugger, image, results):
raise NotImplementedError
def return_results(self, debugger, image, results):
raise NotImplementedError
def run(self, image_or_path_or_tensor, meta=None):
load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
merge_time, tot_time = 0, 0
debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug == 3),
theme=self.opt.debugger_theme)
start_time = time.time()
pre_processed = False
if isinstance(image_or_path_or_tensor, np.ndarray):
image = image_or_path_or_tensor
elif isinstance(image_or_path_or_tensor, str):
image = cv2.imread(image_or_path_or_tensor)
else:
image = image_or_path_or_tensor['image'][0].numpy()
pre_processed_images = image_or_path_or_tensor
pre_processed = True
loaded_time = time.time()
load_time += (loaded_time - start_time)
detections = []
for scale in self.scales:
scale_start_time = time.time()
if not pre_processed:
images, meta = self.pre_process(image, scale, meta)
else:
# import pdb; pdb.set_trace()
images = pre_processed_images['images'][scale][0]
meta = pre_processed_images['meta'][scale]
meta = {k: v.numpy()[0] for k, v in meta.items()}
images = images.to(self.opt.device)
torch.cuda.synchronize()
pre_process_time = time.time()
pre_time += pre_process_time - scale_start_time
output, dets, forward_time = self.process(images, return_time=True)
torch.cuda.synchronize()
net_time += forward_time - pre_process_time
decode_time = time.time()
dec_time += decode_time - forward_time
if self.opt.debug >= 2:
self.debug(debugger, images, dets, output, scale)
# box:4+score:1+kpoints:10+class:1=16
dets = self.post_process(dets, meta, scale)
torch.cuda.synchronize()
post_process_time = time.time()
post_time += post_process_time - decode_time
detections.append(dets)
results = self.merge_outputs(detections)
torch.cuda.synchronize()
end_time = time.time()
merge_time += end_time - post_process_time
tot_time += end_time - start_time
if self.opt.debug >= 1:
self.show_results(debugger, image, results)
if self.opt.debug == -1:
plot_img = self.return_results(debugger, image, results)
else:
plot_img = None
return {'results': results, 'tot': tot_time, 'load': load_time,
'pre': pre_time, 'net': net_time, 'dec': dec_time,
'post': post_time, 'merge': merge_time, 'plot_img': plot_img}
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import time
import torch
from external.nms import soft_nms
from models.decode import ctdet_decode
from models.utils import flip_tensor
from utils.image import get_affine_transform
from utils.post_process import ctdet_post_process
from utils.debugger import Debugger
from progress.bar import Bar
from .base_detector import BaseDetector
import numpy as np
class CtdetDetector(BaseDetector):
def __init__(self, opt):
super(CtdetDetector, self).__init__(opt)
def process(self, images, return_time=False):
with torch.no_grad():
output = self.model(images)[-1]
hm = output['hm'].sigmoid_()
wh = output['wh']
reg = output['reg'] if self.opt.reg_offset else None
if self.opt.flip_test:
hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2
wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2
reg = reg[0:1] if reg is not None else None
torch.cuda.synchronize()
forward_time = time.time()
dets = ctdet_decode(hm, wh, reg=reg, K=self.opt.K) # K=100
if return_time:
return output, dets, forward_time
else:
return output, dets
def post_process(self, dets, meta, scale=1):
dets = dets.detach().cpu().numpy()
dets = dets.reshape(1, -1, dets.shape[2])
dets = ctdet_post_process(
dets.copy(), [meta['c']], [meta['s']],
meta['out_height'], meta['out_width'], self.opt.num_classes)
for j in range(1, self.num_classes + 1):
dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5)
dets[0][j][:, :4] /= scale
return dets[0]
def merge_outputs(self, detections):
results = {}
for j in range(1, self.num_classes + 1):
results[j] = np.concatenate(
[detection[j] for detection in detections], axis=0).astype(np.float32)
if len(self.scales) > 1 or self.opt.nms:
soft_nms(results[j], Nt=0.5, method=2)
scores = np.hstack(
[results[j][:, 4] for j in range(1, self.num_classes + 1)])
if len(scores) > self.max_per_image:
kth = len(scores) - self.max_per_image
thresh = np.partition(scores, kth)[kth]
for j in range(1, self.num_classes + 1):
keep_inds = (results[j][:, 4] >= thresh)
results[j] = results[j][keep_inds]
return results
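# Hedged numeric illustration (not in the original source) of the np.partition
# thresholding above: with scores = [0.9, 0.2, 0.8, 0.4] and max_per_image = 2,
# kth = 4 - 2 = 2 and np.partition(scores, 2)[2] = 0.8, so only detections
# scoring >= 0.8 survive.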
def debug(self, debugger, images, dets, output, scale=1):
detection = dets.detach().cpu().numpy().copy()
detection[:, :, :4] *= self.opt.down_ratio
for i in range(1):
img = images[i].detach().cpu().numpy().transpose(1, 2, 0)
img = ((img * self.std + self.mean) * 255).astype(np.uint8)
pred = debugger.gen_colormap(
output['hm'][i].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hm_{:.1f}'.format(scale))
debugger.add_img(img, img_id='out_pred_{:.1f}'.format(scale))
for k in range(len(dets[i])):
if detection[i, k, 4] > self.opt.center_thresh:
debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1],
detection[i, k, 4],
img_id='out_pred_{:.1f}'.format(scale))
def show_results(self, debugger, image, results):
debugger.add_img(image, img_id='ctdet')
for j in range(1, self.num_classes + 1):
for bbox in results[j]:
if bbox[4] > self.opt.vis_thresh:
debugger.add_coco_bbox(
bbox[:4], j - 1, bbox[4], img_id='ctdet')
debugger.show_all_imgs(pause=self.pause)
def return_results(self, debugger, image, results):
debugger.add_img(image, img_id='ctdet')
for j in range(1, self.num_classes + 1):
for bbox in results[j]:
if bbox[4] > self.opt.vis_thresh:
debugger.add_coco_bbox(
bbox[:4], j - 1, bbox[4], img_id='ctdet')
return debugger.return_img()
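# Hedged usage sketch (assumes an 'opt' namespace configured elsewhere in this
# repo, e.g. via the project's opts parser; the image path is illustrative):
#   detector = CtdetDetector(opt)
#   out = detector.run('example.jpg')
#   out['results']   # {class_id: (N, 5) array of [x1, y1, x2, y2, score]}
#   print('net {:.3f}s  tot {:.3f}s'.format(out['net'], out['tot']))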