Commit b952e97b authored by chenych

First Commit.

image.png (23.7 KB)

import os.path as osp
import sys
def add_path(path):
if path not in sys.path:
sys.path.insert(0, path)
this_dir = osp.dirname(__file__)
# Add lib to PYTHONPATH
lib_path = osp.join(this_dir, 'lib')
add_path(lib_path)
import xml.etree.ElementTree as ET
import os
import json
coco = dict()
coco['images'] = []
coco['type'] = 'instances'
coco['annotations'] = []
coco['categories'] = []
category_set = dict()
image_set = set()
category_item_id = 0
image_id = 20180000000
annotation_id = 0
def addCatItem(name):
global category_item_id
category_item = dict()
category_item['supercategory'] = 'none'
category_item_id += 1
category_item['id'] = category_item_id
category_item['name'] = name
coco['categories'].append(category_item)
category_set[name] = category_item_id
return category_item_id
def addImgItem(file_name, size):
global image_id
if file_name is None:
raise Exception('Could not find filename tag in xml file.')
if size['width'] is None:
raise Exception('Could not find width tag in xml file.')
if size['height'] is None:
raise Exception('Could not find height tag in xml file.')
image_id += 1
image_item = dict()
image_item['id'] = image_id
image_item['file_name'] = file_name
image_item['width'] = size['width']
image_item['height'] = size['height']
coco['images'].append(image_item)
image_set.add(file_name)
return image_id
def addAnnoItem(object_name, image_id, category_id, bbox):
global annotation_id
annotation_item = dict()
annotation_item['segmentation'] = []
seg = []
#bbox[] is x,y,w,h
#left_top
seg.append(bbox[0])
seg.append(bbox[1])
#left_bottom
seg.append(bbox[0])
seg.append(bbox[1] + bbox[3])
#right_bottom
seg.append(bbox[0] + bbox[2])
seg.append(bbox[1] + bbox[3])
#right_top
seg.append(bbox[0] + bbox[2])
seg.append(bbox[1])
annotation_item['segmentation'].append(seg)
annotation_item['area'] = bbox[2] * bbox[3]
annotation_item['iscrowd'] = 0
annotation_item['ignore'] = 0
annotation_item['image_id'] = image_id
annotation_item['bbox'] = bbox
annotation_item['category_id'] = category_id
annotation_id += 1
annotation_item['id'] = annotation_id
coco['annotations'].append(annotation_item)
def parseXmlFiles(xml_path):
    # keep the last 20% of the sorted file list as the validation split;
    # sorting makes the split deterministic (os.listdir order is arbitrary)
    fs = sorted(os.listdir(xml_path))
    nums = int(0.8 * len(fs))
    for f in fs[nums:]:
if not f.endswith('.xml'):
continue
bndbox = dict()
size = dict()
current_image_id = None
current_category_id = None
file_name = None
size['width'] = None
size['height'] = None
size['depth'] = None
xml_file = os.path.join(xml_path, f)
print(xml_file)
tree = ET.parse(xml_file)
root = tree.getroot()
if root.tag != 'annotation':
raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
#elem is <folder>, <filename>, <size>, <object>
for elem in root:
current_parent = elem.tag
current_sub = None
object_name = None
if elem.tag == 'folder':
continue
if elem.tag == 'filename':
file_name = elem.text
if file_name in category_set:
raise Exception('file_name duplicated')
#add img item only after parse <size> tag
elif current_image_id is None and file_name is not None and size['width'] is not None:
if file_name not in image_set:
current_image_id = addImgItem(file_name, size)
print('add image with {} and {}'.format(file_name, size))
else:
raise Exception('duplicated image: {}'.format(file_name))
#subelem is <width>, <height>, <depth>, <name>, <bndbox>
for subelem in elem:
                bndbox['xmin'] = None
                bndbox['xmax'] = None
                bndbox['ymin'] = None
                bndbox['ymax'] = None
current_sub = subelem.tag
if current_parent == 'object' and subelem.tag == 'name':
object_name = subelem.text
if object_name not in category_set:
current_category_id = addCatItem(object_name)
else:
current_category_id = category_set[object_name]
elif current_parent == 'size':
if size[subelem.tag] is not None:
raise Exception('xml structure broken at size tag.')
size[subelem.tag] = int(subelem.text)
#option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
for option in subelem:
if current_sub == 'bndbox':
if bndbox[option.tag] is not None:
raise Exception('xml structure corrupted at bndbox tag.')
bndbox[option.tag] = int(option.text)
#only after parse the <object> tag
if bndbox['xmin'] is not None:
if object_name is None:
raise Exception('xml structure broken at bndbox tag')
if current_image_id is None:
raise Exception('xml structure broken at bndbox tag')
if current_category_id is None:
raise Exception('xml structure broken at bndbox tag')
bbox = []
#x
bbox.append(bndbox['xmin'])
#y
bbox.append(bndbox['ymin'])
#w
bbox.append(bndbox['xmax'] - bndbox['xmin'])
#h
bbox.append(bndbox['ymax'] - bndbox['ymin'])
print('add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id, bbox))
                    addAnnoItem(object_name, current_image_id, current_category_id, bbox)
if __name__ == '__main__':
xml_path = '/home/yangna/deepblue/32_face_detect/centerface/data/wider_face/voc_xml'
json_file = './data/wider_face/annotations/val_wider_face.json'
parseXmlFiles(xml_path)
json.dump(coco, open(json_file, 'w'))
# add annotation with face,20180012875,1,[848, 445, 20, 29]
# /home/yangna/deepblue/32_face_detect/centerface/data/wider_face/voc/35_Basketball_basketballgame_ball_35_79.xml
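# A minimal sanity check for the generated annotation file (a sketch; it
# assumes pycocotools is installed and reuses the json_file path above):
#   from pycocotools.coco import COCO
#   api = COCO('./data/wider_face/annotations/val_wider_face.json')
#   print(len(api.getImgIds()), 'images /', len(api.getAnnIds()), 'annotations')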
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import _init_paths
import os
import cv2
import time
from opts_pose import opts
from detectors.detector_factory import detector_factory
image_ext = ['jpg', 'jpeg', 'png', 'webp']
video_ext = ['mp4', 'mov', 'avi', 'mkv']
time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
def demo(opt):
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
Detector = detector_factory[opt.task]
detector = Detector(opt)
if opt.demo == 'webcam' or \
opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext:
cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo)
detector.pause = False
i = 0
start_time = time.time()
if opt.output_video:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # for .mp4 output the codec needs to be mp4v
im_width = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
im_height = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))
write_cap = cv2.VideoWriter(
opt.output_video, fourcc, 25, (im_width, im_height))
while cam.grab():
i += 1
_, img = cam.retrieve()
cv2.imshow('input', img)
ret = detector.run(img)
time_str = ''
for stat in time_stats:
time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
if opt.output_video:
write_cap.write(ret['plot_img'])
print('fps:{:.3f}'.format(i/(time.time()-start_time)), time_str)
if cv2.waitKey(1) == 27:
return # esc to quit
else:
if os.path.isdir(opt.demo):
image_names = []
ls = os.listdir(opt.demo)
for file_name in sorted(ls):
ext = file_name[file_name.rfind('.') + 1:].lower()
if ext in image_ext:
image_names.append(os.path.join(opt.demo, file_name))
else:
image_names = [opt.demo]
        for image_name in image_names:
ret = detector.run(image_name)
time_str = ''
for stat in time_stats:
time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
print(time_str)
if __name__ == '__main__':
opt = opts().init()
demo(opt)
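# Example invocation (a sketch; flag names are assumed from the opt fields
# referenced above, following the CenterNet-style opts this script imports):
#   python demo.py multi_pose --demo /path/to/image_dir_or_video --load_model model.pth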
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
import numpy as np
import torch
import json
import os
import torch.utils.data as data
class FACE(data.Dataset):
num_classes = 1
default_resolution = [800, 800]
mean = np.array([0.485, 0.456, 0.406],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(FACE, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'wider_face')
self.img_dir = os.path.join(
            self.data_dir, 'image')  # used later when loading images
_ann_name = {'train': 'train', 'val': 'val'}
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'{}_wider_face.json').format(_ann_name[split])
self.max_objs = 50
self.class_name = ['__background__', "face"]
self._valid_ids = np.arange(1, 21, dtype=np.int32)
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
        print('==> initializing wider face {} data.'.format(_ann_name[split]))
self.coco = coco.COCO(self.annot_path)
self.images = sorted(self.coco.getImgIds())
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
detections = [[[] for __ in range(self.num_samples)]
for _ in range(self.num_classes + 1)]
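        # detections is indexed [class][image] -> list of [x1, y1, x2, y2, score]
        # entries, the VOC-style layout consumed by tools/reval.py in run_eval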
for i in range(self.num_samples):
img_id = self.images[i]
for j in range(1, self.num_classes + 1):
if isinstance(all_bboxes[img_id][j], np.ndarray):
detections[j][i] = all_bboxes[img_id][j].tolist()
else:
detections[j][i] = all_bboxes[img_id][j]
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
os.system('python tools/reval.py ' +
'{}/results.json'.format(save_dir))
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import numpy as np
import json
import os
import torch.utils.data as data
class FACEHP(data.Dataset):
num_classes = 1
num_joints = 5
default_resolution = [800, 800]
mean = np.array([0.40789654, 0.44719302, 0.47026115],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.28863828, 0.27408164, 0.27809835],
dtype=np.float32).reshape(1, 1, 3)
    flip_idx = [[0, 1], [3, 4]]  # keypoint index pairs swapped under horizontal flip
def __init__(self, opt, split):
super(FACEHP, self).__init__()
self.edges = [[0, 1], [0, 2], [1, 3], [2, 4],
[4, 6], [3, 5], [5, 6],
[5, 7], [7, 9], [6, 8], [8, 10],
[6, 12], [5, 11], [11, 12],
[12, 14], [14, 16], [11, 13], [13, 15]]
self.acc_idxs = [1, 2, 3, 4]
self.data_dir = opt.data_dir
self.img_dir = os.path.join(
            self.data_dir, 'WIDER_train/images')  # directory of the training images
        _ann_name = {'train': 'train', 'val': 'val'}
        # both splits resolve to the same pattern, so no branch is needed
        self.annot_path = os.path.join(
            self.data_dir, 'annotations',
            '{}_wider_face.json').format(_ann_name[split])
self.max_objs = 32
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
print('==> initializing centerface key point {} data.'.format(split))
self.coco = coco.COCO(self.annot_path)
image_ids = self.coco.getImgIds()
if split == 'train':
self.images = []
for img_id in image_ids:
idxs = self.coco.getAnnIds(imgIds=[img_id])
if len(idxs) > 0:
self.images.append(img_id)
else:
self.images = image_ids
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
# import pdb; pdb.set_trace()
detections = []
for image_id in all_bboxes:
for cls_ind in all_bboxes[image_id]:
category_id = 1
for dets in all_bboxes[image_id][cls_ind]:
bbox = dets[:4]
bbox[2] -= bbox[0]
bbox[3] -= bbox[1]
score = dets[4]
bbox_out = list(map(self._to_float, bbox))
                    # dets holds num_joints landmarks after the 4 box coords
                    # and the score; append visibility 1 to each point
                    # (the original hard-coded 17 joints, copied from coco_hp)
                    keypoints = np.concatenate([
                        np.array(dets[5:5 + self.num_joints * 2],
                                 dtype=np.float32).reshape(-1, 2),
                        np.ones((self.num_joints, 1), dtype=np.float32)],
                        axis=1).reshape(-1).tolist()
keypoints = list(map(self._to_float, keypoints))
detection = {
"image_id": int(image_id),
"category_id": int(category_id),
"bbox": bbox_out,
"score": float("{:.2f}".format(score)),
"keypoints": keypoints
}
detections.append(detection)
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(opt.save_dir, "results.json")
# detections = convert_eval_format(all_boxes)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
coco_eval = COCOeval(self.coco, coco_dets, "keypoints")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_eval = COCOeval(self.coco, coco_dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import numpy as np
import json
import os
import torch.utils.data as data
class COCO(data.Dataset):
num_classes = 80
default_resolution = [512, 512]
mean = np.array([0.40789654, 0.44719302, 0.47026115],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.28863828, 0.27408164, 0.27809835],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(COCO, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'coco')
self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
if split == 'test':
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'image_info_test-dev2017.json').format(split)
else:
if opt.task == 'exdet':
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'instances_extreme_{}2017.json').format(split)
else:
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'instances_{}2017.json').format(split)
self.max_objs = 128
self.class_name = [
'__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush']
self._valid_ids = [
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 84, 85, 86, 87, 88, 89, 90]
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
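        # the 80 official COCO category ids are non-contiguous (12, 26, ... are
        # unused), so cat_ids remaps them to contiguous training labels 0..79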
self.voc_color = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32)
for v in range(1, self.num_classes + 1)]
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
# self.mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3)
# self.std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3)
self.split = split
self.opt = opt
print('==> initializing coco 2017 {} data.'.format(split))
self.coco = coco.COCO(self.annot_path)
self.images = self.coco.getImgIds()
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
# import pdb; pdb.set_trace()
detections = []
for image_id in all_bboxes:
for cls_ind in all_bboxes[image_id]:
category_id = self._valid_ids[cls_ind - 1]
for bbox in all_bboxes[image_id][cls_ind]:
bbox[2] -= bbox[0]
bbox[3] -= bbox[1]
score = bbox[4]
bbox_out = list(map(self._to_float, bbox[0:4]))
detection = {
"image_id": int(image_id),
"category_id": int(category_id),
"bbox": bbox_out,
"score": float("{:.2f}".format(score))
}
if len(bbox) > 5:
extreme_points = list(map(self._to_float, bbox[5:13]))
detection["extreme_points"] = extreme_points
detections.append(detection)
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
coco_eval = COCOeval(self.coco, coco_dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import numpy as np
import json
import os
import torch.utils.data as data
class COCOHP(data.Dataset):
num_classes = 1
num_joints = 17
default_resolution = [512, 512]
mean = np.array([0.40789654, 0.44719302, 0.47026115],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.28863828, 0.27408164, 0.27809835],
dtype=np.float32).reshape(1, 1, 3)
flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
[11, 12], [13, 14], [15, 16]]
def __init__(self, opt, split):
super(COCOHP, self).__init__()
self.edges = [[0, 1], [0, 2], [1, 3], [2, 4],
[4, 6], [3, 5], [5, 6],
[5, 7], [7, 9], [6, 8], [8, 10],
[6, 12], [5, 11], [11, 12],
[12, 14], [14, 16], [11, 13], [13, 15]]
self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
self.data_dir = os.path.join(opt.data_dir, 'coco')
self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
if split == 'test':
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'image_info_test-dev2017.json').format(split)
else:
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'person_keypoints_{}2017.json').format(split)
self.max_objs = 32
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
print('==> initializing coco 2017 {} data.'.format(split))
self.coco = coco.COCO(self.annot_path)
image_ids = self.coco.getImgIds()
if split == 'train':
self.images = []
for img_id in image_ids:
idxs = self.coco.getAnnIds(imgIds=[img_id])
if len(idxs) > 0:
self.images.append(img_id)
else:
self.images = image_ids
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
# import pdb; pdb.set_trace()
detections = []
for image_id in all_bboxes:
for cls_ind in all_bboxes[image_id]:
category_id = 1
for dets in all_bboxes[image_id][cls_ind]:
bbox = dets[:4]
bbox[2] -= bbox[0]
bbox[3] -= bbox[1]
score = dets[4]
bbox_out = list(map(self._to_float, bbox))
keypoints = np.concatenate([
np.array(dets[5:39], dtype=np.float32).reshape(-1, 2),
np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist()
keypoints = list(map(self._to_float, keypoints))
detection = {
"image_id": int(image_id),
"category_id": int(category_id),
"bbox": bbox_out,
"score": float("{:.2f}".format(score)),
"keypoints": keypoints
}
detections.append(detection)
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(opt.save_dir, "results.json")
# detections = convert_eval_format(all_boxes)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
coco_eval = COCOeval(self.coco, coco_dets, "keypoints")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_eval = COCOeval(self.coco, coco_dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import torch
import json
import cv2
import numpy as np
import torch.utils.data as data
import pycocotools.coco as coco
class KITTI(data.Dataset):
num_classes = 3
default_resolution = [384, 1280]
mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(KITTI, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'kitti')
self.img_dir = os.path.join(self.data_dir, 'images', 'trainval')
if opt.trainval:
split = 'trainval' if split == 'train' else 'test'
self.img_dir = os.path.join(self.data_dir, 'images', split)
self.annot_path = os.path.join(
self.data_dir, 'annotations', 'kitti_{}.json').format(split)
else:
self.annot_path = os.path.join(self.data_dir,
'annotations', 'kitti_{}_{}.json').format(opt.kitti_split, split)
self.max_objs = 50
self.class_name = [
'__background__', 'Pedestrian', 'Car', 'Cyclist']
self.cat_ids = {1: 0, 2: 1, 3: 2, 4: -3,
5: -3, 6: -2, 7: -99, 8: -99, 9: -1}
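        # negative ids mark ignore labels: categories mapped to -99 are skipped
        # outright, while other negatives are rendered as ignore regions in the
        # heatmap (see the cls_id < 0 branch in sample/ddd.py)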
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
self.alpha_in_degree = False
print('==> initializing kitti {}, {} data.'.format(opt.kitti_split, split))
self.coco = coco.COCO(self.annot_path)
self.images = self.coco.getImgIds()
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def __len__(self):
return self.num_samples
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
pass
def save_results(self, results, save_dir):
results_dir = os.path.join(save_dir, 'results')
if not os.path.exists(results_dir):
os.mkdir(results_dir)
for img_id in results.keys():
out_path = os.path.join(results_dir, '{:06d}.txt'.format(img_id))
f = open(out_path, 'w')
for cls_ind in results[img_id]:
for j in range(len(results[img_id][cls_ind])):
class_name = self.class_name[cls_ind]
f.write('{} 0.0 0'.format(class_name))
for i in range(len(results[img_id][cls_ind][j])):
f.write(' {:.2f}'.format(
results[img_id][cls_ind][j][i]))
f.write('\n')
f.close()
def run_eval(self, results, save_dir):
self.save_results(results, save_dir)
os.system('./tools/kitti_eval/evaluate_object_3d_offline ' +
'../data/kitti/training/label_val ' +
'{}/results/'.format(save_dir))
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import torch
import json
import numpy as np
import torch.utils.data as data
import pycocotools.coco as coco
class PascalVOC(data.Dataset):
num_classes = 20
default_resolution = [384, 384]
mean = np.array([0.485, 0.456, 0.406],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(PascalVOC, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'voc')
self.img_dir = os.path.join(self.data_dir, 'images')
_ann_name = {'train': 'trainval0712', 'val': 'test2007'}
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'pascal_{}.json').format(_ann_name[split])
self.max_objs = 50
self.class_name = ['__background__', "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
"horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
"train", "tvmonitor"]
self._valid_ids = np.arange(1, 21, dtype=np.int32)
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
print('==> initializing pascal {} data.'.format(_ann_name[split]))
self.coco = coco.COCO(self.annot_path)
self.images = sorted(self.coco.getImgIds())
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
detections = [[[] for __ in range(self.num_samples)]
for _ in range(self.num_classes + 1)]
for i in range(self.num_samples):
img_id = self.images[i]
for j in range(1, self.num_classes + 1):
if isinstance(all_bboxes[img_id][j], np.ndarray):
detections[j][i] = all_bboxes[img_id][j].tolist()
else:
detections[j][i] = all_bboxes[img_id][j]
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
os.system('python tools/reval.py ' +
'{}/results.json'.format(save_dir))
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import torch
import json
import numpy as np
import torch.utils.data as data
import pycocotools.coco as coco
class PIG(data.Dataset):
num_classes = 1
default_resolution = [512, 512]
mean = np.array([0.485, 0.456, 0.406],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(PIG, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'pig')
self.img_dir = os.path.join(
            self.data_dir, 'image')  # used later when loading images
_ann_name = {'train': 'train', 'val': 'val'}
self.annot_path = os.path.join(
self.data_dir, 'annotations',
'{}_pig.json').format(_ann_name[split])
self.max_objs = 50
self.class_name = ['__background__', "pig"]
self._valid_ids = np.arange(1, 21, dtype=np.int32)
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
        print('==> initializing pig {} data.'.format(_ann_name[split]))
self.coco = coco.COCO(self.annot_path)
self.images = sorted(self.coco.getImgIds())
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
detections = [[[] for __ in range(self.num_samples)]
for _ in range(self.num_classes + 1)]
for i in range(self.num_samples):
img_id = self.images[i]
for j in range(1, self.num_classes + 1):
if isinstance(all_bboxes[img_id][j], np.ndarray):
detections[j][i] = all_bboxes[img_id][j].tolist()
else:
detections[j][i] = all_bboxes[img_id][j]
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
os.system('python tools/reval.py ' +
'{}/results.json'.format(save_dir))
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import torch
import json
import torch.utils.data as data
import pycocotools.coco as coco
import numpy as np
class PIG2(data.Dataset):
num_classes = 2
default_resolution = [512, 512]
mean = np.array([0.485, 0.456, 0.406],
dtype=np.float32).reshape(1, 1, 3)
std = np.array([0.229, 0.224, 0.225],
dtype=np.float32).reshape(1, 1, 3)
def __init__(self, opt, split):
super(PIG2, self).__init__()
self.data_dir = os.path.join(opt.data_dir, 'pig')
self.img_dir = os.path.join(
            self.data_dir, 'image')  # used later when loading images
_ann_name = {'train': 'train', 'val': 'val'}
self.annot_path = os.path.join(
self.data_dir, 'annotations',
            '{}_pig.json').format(_ann_name[split])  # default, overridden by opt.train_json / opt.val_json below
if split == 'train':
self.annot_path = opt.train_json
elif split == 'val':
self.annot_path = opt.val_json
self.max_objs = 50
self.class_name = ['__background__', "defect1", "defect2"]
self._valid_ids = np.arange(1, 21, dtype=np.int32)
self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
self._data_rng = np.random.RandomState(123)
self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
dtype=np.float32)
self._eig_vec = np.array([
[-0.58752847, -0.69563484, 0.41340352],
[-0.5832747, 0.00994535, -0.81221408],
[-0.56089297, 0.71832671, 0.41158938]
], dtype=np.float32)
self.split = split
self.opt = opt
        print('==> initializing pig2 {} data.'.format(_ann_name[split]))
self.coco = coco.COCO(self.annot_path)
self.images = sorted(self.coco.getImgIds())
self.num_samples = len(self.images)
print('Loaded {} {} samples'.format(split, self.num_samples))
def _to_float(self, x):
return float("{:.2f}".format(x))
def convert_eval_format(self, all_bboxes):
detections = [[[] for __ in range(self.num_samples)]
for _ in range(self.num_classes + 1)]
for i in range(self.num_samples):
img_id = self.images[i]
for j in range(1, self.num_classes + 1):
if isinstance(all_bboxes[img_id][j], np.ndarray):
detections[j][i] = all_bboxes[img_id][j].tolist()
else:
detections[j][i] = all_bboxes[img_id][j]
return detections
def __len__(self):
return self.num_samples
def save_results(self, results, save_dir):
json.dump(self.convert_eval_format(results),
open('{}/results.json'.format(save_dir), 'w'))
def run_eval(self, results, save_dir):
# result_json = os.path.join(save_dir, "results.json")
# detections = self.convert_eval_format(results)
# json.dump(detections, open(result_json, "w"))
self.save_results(results, save_dir)
os.system('python tools/reval.py ' +
'{}/results.json'.format(save_dir))
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from .sample.ddd import DddDataset
from .sample.exdet import EXDetDataset
from .sample.ctdet import CTDetDataset
from .sample.multi_pose import MultiPoseDataset
from .dataset.coco import COCO
from .dataset.pascal import PascalVOC
from .dataset.kitti import KITTI
from .dataset.coco_hp import COCOHP
from .dataset.pig import PIG
from .dataset.pig2 import PIG2
from .dataset.centerface import FACE
from .dataset.centerface_hp import FACEHP
dataset_factory = {
'coco': COCO,
'pascal': PascalVOC,
'kitti': KITTI,
'coco_hp': COCOHP,
'pig': PIG,
'pig2': PIG2,
'face': FACE,
'facehp': FACEHP
}
_sample_factory = {
'exdet': EXDetDataset,
'ctdet': CTDetDataset,
'ddd': DddDataset,
'multi_pose': MultiPoseDataset
}
def get_dataset(dataset, task):
    class Dataset(dataset_factory[dataset], _sample_factory[task]):  # dual inheritance: dataset metadata + task-specific sampling
pass
return Dataset
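# A minimal usage sketch (assumes an `opt` object built by the opts module,
# as in the training scripts):
#   Dataset = get_dataset('facehp', 'multi_pose')
#   train_set = Dataset(opt, 'train')  # FACEHP metadata + MultiPoseDataset.__getitem__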
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch.utils.data as data
import numpy as np
import torch
import json
import cv2
import os
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
from utils.image import draw_dense_reg
import math
import matplotlib.pyplot as plt
class CTDetDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _get_border(self, border, size):
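        # halve the margin until a crop center can be drawn from
        # [border // i, size - border // i); this keeps the np.random.randint
        # calls below from seeing an empty range on small images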
i = 1
while size - border // i <= border // i:
i *= 2
return border // i
def __getitem__(self, index):
img_id = self.images[index]
file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
img_path = os.path.join(self.img_dir, file_name)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = min(len(anns), self.max_objs)
img = cv2.imread(img_path)
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
if self.opt.keep_res:
input_h = (height | self.opt.pad) + 1
input_w = (width | self.opt.pad) + 1
s = np.array([input_w, input_h], dtype=np.float32)
else:
s = max(img.shape[0], img.shape[1]) * 1.0
input_h, input_w = self.opt.input_h, self.opt.input_w
flipped = False
if self.split == 'train':
if not self.opt.not_rand_crop:
s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
w_border = self._get_border(128, img.shape[1])
h_border = self._get_border(128, img.shape[0])
c[0] = np.random.randint(
low=w_border, high=img.shape[1] - w_border)
c[1] = np.random.randint(
low=h_border, high=img.shape[0] - h_border)
else:
sf = self.opt.scale
cf = self.opt.shift
c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
if np.random.random() < self.opt.flip:
flipped = True
img = img[:, ::-1, :]
c[0] = width - c[0] - 1
trans_input = get_affine_transform(
c, s, 0, [input_w, input_h])
inp = cv2.warpAffine(img, trans_input,
(input_w, input_h),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
if self.split == 'train' and not self.opt.no_color_aug:
color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
output_h = input_h // self.opt.down_ratio
output_w = input_w // self.opt.down_ratio
num_classes = self.num_classes
trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32)
wh = np.zeros((self.max_objs, 2), dtype=np.float32)
dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
reg = np.zeros((self.max_objs, 2), dtype=np.float32)
ind = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
cat_spec_wh = np.zeros(
(self.max_objs, num_classes * 2), dtype=np.float32)
cat_spec_mask = np.zeros(
(self.max_objs, num_classes * 2), dtype=np.uint8)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
gt_det = []
for k in range(num_objs):
ann = anns[k]
bbox = self._coco_box_to_bbox(ann['bbox'])
cls_id = int(self.cat_ids[ann['category_id']])
if flipped:
bbox[[0, 2]] = width - bbox[[2, 0]] - 1
bbox[:2] = affine_transform(bbox[:2], trans_output)
bbox[2:] = affine_transform(bbox[2:], trans_output)
bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
if h > 0 and w > 0:
radius = gaussian_radius(
                    (math.ceil(h), math.ceil(w)))  # Gaussian radius
radius = max(0, int(radius))
radius = self.opt.hm_gauss if self.opt.mse_loss else radius
ct = np.array(
                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)  # box center
ct_int = ct.astype(np.int32)
                # one heatmap channel per class
draw_gaussian(hm[cls_id], ct_int, radius)
                # object width/height
wh[k] = 1. * w, 1. * h
                ind[k] = ct_int[1] * output_w + \
                    ct_int[0]  # flat index: y * output_w + x
                # quantization error of the integer center
reg[k] = ct - ct_int
reg_mask[k] = 1
cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
if self.opt.dense_wh:
draw_dense_reg(dense_wh, hm.max(
axis=0), ct_int, wh[k], radius)
gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
ret = {'input': inp, 'hm': hm,
'reg_mask': reg_mask, 'ind': ind, 'wh': wh}
if self.opt.dense_wh:
hm_a = hm.max(axis=0, keepdims=True)
dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
del ret['wh']
elif self.opt.cat_spec_wh:
ret.update({'cat_spec_wh': cat_spec_wh,
'cat_spec_mask': cat_spec_mask})
del ret['wh']
if self.opt.reg_offset:
ret.update({'reg': reg})
if self.opt.debug > 0 or not self.split == 'train':
gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
np.zeros((1, 6), dtype=np.float32)
meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
ret['meta'] = meta
return ret
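# Decoding sketch for the 'ind' encoding above (illustrative only): a flat
# index i on the output_w-wide feature map recovers the integer center via
#   y, x = i // output_w, i % output_w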
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import torch
import json
import cv2
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
import numpy as np
import pycocotools.coco as coco
import torch.utils.data as data
class DddDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _convert_alpha(self, alpha):
return math.radians(alpha + 45) if self.alpha_in_degree else alpha
def __getitem__(self, index):
img_id = self.images[index]
img_info = self.coco.loadImgs(ids=[img_id])[0]
img_path = os.path.join(self.img_dir, img_info['file_name'])
img = cv2.imread(img_path)
if 'calib' in img_info:
calib = np.array(img_info['calib'], dtype=np.float32)
else:
calib = self.calib
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
if self.opt.keep_res:
s = np.array([self.opt.input_w, self.opt.input_h], dtype=np.int32)
else:
s = np.array([width, height], dtype=np.int32)
aug = False
if self.split == 'train' and np.random.random() < self.opt.aug_ddd:
aug = True
sf = self.opt.scale
cf = self.opt.shift
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
c[0] += img.shape[1] * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += img.shape[0] * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
trans_input = get_affine_transform(
c, s, 0, [self.opt.input_w, self.opt.input_h])
inp = cv2.warpAffine(img, trans_input,
(self.opt.input_w, self.opt.input_h),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
# if self.split == 'train' and not self.opt.no_color_aug:
# color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
num_classes = self.opt.num_classes
trans_output = get_affine_transform(
c, s, 0, [self.opt.output_w, self.opt.output_h])
hm = np.zeros(
(num_classes, self.opt.output_h, self.opt.output_w), dtype=np.float32)
wh = np.zeros((self.max_objs, 2), dtype=np.float32)
reg = np.zeros((self.max_objs, 2), dtype=np.float32)
dep = np.zeros((self.max_objs, 1), dtype=np.float32)
rotbin = np.zeros((self.max_objs, 2), dtype=np.int64)
rotres = np.zeros((self.max_objs, 2), dtype=np.float32)
dim = np.zeros((self.max_objs, 3), dtype=np.float32)
ind = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
rot_mask = np.zeros((self.max_objs), dtype=np.uint8)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = min(len(anns), self.max_objs)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
gt_det = []
for k in range(num_objs):
ann = anns[k]
bbox = self._coco_box_to_bbox(ann['bbox'])
cls_id = int(self.cat_ids[ann['category_id']])
if cls_id <= -99:
continue
# if flipped:
# bbox[[0, 2]] = width - bbox[[2, 0]] - 1
bbox[:2] = affine_transform(bbox[:2], trans_output)
bbox[2:] = affine_transform(bbox[2:], trans_output)
bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, self.opt.output_w - 1)
bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, self.opt.output_h - 1)
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
if h > 0 and w > 0:
radius = gaussian_radius((h, w))
radius = max(0, int(radius))
ct = np.array(
[(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
ct_int = ct.astype(np.int32)
if cls_id < 0:
ignore_id = [_ for _ in range(num_classes)] \
if cls_id == - 1 else [- cls_id - 2]
if self.opt.rect_mask:
hm[ignore_id, int(bbox[1]): int(bbox[3]) + 1,
int(bbox[0]): int(bbox[2]) + 1] = 0.9999
else:
for cc in ignore_id:
draw_gaussian(hm[cc], ct, radius)
hm[ignore_id, ct_int[1], ct_int[0]] = 0.9999
continue
draw_gaussian(hm[cls_id], ct, radius)
wh[k] = 1. * w, 1. * h
gt_det.append([ct[0], ct[1], 1] +
self._alpha_to_8(self._convert_alpha(ann['alpha'])) +
[ann['depth']] + (np.array(ann['dim']) / 1).tolist() + [cls_id])
if self.opt.reg_bbox:
gt_det[-1] = gt_det[-1][:-1] + [w, h] + [gt_det[-1][-1]]
# if (not self.opt.car_only) or cls_id == 1: # Only estimate ADD for cars !!!
if 1:
alpha = self._convert_alpha(ann['alpha'])
# print('img_id cls_id alpha rot_y', img_path, cls_id, alpha, ann['rotation_y'])
if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
rotbin[k, 0] = 1
rotres[k, 0] = alpha - (-0.5 * np.pi)
if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
rotbin[k, 1] = 1
rotres[k, 1] = alpha - (0.5 * np.pi)
dep[k] = ann['depth']
dim[k] = ann['dim']
# print(' cat dim', cls_id, dim[k])
ind[k] = ct_int[1] * self.opt.output_w + ct_int[0]
reg[k] = ct - ct_int
reg_mask[k] = 1 if not aug else 0
rot_mask[k] = 1
# print('gt_det', gt_det)
# print('')
ret = {'input': inp, 'hm': hm, 'dep': dep, 'dim': dim, 'ind': ind,
'rotbin': rotbin, 'rotres': rotres, 'reg_mask': reg_mask,
'rot_mask': rot_mask}
if self.opt.reg_bbox:
ret.update({'wh': wh})
if self.opt.reg_offset:
ret.update({'reg': reg})
if self.opt.debug > 0 or not ('train' in self.split):
gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
np.zeros((1, 18), dtype=np.float32)
meta = {'c': c, 's': s, 'gt_det': gt_det, 'calib': calib,
'image_path': img_path, 'img_id': img_id}
ret['meta'] = meta
return ret
def _alpha_to_8(self, alpha):
# return [alpha, 0, 0, 0, 0, 0, 0, 0]
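        # 8-dim multi-bin layout, as read from the assignments below:
        # [bin1_cls0, bin1_cls1, sin(r1), cos(r1), bin2_cls0, bin2_cls1, sin(r2), cos(r2)]
        # with residuals r taken around -pi/2 (bin 1) and +pi/2 (bin 2)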
ret = [0, 0, 0, 1, 0, 0, 0, 1]
if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
r = alpha - (-0.5 * np.pi)
ret[1] = 1
ret[2], ret[3] = np.sin(r), np.cos(r)
if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
r = alpha - (0.5 * np.pi)
ret[5] = 1
ret[6], ret[7] = np.sin(r), np.cos(r)
return ret
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import torch
import json
import cv2
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
import numpy as np
import torch.utils.data as data
import pycocotools.coco as coco
class EXDetDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _get_border(self, border, size):
i = 1
while size - border // i <= border // i:
i *= 2
return border // i
def __getitem__(self, index):
img_id = self.images[index]
img_info = self.coco.loadImgs(ids=[img_id])[0]
img_path = os.path.join(self.img_dir, img_info['file_name'])
img = cv2.imread(img_path)
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
s = max(img.shape[0], img.shape[1]) * 1.0
flipped = False
if self.split == 'train':
if not self.opt.not_rand_crop:
s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
w_border = self._get_border(128, img.shape[1])
h_border = self._get_border(128, img.shape[0])
c[0] = np.random.randint(
low=w_border, high=img.shape[1] - w_border)
c[1] = np.random.randint(
low=h_border, high=img.shape[0] - h_border)
else:
sf = self.opt.scale
cf = self.opt.shift
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
c[0] += img.shape[1] * \
np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += img.shape[0] * \
np.clip(np.random.randn()*cf, -2*cf, 2*cf)
if np.random.random() < self.opt.flip:
flipped = True
img = img[:, ::-1, :]
trans_input = get_affine_transform(
c, s, 0, [self.opt.input_res, self.opt.input_res])
inp = cv2.warpAffine(img, trans_input,
(self.opt.input_res, self.opt.input_res),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
if self.split == 'train' and not self.opt.no_color_aug:
color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
output_res = self.opt.output_res
num_classes = self.opt.num_classes
trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
num_hm = 1 if self.opt.agnostic_ex else num_classes
hm_t = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_l = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_b = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_r = np.zeros((num_hm, output_res, output_res), dtype=np.float32)
hm_c = np.zeros((num_classes, output_res, output_res),
dtype=np.float32)
reg_t = np.zeros((self.max_objs, 2), dtype=np.float32)
reg_l = np.zeros((self.max_objs, 2), dtype=np.float32)
reg_b = np.zeros((self.max_objs, 2), dtype=np.float32)
reg_r = np.zeros((self.max_objs, 2), dtype=np.float32)
ind_t = np.zeros((self.max_objs), dtype=np.int64)
ind_l = np.zeros((self.max_objs), dtype=np.int64)
ind_b = np.zeros((self.max_objs), dtype=np.int64)
ind_r = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = min(len(anns), self.max_objs)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
for k in range(num_objs):
ann = anns[k]
# bbox = self._coco_box_to_bbox(ann['bbox'])
# tlbr
pts = np.array(ann['extreme_points'],
dtype=np.float32).reshape(4, 2)
# cls_id = int(self.cat_ids[ann['category_id']] - 1) # bug
cls_id = int(self.cat_ids[ann['category_id']])
hm_id = 0 if self.opt.agnostic_ex else cls_id
if flipped:
pts[:, 0] = width - pts[:, 0] - 1
pts[1], pts[3] = pts[3].copy(), pts[1].copy()
for j in range(4):
pts[j] = affine_transform(pts[j], trans_output)
pts = np.clip(pts, 0, self.opt.output_res - 1)
h, w = pts[2, 1] - pts[0, 1], pts[3, 0] - pts[1, 0]
if h > 0 and w > 0:
radius = gaussian_radius((math.ceil(h), math.ceil(w)))
radius = max(0, int(radius))
pt_int = pts.astype(np.int32)
draw_gaussian(hm_t[hm_id], pt_int[0], radius)
draw_gaussian(hm_l[hm_id], pt_int[1], radius)
draw_gaussian(hm_b[hm_id], pt_int[2], radius)
draw_gaussian(hm_r[hm_id], pt_int[3], radius)
reg_t[k] = pts[0] - pt_int[0]
reg_l[k] = pts[1] - pt_int[1]
reg_b[k] = pts[2] - pt_int[2]
reg_r[k] = pts[3] - pt_int[3]
ind_t[k] = pt_int[0, 1] * output_res + pt_int[0, 0]
ind_l[k] = pt_int[1, 1] * output_res + pt_int[1, 0]
ind_b[k] = pt_int[2, 1] * output_res + pt_int[2, 0]
ind_r[k] = pt_int[3, 1] * output_res + pt_int[3, 0]
ct = [int((pts[3, 0] + pts[1, 0]) / 2),
int((pts[0, 1] + pts[2, 1]) / 2)]
draw_gaussian(hm_c[cls_id], ct, radius)
reg_mask[k] = 1
ret = {'input': inp, 'hm_t': hm_t, 'hm_l': hm_l, 'hm_b': hm_b,
'hm_r': hm_r, 'hm_c': hm_c}
if self.opt.reg_offset:
ret.update({'reg_mask': reg_mask,
'reg_t': reg_t, 'reg_l': reg_l, 'reg_b': reg_b, 'reg_r': reg_r,
'ind_t': ind_t, 'ind_l': ind_l, 'ind_b': ind_b, 'ind_r': ind_r})
return ret
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import re
import math
import torch
import json
import cv2
from utils.image import flip, color_aug
from utils.image import get_affine_transform, affine_transform
from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
from utils.image import draw_dense_reg
from utils.utils import Data_anchor_sample
from utils.Randaugmentations import Randaugment
from PIL import Image
import torch.utils.data as data
import numpy as np
TORCH_MAJOR = int(torch.__version__.split('.')[0])
TORCH_MINOR = int(torch.__version__.split('.')[1])
if TORCH_MAJOR == 1 and TORCH_MINOR < 8:
from torch._six import container_abcs, string_classes, int_classes
else:
string_classes = str
int_classes = int
import collections.abc as container_abcs
np_str_obj_array_pattern = re.compile(r'[SaUO]')
class MultiPoseDataset(data.Dataset):
def _coco_box_to_bbox(self, box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def _get_border(self, border, size):
i = 1
while size - border // i <= border // i:
i *= 2
return border // i
def __getitem__(self, index):
img_id = self.images[index]
file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
img_path = os.path.join(self.img_dir, file_name)
ann_ids = self.coco.getAnnIds(imgIds=[img_id])
anns = self.coco.loadAnns(ids=ann_ids)
num_objs = len(anns)
# num_objs = min(len(anns), self.max_objs)
if num_objs > self.max_objs:
num_objs = self.max_objs
            anns = np.random.choice(anns, num_objs, replace=False)
img = cv2.imread(img_path)
img, anns = Data_anchor_sample(img, anns)
# # for test the keypoint order
# img1 = cv2.flip(img,1)
# for ann in anns:
# width = img1.shape[1]
# bbox = self._coco_box_to_bbox(ann['bbox'])
# bbox[[0, 2]] = width - bbox[[2, 0]] - 1
# pts = np.array(ann['keypoints'], np.float32).reshape(5, 3)
#
# # for flip
# pts[:, 0] = width - pts[:, 0] - 1
# for e in self.flip_idx:
# pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
#
# # for debug show
# def add_coco_bbox(image, bbox, conf=1):
# txt = '{}{:.1f}'.format('person', conf)
# font = cv2.FONT_HERSHEY_SIMPLEX
# cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 255), 2)
# cv2.putText(image, txt, (bbox[0], bbox[1] - 2),
# font, 0.5, (0, 255, 0), thickness=1, lineType=cv2.LINE_AA)
#
# def add_coco_hp(image, points, keypoints_prob=1):
# for j in range(5):
# if keypoints_prob > 0.5:
# if j == 0:
# cv2.circle(image, (points[j, 0], points[j, 1]), 2, (255, 255, 0), -1)
# elif j == 1:
# cv2.circle(image, (points[j, 0], points[j, 1]), 2, (255, 0, 0), -1)
# elif j == 2:
# cv2.circle(image, (points[j, 0], points[j, 1]), 2, (0, 255, 0), -1)
# elif j == 3:
# cv2.circle(image, (points[j, 0], points[j, 1]), 2, (0, 0, 255), -1)
# elif j == 4:
# cv2.circle(image, (points[j, 0], points[j, 1]), 2, (0, 0, 0), -1)
# return image
#
# bbox = [int(x) for x in bbox]
# add_coco_bbox(img1, bbox )
# add_coco_hp(img1, pts)
# cv2.imshow('mat', img1)
# cv2.waitKey(5000)
height, width = img.shape[0], img.shape[1]
c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
s = max(img.shape[0], img.shape[1]) * 1.0
rot = 0
flipped = False
if self.split == 'train':
if not self.opt.not_rand_crop:
# s = s * np.random.choice(np.arange(0.8, 1.1, 0.1))
# s = s * np.random.choice(np.arange(0.3, 1.2, 0.1))
s = s # not scale
# _border = np.random.randint(128*0.4, 128*1.4)
_border = s * np.random.choice([0.1, 0.2, 0.25])
w_border = self._get_border(_border, img.shape[1])
h_border = self._get_border(_border, img.shape[0])
c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
else:
sf = self.opt.scale
cf = self.opt.shift
c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
if np.random.random() < self.opt.aug_rot:
rf = self.opt.rotate
rot = np.clip(np.random.randn()*rf, -rf*2, rf*2)
if np.random.random() < self.opt.flip:
flipped = True
img = img[:, ::-1, :]
c[0] = width - c[0] - 1
trans_input = get_affine_transform(
c, s, rot, [self.opt.input_res, self.opt.input_res])
inp = cv2.warpAffine(img, trans_input,
(self.opt.input_res, self.opt.input_res),
flags=cv2.INTER_LINEAR)
inp = (inp.astype(np.float32) / 255.)
if self.split == 'train' and not self.opt.no_color_aug:
            # random color augmentation
color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
# inp = Randaugment(self._data_rng, inp, self._eig_val, self._eig_vec)
inp = (inp - self.mean) / self.std
inp = inp.transpose(2, 0, 1)
output_res = self.opt.output_res
num_joints = self.num_joints
trans_output_rot = get_affine_transform(c, s, rot, [output_res, output_res])
trans_output = get_affine_transform(c, s, 0, [output_res, output_res])
hm = np.zeros((self.num_classes, output_res, output_res), dtype=np.float32)
hm_hp = np.zeros((num_joints, output_res, output_res), dtype=np.float32)
dense_kps = np.zeros((num_joints, 2, output_res, output_res),
dtype=np.float32)
dense_kps_mask = np.zeros((num_joints, output_res, output_res),
dtype=np.float32)
wh = np.zeros((self.max_objs, 2), dtype=np.float32)
kps = np.zeros((self.max_objs, num_joints * 2), dtype=np.float32)
reg = np.zeros((self.max_objs, 2), dtype=np.float32)
ind = np.zeros((self.max_objs), dtype=np.int64)
reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
wight_mask = np.ones((self.max_objs), dtype=np.float32)
kps_mask = np.zeros((self.max_objs, self.num_joints * 2), dtype=np.uint8)
hp_offset = np.zeros((self.max_objs * num_joints, 2), dtype=np.float32)
hp_ind = np.zeros((self.max_objs * num_joints), dtype=np.int64)
hp_mask = np.zeros((self.max_objs * num_joints), dtype=np.int64)
draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
draw_umich_gaussian
gt_det = []
for k in range(num_objs):
ann = anns[k]
bbox = self._coco_box_to_bbox(ann['bbox'])
cls_id = int(ann['category_id']) - 1
pts = np.array(ann['keypoints'], np.float32).reshape(num_joints, 3)
if flipped:
bbox[[0, 2]] = width - bbox[[2, 0]] - 1
pts[:, 0] = width - pts[:, 0] - 1
for e in self.flip_idx:
pts[e[0]], pts[e[1]] = pts[e[1]].copy(), pts[e[0]].copy()
bbox[:2] = affine_transform(bbox[:2], trans_output)
bbox[2:] = affine_transform(bbox[2:], trans_output)
bbox = np.clip(bbox, 0, output_res - 1)
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
if (h > 0 and w > 0) or (rot != 0):
radius = gaussian_radius((math.ceil(h), math.ceil(w)))
radius = self.opt.hm_gauss if self.opt.mse_loss else max(0, int(radius))
ct = np.array(
                    [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)  # center of the face box
                ct_int = ct.astype(np.int32)  # quantize to the integer grid
                # wh[k] = 1. * w, 1. * h  # 2. the plain CenterNet encoding
                wh[k] = np.log(1. * w / 4), np.log(1. * h / 4)  # 2. log-encoded face box size, as in the CenterFace paper
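                # (inverse of the encoding above: on the output map,
                #  w = 4 * exp(wh[k][0]) and h = 4 * exp(wh[k][1]))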
                ind[k] = ct_int[1] * output_res + ct_int[0]  # flat index of the face center on the 1/4-resolution map
                reg[k] = ct - ct_int  # 3. quantization offset of the face-box center
                reg_mask[k] = 1  # this slot contributes to the loss
# if w*h <= 20:
# wight_mask[k] = 15
                num_kpts = pts[:, 2].sum()  # number of labeled keypoints (sum of visibility flags)
                if num_kpts == 0 or w*h <= 8:  # faces without keypoint labels, or tiny ones, count as hard samples
                    # print('no keypoint labels, or face bbox under 8 pixels')
hm[cls_id, ct_int[1], ct_int[0]] = 0.9999
# reg_mask[k] = 0
hp_radius = gaussian_radius((math.ceil(h), math.ceil(w)))
hp_radius = self.opt.hm_gauss \
if self.opt.mse_loss else max(0, int(hp_radius))
for j in range(num_joints):
if pts[j, 2] > 0:
pts[j, :2] = affine_transform(pts[j, :2], trans_output_rot)
if pts[j, 0] >= 0 and pts[j, 0] < output_res and \
pts[j, 1] >= 0 and pts[j, 1] < output_res:
                            kps[k, j * 2: j * 2 + 2] = pts[j, :2] - ct_int  # 4. keypoint offset from the face-box center
kps_mask[k, j * 2: j * 2 + 2] = 1
                            pt_int = pts[j, :2].astype(np.int32)  # quantized keypoint
                            hp_offset[k * num_joints + j] = pts[j, :2] - pt_int  # keypoint quantization offset
                            hp_ind[k * num_joints + j] = pt_int[1] * output_res + pt_int[0]  # flat index
                            hp_mask[k * num_joints + j] = 1  # mask for the keypoint loss
if self.opt.dense_hp:
# must be before draw center hm gaussian
draw_dense_reg(dense_kps[j], hm[cls_id], ct_int,
pts[j, :2] - ct_int, radius, is_offset=True)
draw_gaussian(dense_kps_mask[j], ct_int, radius)
                            draw_gaussian(hm_hp[j], pt_int, hp_radius)  # 1. keypoint Gaussian heatmap
                            if ann['bbox'][2]*ann['bbox'][3] <= 16.0:  # ignore keypoints of very small faces
kps_mask[k, j * 2: j * 2 + 2] = 0
draw_gaussian(hm[cls_id], ct_int, radius)
gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
ct[0] + w / 2, ct[1] + h / 2, 1] +
pts[:, :2].reshape(num_joints * 2).tolist() + [cls_id])
if rot != 0:
hm = hm * 0 + 0.9999
reg_mask *= 0
kps_mask *= 0
ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh,
'landmarks': kps, 'hps_mask': kps_mask, 'wight_mask': wight_mask}
if self.opt.dense_hp:
dense_kps = dense_kps.reshape(num_joints * 2, output_res, output_res)
dense_kps_mask = dense_kps_mask.reshape(
num_joints, 1, output_res, output_res)
dense_kps_mask = np.concatenate([dense_kps_mask, dense_kps_mask], axis=1)
dense_kps_mask = dense_kps_mask.reshape(
num_joints * 2, output_res, output_res)
ret.update({'dense_hps': dense_kps, 'dense_hps_mask': dense_kps_mask})
del ret['landmarks'], ret['hps_mask'] # dense targets replace the sparse ones ('hps' was renamed 'landmarks' in ret above)
if self.opt.reg_offset:
ret.update({'hm_offset': reg}) # sub-pixel offset of the quantized face center
if self.opt.hm_hp:
ret.update({'hm_hp': hm_hp})
if self.opt.reg_hp_offset:
ret.update({'hp_offset': hp_offset, 'hp_ind': hp_ind, 'hp_mask': hp_mask})
if self.opt.debug > 0 or self.split != 'train':
gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
np.zeros((1, 40), dtype=np.float32)
meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
ret['meta'] = meta
return ret
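# --- Hedged sketch (not part of the original file): decoding the size target
# built above. wh[k] stores log(w/4) and log(h/4), so recovering the box size
# just inverts that encoding; decode_face_size / pred_wh are illustrative names.
def decode_face_size(pred_wh):
    # pred_wh: (N, 2) array of log-encoded sizes -> (N, 2) sizes in the same
    # units used when the target was built (output-resolution pixels here)
    return 4.0 * np.exp(pred_wh)
# e.g. a 64 x 32 box is stored as (log(16), log(8)) ~= (2.77, 2.08), and
# decode_face_size recovers (64.0, 32.0) up to float precision.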
# collation helpers; in the PyTorch versions this code targets, the names used
# below come from torch._six and re (added here in case they are not imported
# earlier in the file)
import re
from torch._six import container_abcs, string_classes, int_classes
np_str_obj_array_pattern = re.compile(r'[SaUO]')
_use_shared_memory = False
error_msg_fmt = "batch must contain tensors, numbers, dicts or lists; found {}"
numpy_type_map = {
'float64': torch.DoubleTensor,
'float32': torch.FloatTensor,
'float16': torch.HalfTensor,
'int64': torch.LongTensor,
'int32': torch.IntTensor,
'int16': torch.ShortTensor,
'int8': torch.CharTensor,
'uint8': torch.ByteTensor,
}
def default_collate(batch):
r"""Puts each data field into a tensor with outer dimension batch size"""
elem_type = type(batch[0])
if isinstance(batch[0], torch.Tensor):
out = None
if _use_shared_memory:
# If we're in a background process, concatenate directly into a
# shared memory tensor to avoid an extra copy
numel = sum([x.numel() for x in batch])
storage = batch[0].storage()._new_shared(numel)
out = batch[0].new(storage)
return torch.stack(batch, 0, out=out)
elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
and elem_type.__name__ != 'string_':
elem = batch[0]
if elem_type.__name__ == 'ndarray':
# array of string classes and object
if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
raise TypeError(error_msg_fmt.format(elem.dtype))
return default_collate([torch.from_numpy(b) for b in batch])
if elem.shape == (): # scalars
py_type = float if elem.dtype.name.startswith('float') else int
return numpy_type_map[elem.dtype.name](list(map(py_type, batch)))
elif isinstance(batch[0], float):
return torch.tensor(batch, dtype=torch.float64)
elif isinstance(batch[0], int_classes):
return torch.tensor(batch)
elif isinstance(batch[0], string_classes):
return batch
elif isinstance(batch[0], container_abcs.Mapping):
return {key: default_collate([d[key] for d in batch]) for key in batch[0]}
elif isinstance(batch[0], tuple) and hasattr(batch[0], '_fields'): # namedtuple
return type(batch[0])(*(default_collate(samples) for samples in zip(*batch)))
elif isinstance(batch[0], container_abcs.Sequence):
transposed = zip(*batch)
return [default_collate(samples) for samples in transposed]
raise TypeError((error_msg_fmt.format(type(batch[0]))))
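# Hedged example (not in the original source): default_collate stacks a list of
# per-sample numpy arrays into one batched tensor, e.g.
#   default_collate([np.zeros((3, 8, 8)), np.ones((3, 8, 8))]).shape
#   -> torch.Size([2, 3, 8, 8])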
def multipose_collate(batch):
objects_dims = [d.shape[0] for d in batch]
index = objects_dims.index(max(objects_dims))
# one_dim = True if len(batch[0].shape) == 1 else False
res = []
for i in range(len(batch)):
tres = np.zeros_like(batch[index], dtype=batch[index].dtype)
tres[:batch[i].shape[0]] = batch[i]
res.append(tres)
return res
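# Hedged illustration (not in the original source): multipose_collate zero-pads
# every array to the largest first dimension in the batch so they can be stacked,
# e.g. with a = np.ones((2, 4)) and b = np.ones((5, 4)),
# multipose_collate([a, b]) returns two (5, 4) arrays, a padded with zeros in rows 2..4.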
def Multiposebatch(batch):
sample_batch = {}
for key in batch[0]:
if key in ['hm', 'input']:
sample_batch[key] = default_collate([d[key] for d in batch])
else:
align_batch = multipose_collate([d[key] for d in batch])
sample_batch[key] = default_collate(align_batch)
return sample_batch
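# Hedged usage sketch (train_dataset and the batch size are illustrative; assumes
# a dataset whose __getitem__ returns the dict built above):
#   from torch.utils.data import DataLoader
#   loader = DataLoader(train_dataset, batch_size=8, shuffle=True,
#                       collate_fn=Multiposebatch)
# 'input' and 'hm' have fixed shapes and go straight through default_collate;
# the per-object targets (wh, landmarks, ind, ...) are zero-padded first.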
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import time
import torch
from progress.bar import Bar
from models.model import create_model, load_model
from utils.image import get_affine_transform
from utils.debugger import Debugger
import numpy as np
class BaseDetector(object):
def __init__(self, opt):
if opt.gpus[0] >= 0:
opt.device = torch.device('cuda')
else:
opt.device = torch.device('cpu')
print('Creating model...')
self.model = create_model(opt.arch, opt.heads, opt.head_conv)
self.model = load_model(self.model, opt.load_model)
self.model = self.model.to(opt.device)
self.model.eval()
self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3)
self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3)
self.max_per_image = 100
self.num_classes = opt.num_classes
self.scales = opt.test_scales
self.opt = opt
self.pause = False
def pre_process(self, image, scale, meta=None):
height, width = image.shape[0:2]
new_height = int(height * scale)
new_width = int(width * scale)
if self.opt.fix_res:
inp_height, inp_width = self.opt.input_h, self.opt.input_w
c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
s = max(height, width) * 1.0
else:
# inp_height = (new_height | self.opt.pad) + 1
# inp_width = (new_width | self.opt.pad) + 1
inp_height = int(np.ceil(new_height / 32) * 32)
inp_width = int(np.ceil(new_width / 32) * 32)
c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
s = np.array([inp_width, inp_height], dtype=np.float32)
trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
resized_image = cv2.resize(image, (new_width, new_height))
inp_image = cv2.warpAffine(
resized_image, trans_input, (inp_width, inp_height),
flags=cv2.INTER_LINEAR)
inp_image = ((inp_image / 255. - self.mean) /
self.std).astype(np.float32)
images = inp_image.transpose(2, 0, 1).reshape(
1, 3, inp_height, inp_width)
if self.opt.flip_test:
images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
images = torch.from_numpy(images)
meta = {'c': c, 's': s,
'out_height': inp_height // self.opt.down_ratio,
'out_width': inp_width // self.opt.down_ratio}
return images, meta
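# Hedged worked example (not in the original file): with fix_res disabled, a
# 500 x 375 image (width x height) at scale 1.0 is padded to the next multiple of 32:
#   inp_width  = ceil(500 / 32) * 32 = 512
#   inp_height = ceil(375 / 32) * 32 = 384
# so with down_ratio = 4 the output map is 128 x 96.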
def process(self, images, return_time=False):
raise NotImplementedError
def post_process(self, dets, meta, scale=1):
raise NotImplementedError
def merge_outputs(self, detections):
raise NotImplementedError
def debug(self, debugger, images, dets, output, scale=1):
raise NotImplementedError
def show_results(self, debugger, image, results):
raise NotImplementedError
def return_results(self, debugger, image, results):
raise NotImplementedError
def run(self, image_or_path_or_tensor, meta=None):
load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
merge_time, tot_time = 0, 0
debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug == 3),
theme=self.opt.debugger_theme)
start_time = time.time()
pre_processed = False
if isinstance(image_or_path_or_tensor, np.ndarray):
image = image_or_path_or_tensor
elif isinstance(image_or_path_or_tensor, str):
image = cv2.imread(image_or_path_or_tensor)
else:
image = image_or_path_or_tensor['image'][0].numpy()
pre_processed_images = image_or_path_or_tensor
pre_processed = True
loaded_time = time.time()
load_time += (loaded_time - start_time)
detections = []
for scale in self.scales:
scale_start_time = time.time()
if not pre_processed:
images, meta = self.pre_process(image, scale, meta)
else:
# import pdb; pdb.set_trace()
images = pre_processed_images['images'][scale][0]
meta = pre_processed_images['meta'][scale]
meta = {k: v.numpy()[0] for k, v in meta.items()}
images = images.to(self.opt.device)
torch.cuda.synchronize()
pre_process_time = time.time()
pre_time += pre_process_time - scale_start_time
output, dets, forward_time = self.process(images, return_time=True)
torch.cuda.synchronize()
net_time += forward_time - pre_process_time
decode_time = time.time()
dec_time += decode_time - forward_time
if self.opt.debug >= 2:
self.debug(debugger, images, dets, output, scale)
# box:4+score:1+kpoints:10+class:1=16
dets = self.post_process(dets, meta, scale)
torch.cuda.synchronize()
post_process_time = time.time()
post_time += post_process_time - decode_time
detections.append(dets)
results = self.merge_outputs(detections)
torch.cuda.synchronize()
end_time = time.time()
merge_time += end_time - post_process_time
tot_time += end_time - start_time
if self.opt.debug >= 1:
self.show_results(debugger, image, results)
if self.opt.debug == -1:
plot_img = self.return_results(debugger, image, results)
else:
plot_img = None
return {'results': results, 'tot': tot_time, 'load': load_time,
'pre': pre_time, 'net': net_time, 'dec': dec_time,
'post': post_time, 'merge': merge_time, 'plot_img': plot_img}
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import time
import torch
from external.nms import soft_nms
from models.decode import ctdet_decode
from models.utils import flip_tensor
from utils.image import get_affine_transform
from utils.post_process import ctdet_post_process
from utils.debugger import Debugger
from progress.bar import Bar
from .base_detector import BaseDetector
import numpy as np
class CtdetDetector(BaseDetector):
def __init__(self, opt):
super(CtdetDetector, self).__init__(opt)
def process(self, images, return_time=False):
with torch.no_grad():
output = self.model(images)[-1]
hm = output['hm'].sigmoid_()
wh = output['wh']
reg = output['reg'] if self.opt.reg_offset else None
if self.opt.flip_test:
hm = (hm[0:1] + flip_tensor(hm[1:2])) / 2
wh = (wh[0:1] + flip_tensor(wh[1:2])) / 2
reg = reg[0:1] if reg is not None else None
torch.cuda.synchronize()
forward_time = time.time()
dets = ctdet_decode(hm, wh, reg=reg, K=self.opt.K) # K=100
if return_time:
return output, dets, forward_time
else:
return output, dets
def post_process(self, dets, meta, scale=1):
dets = dets.detach().cpu().numpy()
dets = dets.reshape(1, -1, dets.shape[2])
dets = ctdet_post_process(
dets.copy(), [meta['c']], [meta['s']],
meta['out_height'], meta['out_width'], self.opt.num_classes)
for j in range(1, self.num_classes + 1):
dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5)
dets[0][j][:, :4] /= scale
return dets[0]
def merge_outputs(self, detections):
results = {}
for j in range(1, self.num_classes + 1):
results[j] = np.concatenate(
[detection[j] for detection in detections], axis=0).astype(np.float32)
if len(self.scales) > 1 or self.opt.nms:
soft_nms(results[j], Nt=0.5, method=2)
scores = np.hstack(
[results[j][:, 4] for j in range(1, self.num_classes + 1)])
if len(scores) > self.max_per_image:
kth = len(scores) - self.max_per_image
thresh = np.partition(scores, kth)[kth]
for j in range(1, self.num_classes + 1):
keep_inds = (results[j][:, 4] >= thresh)
results[j] = results[j][keep_inds]
return results
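# Hedged numeric illustration (not in the original source) of the np.partition
# thresholding above: with scores = [0.9, 0.2, 0.8, 0.4] and max_per_image = 2,
# kth = 4 - 2 = 2 and np.partition(scores, 2)[2] = 0.8, so only detections
# scoring >= 0.8 survive.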
def debug(self, debugger, images, dets, output, scale=1):
detection = dets.detach().cpu().numpy().copy()
detection[:, :, :4] *= self.opt.down_ratio
for i in range(1):
img = images[i].detach().cpu().numpy().transpose(1, 2, 0)
img = ((img * self.std + self.mean) * 255).astype(np.uint8)
pred = debugger.gen_colormap(
output['hm'][i].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hm_{:.1f}'.format(scale))
debugger.add_img(img, img_id='out_pred_{:.1f}'.format(scale))
for k in range(len(dets[i])):
if detection[i, k, 4] > self.opt.center_thresh:
debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1],
detection[i, k, 4],
img_id='out_pred_{:.1f}'.format(scale))
def show_results(self, debugger, image, results):
debugger.add_img(image, img_id='ctdet')
for j in range(1, self.num_classes + 1):
for bbox in results[j]:
if bbox[4] > self.opt.vis_thresh:
debugger.add_coco_bbox(
bbox[:4], j - 1, bbox[4], img_id='ctdet')
debugger.show_all_imgs(pause=self.pause)
def return_results(self, debugger, image, results):
debugger.add_img(image, img_id='ctdet')
for j in range(1, self.num_classes + 1):
for bbox in results[j]:
if bbox[4] > self.opt.vis_thresh:
debugger.add_coco_bbox(
bbox[:4], j - 1, bbox[4], img_id='ctdet')
return debugger.return_img()
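# Hedged usage sketch (assumes an 'opt' namespace configured elsewhere in this
# repo, e.g. via the project's opts parser; the image path is illustrative):
#   detector = CtdetDetector(opt)
#   out = detector.run('example.jpg')
#   out['results']   # {class_id: (N, 5) array of [x1, y1, x2, y2, score]}
#   print('net {:.3f}s  tot {:.3f}s'.format(out['net'], out['tot']))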