Commit b952e97b authored by chenych

First Commit.
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Xingyi Zhou
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import cv2
import random
def flip(img):
return img[:, :, ::-1].copy()
def transform_preds(coords, center, scale, output_size):
target_coords = np.zeros(coords.shape)
trans = get_affine_transform(center, scale, 0, output_size, inv=1)
for p in range(coords.shape[0]):
target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
return target_coords
def get_affine_transform(center,
scale,
rot,
output_size,
shift=np.array([0, 0], dtype=np.float32),
inv=0):
if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
scale = np.array([scale, scale], dtype=np.float32)
scale_tmp = scale
src_w = scale_tmp[0]
dst_w = output_size[0]
dst_h = output_size[1]
rot_rad = np.pi * rot / 180
src_dir = get_dir([0, src_w * -0.5], rot_rad)
dst_dir = np.array([0, dst_w * -0.5], np.float32)
src = np.zeros((3, 2), dtype=np.float32)
dst = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center + scale_tmp * shift
src[1, :] = center + src_dir + scale_tmp * shift
dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir
src[2:, :] = get_3rd_point(src[0, :], src[1, :])
dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
if inv:
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
return trans
def affine_transform(pt, t):
new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T
new_pt = np.dot(t, new_pt)
return new_pt[:2]
def get_3rd_point(a, b):
direct = a - b
return b + np.array([-direct[1], direct[0]], dtype=np.float32)
def get_dir(src_point, rot_rad):
sn, cs = np.sin(rot_rad), np.cos(rot_rad)
src_result = [0, 0]
src_result[0] = src_point[0] * cs - src_point[1] * sn
src_result[1] = src_point[0] * sn + src_point[1] * cs
return src_result
def crop(img, center, scale, output_size, rot=0):
trans = get_affine_transform(center, scale, rot, output_size)
dst_img = cv2.warpAffine(img,
trans,
(int(output_size[0]), int(output_size[1])),
flags=cv2.INTER_LINEAR)
return dst_img
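# Illustrative sketch (not part of the original commit): cropping a patch with
# crop() and mapping a point back with transform_preds(). The center, scale,
# and sizes below are made-up example values.
def _example_crop_roundtrip():
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    center = np.array([320., 240.], dtype=np.float32)  # crop center in the source image
    scale = 480.0                                       # side length of the square crop region
    patch = crop(img, center, scale, (512, 512))        # warp the region into a 512x512 patch
    peak = np.array([[256., 256.]], dtype=np.float32)   # a point in patch coordinates
    back = transform_preds(peak, center, scale, (512, 512))  # maps back to ~(320, 240)
    return patch, back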
def gaussian_radius(det_size, min_overlap=0.7):
height, width = det_size
a1 = 1
b1 = (height + width)
c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
r1 = (b1 + sq1) / 2
a2 = 4
b2 = 2 * (height + width)
c2 = (1 - min_overlap) * width * height
sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
r2 = (b2 + sq2) / 2
a3 = 4 * min_overlap
b3 = -2 * min_overlap * (height + width)
c3 = (min_overlap - 1) * width * height
sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
r3 = (b3 + sq3) / 2
return min(r1, r2, r3)
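# Quick sanity check (illustrative, values assumed): the radius follows the
# CornerNet heuristic, i.e. corners jittered within it should still keep at
# least min_overlap IoU with the ground-truth box; callers typically clamp it.
def _example_gaussian_radius():
    r = gaussian_radius((24, 32), min_overlap=0.7)  # (height, width) of the box
    return max(0, int(r))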
def gaussian2D(shape, sigma=1):
m, n = [(ss - 1.) / 2. for ss in shape]
y, x = np.ogrid[-m:m+1,-n:n+1]
h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
h[h < np.finfo(h.dtype).eps * h.max()] = 0
return h
def draw_umich_gaussian(heatmap, center, radius, k=1):
diameter = 2 * radius + 1 # diameter of the Gaussian patch
gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
x, y = int(center[0]), int(center[1])
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] # region of the heatmap to be updated
masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug
np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
return heatmap
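# Illustrative usage sketch (not in the original commit): rendering one object
# center onto an empty heatmap. Map size and box dimensions are assumptions.
def _example_draw_umich_gaussian():
    hm = np.zeros((128, 128), dtype=np.float32)
    radius = max(0, int(gaussian_radius((24, 32))))
    draw_umich_gaussian(hm, (64, 64), radius)  # writes a peak of 1.0 at (64, 64)
    return hm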
def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False):
diameter = 2 * radius + 1
gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
value = np.array(value, dtype=np.float32).reshape(-1, 1, 1)
dim = value.shape[0]
reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value
if is_offset and dim == 2:
delta = np.arange(diameter*2+1) - radius
reg[0] = reg[0] - delta.reshape(1, -1)
reg[1] = reg[1] - delta.reshape(-1, 1)
x, y = int(center[0]), int(center[1])
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[radius - top:radius + bottom,
radius - left:radius + right]
masked_reg = reg[:, radius - top:radius + bottom,
radius - left:radius + right]
if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug
idx = (masked_gaussian >= masked_heatmap).reshape(
1, masked_gaussian.shape[0], masked_gaussian.shape[1])
masked_regmap = (1-idx) * masked_regmap + idx * masked_reg
regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap
return regmap
def draw_msra_gaussian(heatmap, center, sigma):
tmp_size = sigma * 3
mu_x = int(center[0] + 0.5)
mu_y = int(center[1] + 0.5)
w, h = heatmap.shape[0], heatmap.shape[1] # note: w is the row count (height), h the column count
ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0:
return heatmap
size = 2 * tmp_size + 1
x = np.arange(0, size, 1, np.float32)
y = x[:, np.newaxis]
x0 = y0 = size // 2
g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
g_x = max(0, -ul[0]), min(br[0], h) - ul[0]
g_y = max(0, -ul[1]), min(br[1], w) - ul[1]
img_x = max(0, ul[0]), min(br[0], h)
img_y = max(0, ul[1]), min(br[1], w)
heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum(
heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]],
g[g_y[0]:g_y[1], g_x[0]:g_x[1]])
return heatmap
def grayscale(image):
return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
def lighting_(data_rng, image, alphastd, eigval, eigvec):
alpha = data_rng.normal(scale=alphastd, size=(3, ))
image += np.dot(eigvec, eigval * alpha)
def blend_(alpha, image1, image2):
image1 *= alpha
image2 *= (1 - alpha)
image1 += image2
def saturation_(data_rng, image, gs, gs_mean, var):
alpha = 1. + data_rng.uniform(low=-var, high=var)
blend_(alpha, image, gs[:, :, None])
def brightness_(data_rng, image, gs, gs_mean, var):
alpha = 1. + data_rng.uniform(low=-var, high=var)
image *= alpha
def contrast_(data_rng, image, gs, gs_mean, var):
alpha = 1. + data_rng.uniform(low=-var, high=var)
blend_(alpha, image, gs_mean)
def color_aug(data_rng, image, eig_val, eig_vec):
functions = [brightness_, contrast_, saturation_]
random.shuffle(functions)
gs = grayscale(image)
gs_mean = gs.mean()
for f in functions:
f(data_rng, image, gs, gs_mean, 0.4)
lighting_(data_rng, image, 0.1, eig_val, eig_vec)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import numba
@numba.jit(nopython=True, nogil=True)
def gen_oracle_map(feat, ind, w, h):
# feat: B x maxN x featDim
# ind: B x maxN
batch_size = feat.shape[0]
max_objs = feat.shape[1]
feat_dim = feat.shape[2]
out = np.zeros((batch_size, feat_dim, h, w), dtype=np.float32)
vis = np.zeros((batch_size, h, w), dtype=np.uint8)
ds = [(0, 1), (0, -1), (1, 0), (-1, 0)]
for i in range(batch_size):
queue_ind = np.zeros((h*w*2, 2), dtype=np.int32)
queue_feat = np.zeros((h*w*2, feat_dim), dtype=np.float32)
head, tail = 0, 0
for j in range(max_objs):
if ind[i][j] > 0:
x, y = ind[i][j] % w, ind[i][j] // w
out[i, :, y, x] = feat[i][j]
vis[i, y, x] = 1
queue_ind[tail] = x, y
queue_feat[tail] = feat[i][j]
tail += 1
while tail - head > 0:
x, y = queue_ind[head]
f = queue_feat[head]
head += 1
for (dx, dy) in ds:
xx, yy = x + dx, y + dy
if xx >= 0 and yy >= 0 and xx < w and yy < h and vis[i, yy, xx] < 1:
out[i, :, yy, xx] = f
vis[i, yy, xx] = 1
queue_ind[tail] = xx, yy
queue_feat[tail] = f
tail += 1
return out
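# Illustrative sketch (assumed shapes, not in the original commit): gen_oracle_map
# scatters each object's feature at its flat index and BFS-floods the rest of the
# h x w map, so every cell holds the feature of its nearest seeded object.
def _example_gen_oracle_map():
    feat = np.ones((1, 1, 2), dtype=np.float32)  # B=1, maxN=1, featDim=2
    ind = np.array([[5]], dtype=np.int64)        # flat index 5 on a 4x4 map -> (x=1, y=1)
    return gen_oracle_map(feat, ind, 4, 4)       # shape (1, 2, 4, 4), fully flooded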
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from .image import transform_preds
from .ddd_utils import ddd2locrot
def get_pred_depth(depth):
return depth
def get_alpha(rot):
# output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
# bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]
# return rot[:, 0]
idx = rot[:, 1] > rot[:, 5]
alpha1 = np.arctan(rot[:, 2] / rot[:, 3]) + (-0.5 * np.pi)
alpha2 = np.arctan(rot[:, 6] / rot[:, 7]) + ( 0.5 * np.pi)
return alpha1 * idx + alpha2 * (1 - idx)
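# Illustrative decoding sketch (made-up values, not in the original commit):
# bin 1 wins when its score rot[:, 1] exceeds rot[:, 5]; its angle is
# arctan(sin/cos) shifted by -pi/2.
def _example_get_alpha():
    rot = np.zeros((1, 8), dtype=np.float32)
    rot[0, 1] = 1.0                  # bin 1 classification score wins
    rot[0, 2], rot[0, 3] = 0.0, 1.0  # bin 1 sin/cos -> arctan(0/1) = 0
    rot[0, 6], rot[0, 7] = 0.0, 1.0  # keep bin 2 well-defined to avoid NaN
    return get_alpha(rot)            # -> array([-pi/2])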
def ddd_post_process_2d(dets, c, s, opt):
# dets: batch x max_dets x dim
# return 1-based class det list
ret = []
include_wh = dets.shape[2] > 16
for i in range(dets.shape[0]):
top_preds = {}
dets[i, :, :2] = transform_preds(
dets[i, :, 0:2], c[i], s[i], (opt.output_w, opt.output_h))
classes = dets[i, :, -1]
for j in range(opt.num_classes):
inds = (classes == j)
top_preds[j + 1] = np.concatenate([
dets[i, inds, :3].astype(np.float32),
get_alpha(dets[i, inds, 3:11])[:, np.newaxis].astype(np.float32),
get_pred_depth(dets[i, inds, 11:12]).astype(np.float32),
dets[i, inds, 12:15].astype(np.float32)], axis=1)
if include_wh:
top_preds[j + 1] = np.concatenate([
top_preds[j + 1],
transform_preds(
dets[i, inds, 15:17], c[i], s[i], (opt.output_w, opt.output_h))
.astype(np.float32)], axis=1)
ret.append(top_preds)
return ret
def ddd_post_process_3d(dets, calibs):
# dets: batch x max_dets x dim
# return 1-based class det list
ret = []
for i in range(len(dets)):
preds = {}
for cls_ind in dets[i].keys():
preds[cls_ind] = []
for j in range(len(dets[i][cls_ind])):
center = dets[i][cls_ind][j][:2]
score = dets[i][cls_ind][j][2]
alpha = dets[i][cls_ind][j][3]
depth = dets[i][cls_ind][j][4]
dimensions = dets[i][cls_ind][j][5:8]
wh = dets[i][cls_ind][j][8:10]
locations, rotation_y = ddd2locrot(
center, alpha, dimensions, depth, calibs[0])
bbox = [center[0] - wh[0] / 2, center[1] - wh[1] / 2,
center[0] + wh[0] / 2, center[1] + wh[1] / 2]
pred = [alpha] + bbox + dimensions.tolist() + \
locations.tolist() + [rotation_y, score]
preds[cls_ind].append(pred)
preds[cls_ind] = np.array(preds[cls_ind], dtype=np.float32)
ret.append(preds)
return ret
def ddd_post_process(dets, c, s, calibs, opt):
# dets: batch x max_dets x dim
# return 1-based class det list
dets = ddd_post_process_2d(dets, c, s, opt)
dets = ddd_post_process_3d(dets, calibs)
return dets
def ctdet_post_process(dets, c, s, h, w, num_classes):
# dets: batch x max_dets x dim
# return 1-based class det dict
ret = []
for i in range(dets.shape[0]):
top_preds = {}
dets[i, :, :2] = transform_preds(
dets[i, :, 0:2], c[i], s[i], (w, h))
dets[i, :, 2:4] = transform_preds(
dets[i, :, 2:4], c[i], s[i], (w, h))
classes = dets[i, :, -1]
for j in range(num_classes):
inds = (classes == j)
top_preds[j + 1] = np.concatenate([
dets[i, inds, :4].astype(np.float32),
dets[i, inds, 4:5].astype(np.float32)], axis=1).tolist()
ret.append(top_preds)
return ret
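# Illustrative sketch (made-up center/scale): ctdet_post_process maps boxes from
# the output-map coordinate frame back to source-image pixels and groups them
# into a 1-based class dict.
def _example_ctdet_post_process():
    dets = np.array([[[10., 10., 50., 50., 0.9, 0.]]], dtype=np.float32)  # x1,y1,x2,y2,score,class
    c = [np.array([320., 240.], dtype=np.float32)]  # crop center per image
    s = [512.0]                                     # crop scale per image
    return ctdet_post_process(dets.copy(), c, s, 128, 128, num_classes=1)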
def multi_pose_post_process(dets, c, s, h, w):
# dets layout: box (4) + score (1) + keypoints (5 x 2 = 10) + class (1) = 16
# dets: batch x max_dets x 16
# return list of 15-dim rows (box + score + keypoints) in image coordinates
ret = []
for i in range(dets.shape[0]):
bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h)) # bounding boxes
pts = transform_preds(dets[i, :, 5:15].reshape(-1, 2), c[i], s[i], (w, h)) # 5 keypoints per detection
top_preds = np.concatenate(
[bbox.reshape(-1, 4), dets[i, :, 4:5], # confidence scores
pts.reshape(-1, 10)], axis=1).astype(np.float32).tolist() # 5 keypoints x 2 coordinates
ret.append({np.ones(1, dtype=np.int32)[0]: top_preds})
return ret
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import numpy as np
import random
import cv2
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
if self.count > 0:
self.avg = self.sum / self.count
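# Usage sketch: running averages over unevenly sized batches.
def _example_average_meter():
    meter = AverageMeter()
    meter.update(2.0)        # one sample with value 2.0
    meter.update(4.0, n=3)   # a batch of three samples with mean 4.0
    return meter.avg         # (2.0 + 3 * 4.0) / 4 = 3.5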
def Data_anchor_sample(image, anns):
maxSize = 12000
infDistance = 9999999
boxes = []
for ann in anns:
boxes.append([ann['bbox'][0], ann['bbox'][1], ann['bbox'][0]+ann['bbox'][2], ann['bbox'][1]+ann['bbox'][3]])
boxes = np.asarray(boxes, dtype=np.float32)
height, width, _ = image.shape
random_counter = 0
boxArea = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
rand_idx = random.randint(0, len(boxArea)-1)
rand_Side = boxArea[rand_idx] ** 0.5
anchors = [16, 32, 48, 64, 96, 128, 256, 512]
distance = infDistance
anchor_idx = 5
for i, anchor in enumerate(anchors):
if abs(anchor - rand_Side) < distance:
distance = abs(anchor - rand_Side) # pick the closest anchor scale
anchor_idx = i
target_anchor = random.choice(anchors[0:min(anchor_idx+1, 5) ]) # randomly pick a same-or-smaller anchor (biased downward)
ratio = float(target_anchor) / rand_Side # resize ratio
ratio = ratio * (2 ** random.uniform(-1, 1)) # jitter uniformly over [ratio/2, 2*ratio]
if int(height * ratio * width * ratio) > maxSize * maxSize:
ratio = (maxSize * maxSize / (height * width)) ** 0.5
interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4]
interp_method = random.choice(interp_methods)
image = cv2.resize(image, None, None, fx=ratio, fy=ratio, interpolation=interp_method)
boxes[:, 0] *= ratio
boxes[:, 1] *= ratio
boxes[:, 2] *= ratio
boxes[:, 3] *= ratio
boxes = boxes.tolist()
for i in range(len(anns)):
anns[i]['bbox'] = [boxes[i][0], boxes[i][1], boxes[i][2]-boxes[i][0], boxes[i][3]-boxes[i][1]] # face bbox back to [x, y, w, h]
for j in range(5):
anns[i]['keypoints'][j*3] *= ratio
anns[i]['keypoints'][j*3+1] *= ratio
return image, anns
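# Illustrative call sketch (dummy data, not in the original commit): each ann
# needs a COCO-style 'bbox' [x, y, w, h] and a 15-element 'keypoints' list
# (5 keypoints x (x, y, visibility)).
def _example_data_anchor_sample():
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    anns = [{'bbox': [100., 100., 40., 40.],
             'keypoints': [0.] * 15}]
    return Data_anchor_sample(img, anns)  # randomly rescaled image and anns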
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import _init_paths
import os
import torch
import torch.utils.data
from opts_pose import opts
from models.model import create_model, load_model, save_model
from models.data_parallel import DataParallel
from logger import Logger
from datasets.dataset_factory import get_dataset
from trains.train_factory import train_factory
from datasets.sample.multi_pose import Multiposebatch
def main(opt, qtepoch=[0,]):
torch.manual_seed(opt.seed)
torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
Dataset = get_dataset(opt.dataset, opt.task)
opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
print(opt)
logger = Logger(opt)
os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
print('Creating model...')
model = create_model(opt.arch, opt.heads, opt.head_conv)
optimizer = torch.optim.Adam(model.parameters(), opt.lr)
# optimizer = torch.optim.SGD(model.parameters(), opt.lr)
start_epoch = 0
if opt.load_model != '':
model, optimizer, start_epoch = load_model(
model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step)
Trainer = train_factory[opt.task]
trainer = Trainer(opt, model, optimizer)
trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)
print('Setting up data...')
val_loader = torch.utils.data.DataLoader(
Dataset(opt, 'val'),
batch_size=1,
shuffle=False,
num_workers=1,
pin_memory=True
)
if opt.test:
_, preds = trainer.val(0, val_loader)
val_loader.dataset.run_eval(preds, opt.save_dir)
return
train_loader = torch.utils.data.DataLoader(
Dataset(opt, 'train'),
batch_size=opt.batch_size,
shuffle=True,
num_workers=opt.num_workers,
pin_memory=True,
drop_last=True,
collate_fn=Multiposebatch
)
print('Starting training...')
best = 1e10
for epoch in range(start_epoch + 1, opt.num_epochs + 1):
qtepoch.append(epoch)
mark = epoch if opt.save_all else 'last'
log_dict_train, _ = trainer.train(epoch, train_loader)
logger.write('epoch: {}/{} |'.format(epoch, opt.num_epochs))
for k, v in log_dict_train.items():
logger.scalar_summary('train_{}'.format(k), v, epoch)
logger.write('{} {:8f} | '.format(k, v))
if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
epoch, model, optimizer)
with torch.no_grad():
log_dict_val, preds = trainer.val(epoch, val_loader)
for k, v in log_dict_val.items():
logger.scalar_summary('val_{}'.format(k), v, epoch)
logger.write('{} {:8f} | '.format(k, v))
if log_dict_val[opt.metric] < best:
best = log_dict_val[opt.metric]
save_model(os.path.join(opt.save_dir, 'model_best.pth'),
epoch, model)
else:
save_model(os.path.join(opt.save_dir, 'model_last.pth'),
epoch, model, optimizer)
logger.write('\n')
if epoch in opt.lr_step:
save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
epoch, model, optimizer)
lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
print('Drop LR to', lr)
for param_group in optimizer.param_groups:
param_group['lr'] = lr
logger.close()
if __name__ == '__main__':
opt = opts().parse()
main(opt)
import cv2
from PyQt5.QtGui import QPixmap, QImage
COLORS_10 =[(144,238,144),(178, 34, 34),(221,160,221),( 0,255, 0),( 0,128, 0),(210,105, 30),(220, 20, 60),
(192,192,192),(255,228,196),( 50,205, 50),(139, 0,139),(100,149,237),(138, 43,226),(238,130,238),
(255, 0,255),( 0,100, 0),(127,255, 0),(255, 0,255),( 0, 0,205),(255,140, 0),(255,239,213),
(199, 21,133),(124,252, 0),(147,112,219),(106, 90,205),(176,196,222),( 65,105,225),(173,255, 47),
(255, 20,147),(219,112,147),(186, 85,211),(199, 21,133),(148, 0,211),(255, 99, 71),(144,238,144),
(255,255, 0),(230,230,250),( 0, 0,255),(128,128, 0),(189,183,107),(255,255,224),(128,128,128),
(105,105,105),( 64,224,208),(205,133, 63),( 0,128,128),( 72,209,204),(139, 69, 19),(255,245,238),
(250,240,230),(152,251,152),( 0,255,255),(135,206,235),( 0,191,255),(176,224,230),( 0,250,154),
(245,255,250),(240,230,140),(245,222,179),( 0,139,139),(143,188,143),(255, 0, 0),(240,128,128),
(102,205,170),( 60,179,113),( 46,139, 87),(165, 42, 42),(178, 34, 34),(175,238,238),(255,248,220),
(218,165, 32),(255,250,240),(253,245,230),(244,164, 96),(210,105, 30)]
def read_show(imgname, label, detector=None, confidence=0.5, choose_id=2):
try:
img = cv2.imread(imgname)
if detector:
# img = detector.run(img)['plot_img']
bbox = detector.run(img)['results'][choose_id]
bbox = bbox[bbox[:, 4] > confidence, :]
bbox = bbox[:, :4]
img = draw_bboxes(img, bbox)
img = cv2.resize(img, (1000,600))
h, w, c = img.shape
byteperlin = c * w
cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)
image = QImage(img.data, w, h, byteperlin, QImage.Format_RGB888)
label.setPixmap(QPixmap.fromImage(image))
except:
label.setText('something wrong with the model')
def product_show(img, detector, confidence=0.5, choose_id=1):
try:
if detector:
# img = detector.run(img)['plot_img']
bbox = detector.run(img)['results'][choose_id]
bbox = bbox[bbox[:, 4] > confidence, :]
bbox = bbox[:, :4]
img = draw_bboxes(img, bbox)
img = cv2.resize(img, (1000,600))
h, w, c = img.shape
byteperlin = c * w
cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)
image = QImage(img.data, w, h, byteperlin, QImage.Format_RGB888)
return image
except:
# fall back to showing the raw frame when detection fails;
# recompute the buffer layout here so the names are always defined
img = cv2.resize(img, (1000, 600))
h, w, c = img.shape
byteperlin = c * w
cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)
image = QImage(img.data, w, h, byteperlin, QImage.Format_RGB888)
return image
def draw_bboxes(img, bbox, identities=None, offset=(0,0)):
for i,box in enumerate(bbox):
x1,y1,x2,y2 = [int(i) for i in box]
x1 += offset[0]
x2 += offset[0]
y1 += offset[1]
y2 += offset[1]
# box text and bar
id = int(identities[i]) if identities is not None else 0
color = COLORS_10[id%len(COLORS_10)]
# label = '{} {}'.format("defect", id)
label = 'defect'
font = cv2.FONT_HERSHEY_SIMPLEX
t_size = cv2.getTextSize(label, font, 0.5 , 2)[0]
cv2.rectangle(img,(x1, y1),(x2,y2),color,2)
cv2.rectangle(img,(x1, y1-t_size[1]-2),(x1+t_size[0],y1-2), color,-1)
cv2.putText(img,label,(x1,y1-4), font, 0.5, (0, 0, 0), thickness=1, lineType=cv2.LINE_AA)
return img
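# Usage sketch (dummy frame and boxes, not in the original commit): draw_bboxes
# expects pixel-space [x1, y1, x2, y2] rows and draws a labeled rectangle per row.
def _example_draw_bboxes():
    import numpy as np
    frame = np.zeros((600, 1000, 3), dtype=np.uint8)
    boxes = np.array([[100, 100, 200, 180]])
    return draw_bboxes(frame, boxes)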
import sys
import cv2
import time
import os
import glob
from UI import Ui_TabWidget
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtWidgets import QFileDialog, QTabWidget
from PyQt5.QtCore import QTimer, QThread, pyqtSignal, Qt
from PyQt5.QtGui import QPixmap, QImage
from PyQt5.QtWidgets import QLabel, QWidget, QProgressBar
from py_util import read_show, product_show
from center_main import main
from opts2 import opts
# from win32process import SuspendThread, ResumeThread
from detectors.detector_factory import detector_factory
model_path = '/path/model_best.pth'
arch = 'dla_34'
task = 'ctdet'
opt = opts().init('--task {} --load_model {} --arch {}'.format(task,
model_path, arch).split(' '))
class mywindow(QTabWidget, Ui_TabWidget): # this window subclasses the UI built with Qt Designer
def __init__(self):
super(mywindow, self).__init__()
self.setupUi(self)
self.thread = train_thred()
self.thread.my_signal.connect(self.set_step) # step 3: connect the progress signal to the bar
global imgnums
path = r'/path/data/pig/image/*.png'
self.datas = glob.glob(path)
imgnums = len(self.datas)
self.save_nums = 0 # number of captured images
def collect_image(self):
'''Automated image capture.
The camera feed can only be displayed from a worker thread.
'''
self.collect_image_thread = collect_image_thread()
self.collect_image_thread.signal.connect(self.set_label)
self.collect_image_thread.start()
def collect_save_image(self):
folder = f'./data/{self.line51.text()}/image'
if not os.path.exists(folder):
os.makedirs(folder)
self.label53.pixmap().save(f'{folder}/{self.save_nums}.jpg')
# cv2.imwrite(f'{folder}/{self.save_nums}.jpg', img)
self.save_nums += 1
self.label52.setText('Images captured: ' + str(self.save_nums))
def set_label(self, image):
'''Display the latest captured frame.'''
# self.label52.setText(text)
self.label53.setPixmap(QPixmap.fromImage(image))
def choose_train(self):
global train_json
train_json, file_type = QFileDialog.getOpenFileName(self,
'Select the training dataset',
"",
'All Files (*)')
self.label11.setText(train_json)
def choose_val(self):
global val_json
val_json, file_type = QFileDialog.getOpenFileName(self,
'Select the validation dataset',
"",
'All Files (*)')
self.label12.setText(val_json)
def count_func(self):
self.thread.start()
def set_step(self, num):
self.bar.setValue(num)
def load_model(self):
opt.debug = min(opt.debug, 0) # return detection results as cv2 images
self.detector = detector_factory[opt.task](opt)
def load_picture(self):
'''Image selection step of the validation flow.'''
global imgname
if self.pushbutton_22.text() == '选择图片': # button label: "Select image"
imgname, file_type = QFileDialog.getOpenFileName(self,
'Select image',
"",
'All Files (*)')
read_show(imgname, self.label_21,
choose_id=self.combobox21.currentIndex() + 1) # display the image
def test(self):
'''Detection step of the validation flow.'''
read_show(imgname, self.label_21, self.detector,
choose_id=self.combobox21.currentIndex() + 1)
def product_start(self):
'''Start the production-line feed.'''
if not hasattr(self, 'detector'): # model not loaded yet
opt.debug = min(opt.debug, 0)
self.detector = detector_factory[opt.task](opt)
if not hasattr(self, 'product_thread'): # create the worker thread
# video_path = 'rtsp://admin:Shenlan2018@171.211.125.44:1554/h264/ch1/main/av_stream'
video_path = 0
self.product_thread = product_thread(
self.detector, video_path, self.combobox41)
self.product_thread.mysignal.connect(self.product_cess)
self.product_thread.start()
def product_stop(self):
'''Pause the production-line feed.'''
self.product_thread.stop()
self.product_thread.quit()
self.product_thread.wait()
def exit(self):
sys.exit()
def product_cess(self, image):
self.label41.setPixmap(QPixmap.fromImage(image))
class collect_image_thread(QThread):
'''
Capture page:
read the video stream, save frames to the target folder, and
report the number of saved images in real time.
Frames are read in this worker thread and pushed to the UI thread.
'''
signal = pyqtSignal(QImage)
def __init__(self):
super(collect_image_thread, self).__init__()
# self.cap = cv2.VideoCapture('rtsp://admin:Shenlan2018@171.211.125.44:1554/h264/ch1/main/av_stream')
self.cap = cv2.VideoCapture(0)
def run(self):
while self.cap.isOpened():
try:
ret, frame = self.cap.read()
if ret:
img = cv2.resize(frame, (1000, 600))
h, w, c = img.shape
byteperlin = c * w
cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)
image = QImage(img.data, w, h, byteperlin,
QImage.Format_RGB888)
self.signal.emit(image)
except:
print('something wrong with the input video source') # the signal carries a QImage, so report errors on stdout instead
class product_thread(QThread):
'''
Acts like an API endpoint: the model stays loaded in this thread
while the video stream can be released and restarted.
'''
mysignal = pyqtSignal(QImage)
def __init__(self, detector, video_path, combobox):
super(product_thread, self).__init__()
self.flag = 1 # start/pause flag
self.video_path = video_path
self.cap = cv2.VideoCapture(video_path)
self.detector = detector
self.combobox = combobox
self.index = 0
def run(self):
'''Process one frame out of every four.'''
self.flag = 1
if not self.cap.isOpened():
self.cap = cv2.VideoCapture(self.video_path)
while self.cap.isOpened() and self.flag:
if self.index > 1000000000:
self.index = 0
self.index += 1
try:
# ret, frame = self.cap.read()
ret = self.cap.grab()
if ret and self.index % 4 == 0:
tret, frame = self.cap.retrieve()
image = product_show(frame, self.detector,
choose_id=self.combobox.currentIndex() + 1)
self.mysignal.emit(image)
except:
print('something wrong with the product_thread')
def stop(self):
self.flag = 0
self.cap.release() # release the camera
class train_thred(QThread):
my_signal = pyqtSignal(int) # step 1: declare the progress signal
def __init__(self):
super(train_thred, self).__init__()
self.max_iter = 50 # train for 50 epochs in total
def run(self):
opt = opts(train_json, val_json).parse() # this runs serially
center_train = main(opt)
for i in range(self.max_iter):
self.my_signal.emit(i) # step 2: emit training progress per epoch
center_train.train(i)
center_train.logger.close() # close the log file
if __name__ == '__main__':
app = QtWidgets.QApplication(sys.argv)
window = mywindow()
window.show()
sys.exit(app.exec_())
import os
import sys
path = os.path.dirname(__file__)
CENTERNET_PATH = os.path.join(path, '../src/lib')
sys.path.insert(0, CENTERNET_PATH) # must run before the lib imports below
import cv2
from opts_pose import opts
from detectors.detector_factory import detector_factory
import scipy.io as sio
def test_img(model_path, debug, threshold=0.4):
TASK = 'multi_pose'
input_h, input_w = 800, 800
opt = opts().init('--task {} --load_model {} --debug {} --input_h {} --input_w {}'.format(
TASK, model_path, debug, input_h, input_w).split(' '))
detector = detector_factory[opt.task](opt)
img_path = '../test_img/000388.jpg'
ori_img = cv2.imread(img_path, -1)
res = detector.run(ori_img)['results']
draw_img = ori_img.copy()
for b in res[1]:
x1, y1, x2, y2, s = b[0], b[1], b[2], b[3], b[4]
if s >= threshold:
cv2.rectangle(draw_img, (int(x1), int(y1)),
(int(x2), int(y2)), (0, 0, 255))
cv2.putText(draw_img, "Face:" + str(s)[:3],
(int(x1)-2, int(y1)-2), 0, 0.5, (255, 255, 255), 1)
cv2.imwrite("./draw_img.jpg", draw_img)
print("end.")
def test_video(model_path, debug, video_path=None):
debug = -1 # always return the rendered result image
TASK = 'multi_pose'
vis_thresh = 0.45
input_h, input_w = 800, 800
opt = opts().init('--task {} --load_model {} --debug {} --input_h {} --input_w {} --vis_thresh {}'.format(
TASK, model_path, debug, input_h, input_w, vis_thresh).split(' '))
detector = detector_factory[opt.task](opt)
video = video_path if video_path else 0
cap = cv2.VideoCapture(video)
while cap.isOpened():
det = cap.grab()
if det:
flag, frame = cap.retrieve()
res = detector.run(frame)
cv2.imshow('face detect', res['plot_img'])
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def test_wider_Face(model_path, debug, threshold=0.05):
from progress.bar import Bar
Path = '/your/path/WIDER_val/images' # WIDER_val/images path
wider_face_mat = sio.loadmat('../evaluate/ground_truth/wider_face_val.mat')
event_list = wider_face_mat['event_list']
file_list = wider_face_mat['file_list']
print("*** event_list", event_list)
TASK = 'multi_pose'
input_h, input_w = 800, 800
opt = opts().init('--task {} --load_model {} --debug {} --vis_thresh {} --input_h {} --input_w {}'.format(
TASK, model_path, debug, threshold, input_h, input_w).split(' '))
detector = detector_factory[opt.task](opt)
save_path = '../output/widerface/'
for index, event in enumerate(event_list):
file_list_item = file_list[index][0]
im_file_dir = event[0][0]
if not os.path.exists(save_path + im_file_dir):
os.makedirs(save_path + im_file_dir)
bar1 = Bar("Testing", max=len(file_list_item))
for num, file in enumerate(file_list_item):
im_name = file[0][0]
im_zip_name = '{}/{}.jpg'.format(im_file_dir, im_name)
img_path = os.path.join(Path, im_zip_name)
ori_img = cv2.imread(img_path)
if ori_img is None:
print("*** img_path {} is empty!".format(img_path))
continue
dets = detector.run(ori_img)['results']
f = open(save_path + im_file_dir + '/' + im_name + '.txt', 'w')
f.write('{:s}\n'.format('%s/%s.jpg' % (im_file_dir, im_name)))
f.write('{:d}\n'.format(len(dets[1]))) # number of face detections (class 1)
for b in dets[1]:
x1, y1, x2, y2, s = b[0], b[1], b[2], b[3], b[4]
f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(
x1, y1, (x2 - x1 + 1), (y2 - y1 + 1), s))
f.close()
Bar.suffix = 'event:%d num:%d' % (index + 1, num + 1)
bar1.next()
if __name__ == '__main__':
'''
debug = 0 # return detection results without display
debug = 1 # draw and show the result image
debug = -1 # return the rendered result image
'''
debug = 0
model_path = '../models/model_best.pth' # or your model path
# single-image test
# test_img(model_path, debug)
# video test
# test_video(model_path, debug)
# WIDER_val dataset test
test_wider_Face(model_path, debug)
import os.path as osp
import sys
def add_path(path):
if path not in sys.path:
sys.path.insert(0, path)
this_dir = osp.dirname(__file__)
# Add lib to PYTHONPATH
lib_path = osp.join(this_dir, '../lib')
add_path(lib_path)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as COCO
import cv2
import numpy as np
from pycocotools import mask as maskUtils
ANN_PATH = '../../data/coco/annotations/'
IMG_PATH = '../../data/coco/'
ANN_FILES = {'train': 'instances_train2017.json',
'val': 'instances_val2017.json'}
DEBUG = False
RESIZE = True
class_name = [
'__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
def iou(box1, box2):
area1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
area2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)
inter = max(min(box1[2], box2[2]) - max(box1[0], box2[0]) + 1, 0) * \
max(min(box1[3], box2[3]) - max(box1[1], box2[1]) + 1, 0)
iou = 1.0 * inter / (area1 + area2 - inter)
return iou
def generate_anchors(
stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)
):
"""Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
are centered on stride / 2, have (approximate) sqrt areas of the specified
sizes, and aspect ratios as given.
"""
return _generate_anchors(
stride,
np.array(sizes, dtype=np.float64) / stride,
np.array(aspect_ratios, dtype=np.float64)
)
def _generate_anchors(base_size, scales, aspect_ratios):
"""Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.
"""
anchor = np.array([1, 1, base_size, base_size], dtype=np.float64) - 1
anchors = _ratio_enum(anchor, aspect_ratios)
anchors = np.vstack(
[_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]
)
return anchors
def _whctrs(anchor):
"""Return width, height, x center, and y center for an anchor (window)."""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack(
(
x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)
)
)
return anchors
def _ratio_enum(anchor, ratios):
"""Enumerate a set of anchors for each aspect ratio wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))
hs = np.round(ws * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _scale_enum(anchor, scales):
"""Enumerate a set of anchors for each scale wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _coco_box_to_bbox(box):
bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
dtype=np.float32)
return bbox
def count_agnostic(split):
coco = COCO.COCO(ANN_PATH + ANN_FILES[split])
images = coco.getImgIds()
cnt = 0
for img_id in images:
ann_ids = coco.getAnnIds(imgIds=[img_id])
anns = coco.loadAnns(ids=ann_ids)
centers = []
for ann in anns:
bbox = ann['bbox']
center = ((bbox[0] + bbox[2] / 2) // 4, (bbox[1] + bbox[3] / 2) // 4)
for c in centers:
if center[0] == c[0] and center[1] == c[1]:
cnt += 1
centers.append(center)
print('find {} collisions!'.format(cnt))
def count(split):
coco = COCO.COCO(ANN_PATH + ANN_FILES[split])
images = coco.getImgIds()
cnt = 0
obj = 0
for img_id in images:
ann_ids = coco.getAnnIds(imgIds=[img_id])
anns = coco.loadAnns(ids=ann_ids)
centers = []
obj += len(anns)
for ann in anns:
if ann['iscrowd'] > 0:
continue
bbox = ann['bbox']
center = ((bbox[0] + bbox[2] / 2) // 4, (bbox[1] + bbox[3] / 2) // 4, ann['category_id'], bbox)
for c in centers:
if center[0] == c[0] and center[1] == c[1] and center[2] == c[2] and \
iou(_coco_box_to_bbox(bbox), _coco_box_to_bbox(c[3])) < 2: # threshold effectively disabled (originally 0.5)
cnt += 1
if DEBUG:
file_name = coco.loadImgs(ids=[img_id])[0]['file_name']
img = cv2.imread('{}/{}2017/{}'.format(IMG_PATH, split, file_name))
x1, y1 = int(c[3][0]), int(c[3][1]),
x2, y2 = int(c[3][0] + c[3][2]), int(c[3][1] + c[3][3])
cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2, cv2.LINE_AA)
x1, y1 = int(center[3][0]), int(center[3][1]),
x2, y2 = int(center[3][0] + center[3][2]), int(center[3][1] + center[3][3])
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2, cv2.LINE_AA)
cv2.imshow('img', img)
cv2.waitKey()
centers.append(center)
print('find {} collisions of {} objects!'.format(cnt, obj))
def count_iou(split):
coco = COCO.COCO(ANN_PATH + ANN_FILES[split])
images = coco.getImgIds()
cnt = 0
obj = 0
for img_id in images:
ann_ids = coco.getAnnIds(imgIds=[img_id])
anns = coco.loadAnns(ids=ann_ids)
bboxes = []
obj += len(anns)
for ann in anns:
if ann['iscrowd'] > 0:
continue
bbox = _coco_box_to_bbox(ann['bbox']).tolist() + [ann['category_id']]
for b in bboxes:
if iou(b, bbox) > 0.5 and b[4] == bbox[4]:
cnt += 1
if DEBUG:
file_name = coco.loadImgs(ids=[img_id])[0]['file_name']
img = cv2.imread('{}/{}2017/{}'.format(IMG_PATH, split, file_name))
x1, y1 = int(b[0]), int(b[1]),
x2, y2 = int(b[2]), int(b[3])
cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2, cv2.LINE_AA)
x1, y1 = int(bbox[0]), int(bbox[1]),
x2, y2 = int(bbox[2]), int(bbox[3])
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2, cv2.LINE_AA)
cv2.imshow('img', img)
print('cats', class_name[b[4]], class_name[bbox[4]])
cv2.waitKey()
bboxes.append(bbox)
print('find {} collisions of {} objects!'.format(cnt, obj))
def count_anchor(split):
coco = COCO.COCO(ANN_PATH + ANN_FILES[split])
images = coco.getImgIds()
cnt = 0
obj = 0
stride = 16
anchor = generate_anchors().reshape(15, 2, 2)
miss_s, miss_m, miss_l = 0, 0, 0
N = len(images)
print(N, 'images')
for ind, img_id in enumerate(images):
if ind % 1000 == 0:
print(ind, N)
anchors = []
ann_ids = coco.getAnnIds(imgIds=[img_id])
anns = coco.loadAnns(ids=ann_ids)
obj += len(anns)
img_info = coco.loadImgs(ids=[img_id])[0]
h, w = img_info['height'], img_info['width']
if RESIZE:
if h > w:
for i in range(len(anns)):
anns[i]['bbox'][0] *= 800 / w
anns[i]['bbox'][1] *= 800 / w
anns[i]['bbox'][2] *= 800 / w
anns[i]['bbox'][3] *= 800 / w
h = h * 800 // w
w = 800
else:
for i in range(len(anns)):
anns[i]['bbox'][0] *= 800 / h
anns[i]['bbox'][1] *= 800 / h
anns[i]['bbox'][2] *= 800 / h
anns[i]['bbox'][3] *= 800 / h
w = w * 800 // h
h = 800
for i in range(w // stride):
for j in range(h // stride):
ct = np.array([i * stride, j * stride], dtype=np.float32).reshape(1, 1, 2)
anchors.append(anchor + ct)
anchors = np.concatenate(anchors, axis=0).reshape(-1, 4)
anchors[:, 2:4] = anchors[:, 2:4] - anchors[:, 0:2]
anchors = anchors.tolist()
# import pdb; pdb.set_trace()
g = [g['bbox'] for g in anns]
iscrowd = [int(o['iscrowd']) for o in anns]
ious = maskUtils.iou(anchors,g,iscrowd)
for t in range(len(g)):
if ious[:, t].max() < 0.5:
s = anns[t]['area']
if s < 32 ** 2:
miss_s += 1
elif s < 96 ** 2:
miss_m += 1
else:
miss_l += 1
if DEBUG:
file_name = coco.loadImgs(ids=[img_id])[0]['file_name']
img = cv2.imread('{}/{}2017/{}'.format(IMG_PATH, split, file_name))
if RESIZE:
img = cv2.resize(img, (w, h))
for t, gt in enumerate(g):
if anns[t]['iscrowd'] > 0:
continue
x1, y1, x2, y2 = _coco_box_to_bbox(gt)
cl = (0, 0, 255) if ious[:, t].max() < 0.5 else (0, 255, 0)
cv2.rectangle(img, (x1, y1), (x2, y2), cl, 2, cv2.LINE_AA)
for k in range(len(anchors)):
if ious[k, t] > 0.5:
x1, y1, x2, y2 = _coco_box_to_bbox(anchors[k])
cl = (np.array([255, 0, 0]) * ious[k, t]).astype(np.int32).tolist()
cv2.rectangle(img, (x1, y1), (x2, y2), cl, 1, cv2.LINE_AA)
cv2.imshow('img', img)
cv2.waitKey()
miss = 0
if len(ious) > 0:
miss = (ious.max(axis=0) < 0.5).sum()
cnt += miss
print('cnt, obj, ratio ', cnt, obj, cnt / obj)
print('s, m, l ', miss_s, miss_m, miss_l)
# import pdb; pdb.set_trace()
def count_size(split):
coco = COCO.COCO(ANN_PATH + ANN_FILES[split])
images = coco.getImgIds()
cnt = 0
obj = 0
stride = 16
anchor = generate_anchors().reshape(15, 2, 2)
cnt_s, cnt_m, cnt_l = 0, 0, 0
N = len(images)
print(N, 'images')
for ind, img_id in enumerate(images):
anchors = []
ann_ids = coco.getAnnIds(imgIds=[img_id])
anns = coco.loadAnns(ids=ann_ids)
obj += len(anns)
img_info = coco.loadImgs(ids=[img_id])[0]
for t in range(len(anns)):
s = anns[t]['area']
if s < 32 ** 2:
cnt_s += 1
elif s < 96 ** 2:
cnt_m += 1
else:
cnt_l += 1
cnt += 1
print('cnt', cnt)
print('s, m, l ', cnt_s, cnt_m, cnt_l)
# count_iou('train')
# count_anchor('train')
# count('train')
count_size('train')
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
MODEL_PATH = '../../models/ExtremeNet_500000.pkl'
OUT_PATH = '../../models/ExtremeNet_500000.pth'
import torch
state_dict = torch.load(MODEL_PATH)
key_map = {'t_heats': 'hm_t', 'l_heats': 'hm_l', 'b_heats': 'hm_b', \
'r_heats': 'hm_r', 'ct_heats': 'hm_c', \
't_regrs': 'reg_t', 'l_regrs': 'reg_l', \
'b_regrs': 'reg_b', 'r_regrs': 'reg_r'}
out = {}
for k in state_dict.keys():
changed = False
for m in key_map.keys():
if m in k:
if 'ct_heats' in k and m == 't_heats':
continue
new_k = k.replace(m, key_map[m])
out[new_k] = state_dict[k]
changed = True
print('replace {} to {}'.format(k, new_k))
if not changed:
out[k] = state_dict[k]
data = {'epoch': 0,
'state_dict': out}
torch.save(data, OUT_PATH)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pickle
import json
import numpy as np
import cv2
DATA_PATH = '../../data/kitti/'
DEBUG = False
# VAL_PATH = DATA_PATH + 'training/label_val/'
import os
SPLITS = ['3dop', 'subcnn']
import _init_paths
from utils.ddd_utils import compute_box_3d, project_to_image, alpha2rot_y
from utils.ddd_utils import draw_box_3d, unproject_2d_to_3d
'''
#Values Name Description
----------------------------------------------------------------------------
1 type Describes the type of object: 'Car', 'Van', 'Truck',
'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
'Misc' or 'DontCare'
1 truncated Float from 0 (non-truncated) to 1 (truncated), where
truncated refers to the object leaving image boundaries
1 occluded Integer (0,1,2,3) indicating occlusion state:
0 = fully visible, 1 = partly occluded
2 = largely occluded, 3 = unknown
1 alpha Observation angle of object, ranging [-pi..pi]
4 bbox 2D bounding box of object in the image (0-based index):
contains left, top, right, bottom pixel coordinates
3 dimensions 3D object dimensions: height, width, length (in meters)
3 location 3D object location x,y,z in camera coordinates (in meters)
1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi]
1 score Only for results: Float, indicating confidence in
detection, needed for p/r curves, higher is better.
'''
def _bbox_to_coco_bbox(bbox):
return [(bbox[0]), (bbox[1]),
(bbox[2] - bbox[0]), (bbox[3] - bbox[1])]
def read_clib(calib_path):
with open(calib_path, 'r') as f: # close the file once the P2 line is read
for i, line in enumerate(f):
if i == 2:
calib = np.array(line[:-1].split(' ')[1:], dtype=np.float32)
calib = calib.reshape(3, 4)
return calib
cats = ['Pedestrian', 'Car', 'Cyclist', 'Van', 'Truck', 'Person_sitting',
'Tram', 'Misc', 'DontCare']
cat_ids = {cat: i + 1 for i, cat in enumerate(cats)}
# cat_info = [{"name": "pedestrian", "id": 1}, {"name": "vehicle", "id": 2}]
F = 721
H = 384 # 375
W = 1248 # 1242
EXT = [45.75, -0.34, 0.005]
CALIB = np.array([[F, 0, W / 2, EXT[0]], [0, F, H / 2, EXT[1]],
[0, 0, 1, EXT[2]]], dtype=np.float32)
cat_info = []
for i, cat in enumerate(cats):
cat_info.append({'name': cat, 'id': i + 1})
for SPLIT in SPLITS:
image_set_path = DATA_PATH + 'ImageSets_{}/'.format(SPLIT)
ann_dir = DATA_PATH + 'training/label_2/'
calib_dir = DATA_PATH + '{}/calib/'
splits = ['train', 'val']
# splits = ['trainval', 'test']
calib_type = {'train': 'training', 'val': 'training', 'trainval': 'training',
'test': 'testing'}
for split in splits:
ret = {'images': [], 'annotations': [], "categories": cat_info}
image_set = open(image_set_path + '{}.txt'.format(split), 'r')
image_to_id = {}
for line in image_set:
if line[-1] == '\n':
line = line[:-1]
image_id = int(line)
calib_path = calib_dir.format(calib_type[split]) + '{}.txt'.format(line)
calib = read_clib(calib_path)
image_info = {'file_name': '{}.png'.format(line),
'id': int(image_id),
'calib': calib.tolist()}
ret['images'].append(image_info)
if split == 'test':
continue
ann_path = ann_dir + '{}.txt'.format(line)
# if split == 'val':
# os.system('cp {} {}/'.format(ann_path, VAL_PATH))
anns = open(ann_path, 'r')
if DEBUG:
image = cv2.imread(
DATA_PATH + 'images/trainval/' + image_info['file_name'])
for ann_ind, txt in enumerate(anns):
tmp = txt[:-1].split(' ')
cat_id = cat_ids[tmp[0]]
truncated = int(float(tmp[1]))
occluded = int(tmp[2])
alpha = float(tmp[3])
bbox = [float(tmp[4]), float(tmp[5]), float(tmp[6]), float(tmp[7])]
dim = [float(tmp[8]), float(tmp[9]), float(tmp[10])]
location = [float(tmp[11]), float(tmp[12]), float(tmp[13])]
rotation_y = float(tmp[14])
ann = {'image_id': image_id,
'id': int(len(ret['annotations']) + 1),
'category_id': cat_id,
'dim': dim,
'bbox': _bbox_to_coco_bbox(bbox),
'depth': location[2],
'alpha': alpha,
'truncated': truncated,
'occluded': occluded,
'location': location,
'rotation_y': rotation_y}
ret['annotations'].append(ann)
if DEBUG and tmp[0] != 'DontCare':
box_3d = compute_box_3d(dim, location, rotation_y)
box_2d = project_to_image(box_3d, calib)
# print('box_2d', box_2d)
image = draw_box_3d(image, box_2d)
x = (bbox[0] + bbox[2]) / 2
'''
print('rot_y, alpha2rot_y, dlt', tmp[0],
rotation_y, alpha2rot_y(alpha, x, calib[0, 2], calib[0, 0]),
np.cos(
rotation_y - alpha2rot_y(alpha, x, calib[0, 2], calib[0, 0])))
'''
depth = np.array([location[2]], dtype=np.float32)
pt_2d = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
dtype=np.float32)
pt_3d = unproject_2d_to_3d(pt_2d, depth, calib)
pt_3d[1] += dim[0] / 2
print('pt_3d', pt_3d)
print('location', location)
if DEBUG:
cv2.imshow('image', image)
cv2.waitKey()
print("# images: ", len(ret['images']))
print("# annotations: ", len(ret['annotations']))
# import pdb; pdb.set_trace()
out_path = '{}/annotations/kitti_{}_{}.json'.format(DATA_PATH, SPLIT, split)
json.dump(ret, open(out_path, 'w'))
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import sys
import cv2
import numpy as np
import pickle
import os
this_dir = os.path.dirname(__file__)
ANN_PATH = os.path.join(this_dir, '../../data/coco/annotations/instances_val2017.json')
print(ANN_PATH)
if __name__ == '__main__':
pred_path = sys.argv[1]
coco = coco.COCO(ANN_PATH)
dets = coco.loadRes(pred_path)
img_ids = coco.getImgIds()
num_images = len(img_ids)
coco_eval = COCOeval(coco, dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pycocotools.coco as coco
from pycocotools.cocoeval import COCOeval
import sys
import cv2
import numpy as np
import pickle
import os
this_dir = os.path.dirname(__file__)
ANN_PATH = os.path.join(this_dir, '../../data/coco/annotations/person_keypoints_val2017.json')
print(ANN_PATH)
if __name__ == '__main__':
pred_path = sys.argv[1]
coco = coco.COCO(ANN_PATH)
dets = coco.loadRes(pred_path)
img_ids = coco.getImgIds()
num_images = len(img_ids)
coco_eval = COCOeval(coco, dets, "keypoints")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_eval = COCOeval(coco, dets, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
# kitti_eval
`evaluate_object_3d_offline.cpp` evaluates your KITTI detection results locally, using validation data selected from the KITTI training set, with the following metrics:
- overlap on image (AP)
- oriented overlap on image (AOS)
- overlap on ground-plane (AP)
- overlap in 3D (AP)
Compile `evaluate_object_3d_offline.cpp` with its dependencies, Boost and the Linux `dirent.h` header (already present on most Linux distributions).
Run the evaluation with:
./evaluate_object_3d_offline groundtruth_dir result_dir
Note that you do not have to run detection over the full KITTI training set; the evaluator only scores samples whose result files exist.
### Updates
- June, 2017:
* Fixed the detection-box filtering on minimum height, following KITTI's note of 25.04.2017.
// from https://github.com/prclibo/kitti_eval
#include <iostream>
#include <algorithm>
#include <stdio.h>
#include <math.h>
#include <vector>
#include <numeric>
#include <strings.h>
#include <assert.h>
#include <dirent.h>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/io.hpp>
#include <boost/geometry.hpp>
#include <boost/geometry/geometries/point_xy.hpp>
#include <boost/geometry/geometries/polygon.hpp>
#include <boost/geometry/geometries/adapted/c_array.hpp>
#include "mail.h"
BOOST_GEOMETRY_REGISTER_C_ARRAY_CS(cs::cartesian)
typedef boost::geometry::model::polygon<boost::geometry::model::d2::point_xy<double> > Polygon;
using namespace std;
/*=======================================================================
STATIC EVALUATION PARAMETERS
=======================================================================*/
// holds the number of test images on the server
const int32_t N_TESTIMAGES = 7518;
// easy, moderate and hard evaluation level
enum DIFFICULTY{EASY=0, MODERATE=1, HARD=2};
// evaluation metrics: image, ground or 3D
enum METRIC{IMAGE=0, GROUND=1, BOX3D=2};
// evaluation parameter
const int32_t MIN_HEIGHT[3] = {40, 25, 25}; // minimum height for evaluated groundtruth/detections
const int32_t MAX_OCCLUSION[3] = {0, 1, 2}; // maximum occlusion level of the groundtruth used for evaluation
const double MAX_TRUNCATION[3] = {0.15, 0.3, 0.5}; // maximum truncation level of the groundtruth used for evaluation
// evaluated object classes
enum CLASSES{CAR=0, PEDESTRIAN=1, CYCLIST=2};
const int NUM_CLASS = 3;
// parameters varying per class
vector<string> CLASS_NAMES;
// the minimum overlap required for 2D evaluation on the image/ground plane and 3D evaluation
const double MIN_OVERLAP[3][3] = {{0.7, 0.5, 0.5}, {0.5, 0.25, 0.25}, {0.5, 0.25, 0.25}};
// no. of recall steps that should be evaluated (discretized)
const double N_SAMPLE_PTS = 41;
// initialize class names
void initGlobals () {
CLASS_NAMES.push_back("car");
CLASS_NAMES.push_back("pedestrian");
CLASS_NAMES.push_back("cyclist");
}
/*=======================================================================
DATA TYPES FOR EVALUATION
=======================================================================*/
// holding data needed for precision-recall and precision-aos
struct tPrData {
vector<double> v; // detection score for computing score thresholds
double similarity; // orientation similarity
int32_t tp; // true positives
int32_t fp; // false positives
int32_t fn; // false negatives
tPrData () :
similarity(0), tp(0), fp(0), fn(0) {}
};
// holding bounding boxes for ground truth and detections
struct tBox {
string type; // object type as car, pedestrian or cyclist,...
double x1; // left corner
double y1; // top corner
double x2; // right corner
double y2; // bottom corner
double alpha; // image orientation
tBox (string type, double x1,double y1,double x2,double y2,double alpha) :
type(type),x1(x1),y1(y1),x2(x2),y2(y2),alpha(alpha) {}
};
// holding ground truth data
struct tGroundtruth {
tBox box; // object type, box, orientation
double truncation; // truncation 0..1
int32_t occlusion; // occlusion 0,1,2 (non, partly, fully)
double ry;
double t1, t2, t3;
double h, w, l;
tGroundtruth () :
box(tBox("invalild",-1,-1,-1,-1,-10)),truncation(-1),occlusion(-1) {}
tGroundtruth (tBox box,double truncation,int32_t occlusion) :
box(box),truncation(truncation),occlusion(occlusion) {}
tGroundtruth (string type,double x1,double y1,double x2,double y2,double alpha,double truncation,int32_t occlusion) :
box(tBox(type,x1,y1,x2,y2,alpha)),truncation(truncation),occlusion(occlusion) {}
};
// holding detection data
struct tDetection {
tBox box; // object type, box, orientation
double thresh; // detection score
double ry;
double t1, t2, t3;
double h, w, l;
tDetection ():
box(tBox("invalid",-1,-1,-1,-1,-10)),thresh(-1000) {}
tDetection (tBox box,double thresh) :
box(box),thresh(thresh) {}
tDetection (string type,double x1,double y1,double x2,double y2,double alpha,double thresh) :
box(tBox(type,x1,y1,x2,y2,alpha)),thresh(thresh) {}
};
/*=======================================================================
FUNCTIONS TO LOAD DETECTION AND GROUND TRUTH DATA ONCE, SAVE RESULTS
=======================================================================*/
vector<int32_t> indices;
vector<tDetection> loadDetections(string file_name, bool &compute_aos,
vector<bool> &eval_image, vector<bool> &eval_ground,
vector<bool> &eval_3d, bool &success) {
// holds all detections (ignored detections are indicated by an index vector)
vector<tDetection> detections;
FILE *fp = fopen(file_name.c_str(),"r");
if (!fp) {
success = false;
return detections;
}
while (!feof(fp)) {
tDetection d;
double trash;
char str[255];
if (fscanf(fp, "%s %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf",
str, &trash, &trash, &d.box.alpha, &d.box.x1, &d.box.y1,
&d.box.x2, &d.box.y2, &d.h, &d.w, &d.l, &d.t1, &d.t2, &d.t3,
&d.ry, &d.thresh)==16) {
// d.thresh = 1;
d.box.type = str;
detections.push_back(d);
// orientation=-10 is invalid, AOS is not evaluated if at least one orientation is invalid
if(d.box.alpha == -10)
compute_aos = false;
// a class is only evaluated if it is detected at least once
for (int c = 0; c < NUM_CLASS; c++) {
if (!strcasecmp(d.box.type.c_str(), CLASS_NAMES[c].c_str())) {
if (!eval_image[c] && d.box.x1 >= 0)
eval_image[c] = true;
if (!eval_ground[c] && d.t1 != -1000)
eval_ground[c] = true;
if (!eval_3d[c] && d.t2 != -1000)
eval_3d[c] = true;
break;
}
}
}
}
fclose(fp);
success = true;
return detections;
}
vector<tGroundtruth> loadGroundtruth(string file_name,bool &success) {
// holds all ground truth (ignored ground truth is indicated by an index vector)
vector<tGroundtruth> groundtruth;
FILE *fp = fopen(file_name.c_str(),"r");
if (!fp) {
success = false;
return groundtruth;
}
while (!feof(fp)) {
tGroundtruth g;
char str[255];
if (fscanf(fp, "%s %lf %d %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf",
str, &g.truncation, &g.occlusion, &g.box.alpha,
&g.box.x1, &g.box.y1, &g.box.x2, &g.box.y2,
&g.h, &g.w, &g.l, &g.t1,
&g.t2, &g.t3, &g.ry )==15) {
g.box.type = str;
groundtruth.push_back(g);
}
}
fclose(fp);
success = true;
return groundtruth;
}
void saveStats (const vector<double> &precision, const vector<double> &aos, FILE *fp_det, FILE *fp_ori) {
// save precision to file
if(precision.empty())
return;
for (int32_t i=0; i<precision.size(); i++)
fprintf(fp_det,"%f ",precision[i]);
fprintf(fp_det,"\n");
// save orientation similarity, only if there were no invalid orientation entries in submission (alpha=-10)
if(aos.empty())
return;
for (int32_t i=0; i<aos.size(); i++)
fprintf(fp_ori,"%f ",aos[i]);
fprintf(fp_ori,"\n");
}
/*=======================================================================
EVALUATION HELPER FUNCTIONS
=======================================================================*/
// criterion defines whether the overlap is computed with respect to both areas (ground truth and detection)
// or with respect to box a or b (detection and "dontcare" areas)
inline double imageBoxOverlap(tBox a, tBox b, int32_t criterion=-1){
// overlap is invalid in the beginning
double o = -1;
// get overlapping area
double x1 = max(a.x1, b.x1);
double y1 = max(a.y1, b.y1);
double x2 = min(a.x2, b.x2);
double y2 = min(a.y2, b.y2);
// compute width and height of overlapping area
double w = x2-x1;
double h = y2-y1;
// set invalid entries to 0 overlap
if(w<=0 || h<=0)
return 0;
// get overlapping areas
double inter = w*h;
double a_area = (a.x2-a.x1) * (a.y2-a.y1);
double b_area = (b.x2-b.x1) * (b.y2-b.y1);
// intersection over union overlap depending on users choice
if(criterion==-1) // union
o = inter / (a_area+b_area-inter);
else if(criterion==0) // bbox_a
o = inter / a_area;
else if(criterion==1) // bbox_b
o = inter / b_area;
// overlap
return o;
}
inline double imageBoxOverlap(tDetection a, tGroundtruth b, int32_t criterion=-1){
return imageBoxOverlap(a.box, b.box, criterion);
}
// compute polygon of an oriented bounding box
template <typename T>
Polygon toPolygon(const T& g) {
using namespace boost::numeric::ublas;
using namespace boost::geometry;
matrix<double> mref(2, 2);
mref(0, 0) = cos(g.ry); mref(0, 1) = sin(g.ry);
mref(1, 0) = -sin(g.ry); mref(1, 1) = cos(g.ry);
static int count = 0;
matrix<double> corners(2, 4);
double data[] = {g.l / 2, g.l / 2, -g.l / 2, -g.l / 2,
g.w / 2, -g.w / 2, -g.w / 2, g.w / 2};
std::copy(data, data + 8, corners.data().begin());
matrix<double> gc = prod(mref, corners);
for (int i = 0; i < 4; ++i) {
gc(0, i) += g.t1;
gc(1, i) += g.t3;
}
double points[][2] = {{gc(0, 0), gc(1, 0)},{gc(0, 1), gc(1, 1)},{gc(0, 2), gc(1, 2)},{gc(0, 3), gc(1, 3)},{gc(0, 0), gc(1, 0)}};
Polygon poly;
append(poly, points);
return poly;
}
// measure overlap between bird's eye view bounding boxes, parametrized by (ry, l, w, tx, tz)
inline double groundBoxOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) {
using namespace boost::geometry;
Polygon gp = toPolygon(g);
Polygon dp = toPolygon(d);
std::vector<Polygon> in, un;
intersection(gp, dp, in);
union_(gp, dp, un);
double inter_area = in.empty() ? 0 : area(in.front());
double union_area = area(un.front());
double o;
if(criterion==-1) // union
o = inter_area / union_area;
else if(criterion==0) // bbox_a
o = inter_area / area(dp);
else if(criterion==1) // bbox_b
o = inter_area / area(gp);
return o;
}
// measure overlap between 3D bounding boxes, parametrized by (ry, h, w, l, tx, ty, tz)
inline double box3DOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) {
using namespace boost::geometry;
Polygon gp = toPolygon(g);
Polygon dp = toPolygon(d);
std::vector<Polygon> in, un;
intersection(gp, dp, in);
union_(gp, dp, un);
double ymax = min(d.t2, g.t2);
double ymin = max(d.t2 - d.h, g.t2 - g.h);
double inter_area = in.empty() ? 0 : area(in.front());
double inter_vol = inter_area * max(0.0, ymax - ymin);
double det_vol = d.h * d.l * d.w;
double gt_vol = g.h * g.l * g.w;
double o;
if(criterion==-1) // union
o = inter_vol / (det_vol + gt_vol - inter_vol);
else if(criterion==0) // bbox_a
o = inter_vol / det_vol;
else if(criterion==1) // bbox_b
o = inter_vol / gt_vol;
return o;
}
vector<double> getThresholds(vector<double> &v, double n_groundtruth){
// holds scores needed to compute N_SAMPLE_PTS recall values
vector<double> t;
// sort scores in descending order
// (highest score is assumed to give best/most confident detections)
sort(v.begin(), v.end(), greater<double>());
// get scores for linearly spaced recall
double current_recall = 0;
for(int32_t i=0; i<v.size(); i++){
    // check if the recall to the right of the current recall point is closer than the one to the left;
    // in this case, skip the current detection score
double l_recall, r_recall, recall;
l_recall = (double)(i+1)/n_groundtruth;
if(i<(v.size()-1))
r_recall = (double)(i+2)/n_groundtruth;
else
r_recall = l_recall;
if( (r_recall-current_recall) < (current_recall-l_recall) && i<(v.size()-1))
continue;
// left recall is the best approximation, so use this and goto next recall step for approximation
recall = l_recall;
// the next recall step was reached
t.push_back(v[i]);
current_recall += 1.0/(N_SAMPLE_PTS-1.0);
}
return t;
}
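// worked example (N_SAMPLE_PTS=41, i.e. a recall step of 1/40 = 0.025):
// with n_groundtruth=400, i=0 gives l_recall = 1/400 = 0.0025, which best
// approximates current_recall = 0, so v[0] is kept; the loop then skips
// scores until i=9 (l_recall = 0.025) and afterwards keeps roughly every
// 10th score, i.e. one threshold per discretized recall value.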
void cleanData(CLASSES current_class, const vector<tGroundtruth> &gt, const vector<tDetection> &det, vector<int32_t> &ignored_gt, vector<tGroundtruth> &dc, vector<int32_t> &ignored_det, int32_t &n_gt, DIFFICULTY difficulty){
// extract ground truth bounding boxes for current evaluation class
for(int32_t i=0;i<gt.size(); i++){
// only bounding boxes with a minimum height are used for evaluation
double height = gt[i].box.y2 - gt[i].box.y1;
// neighboring classes are ignored ("van" for "car" and "person_sitting" for "pedestrian")
// (lower/upper cases are ignored)
int32_t valid_class;
// all classes without a neighboring class
if(!strcasecmp(gt[i].box.type.c_str(), CLASS_NAMES[current_class].c_str()))
valid_class = 1;
// classes with a neighboring class
else if(!strcasecmp(CLASS_NAMES[current_class].c_str(), "Pedestrian") && !strcasecmp("Person_sitting", gt[i].box.type.c_str()))
valid_class = 0;
else if(!strcasecmp(CLASS_NAMES[current_class].c_str(), "Car") && !strcasecmp("Van", gt[i].box.type.c_str()))
valid_class = 0;
// classes not used for evaluation
else
valid_class = -1;
// ground truth is ignored, if occlusion, truncation exceeds the difficulty or ground truth is too small
// (doesn't count as FN nor TP, although detections may be assigned)
bool ignore = false;
if(gt[i].occlusion>MAX_OCCLUSION[difficulty] || gt[i].truncation>MAX_TRUNCATION[difficulty] || height<MIN_HEIGHT[difficulty])
ignore = true;
// set ignored vector for ground truth
    // current class and not ignored (the total no. of ground truth boxes is counted for the recall denominator)
if(valid_class==1 && !ignore){
ignored_gt.push_back(0);
n_gt++;
}
// neighboring class, or current class but ignored
else if(valid_class==0 || (ignore && valid_class==1))
ignored_gt.push_back(1);
// all other classes which are FN in the evaluation
else
ignored_gt.push_back(-1);
}
// extract dontcare areas
for(int32_t i=0;i<gt.size(); i++)
if(!strcasecmp("DontCare", gt[i].box.type.c_str()))
dc.push_back(gt[i]);
  // extract detection bounding boxes of the current class
for(int32_t i=0;i<det.size(); i++){
// neighboring classes are not evaluated
int32_t valid_class;
if(!strcasecmp(det[i].box.type.c_str(), CLASS_NAMES[current_class].c_str()))
valid_class = 1;
else
valid_class = -1;
int32_t height = fabs(det[i].box.y1 - det[i].box.y2);
// set ignored vector for detections
if(height<MIN_HEIGHT[difficulty])
ignored_det.push_back(1);
else if(valid_class==1)
ignored_det.push_back(0);
else
ignored_det.push_back(-1);
}
}
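// legend for the ignore flags set above: for ground truth, 0 = evaluate
// (counted in n_gt), 1 = ignore (neighboring class, or current class but
// exceeding the difficulty limits), -1 = other class; for detections,
// 0 = evaluate, 1 = ignore (below minimum height), -1 = other class.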
tPrData computeStatistics(CLASSES current_class, const vector<tGroundtruth> &gt,
const vector<tDetection> &det, const vector<tGroundtruth> &dc,
const vector<int32_t> &ignored_gt, const vector<int32_t> &ignored_det,
bool compute_fp, double (*boxoverlap)(tDetection, tGroundtruth, int32_t),
METRIC metric, bool compute_aos=false, double thresh=0, bool debug=false){
tPrData stat = tPrData();
const double NO_DETECTION = -10000000;
vector<double> delta; // holds angular difference for TPs (needed for AOS evaluation)
  vector<bool> assigned_detection; // holds whether a detection was assigned to a valid or ignored ground truth
assigned_detection.assign(det.size(), false);
vector<bool> ignored_threshold;
ignored_threshold.assign(det.size(), false); // holds detections with a threshold lower than thresh if FP are computed
// detections with a low score are ignored for computing precision (needs FP)
if(compute_fp)
for(int32_t i=0; i<det.size(); i++)
if(det[i].thresh<thresh)
ignored_threshold[i] = true;
// evaluate all ground truth boxes
for(int32_t i=0; i<gt.size(); i++){
// this ground truth is not of the current or a neighboring class and therefore ignored
if(ignored_gt[i]==-1)
continue;
    /*=======================================================================
       find candidates (overlap with ground truth above the class/metric
       specific minimum overlap MIN_OVERLAP[metric][current_class])
    =======================================================================*/
int32_t det_idx = -1;
double valid_detection = NO_DETECTION;
double max_overlap = 0;
// search for a possible detection
bool assigned_ignored_det = false;
for(int32_t j=0; j<det.size(); j++){
// detections not of the current class, already assigned or with a low threshold are ignored
if(ignored_det[j]==-1)
continue;
if(assigned_detection[j])
continue;
if(ignored_threshold[j])
continue;
// find the maximum score for the candidates and get idx of respective detection
double overlap = boxoverlap(det[j], gt[i], -1);
// for computing recall thresholds, the candidate with highest score is considered
if(!compute_fp && overlap>MIN_OVERLAP[metric][current_class] && det[j].thresh>valid_detection){
det_idx = j;
valid_detection = det[j].thresh;
}
      // for computing pr curve values, the valid candidate with the greatest overlap is considered;
      // a valid detection always replaces a previously assigned ignored detection (too small height)
else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && (overlap>max_overlap || assigned_ignored_det) && ignored_det[j]==0){
max_overlap = overlap;
det_idx = j;
valid_detection = 1;
assigned_ignored_det = false;
}
else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && valid_detection==NO_DETECTION && ignored_det[j]==1){
det_idx = j;
valid_detection = 1;
assigned_ignored_det = true;
}
}
/*=======================================================================
compute TP, FP and FN
=======================================================================*/
// nothing was assigned to this valid ground truth
if(valid_detection==NO_DETECTION && ignored_gt[i]==0) {
stat.fn++;
}
// only evaluate valid ground truth <=> detection assignments (considering difficulty level)
else if(valid_detection!=NO_DETECTION && (ignored_gt[i]==1 || ignored_det[det_idx]==1))
assigned_detection[det_idx] = true;
// found a valid true positive
else if(valid_detection!=NO_DETECTION){
// write highest score to threshold vector
stat.tp++;
stat.v.push_back(det[det_idx].thresh);
// compute angular difference of detection and ground truth if valid detection orientation was provided
if(compute_aos)
delta.push_back(gt[i].box.alpha - det[det_idx].box.alpha);
// clean up
assigned_detection[det_idx] = true;
}
}
// if FP are requested, consider stuff area
if(compute_fp){
// count fp
for(int32_t i=0; i<det.size(); i++){
      // count false positives, if required (detections below the minimum height are ignored: ignored_det==1)
if(!(assigned_detection[i] || ignored_det[i]==-1 || ignored_det[i]==1 || ignored_threshold[i]))
stat.fp++;
}
// do not consider detections overlapping with stuff area
int32_t nstuff = 0;
for(int32_t i=0; i<dc.size(); i++){
for(int32_t j=0; j<det.size(); j++){
// detections not of the current class, already assigned, with a low threshold or a low minimum height are ignored
if(assigned_detection[j])
continue;
if(ignored_det[j]==-1 || ignored_det[j]==1)
continue;
if(ignored_threshold[j])
continue;
// compute overlap and assign to stuff area, if overlap exceeds class specific value
double overlap = boxoverlap(det[j], dc[i], 0);
if(overlap>MIN_OVERLAP[metric][current_class]){
assigned_detection[j] = true;
nstuff++;
}
}
}
    // FP = no. of detections not assigned to ground truth minus detections assigned to stuff areas
stat.fp -= nstuff;
// if all orientation values are valid, the AOS is computed
if(compute_aos){
vector<double> tmp;
// FP have a similarity of 0, for all TP compute AOS
tmp.assign(stat.fp, 0);
for(int32_t i=0; i<delta.size(); i++)
tmp.push_back((1.0+cos(delta[i]))/2.0);
      // make sure that all orientation deltas have been computed
assert(tmp.size()==stat.fp+stat.tp);
assert(delta.size()==stat.tp);
      // accumulate the orientation similarity for this image (normalized by tp+fp later in eval_class)
if(stat.tp>0 || stat.fp>0)
stat.similarity = accumulate(tmp.begin(), tmp.end(), 0.0);
// there was neither a FP nor a TP, so the similarity is ignored in the evaluation
else
stat.similarity = -1;
}
}
return stat;
}
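// in short: each ground truth box is greedily matched to at most one
// detection; matches involving ignored ground truth or ignored detections
// are marked as assigned but not counted, FN counts unmatched valid ground
// truth, and (if compute_fp is set) FP counts unassigned valid detections
// minus those overlapping DontCare areas.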
/*=======================================================================
EVALUATE CLASS-WISE
=======================================================================*/
bool eval_class (FILE *fp_det, FILE *fp_ori, CLASSES current_class,
const vector< vector<tGroundtruth> > &groundtruth,
const vector< vector<tDetection> > &detections, bool compute_aos,
double (*boxoverlap)(tDetection, tGroundtruth, int32_t),
vector<double> &precision, vector<double> &aos,
DIFFICULTY difficulty, METRIC metric) {
assert(groundtruth.size() == detections.size());
// init
int32_t n_gt=0; // total no. of gt (denominator of recall)
vector<double> v, thresholds; // detection scores, evaluated for recall discretization
  vector< vector<int32_t> > ignored_gt, ignored_det; // ignore flags for ground truth/detections of the current class/difficulty
  vector< vector<tGroundtruth> > dontcare;           // dontcare areas extracted from the ground truth
// for all test images do
for (int32_t i=0; i<groundtruth.size(); i++){
// holds ignored ground truth, ignored detections and dontcare areas for current frame
vector<int32_t> i_gt, i_det;
vector<tGroundtruth> dc;
// only evaluate objects of current class and ignore occluded, truncated objects
cleanData(current_class, groundtruth[i], detections[i], i_gt, dc, i_det, n_gt, difficulty);
ignored_gt.push_back(i_gt);
ignored_det.push_back(i_det);
dontcare.push_back(dc);
// compute statistics to get recall values
tPrData pr_tmp = tPrData();
pr_tmp = computeStatistics(current_class, groundtruth[i], detections[i], dc, i_gt, i_det, false, boxoverlap, metric);
// add detection scores to vector over all images
for(int32_t j=0; j<pr_tmp.v.size(); j++)
v.push_back(pr_tmp.v[j]);
}
// get scores that must be evaluated for recall discretization
thresholds = getThresholds(v, n_gt);
// compute TP,FP,FN for relevant scores
vector<tPrData> pr;
pr.assign(thresholds.size(),tPrData());
for (int32_t i=0; i<groundtruth.size(); i++){
// for all scores/recall thresholds do:
for(int32_t t=0; t<thresholds.size(); t++){
tPrData tmp = tPrData();
tmp = computeStatistics(current_class, groundtruth[i], detections[i], dontcare[i],
ignored_gt[i], ignored_det[i], true, boxoverlap, metric,
                              compute_aos, thresholds[t], t==38); // t==38: debug output for a single fixed threshold index only
// add no. of TP, FP, FN, AOS for current frame to total evaluation for current threshold
pr[t].tp += tmp.tp;
pr[t].fp += tmp.fp;
pr[t].fn += tmp.fn;
if(tmp.similarity!=-1)
pr[t].similarity += tmp.similarity;
}
}
// compute recall, precision and AOS
vector<double> recall;
precision.assign(N_SAMPLE_PTS, 0);
if(compute_aos)
aos.assign(N_SAMPLE_PTS, 0);
double r=0;
for (int32_t i=0; i<thresholds.size(); i++){
r = pr[i].tp/(double)(pr[i].tp + pr[i].fn);
recall.push_back(r);
precision[i] = pr[i].tp/(double)(pr[i].tp + pr[i].fp);
if(compute_aos)
aos[i] = pr[i].similarity/(double)(pr[i].tp + pr[i].fp);
}
// filter precision and AOS using max_{i..end}(precision)
for (int32_t i=0; i<thresholds.size(); i++){
precision[i] = *max_element(precision.begin()+i, precision.end());
if(compute_aos)
aos[i] = *max_element(aos.begin()+i, aos.end());
}
  // save statistics and finish with success
saveStats(precision, aos, fp_det, fp_ori);
return true;
}
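// the max-filter above implements the standard interpolated precision
// p_interp(r) = max_{r' >= r} p(r'), so the stored precision/AOS curves are
// monotonically non-increasing in recall.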
void saveAndPlotPlots(string dir_name,string file_name,string obj_type,vector<double> vals[],bool is_aos){
char command[1024];
// save plot data to file
FILE *fp = fopen((dir_name + "/" + file_name + ".txt").c_str(),"w");
printf("save %s\n", (dir_name + "/" + file_name + ".txt").c_str());
for (int32_t i=0; i<(int)N_SAMPLE_PTS; i++)
fprintf(fp,"%f %f %f %f\n",(double)i/(N_SAMPLE_PTS-1.0),vals[0][i],vals[1][i],vals[2][i]);
fclose(fp);
// create png + eps
for (int32_t j=0; j<2; j++) {
// open file
FILE *fp = fopen((dir_name + "/" + file_name + ".gp").c_str(),"w");
// save gnuplot instructions
if (j==0) {
fprintf(fp,"set term png size 450,315 font \"Helvetica\" 11\n");
fprintf(fp,"set output \"%s.png\"\n",file_name.c_str());
} else {
fprintf(fp,"set term postscript eps enhanced color font \"Helvetica\" 20\n");
fprintf(fp,"set output \"%s.eps\"\n",file_name.c_str());
}
// set labels and ranges
fprintf(fp,"set size ratio 0.7\n");
fprintf(fp,"set xrange [0:1]\n");
fprintf(fp,"set yrange [0:1]\n");
fprintf(fp,"set xlabel \"Recall\"\n");
if (!is_aos) fprintf(fp,"set ylabel \"Precision\"\n");
else fprintf(fp,"set ylabel \"Orientation Similarity\"\n");
obj_type[0] = toupper(obj_type[0]);
fprintf(fp,"set title \"%s\"\n",obj_type.c_str());
// line width
int32_t lw = 5;
if (j==0) lw = 3;
// plot error curve
fprintf(fp,"plot ");
fprintf(fp,"\"%s.txt\" using 1:2 title 'Easy' with lines ls 1 lw %d,",file_name.c_str(),lw);
fprintf(fp,"\"%s.txt\" using 1:3 title 'Moderate' with lines ls 2 lw %d,",file_name.c_str(),lw);
fprintf(fp,"\"%s.txt\" using 1:4 title 'Hard' with lines ls 3 lw %d",file_name.c_str(),lw);
// close file
fclose(fp);
// run gnuplot => create png + eps
sprintf(command,"cd %s; gnuplot %s",dir_name.c_str(),(file_name + ".gp").c_str());
system(command);
}
// create pdf and crop
sprintf(command,"cd %s; ps2pdf %s.eps %s_large.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str());
system(command);
sprintf(command,"cd %s; pdfcrop %s_large.pdf %s.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str());
system(command);
sprintf(command,"cd %s; rm %s_large.pdf",dir_name.c_str(),file_name.c_str());
system(command);
}
bool eval(string result_sha,Mail* mail){
// set some global parameters
initGlobals();
// ground truth and result directories
string gt_dir = "data/object/label_2";
string result_dir = "results/" + result_sha;
string plot_dir = result_dir + "/plot";
// create output directories
system(("mkdir " + plot_dir).c_str());
// hold detections and ground truth in memory
vector< vector<tGroundtruth> > groundtruth;
vector< vector<tDetection> > detections;
  // holds whether orientation similarity shall be computed (might be set to false while loading detections)
  // and which labels were provided by this submission
bool compute_aos=true;
vector<bool> eval_image(NUM_CLASS, false);
vector<bool> eval_ground(NUM_CLASS, false);
vector<bool> eval_3d(NUM_CLASS, false);
// for all images read groundtruth and detections
mail->msg("Loading detections...");
for (int32_t i=0; i<N_TESTIMAGES; i++) {
// file name
char file_name[256];
sprintf(file_name,"%06d.txt",indices.at(i));
// read ground truth and result poses
bool gt_success,det_success;
vector<tGroundtruth> gt = loadGroundtruth(gt_dir + "/" + file_name,gt_success);
vector<tDetection> det = loadDetections(result_dir + "/data/" + file_name,
compute_aos, eval_image, eval_ground, eval_3d, det_success);
groundtruth.push_back(gt);
detections.push_back(det);
// check for errors
if (!gt_success) {
mail->msg("ERROR: Couldn't read: %s of ground truth. Please write me an email!", file_name);
return false;
}
if (!det_success) {
mail->msg("ERROR: Couldn't read: %s", file_name);
return false;
}
}
mail->msg(" done.");
// holds pointers for result files
FILE *fp_det=0, *fp_ori=0;
// eval image 2D bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_image[c]) {
fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection.txt").c_str(), "w");
if(compute_aos)
fp_ori = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_orientation.txt").c_str(),"w");
vector<double> precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[0], aos[0], EASY, IMAGE)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[1], aos[1], MODERATE, IMAGE)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[2], aos[2], HARD, IMAGE)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection", CLASS_NAMES[c], precision, 0);
if(compute_aos){
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_orientation", CLASS_NAMES[c], aos, 1);
fclose(fp_ori);
}
}
}
// don't evaluate AOS for birdview boxes and 3D boxes
compute_aos = false;
// eval bird's eye view bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_ground[c]) {
fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_ground.txt").c_str(), "w");
vector<double> precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[0], aos[0], EASY, GROUND)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[1], aos[1], MODERATE, GROUND)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[2], aos[2], HARD, GROUND)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_ground", CLASS_NAMES[c], precision, 0);
}
}
// eval 3D bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_3d[c]) {
fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_3d.txt").c_str(), "w");
vector<double> precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[0], aos[0], EASY, BOX3D)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[1], aos[1], MODERATE, BOX3D)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[2], aos[2], HARD, BOX3D)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_3d", CLASS_NAMES[c], precision, 0);
}
}
// success
return true;
}
int main (int argc, char *argv[]) {
// we need 2 or 4 arguments!
if (argc!=2 && argc!=4) {
cout << "Usage: ./eval_detection result_sha [user_sha email]" << endl;
return 1;
}
// read arguments
string result_sha = argv[1];
// init notification mail
Mail *mail;
if (argc==4) mail = new Mail(argv[3]);
else mail = new Mail();
mail->msg("Thank you for participating in our evaluation!");
// run evaluation
if (eval(result_sha,mail)) {
mail->msg("Your evaluation results are available at:");
mail->msg("http://www.cvlibs.net/datasets/kitti/user_submit_check_login.php?benchmark=object&user=%s&result=%s",argv[2], result_sha.c_str());
} else {
system(("rm -r results/" + result_sha).c_str());
mail->msg("An error occured while processing your results.");
mail->msg("Please make sure that the data in your zip archive has the right format!");
}
// send mail and exit
delete mail;
return 0;
}
#include <iostream>
#include <algorithm>
#include <stdio.h>
#include <stdlib.h>  // system, atoi
#include <math.h>
#include <ctype.h>   // toupper
#include <string>
#include <vector>
#include <numeric>
#include <strings.h>
#include <assert.h>
#include <dirent.h>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/io.hpp>
#include <boost/geometry.hpp>
#include <boost/geometry/geometries/point_xy.hpp>
#include <boost/geometry/geometries/polygon.hpp>
#include <boost/geometry/geometries/adapted/c_array.hpp>
#include "mail.h"
BOOST_GEOMETRY_REGISTER_C_ARRAY_CS(cs::cartesian)
typedef boost::geometry::model::polygon<boost::geometry::model::d2::point_xy<double> > Polygon;
using namespace std;
/*=======================================================================
STATIC EVALUATION PARAMETERS
=======================================================================*/
// holds the number of test images on the server
const int32_t N_TESTIMAGES = 7518;
// easy, moderate and hard evaluation level
enum DIFFICULTY{EASY=0, MODERATE=1, HARD=2};
// evaluation metrics: image, ground or 3D
enum METRIC{IMAGE=0, GROUND=1, BOX3D=2};
// evaluation parameter
const int32_t MIN_HEIGHT[3] = {40, 25, 25}; // minimum height for evaluated groundtruth/detections
const int32_t MAX_OCCLUSION[3] = {0, 1, 2}; // maximum occlusion level of the groundtruth used for evaluation
const double MAX_TRUNCATION[3] = {0.15, 0.3, 0.5}; // maximum truncation level of the groundtruth used for evaluation
// evaluated object classes
enum CLASSES{CAR=0, PEDESTRIAN=1, CYCLIST=2};
const int NUM_CLASS = 3;
// parameters varying per class
vector<string> CLASS_NAMES;
// the minimum overlap required for 2D evaluation on the image/ground plane and 3D evaluation
const double MIN_OVERLAP[3][3] = {{0.7, 0.5, 0.5}, {0.5, 0.25, 0.25}, {0.5, 0.25, 0.25}};
// const double MIN_OVERLAP[3][3] = {{0.7, 0.5, 0.5}, {0.7, 0.5, 0.5}, {0.7, 0.5, 0.5}};
// no. of recall steps that should be evaluated (discretized)
const double N_SAMPLE_PTS = 41;
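// note: 41 sample points give a recall step of 1/40 = 0.025; every 4th point
// (indices 0, 4, ..., 40) recovers the classic 11-point recall grid
// {0, 0.1, ..., 1.0} used for the AP printout in saveAndPlotPlots below.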
// initialize class names
void initGlobals () {
CLASS_NAMES.push_back("car");
CLASS_NAMES.push_back("pedestrian");
CLASS_NAMES.push_back("cyclist");
}
/*=======================================================================
DATA TYPES FOR EVALUATION
=======================================================================*/
// holding data needed for precision-recall and precision-aos
struct tPrData {
vector<double> v; // detection score for computing score thresholds
double similarity; // orientation similarity
int32_t tp; // true positives
int32_t fp; // false positives
int32_t fn; // false negatives
tPrData () :
similarity(0), tp(0), fp(0), fn(0) {}
};
// holding bounding boxes for ground truth and detections
struct tBox {
string type; // object type as car, pedestrian or cyclist,...
double x1; // left corner
double y1; // top corner
double x2; // right corner
double y2; // bottom corner
double alpha; // image orientation
tBox (string type, double x1,double y1,double x2,double y2,double alpha) :
type(type),x1(x1),y1(y1),x2(x2),y2(y2),alpha(alpha) {}
};
// holding ground truth data
struct tGroundtruth {
tBox box; // object type, box, orientation
double truncation; // truncation 0..1
int32_t occlusion; // occlusion 0,1,2 (non, partly, fully)
double ry;
double t1, t2, t3;
double h, w, l;
tGroundtruth () :
box(tBox("invalild",-1,-1,-1,-1,-10)),truncation(-1),occlusion(-1) {}
tGroundtruth (tBox box,double truncation,int32_t occlusion) :
box(box),truncation(truncation),occlusion(occlusion) {}
tGroundtruth (string type,double x1,double y1,double x2,double y2,double alpha,double truncation,int32_t occlusion) :
box(tBox(type,x1,y1,x2,y2,alpha)),truncation(truncation),occlusion(occlusion) {}
};
// holding detection data
struct tDetection {
tBox box; // object type, box, orientation
double thresh; // detection score
double ry;
double t1, t2, t3;
double h, w, l;
tDetection ():
box(tBox("invalid",-1,-1,-1,-1,-10)),thresh(-1000) {}
tDetection (tBox box,double thresh) :
box(box),thresh(thresh) {}
tDetection (string type,double x1,double y1,double x2,double y2,double alpha,double thresh) :
box(tBox(type,x1,y1,x2,y2,alpha)),thresh(thresh) {}
};
/*=======================================================================
FUNCTIONS TO LOAD DETECTION AND GROUND TRUTH DATA ONCE, SAVE RESULTS
=======================================================================*/
vector<int32_t> indices;
vector<tDetection> loadDetections(string file_name, bool &compute_aos,
vector<bool> &eval_image, vector<bool> &eval_ground,
vector<bool> &eval_3d, bool &success) {
  // holds all detections (ignored detections are indicated by an index vector)
vector<tDetection> detections;
FILE *fp = fopen(file_name.c_str(),"r");
if (!fp) {
success = false;
return detections;
}
while (!feof(fp)) {
tDetection d;
double trash;
char str[255];
if (fscanf(fp, "%s %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf",
str, &trash, &trash, &d.box.alpha, &d.box.x1, &d.box.y1,
&d.box.x2, &d.box.y2, &d.h, &d.w, &d.l, &d.t1, &d.t2, &d.t3,
&d.ry, &d.thresh)==16) {
// d.thresh = 1;
d.box.type = str;
detections.push_back(d);
// orientation=-10 is invalid, AOS is not evaluated if at least one orientation is invalid
if(d.box.alpha == -10)
compute_aos = false;
// a class is only evaluated if it is detected at least once
for (int c = 0; c < NUM_CLASS; c++) {
if (!strcasecmp(d.box.type.c_str(), CLASS_NAMES[c].c_str())) {
if (!eval_image[c] && d.box.x1 >= 0)
eval_image[c] = true;
if (!eval_ground[c] && d.t1 != -1000)
eval_ground[c] = true;
if (!eval_3d[c] && d.t2 != -1000)
eval_3d[c] = true;
break;
}
}
}
}
fclose(fp);
success = true;
return detections;
}
vector<tGroundtruth> loadGroundtruth(string file_name,bool &success) {
  // holds all ground truth (ignored ground truth is indicated by an index vector)
vector<tGroundtruth> groundtruth;
FILE *fp = fopen(file_name.c_str(),"r");
if (!fp) {
success = false;
return groundtruth;
}
while (!feof(fp)) {
tGroundtruth g;
char str[255];
if (fscanf(fp, "%s %lf %d %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf",
str, &g.truncation, &g.occlusion, &g.box.alpha,
&g.box.x1, &g.box.y1, &g.box.x2, &g.box.y2,
&g.h, &g.w, &g.l, &g.t1,
&g.t2, &g.t3, &g.ry )==15) {
g.box.type = str;
groundtruth.push_back(g);
}
}
fclose(fp);
success = true;
return groundtruth;
}
void saveStats (const vector<double> &precision, const vector<double> &aos, FILE *fp_det, FILE *fp_ori) {
// save precision to file
if(precision.empty())
return;
for (int32_t i=0; i<precision.size(); i++)
fprintf(fp_det,"%f ",precision[i]);
fprintf(fp_det,"\n");
// save orientation similarity, only if there were no invalid orientation entries in submission (alpha=-10)
if(aos.empty())
return;
for (int32_t i=0; i<aos.size(); i++)
fprintf(fp_ori,"%f ",aos[i]);
fprintf(fp_ori,"\n");
}
/*=======================================================================
EVALUATION HELPER FUNCTIONS
=======================================================================*/
// criterion defines whether the overlap is computed with respect to both areas (ground truth and detection)
// or with respect to box a or b (detection and "dontcare" areas)
inline double imageBoxOverlap(tBox a, tBox b, int32_t criterion=-1){
// overlap is invalid in the beginning
double o = -1;
// get overlapping area
double x1 = max(a.x1, b.x1);
double y1 = max(a.y1, b.y1);
double x2 = min(a.x2, b.x2);
double y2 = min(a.y2, b.y2);
// compute width and height of overlapping area
double w = x2-x1;
double h = y2-y1;
// set invalid entries to 0 overlap
if(w<=0 || h<=0)
return 0;
// get overlapping areas
double inter = w*h;
double a_area = (a.x2-a.x1) * (a.y2-a.y1);
double b_area = (b.x2-b.x1) * (b.y2-b.y1);
  // intersection over union overlap depending on the user's choice
if(criterion==-1) // union
o = inter / (a_area+b_area-inter);
else if(criterion==0) // bbox_a
o = inter / a_area;
else if(criterion==1) // bbox_b
o = inter / b_area;
// overlap
return o;
}
inline double imageBoxOverlap(tDetection a, tGroundtruth b, int32_t criterion=-1){
return imageBoxOverlap(a.box, b.box, criterion);
}
// compute polygon of an oriented bounding box
template <typename T>
Polygon toPolygon(const T& g) {
using namespace boost::numeric::ublas;
using namespace boost::geometry;
matrix<double> mref(2, 2);
mref(0, 0) = cos(g.ry); mref(0, 1) = sin(g.ry);
mref(1, 0) = -sin(g.ry); mref(1, 1) = cos(g.ry);
matrix<double> corners(2, 4);
double data[] = {g.l / 2, g.l / 2, -g.l / 2, -g.l / 2,
g.w / 2, -g.w / 2, -g.w / 2, g.w / 2};
std::copy(data, data + 8, corners.data().begin());
matrix<double> gc = prod(mref, corners);
for (int i = 0; i < 4; ++i) {
gc(0, i) += g.t1;
gc(1, i) += g.t3;
}
double points[][2] = {{gc(0, 0), gc(1, 0)},{gc(0, 1), gc(1, 1)},{gc(0, 2), gc(1, 2)},{gc(0, 3), gc(1, 3)},{gc(0, 0), gc(1, 0)}};
Polygon poly;
append(poly, points);
return poly;
}
// measure overlap between bird's eye view bounding boxes, parametrized by (ry, l, w, tx, tz)
inline double groundBoxOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) {
using namespace boost::geometry;
Polygon gp = toPolygon(g);
Polygon dp = toPolygon(d);
std::vector<Polygon> in, un;
intersection(gp, dp, in);
union_(gp, dp, un);
double inter_area = in.empty() ? 0 : area(in.front());
double union_area = area(un.front());
double o;
if(criterion==-1) // union
o = inter_area / union_area;
else if(criterion==0) // bbox_a
o = inter_area / area(dp);
else if(criterion==1) // bbox_b
o = inter_area / area(gp);
return o;
}
// measure overlap between 3D bounding boxes, parametrized by (ry, h, w, l, tx, ty, tz)
inline double box3DOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) {
using namespace boost::geometry;
Polygon gp = toPolygon(g);
Polygon dp = toPolygon(d);
std::vector<Polygon> in, un;
intersection(gp, dp, in);
union_(gp, dp, un);
double ymax = min(d.t2, g.t2);
double ymin = max(d.t2 - d.h, g.t2 - g.h);
double inter_area = in.empty() ? 0 : area(in.front());
double inter_vol = inter_area * max(0.0, ymax - ymin);
double det_vol = d.h * d.l * d.w;
double gt_vol = g.h * g.l * g.w;
double o;
if(criterion==-1) // union
o = inter_vol / (det_vol + gt_vol - inter_vol);
else if(criterion==0) // bbox_a
o = inter_vol / det_vol;
else if(criterion==1) // bbox_b
o = inter_vol / gt_vol;
return o;
}
vector<double> getThresholds(vector<double> &v, double n_groundtruth){
// holds scores needed to compute N_SAMPLE_PTS recall values
vector<double> t;
// sort scores in descending order
// (highest score is assumed to give best/most confident detections)
sort(v.begin(), v.end(), greater<double>());
// get scores for linearly spaced recall
double current_recall = 0;
for(int32_t i=0; i<v.size(); i++){
    // check if the recall to the right of the current recall point is closer than the one to the left;
    // in this case, skip the current detection score
double l_recall, r_recall, recall;
l_recall = (double)(i+1)/n_groundtruth;
if(i<(v.size()-1))
r_recall = (double)(i+2)/n_groundtruth;
else
r_recall = l_recall;
if( (r_recall-current_recall) < (current_recall-l_recall) && i<(v.size()-1))
continue;
// left recall is the best approximation, so use this and goto next recall step for approximation
recall = l_recall;
// the next recall step was reached
t.push_back(v[i]);
current_recall += 1.0/(N_SAMPLE_PTS-1.0);
}
return t;
}
void cleanData(CLASSES current_class, const vector<tGroundtruth> &gt, const vector<tDetection> &det, vector<int32_t> &ignored_gt, vector<tGroundtruth> &dc, vector<int32_t> &ignored_det, int32_t &n_gt, DIFFICULTY difficulty){
// extract ground truth bounding boxes for current evaluation class
for(int32_t i=0;i<gt.size(); i++){
// only bounding boxes with a minimum height are used for evaluation
double height = gt[i].box.y2 - gt[i].box.y1;
// neighboring classes are ignored ("van" for "car" and "person_sitting" for "pedestrian")
// (lower/upper cases are ignored)
int32_t valid_class;
// all classes without a neighboring class
if(!strcasecmp(gt[i].box.type.c_str(), CLASS_NAMES[current_class].c_str()))
valid_class = 1;
// classes with a neighboring class
else if(!strcasecmp(CLASS_NAMES[current_class].c_str(), "Pedestrian") && !strcasecmp("Person_sitting", gt[i].box.type.c_str()))
valid_class = 0;
else if(!strcasecmp(CLASS_NAMES[current_class].c_str(), "Car") && !strcasecmp("Van", gt[i].box.type.c_str()))
valid_class = 0;
// classes not used for evaluation
else
valid_class = -1;
// ground truth is ignored, if occlusion, truncation exceeds the difficulty or ground truth is too small
// (doesn't count as FN nor TP, although detections may be assigned)
bool ignore = false;
if(gt[i].occlusion>MAX_OCCLUSION[difficulty] || gt[i].truncation>MAX_TRUNCATION[difficulty] || height<MIN_HEIGHT[difficulty])
ignore = true;
// set ignored vector for ground truth
    // current class and not ignored (the total no. of ground truth boxes is counted for the recall denominator)
if(valid_class==1 && !ignore){
ignored_gt.push_back(0);
n_gt++;
}
// neighboring class, or current class but ignored
else if(valid_class==0 || (ignore && valid_class==1))
ignored_gt.push_back(1);
// all other classes which are FN in the evaluation
else
ignored_gt.push_back(-1);
}
// extract dontcare areas
for(int32_t i=0;i<gt.size(); i++)
if(!strcasecmp("DontCare", gt[i].box.type.c_str()))
dc.push_back(gt[i]);
  // extract detection bounding boxes of the current class
for(int32_t i=0;i<det.size(); i++){
// neighboring classes are not evaluated
int32_t valid_class;
if(!strcasecmp(det[i].box.type.c_str(), CLASS_NAMES[current_class].c_str()))
valid_class = 1;
else
valid_class = -1;
int32_t height = fabs(det[i].box.y1 - det[i].box.y2);
// set ignored vector for detections
if(height<MIN_HEIGHT[difficulty])
ignored_det.push_back(1);
else if(valid_class==1)
ignored_det.push_back(0);
else
ignored_det.push_back(-1);
}
}
tPrData computeStatistics(CLASSES current_class, const vector<tGroundtruth> &gt,
const vector<tDetection> &det, const vector<tGroundtruth> &dc,
const vector<int32_t> &ignored_gt, const vector<int32_t> &ignored_det,
bool compute_fp, double (*boxoverlap)(tDetection, tGroundtruth, int32_t),
METRIC metric, bool compute_aos=false, double thresh=0, bool debug=false){
tPrData stat = tPrData();
const double NO_DETECTION = -10000000;
vector<double> delta; // holds angular difference for TPs (needed for AOS evaluation)
  vector<bool> assigned_detection; // holds whether a detection was assigned to a valid or ignored ground truth
assigned_detection.assign(det.size(), false);
vector<bool> ignored_threshold;
ignored_threshold.assign(det.size(), false); // holds detections with a threshold lower than thresh if FP are computed
// detections with a low score are ignored for computing precision (needs FP)
if(compute_fp)
for(int32_t i=0; i<det.size(); i++)
if(det[i].thresh<thresh)
ignored_threshold[i] = true;
// evaluate all ground truth boxes
for(int32_t i=0; i<gt.size(); i++){
// this ground truth is not of the current or a neighboring class and therefore ignored
if(ignored_gt[i]==-1)
continue;
    /*=======================================================================
       find candidates (overlap with ground truth above the class/metric
       specific minimum overlap MIN_OVERLAP[metric][current_class])
    =======================================================================*/
int32_t det_idx = -1;
double valid_detection = NO_DETECTION;
double max_overlap = 0;
// search for a possible detection
bool assigned_ignored_det = false;
for(int32_t j=0; j<det.size(); j++){
// detections not of the current class, already assigned or with a low threshold are ignored
if(ignored_det[j]==-1)
continue;
if(assigned_detection[j])
continue;
if(ignored_threshold[j])
continue;
// find the maximum score for the candidates and get idx of respective detection
double overlap = boxoverlap(det[j], gt[i], -1);
// for computing recall thresholds, the candidate with highest score is considered
if(!compute_fp && overlap>MIN_OVERLAP[metric][current_class] && det[j].thresh>valid_detection){
det_idx = j;
valid_detection = det[j].thresh;
}
      // for computing pr curve values, the valid candidate with the greatest overlap is considered;
      // a valid detection always replaces a previously assigned ignored detection (too small height)
else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && (overlap>max_overlap || assigned_ignored_det) && ignored_det[j]==0){
max_overlap = overlap;
det_idx = j;
valid_detection = 1;
assigned_ignored_det = false;
}
else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && valid_detection==NO_DETECTION && ignored_det[j]==1){
det_idx = j;
valid_detection = 1;
assigned_ignored_det = true;
}
}
/*=======================================================================
compute TP, FP and FN
=======================================================================*/
// nothing was assigned to this valid ground truth
if(valid_detection==NO_DETECTION && ignored_gt[i]==0) {
stat.fn++;
}
// only evaluate valid ground truth <=> detection assignments (considering difficulty level)
else if(valid_detection!=NO_DETECTION && (ignored_gt[i]==1 || ignored_det[det_idx]==1))
assigned_detection[det_idx] = true;
// found a valid true positive
else if(valid_detection!=NO_DETECTION){
// write highest score to threshold vector
stat.tp++;
stat.v.push_back(det[det_idx].thresh);
// compute angular difference of detection and ground truth if valid detection orientation was provided
if(compute_aos)
delta.push_back(gt[i].box.alpha - det[det_idx].box.alpha);
// clean up
assigned_detection[det_idx] = true;
}
}
// if FP are requested, consider stuff area
if(compute_fp){
// count fp
for(int32_t i=0; i<det.size(); i++){
      // count false positives, if required (detections below the minimum height are ignored: ignored_det==1)
if(!(assigned_detection[i] || ignored_det[i]==-1 || ignored_det[i]==1 || ignored_threshold[i]))
stat.fp++;
}
// do not consider detections overlapping with stuff area
int32_t nstuff = 0;
for(int32_t i=0; i<dc.size(); i++){
for(int32_t j=0; j<det.size(); j++){
// detections not of the current class, already assigned, with a low threshold or a low minimum height are ignored
if(assigned_detection[j])
continue;
if(ignored_det[j]==-1 || ignored_det[j]==1)
continue;
if(ignored_threshold[j])
continue;
// compute overlap and assign to stuff area, if overlap exceeds class specific value
double overlap = boxoverlap(det[j], dc[i], 0);
if(overlap>MIN_OVERLAP[metric][current_class]){
assigned_detection[j] = true;
nstuff++;
}
}
}
    // FP = no. of detections not assigned to ground truth minus detections assigned to stuff areas
stat.fp -= nstuff;
// if all orientation values are valid, the AOS is computed
if(compute_aos){
vector<double> tmp;
// FP have a similarity of 0, for all TP compute AOS
tmp.assign(stat.fp, 0);
for(int32_t i=0; i<delta.size(); i++)
tmp.push_back((1.0+cos(delta[i]))/2.0);
      // make sure that all orientation deltas have been computed
assert(tmp.size()==stat.fp+stat.tp);
assert(delta.size()==stat.tp);
      // accumulate the orientation similarity for this image (normalized by tp+fp later in eval_class)
if(stat.tp>0 || stat.fp>0)
stat.similarity = accumulate(tmp.begin(), tmp.end(), 0.0);
// there was neither a FP nor a TP, so the similarity is ignored in the evaluation
else
stat.similarity = -1;
}
}
return stat;
}
/*=======================================================================
EVALUATE CLASS-WISE
=======================================================================*/
bool eval_class (FILE *fp_det, FILE *fp_ori, CLASSES current_class,
const vector< vector<tGroundtruth> > &groundtruth,
const vector< vector<tDetection> > &detections, bool compute_aos,
double (*boxoverlap)(tDetection, tGroundtruth, int32_t),
vector<double> &precision, vector<double> &aos,
DIFFICULTY difficulty, METRIC metric) {
assert(groundtruth.size() == detections.size());
// init
int32_t n_gt=0; // total no. of gt (denominator of recall)
vector<double> v, thresholds; // detection scores, evaluated for recall discretization
  vector< vector<int32_t> > ignored_gt, ignored_det; // ignore flags for ground truth/detections of the current class/difficulty
  vector< vector<tGroundtruth> > dontcare;           // dontcare areas extracted from the ground truth
// for all test images do
for (int32_t i=0; i<groundtruth.size(); i++){
// holds ignored ground truth, ignored detections and dontcare areas for current frame
vector<int32_t> i_gt, i_det;
vector<tGroundtruth> dc;
// only evaluate objects of current class and ignore occluded, truncated objects
cleanData(current_class, groundtruth[i], detections[i], i_gt, dc, i_det, n_gt, difficulty);
ignored_gt.push_back(i_gt);
ignored_det.push_back(i_det);
dontcare.push_back(dc);
// compute statistics to get recall values
tPrData pr_tmp = tPrData();
pr_tmp = computeStatistics(current_class, groundtruth[i], detections[i], dc, i_gt, i_det, false, boxoverlap, metric);
// add detection scores to vector over all images
for(int32_t j=0; j<pr_tmp.v.size(); j++)
v.push_back(pr_tmp.v[j]);
}
// get scores that must be evaluated for recall discretization
thresholds = getThresholds(v, n_gt);
// compute TP,FP,FN for relevant scores
vector<tPrData> pr;
pr.assign(thresholds.size(),tPrData());
for (int32_t i=0; i<groundtruth.size(); i++){
// for all scores/recall thresholds do:
for(int32_t t=0; t<thresholds.size(); t++){
tPrData tmp = tPrData();
tmp = computeStatistics(current_class, groundtruth[i], detections[i], dontcare[i],
ignored_gt[i], ignored_det[i], true, boxoverlap, metric,
                              compute_aos, thresholds[t], t==38); // t==38: debug output for a single fixed threshold index only
// add no. of TP, FP, FN, AOS for current frame to total evaluation for current threshold
pr[t].tp += tmp.tp;
pr[t].fp += tmp.fp;
pr[t].fn += tmp.fn;
if(tmp.similarity!=-1)
pr[t].similarity += tmp.similarity;
}
}
// compute recall, precision and AOS
vector<double> recall;
precision.assign(N_SAMPLE_PTS, 0);
if(compute_aos)
aos.assign(N_SAMPLE_PTS, 0);
double r=0;
for (int32_t i=0; i<thresholds.size(); i++){
r = pr[i].tp/(double)(pr[i].tp + pr[i].fn);
recall.push_back(r);
precision[i] = pr[i].tp/(double)(pr[i].tp + pr[i].fp);
if(compute_aos)
aos[i] = pr[i].similarity/(double)(pr[i].tp + pr[i].fp);
}
// filter precision and AOS using max_{i..end}(precision)
for (int32_t i=0; i<thresholds.size(); i++){
precision[i] = *max_element(precision.begin()+i, precision.end());
if(compute_aos)
aos[i] = *max_element(aos.begin()+i, aos.end());
}
  // save statistics and finish with success
saveStats(precision, aos, fp_det, fp_ori);
return true;
}
void saveAndPlotPlots(string dir_name,string file_name,string obj_type,vector<double> vals[],bool is_aos){
char command[1024];
// save plot data to file
FILE *fp = fopen((dir_name + "/" + file_name + ".txt").c_str(),"w");
printf("save %s\n", (dir_name + "/" + file_name + ".txt").c_str());
for (int32_t i=0; i<(int)N_SAMPLE_PTS; i++)
fprintf(fp,"%f %f %f %f\n",(double)i/(N_SAMPLE_PTS-1.0),vals[0][i],vals[1][i],vals[2][i]);
fclose(fp);
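  // rough 11-point AP, printed for convenience: average the stored curve at
  // every 4th of the 41 recall samples (recall = 0, 0.1, ..., 1.0) and
  // report it as a percentage per difficulty (easy/moderate/hard)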
float sum[3] = {0, 0, 0};
for (int v = 0; v < 3; ++v)
for (int i = 0; i < vals[v].size(); i = i + 4)
sum[v] += vals[v][i];
printf("%s AP: %f %f %f\n", file_name.c_str(), sum[0] / 11 * 100, sum[1] / 11 * 100, sum[2] / 11 * 100);
// create png + eps
for (int32_t j=0; j<2; j++) {
// open file
FILE *fp = fopen((dir_name + "/" + file_name + ".gp").c_str(),"w");
// save gnuplot instructions
if (j==0) {
fprintf(fp,"set term png size 450,315 font \"Helvetica\" 11\n");
fprintf(fp,"set output \"%s.png\"\n",file_name.c_str());
} else {
fprintf(fp,"set term postscript eps enhanced color font \"Helvetica\" 20\n");
fprintf(fp,"set output \"%s.eps\"\n",file_name.c_str());
}
// set labels and ranges
fprintf(fp,"set size ratio 0.7\n");
fprintf(fp,"set xrange [0:1]\n");
fprintf(fp,"set yrange [0:1]\n");
fprintf(fp,"set xlabel \"Recall\"\n");
if (!is_aos) fprintf(fp,"set ylabel \"Precision\"\n");
else fprintf(fp,"set ylabel \"Orientation Similarity\"\n");
obj_type[0] = toupper(obj_type[0]);
fprintf(fp,"set title \"%s\"\n",obj_type.c_str());
// line width
int32_t lw = 5;
if (j==0) lw = 3;
// plot error curve
fprintf(fp,"plot ");
fprintf(fp,"\"%s.txt\" using 1:2 title 'Easy' with lines ls 1 lw %d,",file_name.c_str(),lw);
fprintf(fp,"\"%s.txt\" using 1:3 title 'Moderate' with lines ls 2 lw %d,",file_name.c_str(),lw);
fprintf(fp,"\"%s.txt\" using 1:4 title 'Hard' with lines ls 3 lw %d",file_name.c_str(),lw);
// close file
fclose(fp);
// run gnuplot => create png + eps
sprintf(command,"cd %s; gnuplot %s",dir_name.c_str(),(file_name + ".gp").c_str());
system(command);
}
// create pdf and crop
sprintf(command,"cd %s; ps2pdf %s.eps %s_large.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str());
system(command);
sprintf(command,"cd %s; pdfcrop %s_large.pdf %s.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str());
system(command);
sprintf(command,"cd %s; rm %s_large.pdf",dir_name.c_str(),file_name.c_str());
system(command);
}
vector<int32_t> getEvalIndices(const string& result_dir) {
DIR* dir;
dirent* entity;
dir = opendir(result_dir.c_str());
if (dir) {
    while ((entity = readdir(dir)) != NULL) {
string path(entity->d_name);
int32_t len = path.size();
if (len < 10) continue;
int32_t index = atoi(path.substr(len - 10, 10).c_str());
indices.push_back(index);
}
    closedir(dir);
  }
return indices;
}
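// note: this assumes result files named "<6-digit index>.txt"; for example,
// "000123.txt" (10 characters) yields index 123, shorter names are skipped,
// and non-numeric names parse to index 0 via atoi.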
bool eval(string gt_dir, string result_dir, Mail* mail){
// set some global parameters
initGlobals();
// ground truth and result directories
// string gt_dir = "data/object/label_2";
// string result_dir = "results/" + result_sha;
string plot_dir = result_dir + "/../plot";
// create output directories
system(("mkdir " + plot_dir).c_str());
// hold detections and ground truth in memory
vector< vector<tGroundtruth> > groundtruth;
vector< vector<tDetection> > detections;
  // holds whether orientation similarity shall be computed (might be set to false while loading detections)
  // and which labels were provided by this submission
bool compute_aos=true;
vector<bool> eval_image(NUM_CLASS, false);
vector<bool> eval_ground(NUM_CLASS, false);
vector<bool> eval_3d(NUM_CLASS, false);
// for all images read groundtruth and detections
mail->msg("Loading detections...");
std::vector<int32_t> indices = getEvalIndices(result_dir);
printf("number of files for evaluation: %d\n", (int)indices.size());
for (int32_t i=0; i<indices.size(); i++) {
// file name
char file_name[256];
sprintf(file_name,"%06d.txt",indices.at(i));
// read ground truth and result poses
bool gt_success,det_success;
vector<tGroundtruth> gt = loadGroundtruth(gt_dir + "/" + file_name,gt_success);
    vector<tDetection> det = loadDetections(result_dir + "/" + file_name,
compute_aos, eval_image, eval_ground, eval_3d, det_success);
groundtruth.push_back(gt);
detections.push_back(det);
// check for errors
if (!gt_success) {
mail->msg("ERROR: Couldn't read: %s of ground truth. Please write me an email!", file_name);
return false;
}
if (!det_success) {
mail->msg("ERROR: Couldn't read: %s", file_name);
return false;
}
}
mail->msg(" done.");
// holds pointers for result files
FILE *fp_det=0, *fp_ori=0;
// eval image 2D bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_image[c]) {
fp_det = fopen((result_dir + "/../stats_" + CLASS_NAMES[c] + "_detection.txt").c_str(), "w");
if(compute_aos)
fp_ori = fopen((result_dir + "/../stats_" + CLASS_NAMES[c] + "_orientation.txt").c_str(),"w");
vector<double> precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[0], aos[0], EASY, IMAGE)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[1], aos[1], MODERATE, IMAGE)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[2], aos[2], HARD, IMAGE)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection", CLASS_NAMES[c], precision, 0);
if(compute_aos){
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_orientation", CLASS_NAMES[c], aos, 1);
fclose(fp_ori);
}
}
}
// don't evaluate AOS for birdview boxes and 3D boxes
compute_aos = false;
// eval bird's eye view bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_ground[c]) {
fp_det = fopen((result_dir + "/../stats_" + CLASS_NAMES[c] + "_detection_ground.txt").c_str(), "w");
vector<double> precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[0], aos[0], EASY, GROUND)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[1], aos[1], MODERATE, GROUND)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[2], aos[2], HARD, GROUND)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_ground", CLASS_NAMES[c], precision, 0);
}
}
// eval 3D bounding boxes
for (int c = 0; c < NUM_CLASS; c++) {
CLASSES cls = (CLASSES)c;
if (eval_3d[c]) {
fp_det = fopen((result_dir + "/../stats_" + CLASS_NAMES[c] + "_detection_3d.txt").c_str(), "w");
vector<double> precision[3], aos[3];
if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[0], aos[0], EASY, BOX3D)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[1], aos[1], MODERATE, BOX3D)
|| !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[2], aos[2], HARD, BOX3D)) {
mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str());
return false;
}
fclose(fp_det);
saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_3d", CLASS_NAMES[c], precision, 0);
}
}
// success
return true;
}
int main (int argc, char *argv[]) {
  // we need exactly 3 arguments!
if (argc!=3) {
cout << "Usage: ./eval_detection_3d_offline gt_dir result_dir" << endl;
return 1;
}
// read arguments
string gt_dir = argv[1];
string result_dir = argv[2];
// init notification mail
Mail *mail;
mail = new Mail();
mail->msg("Thank you for participating in our evaluation!");
// run evaluation
if (eval(gt_dir, result_dir, mail)) {
mail->msg("Your evaluation results are available at:");
mail->msg(result_dir.c_str());
} else {
system(("rm -r " + result_dir + "/../plot").c_str());
mail->msg("An error occured while processing your results.");
}
// send mail and exit
delete mail;
return 0;
}
#ifndef MAIL_H
#define MAIL_H
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <string>  // std::string
class Mail {
public:
Mail (std::string email = "") {
if (email.compare("")) {
mail = popen("/usr/lib/sendmail -t -f noreply@cvlibs.net","w");
fprintf(mail,"To: %s\n", email.c_str());
fprintf(mail,"From: noreply@cvlibs.net\n");
fprintf(mail,"Subject: KITTI Evaluation Benchmark\n");
fprintf(mail,"\n\n");
} else {
mail = 0;
}
}
~Mail() {
if (mail) {
pclose(mail);
}
}
void msg (const char *format, ...) {
va_list args;
va_start(args,format);
if (mail) {
vfprintf(mail,format,args);
fprintf(mail,"\n");
}
vprintf(format,args);
printf("\n");
va_end(args);
}
private:
FILE *mail;
};
#endif
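# note: the script below merges several Pascal VOC annotation files (already
# converted to COCO json format); annotation ids of later files are offset by
# the running total tot_anns so that they remain unique in the merged output.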
import json
# ANNOT_PATH = '/home/zxy/Datasets/VOC/annotations/'
ANNOT_PATH = 'voc/annotations/'
OUT_PATH = ANNOT_PATH
# INPUT_FILES = ['pascal_train2012.json', 'pascal_val2012.json',
# 'pascal_train2007.json', 'pascal_val2007.json']
INPUT_FILES = ['pascal_train2007.json', 'pascal_val2007.json']
OUTPUT_FILE = 'pascal_trainval0712.json'
KEYS = ['images', 'type', 'annotations', 'categories']
MERGE_KEYS = ['images', 'annotations']
out = {}
tot_anns = 0
for i, file_name in enumerate(INPUT_FILES):
    with open(ANNOT_PATH + file_name, 'r') as f:
        data = json.load(f)
print('keys', data.keys())
if i == 0:
for key in KEYS:
out[key] = data[key]
print(file_name, key, len(data[key]))
else:
out['images'] += data['images']
for j in range(len(data['annotations'])):
data['annotations'][j]['id'] += tot_anns
out['annotations'] += data['annotations']
print(file_name, 'images', len(data['images']))
print(file_name, 'annotations', len(data['annotations']))
tot_anns = len(out['annotations'])
print('tot', len(out['annotations']))
with open(OUT_PATH + OUTPUT_FILE, 'w') as f:
    json.dump(out, f)