Commit b952e97b authored by chenych

First Commit.
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id);
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import numpy as np
cimport numpy as np
assert sizeof(int) == sizeof(np.int32_t)
cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
            np.int32_t device_id=0):
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    cdef np.ndarray[np.int_t, ndim=1] \
        order = scores.argsort()[::-1]
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    return list(order[keep])
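
A minimal usage sketch for the wrapper above, not part of the commit; it assumes the extension has been built (see the setup script later in this diff) and that a CUDA device is available. The box values are illustrative only.

import numpy as np
from nms.gpu_nms import gpu_nms

# One box per row: [x1, y1, x2, y2, score].
dets = np.array([[10., 10., 60., 60., 0.9],
                 [12., 12., 58., 58., 0.8],
                 [100., 100., 160., 160., 0.7]], dtype=np.float32)
keep = gpu_nms(dets, 0.5, device_id=0)
# keep -> indices of surviving boxes; here the second box overlaps the first
# with IoU ~0.85 and is suppressed, giving e.g. [0, 2].
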
// ------------------------------------------------------------------
// Faster R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Shaoqing Ren
// ------------------------------------------------------------------
#include "gpu_nms.hpp"
#include <vector>
#include <iostream>
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
int const threadsPerBlock = sizeof(unsigned long long) * 8;
__device__ inline float devIoU(float const * const a, float const * const b) {
  float left = max(a[0], b[0]), right = min(a[2], b[2]);
  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
  float interS = width * height;
  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return interS / (Sa + Sb - interS);
}
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}
void _set_device(int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
}
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  std::vector<unsigned long long> remv(col_blocks);
  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
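
The kernel above packs its pairwise suppression decisions into 64-bit words: each (sorted) box owns col_blocks words, and bit inblock of word nblock corresponds to the box at index nblock * 64 + inblock. A small Python sketch of that index arithmetic, not part of the commit:

def mask_position(box_index, threads_per_block=64):
    # Mirrors nblock / inblock in _nms: which unsigned long long word and
    # which bit inside it refer to a given sorted box index.
    return box_index // threads_per_block, box_index % threads_per_block

# Example: box 130 maps to word 2, bit 2, i.e. the value 1 << 2 in remv[2].
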
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import numpy as np
def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep
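
The pure-Python baseline can be exercised directly, with no compiled extension; a quick sketch (made-up boxes, assuming py_cpu_nms above is in scope since file paths are not shown in this diff):

import numpy as np

dets = np.array([[10., 10., 60., 60., 0.9],
                 [12., 12., 58., 58., 0.8],
                 [100., 100., 160., 160., 0.7]])
print(py_cpu_nms(dets, 0.5))  # expected [0, 2]: the second box is suppressed
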
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import os
from os.path import join as pjoin
import numpy as np
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
def find_in_path(name, path):
    "Find a file in a search path"
    # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
    for dir in path.split(os.pathsep):
        binpath = pjoin(dir, name)
        if os.path.exists(binpath):
            return os.path.abspath(binpath)
    return None
def locate_cuda():
    """Locate the CUDA environment on the system

    Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
    and values giving the absolute path to each directory.

    Starts by looking for the CUDAHOME env variable. If not found, everything
    is based on finding 'nvcc' in the PATH.
    """

    # first check if the CUDAHOME env variable is in use
    if 'CUDAHOME' in os.environ:
        home = os.environ['CUDAHOME']
        nvcc = pjoin(home, 'bin', 'nvcc')
    else:
        # otherwise, search the PATH for NVCC
        default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
        nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
        if nvcc is None:
            raise EnvironmentError('The nvcc binary could not be '
                                   'located in your $PATH. Either add it to your path, or set $CUDAHOME')
        home = os.path.dirname(os.path.dirname(nvcc))

    cudaconfig = {'home': home, 'nvcc': nvcc,
                  'include': pjoin(home, 'include'),
                  'lib64': pjoin(home, 'lib64')}
    for k, v in cudaconfig.items():
        if not os.path.exists(v):
            raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))

    return cudaconfig
CUDA = locate_cuda()
# Obtain the numpy include directory. This logic works across numpy versions.
try:
    numpy_include = np.get_include()
except AttributeError:
    numpy_include = np.get_numpy_include()
def customize_compiler_for_nvcc(self):
    """inject deep into distutils to customize how the dispatch
    to gcc/nvcc works.

    If you subclass UnixCCompiler, it's not trivial to get your subclass
    injected in, and still have the right customizations (i.e.
    distutils.sysconfig.customize_compiler) run on it. So instead of going
    the OO route, I have this. Note, it's kind of like a weird functional
    subclassing going on."""

    # tell the compiler it can process .cu files
    self.src_extensions.append('.cu')

    # save references to the default compiler_so and _compile methods
    default_compiler_so = self.compiler_so
    super = self._compile

    # now redefine the _compile method. This gets executed for each
    # object but distutils doesn't have the ability to change compilers
    # based on source extension: we add it.
    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
        print(extra_postargs)
        if os.path.splitext(src)[1] == '.cu':
            # use nvcc for .cu files
            self.set_executable('compiler_so', CUDA['nvcc'])
            # use only a subset of the extra_postargs, which are 1-1 translated
            # from the extra_compile_args in the Extension class
            postargs = extra_postargs['nvcc']
        else:
            postargs = extra_postargs['gcc']

        super(obj, src, ext, cc_args, postargs, pp_opts)
        # reset the default compiler_so, which we might have changed for cuda
        self.compiler_so = default_compiler_so

    # inject our redefined _compile method into the class
    self._compile = _compile
# run the customize_compiler
class custom_build_ext(build_ext):
    def build_extensions(self):
        customize_compiler_for_nvcc(self.compiler)
        build_ext.build_extensions(self)
ext_modules = [
    Extension(
        "utils.cython_bbox",
        ["utils/bbox.pyx"],
        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
        include_dirs=[numpy_include]
    ),
    Extension(
        "nms.cpu_nms",
        ["nms/cpu_nms.pyx"],
        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
        include_dirs=[numpy_include]
    ),
    Extension('nms.gpu_nms',
        ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
        library_dirs=[CUDA['lib64']],
        libraries=['cudart'],
        language='c++',
        runtime_library_dirs=[CUDA['lib64']],
        # this syntax is specific to this build system
        # we're only going to use certain compiler args with nvcc and not with gcc
        # the implementation of this trick is in customize_compiler_for_nvcc() above
        extra_compile_args={'gcc': ["-Wno-unused-function"],
                            'nvcc': ['-arch=sm_61',
                                     '--ptxas-options=-v',
                                     '-c',
                                     '--compiler-options',
                                     "'-fPIC'"]},
        include_dirs=[numpy_include, CUDA['include']]
    )
]
setup(
    name='tf_faster_rcnn',
    ext_modules=ext_modules,
    # inject our custom trigger
    cmdclass={'build_ext': custom_build_ext},
)
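
A minimal sketch of driving this setup script (not part of the commit): it assumes the file is saved as setup.py and is run from its own directory, using the standard distutils in-place build so the compiled modules land next to their sources.

import subprocess
import sys

# Compile utils.cython_bbox, nms.cpu_nms and nms.gpu_nms in place so the
# Python code elsewhere in this commit can import them.
subprocess.check_call([sys.executable, 'setup.py', 'build_ext', '--inplace'])
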
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------
cimport cython
import numpy as np
cimport numpy as np
DTYPE = np.float
ctypedef np.float_t DTYPE_t
def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
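
An illustrative call to bbox_overlaps, not part of the commit; it assumes the extension has been built and is importable as utils.cython_bbox (the module name used in the setup script above), and the box values are made up.

import numpy as np
from utils.cython_bbox import bbox_overlaps

boxes = np.array([[0., 0., 9., 9.]])            # one 10x10 box
query = np.array([[0., 0., 9., 9.],
                  [5., 5., 14., 14.]])
print(bbox_overlaps(boxes, query))
# Row 0 is [1.0, 25 / (100 + 100 - 25)] = [1.0, ~0.143].
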
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
"""Blob helper functions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import cv2
def im_list_to_blob(ims):
    """Convert a list of images into a network input.

    Assumes images are already prepared (means subtracted, BGR order, ...).
    """
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    num_images = len(ims)
    blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    for i in range(num_images):
        im = ims[i]
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im

    return blob


def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Mean subtract and scale an image for use in a blob."""
    im = im.astype(np.float32, copy=False)
    im -= pixel_means
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than MAX_SIZE
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)

    return im, im_scale
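
A short sketch of the two helpers together, not part of the commit. A random array stands in for a real cv2.imread result, and the pixel means, target size, and max size are illustrative values of the kind the Fast R-CNN config supplies; in the real pipeline they come from the config, not from this file.

import numpy as np

im = np.random.randint(0, 255, size=(480, 640, 3)).astype(np.float32)  # fake BGR image
pixel_means = np.array([[[102.9801, 115.9465, 122.7717]]])              # illustrative BGR means

im_prepped, im_scale = prep_im_for_blob(im, pixel_means, target_size=600, max_size=1000)
blob = im_list_to_blob([im_prepped])
print(blob.shape, im_scale)  # (1, 600, 800, 3) and 1.25 for a 480x640 input
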
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import time
class Timer(object):
    """A simple timer."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        # using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        if average:
            return self.average_time
        else:
            return self.diff
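
Typical use of the Timer above: tic() before a unit of work, toc() after it. A small self-contained sketch (the sleep stands in for a forward pass or batch fetch):

import time

timer = Timer()
for _ in range(3):
    timer.tic()
    time.sleep(0.1)          # stand-in for real work
    avg = timer.toc()        # toc() returns the running average by default
print('average %.3fs over %d calls' % (avg, timer.calls))
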
# --------------------------------------------------------
# Tensorflow Faster R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from six.moves import range
import PIL.Image as Image
import PIL.ImageColor as ImageColor
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
STANDARD_COLORS = [
'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
'WhiteSmoke', 'Yellow', 'YellowGreen'
]
NUM_COLORS = len(STANDARD_COLORS)
try:
    FONT = ImageFont.truetype('arial.ttf', 24)
except IOError:
    FONT = ImageFont.load_default()
def _draw_single_box(image, xmin, ymin, xmax, ymax, display_str, font, color='black', thickness=4):
    draw = ImageDraw.Draw(image)
    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
    draw.line([(left, top), (left, bottom), (right, bottom),
               (right, top), (left, top)], width=thickness, fill=color)
    text_bottom = bottom
    # Reverse list and print from bottom to top.
    text_width, text_height = font.getsize(display_str)
    margin = np.ceil(0.05 * text_height)
    draw.rectangle(
        [(left, text_bottom - text_height - 2 * margin), (left + text_width,
                                                          text_bottom)],
        fill=color)
    draw.text(
        (left + margin, text_bottom - text_height - margin),
        display_str,
        fill='black',
        font=font)

    return image


def draw_bounding_boxes(image, gt_boxes, im_info):
    num_boxes = gt_boxes.shape[0]
    gt_boxes_new = gt_boxes.copy()
    gt_boxes_new[:, :4] = np.round(gt_boxes_new[:, :4].copy() / im_info[2])
    disp_image = Image.fromarray(np.uint8(image[0]))

    for i in range(num_boxes):
        this_class = int(gt_boxes_new[i, 4])
        disp_image = _draw_single_box(disp_image,
                                      gt_boxes_new[i, 0],
                                      gt_boxes_new[i, 1],
                                      gt_boxes_new[i, 2],
                                      gt_boxes_new[i, 3],
                                      'N%02d-C%02d' % (i, this_class),
                                      FONT,
                                      color=STANDARD_COLORS[this_class % NUM_COLORS])

    image[0, :] = np.array(disp_image)
    return image
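
A hedged sketch of calling draw_bounding_boxes, not part of the commit: it expects a batched image of shape (1, H, W, 3), ground-truth boxes as [x1, y1, x2, y2, class] in the scaled image, and an im_info whose third entry is the scale factor. The values below are made up.

import numpy as np

image = np.zeros((1, 300, 400, 3), dtype=np.float32)   # one blank frame
gt_boxes = np.array([[50., 60., 200., 220., 3.]])      # one box with class id 3
im_info = np.array([300., 400., 1.])                   # height, width, scale
vis = draw_bounding_boxes(image, gt_boxes, im_info)    # annotated batch, same shape as image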