Commit c320b6ef authored by zhenyi

tf2 detection

parent 0fc002df
from tensorflow.keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
def VGG16(input_tensor):
    #----------------------------Backbone feature extraction begins---------------------------#
    # SSD structure, kept in the `net` dict
net = {}
# Block 1
net['input'] = input_tensor
# 300,300,3 -> 150,150,64
net['conv1_1'] = Conv2D(64, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv1_1')(net['input'])
net['conv1_2'] = Conv2D(64, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv1_2')(net['conv1_1'])
net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
name='pool1')(net['conv1_2'])
# Block 2
# 150,150,64 -> 75,75,128
net['conv2_1'] = Conv2D(128, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv2_1')(net['pool1'])
net['conv2_2'] = Conv2D(128, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv2_2')(net['conv2_1'])
net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
name='pool2')(net['conv2_2'])
# Block 3
# 75,75,128 -> 38,38,256
net['conv3_1'] = Conv2D(256, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv3_1')(net['pool2'])
net['conv3_2'] = Conv2D(256, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv3_2')(net['conv3_1'])
net['conv3_3'] = Conv2D(256, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv3_3')(net['conv3_2'])
net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
name='pool3')(net['conv3_3'])
# Block 4
# 38,38,256 -> 19,19,512
net['conv4_1'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv4_1')(net['pool3'])
net['conv4_2'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv4_2')(net['conv4_1'])
net['conv4_3'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv4_3')(net['conv4_2'])
net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
name='pool4')(net['conv4_3'])
# Block 5
# 19,19,512 -> 19,19,512
net['conv5_1'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv5_1')(net['pool4'])
net['conv5_2'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv5_2')(net['conv5_1'])
net['conv5_3'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv5_3')(net['conv5_2'])
net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), padding='same',
name='pool5')(net['conv5_3'])
# FC6
# 19,19,512 -> 19,19,1024
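    # dilation_rate=(6, 6) below is the atrous convolution used in the SSD
    # paper: fc6 keeps the 19x19 resolution while widening its receptive field.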
net['fc6'] = Conv2D(1024, kernel_size=(3,3), dilation_rate=(6, 6),
activation='relu', padding='same',
name='fc6')(net['pool5'])
# x = Dropout(0.5, name='drop6')(x)
# FC7
# 19,19,1024 -> 19,19,1024
net['fc7'] = Conv2D(1024, kernel_size=(1,1), activation='relu',
padding='same', name='fc7')(net['fc6'])
# x = Dropout(0.5, name='drop7')(x)
# Block 6
# 19,19,512 -> 10,10,512
net['conv6_1'] = Conv2D(256, kernel_size=(1,1), activation='relu',
padding='same',
name='conv6_1')(net['fc7'])
net['conv6_2'] = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv6_padding')(net['conv6_1'])
net['conv6_2'] = Conv2D(512, kernel_size=(3,3), strides=(2, 2),
activation='relu',
name='conv6_2')(net['conv6_2'])
# Block 7
# 10,10,512 -> 5,5,256
net['conv7_1'] = Conv2D(128, kernel_size=(1,1), activation='relu',
padding='same',
name='conv7_1')(net['conv6_2'])
net['conv7_2'] = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv7_padding')(net['conv7_1'])
net['conv7_2'] = Conv2D(256, kernel_size=(3,3), strides=(2, 2),
activation='relu', padding='valid',
name='conv7_2')(net['conv7_2'])
# Block 8
# 5,5,256 -> 3,3,256
net['conv8_1'] = Conv2D(128, kernel_size=(1,1), activation='relu',
padding='same',
name='conv8_1')(net['conv7_2'])
net['conv8_2'] = Conv2D(256, kernel_size=(3,3), strides=(1, 1),
activation='relu', padding='valid',
name='conv8_2')(net['conv8_1'])
# Block 9
# 3,3,256 -> 1,1,256
net['conv9_1'] = Conv2D(128, kernel_size=(1,1), activation='relu',
padding='same',
name='conv9_1')(net['conv8_2'])
net['conv9_2'] = Conv2D(256, kernel_size=(3,3), strides=(1, 1),
activation='relu', padding='valid',
name='conv9_2')(net['conv9_1'])
    #----------------------------Backbone feature extraction ends---------------------------#
return net
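
#----------------------------------------------------#
#   A minimal usage sketch: build the backbone on a
#   300x300x3 input and print the shapes of the six
#   feature layers SSD taps.
#----------------------------------------------------#
if __name__ == "__main__":
    from tensorflow.keras.layers import Input
    net = VGG16(Input(shape=(300, 300, 3)))
    for name in ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']:
        print(name, net[name].shape)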
#----------------------------------------------------#
#   predict.py from the video has been modified to
#   merge single-image prediction, camera detection
#   and the FPS test into one file; switch between
#   them by setting `mode`.
#----------------------------------------------------#
import time
import cv2
import numpy as np
import tensorflow as tf
from PIL import Image
from ssd import SSD
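#----------------------------------------------------#
#   Enable memory growth so TensorFlow allocates GPU
#   memory on demand instead of grabbing it all upfront.
#----------------------------------------------------#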
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
if __name__ == "__main__":
ssd = SSD()
    #----------------------------------------------------------------------------------------------------------#
    #   mode specifies the test mode:
    #   'predict'      single-image prediction. To modify the prediction step, e.g. saving the image or
    #                  cropping objects, read the detailed comments below first.
    #   'video'        video detection, from a camera or a video file; see the comments below.
    #   'fps'          FPS test, using img/street.jpg; see the comments below.
    #   'dir_predict'  walk a folder, detect and save. By default it reads the img folder and saves to
    #                  the img_out folder; see the comments below.
    #----------------------------------------------------------------------------------------------------------#
mode = "fps"
    #-------------------------------------------------------------------------#
    #   crop controls whether objects are cropped out after a
    #   single-image prediction.
    #   crop is only effective when mode='predict'.
    #-------------------------------------------------------------------------#
crop = False
    #----------------------------------------------------------------------------------------------------------#
    #   video_path is the path of the video; video_path=0 means detect from the camera.
    #   To detect a video file, set e.g. video_path = "xxx.mp4" to read xxx.mp4 from the root directory.
    #   video_save_path is where the result video is saved; video_save_path="" means do not save.
    #   To save the video, set e.g. video_save_path = "yyy.mp4" to save yyy.mp4 in the root directory.
    #   video_fps is the fps of the saved video.
    #   video_path, video_save_path and video_fps are only effective when mode='video'.
    #   When saving, exit with ctrl+c or run through to the last frame so the save completes properly.
    #----------------------------------------------------------------------------------------------------------#
video_path = 0
video_save_path = ""
video_fps = 25.0
    #-------------------------------------------------------------------------#
    #   test_interval is how many times the image is detected when
    #   measuring fps. In theory a larger test_interval gives a more
    #   accurate fps.
    #-------------------------------------------------------------------------#
test_interval = 100
    #-------------------------------------------------------------------------#
    #   dir_origin_path is the folder of images to detect.
    #   dir_save_path is where the detected images are saved.
    #   Both are only effective when mode='dir_predict'.
    #-------------------------------------------------------------------------#
dir_origin_path = "img/"
dir_save_path = "img_out/"
if mode == "predict":
        '''
        1. This code cannot batch-predict directly. For batch prediction, walk the folder with os.listdir()
           and open each image with Image.open before predicting. See get_dr_txt.py, which implements both
           the folder walk and saving the detection results.
        2. To save the detected image, use r_image.save("img.jpg"); edit predict.py directly.
        3. To get the coordinates of the predicted boxes, go into ssd.detect_image and read top, left,
           bottom and right in the drawing section.
        4. To crop objects out using the predicted boxes, go into ssd.detect_image and use the obtained
           top, left, bottom and right values to slice the original image with array indexing.
        5. To write extra text on the prediction, such as the count of a particular class, go into
           ssd.detect_image and test predicted_class in the drawing section; for example,
           if predicted_class == 'car': checks whether the current object is a car, then keep a count and
           write it with draw.text.
        '''
while True:
img = input('Input image filename:')
try:
image = Image.open(img)
except:
print('Open Error! Try again!')
continue
else:
r_image = ssd.detect_image(image, crop = crop)
r_image.show()
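                # As described in the notes above, the result can also be
                # saved here, e.g. r_image.save("img.jpg").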
elif mode == "video":
capture=cv2.VideoCapture(video_path)
if video_save_path!="":
fourcc = cv2.VideoWriter_fourcc(*'XVID')
size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)
ref, frame = capture.read()
if not ref:
            raise ValueError("Failed to read the camera (or video). Check that the camera is installed correctly (or that the video path is filled in correctly).")
fps = 0.0
while(True):
t1 = time.time()
            # read one frame
ref, frame = capture.read()
if not ref:
break
            # convert the format: BGR to RGB
frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
            # convert to a PIL Image
frame = Image.fromarray(np.uint8(frame))
            # run detection
frame = np.array(ssd.detect_image(frame))
            # RGB back to BGR to match the OpenCV display format
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
fps = ( fps + (1./(time.time()-t1)) ) / 2
print("fps= %.2f"%(fps))
frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow("video",frame)
c= cv2.waitKey(1) & 0xff
if video_save_path!="":
out.write(frame)
if c==27:
capture.release()
break
print("Video Detection Done!")
capture.release()
if video_save_path!="":
print("Save processed video to the path :" + video_save_path)
out.release()
cv2.destroyAllWindows()
elif mode == "fps":
img = Image.open('img/street.jpg')
tact_time = ssd.get_FPS(img, test_interval)
print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1')
elif mode == "dir_predict":
import os
from tqdm import tqdm
img_names = os.listdir(dir_origin_path)
for img_name in tqdm(img_names):
if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')):
image_path = os.path.join(dir_origin_path, img_name)
image = Image.open(image_path)
r_image = ssd.detect_image(image)
if not os.path.exists(dir_save_path):
os.makedirs(dir_save_path)
r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality=95, subsampling=0)
else:
raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.")
scipy==1.4.1
numpy==1.18.4
matplotlib==3.2.1
opencv_python==4.2.0.34
tqdm==4.46.1
Pillow==8.2.0
h5py==2.10.0
import colorsys
import os
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from PIL import ImageDraw, ImageFont
from nets.ssd import SSD300
from utils.utils_bbox import BBoxUtility
from utils.utils import get_classes, resize_image, cvtColor
from utils.anchors import get_anchors
'''
Required reading before training on your own dataset!
'''
class SSD(object):
_defaults = {
        #--------------------------------------------------------------------------#
        #   To predict with your own trained model you must change model_path
        #   and classes_path!
        #   model_path points to a weight file under the logs folder;
        #   classes_path points to the txt under model_data.
        #
        #   After training there are several weight files in the logs folder; pick
        #   one with a lower validation loss. A lower validation loss does not mean
        #   a higher mAP, only that those weights generalise better on the
        #   validation set.
        #   If a shape mismatch occurs, also check the model_path and classes_path
        #   used during training.
        #--------------------------------------------------------------------------#
"model_path" : 'model_data/ssd_weights.h5',
"classes_path" : 'model_data/voc_classes.txt',
        #---------------------------------------------------------------------#
        #   Input image size for prediction; use the same one as in training.
        #---------------------------------------------------------------------#
"input_shape" : [300, 300],
        #---------------------------------------------------------------------#
        #   Only predicted boxes whose score exceeds this confidence are kept.
        #---------------------------------------------------------------------#
"confidence" : 0.5,
        #---------------------------------------------------------------------#
        #   IoU threshold used by non-maximum suppression.
        #---------------------------------------------------------------------#
"nms_iou" : 0.45,
        #---------------------------------------------------------------------#
        #   Sizes of the anchor (prior) boxes.
        #---------------------------------------------------------------------#
'anchors_size' : [30, 60, 111, 162, 213, 264, 315],
        #---------------------------------------------------------------------#
        #   Controls whether letterbox_image is used to resize the input
        #   image without distortion. After repeated tests, resizing directly
        #   without letterbox_image gives better results here.
        #---------------------------------------------------------------------#
"letterbox_image" : False,
}
@classmethod
def get_defaults(cls, n):
if n in cls._defaults:
return cls._defaults[n]
else:
return "Unrecognized attribute name '" + n + "'"
    #---------------------------------------------------#
    #   Initialise the SSD
    #---------------------------------------------------#
def __init__(self, **kwargs):
self.__dict__.update(self._defaults)
for name, value in kwargs.items():
setattr(self, name, value)
        #---------------------------------------------------#
        #   Count the total number of classes
        #---------------------------------------------------#
self.class_names, self.num_classes = get_classes(self.classes_path)
self.anchors = get_anchors(self.input_shape, self.anchors_size)
self.num_classes = self.num_classes + 1
        #---------------------------------------------------#
        #   Set a different colour for each class's boxes
        #---------------------------------------------------#
hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
self.bbox_util = BBoxUtility(self.num_classes, nms_thresh=self.nms_iou)
self.generate()
    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
def generate(self):
model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
        #-------------------------------#
        #   Build the model and load the weights
        #-------------------------------#
self.ssd = SSD300([self.input_shape[0], self.input_shape[1], 3], self.num_classes)
self.ssd.load_weights(self.model_path, by_name=True)
print('{} model, anchors, and classes loaded.'.format(model_path))
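    #---------------------------------------------------#
    #   get_pred is wrapped in tf.function so repeated
    #   inference runs as a compiled graph instead of
    #   eagerly, which matters for the FPS test below.
    #---------------------------------------------------#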
@tf.function
def get_pred(self, photo):
preds = self.ssd(photo, training=False)
return preds
    #---------------------------------------------------#
    #   Detect an image
    #---------------------------------------------------#
def detect_image(self, image, crop = False):
image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors when
        #   predicting on greyscale images. The code only supports
        #   RGB prediction; all other image types are converted.
        #---------------------------------------------------------#
image = cvtColor(image)
        #---------------------------------------------------------#
        #   Pad the image with grey bars for a distortion-free
        #   resize; a direct resize also works for recognition.
        #---------------------------------------------------------#
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        #---------------------------------------------------------#
        #   Add the batch_size dimension, preprocess and normalise.
        #---------------------------------------------------------#
image_data = preprocess_input(np.expand_dims(np.array(image_data, dtype='float32'), 0))
preds = self.get_pred(image_data).numpy()
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
self.input_shape, self.letterbox_image, confidence=self.confidence)
        #--------------------------------------#
        #   If nothing is detected, return the original image
        #--------------------------------------#
if len(results[0])<=0:
return image
top_label = np.array(results[0][:, 4], dtype = 'int32')
top_conf = results[0][:, 5]
top_boxes = results[0][:, :4]
        #---------------------------------------------------------#
        #   Set the font and the box thickness
        #---------------------------------------------------------#
font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0], 1)
        #---------------------------------------------------------#
        #   Whether to crop the detected objects out
        #---------------------------------------------------------#
if crop:
for i, c in list(enumerate(top_boxes)):
top, left, bottom, right = top_boxes[i]
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
dir_save_path = "img_crop"
if not os.path.exists(dir_save_path):
os.makedirs(dir_save_path)
crop_image = image.crop([left, top, right, bottom])
crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
print("save crop_" + str(i) + ".png to " + dir_save_path)
        #---------------------------------------------------------#
        #   Draw on the image
        #---------------------------------------------------------#
for i, c in list(enumerate(top_label)):
predicted_class = self.class_names[int(c)]
box = top_boxes[i]
score = top_conf[i]
top, left, bottom, right = box
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
label = '{} {:.2f}'.format(predicted_class, score)
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
label = label.encode('utf-8')
print(label, top, left, bottom, right)
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
else:
text_origin = np.array([left, top + 1])
for i in range(thickness):
draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c])
draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
del draw
return image
def get_FPS(self, image, test_interval):
image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors when
        #   predicting on greyscale images. The code only supports
        #   RGB prediction; all other image types are converted.
        #---------------------------------------------------------#
image = cvtColor(image)
        #---------------------------------------------------------#
        #   Pad the image with grey bars for a distortion-free
        #   resize; a direct resize also works for recognition.
        #---------------------------------------------------------#
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        #---------------------------------------------------------#
        #   Add the batch_size dimension, preprocess and normalise.
        #---------------------------------------------------------#
image_data = preprocess_input(np.expand_dims(np.array(image_data, dtype='float32'), 0))
preds = self.get_pred(image_data).numpy()
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
self.input_shape, self.letterbox_image, confidence=self.confidence)
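        #---------------------------------------------------#
        #   The single pass above acts as a warm-up: it
        #   triggers the tf.function tracing so that graph
        #   construction is not counted in the timed loop.
        #---------------------------------------------------#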
t1 = time.time()
for _ in range(test_interval):
preds = self.get_pred(image_data).numpy()
            #-----------------------------------------------------------#
            #   Decode the predictions
            #-----------------------------------------------------------#
results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
self.input_shape, self.letterbox_image, confidence=self.confidence)
t2 = time.time()
tact_time = (t2 - t1) / test_interval
return tact_time
def get_map_txt(self, image_id, image, class_names, map_out_path):
f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w")
image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors when
        #   predicting on greyscale images. The code only supports
        #   RGB prediction; all other image types are converted.
        #---------------------------------------------------------#
image = cvtColor(image)
        #---------------------------------------------------------#
        #   Pad the image with grey bars for a distortion-free
        #   resize; a direct resize also works for recognition.
        #---------------------------------------------------------#
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        #---------------------------------------------------------#
        #   Add the batch_size dimension, preprocess and normalise.
        #---------------------------------------------------------#
image_data = preprocess_input(np.expand_dims(np.array(image_data, dtype='float32'), 0))
preds = self.get_pred(image_data).numpy()
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
self.input_shape, self.letterbox_image, confidence=self.confidence)
        #--------------------------------------#
        #   If nothing is detected, just return
        #--------------------------------------#
if len(results[0])<=0:
return
top_label = results[0][:, 4]
top_conf = results[0][:, 5]
top_boxes = results[0][:, :4]
for i, c in list(enumerate(top_label)):
predicted_class = self.class_names[int(c)]
box = top_boxes[i]
score = str(top_conf[i])
top, left, bottom, right = box
if predicted_class not in class_names:
continue
f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))
f.close()
return
#--------------------------------------------#
#   This snippet is for inspecting the
#   network structure
#--------------------------------------------#
from nets.ssd import SSD300
if __name__ == "__main__":
input_shape = [300, 300, 3]
num_classes = 21
model = SSD300(input_shape, num_classes)
model.summary()
# for i,layer in enumerate(model.layers):
# print(i,layer.name)
from functools import partial
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.optimizers import Adam
from nets.ssd import SSD300
from nets.ssd_training import MultiboxLoss
from utils.anchors import get_anchors
from utils.callbacks import (ExponentDecayScheduler, LossHistory,
ModelCheckpoint)
from utils.dataloader import SSDDatasets
from utils.utils import get_classes
from utils.utils_fit import fit_one_epoch
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
'''
Points to note when training your own object detection model:
1. Before training, carefully check that your data meets the required format. This repo expects the VOC
   format; you need to prepare input images and labels.
   Input images are .jpg files of any size; they are resized automatically before training.
   Greyscale images are converted to RGB automatically; no change is needed.
   If the input images have a suffix other than jpg, batch-convert them to jpg before training.
   Labels are .xml files containing the objects to detect, one label file per input image.
2. The trained weight files are saved in the logs folder, once per epoch. Nothing is saved if you only ran
   a few steps; be clear about the difference between an epoch and a step.
   During training this code does not keep only the lowest-loss weights, so training with the default
   parameters leaves 100 weight files; delete some if disk space is short.
   Saving fewer is not inherently better, nor is saving more; some people want everything kept and some
   want only a little, so keeping everything offers the most flexibility.
3. The loss value is used to judge convergence. What matters is the trend: the validation loss should keep
   dropping. If it barely changes any more, the model has essentially converged.
   The exact magnitude of the loss means nothing by itself; large or small only depends on how the loss is
   computed, and it does not have to approach 0. If you want prettier numbers, divide by 10000 inside the
   corresponding loss function.
   Losses recorded during training are saved in the loss_%Y_%m_%d_%H_%M_%S folder under logs.
4. Hyper-parameter tuning is a real craft; no parameters are universally good. The current defaults are
   ones I have verified to train properly, so I recommend starting with them.
   But parameters are not absolute: for instance, the learning rate can grow with the batch size, and very
   deep networks should not use too large a learning rate. These are rules of thumb; you will have to look
   things up and experiment yourselves.
'''
if __name__ == "__main__":
    #----------------------------------------------------#
    #   Whether to train in eager mode
    #----------------------------------------------------#
eager = False
    #--------------------------------------------------------#
    #   Be sure to change classes_path before training so
    #   it matches your own dataset
    #--------------------------------------------------------#
classes_path = 'model_data/voc_classes.txt'
    #----------------------------------------------------------------------------------------------------------------------------#
    #   See the README for downloading the weight files; they are available via a network drive. A model's pre-trained
    #   weights are usable across datasets because the features are general.
    #   The important part of the pre-trained weights is the backbone feature extractor, used for feature extraction.
    #   Pre-trained weights are necessary in 99% of cases; without them the backbone weights are too random, feature
    #   extraction is ineffective, and training results are poor.
    #
    #   If training was interrupted, you can set model_path to a weight file in the logs folder to reload the partially
    #   trained weights, and adjust the freeze/unfreeze stage parameters below to keep the epoch count continuous.
    #
    #   When model_path = '', no weights are loaded for the whole model.
    #
    #   The weights for the whole model are used here, loaded in train.py.
    #   To start from the backbone's pre-trained weights, set model_path to the backbone weights; only the backbone is
    #   loaded then.
    #   To train from scratch, set model_path = '' and Freeze_Train = False; training then starts from scratch with no
    #   backbone-freezing phase.
    #   Generally speaking, training from scratch performs poorly, because the weights are too random and feature
    #   extraction is ineffective.
    #
    #   Networks are rarely trained from scratch; at minimum the backbone weights are used. Some papers note that
    #   pre-training is unnecessary, mainly because their datasets are large and their tuning skills are excellent.
    #   If you must train the backbone, look into the imagenet dataset: first train a classification model whose
    #   backbone is shared with this model, then train from that.
    #----------------------------------------------------------------------------------------------------------------------------#
model_path = ''
    #------------------------------------------------------#
    #   Input shape
    #------------------------------------------------------#
input_shape = [300, 300]
    #----------------------------------------------------#
    #   Sets the anchor box sizes. The default
    #   anchors_size is tuned for the voc dataset and
    #   works in most cases! To detect small objects,
    #   modify anchors_size; usually shrinking the
    #   shallow-layer anchors is enough, since the
    #   shallow layers handle small objects!
    #   e.g. anchors_size = [21, 45, 99, 153, 207, 261, 315]
    #----------------------------------------------------#
anchors_size = [30, 60, 111, 162, 213, 264, 315]
    #----------------------------------------------------#
    #   Training has two stages: freeze and unfreeze.
    #   Running out of GPU memory has nothing to do with
    #   dataset size; on out-of-memory errors, lower
    #   batch_size.
    #   Because of the BatchNorm layers, batch_size must
    #   be at least 2, never 1.
    #----------------------------------------------------#
    #----------------------------------------------------#
    #   Freeze-stage training parameters.
    #   The model's backbone is frozen here, so the
    #   feature extractor does not change. Memory use is
    #   small; only the rest of the network is fine-tuned.
    #----------------------------------------------------#
Init_Epoch = 0
Freeze_Epoch = 50
Freeze_batch_size = 8
Freeze_lr = 5e-4
    #----------------------------------------------------#
    #   Unfreeze-stage training parameters.
    #   The backbone is no longer frozen, so the feature
    #   extractor changes. Memory use is larger; all the
    #   network's parameters are updated.
    #----------------------------------------------------#
UnFreeze_Epoch = 100
Unfreeze_batch_size = 8
Unfreeze_lr = 1e-4
    #------------------------------------------------------#
    #   Whether to use freeze training; by default the
    #   backbone is trained frozen first, then unfrozen.
    #------------------------------------------------------#
Freeze_Train = False
    #------------------------------------------------------#
    #   Whether to use multithreaded data loading;
    #   1 disables multithreading.
    #   Enabling it speeds up data reading but uses more
    #   memory. In keras, multithreading is sometimes much
    #   slower instead. Enable it only when IO is the
    #   bottleneck, i.e. the GPU computes far faster than
    #   images can be read.
    #   Only effective when eager is False.
    #------------------------------------------------------#
num_workers = 1
    #----------------------------------------------------#
    #   Get the image paths and labels
    #----------------------------------------------------#
train_annotation_path = '2012_train.txt'
val_annotation_path = '2012_val.txt'
    #----------------------------------------------------#
    #   Get the classes and anchors
    #----------------------------------------------------#
class_names, num_classes = get_classes(classes_path)
num_classes += 1
anchors = get_anchors(input_shape, anchors_size)
model = SSD300((input_shape[0], input_shape[1], 3), num_classes)
if model_path != '':
        #------------------------------------------------------#
        #   Load the pre-trained weights
        #------------------------------------------------------#
print('Load weights {}.'.format(model_path))
model.load_weights(model_path, by_name=True, skip_mismatch=True)
    #-------------------------------------------------------------------------------#
    #   Training-parameter setup:
    #   logging         the tensorboard save location
    #   checkpoint      details of weight saving; period sets how many epochs
    #                   between saves
    #   reduce_lr       how the learning rate decays
    #   early_stopping  stops training automatically when val_loss stops
    #                   improving, meaning the model has essentially converged
    #-------------------------------------------------------------------------------#
logging = TensorBoard(log_dir = 'logs/')
checkpoint = ModelCheckpoint('logs/ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
monitor = 'val_loss', save_weights_only = True, save_best_only = False, period = 1)
reduce_lr = ExponentDecayScheduler(decay_rate = 0.94, verbose = 1)
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
loss_history = LossHistory('logs/')
multiloss = MultiboxLoss(num_classes, neg_pos_ratio=3.0).compute_loss
    #---------------------------#
    #   Read the dataset txt files
    #---------------------------#
with open(train_annotation_path) as f:
train_lines = f.readlines()
with open(val_annotation_path) as f:
val_lines = f.readlines()
num_train = len(train_lines)
num_val = len(val_lines)
if Freeze_Train:
freeze_layers = 17
for i in range(freeze_layers): model.layers[i].trainable = False
print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model.layers)))
    #------------------------------------------------------#
    #   Backbone features are general; freeze training
    #   speeds things up and keeps the weights from being
    #   wrecked early in training.
    #   Init_Epoch is the starting epoch.
    #   Freeze_Epoch is the epoch where freezing ends.
    #   Unfreeze_Epoch is the total number of epochs.
    #   On OOM or insufficient GPU memory, lower Batch_size.
    #------------------------------------------------------#
if True:
batch_size = Freeze_batch_size
lr = Freeze_lr
start_epoch = Init_Epoch
end_epoch = Freeze_Epoch
epoch_step = num_train // batch_size
epoch_step_val = num_val // batch_size
if epoch_step == 0 or epoch_step_val == 0:
            raise ValueError('The dataset is too small to train on; please expand the dataset.')
train_dataloader = SSDDatasets(train_lines, input_shape, anchors, batch_size, num_classes, train = True)
val_dataloader = SSDDatasets(val_lines, input_shape, anchors, batch_size, num_classes, train = False)
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
if eager:
gen = tf.data.Dataset.from_generator(partial(train_dataloader.generate), (tf.float32, tf.float32))
gen_val = tf.data.Dataset.from_generator(partial(val_dataloader.generate), (tf.float32, tf.float32))
gen = gen.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
gen_val = gen_val.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate = lr, decay_steps = epoch_step, decay_rate=0.94, staircase=True)
optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule)
for epoch in range(start_epoch, end_epoch):
fit_one_epoch(model, multiloss, loss_history, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val,
end_epoch)
else:
model.compile(optimizer=Adam(lr = lr), loss = MultiboxLoss(num_classes, neg_pos_ratio=3.0).compute_loss)
model.fit_generator(
generator = train_dataloader,
steps_per_epoch = epoch_step,
validation_data = val_dataloader,
validation_steps = epoch_step_val,
epochs = end_epoch,
initial_epoch = start_epoch,
use_multiprocessing = True if num_workers > 1 else False,
workers = num_workers,
callbacks = [logging, checkpoint, reduce_lr, early_stopping, loss_history]
)
if Freeze_Train:
for i in range(freeze_layers): model.layers[i].trainable = True
if True:
batch_size = Unfreeze_batch_size
lr = Unfreeze_lr
start_epoch = Freeze_Epoch
end_epoch = UnFreeze_Epoch
epoch_step = num_train // batch_size
epoch_step_val = num_val // batch_size
if epoch_step == 0 or epoch_step_val == 0:
            raise ValueError('The dataset is too small to train on; please expand the dataset.')
train_dataloader = SSDDatasets(train_lines, input_shape, anchors, batch_size, num_classes, train = True)
val_dataloader = SSDDatasets(val_lines, input_shape, anchors, batch_size, num_classes, train = False)
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
if eager:
gen = tf.data.Dataset.from_generator(partial(train_dataloader.generate), (tf.float32, tf.float32))
gen_val = tf.data.Dataset.from_generator(partial(val_dataloader.generate), (tf.float32, tf.float32))
gen = gen.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
gen_val = gen_val.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate = lr, decay_steps = epoch_step, decay_rate=0.94, staircase=True)
optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule)
for epoch in range(start_epoch, end_epoch):
fit_one_epoch(model, multiloss, loss_history, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val,
end_epoch)
else:
model.compile(optimizer=Adam(lr = lr), loss = MultiboxLoss(num_classes, neg_pos_ratio=3.0).compute_loss)
model.fit_generator(
generator = train_dataloader,
steps_per_epoch = epoch_step,
validation_data = val_dataloader,
validation_steps = epoch_step_val,
epochs = end_epoch,
initial_epoch = start_epoch,
use_multiprocessing = True if num_workers > 1 else False,
workers = num_workers,
callbacks = [logging, checkpoint, reduce_lr, early_stopping, loss_history]
)
import numpy as np
class AnchorBox():
def __init__(self, input_shape, min_size, max_size=None, aspect_ratios=None, flip=True):
self.input_shape = input_shape
self.min_size = min_size
self.max_size = max_size
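        # Each requested ratio ar contributes both ar and 1/ar:
        # [1, 2]    -> [1, 1, 2, 1/2]
        # [1, 2, 3] -> [1, 1, 2, 1/2, 3, 1/3]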
self.aspect_ratios = []
for ar in aspect_ratios:
self.aspect_ratios.append(ar)
self.aspect_ratios.append(1.0 / ar)
def call(self, layer_shape, mask=None):
        # --------------------------------- #
        #   Width and height of the incoming
        #   feature layer, e.g. 38x38
        # --------------------------------- #
layer_height = layer_shape[0]
layer_width = layer_shape[1]
        # --------------------------------- #
        #   Width and height of the input
        #   image, e.g. 300x300
        # --------------------------------- #
img_height = self.input_shape[0]
img_width = self.input_shape[1]
box_widths = []
box_heights = []
        # --------------------------------- #
        #   self.aspect_ratios usually takes
        #   one of two values:
        #   [1, 1, 2, 1/2]
        #   [1, 1, 2, 1/2, 3, 1/3]
        # --------------------------------- #
for ar in self.aspect_ratios:
            # first add a small square
if ar == 1 and len(box_widths) == 0:
box_widths.append(self.min_size)
box_heights.append(self.min_size)
            # then add a larger square
elif ar == 1 and len(box_widths) > 0:
box_widths.append(np.sqrt(self.min_size * self.max_size))
box_heights.append(np.sqrt(self.min_size * self.max_size))
            # then add the rectangles
elif ar != 1:
box_widths.append(self.min_size * np.sqrt(ar))
box_heights.append(self.min_size / np.sqrt(ar))
        # --------------------------------- #
        #   Half-widths and half-heights of
        #   all the anchor boxes
        # --------------------------------- #
box_widths = 0.5 * np.array(box_widths)
box_heights = 0.5 * np.array(box_heights)
        # --------------------------------- #
        #   Stride of this feature layer
        # --------------------------------- #
step_x = img_width / layer_width
step_y = img_height / layer_height
        # --------------------------------- #
        #   Generate the grid centres
        # --------------------------------- #
linx = np.linspace(0.5 * step_x, img_width - 0.5 * step_x,
layer_width)
liny = np.linspace(0.5 * step_y, img_height - 0.5 * step_y,
layer_height)
centers_x, centers_y = np.meshgrid(linx, liny)
centers_x = centers_x.reshape(-1, 1)
centers_y = centers_y.reshape(-1, 1)
        # each anchor box needs two (centers_x, centers_y) pairs: the first computes the top-left corner, the second the bottom-right
num_anchors_ = len(self.aspect_ratios)
anchor_boxes = np.concatenate((centers_x, centers_y), axis=1)
anchor_boxes = np.tile(anchor_boxes, (1, 2 * num_anchors_))
        # get the top-left and bottom-right corners of the anchor boxes
anchor_boxes[:, ::4] -= box_widths
anchor_boxes[:, 1::4] -= box_heights
anchor_boxes[:, 2::4] += box_widths
anchor_boxes[:, 3::4] += box_heights
        # --------------------------------- #
        #   Turn the anchor boxes into
        #   fractions, i.e. normalise them
        # --------------------------------- #
anchor_boxes[:, ::2] /= img_width
anchor_boxes[:, 1::2] /= img_height
anchor_boxes = anchor_boxes.reshape(-1, 4)
anchor_boxes = np.minimum(np.maximum(anchor_boxes, 0.0), 1.0)
return anchor_boxes
#---------------------------------------------------#
#   Compute the sizes of the shared feature layers
#---------------------------------------------------#
def get_img_output_length(height, width):
filter_sizes = [3, 3, 3, 3, 3, 3, 3, 3]
padding = [1, 1, 1, 1, 1, 1, 0, 0]
stride = [2, 2, 2, 2, 2, 2, 1, 1]
feature_heights = []
feature_widths = []
for i in range(len(filter_sizes)):
height = (height + 2*padding[i] - filter_sizes[i]) // stride[i] + 1
width = (width + 2*padding[i] - filter_sizes[i]) // stride[i] + 1
feature_heights.append(height)
feature_widths.append(width)
return np.array(feature_heights)[-6:], np.array(feature_widths)[-6:]
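
#---------------------------------------------------#
#   For a 300x300 input this returns [38, 19, 10, 5,
#   3, 1] for both the heights and the widths.
#---------------------------------------------------#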
def get_anchors(input_shape = [300,300], anchors_size = [30, 60, 111, 162, 213, 264, 315]):
feature_heights, feature_widths = get_img_output_length(input_shape[0], input_shape[1])
aspect_ratios = [[1, 2], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
anchors = []
for i in range(len(feature_heights)):
anchors.append(AnchorBox(input_shape, anchors_size[i], max_size = anchors_size[i+1],
aspect_ratios = aspect_ratios[i]).call([feature_heights[i], feature_widths[i]]))
anchors = np.concatenate(anchors, axis=0)
return anchors
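
#---------------------------------------------------#
#   With the default sizes and aspect ratios this is
#   38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4
#   = 8732 anchors, the standard SSD300 count.
#---------------------------------------------------#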
if __name__ == '__main__':
import matplotlib.pyplot as plt
class AnchorBox_for_Vision():
def __init__(self, input_shape, min_size, max_size=None, aspect_ratios=None, flip=True):
            # input image size, 300x300
self.input_shape = input_shape
            # short side of the anchor box
self.min_size = min_size
            # long side of the anchor box
self.max_size = max_size
# [1, 2] => [1, 1, 2, 1/2]
# [1, 2, 3] => [1, 1, 2, 1/2, 3, 1/3]
self.aspect_ratios = []
for ar in aspect_ratios:
self.aspect_ratios.append(ar)
self.aspect_ratios.append(1.0 / ar)
def call(self, layer_shape, mask=None):
            # --------------------------------- #
            #   Width and height of the incoming
            #   feature layer, e.g. 3x3
            # --------------------------------- #
layer_height = layer_shape[0]
layer_width = layer_shape[1]
            # --------------------------------- #
            #   Width and height of the input
            #   image, e.g. 300x300
            # --------------------------------- #
img_height = self.input_shape[0]
img_width = self.input_shape[1]
box_widths = []
box_heights = []
            # --------------------------------- #
            #   self.aspect_ratios usually takes
            #   one of two values:
            #   [1, 1, 2, 1/2]
            #   [1, 1, 2, 1/2, 3, 1/3]
            # --------------------------------- #
for ar in self.aspect_ratios:
                # first add a small square
if ar == 1 and len(box_widths) == 0:
box_widths.append(self.min_size)
box_heights.append(self.min_size)
                # then add a larger square
elif ar == 1 and len(box_widths) > 0:
box_widths.append(np.sqrt(self.min_size * self.max_size))
box_heights.append(np.sqrt(self.min_size * self.max_size))
                # then add the rectangles
elif ar != 1:
box_widths.append(self.min_size * np.sqrt(ar))
box_heights.append(self.min_size / np.sqrt(ar))
print("box_widths:", box_widths)
print("box_heights:", box_heights)
            # --------------------------------- #
            #   Half-widths and half-heights of
            #   all the anchor boxes
            # --------------------------------- #
box_widths = 0.5 * np.array(box_widths)
box_heights = 0.5 * np.array(box_heights)
            # --------------------------------- #
            #   Stride of this feature layer;
            #   for 3x3 the stride is 100
            # --------------------------------- #
step_x = img_width / layer_width
step_y = img_height / layer_height
            # --------------------------------- #
            #   Generate the grid centres
            # --------------------------------- #
linx = np.linspace(0.5 * step_x, img_width - 0.5 * step_x, layer_width)
liny = np.linspace(0.5 * step_y, img_height - 0.5 * step_y, layer_height)
            # build the mesh grid
centers_x, centers_y = np.meshgrid(linx, liny)
centers_x = centers_x.reshape(-1, 1)
centers_y = centers_y.reshape(-1, 1)
if layer_height == 3:
fig = plt.figure()
ax = fig.add_subplot(111)
plt.ylim(-50,350)
plt.xlim(-50,350)
plt.scatter(centers_x,centers_y)
            # each anchor box needs two (centers_x, centers_y) pairs: the first computes the top-left corner, the second the bottom-right
num_anchors_ = len(self.aspect_ratios)
anchor_boxes = np.concatenate((centers_x, centers_y), axis=1)
anchor_boxes = np.tile(anchor_boxes, (1, 2 * num_anchors_))
            # get the top-left and bottom-right corners of the anchor boxes
anchor_boxes[:, ::4] -= box_widths
anchor_boxes[:, 1::4] -= box_heights
anchor_boxes[:, 2::4] += box_widths
anchor_boxes[:, 3::4] += box_heights
print(np.shape(anchor_boxes))
if layer_height == 3:
rect1 = plt.Rectangle([anchor_boxes[4, 0],anchor_boxes[4, 1]],box_widths[0]*2,box_heights[0]*2,color="r",fill=False)
rect2 = plt.Rectangle([anchor_boxes[4, 4],anchor_boxes[4, 5]],box_widths[1]*2,box_heights[1]*2,color="r",fill=False)
rect3 = plt.Rectangle([anchor_boxes[4, 8],anchor_boxes[4, 9]],box_widths[2]*2,box_heights[2]*2,color="r",fill=False)
rect4 = plt.Rectangle([anchor_boxes[4, 12],anchor_boxes[4, 13]],box_widths[3]*2,box_heights[3]*2,color="r",fill=False)
ax.add_patch(rect1)
ax.add_patch(rect2)
ax.add_patch(rect3)
ax.add_patch(rect4)
plt.show()
            # --------------------------------- #
            #   Turn the anchor boxes into
            #   fractions, i.e. normalise them
            # --------------------------------- #
anchor_boxes[:, ::2] /= img_width
anchor_boxes[:, 1::2] /= img_height
anchor_boxes = anchor_boxes.reshape(-1, 4)
anchor_boxes = np.minimum(np.maximum(anchor_boxes, 0.0), 1.0)
return anchor_boxes
    # the input image size is 300, 300
input_shape = [300, 300]
    # specify the anchor box sizes, i.e. their widths and heights
anchors_size = [30, 60, 111, 162, 213, 264, 315]
# feature_heights [38, 19, 10, 5, 3, 1]
# feature_widths [38, 19, 10, 5, 3, 1]
feature_heights, feature_widths = get_img_output_length(input_shape[0], input_shape[1])
    # specify the number of anchors per location: 4 or 6
aspect_ratios = [[1, 2], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
anchors = []
for i in range(len(feature_heights)):
anchors.append(AnchorBox_for_Vision(input_shape, anchors_size[i], max_size = anchors_size[i+1],
aspect_ratios = aspect_ratios[i]).call([feature_heights[i], feature_widths[i]]))
anchors = np.concatenate(anchors, axis=0)
print(np.shape(anchors))
import os
import warnings
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
import numpy as np
import scipy.signal
from tensorflow import keras
from tensorflow.keras import backend as K
class LossHistory(keras.callbacks.Callback):
def __init__(self, log_dir):
import datetime
curr_time = datetime.datetime.now()
time_str = datetime.datetime.strftime(curr_time,'%Y_%m_%d_%H_%M_%S')
self.log_dir = log_dir
self.time_str = time_str
self.save_path = os.path.join(self.log_dir, "loss_" + str(self.time_str))
self.losses = []
self.val_loss = []
try:
os.makedirs(self.save_path)
except OSError:
pass
def on_epoch_end(self, batch, logs={}):
self.losses.append(logs.get('loss'))
self.val_loss.append(logs.get('val_loss'))
with open(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".txt"), 'a') as f:
f.write(str(logs.get('loss')))
f.write("\n")
with open(os.path.join(self.save_path, "epoch_val_loss_" + str(self.time_str) + ".txt"), 'a') as f:
f.write(str(logs.get('val_loss')))
f.write("\n")
self.loss_plot()
def loss_plot(self):
iters = range(len(self.losses))
plt.figure()
plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss')
plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss')
try:
if len(self.losses) < 25:
num = 5
else:
num = 15
plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss')
plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss')
except:
pass
plt.grid(True)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('A Loss Curve')
plt.legend(loc="upper right")
plt.savefig(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".png"))
plt.cla()
plt.close("all")
class ExponentDecayScheduler(keras.callbacks.Callback):
def __init__(self,
decay_rate,
verbose=0):
super(ExponentDecayScheduler, self).__init__()
self.decay_rate = decay_rate
self.verbose = verbose
self.learning_rates = []
def on_epoch_end(self, batch, logs=None):
learning_rate = K.get_value(self.model.optimizer.lr) * self.decay_rate
K.set_value(self.model.optimizer.lr, learning_rate)
if self.verbose > 0:
print('Setting learning rate to %s.' % (learning_rate))
class ModelCheckpoint(keras.callbacks.Callback):
def __init__(self, filepath, monitor='val_loss', verbose=0,
save_best_only=False, save_weights_only=False,
mode='auto', period=1):
super(ModelCheckpoint, self).__init__()
self.monitor = monitor
self.verbose = verbose
self.filepath = filepath
self.save_best_only = save_best_only
self.save_weights_only = save_weights_only
self.period = period
self.epochs_since_last_save = 0
if mode not in ['auto', 'min', 'max']:
warnings.warn('ModelCheckpoint mode %s is unknown, '
'fallback to auto mode.' % (mode),
RuntimeWarning)
mode = 'auto'
if mode == 'min':
self.monitor_op = np.less
self.best = np.Inf
elif mode == 'max':
self.monitor_op = np.greater
self.best = -np.Inf
else:
if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
self.monitor_op = np.greater
self.best = -np.Inf
else:
self.monitor_op = np.less
self.best = np.Inf
def on_epoch_end(self, epoch, logs=None):
logs = logs or {}
self.epochs_since_last_save += 1
if self.epochs_since_last_save >= self.period:
self.epochs_since_last_save = 0
filepath = self.filepath.format(epoch=epoch + 1, **logs)
if self.save_best_only:
current = logs.get(self.monitor)
if current is None:
warnings.warn('Can save best model only with %s available, '
'skipping.' % (self.monitor), RuntimeWarning)
else:
if self.monitor_op(current, self.best):
if self.verbose > 0:
print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
' saving model to %s'
% (epoch + 1, self.monitor, self.best,
current, filepath))
self.best = current
if self.save_weights_only:
self.model.save_weights(filepath, overwrite=True)
else:
self.model.save(filepath, overwrite=True)
else:
if self.verbose > 0:
print('\nEpoch %05d: %s did not improve' %
(epoch + 1, self.monitor))
else:
if self.verbose > 0:
print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
if self.save_weights_only:
self.model.save_weights(filepath, overwrite=True)
else:
self.model.save(filepath, overwrite=True)
import math
from random import shuffle
import cv2
import numpy as np
from PIL import Image
from tensorflow import keras
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from utils.utils import cvtColor
class SSDDatasets(keras.utils.Sequence):
def __init__(self, annotation_lines, input_shape, anchors, batch_size, num_classes, train, overlap_threshold = 0.5):
self.annotation_lines = annotation_lines
self.length = len(self.annotation_lines)
self.input_shape = input_shape
self.anchors = anchors
self.num_anchors = len(anchors)
self.batch_size = batch_size
self.num_classes = num_classes
self.train = train
self.overlap_threshold = overlap_threshold
def __len__(self):
return math.ceil(len(self.annotation_lines) / float(self.batch_size))
def __getitem__(self, index):
image_data = []
box_data = []
for i in range(index * self.batch_size, (index + 1) * self.batch_size):
i = i % self.length
            #---------------------------------------------------#
            #   Random data augmentation during training;
            #   no random augmentation during validation
            #---------------------------------------------------#
image, box = self.get_random_data(self.annotation_lines[i], self.input_shape, random = self.train)
if len(box)!=0:
boxes = np.array(box[:,:4] , dtype=np.float32)
boxes[:, [0, 2]] = boxes[:,[0, 2]] / self.input_shape[1]
boxes[:, [1, 3]] = boxes[:,[1, 3]] / self.input_shape[0]
one_hot_label = np.eye(self.num_classes - 1)[np.array(box[:,4], np.int32)]
box = np.concatenate([boxes, one_hot_label], axis=-1)
box = self.assign_boxes(box)
image_data.append(image)
box_data.append(box)
return preprocess_input(np.array(image_data)), np.array(box_data)
def generate(self):
i = 0
while True:
image_data = []
box_data = []
for b in range(self.batch_size):
if i==0:
np.random.shuffle(self.annotation_lines)
                #---------------------------------------------------#
                #   Random data augmentation during training;
                #   no random augmentation during validation
                #---------------------------------------------------#
image, box = self.get_random_data(self.annotation_lines[i], self.input_shape, random = self.train)
i = (i+1) % self.length
if len(box)!=0:
boxes = np.array(box[:,:4] , dtype=np.float32)
boxes[:, [0, 2]] = boxes[:,[0, 2]] / self.input_shape[1]
boxes[:, [1, 3]] = boxes[:,[1, 3]] / self.input_shape[0]
one_hot_label = np.eye(self.num_classes - 1)[np.array(box[:,4], np.int32)]
box = np.concatenate([boxes, one_hot_label], axis=-1)
box = self.assign_boxes(box)
image_data.append(image)
box_data.append(box)
yield preprocess_input(np.array(image_data)), np.array(box_data)
def rand(self, a=0, b=1):
return np.random.rand()*(b-a) + a
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
line = annotation_line.split()
        #------------------------------#
        #   Read the image and convert it to RGB
        #------------------------------#
image = Image.open(line[0])
image = cvtColor(image)
        #------------------------------#
        #   Get the image size and the target size
        #------------------------------#
iw, ih = image.size
h, w = input_shape
        #------------------------------#
        #   Get the ground-truth boxes
        #------------------------------#
box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
if not random:
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
dx = (w-nw)//2
dy = (h-nh)//2
            #---------------------------------#
            #   Pad the leftover part of the image with grey bars
            #---------------------------------#
image = image.resize((nw,nh), Image.BICUBIC)
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image, np.float32)
            #---------------------------------#
            #   Adjust the ground-truth boxes accordingly
            #---------------------------------#
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
return image_data, box
        #------------------------------------------#
        #   Scale the image and distort its aspect ratio
        #------------------------------------------#
new_ar = w/h * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
scale = self.rand(.25, 2)
if new_ar < 1:
nh = int(scale*h)
nw = int(nh*new_ar)
else:
nw = int(scale*w)
nh = int(nw/new_ar)
image = image.resize((nw,nh), Image.BICUBIC)
        #------------------------------------------#
        #   Pad the leftover part of the image with grey bars
        #------------------------------------------#
dx = int(self.rand(0, w-nw))
dy = int(self.rand(0, h-nh))
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image = new_image
        #------------------------------------------#
        #   Flip the image
        #------------------------------------------#
flip = self.rand()<.5
if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
        #------------------------------------------#
        #   Distort the colours (HSV jitter)
        #------------------------------------------#
hue = self.rand(-hue, hue)
sat = self.rand(1, sat) if self.rand()<.5 else 1/self.rand(1, sat)
val = self.rand(1, val) if self.rand()<.5 else 1/self.rand(1, val)
x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
        # OpenCV float HSV puts hue in [0, 360], so wrap-around must use 360
        x[..., 0] += hue*360
        x[..., 0][x[..., 0]>360] -= 360
        x[..., 0][x[..., 0]<0] += 360
x[..., 1] *= sat
x[..., 2] *= val
x[x[:,:, 0]>360, 0] = 360
x[:, :, 1:][x[:, :, 1:]>1] = 1
x[x<0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)*255 # numpy array, RGB, 0 to 255
        #---------------------------------#
        #   Adjust the ground-truth boxes accordingly
        #---------------------------------#
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
if flip: box[:, [0,2]] = w - box[:, [2,0]]
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)]
return image_data, box
def on_epoch_begin(self):
shuffle(self.annotation_lines)
def iou(self, box):
        #---------------------------------------------#
        #   Compute the iou between this ground-truth
        #   box and all the anchors to judge how well
        #   they overlap
        #---------------------------------------------#
inter_upleft = np.maximum(self.anchors[:, :2], box[:2])
inter_botright = np.minimum(self.anchors[:, 2:4], box[2:])
inter_wh = inter_botright - inter_upleft
inter_wh = np.maximum(inter_wh, 0)
inter = inter_wh[:, 0] * inter_wh[:, 1]
        #---------------------------------------------#
        #   Area of the ground-truth box
        #---------------------------------------------#
area_true = (box[2] - box[0]) * (box[3] - box[1])
        #---------------------------------------------#
        #   Areas of the anchor boxes
        #---------------------------------------------#
area_gt = (self.anchors[:, 2] - self.anchors[:, 0])*(self.anchors[:, 3] - self.anchors[:, 1])
        #---------------------------------------------#
        #   Compute the iou
        #---------------------------------------------#
union = area_true + area_gt - inter
iou = inter / union
return iou
def encode_box(self, box, return_iou=True, variances = [0.1, 0.1, 0.2, 0.2]):
        #---------------------------------------------#
        #   Compute the overlap between this
        #   ground-truth box and the anchors.
        #   iou         [self.num_anchors]
        #   encoded_box [self.num_anchors, 5]
        #---------------------------------------------#
iou = self.iou(box)
encoded_box = np.zeros((self.num_anchors, 4 + return_iou))
        #---------------------------------------------#
        #   Find the anchors that overlap this
        #   ground-truth box well; those anchors are
        #   made responsible for predicting it
        #---------------------------------------------#
assign_mask = iou > self.overlap_threshold
        #---------------------------------------------#
        #   If no anchor overlaps by more than
        #   self.overlap_threshold, take the anchor
        #   with the largest overlap as the positive
        #---------------------------------------------#
if not assign_mask.any():
assign_mask[iou.argmax()] = True
        #---------------------------------------------#
        #   Assign using the iou
        #---------------------------------------------#
if return_iou:
encoded_box[:, -1][assign_mask] = iou[assign_mask]
        #---------------------------------------------#
        #   Pick out the matched anchors
        #---------------------------------------------#
assigned_anchors = self.anchors[assign_mask]
        #---------------------------------------------#
        #   Inverse encoding: turn the ground-truth box
        #   into the format of the ssd prediction.
        #   First compute its centre and size
        #---------------------------------------------#
box_center = 0.5 * (box[:2] + box[2:])
box_wh = box[2:] - box[:2]
        #---------------------------------------------#
        #   Then the centres and sizes of the matched anchors
        #---------------------------------------------#
assigned_anchors_center = (assigned_anchors[:, 0:2] + assigned_anchors[:, 2:4]) * 0.5
assigned_anchors_wh = (assigned_anchors[:, 2:4] - assigned_anchors[:, 0:2])
        #------------------------------------------------#
        #   Solve for the prediction ssd should output:
        #   first the centre offsets, then the size
        #   offsets. The variances rescale the magnitudes,
        #   defaulting to [0.1, 0.1, 0.2, 0.2]
        #------------------------------------------------#
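        # Concretely:
        #   t_xy = (box_center - anchor_center) / anchor_wh / variance_xy
        #   t_wh = log(box_wh / anchor_wh) / variance_wh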
encoded_box[:, :2][assign_mask] = box_center - assigned_anchors_center
encoded_box[:, :2][assign_mask] /= assigned_anchors_wh
encoded_box[:, :2][assign_mask] /= np.array(variances)[:2]
encoded_box[:, 2:4][assign_mask] = np.log(box_wh / assigned_anchors_wh)
encoded_box[:, 2:4][assign_mask] /= np.array(variances)[2:4]
return encoded_box.ravel()
def assign_boxes(self, boxes):
        #---------------------------------------------------#
        #   assignment has 3 parts:
        #   :4    the regression targets the network should output
        #   4:-1  the class of each anchor, background by default
        #   -1    whether the anchor contains an object
        #---------------------------------------------------#
assignment = np.zeros((self.num_anchors, 4 + self.num_classes + 1))
assignment[:, 4] = 1.0
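        # e.g. for VOC, num_classes = 21 (including background) and there
        # are 8732 anchors, so assignment has shape (8732, 26):
        # 4 offsets + 21 one-hot class slots + 1 object flag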
if len(boxes) == 0:
return assignment
        # compute the iou encoding for every ground-truth box
encoded_boxes = np.apply_along_axis(self.encode_box, 1, boxes[:, :4])
        #---------------------------------------------------#
        #   After the reshape, encoded_boxes has shape
        #   [num_true_box, num_anchors, 4 + 1],
        #   where 4 is the encoded box and 1 is the iou
        #---------------------------------------------------#
encoded_boxes = encoded_boxes.reshape(-1, self.num_anchors, 5)
        #---------------------------------------------------#
        #   [num_anchors] find the ground-truth box each
        #   anchor overlaps most
        #---------------------------------------------------#
best_iou = encoded_boxes[:, :, -1].max(axis=0)
best_iou_idx = encoded_boxes[:, :, -1].argmax(axis=0)
best_iou_mask = best_iou > 0
best_iou_idx = best_iou_idx[best_iou_mask]
#---------------------------------------------------#
#   Count how many anchors were matched
#---------------------------------------------------#
assign_num = len(best_iou_idx)
# Keep only the columns of the matched anchors
encoded_boxes = encoded_boxes[:, best_iou_mask, :]
#---------------------------------------------------#
#   Write the encoded ground-truth boxes into assignment
#---------------------------------------------------#
assignment[:, :4][best_iou_mask] = encoded_boxes[best_iou_idx, np.arange(assign_num), :4]
#----------------------------------------------------------#
#   Index 4 is the background probability; set it to 0
#   because these anchors now have a matched object
#----------------------------------------------------------#
assignment[:, 4][best_iou_mask] = 0
assignment[:, 5:-1][best_iou_mask] = boxes[best_iou_idx, 4:]
#----------------------------------------------------------#
#   The last column marks whether the anchor has a matched object
#----------------------------------------------------------#
assignment[:, -1][best_iou_mask] = 1
# assign_boxes thus yields, for this input image, the targets the network should predict
return assignment
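#---------------------------------------------------#
#   A minimal usage sketch (illustrative; the `encoder`
#   instance and box values are assumptions, not repo code):
#   given an encoder built with anchors of shape
#   [num_anchors, 4] and num_classes counting background,
#   boxes carry corner coordinates plus a one-hot label of
#   width num_classes - 1 (background excluded):
#       boxes  = np.array([[0.1, 0.2, 0.5, 0.6, 1.0, 0.0]])  # with num_classes == 3
#       y_true = encoder.assign_boxes(boxes)
#       # y_true.shape == (num_anchors, 4 + num_classes + 1)
#---------------------------------------------------#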
import numpy as np
from PIL import Image
#---------------------------------------------------------#
#   Convert the image to RGB to avoid errors when predicting
#   on grayscale input. The code only supports prediction on
#   RGB images; every other format is converted to RGB first.
#---------------------------------------------------------#
def cvtColor(image):
if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
return image
else:
image = image.convert('RGB')
return image
#---------------------------------------------------#
#   Resize the input image
#---------------------------------------------------#
def resize_image(image, size, letterbox_image):
iw, ih = image.size
w, h = size
if letterbox_image:
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
image = image.resize((nw,nh), Image.BICUBIC)
new_image = Image.new('RGB', size, (128,128,128))
new_image.paste(image, ((w-nw)//2, (h-nh)//2))
else:
new_image = image.resize((w, h), Image.BICUBIC)
return new_image
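#---------------------------------------------------#
#   A worked example (illustrative sizes): resizing a 500x375
#   image to (300, 300) with letterbox_image=True gives
#   scale = min(300/500, 300/375) = 0.6, so the image becomes
#   300x225 and is pasted at offset (0, 37) on a gray canvas;
#   without letterboxing it is simply stretched to 300x300
#---------------------------------------------------#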
#---------------------------------------------------#
#   Load the class names
#---------------------------------------------------#
def get_classes(classes_path):
with open(classes_path, encoding='utf-8') as f:
class_names = f.readlines()
class_names = [c.strip() for c in class_names]
return class_names, len(class_names)
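#---------------------------------------------------#
#   A minimal, self-contained sketch (not part of the
#   original repo) exercising the helpers above; the
#   image sizes are arbitrary assumptions
#---------------------------------------------------#
if __name__ == "__main__":
    test_image = Image.new('L', (500, 375))      # grayscale test input
    test_image = cvtColor(test_image)            # converted to RGB
    letterboxed = resize_image(test_image, (300, 300), letterbox_image=True)
    print(letterboxed.size)                      # (300, 300), gray-padded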
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
class BBoxUtility(object):
def __init__(self, num_classes, nms_thresh=0.45, top_k=300):
self.num_classes = num_classes
self._nms_thresh = nms_thresh
self._top_k = top_k
def ssd_correct_boxes(self, box_xy, box_wh, input_shape, image_shape, letterbox_image):
#-----------------------------------------------------------------#
#   Put the y axis first so the boxes can be multiplied
#   directly by the image height and width
#-----------------------------------------------------------------#
box_yx = box_xy[..., ::-1]
box_hw = box_wh[..., ::-1]
input_shape = np.array(input_shape)
image_shape = np.array(image_shape)
if letterbox_image:
#-----------------------------------------------------------------#
#   offset is the displacement of the valid image area relative
#   to the top-left corner of the letterboxed input;
#   new_shape is the scaled height/width
#-----------------------------------------------------------------#
new_shape = np.round(image_shape * np.min(input_shape/image_shape))
offset = (input_shape - new_shape)/2./input_shape
scale = input_shape/new_shape
box_yx = (box_yx - offset) * scale
box_hw *= scale
box_mins = box_yx - (box_hw / 2.)
box_maxes = box_yx + (box_hw / 2.)
boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1)
boxes *= np.concatenate([image_shape, image_shape], axis=-1)
return boxes
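#-----------------------------------------------------------------#
#   A worked example (illustrative): with input_shape (300, 300)
#   and image_shape (375, 500), new_shape = round((375, 500) * 0.6)
#   = (225, 300), offset = ((300 - 225) / 2 / 300, 0) = (0.125, 0)
#   and scale = (300/225, 1); the gray letterbox bars are undone
#   before the boxes are mapped back to original-image pixels
#-----------------------------------------------------------------#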
def decode_boxes(self, mbox_loc, anchors, variances):
# Width and height of the anchors
anchor_width = anchors[:, 2] - anchors[:, 0]
anchor_height = anchors[:, 3] - anchors[:, 1]
# Center of the anchors
anchor_center_x = 0.5 * (anchors[:, 2] + anchors[:, 0])
anchor_center_y = 0.5 * (anchors[:, 3] + anchors[:, 1])
# x/y offsets of the real box from the anchor center
decode_bbox_center_x = mbox_loc[:, 0] * anchor_width * variances[0]
decode_bbox_center_x += anchor_center_x
decode_bbox_center_y = mbox_loc[:, 1] * anchor_height * variances[1]
decode_bbox_center_y += anchor_center_y
# Recover the width and height of the real box
decode_bbox_width = np.exp(mbox_loc[:, 2] * variances[2])
decode_bbox_width *= anchor_width
decode_bbox_height = np.exp(mbox_loc[:, 3] * variances[3])
decode_bbox_height *= anchor_height
# Top-left and bottom-right corners of the real box
decode_bbox_xmin = decode_bbox_center_x - 0.5 * decode_bbox_width
decode_bbox_ymin = decode_bbox_center_y - 0.5 * decode_bbox_height
decode_bbox_xmax = decode_bbox_center_x + 0.5 * decode_bbox_width
decode_bbox_ymax = decode_bbox_center_y + 0.5 * decode_bbox_height
# Stack the corners together
decode_bbox = np.concatenate((decode_bbox_xmin[:, None],
decode_bbox_ymin[:, None],
decode_bbox_xmax[:, None],
decode_bbox_ymax[:, None]), axis=-1)
# Clip to the range [0, 1]
decode_bbox = np.minimum(np.maximum(decode_bbox, 0.0), 1.0)
return decode_bbox
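#---------------------------------------------------#
#   A worked example (illustrative numbers), the inverse
#   of the training-time encoding: an anchor with center
#   (0.25, 0.25) and size (0.5, 0.5), regression output
#   [5.0, 5.0, 0.0, 0.0] and variances [0.1, 0.1, 0.2, 0.2]
#   decode to
#       center = 0.25 + 5.0 * 0.5 * 0.1 = 0.5
#       size   = 0.5 * exp(0.0 * 0.2)   = 0.5
#---------------------------------------------------#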
def decode_box(self, predictions, anchors, image_shape, input_shape, letterbox_image, variances=[0.1, 0.1, 0.2, 0.2], confidence=0.5):
#---------------------------------------------------#
#   [:4] holds the regression predictions
#---------------------------------------------------#
mbox_loc = predictions[:, :, :4]
#---------------------------------------------------#
#   Class confidences
#---------------------------------------------------#
mbox_conf = predictions[:, :, 4:]
results = []
#----------------------------------------------------------------------------------------------------------------#
#   Process each image; since predict.py feeds in a single image,
#   the loop over range(len(mbox_loc)) runs only once
#----------------------------------------------------------------------------------------------------------------#
for i in range(len(mbox_loc)):
results.append([])
#--------------------------------#
#   Decode the anchors with the
#   regression results
#--------------------------------#
decode_bbox = self.decode_boxes(mbox_loc[i], anchors, variances)
for c in range(1, self.num_classes):
#--------------------------------#
#   Take the confidences of all
#   boxes of this class and compare
#   them against the threshold
#--------------------------------#
c_confs = mbox_conf[i, :, c]
c_confs_m = c_confs > confidence
if len(c_confs[c_confs_m]) > 0:
#-----------------------------------------#
#   Keep the boxes scoring above confidence
#-----------------------------------------#
boxes_to_process = decode_bbox[c_confs_m]
confs_to_process = c_confs[c_confs_m]
#-----------------------------------------#
#   IoU-based non-maximum suppression
#-----------------------------------------#
idx = tf.image.non_max_suppression(tf.cast(boxes_to_process, tf.float32),
tf.cast(confs_to_process, tf.float32),
self._top_k,
iou_threshold=self._nms_thresh).numpy()
#-----------------------------------------#
#   Keep the boxes that survive NMS
#-----------------------------------------#
good_boxes = boxes_to_process[idx]
confs = confs_to_process[idx][:, None]
labels = (c - 1) * np.ones((len(idx), 1))
#-----------------------------------------#
#   Stack label, confidence and box position
#-----------------------------------------#
c_pred = np.concatenate((good_boxes, labels, confs), axis=1)
# Append to the results
results[-1].extend(c_pred)
if len(results[-1]) > 0:
results[-1] = np.array(results[-1])
box_xy, box_wh = (results[-1][:, 0:2] + results[-1][:, 2:4])/2, results[-1][:, 2:4] - results[-1][:, 0:2]
results[-1][:, :4] = self.ssd_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)
return results
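#----------------------------------------------------------#
#   A minimal, self-contained sketch (not part of the
#   original repo): decode random predictions over a toy
#   4-anchor layout to show the expected shapes; every
#   number here is an illustrative assumption, not a real
#   SSD configuration
#----------------------------------------------------------#
if __name__ == "__main__":
    np.random.seed(0)
    toy_anchors = np.array([[0.1, 0.1, 0.3, 0.3],
                            [0.2, 0.2, 0.6, 0.6],
                            [0.4, 0.4, 0.9, 0.9],
                            [0.0, 0.0, 1.0, 1.0]])
    num_classes = 3                               # background + 2 object classes
    preds = np.random.uniform(0, 1, (1, 4, 4 + num_classes)).astype('float32')
    util = BBoxUtility(num_classes)
    out = util.decode_box(preds, toy_anchors, image_shape=(375, 500),
                          input_shape=(300, 300), letterbox_image=True,
                          confidence=0.3)
    # out[0]: [num_kept, 6] = y_min, x_min, y_max, x_max (pixels), label, confidence
    print(np.shape(out[0]))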