Commit c320b6ef authored by zhenyi

tf2 detection

parent 0fc002df
from tensorflow.keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
def VGG16(input_tensor):
    #----------------------------Backbone feature extraction begins---------------------------#
    # SSD structure, kept in the `net` dict
net = {}
# Block 1
net['input'] = input_tensor
# 300,300,3 -> 150,150,64
net['conv1_1'] = Conv2D(64, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv1_1')(net['input'])
net['conv1_2'] = Conv2D(64, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv1_2')(net['conv1_1'])
net['pool1'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
name='pool1')(net['conv1_2'])
# Block 2
# 150,150,64 -> 75,75,128
net['conv2_1'] = Conv2D(128, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv2_1')(net['pool1'])
net['conv2_2'] = Conv2D(128, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv2_2')(net['conv2_1'])
net['pool2'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
name='pool2')(net['conv2_2'])
# Block 3
# 75,75,128 -> 38,38,256
net['conv3_1'] = Conv2D(256, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv3_1')(net['pool2'])
net['conv3_2'] = Conv2D(256, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv3_2')(net['conv3_1'])
net['conv3_3'] = Conv2D(256, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv3_3')(net['conv3_2'])
net['pool3'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
name='pool3')(net['conv3_3'])
# Block 4
# 38,38,256 -> 19,19,512
net['conv4_1'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv4_1')(net['pool3'])
net['conv4_2'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv4_2')(net['conv4_1'])
net['conv4_3'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv4_3')(net['conv4_2'])
net['pool4'] = MaxPooling2D((2, 2), strides=(2, 2), padding='same',
name='pool4')(net['conv4_3'])
# Block 5
# 19,19,512 -> 19,19,512
net['conv5_1'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv5_1')(net['pool4'])
net['conv5_2'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv5_2')(net['conv5_1'])
net['conv5_3'] = Conv2D(512, kernel_size=(3,3),
activation='relu',
padding='same',
name='conv5_3')(net['conv5_2'])
net['pool5'] = MaxPooling2D((3, 3), strides=(1, 1), padding='same',
name='pool5')(net['conv5_3'])
# FC6
# 19,19,512 -> 19,19,1024
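    # dilation_rate=(6, 6) below is the atrous convolution used in the SSD
    # paper: fc6 keeps the 19x19 resolution while widening its receptive field.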
net['fc6'] = Conv2D(1024, kernel_size=(3,3), dilation_rate=(6, 6),
activation='relu', padding='same',
name='fc6')(net['pool5'])
# x = Dropout(0.5, name='drop6')(x)
# FC7
# 19,19,1024 -> 19,19,1024
net['fc7'] = Conv2D(1024, kernel_size=(1,1), activation='relu',
padding='same', name='fc7')(net['fc6'])
# x = Dropout(0.5, name='drop7')(x)
# Block 6
# 19,19,512 -> 10,10,512
net['conv6_1'] = Conv2D(256, kernel_size=(1,1), activation='relu',
padding='same',
name='conv6_1')(net['fc7'])
net['conv6_2'] = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv6_padding')(net['conv6_1'])
net['conv6_2'] = Conv2D(512, kernel_size=(3,3), strides=(2, 2),
activation='relu',
name='conv6_2')(net['conv6_2'])
# Block 7
# 10,10,512 -> 5,5,256
net['conv7_1'] = Conv2D(128, kernel_size=(1,1), activation='relu',
padding='same',
name='conv7_1')(net['conv6_2'])
net['conv7_2'] = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv7_padding')(net['conv7_1'])
net['conv7_2'] = Conv2D(256, kernel_size=(3,3), strides=(2, 2),
activation='relu', padding='valid',
name='conv7_2')(net['conv7_2'])
# Block 8
# 5,5,256 -> 3,3,256
net['conv8_1'] = Conv2D(128, kernel_size=(1,1), activation='relu',
padding='same',
name='conv8_1')(net['conv7_2'])
net['conv8_2'] = Conv2D(256, kernel_size=(3,3), strides=(1, 1),
activation='relu', padding='valid',
name='conv8_2')(net['conv8_1'])
# Block 9
# 3,3,256 -> 1,1,256
net['conv9_1'] = Conv2D(128, kernel_size=(1,1), activation='relu',
padding='same',
name='conv9_1')(net['conv8_2'])
net['conv9_2'] = Conv2D(256, kernel_size=(3,3), strides=(1, 1),
activation='relu', padding='valid',
name='conv9_2')(net['conv9_1'])
    #----------------------------Backbone feature extraction ends---------------------------#
return net
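
#----------------------------------------------------#
#   A minimal usage sketch: build the backbone on a
#   300x300x3 input and print the shapes of the six
#   feature layers SSD taps.
#----------------------------------------------------#
if __name__ == "__main__":
    from tensorflow.keras.layers import Input
    net = VGG16(Input(shape=(300, 300, 3)))
    for name in ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']:
        print(name, net[name].shape)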
#----------------------------------------------------#
#   predict.py from the video has been modified to
#   merge single-image prediction, camera detection
#   and the FPS test into one file; switch between
#   them by setting `mode`.
#----------------------------------------------------#
import time
import cv2
import numpy as np
import tensorflow as tf
from PIL import Image
from ssd import SSD
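#----------------------------------------------------#
#   Enable memory growth so TensorFlow allocates GPU
#   memory on demand instead of grabbing it all upfront.
#----------------------------------------------------#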
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
if __name__ == "__main__":
ssd = SSD()
    #----------------------------------------------------------------------------------------------------------#
    #   mode specifies the test mode:
    #   'predict'      single-image prediction. To modify the prediction step, e.g. saving the image or
    #                  cropping objects, read the detailed comments below first.
    #   'video'        video detection, from a camera or a video file; see the comments below.
    #   'fps'          FPS test, using img/street.jpg; see the comments below.
    #   'dir_predict'  walk a folder, detect and save. By default it reads the img folder and saves to
    #                  the img_out folder; see the comments below.
    #----------------------------------------------------------------------------------------------------------#
mode = "fps"
    #-------------------------------------------------------------------------#
    #   crop controls whether objects are cropped out after a
    #   single-image prediction.
    #   crop is only effective when mode='predict'.
    #-------------------------------------------------------------------------#
crop = False
    #----------------------------------------------------------------------------------------------------------#
    #   video_path is the path of the video; video_path=0 means detect from the camera.
    #   To detect a video file, set e.g. video_path = "xxx.mp4" to read xxx.mp4 from the root directory.
    #   video_save_path is where the result video is saved; video_save_path="" means do not save.
    #   To save the video, set e.g. video_save_path = "yyy.mp4" to save yyy.mp4 in the root directory.
    #   video_fps is the fps of the saved video.
    #   video_path, video_save_path and video_fps are only effective when mode='video'.
    #   When saving, exit with ctrl+c or run through to the last frame so the save completes properly.
    #----------------------------------------------------------------------------------------------------------#
video_path = 0
video_save_path = ""
video_fps = 25.0
    #-------------------------------------------------------------------------#
    #   test_interval is how many times the image is detected when
    #   measuring fps. In theory a larger test_interval gives a more
    #   accurate fps.
    #-------------------------------------------------------------------------#
test_interval = 100
    #-------------------------------------------------------------------------#
    #   dir_origin_path is the folder of images to detect.
    #   dir_save_path is where the detected images are saved.
    #   Both are only effective when mode='dir_predict'.
    #-------------------------------------------------------------------------#
dir_origin_path = "img/"
dir_save_path = "img_out/"
if mode == "predict":
        '''
        1. This code cannot batch-predict directly. For batch prediction, walk the folder with os.listdir()
           and open each image with Image.open before predicting. See get_dr_txt.py, which implements both
           the folder walk and saving the detection results.
        2. To save the detected image, use r_image.save("img.jpg"); edit predict.py directly.
        3. To get the coordinates of the predicted boxes, go into ssd.detect_image and read top, left,
           bottom and right in the drawing section.
        4. To crop objects out using the predicted boxes, go into ssd.detect_image and use the obtained
           top, left, bottom and right values to slice the original image with array indexing.
        5. To write extra text on the prediction, such as the count of a particular class, go into
           ssd.detect_image and test predicted_class in the drawing section; for example,
           if predicted_class == 'car': checks whether the current object is a car, then keep a count and
           write it with draw.text.
        '''
while True:
img = input('Input image filename:')
try:
image = Image.open(img)
except:
print('Open Error! Try again!')
continue
else:
r_image = ssd.detect_image(image, crop = crop)
r_image.show()
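                # As described in the notes above, the result can also be
                # saved here, e.g. r_image.save("img.jpg").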
elif mode == "video":
capture=cv2.VideoCapture(video_path)
if video_save_path!="":
fourcc = cv2.VideoWriter_fourcc(*'XVID')
size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)
ref, frame = capture.read()
if not ref:
            raise ValueError("Failed to read the camera (or video). Check that the camera is installed correctly (or that the video path is filled in correctly).")
fps = 0.0
while(True):
t1 = time.time()
            # read one frame
ref, frame = capture.read()
if not ref:
break
            # convert the format: BGR to RGB
frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
            # convert to a PIL Image
frame = Image.fromarray(np.uint8(frame))
            # run detection
frame = np.array(ssd.detect_image(frame))
            # RGB back to BGR to match the OpenCV display format
frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)
fps = ( fps + (1./(time.time()-t1)) ) / 2
print("fps= %.2f"%(fps))
frame = cv2.putText(frame, "fps= %.2f"%(fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow("video",frame)
c= cv2.waitKey(1) & 0xff
if video_save_path!="":
out.write(frame)
if c==27:
capture.release()
break
print("Video Detection Done!")
capture.release()
if video_save_path!="":
print("Save processed video to the path :" + video_save_path)
out.release()
cv2.destroyAllWindows()
elif mode == "fps":
img = Image.open('img/street.jpg')
tact_time = ssd.get_FPS(img, test_interval)
print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1')
elif mode == "dir_predict":
import os
from tqdm import tqdm
img_names = os.listdir(dir_origin_path)
for img_name in tqdm(img_names):
if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')):
image_path = os.path.join(dir_origin_path, img_name)
image = Image.open(image_path)
r_image = ssd.detect_image(image)
if not os.path.exists(dir_save_path):
os.makedirs(dir_save_path)
r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality=95, subsampling=0)
else:
raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.")
scipy==1.4.1
numpy==1.18.4
matplotlib==3.2.1
opencv_python==4.2.0.34
tqdm==4.46.1
Pillow==8.2.0
h5py==2.10.0
import colorsys
import os
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from PIL import ImageDraw, ImageFont
from nets.ssd import SSD300
from utils.utils_bbox import BBoxUtility
from utils.utils import get_classes, resize_image, cvtColor
from utils.anchors import get_anchors
'''
Required reading before training on your own dataset!
'''
class SSD(object):
_defaults = {
        #--------------------------------------------------------------------------#
        #   To predict with your own trained model you must change model_path
        #   and classes_path!
        #   model_path points to a weight file under the logs folder;
        #   classes_path points to the txt under model_data.
        #
        #   After training there are several weight files in the logs folder; pick
        #   one with a lower validation loss. A lower validation loss does not mean
        #   a higher mAP, only that those weights generalise better on the
        #   validation set.
        #   If a shape mismatch occurs, also check the model_path and classes_path
        #   used during training.
        #--------------------------------------------------------------------------#
"model_path" : 'model_data/ssd_weights.h5',
"classes_path" : 'model_data/voc_classes.txt',
        #---------------------------------------------------------------------#
        #   Input image size for prediction; use the same one as in training.
        #---------------------------------------------------------------------#
"input_shape" : [300, 300],
        #---------------------------------------------------------------------#
        #   Only predicted boxes whose score exceeds this confidence are kept.
        #---------------------------------------------------------------------#
"confidence" : 0.5,
        #---------------------------------------------------------------------#
        #   IoU threshold used by non-maximum suppression.
        #---------------------------------------------------------------------#
"nms_iou" : 0.45,
        #---------------------------------------------------------------------#
        #   Sizes of the anchor (prior) boxes.
        #---------------------------------------------------------------------#
'anchors_size' : [30, 60, 111, 162, 213, 264, 315],
        #---------------------------------------------------------------------#
        #   Controls whether letterbox_image is used to resize the input
        #   image without distortion. After repeated tests, resizing directly
        #   without letterbox_image gives better results here.
        #---------------------------------------------------------------------#
"letterbox_image" : False,
}
@classmethod
def get_defaults(cls, n):
if n in cls._defaults:
return cls._defaults[n]
else:
return "Unrecognized attribute name '" + n + "'"
    #---------------------------------------------------#
    #   Initialise the SSD
    #---------------------------------------------------#
def __init__(self, **kwargs):
self.__dict__.update(self._defaults)
for name, value in kwargs.items():
setattr(self, name, value)
        #---------------------------------------------------#
        #   Count the total number of classes
        #---------------------------------------------------#
self.class_names, self.num_classes = get_classes(self.classes_path)
self.anchors = get_anchors(self.input_shape, self.anchors_size)
self.num_classes = self.num_classes + 1
        #---------------------------------------------------#
        #   Set a different colour for each class's boxes
        #---------------------------------------------------#
hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
self.bbox_util = BBoxUtility(self.num_classes, nms_thresh=self.nms_iou)
self.generate()
    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
def generate(self):
model_path = os.path.expanduser(self.model_path)
assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
        #-------------------------------#
        #   Build the model and load the weights
        #-------------------------------#
self.ssd = SSD300([self.input_shape[0], self.input_shape[1], 3], self.num_classes)
self.ssd.load_weights(self.model_path, by_name=True)
print('{} model, anchors, and classes loaded.'.format(model_path))
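    #---------------------------------------------------#
    #   get_pred is wrapped in tf.function so repeated
    #   inference runs as a compiled graph instead of
    #   eagerly, which matters for the FPS test below.
    #---------------------------------------------------#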
@tf.function
def get_pred(self, photo):
preds = self.ssd(photo, training=False)
return preds
    #---------------------------------------------------#
    #   Detect an image
    #---------------------------------------------------#
def detect_image(self, image, crop = False):
image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors when
        #   predicting on greyscale images. The code only supports
        #   RGB prediction; all other image types are converted.
        #---------------------------------------------------------#
image = cvtColor(image)
        #---------------------------------------------------------#
        #   Pad the image with grey bars for a distortion-free
        #   resize; a direct resize also works for recognition.
        #---------------------------------------------------------#
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        #---------------------------------------------------------#
        #   Add the batch_size dimension, preprocess and normalise.
        #---------------------------------------------------------#
image_data = preprocess_input(np.expand_dims(np.array(image_data, dtype='float32'), 0))
preds = self.get_pred(image_data).numpy()
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
self.input_shape, self.letterbox_image, confidence=self.confidence)
        #--------------------------------------#
        #   If nothing is detected, return the original image
        #--------------------------------------#
if len(results[0])<=0:
return image
top_label = np.array(results[0][:, 4], dtype = 'int32')
top_conf = results[0][:, 5]
top_boxes = results[0][:, :4]
        #---------------------------------------------------------#
        #   Set the font and the box thickness
        #---------------------------------------------------------#
font = ImageFont.truetype(font='model_data/simhei.ttf', size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0], 1)
        #---------------------------------------------------------#
        #   Whether to crop the detected objects out
        #---------------------------------------------------------#
if crop:
for i, c in list(enumerate(top_boxes)):
top, left, bottom, right = top_boxes[i]
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
dir_save_path = "img_crop"
if not os.path.exists(dir_save_path):
os.makedirs(dir_save_path)
crop_image = image.crop([left, top, right, bottom])
crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0)
print("save crop_" + str(i) + ".png to " + dir_save_path)
        #---------------------------------------------------------#
        #   Draw on the image
        #---------------------------------------------------------#
for i, c in list(enumerate(top_label)):
predicted_class = self.class_names[int(c)]
box = top_boxes[i]
score = top_conf[i]
top, left, bottom, right = box
top = max(0, np.floor(top).astype('int32'))
left = max(0, np.floor(left).astype('int32'))
bottom = min(image.size[1], np.floor(bottom).astype('int32'))
right = min(image.size[0], np.floor(right).astype('int32'))
label = '{} {:.2f}'.format(predicted_class, score)
draw = ImageDraw.Draw(image)
label_size = draw.textsize(label, font)
label = label.encode('utf-8')
print(label, top, left, bottom, right)
if top - label_size[1] >= 0:
text_origin = np.array([left, top - label_size[1]])
else:
text_origin = np.array([left, top + 1])
for i in range(thickness):
draw.rectangle([left + i, top + i, right - i, bottom - i], outline=self.colors[c])
draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c])
draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
del draw
return image
def get_FPS(self, image, test_interval):
image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors when
        #   predicting on greyscale images. The code only supports
        #   RGB prediction; all other image types are converted.
        #---------------------------------------------------------#
image = cvtColor(image)
        #---------------------------------------------------------#
        #   Pad the image with grey bars for a distortion-free
        #   resize; a direct resize also works for recognition.
        #---------------------------------------------------------#
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        #---------------------------------------------------------#
        #   Add the batch_size dimension, preprocess and normalise.
        #---------------------------------------------------------#
image_data = preprocess_input(np.expand_dims(np.array(image_data, dtype='float32'), 0))
preds = self.get_pred(image_data).numpy()
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
self.input_shape, self.letterbox_image, confidence=self.confidence)
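        #---------------------------------------------------#
        #   The single pass above acts as a warm-up: it
        #   triggers the tf.function tracing so that graph
        #   construction is not counted in the timed loop.
        #---------------------------------------------------#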
t1 = time.time()
for _ in range(test_interval):
preds = self.get_pred(image_data).numpy()
            #-----------------------------------------------------------#
            #   Decode the predictions
            #-----------------------------------------------------------#
results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
self.input_shape, self.letterbox_image, confidence=self.confidence)
t2 = time.time()
tact_time = (t2 - t1) / test_interval
return tact_time
def get_map_txt(self, image_id, image, class_names, map_out_path):
f = open(os.path.join(map_out_path, "detection-results/"+image_id+".txt"),"w")
image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Convert the image to RGB here to avoid errors when
        #   predicting on greyscale images. The code only supports
        #   RGB prediction; all other image types are converted.
        #---------------------------------------------------------#
image = cvtColor(image)
        #---------------------------------------------------------#
        #   Pad the image with grey bars for a distortion-free
        #   resize; a direct resize also works for recognition.
        #---------------------------------------------------------#
image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image)
        #---------------------------------------------------------#
        #   Add the batch_size dimension, preprocess and normalise.
        #---------------------------------------------------------#
image_data = preprocess_input(np.expand_dims(np.array(image_data, dtype='float32'), 0))
preds = self.get_pred(image_data).numpy()
        #-----------------------------------------------------------#
        #   Decode the predictions
        #-----------------------------------------------------------#
results = self.bbox_util.decode_box(preds, self.anchors, image_shape,
self.input_shape, self.letterbox_image, confidence=self.confidence)
        #--------------------------------------#
        #   If nothing is detected, just return
        #--------------------------------------#
if len(results[0])<=0:
return
top_label = results[0][:, 4]
top_conf = results[0][:, 5]
top_boxes = results[0][:, :4]
for i, c in list(enumerate(top_label)):
predicted_class = self.class_names[int(c)]
box = top_boxes[i]
score = str(top_conf[i])
top, left, bottom, right = box
if predicted_class not in class_names:
continue
f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))
f.close()
return
#--------------------------------------------#
#   This snippet is for inspecting the
#   network structure
#--------------------------------------------#
from nets.ssd import SSD300
if __name__ == "__main__":
input_shape = [300, 300, 3]
num_classes = 21
model = SSD300(input_shape, num_classes)
model.summary()
# for i,layer in enumerate(model.layers):
# print(i,layer.name)
from functools import partial
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.optimizers import Adam
from nets.ssd import SSD300
from nets.ssd_training import MultiboxLoss
from utils.anchors import get_anchors
from utils.callbacks import (ExponentDecayScheduler, LossHistory,
ModelCheckpoint)
from utils.dataloader import SSDDatasets
from utils.utils import get_classes
from utils.utils_fit import fit_one_epoch
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
'''
Points to note when training your own object detection model:
1. Before training, carefully check that your data meets the required format. This repo expects the VOC
   format; you need to prepare input images and labels.
   Input images are .jpg files of any size; they are resized automatically before training.
   Greyscale images are converted to RGB automatically; no change is needed.
   If the input images have a suffix other than jpg, batch-convert them to jpg before training.
   Labels are .xml files containing the objects to detect, one label file per input image.
2. The trained weight files are saved in the logs folder, once per epoch. Nothing is saved if you only ran
   a few steps; be clear about the difference between an epoch and a step.
   During training this code does not keep only the lowest-loss weights, so training with the default
   parameters leaves 100 weight files; delete some if disk space is short.
   Saving fewer is not inherently better, nor is saving more; some people want everything kept and some
   want only a little, so keeping everything offers the most flexibility.
3. The loss value is used to judge convergence. What matters is the trend: the validation loss should keep
   dropping. If it barely changes any more, the model has essentially converged.
   The exact magnitude of the loss means nothing by itself; large or small only depends on how the loss is
   computed, and it does not have to approach 0. If you want prettier numbers, divide by 10000 inside the
   corresponding loss function.
   Losses recorded during training are saved in the loss_%Y_%m_%d_%H_%M_%S folder under logs.
4. Hyper-parameter tuning is a real craft; no parameters are universally good. The current defaults are
   ones I have verified to train properly, so I recommend starting with them.
   But parameters are not absolute: for instance, the learning rate can grow with the batch size, and very
   deep networks should not use too large a learning rate. These are rules of thumb; you will have to look
   things up and experiment yourselves.
'''
if __name__ == "__main__":
    #----------------------------------------------------#
    #   Whether to train in eager mode
    #----------------------------------------------------#
eager = False
    #--------------------------------------------------------#
    #   Be sure to change classes_path before training so
    #   it matches your own dataset
    #--------------------------------------------------------#
classes_path = 'model_data/voc_classes.txt'
    #----------------------------------------------------------------------------------------------------------------------------#
    #   See the README for downloading the weight files; they are available via a network drive. A model's pre-trained
    #   weights are usable across datasets because the features are general.
    #   The important part of the pre-trained weights is the backbone feature extractor, used for feature extraction.
    #   Pre-trained weights are necessary in 99% of cases; without them the backbone weights are too random, feature
    #   extraction is ineffective, and training results are poor.
    #
    #   If training was interrupted, you can set model_path to a weight file in the logs folder to reload the partially
    #   trained weights, and adjust the freeze/unfreeze stage parameters below to keep the epoch count continuous.
    #
    #   When model_path = '', no weights are loaded for the whole model.
    #
    #   The weights for the whole model are used here, loaded in train.py.
    #   To start from the backbone's pre-trained weights, set model_path to the backbone weights; only the backbone is
    #   loaded then.
    #   To train from scratch, set model_path = '' and Freeze_Train = False; training then starts from scratch with no
    #   backbone-freezing phase.
    #   Generally speaking, training from scratch performs poorly, because the weights are too random and feature
    #   extraction is ineffective.
    #
    #   Networks are rarely trained from scratch; at minimum the backbone weights are used. Some papers note that
    #   pre-training is unnecessary, mainly because their datasets are large and their tuning skills are excellent.
    #   If you must train the backbone, look into the imagenet dataset: first train a classification model whose
    #   backbone is shared with this model, then train from that.
    #----------------------------------------------------------------------------------------------------------------------------#
model_path = ''
    #------------------------------------------------------#
    #   Input shape
    #------------------------------------------------------#
input_shape = [300, 300]
    #----------------------------------------------------#
    #   Sets the anchor box sizes. The default
    #   anchors_size is tuned for the voc dataset and
    #   works in most cases! To detect small objects,
    #   modify anchors_size; usually shrinking the
    #   shallow-layer anchors is enough, since the
    #   shallow layers handle small objects!
    #   e.g. anchors_size = [21, 45, 99, 153, 207, 261, 315]
    #----------------------------------------------------#
anchors_size = [30, 60, 111, 162, 213, 264, 315]
    #----------------------------------------------------#
    #   Training has two stages: freeze and unfreeze.
    #   Running out of GPU memory has nothing to do with
    #   dataset size; on out-of-memory errors, lower
    #   batch_size.
    #   Because of the BatchNorm layers, batch_size must
    #   be at least 2, never 1.
    #----------------------------------------------------#
    #----------------------------------------------------#
    #   Freeze-stage training parameters.
    #   The model's backbone is frozen here, so the
    #   feature extractor does not change. Memory use is
    #   small; only the rest of the network is fine-tuned.
    #----------------------------------------------------#
Init_Epoch = 0
Freeze_Epoch = 50
Freeze_batch_size = 8
Freeze_lr = 5e-4
    #----------------------------------------------------#
    #   Unfreeze-stage training parameters.
    #   The backbone is no longer frozen, so the feature
    #   extractor changes. Memory use is larger; all the
    #   network's parameters are updated.
    #----------------------------------------------------#
UnFreeze_Epoch = 100
Unfreeze_batch_size = 8
Unfreeze_lr = 1e-4
    #------------------------------------------------------#
    #   Whether to use freeze training; by default the
    #   backbone is trained frozen first, then unfrozen.
    #------------------------------------------------------#
Freeze_Train = False
    #------------------------------------------------------#
    #   Whether to use multithreaded data loading;
    #   1 disables multithreading.
    #   Enabling it speeds up data reading but uses more
    #   memory. In keras, multithreading is sometimes much
    #   slower instead. Enable it only when IO is the
    #   bottleneck, i.e. the GPU computes far faster than
    #   images can be read.
    #   Only effective when eager is False.
    #------------------------------------------------------#
num_workers = 1
    #----------------------------------------------------#
    #   Get the image paths and labels
    #----------------------------------------------------#
train_annotation_path = '2012_train.txt'
val_annotation_path = '2012_val.txt'
    #----------------------------------------------------#
    #   Get the classes and anchors
    #----------------------------------------------------#
class_names, num_classes = get_classes(classes_path)
num_classes += 1
anchors = get_anchors(input_shape, anchors_size)
model = SSD300((input_shape[0], input_shape[1], 3), num_classes)
if model_path != '':
        #------------------------------------------------------#
        #   Load the pre-trained weights
        #------------------------------------------------------#
print('Load weights {}.'.format(model_path))
model.load_weights(model_path, by_name=True, skip_mismatch=True)
    #-------------------------------------------------------------------------------#
    #   Training-parameter setup:
    #   logging         the tensorboard save location
    #   checkpoint      details of weight saving; period sets how many epochs
    #                   between saves
    #   reduce_lr       how the learning rate decays
    #   early_stopping  stops training automatically when val_loss stops
    #                   improving, meaning the model has essentially converged
    #-------------------------------------------------------------------------------#
logging = TensorBoard(log_dir = 'logs/')
checkpoint = ModelCheckpoint('logs/ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
monitor = 'val_loss', save_weights_only = True, save_best_only = False, period = 1)
reduce_lr = ExponentDecayScheduler(decay_rate = 0.94, verbose = 1)
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
loss_history = LossHistory('logs/')
multiloss = MultiboxLoss(num_classes, neg_pos_ratio=3.0).compute_loss
    #---------------------------#
    #   Read the dataset txt files
    #---------------------------#
with open(train_annotation_path) as f:
train_lines = f.readlines()
with open(val_annotation_path) as f:
val_lines = f.readlines()
num_train = len(train_lines)
num_val = len(val_lines)
if Freeze_Train:
freeze_layers = 17
for i in range(freeze_layers): model.layers[i].trainable = False
print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model.layers)))
    #------------------------------------------------------#
    #   Backbone features are general; freeze training
    #   speeds things up and keeps the weights from being
    #   wrecked early in training.
    #   Init_Epoch is the starting epoch.
    #   Freeze_Epoch is the epoch where freezing ends.
    #   Unfreeze_Epoch is the total number of epochs.
    #   On OOM or insufficient GPU memory, lower Batch_size.
    #------------------------------------------------------#
if True:
batch_size = Freeze_batch_size
lr = Freeze_lr
start_epoch = Init_Epoch
end_epoch = Freeze_Epoch
epoch_step = num_train // batch_size
epoch_step_val = num_val // batch_size
if epoch_step == 0 or epoch_step_val == 0:
            raise ValueError('The dataset is too small to train on; please expand the dataset.')
train_dataloader = SSDDatasets(train_lines, input_shape, anchors, batch_size, num_classes, train = True)
val_dataloader = SSDDatasets(val_lines, input_shape, anchors, batch_size, num_classes, train = False)
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
if eager:
gen = tf.data.Dataset.from_generator(partial(train_dataloader.generate), (tf.float32, tf.float32))
gen_val = tf.data.Dataset.from_generator(partial(val_dataloader.generate), (tf.float32, tf.float32))
gen = gen.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
gen_val = gen_val.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate = lr, decay_steps = epoch_step, decay_rate=0.94, staircase=True)
optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule)
for epoch in range(start_epoch, end_epoch):
fit_one_epoch(model, multiloss, loss_history, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val,
end_epoch)
else:
model.compile(optimizer=Adam(lr = lr), loss = MultiboxLoss(num_classes, neg_pos_ratio=3.0).compute_loss)
model.fit_generator(
generator = train_dataloader,
steps_per_epoch = epoch_step,
validation_data = val_dataloader,
validation_steps = epoch_step_val,
epochs = end_epoch,
initial_epoch = start_epoch,
use_multiprocessing = True if num_workers > 1 else False,
workers = num_workers,
callbacks = [logging, checkpoint, reduce_lr, early_stopping, loss_history]
)
if Freeze_Train:
for i in range(freeze_layers): model.layers[i].trainable = True
if True:
batch_size = Unfreeze_batch_size
lr = Unfreeze_lr
start_epoch = Freeze_Epoch
end_epoch = UnFreeze_Epoch
epoch_step = num_train // batch_size
epoch_step_val = num_val // batch_size
if epoch_step == 0 or epoch_step_val == 0:
            raise ValueError('The dataset is too small to train on; please expand the dataset.')
train_dataloader = SSDDatasets(train_lines, input_shape, anchors, batch_size, num_classes, train = True)
val_dataloader = SSDDatasets(val_lines, input_shape, anchors, batch_size, num_classes, train = False)
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
if eager:
gen = tf.data.Dataset.from_generator(partial(train_dataloader.generate), (tf.float32, tf.float32))
gen_val = tf.data.Dataset.from_generator(partial(val_dataloader.generate), (tf.float32, tf.float32))
gen = gen.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
gen_val = gen_val.shuffle(buffer_size = batch_size).prefetch(buffer_size = batch_size)
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
initial_learning_rate = lr, decay_steps = epoch_step, decay_rate=0.94, staircase=True)
optimizer = tf.keras.optimizers.Adam(learning_rate = lr_schedule)
for epoch in range(start_epoch, end_epoch):
fit_one_epoch(model, multiloss, loss_history, optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val,
end_epoch)
else:
model.compile(optimizer=Adam(lr = lr), loss = MultiboxLoss(num_classes, neg_pos_ratio=3.0).compute_loss)
model.fit_generator(
generator = train_dataloader,
steps_per_epoch = epoch_step,
validation_data = val_dataloader,
validation_steps = epoch_step_val,
epochs = end_epoch,
initial_epoch = start_epoch,
use_multiprocessing = True if num_workers > 1 else False,
workers = num_workers,
callbacks = [logging, checkpoint, reduce_lr, early_stopping, loss_history]
)
import numpy as np
class AnchorBox():
def __init__(self, input_shape, min_size, max_size=None, aspect_ratios=None, flip=True):
self.input_shape = input_shape
self.min_size = min_size
self.max_size = max_size
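        # Each requested ratio ar contributes both ar and 1/ar:
        # [1, 2]    -> [1, 1, 2, 1/2]
        # [1, 2, 3] -> [1, 1, 2, 1/2, 3, 1/3]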
self.aspect_ratios = []
for ar in aspect_ratios:
self.aspect_ratios.append(ar)
self.aspect_ratios.append(1.0 / ar)
def call(self, layer_shape, mask=None):
        # --------------------------------- #
        #   Width and height of the incoming
        #   feature layer, e.g. 38x38
        # --------------------------------- #
layer_height = layer_shape[0]
layer_width = layer_shape[1]
        # --------------------------------- #
        #   Width and height of the input
        #   image, e.g. 300x300
        # --------------------------------- #
img_height = self.input_shape[0]
img_width = self.input_shape[1]
box_widths = []
box_heights = []
        # --------------------------------- #
        #   self.aspect_ratios usually takes
        #   one of two values:
        #   [1, 1, 2, 1/2]
        #   [1, 1, 2, 1/2, 3, 1/3]
        # --------------------------------- #
for ar in self.aspect_ratios:
            # first add a small square
if ar == 1 and len(box_widths) == 0:
box_widths.append(self.min_size)
box_heights.append(self.min_size)
            # then add a larger square
elif ar == 1 and len(box_widths) > 0:
box_widths.append(np.sqrt(self.min_size * self.max_size))
box_heights.append(np.sqrt(self.min_size * self.max_size))
            # then add the rectangles
elif ar != 1:
box_widths.append(self.min_size * np.sqrt(ar))
box_heights.append(self.min_size / np.sqrt(ar))
        # --------------------------------- #
        #   Half-widths and half-heights of
        #   all the anchor boxes
        # --------------------------------- #
box_widths = 0.5 * np.array(box_widths)
box_heights = 0.5 * np.array(box_heights)
        # --------------------------------- #
        #   Stride of this feature layer
        # --------------------------------- #
step_x = img_width / layer_width
step_y = img_height / layer_height
        # --------------------------------- #
        #   Generate the grid centres
        # --------------------------------- #
linx = np.linspace(0.5 * step_x, img_width - 0.5 * step_x,
layer_width)
liny = np.linspace(0.5 * step_y, img_height - 0.5 * step_y,
layer_height)
centers_x, centers_y = np.meshgrid(linx, liny)
centers_x = centers_x.reshape(-1, 1)
centers_y = centers_y.reshape(-1, 1)
        # each anchor box needs two (centers_x, centers_y) pairs: the first computes the top-left corner, the second the bottom-right
num_anchors_ = len(self.aspect_ratios)
anchor_boxes = np.concatenate((centers_x, centers_y), axis=1)
anchor_boxes = np.tile(anchor_boxes, (1, 2 * num_anchors_))
        # get the top-left and bottom-right corners of the anchor boxes
anchor_boxes[:, ::4] -= box_widths
anchor_boxes[:, 1::4] -= box_heights
anchor_boxes[:, 2::4] += box_widths
anchor_boxes[:, 3::4] += box_heights
        # --------------------------------- #
        #   Turn the anchor boxes into
        #   fractions, i.e. normalise them
        # --------------------------------- #
anchor_boxes[:, ::2] /= img_width
anchor_boxes[:, 1::2] /= img_height
anchor_boxes = anchor_boxes.reshape(-1, 4)
anchor_boxes = np.minimum(np.maximum(anchor_boxes, 0.0), 1.0)
return anchor_boxes
#---------------------------------------------------#
#   Compute the sizes of the shared feature layers
#---------------------------------------------------#
def get_img_output_length(height, width):
filter_sizes = [3, 3, 3, 3, 3, 3, 3, 3]
padding = [1, 1, 1, 1, 1, 1, 0, 0]
stride = [2, 2, 2, 2, 2, 2, 1, 1]
feature_heights = []
feature_widths = []
for i in range(len(filter_sizes)):
height = (height + 2*padding[i] - filter_sizes[i]) // stride[i] + 1
width = (width + 2*padding[i] - filter_sizes[i]) // stride[i] + 1
feature_heights.append(height)
feature_widths.append(width)
return np.array(feature_heights)[-6:], np.array(feature_widths)[-6:]
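
#---------------------------------------------------#
#   For a 300x300 input this returns [38, 19, 10, 5,
#   3, 1] for both the heights and the widths.
#---------------------------------------------------#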
def get_anchors(input_shape = [300,300], anchors_size = [30, 60, 111, 162, 213, 264, 315]):
feature_heights, feature_widths = get_img_output_length(input_shape[0], input_shape[1])
aspect_ratios = [[1, 2], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
anchors = []
for i in range(len(feature_heights)):
anchors.append(AnchorBox(input_shape, anchors_size[i], max_size = anchors_size[i+1],
aspect_ratios = aspect_ratios[i]).call([feature_heights[i], feature_widths[i]]))
anchors = np.concatenate(anchors, axis=0)
return anchors
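
#---------------------------------------------------#
#   With the default sizes and aspect ratios this is
#   38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4
#   = 8732 anchors, the standard SSD300 count.
#---------------------------------------------------#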
if __name__ == '__main__':
import matplotlib.pyplot as plt
class AnchorBox_for_Vision():
def __init__(self, input_shape, min_size, max_size=None, aspect_ratios=None, flip=True):
            # input image size, 300x300
self.input_shape = input_shape
            # short side of the anchor box
self.min_size = min_size
            # long side of the anchor box
self.max_size = max_size
# [1, 2] => [1, 1, 2, 1/2]
# [1, 2, 3] => [1, 1, 2, 1/2, 3, 1/3]
self.aspect_ratios = []
for ar in aspect_ratios:
self.aspect_ratios.append(ar)
self.aspect_ratios.append(1.0 / ar)
def call(self, layer_shape, mask=None):
            # --------------------------------- #
            #   Width and height of the incoming
            #   feature layer, e.g. 3x3
            # --------------------------------- #
layer_height = layer_shape[0]
layer_width = layer_shape[1]
            # --------------------------------- #
            #   Width and height of the input
            #   image, e.g. 300x300
            # --------------------------------- #
img_height = self.input_shape[0]
img_width = self.input_shape[1]
box_widths = []
box_heights = []
            # --------------------------------- #
            #   self.aspect_ratios usually takes
            #   one of two values:
            #   [1, 1, 2, 1/2]
            #   [1, 1, 2, 1/2, 3, 1/3]
            # --------------------------------- #
for ar in self.aspect_ratios:
                # first add a small square
if ar == 1 and len(box_widths) == 0:
box_widths.append(self.min_size)
box_heights.append(self.min_size)
                # then add a larger square
elif ar == 1 and len(box_widths) > 0:
box_widths.append(np.sqrt(self.min_size * self.max_size))
box_heights.append(np.sqrt(self.min_size * self.max_size))
                # then add the rectangles
elif ar != 1:
box_widths.append(self.min_size * np.sqrt(ar))
box_heights.append(self.min_size / np.sqrt(ar))
print("box_widths:", box_widths)
print("box_heights:", box_heights)
            # --------------------------------- #
            #   Half-widths and half-heights of
            #   all the anchor boxes
            # --------------------------------- #
box_widths = 0.5 * np.array(box_widths)
box_heights = 0.5 * np.array(box_heights)
            # --------------------------------- #
            #   Stride of this feature layer;
            #   for 3x3 the stride is 100
            # --------------------------------- #
step_x = img_width / layer_width
step_y = img_height / layer_height
            # --------------------------------- #
            #   Generate the grid centres
            # --------------------------------- #
linx = np.linspace(0.5 * step_x, img_width - 0.5 * step_x, layer_width)
liny = np.linspace(0.5 * step_y, img_height - 0.5 * step_y, layer_height)
            # build the mesh grid
centers_x, centers_y = np.meshgrid(linx, liny)
centers_x = centers_x.reshape(-1, 1)
centers_y = centers_y.reshape(-1, 1)
if layer_height == 3:
fig = plt.figure()
ax = fig.add_subplot(111)
plt.ylim(-50,350)
plt.xlim(-50,350)
plt.scatter(centers_x,centers_y)
            # each anchor box needs two (centers_x, centers_y) pairs: the first computes the top-left corner, the second the bottom-right
num_anchors_ = len(self.aspect_ratios)
anchor_boxes = np.concatenate((centers_x, centers_y), axis=1)
anchor_boxes = np.tile(anchor_boxes, (1, 2 * num_anchors_))
            # get the top-left and bottom-right corners of the anchor boxes
anchor_boxes[:, ::4] -= box_widths
anchor_boxes[:, 1::4] -= box_heights
anchor_boxes[:, 2::4] += box_widths
anchor_boxes[:, 3::4] += box_heights
print(np.shape(anchor_boxes))
if layer_height == 3:
rect1 = plt.Rectangle([anchor_boxes[4, 0],anchor_boxes[4, 1]],box_widths[0]*2,box_heights[0]*2,color="r",fill=False)
rect2 = plt.Rectangle([anchor_boxes[4, 4],anchor_boxes[4, 5]],box_widths[1]*2,box_heights[1]*2,color="r",fill=False)
rect3 = plt.Rectangle([anchor_boxes[4, 8],anchor_boxes[4, 9]],box_widths[2]*2,box_heights[2]*2,color="r",fill=False)
rect4 = plt.Rectangle([anchor_boxes[4, 12],anchor_boxes[4, 13]],box_widths[3]*2,box_heights[3]*2,color="r",fill=False)
ax.add_patch(rect1)
ax.add_patch(rect2)
ax.add_patch(rect3)
ax.add_patch(rect4)
plt.show()
            # --------------------------------- #
            #   Turn the anchor boxes into
            #   fractions, i.e. normalise them
            # --------------------------------- #
anchor_boxes[:, ::2] /= img_width
anchor_boxes[:, 1::2] /= img_height
anchor_boxes = anchor_boxes.reshape(-1, 4)
anchor_boxes = np.minimum(np.maximum(anchor_boxes, 0.0), 1.0)
return anchor_boxes
    # the input image size is 300, 300
input_shape = [300, 300]
    # specify the anchor box sizes, i.e. their widths and heights
anchors_size = [30, 60, 111, 162, 213, 264, 315]
# feature_heights [38, 19, 10, 5, 3, 1]
# feature_widths [38, 19, 10, 5, 3, 1]
feature_heights, feature_widths = get_img_output_length(input_shape[0], input_shape[1])
    # specify the number of anchors per location: 4 or 6
aspect_ratios = [[1, 2], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
anchors = []
for i in range(len(feature_heights)):
anchors.append(AnchorBox_for_Vision(input_shape, anchors_size[i], max_size = anchors_size[i+1],
aspect_ratios = aspect_ratios[i]).call([feature_heights[i], feature_widths[i]]))
anchors = np.concatenate(anchors, axis=0)
print(np.shape(anchors))
import os
import warnings
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
import numpy as np
import scipy.signal
from tensorflow import keras
from tensorflow.keras import backend as K
class LossHistory(keras.callbacks.Callback):
def __init__(self, log_dir):
import datetime
curr_time = datetime.datetime.now()
time_str = datetime.datetime.strftime(curr_time,'%Y_%m_%d_%H_%M_%S')
self.log_dir = log_dir
self.time_str = time_str
self.save_path = os.path.join(self.log_dir, "loss_" + str(self.time_str))
self.losses = []
self.val_loss = []
try:
os.makedirs(self.save_path)
except OSError:
pass
def on_epoch_end(self, batch, logs={}):
self.losses.append(logs.get('loss'))
self.val_loss.append(logs.get('val_loss'))
with open(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".txt"), 'a') as f:
f.write(str(logs.get('loss')))
f.write("\n")
with open(os.path.join(self.save_path, "epoch_val_loss_" + str(self.time_str) + ".txt"), 'a') as f:
f.write(str(logs.get('val_loss')))
f.write("\n")
self.loss_plot()
def loss_plot(self):
iters = range(len(self.losses))
plt.figure()
plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss')
plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss')
try:
if len(self.losses) < 25:
num = 5
else:
num = 15
plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss')
plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss')
except:
pass
plt.grid(True)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('A Loss Curve')
plt.legend(loc="upper right")
plt.savefig(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".png"))
plt.cla()
plt.close("all")
class ExponentDecayScheduler(keras.callbacks.Callback):
def __init__(self,
decay_rate,
verbose=0):
super(ExponentDecayScheduler, self).__init__()
self.decay_rate = decay_rate
self.verbose = verbose
self.learning_rates = []
def on_epoch_end(self, batch, logs=None):
learning_rate = K.get_value(self.model.optimizer.lr) * self.decay_rate
K.set_value(self.model.optimizer.lr, learning_rate)
if self.verbose > 0:
print('Setting learning rate to %s.' % (learning_rate))
class ModelCheckpoint(keras.callbacks.Callback):
def __init__(self, filepath, monitor='val_loss', verbose=0,
save_best_only=False, save_weights_only=False,
mode='auto', period=1):
super(ModelCheckpoint, self).__init__()
self.monitor = monitor
self.verbose = verbose
self.filepath = filepath
self.save_best_only = save_best_only
self.save_weights_only = save_weights_only
self.period = period
self.epochs_since_last_save = 0
if mode not in ['auto', 'min', 'max']:
warnings.warn('ModelCheckpoint mode %s is unknown, '
'fallback to auto mode.' % (mode),
RuntimeWarning)
mode = 'auto'
if mode == 'min':
self.monitor_op = np.less
self.best = np.Inf
elif mode == 'max':
self.monitor_op = np.greater
self.best = -np.Inf
else:
if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
self.monitor_op = np.greater
self.best = -np.Inf
else:
self.monitor_op = np.less
self.best = np.Inf
def on_epoch_end(self, epoch, logs=None):
logs = logs or {}
self.epochs_since_last_save += 1
if self.epochs_since_last_save >= self.period:
self.epochs_since_last_save = 0
filepath = self.filepath.format(epoch=epoch + 1, **logs)
if self.save_best_only:
current = logs.get(self.monitor)
if current is None:
warnings.warn('Can save best model only with %s available, '
'skipping.' % (self.monitor), RuntimeWarning)
else:
if self.monitor_op(current, self.best):
if self.verbose > 0:
print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
' saving model to %s'
% (epoch + 1, self.monitor, self.best,
current, filepath))
self.best = current
if self.save_weights_only:
self.model.save_weights(filepath, overwrite=True)
else:
self.model.save(filepath, overwrite=True)
else:
if self.verbose > 0:
print('\nEpoch %05d: %s did not improve' %
(epoch + 1, self.monitor))
else:
if self.verbose > 0:
print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
if self.save_weights_only:
self.model.save_weights(filepath, overwrite=True)
else:
self.model.save(filepath, overwrite=True)
import math
from random import shuffle
import cv2
import numpy as np
from PIL import Image
from tensorflow import keras
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from utils.utils import cvtColor
class SSDDatasets(keras.utils.Sequence):
def __init__(self, annotation_lines, input_shape, anchors, batch_size, num_classes, train, overlap_threshold = 0.5):
self.annotation_lines = annotation_lines
self.length = len(self.annotation_lines)
self.input_shape = input_shape
self.anchors = anchors
self.num_anchors = len(anchors)
self.batch_size = batch_size
self.num_classes = num_classes
self.train = train
self.overlap_threshold = overlap_threshold
def __len__(self):
return math.ceil(len(self.annotation_lines) / float(self.batch_size))
def __getitem__(self, index):
image_data = []
box_data = []
for i in range(index * self.batch_size, (index + 1) * self.batch_size):
i = i % self.length
            #---------------------------------------------------#
            #   Random data augmentation during training;
            #   no random augmentation during validation
            #---------------------------------------------------#
image, box = self.get_random_data(self.annotation_lines[i], self.input_shape, random = self.train)
if len(box)!=0:
boxes = np.array(box[:,:4] , dtype=np.float32)
boxes[:, [0, 2]] = boxes[:,[0, 2]] / self.input_shape[1]
boxes[:, [1, 3]] = boxes[:,[1, 3]] / self.input_shape[0]
one_hot_label = np.eye(self.num_classes - 1)[np.array(box[:,4], np.int32)]
box = np.concatenate([boxes, one_hot_label], axis=-1)
box = self.assign_boxes(box)
image_data.append(image)
box_data.append(box)
return preprocess_input(np.array(image_data)), np.array(box_data)
def generate(self):
i = 0
while True:
image_data = []
box_data = []
for b in range(self.batch_size):
if i==0:
np.random.shuffle(self.annotation_lines)
                #---------------------------------------------------#
                #   Random data augmentation during training;
                #   no random augmentation during validation
                #---------------------------------------------------#
image, box = self.get_random_data(self.annotation_lines[i], self.input_shape, random = self.train)
i = (i+1) % self.length
if len(box)!=0:
boxes = np.array(box[:,:4] , dtype=np.float32)
boxes[:, [0, 2]] = boxes[:,[0, 2]] / self.input_shape[1]
boxes[:, [1, 3]] = boxes[:,[1, 3]] / self.input_shape[0]
one_hot_label = np.eye(self.num_classes - 1)[np.array(box[:,4], np.int32)]
box = np.concatenate([boxes, one_hot_label], axis=-1)
box = self.assign_boxes(box)
image_data.append(image)
box_data.append(box)
yield preprocess_input(np.array(image_data)), np.array(box_data)
def rand(self, a=0, b=1):
return np.random.rand()*(b-a) + a
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
line = annotation_line.split()
        #------------------------------#
        #   Read the image and convert it to RGB
        #------------------------------#
image = Image.open(line[0])
image = cvtColor(image)
        #------------------------------#
        #   Get the image size and the target size
        #------------------------------#
iw, ih = image.size
h, w = input_shape
        #------------------------------#
        #   Get the ground-truth boxes
        #------------------------------#
box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
if not random:
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
dx = (w-nw)//2
dy = (h-nh)//2
            #---------------------------------#
            #   Pad the leftover part of the image with grey bars
            #---------------------------------#
image = image.resize((nw,nh), Image.BICUBIC)
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image, np.float32)
            #---------------------------------#
            #   Adjust the ground-truth boxes accordingly
            #---------------------------------#
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box
return image_data, box
        #------------------------------------------#
        #   Scale the image and distort its aspect ratio
        #------------------------------------------#
new_ar = w/h * self.rand(1-jitter,1+jitter) / self.rand(1-jitter,1+jitter)
scale = self.rand(.25, 2)
if new_ar < 1:
nh = int(scale*h)
nw = int(nh*new_ar)
else:
nw = int(scale*w)
nh = int(nw/new_ar)
image = image.resize((nw,nh), Image.BICUBIC)
        #------------------------------------------#
        #   Pad the leftover part of the image with grey bars
        #------------------------------------------#
dx = int(self.rand(0, w-nw))
dy = int(self.rand(0, h-nh))
new_image = Image.new('RGB', (w,h), (128,128,128))
new_image.paste(image, (dx, dy))
image = new_image
        #------------------------------------------#
        #   Flip the image
        #------------------------------------------#
flip = self.rand()<.5
if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
        #------------------------------------------#
        #   Distort the colours (HSV jitter)
        #------------------------------------------#
hue = self.rand(-hue, hue)
sat = self.rand(1, sat) if self.rand()<.5 else 1/self.rand(1, sat)
val = self.rand(1, val) if self.rand()<.5 else 1/self.rand(1, val)
x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
        # OpenCV float HSV puts hue in [0, 360], so wrap-around must use 360
        x[..., 0] += hue*360
        x[..., 0][x[..., 0]>360] -= 360
        x[..., 0][x[..., 0]<0] += 360
x[..., 1] *= sat
x[..., 2] *= val
x[x[:,:, 0]>360, 0] = 360
x[:, :, 1:][x[:, :, 1:]>1] = 1
x[x<0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)*255 # numpy array, RGB, 0 to 255
        #---------------------------------#
        #   Adjust the ground-truth boxes accordingly
        #---------------------------------#
if len(box)>0:
np.random.shuffle(box)
box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
if flip: box[:, [0,2]] = w - box[:, [2,0]]
box[:, 0:2][box[:, 0:2]<0] = 0
box[:, 2][box[:, 2]>w] = w
box[:, 3][box[:, 3]>h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w>1, box_h>1)]
return image_data, box
def on_epoch_begin(self):
shuffle(self.annotation_lines)
def iou(self, box):
        #---------------------------------------------#
        #   Compute the iou between this ground-truth
        #   box and all the anchors to judge how well
        #   they overlap
        #---------------------------------------------#
inter_upleft = np.maximum(self.anchors[:, :2], box[:2])
inter_botright = np.minimum(self.anchors[:, 2:4], box[2:])
inter_wh = inter_botright - inter_upleft
inter_wh = np.maximum(inter_wh, 0)
inter = inter_wh[:, 0] * inter_wh[:, 1]
        #---------------------------------------------#
        #   Area of the ground-truth box
        #---------------------------------------------#
area_true = (box[2] - box[0]) * (box[3] - box[1])
        #---------------------------------------------#
        #   Areas of the anchor boxes
        #---------------------------------------------#
area_gt = (self.anchors[:, 2] - self.anchors[:, 0])*(self.anchors[:, 3] - self.anchors[:, 1])
        #---------------------------------------------#
        #   Compute the iou
        #---------------------------------------------#
union = area_true + area_gt - inter
iou = inter / union
return iou
def encode_box(self, box, return_iou=True, variances = [0.1, 0.1, 0.2, 0.2]):
        #---------------------------------------------#
        #   Compute the overlap between this
        #   ground-truth box and the anchors.
        #   iou         [self.num_anchors]
        #   encoded_box [self.num_anchors, 5]
        #---------------------------------------------#
iou = self.iou(box)
encoded_box = np.zeros((self.num_anchors, 4 + return_iou))
        #---------------------------------------------#
        #   Find the anchors that overlap this
        #   ground-truth box well; those anchors are
        #   made responsible for predicting it
        #---------------------------------------------#
assign_mask = iou > self.overlap_threshold
        #---------------------------------------------#
        #   If no anchor overlaps by more than
        #   self.overlap_threshold, take the anchor
        #   with the largest overlap as the positive
        #---------------------------------------------#
if not assign_mask.any():
assign_mask[iou.argmax()] = True
        #---------------------------------------------#
        #   Assign using the iou
        #---------------------------------------------#
if return_iou:
encoded_box[:, -1][assign_mask] = iou[assign_mask]
        #---------------------------------------------#
        #   Pick out the matched anchors
        #---------------------------------------------#
assigned_anchors = self.anchors[assign_mask]
        #---------------------------------------------#
        #   Inverse encoding: turn the ground-truth box
        #   into the format of the ssd prediction.
        #   First compute its centre and size
        #---------------------------------------------#
box_center = 0.5 * (box[:2] + box[2:])
box_wh = box[2:] - box[:2]
        #---------------------------------------------#
        #   Then the centres and sizes of the matched anchors
        #---------------------------------------------#
assigned_anchors_center = (assigned_anchors[:, 0:2] + assigned_anchors[:, 2:4]) * 0.5
assigned_anchors_wh = (assigned_anchors[:, 2:4] - assigned_anchors[:, 0:2])
        #------------------------------------------------#
        #   Solve for the prediction ssd should output:
        #   first the centre offsets, then the size
        #   offsets. The variances rescale the magnitudes,
        #   defaulting to [0.1, 0.1, 0.2, 0.2]
        #------------------------------------------------#
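        # Concretely:
        #   t_xy = (box_center - anchor_center) / anchor_wh / variance_xy
        #   t_wh = log(box_wh / anchor_wh) / variance_wh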
encoded_box[:, :2][assign_mask] = box_center - assigned_anchors_center
encoded_box[:, :2][assign_mask] /= assigned_anchors_wh
encoded_box[:, :2][assign_mask] /= np.array(variances)[:2]
encoded_box[:, 2:4][assign_mask] = np.log(box_wh / assigned_anchors_wh)
encoded_box[:, 2:4][assign_mask] /= np.array(variances)[2:4]
return encoded_box.ravel()
def assign_boxes(self, boxes):
        #---------------------------------------------------#
        #   assignment has 3 parts:
        #   :4    the regression targets the network should output
        #   4:-1  the class of each anchor, background by default
        #   -1    whether the anchor contains an object
        #---------------------------------------------------#
assignment = np.zeros((self.num_anchors, 4 + self.num_classes + 1))
assignment[:, 4] = 1.0
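        # e.g. for VOC, num_classes = 21 (including background) and there
        # are 8732 anchors, so assignment has shape (8732, 26):
        # 4 offsets + 21 one-hot class slots + 1 object flag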
if len(boxes) == 0:
return assignment
        # compute the iou encoding for every ground-truth box
encoded_boxes = np.apply_along_axis(self.encode_box, 1, boxes[:, :4])
        #---------------------------------------------------#
        #   After the reshape, encoded_boxes has shape
        #   [num_true_box, num_anchors, 4 + 1],
        #   where 4 is the encoded box and 1 is the iou
        #---------------------------------------------------#
encoded_boxes = encoded_boxes.reshape(-1, self.num_anchors, 5)
        #---------------------------------------------------#
        #   [num_anchors] find the ground-truth box each
        #   anchor overlaps most
        #---------------------------------------------------#
best_iou = encoded_boxes[:, :, -1].max(axis=0)
best_iou_idx = encoded_boxes[:, :, -1].argmax(axis=0)
best_iou_mask = best_iou > 0
best_iou_idx = best_iou_idx[best_iou_mask]
#---------------------------------------------------#
#   Count how many anchors were matched
#---------------------------------------------------#
assign_num = len(best_iou_idx)
# Keep only the columns of the matched anchors
encoded_boxes = encoded_boxes[:, best_iou_mask, :]
#---------------------------------------------------#
#   Write the encoded ground-truth boxes into assignment
#---------------------------------------------------#
assignment[:, :4][best_iou_mask] = encoded_boxes[best_iou_idx, np.arange(assign_num), :4]
#----------------------------------------------------------#
#   Index 4 is the background probability; set it to 0
#   because these anchors now have a matched object
#----------------------------------------------------------#
assignment[:, 4][best_iou_mask] = 0
assignment[:, 5:-1][best_iou_mask] = boxes[best_iou_idx, 4:]
#----------------------------------------------------------#
#   The last column marks whether the anchor has a matched object
#----------------------------------------------------------#
assignment[:, -1][best_iou_mask] = 1
# assign_boxes thus yields, for this input image, the targets the network should predict
return assignment
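#---------------------------------------------------#
#   A minimal usage sketch (illustrative; the `encoder`
#   instance and box values are assumptions, not repo code):
#   given an encoder built with anchors of shape
#   [num_anchors, 4] and num_classes counting background,
#   boxes carry corner coordinates plus a one-hot label of
#   width num_classes - 1 (background excluded):
#       boxes  = np.array([[0.1, 0.2, 0.5, 0.6, 1.0, 0.0]])  # with num_classes == 3
#       y_true = encoder.assign_boxes(boxes)
#       # y_true.shape == (num_anchors, 4 + num_classes + 1)
#---------------------------------------------------#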
import numpy as np
from PIL import Image
#---------------------------------------------------------#
#   Convert the image to RGB to avoid errors when predicting
#   on grayscale input. The code only supports prediction on
#   RGB images; every other format is converted to RGB first.
#---------------------------------------------------------#
def cvtColor(image):
if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
return image
else:
image = image.convert('RGB')
return image
#---------------------------------------------------#
#   Resize the input image
#---------------------------------------------------#
def resize_image(image, size, letterbox_image):
iw, ih = image.size
w, h = size
if letterbox_image:
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
image = image.resize((nw,nh), Image.BICUBIC)
new_image = Image.new('RGB', size, (128,128,128))
new_image.paste(image, ((w-nw)//2, (h-nh)//2))
else:
new_image = image.resize((w, h), Image.BICUBIC)
return new_image
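#---------------------------------------------------#
#   A worked example (illustrative sizes): resizing a 500x375
#   image to (300, 300) with letterbox_image=True gives
#   scale = min(300/500, 300/375) = 0.6, so the image becomes
#   300x225 and is pasted at offset (0, 37) on a gray canvas;
#   without letterboxing it is simply stretched to 300x300
#---------------------------------------------------#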
#---------------------------------------------------#
#   Load the class names
#---------------------------------------------------#
def get_classes(classes_path):
with open(classes_path, encoding='utf-8') as f:
class_names = f.readlines()
class_names = [c.strip() for c in class_names]
return class_names, len(class_names)
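#---------------------------------------------------#
#   A minimal, self-contained sketch (not part of the
#   original repo) exercising the helpers above; the
#   image sizes are arbitrary assumptions
#---------------------------------------------------#
if __name__ == "__main__":
    test_image = Image.new('L', (500, 375))      # grayscale test input
    test_image = cvtColor(test_image)            # converted to RGB
    letterboxed = resize_image(test_image, (300, 300), letterbox_image=True)
    print(letterboxed.size)                      # (300, 300), gray-padded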
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
class BBoxUtility(object):
def __init__(self, num_classes, nms_thresh=0.45, top_k=300):
self.num_classes = num_classes
self._nms_thresh = nms_thresh
self._top_k = top_k
def ssd_correct_boxes(self, box_xy, box_wh, input_shape, image_shape, letterbox_image):
#-----------------------------------------------------------------#
#   Put the y axis first so the boxes can be multiplied
#   directly by the image height and width
#-----------------------------------------------------------------#
box_yx = box_xy[..., ::-1]
box_hw = box_wh[..., ::-1]
input_shape = np.array(input_shape)
image_shape = np.array(image_shape)
if letterbox_image:
#-----------------------------------------------------------------#
#   offset is the displacement of the valid image area relative
#   to the top-left corner of the letterboxed input;
#   new_shape is the scaled height/width
#-----------------------------------------------------------------#
new_shape = np.round(image_shape * np.min(input_shape/image_shape))
offset = (input_shape - new_shape)/2./input_shape
scale = input_shape/new_shape
box_yx = (box_yx - offset) * scale
box_hw *= scale
box_mins = box_yx - (box_hw / 2.)
box_maxes = box_yx + (box_hw / 2.)
boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1)
boxes *= np.concatenate([image_shape, image_shape], axis=-1)
return boxes
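#-----------------------------------------------------------------#
#   A worked example (illustrative): with input_shape (300, 300)
#   and image_shape (375, 500), new_shape = round((375, 500) * 0.6)
#   = (225, 300), offset = ((300 - 225) / 2 / 300, 0) = (0.125, 0)
#   and scale = (300/225, 1); the gray letterbox bars are undone
#   before the boxes are mapped back to original-image pixels
#-----------------------------------------------------------------#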
def decode_boxes(self, mbox_loc, anchors, variances):
# Width and height of the anchors
anchor_width = anchors[:, 2] - anchors[:, 0]
anchor_height = anchors[:, 3] - anchors[:, 1]
# Center of the anchors
anchor_center_x = 0.5 * (anchors[:, 2] + anchors[:, 0])
anchor_center_y = 0.5 * (anchors[:, 3] + anchors[:, 1])
# x/y offsets of the real box from the anchor center
decode_bbox_center_x = mbox_loc[:, 0] * anchor_width * variances[0]
decode_bbox_center_x += anchor_center_x
decode_bbox_center_y = mbox_loc[:, 1] * anchor_height * variances[1]
decode_bbox_center_y += anchor_center_y
# Recover the width and height of the real box
decode_bbox_width = np.exp(mbox_loc[:, 2] * variances[2])
decode_bbox_width *= anchor_width
decode_bbox_height = np.exp(mbox_loc[:, 3] * variances[3])
decode_bbox_height *= anchor_height
# Top-left and bottom-right corners of the real box
decode_bbox_xmin = decode_bbox_center_x - 0.5 * decode_bbox_width
decode_bbox_ymin = decode_bbox_center_y - 0.5 * decode_bbox_height
decode_bbox_xmax = decode_bbox_center_x + 0.5 * decode_bbox_width
decode_bbox_ymax = decode_bbox_center_y + 0.5 * decode_bbox_height
# Stack the corners together
decode_bbox = np.concatenate((decode_bbox_xmin[:, None],
decode_bbox_ymin[:, None],
decode_bbox_xmax[:, None],
decode_bbox_ymax[:, None]), axis=-1)
# Clip to the range [0, 1]
decode_bbox = np.minimum(np.maximum(decode_bbox, 0.0), 1.0)
return decode_bbox
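#---------------------------------------------------#
#   A worked example (illustrative numbers), the inverse
#   of the training-time encoding: an anchor with center
#   (0.25, 0.25) and size (0.5, 0.5), regression output
#   [5.0, 5.0, 0.0, 0.0] and variances [0.1, 0.1, 0.2, 0.2]
#   decode to
#       center = 0.25 + 5.0 * 0.5 * 0.1 = 0.5
#       size   = 0.5 * exp(0.0 * 0.2)   = 0.5
#---------------------------------------------------#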
def decode_box(self, predictions, anchors, image_shape, input_shape, letterbox_image, variances=[0.1, 0.1, 0.2, 0.2], confidence=0.5):
#---------------------------------------------------#
#   [:4] holds the regression predictions
#---------------------------------------------------#
mbox_loc = predictions[:, :, :4]
#---------------------------------------------------#
#   Class confidences
#---------------------------------------------------#
mbox_conf = predictions[:, :, 4:]
results = []
#----------------------------------------------------------------------------------------------------------------#
#   Process each image; since predict.py feeds in a single image,
#   the loop over range(len(mbox_loc)) runs only once
#----------------------------------------------------------------------------------------------------------------#
for i in range(len(mbox_loc)):
results.append([])
#--------------------------------#
#   Decode the anchors with the
#   regression results
#--------------------------------#
decode_bbox = self.decode_boxes(mbox_loc[i], anchors, variances)
for c in range(1, self.num_classes):
#--------------------------------#
#   Take the confidences of all
#   boxes of this class and compare
#   them against the threshold
#--------------------------------#
c_confs = mbox_conf[i, :, c]
c_confs_m = c_confs > confidence
if len(c_confs[c_confs_m]) > 0:
#-----------------------------------------#
#   Keep the boxes scoring above confidence
#-----------------------------------------#
boxes_to_process = decode_bbox[c_confs_m]
confs_to_process = c_confs[c_confs_m]
#-----------------------------------------#
#   IoU-based non-maximum suppression
#-----------------------------------------#
idx = tf.image.non_max_suppression(tf.cast(boxes_to_process, tf.float32),
tf.cast(confs_to_process, tf.float32),
self._top_k,
iou_threshold=self._nms_thresh).numpy()
#-----------------------------------------#
#   Keep the boxes that survive NMS
#-----------------------------------------#
good_boxes = boxes_to_process[idx]
confs = confs_to_process[idx][:, None]
labels = (c - 1) * np.ones((len(idx), 1))
#-----------------------------------------#
#   Stack label, confidence and box position
#-----------------------------------------#
c_pred = np.concatenate((good_boxes, labels, confs), axis=1)
# Append to the results
results[-1].extend(c_pred)
if len(results[-1]) > 0:
results[-1] = np.array(results[-1])
box_xy, box_wh = (results[-1][:, 0:2] + results[-1][:, 2:4])/2, results[-1][:, 2:4] - results[-1][:, 0:2]
results[-1][:, :4] = self.ssd_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)
return results
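#----------------------------------------------------------#
#   A minimal, self-contained sketch (not part of the
#   original repo): decode random predictions over a toy
#   4-anchor layout to show the expected shapes; every
#   number here is an illustrative assumption, not a real
#   SSD configuration
#----------------------------------------------------------#
if __name__ == "__main__":
    np.random.seed(0)
    toy_anchors = np.array([[0.1, 0.1, 0.3, 0.3],
                            [0.2, 0.2, 0.6, 0.6],
                            [0.4, 0.4, 0.9, 0.9],
                            [0.0, 0.0, 1.0, 1.0]])
    num_classes = 3                               # background + 2 object classes
    preds = np.random.uniform(0, 1, (1, 4, 4 + num_classes)).astype('float32')
    util = BBoxUtility(num_classes)
    out = util.decode_box(preds, toy_anchors, image_shape=(375, 500),
                          input_shape=(300, 300), letterbox_image=True,
                          confidence=0.3)
    # out[0]: [num_kept, 6] = y_min, x_min, y_max, x_max (pixels), label, confidence
    print(np.shape(out[0]))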