Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
dcuai
dlexamples
Commits
c320b6ef
Commit
c320b6ef
authored
Apr 15, 2022
by
zhenyi
Browse files
tf2 detection
parent
0fc002df
Changes
195
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1895 additions
and
0 deletions
+1895
-0
TensorFlow2x/ComputeVision/Detection/SSD/nets/vgg.py
TensorFlow2x/ComputeVision/Detection/SSD/nets/vgg.py
+130
-0
TensorFlow2x/ComputeVision/Detection/SSD/nets/vgg_copy_single.py
...low2x/ComputeVision/Detection/SSD/nets/vgg_copy_single.py
+130
-0
TensorFlow2x/ComputeVision/Detection/SSD/predict.py
TensorFlow2x/ComputeVision/Detection/SSD/predict.py
+148
-0
TensorFlow2x/ComputeVision/Detection/SSD/requirements.txt
TensorFlow2x/ComputeVision/Detection/SSD/requirements.txt
+7
-0
TensorFlow2x/ComputeVision/Detection/SSD/ssd.py
TensorFlow2x/ComputeVision/Detection/SSD/ssd.py
+277
-0
TensorFlow2x/ComputeVision/Detection/SSD/summary.py
TensorFlow2x/ComputeVision/Detection/SSD/summary.py
+14
-0
TensorFlow2x/ComputeVision/Detection/SSD/train.py
TensorFlow2x/ComputeVision/Detection/SSD/train.py
+272
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/__init__.py
TensorFlow2x/ComputeVision/Detection/SSD/utils/__init__.py
+1
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/__init__.cpython-36.pyc
...n/Detection/SSD/utils/__pycache__/__init__.cpython-36.pyc
+0
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/anchors.cpython-36.pyc
...on/Detection/SSD/utils/__pycache__/anchors.cpython-36.pyc
+0
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/callbacks.cpython-36.pyc
.../Detection/SSD/utils/__pycache__/callbacks.cpython-36.pyc
+0
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/dataloader.cpython-36.pyc
...Detection/SSD/utils/__pycache__/dataloader.cpython-36.pyc
+0
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/utils.cpython-36.pyc
...sion/Detection/SSD/utils/__pycache__/utils.cpython-36.pyc
+0
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/utils_bbox.cpython-36.pyc
...Detection/SSD/utils/__pycache__/utils_bbox.cpython-36.pyc
+0
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/utils_fit.cpython-36.pyc
.../Detection/SSD/utils/__pycache__/utils_fit.cpython-36.pyc
+0
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/anchors.py
TensorFlow2x/ComputeVision/Detection/SSD/utils/anchors.py
+262
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/callbacks.py
TensorFlow2x/ComputeVision/Detection/SSD/utils/callbacks.py
+149
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/dataloader.py
TensorFlow2x/ComputeVision/Detection/SSD/utils/dataloader.py
+331
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/utils.py
TensorFlow2x/ComputeVision/Detection/SSD/utils/utils.py
+42
-0
TensorFlow2x/ComputeVision/Detection/SSD/utils/utils_bbox.py
TensorFlow2x/ComputeVision/Detection/SSD/utils/utils_bbox.py
+132
-0
No files found.
TensorFlow2x/ComputeVision/Detection/SSD/nets/vgg.py
0 → 100644
View file @
c320b6ef
from
tensorflow.keras.layers
import
Conv2D
,
MaxPooling2D
,
ZeroPadding2D
def VGG16(input_tensor):
    """Build the VGG16-based backbone of SSD-300 as a dict of named tensors.

    The network is returned as a dict so downstream SSD heads can tap any
    intermediate feature map by name (e.g. 'conv4_3', 'fc7', 'conv6_2', ...).
    Layer names are fixed so pretrained weights load by name.

    Args:
        input_tensor: input image tensor; the shape comments below assume
            a 300x300x3 input.

    Returns:
        dict mapping layer name -> output tensor for every backbone layer.
    """
    net = {'input': input_tensor}

    def _conv(source, filters, size, layer_name, **extra):
        # Shorthand: ReLU-activated convolution reading from net[source].
        net[layer_name] = Conv2D(filters, kernel_size=size, activation='relu',
                                 name=layer_name, **extra)(net[source])

    def _pool(source, size, stride, layer_name):
        # Shorthand: same-padded max pooling reading from net[source].
        net[layer_name] = MaxPooling2D(size, strides=stride, padding='same',
                                       name=layer_name)(net[source])

    # Block 1: 300,300,3 -> 150,150,64
    _conv('input',   64, (3, 3), 'conv1_1', padding='same')
    _conv('conv1_1', 64, (3, 3), 'conv1_2', padding='same')
    _pool('conv1_2', (2, 2), (2, 2), 'pool1')

    # Block 2: 150,150,64 -> 75,75,128
    _conv('pool1',   128, (3, 3), 'conv2_1', padding='same')
    _conv('conv2_1', 128, (3, 3), 'conv2_2', padding='same')
    _pool('conv2_2', (2, 2), (2, 2), 'pool2')

    # Block 3: 75,75,128 -> 38,38,256
    _conv('pool2',   256, (3, 3), 'conv3_1', padding='same')
    _conv('conv3_1', 256, (3, 3), 'conv3_2', padding='same')
    _conv('conv3_2', 256, (3, 3), 'conv3_3', padding='same')
    _pool('conv3_3', (2, 2), (2, 2), 'pool3')

    # Block 4: 38,38,256 -> 19,19,512
    _conv('pool3',   512, (3, 3), 'conv4_1', padding='same')
    _conv('conv4_1', 512, (3, 3), 'conv4_2', padding='same')
    _conv('conv4_2', 512, (3, 3), 'conv4_3', padding='same')
    _pool('conv4_3', (2, 2), (2, 2), 'pool4')

    # Block 5: 19,19,512 -> 19,19,512
    # pool5 keeps the spatial size (3x3 window, stride 1, same padding).
    _conv('pool4',   512, (3, 3), 'conv5_1', padding='same')
    _conv('conv5_1', 512, (3, 3), 'conv5_2', padding='same')
    _conv('conv5_2', 512, (3, 3), 'conv5_3', padding='same')
    _pool('conv5_3', (3, 3), (1, 1), 'pool5')

    # FC6 (dilated conv replacing VGG's fc6): 19,19,512 -> 19,19,1024
    _conv('pool5', 1024, (3, 3), 'fc6', dilation_rate=(6, 6), padding='same')
    # FC7: 19,19,1024 -> 19,19,1024
    _conv('fc6', 1024, (1, 1), 'fc7', padding='same')

    # Block 6: 19,19,1024 -> 10,10,512
    _conv('fc7', 256, (1, 1), 'conv6_1', padding='same')
    net['conv6_2'] = ZeroPadding2D(padding=((1, 1), (1, 1)),
                                   name='conv6_padding')(net['conv6_1'])
    # NOTE: no padding kwarg here -> Keras default 'valid' (explicit pad above).
    net['conv6_2'] = Conv2D(512, kernel_size=(3, 3), strides=(2, 2),
                            activation='relu', name='conv6_2')(net['conv6_2'])

    # Block 7: 10,10,512 -> 5,5,256
    _conv('conv6_2', 128, (1, 1), 'conv7_1', padding='same')
    net['conv7_2'] = ZeroPadding2D(padding=((1, 1), (1, 1)),
                                   name='conv7_padding')(net['conv7_1'])
    _conv('conv7_2', 256, (3, 3), 'conv7_2', strides=(2, 2), padding='valid')

    # Block 8: 5,5,256 -> 3,3,256
    _conv('conv7_2', 128, (1, 1), 'conv8_1', padding='same')
    _conv('conv8_1', 256, (3, 3), 'conv8_2', strides=(1, 1), padding='valid')

    # Block 9: 3,3,256 -> 1,1,256
    _conv('conv8_2', 128, (1, 1), 'conv9_1', padding='same')
    _conv('conv9_1', 256, (3, 3), 'conv9_2', strides=(1, 1), padding='valid')
    return net
TensorFlow2x/ComputeVision/Detection/SSD/nets/vgg_copy_single.py
0 → 100644
View file @
c320b6ef
from
tensorflow.keras.layers
import
Conv2D
,
MaxPooling2D
,
ZeroPadding2D
def VGG16(input_tensor):
    """Build the VGG16-based backbone of SSD-300 as a dict of named tensors.

    The network is returned as a dict so downstream SSD heads can tap any
    intermediate feature map by name (e.g. 'conv4_3', 'fc7', 'conv6_2', ...).
    Layer names are fixed so pretrained weights load by name.

    Args:
        input_tensor: input image tensor; the shape comments below assume
            a 300x300x3 input.

    Returns:
        dict mapping layer name -> output tensor for every backbone layer.
    """
    net = {'input': input_tensor}

    def _conv(source, filters, size, layer_name, **extra):
        # Shorthand: ReLU-activated convolution reading from net[source].
        net[layer_name] = Conv2D(filters, kernel_size=size, activation='relu',
                                 name=layer_name, **extra)(net[source])

    def _pool(source, size, stride, layer_name):
        # Shorthand: same-padded max pooling reading from net[source].
        net[layer_name] = MaxPooling2D(size, strides=stride, padding='same',
                                       name=layer_name)(net[source])

    # Block 1: 300,300,3 -> 150,150,64
    _conv('input',   64, (3, 3), 'conv1_1', padding='same')
    _conv('conv1_1', 64, (3, 3), 'conv1_2', padding='same')
    _pool('conv1_2', (2, 2), (2, 2), 'pool1')

    # Block 2: 150,150,64 -> 75,75,128
    _conv('pool1',   128, (3, 3), 'conv2_1', padding='same')
    _conv('conv2_1', 128, (3, 3), 'conv2_2', padding='same')
    _pool('conv2_2', (2, 2), (2, 2), 'pool2')

    # Block 3: 75,75,128 -> 38,38,256
    _conv('pool2',   256, (3, 3), 'conv3_1', padding='same')
    _conv('conv3_1', 256, (3, 3), 'conv3_2', padding='same')
    _conv('conv3_2', 256, (3, 3), 'conv3_3', padding='same')
    _pool('conv3_3', (2, 2), (2, 2), 'pool3')

    # Block 4: 38,38,256 -> 19,19,512
    _conv('pool3',   512, (3, 3), 'conv4_1', padding='same')
    _conv('conv4_1', 512, (3, 3), 'conv4_2', padding='same')
    _conv('conv4_2', 512, (3, 3), 'conv4_3', padding='same')
    _pool('conv4_3', (2, 2), (2, 2), 'pool4')

    # Block 5: 19,19,512 -> 19,19,512
    # pool5 keeps the spatial size (3x3 window, stride 1, same padding).
    _conv('pool4',   512, (3, 3), 'conv5_1', padding='same')
    _conv('conv5_1', 512, (3, 3), 'conv5_2', padding='same')
    _conv('conv5_2', 512, (3, 3), 'conv5_3', padding='same')
    _pool('conv5_3', (3, 3), (1, 1), 'pool5')

    # FC6 (dilated conv replacing VGG's fc6): 19,19,512 -> 19,19,1024
    _conv('pool5', 1024, (3, 3), 'fc6', dilation_rate=(6, 6), padding='same')
    # FC7: 19,19,1024 -> 19,19,1024
    _conv('fc6', 1024, (1, 1), 'fc7', padding='same')

    # Block 6: 19,19,1024 -> 10,10,512
    _conv('fc7', 256, (1, 1), 'conv6_1', padding='same')
    net['conv6_2'] = ZeroPadding2D(padding=((1, 1), (1, 1)),
                                   name='conv6_padding')(net['conv6_1'])
    # NOTE: no padding kwarg here -> Keras default 'valid' (explicit pad above).
    net['conv6_2'] = Conv2D(512, kernel_size=(3, 3), strides=(2, 2),
                            activation='relu', name='conv6_2')(net['conv6_2'])

    # Block 7: 10,10,512 -> 5,5,256
    _conv('conv6_2', 128, (1, 1), 'conv7_1', padding='same')
    net['conv7_2'] = ZeroPadding2D(padding=((1, 1), (1, 1)),
                                   name='conv7_padding')(net['conv7_1'])
    _conv('conv7_2', 256, (3, 3), 'conv7_2', strides=(2, 2), padding='valid')

    # Block 8: 5,5,256 -> 3,3,256
    _conv('conv7_2', 128, (1, 1), 'conv8_1', padding='same')
    _conv('conv8_1', 256, (3, 3), 'conv8_2', strides=(1, 1), padding='valid')

    # Block 9: 3,3,256 -> 1,1,256
    _conv('conv8_2', 128, (1, 1), 'conv9_1', padding='same')
    _conv('conv9_1', 256, (3, 3), 'conv9_2', strides=(1, 1), padding='valid')
    return net
TensorFlow2x/ComputeVision/Detection/SSD/predict.py
0 → 100644
View file @
c320b6ef
#----------------------------------------------------#
# 对视频中的predict.py进行了修改,
# 将单张图片预测、摄像头检测和FPS测试功能
# 整合到了一个py文件中,通过指定mode进行模式的修改。
#----------------------------------------------------#
import
time
import
cv2
import
numpy
as
np
import
tensorflow
as
tf
from
PIL
import
Image
from
ssd
import
SSD
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    # Allocate GPU memory on demand instead of reserving it all at startup.
    tf.config.experimental.set_memory_growth(gpu, True)

if __name__ == "__main__":
    ssd = SSD()
    #----------------------------------------------------------------------#
    # mode selects what this script does:
    #   'predict'     single-image prediction from stdin filenames
    #   'video'       camera / video-file detection (see the video_* options)
    #   'fps'         FPS benchmark using img/street.jpg
    #   'dir_predict' detect every image in a folder and save the results
    #----------------------------------------------------------------------#
    mode = "fps"
    #----------------------------------------------------------------------#
    # crop: save a cutout of each detected object; only used in 'predict'.
    #----------------------------------------------------------------------#
    crop = False
    #----------------------------------------------------------------------#
    # video_path:      0 = webcam, or a file path such as "xxx.mp4".
    # video_save_path: "" = do not save; otherwise the output file path.
    # video_fps:       frame rate of the saved video.
    # All three are only used when mode == 'video'. When saving, press
    # ESC (or let the video run to its last frame) to finalize the file.
    #----------------------------------------------------------------------#
    video_path = 0
    video_save_path = ""
    video_fps = 25.0
    #----------------------------------------------------------------------#
    # test_interval: number of detection repeats when measuring FPS;
    # larger values give a steadier estimate. Only used in 'fps'.
    #----------------------------------------------------------------------#
    test_interval = 100
    #----------------------------------------------------------------------#
    # Input/output folders; only used when mode == 'dir_predict'.
    #----------------------------------------------------------------------#
    dir_origin_path = "img/"
    dir_save_path = "img_out/"

    if mode == "predict":
        '''
        Notes for extending single-image prediction:
        1. For batch prediction, walk a folder with os.listdir() and open each
           file with Image.open (see get_dr_txt.py for a full example).
        2. To save the annotated image, call r_image.save("img.jpg").
        3. Box coordinates (top, left, bottom, right) are available inside
           ssd.detect_image, in the drawing section.
        4. To crop detections, slice the original image with those four values.
        5. To add extra text (e.g. per-class counts), test predicted_class in
           the drawing section and use draw.text.
        '''
        while True:
            img = input('Input image filename:')
            try:
                image = Image.open(img)
            except:
                print('Open Error! Try again!')
                continue
            else:
                r_image = ssd.detect_image(image, crop=crop)
                r_image.show()

    elif mode == "video":
        capture = cv2.VideoCapture(video_path)
        if video_save_path != "":
            # Prepare a writer matching the capture's frame size.
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                    int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)

        # Probe one frame up front so a bad device/path fails loudly.
        ref, frame = capture.read()
        if not ref:
            raise ValueError("未能正确读取摄像头(视频),请注意是否正确安装摄像头(是否正确填写视频路径)。")

        fps = 0.0
        while True:
            t1 = time.time()
            # Grab the next frame; stop at end of stream.
            ref, frame = capture.read()
            if not ref:
                break
            # BGR -> RGB, then wrap as a PIL Image for the detector.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = Image.fromarray(np.uint8(frame))
            # Run detection and get the annotated frame back as an array.
            frame = np.array(ssd.detect_image(frame))
            # RGB -> BGR so OpenCV displays the colors correctly.
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            # Exponential moving average of the instantaneous frame rate.
            fps = (fps + (1. / (time.time() - t1))) / 2
            print("fps= %.2f" % (fps))
            frame = cv2.putText(frame, "fps= %.2f" % (fps), (0, 40),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            cv2.imshow("video", frame)
            c = cv2.waitKey(1) & 0xff
            if video_save_path != "":
                out.write(frame)
            if c == 27:  # ESC pressed
                capture.release()
                break

        print("Video Detection Done!")
        capture.release()
        if video_save_path != "":
            print("Save processed video to the path :" + video_save_path)
            out.release()
        cv2.destroyAllWindows()

    elif mode == "fps":
        img = Image.open('img/street.jpg')
        tact_time = ssd.get_FPS(img, test_interval)
        print(str(tact_time) + ' seconds, ' + str(1 / tact_time) + 'FPS, @batch_size 1')

    elif mode == "dir_predict":
        import os
        from tqdm import tqdm

        img_names = os.listdir(dir_origin_path)
        for img_name in tqdm(img_names):
            # Only process files with a recognized image extension.
            if img_name.lower().endswith(('.bmp', '.dib', '.png', '.jpg', '.jpeg',
                                          '.pbm', '.pgm', '.ppm', '.tif', '.tiff')):
                image_path = os.path.join(dir_origin_path, img_name)
                image = Image.open(image_path)
                r_image = ssd.detect_image(image)
                if not os.path.exists(dir_save_path):
                    os.makedirs(dir_save_path)
                r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")),
                             quality=95, subsampling=0)

    else:
        raise AssertionError("Please specify the correct mode: 'predict', 'video', 'fps' or 'dir_predict'.")
\ No newline at end of file
TensorFlow2x/ComputeVision/Detection/SSD/requirements.txt
0 → 100644
View file @
c320b6ef
scipy==1.4.1
numpy==1.18.4
matplotlib==3.2.1
opencv_python==4.2.0.34
tqdm==4.46.1
Pillow==8.2.0
h5py==2.10.0
TensorFlow2x/ComputeVision/Detection/SSD/ssd.py
0 → 100644
View file @
c320b6ef
import
colorsys
import
os
import
time
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.keras
import
backend
as
K
from
tensorflow.keras.applications.imagenet_utils
import
preprocess_input
from
PIL
import
ImageDraw
,
ImageFont
from
nets.ssd
import
SSD300
from
utils.utils_bbox
import
BBoxUtility
from
utils.utils
import
get_classes
,
resize_image
,
cvtColor
from
utils.anchors
import
get_anchors
'''
训练自己的数据集必看!
'''
class SSD(object):
    """Inference wrapper around an SSD300 detector.

    To predict with your own trained model you MUST change model_path and
    classes_path: model_path points at a weights file under logs/,
    classes_path at the class-name txt under model_data/. Of the weights
    saved during training, pick one with a low validation loss (low val
    loss means good generalization on the val set, not necessarily the
    best mAP). A shape-mismatch error on load means model_path and
    classes_path do not match the training configuration.
    """
    _defaults = {
        # Trained weights and the matching class-name list.
        "model_path"        : 'model_data/ssd_weights.h5',
        "classes_path"      : 'model_data/voc_classes.txt',
        # Prediction input size; keep identical to the one used in training.
        "input_shape"       : [300, 300],
        # Only boxes scoring above this confidence are kept.
        "confidence"        : 0.5,
        # IoU threshold used by non-max suppression.
        "nms_iou"           : 0.45,
        # Prior (anchor) box sizes.
        'anchors_size'      : [30, 60, 111, 162, 213, 264, 315],
        # letterbox_image: pad-resize without distortion when True; a plain
        # resize (False) was found to work better in testing.
        "letterbox_image"   : False,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of attribute *n*, or an error string."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialize the detector.
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Set up classes, anchors, colors and build the network.

        Keyword arguments override entries of _defaults.
        """
        self.__dict__.update(self._defaults)
        for name, value in kwargs.items():
            setattr(self, name, value)
        #---------------------------------------------------#
        #   Total class count (+1 below for background).
        #---------------------------------------------------#
        self.class_names, self.num_classes = get_classes(self.classes_path)
        self.anchors = get_anchors(self.input_shape, self.anchors_size)
        self.num_classes = self.num_classes + 1
        #---------------------------------------------------#
        #   One distinct box color per class (hues spread over HSV).
        #---------------------------------------------------#
        hsv_tuples = [(x / self.num_classes, 1., 1.)
                      for x in range(self.num_classes)]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(map(lambda x: (int(x[0] * 255),
                                          int(x[1] * 255),
                                          int(x[2] * 255)), self.colors))
        self.bbox_util = BBoxUtility(self.num_classes, nms_thresh=self.nms_iou)
        self.generate()

    #---------------------------------------------------#
    #   Load the model.
    #---------------------------------------------------#
    def generate(self):
        """Build the SSD300 network and load its trained weights by name."""
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
        self.ssd = SSD300([self.input_shape[0], self.input_shape[1], 3],
                          self.num_classes)
        self.ssd.load_weights(self.model_path, by_name=True)
        print('{} model, anchors, and classes loaded.'.format(model_path))

    @tf.function
    def get_pred(self, photo):
        """Forward pass in inference mode, compiled as a tf.function."""
        preds = self.ssd(photo, training=False)
        return preds

    def _preprocess(self, image):
        """Shared preprocessing: RGB-convert, resize, normalize, add batch dim.

        Returns (image_shape, rgb_image, batched_image_data).
        """
        image_shape = np.array(np.shape(image)[0:2])
        # Convert to RGB so grayscale (or other-mode) inputs do not crash;
        # the pipeline only supports RGB prediction.
        image = cvtColor(image)
        # Resize to the network input size (letterboxed when configured).
        image_data = resize_image(image,
                                  (self.input_shape[1], self.input_shape[0]),
                                  self.letterbox_image)
        # Add the batch dimension, then normalize.
        image_data = preprocess_input(
            np.expand_dims(np.array(image_data, dtype='float32'), 0))
        return image_shape, image, image_data

    def _decode(self, preds, image_shape):
        """Decode raw network output into per-image detection arrays."""
        return self.bbox_util.decode_box(preds, self.anchors, image_shape,
                                         self.input_shape,
                                         self.letterbox_image,
                                         confidence=self.confidence)

    #---------------------------------------------------#
    #   Detect a single image.
    #---------------------------------------------------#
    def detect_image(self, image, crop=False):
        """Detect objects in a PIL image and draw the boxes onto it.

        Args:
            image: PIL Image to run detection on.
            crop: when True, additionally save each detected region under
                img_crop/ as crop_<i>.png.

        Returns:
            The (possibly annotated) PIL image; the unmodified input when
            nothing is detected.
        """
        image_shape, image, image_data = self._preprocess(image)
        preds = self.get_pred(image_data).numpy()
        results = self._decode(preds, image_shape)
        #--------------------------------------#
        #   Nothing detected: return the input.
        #--------------------------------------#
        if len(results[0]) <= 0:
            return image

        top_label = np.array(results[0][:, 4], dtype='int32')
        top_conf = results[0][:, 5]
        top_boxes = results[0][:, :4]

        #---------------------------------------------------------#
        #   Font size and border thickness scale with the image.
        #---------------------------------------------------------#
        font = ImageFont.truetype(
            font='model_data/simhei.ttf',
            size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
        thickness = max((np.shape(image)[0] + np.shape(image)[1])
                        // self.input_shape[0], 1)

        #---------------------------------------------------------#
        #   Optionally save a crop of every detection.
        #---------------------------------------------------------#
        if crop:
            for i, c in list(enumerate(top_boxes)):
                top, left, bottom, right = top_boxes[i]
                top = max(0, np.floor(top).astype('int32'))
                left = max(0, np.floor(left).astype('int32'))
                bottom = min(image.size[1], np.floor(bottom).astype('int32'))
                right = min(image.size[0], np.floor(right).astype('int32'))

                dir_save_path = "img_crop"
                if not os.path.exists(dir_save_path):
                    os.makedirs(dir_save_path)
                crop_image = image.crop([left, top, right, bottom])
                crop_image.save(os.path.join(dir_save_path,
                                             "crop_" + str(i) + ".png"),
                                quality=95, subsampling=0)
                print("save crop_" + str(i) + ".png to " + dir_save_path)

        #---------------------------------------------------------#
        #   Draw every detection on the image.
        #---------------------------------------------------------#
        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box = top_boxes[i]
            score = top_conf[i]

            top, left, bottom, right = box
            top = max(0, np.floor(top).astype('int32'))
            left = max(0, np.floor(left).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom).astype('int32'))
            right = min(image.size[0], np.floor(right).astype('int32'))

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            # Place the label above the box, or just inside it at the top
            # edge when there is no room above.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for t in range(thickness):
                draw.rectangle([left + t, top + t, right - t, bottom - t],
                               outline=self.colors[c])
            draw.rectangle([tuple(text_origin),
                            tuple(text_origin + label_size)],
                           fill=self.colors[c])
            draw.text(text_origin, str(label, 'UTF-8'),
                      fill=(0, 0, 0), font=font)
            del draw
        return image

    def get_FPS(self, image, test_interval):
        """Measure the average seconds-per-detection over test_interval runs.

        A single warm-up pass runs first so tf.function tracing does not
        pollute the timing.
        """
        image_shape, image, image_data = self._preprocess(image)
        preds = self.get_pred(image_data).numpy()
        results = self._decode(preds, image_shape)

        t1 = time.time()
        for _ in range(test_interval):
            preds = self.get_pred(image_data).numpy()
            results = self._decode(preds, image_shape)
        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time

    def get_map_txt(self, image_id, image, class_names, map_out_path):
        """Write this image's detections in mAP-evaluation txt format.

        Each kept detection becomes one line:
        "<class> <score> <left> <top> <right> <bottom>" in
        <map_out_path>/detection-results/<image_id>.txt. Detections whose
        class is not in *class_names* are skipped.
        """
        f = open(os.path.join(map_out_path,
                              "detection-results/" + image_id + ".txt"), "w")
        image_shape, image, image_data = self._preprocess(image)
        preds = self.get_pred(image_data).numpy()
        results = self._decode(preds, image_shape)
        #--------------------------------------#
        #   Nothing detected: write nothing.
        #--------------------------------------#
        if len(results[0]) <= 0:
            return

        top_label = results[0][:, 4]
        top_conf = results[0][:, 5]
        top_boxes = results[0][:, :4]

        for i, c in list(enumerate(top_label)):
            predicted_class = self.class_names[int(c)]
            box = top_boxes[i]
            score = str(top_conf[i])

            top, left, bottom, right = box
            if predicted_class not in class_names:
                continue

            f.write("%s %s %s %s %s %s\n" % (
                predicted_class, score[:6], str(int(left)), str(int(top)),
                str(int(right)), str(int(bottom))))

        f.close()
        return
TensorFlow2x/ComputeVision/Detection/SSD/summary.py
0 → 100644
View file @
c320b6ef
#--------------------------------------------#
# 该部分代码用于看网络结构
#--------------------------------------------#
from
nets.ssd
import
SSD300
if __name__ == "__main__":
    # Network input shape and total class count
    # (presumably 20 VOC classes + background — confirm against classes_path).
    input_shape = [300, 300, 3]
    num_classes = 21

    # Build the model and print its layer-by-layer structure.
    model = SSD300(input_shape, num_classes)
    model.summary()

    # Uncomment to list layer indices and names:
    # for i, layer in enumerate(model.layers):
    #     print(i, layer.name)
TensorFlow2x/ComputeVision/Detection/SSD/train.py
0 → 100644
View file @
c320b6ef
from functools import partial

import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.optimizers import Adam

from nets.ssd import SSD300
from nets.ssd_training import MultiboxLoss
from utils.anchors import get_anchors
from utils.callbacks import (ExponentDecayScheduler, LossHistory,
                             ModelCheckpoint)
from utils.dataloader import SSDDatasets
from utils.utils import get_classes
from utils.utils_fit import fit_one_epoch

# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

'''
训练自己的目标检测模型一定需要注意以下几点:
1、训练前仔细检查自己的格式是否满足要求,该库要求数据集格式为VOC格式,需要准备好的内容有输入图片和标签
   输入图片为.jpg图片,无需固定大小,传入训练前会自动进行resize。
   灰度图会自动转成RGB图片进行训练,无需自己修改。
   输入图片如果后缀非jpg,需要自己批量转成jpg后再开始训练。
   标签为.xml格式,文件中会有需要检测的目标信息,标签文件和输入图片文件相对应。
2、训练好的权值文件保存在logs文件夹中,每个epoch都会保存一次,如果只是训练了几个step是不会保存的,epoch和step的概念要捋清楚一下。
   在训练过程中,该代码并没有设定只保存最低损失的,因此按默认参数训练完会有100个权值,如果空间不够可以自行删除。
   这个并不是保存越少越好也不是保存越多越好,有人想要都保存、有人想只保存一点,为了满足大多数的需求,还是都保存可选择性高。
3、损失值的大小用于判断是否收敛,比较重要的是有收敛的趋势,即验证集损失不断下降,如果验证集损失基本上不改变的话,模型基本上就收敛了。
   损失值的具体大小并没有什么意义,大和小只在于损失的计算方式,并不是接近于0才好。如果想要让损失好看点,可以直接到对应的损失函数里面除上10000。
   训练过程中的损失值会保存在logs文件夹下的loss_%Y_%m_%d_%H_%M_%S文件夹中
4、调参是一门蛮重要的学问,没有什么参数是一定好的,现有的参数是我测试过可以正常训练的参数,因此我会建议用现有的参数。
   但是参数本身并不是绝对的,比如随着batch的增大学习率也可以增大,效果也会好一些;过深的网络不要用太大的学习率等等。
   这些都是经验上,只能靠各位同学多查询资料和自己试试了。
'''


def _fit_phase(model, eager, lr, batch_size, start_epoch, end_epoch,
               train_lines, val_lines, input_shape, anchors, num_classes,
               multiloss, loss_history, callbacks, num_workers):
    """Run one training phase (shared by the frozen and unfrozen stages).

    Builds the data loaders for this phase's batch size, then either runs the
    custom eager loop (``fit_one_epoch``) or compiles and calls ``model.fit``.

    Raises:
        ValueError: when the dataset cannot fill a single batch.
    """
    num_train = len(train_lines)
    num_val = len(val_lines)
    epoch_step = num_train // batch_size
    epoch_step_val = num_val // batch_size
    if epoch_step == 0 or epoch_step_val == 0:
        raise ValueError('数据集过小,无法进行训练,请扩充数据集。')

    train_dataloader = SSDDatasets(train_lines, input_shape, anchors,
                                   batch_size, num_classes, train=True)
    val_dataloader = SSDDatasets(val_lines, input_shape, anchors,
                                 batch_size, num_classes, train=False)
    print('Train on {} samples, val on {} samples, with batch size {}.'.format(
        num_train, num_val, batch_size))

    if eager:
        gen = tf.data.Dataset.from_generator(partial(train_dataloader.generate),
                                             (tf.float32, tf.float32))
        gen_val = tf.data.Dataset.from_generator(partial(val_dataloader.generate),
                                                 (tf.float32, tf.float32))
        gen = gen.shuffle(buffer_size=batch_size).prefetch(buffer_size=batch_size)
        gen_val = gen_val.shuffle(buffer_size=batch_size).prefetch(buffer_size=batch_size)

        # Decay the learning rate by 6% once per epoch's worth of steps.
        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=lr, decay_steps=epoch_step,
            decay_rate=0.94, staircase=True)
        optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

        for epoch in range(start_epoch, end_epoch):
            fit_one_epoch(model, multiloss, loss_history, optimizer, epoch,
                          epoch_step, epoch_step_val, gen, gen_val, end_epoch)
    else:
        model.compile(optimizer=Adam(lr=lr), loss=multiloss)
        # model.fit accepts Sequence objects directly in TF2; fit_generator is
        # deprecated (and removed in recent TF releases).
        model.fit(
            train_dataloader,
            steps_per_epoch=epoch_step,
            validation_data=val_dataloader,
            validation_steps=epoch_step_val,
            epochs=end_epoch,
            initial_epoch=start_epoch,
            use_multiprocessing=num_workers > 1,
            workers=num_workers,
            callbacks=callbacks,
        )


if __name__ == "__main__":
    #----------------------------------------------------#
    #   Whether to train with a custom eager loop instead
    #   of model.fit.
    #----------------------------------------------------#
    eager = False
    #--------------------------------------------------------#
    #   Must point at the class list for YOUR dataset.
    #--------------------------------------------------------#
    classes_path = 'model_data/voc_classes.txt'
    #--------------------------------------------------------#
    #   Pretrained weights path; '' trains without loading
    #   any full-model weights (see README for downloads).
    #--------------------------------------------------------#
    model_path = ''
    #   Network input size (height, width).
    input_shape = [300, 300]
    #   Prior-box sizes; shrink the early (shallow-layer) entries
    #   to detect smaller objects.
    anchors_size = [30, 60, 111, 162, 213, 264, 315]

    #   Frozen-backbone stage hyper-parameters.
    Init_Epoch = 0
    Freeze_Epoch = 50
    Freeze_batch_size = 8
    Freeze_lr = 5e-4
    #   Unfrozen stage hyper-parameters.
    UnFreeze_Epoch = 100
    Unfreeze_batch_size = 8
    Unfreeze_lr = 1e-4
    #   Whether to freeze the backbone during the first stage.
    Freeze_Train = False
    #   Worker count for model.fit data loading (1 = no multiprocessing);
    #   only used when eager is False.
    num_workers = 1

    #   Annotation files: one line per image with boxes appended.
    train_annotation_path = '2012_train.txt'
    val_annotation_path = '2012_val.txt'

    #----------------------------------------------------#
    #   Get classes and anchors.
    #   BUG FIX: the original script ran this section twice,
    #   so num_classes was incremented by 2 (background
    #   counted twice, yielding 22 for VOC instead of 21).
    #----------------------------------------------------#
    class_names, num_classes = get_classes(classes_path)
    num_classes += 1  # +1 for the background class
    anchors = get_anchors(input_shape, anchors_size)

    model = SSD300((input_shape[0], input_shape[1], 3), num_classes)
    if model_path != '':
        #------------------------------------------------------#
        #   Load pretrained weights; by_name/skip_mismatch lets
        #   a checkpoint with a different head still load.
        #------------------------------------------------------#
        print('Load weights {}.'.format(model_path))
        model.load_weights(model_path, by_name=True, skip_mismatch=True)

    #-------------------------------------------------------------------------------#
    #   Training callbacks:
    #   logging        -> TensorBoard directory
    #   checkpoint     -> weight saving (period = epochs between saves)
    #   reduce_lr      -> learning-rate decay schedule
    #   early_stopping -> stop when val_loss stops improving
    #-------------------------------------------------------------------------------#
    logging = TensorBoard(log_dir='logs/')
    checkpoint = ModelCheckpoint('logs/ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
                                 monitor='val_loss', save_weights_only=True,
                                 save_best_only=False, period=1)
    reduce_lr = ExponentDecayScheduler(decay_rate=0.94, verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0,
                                   patience=10, verbose=1)
    loss_history = LossHistory('logs/')
    multiloss = MultiboxLoss(num_classes, neg_pos_ratio=3.0).compute_loss
    callbacks = [logging, checkpoint, reduce_lr, early_stopping, loss_history]

    #---------------------------#
    #   Read the dataset lists.
    #---------------------------#
    with open(train_annotation_path) as f:
        train_lines = f.readlines()
    with open(val_annotation_path) as f:
        val_lines = f.readlines()

    #------------------------------------------------------#
    #   Stage 1: optionally freeze the backbone so early
    #   training cannot destroy the pretrained features.
    #------------------------------------------------------#
    if Freeze_Train:
        freeze_layers = 17
        for i in range(freeze_layers):
            model.layers[i].trainable = False
        print('Freeze the first {} layers of total {} layers.'.format(
            freeze_layers, len(model.layers)))

    _fit_phase(model, eager, Freeze_lr, Freeze_batch_size,
               Init_Epoch, Freeze_Epoch, train_lines, val_lines,
               input_shape, anchors, num_classes,
               multiloss, loss_history, callbacks, num_workers)

    #------------------------------------------------------#
    #   Stage 2: unfreeze everything and fine-tune the whole
    #   network at a lower learning rate.
    #------------------------------------------------------#
    if Freeze_Train:
        for i in range(freeze_layers):
            model.layers[i].trainable = True

    _fit_phase(model, eager, Unfreeze_lr, Unfreeze_batch_size,
               Freeze_Epoch, UnFreeze_Epoch, train_lines, val_lines,
               input_shape, anchors, num_classes,
               multiloss, loss_history, callbacks, num_workers)
TensorFlow2x/ComputeVision/Detection/SSD/utils/__init__.py
0 → 100644
View file @
c320b6ef
#
\ No newline at end of file
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/__init__.cpython-36.pyc
0 → 100644
View file @
c320b6ef
File added
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/anchors.cpython-36.pyc
0 → 100644
View file @
c320b6ef
File added
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/callbacks.cpython-36.pyc
0 → 100644
View file @
c320b6ef
File added
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/dataloader.cpython-36.pyc
0 → 100644
View file @
c320b6ef
File added
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/utils.cpython-36.pyc
0 → 100644
View file @
c320b6ef
File added
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/utils_bbox.cpython-36.pyc
0 → 100644
View file @
c320b6ef
File added
TensorFlow2x/ComputeVision/Detection/SSD/utils/__pycache__/utils_fit.cpython-36.pyc
0 → 100644
View file @
c320b6ef
File added
TensorFlow2x/ComputeVision/Detection/SSD/utils/anchors.py
0 → 100644
View file @
c320b6ef
import
numpy
as
np
class AnchorBox():
    """Generates the normalized SSD prior ("anchor") boxes for one feature map.

    Each aspect ratio r is expanded into the pair (r, 1/r), so ratios [1, 2]
    produce 4 boxes per cell and [1, 2, 3] produce 6.  ``flip`` is accepted
    for API compatibility but unused.
    """

    def __init__(self, input_shape, min_size, max_size=None, aspect_ratios=None, flip=True):
        self.input_shape = input_shape   # (img_height, img_width)
        self.min_size = min_size         # short side of the priors for this layer
        self.max_size = max_size         # used for the second square prior
        # Expand each ratio into (r, 1/r): [1, 2] -> [1, 1, 2, 0.5].
        self.aspect_ratios = []
        for ratio in aspect_ratios:
            self.aspect_ratios += [ratio, 1.0 / ratio]

    def call(self, layer_shape, mask=None):
        """Return an (H*W*n_boxes, 4) array of [x1, y1, x2, y2] priors in [0, 1]."""
        # Feature-map size (e.g. 38x38) and full-image size (e.g. 300x300).
        feat_h, feat_w = layer_shape[0], layer_shape[1]
        img_h, img_w = self.input_shape[0], self.input_shape[1]

        # Per-prior widths/heights in pixels.  First a small square (min_size),
        # then a larger square (geometric mean of min and max), then one
        # rectangle per non-unity ratio.
        widths, heights = [], []
        for ratio in self.aspect_ratios:
            if ratio == 1 and not widths:
                widths.append(self.min_size)
                heights.append(self.min_size)
            elif ratio == 1:
                side = np.sqrt(self.min_size * self.max_size)
                widths.append(side)
                heights.append(side)
            else:
                root = np.sqrt(ratio)
                widths.append(self.min_size * root)
                heights.append(self.min_size / root)

        # Half-extents, so corners are center +/- half.
        half_w = 0.5 * np.array(widths)
        half_h = 0.5 * np.array(heights)

        # Stride of this feature map over the input image.
        step_x = img_w / feat_w
        step_y = img_h / feat_h

        # Cell-center grid in pixel coordinates.
        xs = np.linspace(0.5 * step_x, img_w - 0.5 * step_x, feat_w)
        ys = np.linspace(0.5 * step_y, img_h - 0.5 * step_y, feat_h)
        grid_x, grid_y = np.meshgrid(xs, ys)
        grid_x = grid_x.reshape(-1, 1)
        grid_y = grid_y.reshape(-1, 1)

        # Each prior needs two (cx, cy) pairs: one becomes the top-left
        # corner, the other the bottom-right.
        n_boxes = len(self.aspect_ratios)
        boxes = np.concatenate((grid_x, grid_y), axis=1)
        boxes = np.tile(boxes, (1, 2 * n_boxes))

        # Turn centers into corners.
        boxes[:, 0::4] -= half_w
        boxes[:, 1::4] -= half_h
        boxes[:, 2::4] += half_w
        boxes[:, 3::4] += half_h

        # Normalize to [0, 1] fractions of the image and clip overhang.
        boxes[:, 0::2] /= img_w
        boxes[:, 1::2] /= img_h
        boxes = boxes.reshape(-1, 4)
        return np.clip(boxes, 0.0, 1.0)
#---------------------------------------------------#
#   Compute the spatial sizes of the shared feature
#   layers produced by the backbone for a given input.
#---------------------------------------------------#
def get_img_output_length(height, width):
    """Return (heights, widths) of the last six feature maps as numpy arrays.

    Simulates eight 3x3 convolutions/pools with the backbone's padding and
    stride pattern, keeping only the six maps SSD actually predicts from
    (e.g. 300x300 -> [38, 19, 10, 5, 3, 1]).
    """
    # (kernel, padding, stride) per stage.
    stages = zip([3] * 8,
                 [1, 1, 1, 1, 1, 1, 0, 0],
                 [2, 2, 2, 2, 2, 2, 1, 1])
    heights, widths = [], []
    h, w = height, width
    for k, p, s in stages:
        # Standard conv output-size formula with integer division.
        h = (h + 2 * p - k) // s + 1
        w = (w + 2 * p - k) // s + 1
        heights.append(h)
        widths.append(w)
    return np.array(heights[-6:]), np.array(widths[-6:])
def get_anchors(input_shape=(300, 300), anchors_size=(30, 60, 111, 162, 213, 264, 315)):
    """Build the full set of normalized SSD prior boxes for all feature maps.

    Args:
        input_shape: (height, width) of the network input.
        anchors_size: 7 sizes; layer i uses anchors_size[i] as min_size and
            anchors_size[i + 1] as max_size.

    Returns:
        (total_anchors, 4) numpy array of [x1, y1, x2, y2] boxes in [0, 1].

    Note: the original defaults were mutable lists — replaced with tuples
    (a Python anti-pattern fix; behavior is unchanged since the values are
    only indexed, never mutated).
    """
    feature_heights, feature_widths = get_img_output_length(input_shape[0], input_shape[1])
    # Base ratios per layer; each expands to (r, 1/r), giving 4 or 6 boxes/cell.
    aspect_ratios = [[1, 2], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
    anchors = []
    for i in range(len(feature_heights)):
        anchors.append(
            AnchorBox(input_shape, anchors_size[i], max_size=anchors_size[i + 1],
                      aspect_ratios=aspect_ratios[i]).call([feature_heights[i],
                                                            feature_widths[i]]))
    return np.concatenate(anchors, axis=0)
if __name__ == '__main__':
    # Stand-alone visualization demo: draws the grid centers and a few prior
    # boxes for the 3x3 feature layer, then prints the total anchor count.
    import matplotlib.pyplot as plt

    class AnchorBox_for_Vision():
        """Same prior-box generator as AnchorBox, instrumented with prints
        and matplotlib drawing for the 3x3 layer."""

        def __init__(self, input_shape, min_size, max_size=None, aspect_ratios=None, flip=True):
            # Input image size, e.g. 300x300.
            self.input_shape = input_shape
            # Short side of the priors for this layer.
            self.min_size = min_size
            # Long side (used for the larger square prior).
            self.max_size = max_size
            # [1, 2]    => [1, 1, 2, 1/2]
            # [1, 2, 3] => [1, 1, 2, 1/2, 3, 1/3]
            self.aspect_ratios = []
            for ar in aspect_ratios:
                self.aspect_ratios.append(ar)
                self.aspect_ratios.append(1.0 / ar)

        def call(self, layer_shape, mask=None):
            # --------------------------------- #
            #   Feature-map height/width, e.g. 3x3.
            # --------------------------------- #
            layer_height = layer_shape[0]
            layer_width = layer_shape[1]
            # --------------------------------- #
            #   Input image height/width, e.g. 300x300.
            # --------------------------------- #
            img_height = self.input_shape[0]
            img_width = self.input_shape[1]

            box_widths = []
            box_heights = []
            # --------------------------------- #
            #   self.aspect_ratios is typically
            #   [1, 1, 2, 1/2] or [1, 1, 2, 1/2, 3, 1/3].
            # --------------------------------- #
            for ar in self.aspect_ratios:
                # First the small square prior.
                if ar == 1 and len(box_widths) == 0:
                    box_widths.append(self.min_size)
                    box_heights.append(self.min_size)
                # Then the larger square (geometric mean of min/max).
                elif ar == 1 and len(box_widths) > 0:
                    box_widths.append(np.sqrt(self.min_size * self.max_size))
                    box_heights.append(np.sqrt(self.min_size * self.max_size))
                # Then one rectangle per non-unity ratio.
                elif ar != 1:
                    box_widths.append(self.min_size * np.sqrt(ar))
                    box_heights.append(self.min_size / np.sqrt(ar))
            print("box_widths:", box_widths)
            print("box_heights:", box_heights)

            # --------------------------------- #
            #   Half of each prior's width/height.
            # --------------------------------- #
            box_widths = 0.5 * np.array(box_widths)
            box_heights = 0.5 * np.array(box_heights)

            # --------------------------------- #
            #   Stride of this feature layer
            #   (100 for the 3x3 layer on 300x300).
            # --------------------------------- #
            step_x = img_width / layer_width
            step_y = img_height / layer_height

            # --------------------------------- #
            #   Grid-cell centers in pixels.
            # --------------------------------- #
            linx = np.linspace(0.5 * step_x, img_width - 0.5 * step_x, layer_width)
            liny = np.linspace(0.5 * step_y, img_height - 0.5 * step_y, layer_height)

            centers_x, centers_y = np.meshgrid(linx, liny)
            centers_x = centers_x.reshape(-1, 1)
            centers_y = centers_y.reshape(-1, 1)

            # Only visualize the 3x3 feature layer.
            if layer_height == 3:
                fig = plt.figure()
                ax = fig.add_subplot(111)
                plt.ylim(-50, 350)
                plt.xlim(-50, 350)
                plt.scatter(centers_x, centers_y)

            # Each prior needs two (cx, cy) pairs: one for the top-left
            # corner, one for the bottom-right.
            num_anchors_ = len(self.aspect_ratios)
            anchor_boxes = np.concatenate((centers_x, centers_y), axis=1)
            anchor_boxes = np.tile(anchor_boxes, (1, 2 * num_anchors_))
            # Turn centers into top-left / bottom-right corners.
            anchor_boxes[:, ::4] -= box_widths
            anchor_boxes[:, 1::4] -= box_heights
            anchor_boxes[:, 2::4] += box_widths
            anchor_boxes[:, 3::4] += box_heights

            print(np.shape(anchor_boxes))
            if layer_height == 3:
                # Draw the four priors of the center cell (index 4 of 9).
                rect1 = plt.Rectangle([anchor_boxes[4, 0], anchor_boxes[4, 1]],
                                      box_widths[0] * 2, box_heights[0] * 2,
                                      color="r", fill=False)
                rect2 = plt.Rectangle([anchor_boxes[4, 4], anchor_boxes[4, 5]],
                                      box_widths[1] * 2, box_heights[1] * 2,
                                      color="r", fill=False)
                rect3 = plt.Rectangle([anchor_boxes[4, 8], anchor_boxes[4, 9]],
                                      box_widths[2] * 2, box_heights[2] * 2,
                                      color="r", fill=False)
                rect4 = plt.Rectangle([anchor_boxes[4, 12], anchor_boxes[4, 13]],
                                      box_widths[3] * 2, box_heights[3] * 2,
                                      color="r", fill=False)

                ax.add_patch(rect1)
                ax.add_patch(rect2)
                ax.add_patch(rect3)
                ax.add_patch(rect4)
                plt.show()

            # --------------------------------- #
            #   Normalize boxes to [0, 1] and clip.
            # --------------------------------- #
            anchor_boxes[:, ::2] /= img_width
            anchor_boxes[:, 1::2] /= img_height
            anchor_boxes = anchor_boxes.reshape(-1, 4)

            anchor_boxes = np.minimum(np.maximum(anchor_boxes, 0.0), 1.0)
            return anchor_boxes

    # Input image size: 300x300.
    input_shape = [300, 300]
    # Prior sizes (widths/heights) per layer.
    anchors_size = [30, 60, 111, 162, 213, 264, 315]
    # feature_heights [38, 19, 10, 5, 3, 1]
    # feature_widths  [38, 19, 10, 5, 3, 1]
    feature_heights, feature_widths = get_img_output_length(input_shape[0], input_shape[1])
    # Base ratios per layer -> 4 or 6 priors per cell.
    aspect_ratios = [[1, 2], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]

    anchors = []
    for i in range(len(feature_heights)):
        anchors.append(AnchorBox_for_Vision(input_shape, anchors_size[i],
                                            max_size=anchors_size[i + 1],
                                            aspect_ratios=aspect_ratios[i]).call([feature_heights[i], feature_widths[i]]))

    anchors = np.concatenate(anchors, axis=0)
    print(np.shape(anchors))
TensorFlow2x/ComputeVision/Detection/SSD/utils/callbacks.py
0 → 100644
View file @
c320b6ef
import
os
import
warnings
import
matplotlib
matplotlib
.
use
(
'Agg'
)
from
matplotlib
import
pyplot
as
plt
import
numpy
as
np
import
scipy.signal
from
tensorflow
import
keras
from
tensorflow.keras
import
backend
as
K
class LossHistory(keras.callbacks.Callback):
    """Keras callback that records per-epoch train/val losses, appends them
    to text files under ``log_dir/loss_<timestamp>/`` and refreshes a
    loss-curve PNG after every epoch."""

    def __init__(self, log_dir):
        import datetime
        curr_time = datetime.datetime.now()
        time_str = datetime.datetime.strftime(curr_time, '%Y_%m_%d_%H_%M_%S')
        self.log_dir = log_dir
        self.time_str = time_str
        self.save_path = os.path.join(self.log_dir, "loss_" + str(self.time_str))
        self.losses = []      # train loss per finished epoch
        self.val_loss = []    # validation loss per finished epoch
        # Best-effort directory creation; ignore "already exists".
        try:
            os.makedirs(self.save_path)
        except OSError:
            pass

    def on_epoch_end(self, epoch, logs=None):
        # BUG FIX: the original used a mutable default argument (logs={})
        # and named the epoch argument 'batch'.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.val_loss.append(logs.get('val_loss'))
        with open(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".txt"), 'a') as f:
            f.write(str(logs.get('loss')))
            f.write("\n")
        with open(os.path.join(self.save_path, "epoch_val_loss_" + str(self.time_str) + ".txt"), 'a') as f:
            f.write(str(logs.get('val_loss')))
            f.write("\n")
        self.loss_plot()

    def loss_plot(self):
        """Redraw and save the loss curves (raw + Savitzky-Golay smoothed)."""
        iters = range(len(self.losses))

        plt.figure()
        plt.plot(iters, self.losses, 'red', linewidth=2, label='train loss')
        plt.plot(iters, self.val_loss, 'coral', linewidth=2, label='val loss')
        # Smoothing is best-effort: savgol_filter needs enough points, so
        # failures early in training are deliberately ignored.
        # (FIX: narrowed the original bare `except:` to `except Exception`.)
        try:
            if len(self.losses) < 25:
                num = 5
            else:
                num = 15
            plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3),
                     'green', linestyle='--', linewidth=2, label='smooth train loss')
            plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3),
                     '#8B4513', linestyle='--', linewidth=2, label='smooth val loss')
        except Exception:
            pass

        plt.grid(True)
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('A Loss Curve')
        plt.legend(loc="upper right")

        plt.savefig(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".png"))

        plt.cla()
        plt.close("all")
class ExponentDecayScheduler(keras.callbacks.Callback):
    """Multiplies the optimizer's learning rate by a fixed factor after
    every epoch (exponential decay)."""

    def __init__(self, decay_rate, verbose=0):
        super(ExponentDecayScheduler, self).__init__()
        self.decay_rate = decay_rate        # per-epoch multiplicative factor
        self.verbose = verbose              # >0 prints the new rate
        self.learning_rates = []            # kept for interface compatibility

    def on_epoch_end(self, batch, logs=None):
        # Read the current rate, scale it, and write it back to the optimizer.
        new_lr = K.get_value(self.model.optimizer.lr) * self.decay_rate
        K.set_value(self.model.optimizer.lr, new_lr)
        if self.verbose > 0:
            print('Setting learning rate to %s.' % (new_lr))
class ModelCheckpoint(keras.callbacks.Callback):
    """Saves the model (or just its weights) every `period` epochs,
    optionally only when the monitored metric improves.

    Local re-implementation of Keras's ModelCheckpoint so filenames can be
    templated with epoch number and log values.
    """

    def __init__(self, filepath, monitor='val_loss', verbose=0,
                 save_best_only=False, save_weights_only=False,
                 mode='auto', period=1):
        super(ModelCheckpoint, self).__init__()
        self.monitor = monitor                      # metric name looked up in `logs`
        self.verbose = verbose                      # >0 prints save/skip messages
        self.filepath = filepath                    # str.format template, e.g. 'ep{epoch:03d}-{val_loss:.3f}.h5'
        self.save_best_only = save_best_only
        self.save_weights_only = save_weights_only
        self.period = period                        # epochs between saves
        self.epochs_since_last_save = 0

        if mode not in ['auto', 'min', 'max']:
            warnings.warn('ModelCheckpoint mode %s is unknown, '
                          'fallback to auto mode.' % (mode),
                          RuntimeWarning)
            mode = 'auto'

        # Pick the comparison: 'min' treats lower as better, 'max' higher.
        if mode == 'min':
            self.monitor_op = np.less
            self.best = np.Inf
        elif mode == 'max':
            self.monitor_op = np.greater
            self.best = -np.Inf
        else:
            # 'auto': accuracy-like metrics are maximized, everything else minimized.
            if 'acc' in self.monitor or self.monitor.startswith('fmeasure'):
                self.monitor_op = np.greater
                self.best = -np.Inf
            else:
                self.monitor_op = np.less
                self.best = np.Inf

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.epochs_since_last_save += 1
        if self.epochs_since_last_save >= self.period:
            self.epochs_since_last_save = 0
            # Epoch is 1-based in the filename; log values fill the template.
            filepath = self.filepath.format(epoch=epoch + 1, **logs)
            if self.save_best_only:
                current = logs.get(self.monitor)
                if current is None:
                    warnings.warn('Can save best model only with %s available, '
                                  'skipping.' % (self.monitor), RuntimeWarning)
                else:
                    if self.monitor_op(current, self.best):
                        if self.verbose > 0:
                            print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
                                  ' saving model to %s'
                                  % (epoch + 1, self.monitor, self.best,
                                     current, filepath))
                        self.best = current
                        if self.save_weights_only:
                            self.model.save_weights(filepath, overwrite=True)
                        else:
                            self.model.save(filepath, overwrite=True)
                    else:
                        if self.verbose > 0:
                            print('\nEpoch %05d: %s did not improve' %
                                  (epoch + 1, self.monitor))
            else:
                # Unconditional save path.
                if self.verbose > 0:
                    print('\nEpoch %05d: saving model to %s' % (epoch + 1, filepath))
                if self.save_weights_only:
                    self.model.save_weights(filepath, overwrite=True)
                else:
                    self.model.save(filepath, overwrite=True)
TensorFlow2x/ComputeVision/Detection/SSD/utils/dataloader.py
0 → 100644
View file @
c320b6ef
import
math
from
random
import
shuffle
import
cv2
import
numpy
as
np
from
PIL
import
Image
from
tensorflow
import
keras
from
tensorflow.keras.applications.imagenet_utils
import
preprocess_input
from
utils.utils
import
cvtColor
class
SSDDatasets
(
keras
.
utils
.
Sequence
):
    def __init__(self, annotation_lines, input_shape, anchors, batch_size,
                 num_classes, train, overlap_threshold=0.5):
        """Keras Sequence that yields (preprocessed images, encoded SSD targets).

        Args:
            annotation_lines: list of 'img_path x1,y1,x2,y2,cls ...' strings.
            input_shape: (height, width) the images are resized to.
            anchors: (num_anchors, 4) normalized prior boxes.
            batch_size: samples per batch.
            num_classes: classes INCLUDING background.
            train: True enables random augmentation.
            overlap_threshold: min IoU for an anchor to be assigned a box.
        """
        self.annotation_lines = annotation_lines
        self.length = len(self.annotation_lines)   # dataset size, cached
        self.input_shape = input_shape
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.train = train
        self.overlap_threshold = overlap_threshold
def
__len__
(
self
):
return
math
.
ceil
(
len
(
self
.
annotation_lines
)
/
float
(
self
.
batch_size
))
def
__getitem__
(
self
,
index
):
image_data
=
[]
box_data
=
[]
for
i
in
range
(
index
*
self
.
batch_size
,
(
index
+
1
)
*
self
.
batch_size
):
i
=
i
%
self
.
length
#---------------------------------------------------#
# 训练时进行数据的随机增强
# 验证时不进行数据的随机增强
#---------------------------------------------------#
image
,
box
=
self
.
get_random_data
(
self
.
annotation_lines
[
i
],
self
.
input_shape
,
random
=
self
.
train
)
if
len
(
box
)
!=
0
:
boxes
=
np
.
array
(
box
[:,:
4
]
,
dtype
=
np
.
float32
)
boxes
[:,
[
0
,
2
]]
=
boxes
[:,[
0
,
2
]]
/
self
.
input_shape
[
1
]
boxes
[:,
[
1
,
3
]]
=
boxes
[:,[
1
,
3
]]
/
self
.
input_shape
[
0
]
one_hot_label
=
np
.
eye
(
self
.
num_classes
-
1
)[
np
.
array
(
box
[:,
4
],
np
.
int32
)]
box
=
np
.
concatenate
([
boxes
,
one_hot_label
],
axis
=-
1
)
box
=
self
.
assign_boxes
(
box
)
image_data
.
append
(
image
)
box_data
.
append
(
box
)
print
(
preprocess_input
(
np
.
array
(
image_data
)),
np
.
array
(
box_data
))
break
return
preprocess_input
(
np
.
array
(
image_data
)),
np
.
array
(
box_data
)
    def generate(self):
        """Infinite generator variant of __getitem__ for the eager/tf.data path.

        Yields (preprocessed image batch, encoded target batch) forever,
        reshuffling the annotation list each time a full pass completes.
        """
        i = 0
        while True:
            image_data = []
            box_data = []
            for b in range(self.batch_size):
                # Reshuffle at the start of every pass over the dataset.
                if i == 0:
                    np.random.shuffle(self.annotation_lines)
                #---------------------------------------------------#
                #   Random augmentation when training,
                #   deterministic resize when validating.
                #---------------------------------------------------#
                image, box = self.get_random_data(self.annotation_lines[i],
                                                  self.input_shape,
                                                  random=self.train)
                i = (i + 1) % self.length
                if len(box) != 0:
                    boxes = np.array(box[:, :4], dtype=np.float32)
                    # Normalize pixel coords to [0, 1]: x by width, y by height.
                    boxes[:, [0, 2]] = boxes[:, [0, 2]] / self.input_shape[1]
                    boxes[:, [1, 3]] = boxes[:, [1, 3]] / self.input_shape[0]
                    # One-hot over foreground classes (background excluded).
                    one_hot_label = np.eye(self.num_classes - 1)[np.array(box[:, 4], np.int32)]
                    box = np.concatenate([boxes, one_hot_label], axis=-1)
                # Match ground-truth boxes to anchors.
                box = self.assign_boxes(box)
                image_data.append(image)
                box_data.append(box)
            yield preprocess_input(np.array(image_data)), np.array(box_data)
def
rand
(
self
,
a
=
0
,
b
=
1
):
return
np
.
random
.
rand
()
*
(
b
-
a
)
+
a
    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
        """Load one annotated image and its boxes, optionally augmented.

        Args:
            annotation_line: 'img_path x1,y1,x2,y2,cls x1,y1,...' string.
            input_shape: (h, w) target size.
            jitter: max relative aspect-ratio distortion.
            hue/sat/val: HSV augmentation ranges.
            random: True -> random scale/flip/color jitter; False -> plain
                letterbox resize (validation path).

        Returns:
            (image_data float32 HxWx3 array, boxes in pixel coords with class id).
        """
        line = annotation_line.split()
        #------------------------------#
        #   Read the image; convert to RGB.
        #------------------------------#
        image = Image.open(line[0])
        image = cvtColor(image)
        #------------------------------#
        #   Original vs. target sizes.
        #------------------------------#
        iw, ih = image.size
        h, w = input_shape
        #------------------------------#
        #   Parse ground-truth boxes.
        #------------------------------#
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

        if not random:
            # Validation path: letterbox resize, no augmentation.
            scale = min(w / iw, h / ih)
            nw = int(iw * scale)
            nh = int(ih * scale)
            dx = (w - nw) // 2
            dy = (h - nh) // 2

            #---------------------------------#
            #   Pad the borders with gray.
            #---------------------------------#
            image = image.resize((nw, nh), Image.BICUBIC)
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image, np.float32)

            #---------------------------------#
            #   Map boxes to the resized image.
            #---------------------------------#
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box

            return image_data, box

        #------------------------------------------#
        #   Random scale and aspect-ratio jitter.
        #------------------------------------------#
        new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        #------------------------------------------#
        #   Paste at a random offset onto gray canvas.
        #------------------------------------------#
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        #------------------------------------------#
        #   Random horizontal flip.
        #------------------------------------------#
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        #------------------------------------------#
        #   HSV color-space jitter.
        #------------------------------------------#
        hue = self.rand(-hue, hue)
        sat = self.rand(1, sat) if self.rand() < .5 else 1 / self.rand(1, sat)
        val = self.rand(1, val) if self.rand() < .5 else 1 / self.rand(1, val)
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        x[..., 0] += hue * 360
        # NOTE(review): OpenCV's H channel spans 0-360 for float images, but
        # the wrap-around below compares against 1 — this looks like a
        # leftover from a 0-1 hue representation; confirm intended behavior.
        x[..., 0][x[..., 0] > 1] -= 1
        x[..., 0][x[..., 0] < 0] += 1
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255  # numpy array, 0 to 1

        #---------------------------------#
        #   Map boxes through scale/offset/flip.
        #---------------------------------#
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]

        return image_data, box
    def on_epoch_begin(self):
        # In-place reshuffle so each epoch iterates samples in a new order.
        # NOTE(review): keras.utils.Sequence defines on_epoch_end(), not
        # on_epoch_begin() — confirm this hook is actually invoked.
        shuffle(self.annotation_lines)
def iou(self, box):
    """Compute the IoU of one ground-truth box against every anchor.

    Args:
        box: array-like of 4 floats (xmin, ymin, xmax, ymax), in the
            same coordinate system as ``self.anchors``.

    Returns:
        1-D array of length ``len(self.anchors)`` with the IoU of
        ``box`` against each anchor row.
    """
    # Intersection rectangle: elementwise max of the top-left corners,
    # elementwise min of the bottom-right corners.
    top_left = np.maximum(self.anchors[:, :2], box[:2])
    bottom_right = np.minimum(self.anchors[:, 2:4], box[2:])
    # Clamp to zero so disjoint boxes contribute no intersection area.
    wh = np.maximum(bottom_right - top_left, 0)
    intersection = wh[:, 0] * wh[:, 1]
    # Area of the ground-truth box and of every anchor.
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    anchor_area = (self.anchors[:, 2] - self.anchors[:, 0]) * \
                  (self.anchors[:, 3] - self.anchors[:, 1])
    union = box_area + anchor_area - intersection
    return intersection / union
def encode_box(self, box, return_iou=True, variances=(0.1, 0.1, 0.2, 0.2)):
    """Encode one ground-truth box into SSD regression targets.

    Args:
        box: array of 4 floats (xmin, ymin, xmax, ymax) for a single
            ground-truth box, same coordinate system as ``self.anchors``.
        return_iou: if True, append each assigned anchor's IoU as a
            fifth column of the encoding.
        variances: scale factors for the (cx, cy, w, h) offsets.
            Default is now an immutable tuple — the original list
            default was a mutable-default-argument hazard (values and
            behavior are unchanged).

    Returns:
        Flattened array of shape ``(num_anchors * (4 + return_iou),)``:
        per-anchor (dx, dy, dw, dh[, iou]); rows for unassigned anchors
        are all zeros.
    """
    #---------------------------------------------#
    #   IoU of this box against every anchor.
    #   iou          [self.num_anchors]
    #   encoded_box  [self.num_anchors, 4 + return_iou]
    #---------------------------------------------#
    iou = self.iou(box)
    encoded_box = np.zeros((self.num_anchors, 4 + return_iou))
    #---------------------------------------------#
    #   Anchors that overlap the box strongly enough are made
    #   responsible for predicting it.
    #---------------------------------------------#
    assign_mask = iou > self.overlap_threshold
    #---------------------------------------------#
    #   If no anchor clears self.overlap_threshold, fall back to
    #   the single best-matching anchor as the positive sample.
    #---------------------------------------------#
    if not assign_mask.any():
        assign_mask[iou.argmax()] = True
    #---------------------------------------------#
    #   Optionally record the IoU of each assigned anchor.
    #---------------------------------------------#
    if return_iou:
        encoded_box[:, -1][assign_mask] = iou[assign_mask]
    #---------------------------------------------#
    #   Anchors assigned to this box.
    #---------------------------------------------#
    assigned_anchors = self.anchors[assign_mask]
    #---------------------------------------------#
    #   Convert corner coordinates to center/size for both the
    #   ground-truth box and the assigned anchors.
    #---------------------------------------------#
    box_center = 0.5 * (box[:2] + box[2:])
    box_wh = box[2:] - box[:2]
    assigned_anchors_center = (assigned_anchors[:, 0:2] + assigned_anchors[:, 2:4]) * 0.5
    assigned_anchors_wh = (assigned_anchors[:, 2:4] - assigned_anchors[:, 0:2])
    #------------------------------------------------#
    #   SSD-style inverse encoding: center offsets normalized by
    #   anchor size, log-scaled width/height ratios, both divided
    #   by the variances (default (0.1, 0.1, 0.2, 0.2)).
    #------------------------------------------------#
    variances = np.asarray(variances)
    encoded_box[:, :2][assign_mask] = box_center - assigned_anchors_center
    encoded_box[:, :2][assign_mask] /= assigned_anchors_wh
    encoded_box[:, :2][assign_mask] /= variances[:2]
    encoded_box[:, 2:4][assign_mask] = np.log(box_wh / assigned_anchors_wh)
    encoded_box[:, 2:4][assign_mask] /= variances[2:4]
    return encoded_box.ravel()
def assign_boxes(self, boxes):
    """Build the per-anchor training target for one image.

    Args:
        boxes: array of shape [num_true_box, 4 + num_classes]; columns
            0:4 are box corners, columns 4: the one-hot class vector.

    Returns:
        assignment: array of shape [num_anchors, 4 + num_classes + 1].
    """
    #---------------------------------------------------#
    #   assignment is split into three parts:
    #   :4    regression targets the network should predict
    #   4:-1  class of the anchor's matched box (background by default)
    #   -1    whether this anchor contains an object
    #---------------------------------------------------#
    assignment = np.zeros((self.num_anchors, 4 + self.num_classes + 1))
    assignment[:, 4] = 1.0
    if len(boxes) == 0:
        return assignment
    # Encode every ground-truth box against all anchors (IoU included).
    encoded_boxes = np.apply_along_axis(self.encode_box, 1, boxes[:, :4])
    #---------------------------------------------------#
    #   After the reshape, encoded_boxes has shape
    #   [num_true_box, num_anchors, 4 + 1]
    #   (4 encoded offsets plus 1 IoU column).
    #---------------------------------------------------#
    encoded_boxes = encoded_boxes.reshape(-1, self.num_anchors, 5)
    #---------------------------------------------------#
    #   [num_anchors] best-overlapping ground-truth box per anchor.
    #---------------------------------------------------#
    best_iou = encoded_boxes[:, :, -1].max(axis=0)
    best_iou_idx = encoded_boxes[:, :, -1].argmax(axis=0)
    best_iou_mask = best_iou > 0
    best_iou_idx = best_iou_idx[best_iou_mask]
    #---------------------------------------------------#
    #   Number of anchors that matched some ground-truth box.
    #---------------------------------------------------#
    assign_num = len(best_iou_idx)
    # Keep only the encoded columns for the matched anchors.
    encoded_boxes = encoded_boxes[:, best_iou_mask, :]
    #---------------------------------------------------#
    #   Copy the encoded regression targets of each anchor's
    #   best-matching ground-truth box.
    #---------------------------------------------------#
    assignment[:, :4][best_iou_mask] = encoded_boxes[best_iou_idx, np.arange(assign_num), :4]
    #----------------------------------------------------------#
    #   Column 4 is the background probability; set it to 0 for
    #   matched anchors, since they now correspond to an object.
    #----------------------------------------------------------#
    assignment[:, 4][best_iou_mask] = 0
    assignment[:, 5:-1][best_iou_mask] = boxes[best_iou_idx, 4:]
    #----------------------------------------------------------#
    #   Last column flags whether the anchor has a matched object.
    #----------------------------------------------------------#
    assignment[:, -1][best_iou_mask] = 1
    # assignment now holds the full prediction target for this image.
    return assignment
TensorFlow2x/ComputeVision/Detection/SSD/utils/utils.py
0 → 100644
View file @
c320b6ef
import
numpy
as
np
from
PIL
import
Image
#---------------------------------------------------------#
#   Convert an image to RGB so grayscale inputs do not crash
#   at prediction time. Only RGB images are supported; every
#   other image type is converted to RGB.
#---------------------------------------------------------#
def cvtColor(image):
    # Already a 3-channel image: return it untouched.
    if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
        return image
    # Anything else (grayscale, RGBA, palette, ...) goes through PIL.
    image = image.convert('RGB')
    return image
#---------------------------------------------------#
#   Resize the input image to `size`.
#---------------------------------------------------#
def resize_image(image, size, letterbox_image):
    """Resize ``image`` to ``size`` (w, h).

    With ``letterbox_image`` True the aspect ratio is preserved and
    the picture is centered on a gray (128, 128, 128) canvas; with
    False the image is simply stretched to the target size.
    """
    target_w, target_h = size
    if not letterbox_image:
        # Plain stretch, aspect ratio not preserved.
        return image.resize((target_w, target_h), Image.BICUBIC)
    src_w, src_h = image.size
    # Scale so the whole image fits inside the target rectangle.
    ratio = min(target_w / src_w, target_h / src_h)
    scaled_w = int(src_w * ratio)
    scaled_h = int(src_h * ratio)
    scaled = image.resize((scaled_w, scaled_h), Image.BICUBIC)
    # Paste centered onto a gray canvas to pad the remaining borders.
    canvas = Image.new('RGB', size, (128, 128, 128))
    canvas.paste(scaled, ((target_w - scaled_w) // 2, (target_h - scaled_h) // 2))
    return canvas
#---------------------------------------------------#
#   Load the class names (one per line) from a text file.
#---------------------------------------------------#
def get_classes(classes_path):
    """Return ``(class_names, num_classes)`` read from ``classes_path``."""
    with open(classes_path, encoding='utf-8') as handle:
        class_names = [line.strip() for line in handle.readlines()]
    return class_names, len(class_names)
TensorFlow2x/ComputeVision/Detection/SSD/utils/utils_bbox.py
0 → 100644
View file @
c320b6ef
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow.keras.backend
as
K
class BBoxUtility(object):
    """Decode raw SSD network output into final detection boxes.

    Holds the class count and non-max-suppression settings, decodes
    anchor-relative regression offsets, filters by confidence, runs NMS
    per class, and maps boxes back to original-image coordinates.
    """

    def __init__(self, num_classes, nms_thresh=0.45, top_k=300):
        # num_classes: total class count, including background (index 0).
        self.num_classes = num_classes
        # IoU threshold used by non-max suppression.
        self._nms_thresh = nms_thresh
        # Maximum number of boxes kept per class by NMS.
        self._top_k = top_k

    def ssd_correct_boxes(self, box_xy, box_wh, input_shape, image_shape, letterbox_image):
        """Map normalized (center, size) boxes back to original-image pixels.

        Returns boxes as (ymin, xmin, ymax, xmax) scaled by image_shape.
        """
        #-----------------------------------------------------------------#
        #   Put the y axis first so the boxes can be multiplied directly
        #   by the (h, w) image shape below.
        #-----------------------------------------------------------------#
        box_yx = box_xy[..., ::-1]
        box_hw = box_wh[..., ::-1]
        input_shape = np.array(input_shape)
        image_shape = np.array(image_shape)
        if letterbox_image:
            #-----------------------------------------------------------------#
            #   offset is the displacement of the valid image region
            #   relative to the top-left corner of the letterboxed input;
            #   new_shape is the image shape after aspect-preserving resize.
            #-----------------------------------------------------------------#
            new_shape = np.round(image_shape * np.min(input_shape / image_shape))
            offset = (input_shape - new_shape) / 2. / input_shape
            scale = input_shape / new_shape
            box_yx = (box_yx - offset) * scale
            box_hw *= scale
        box_mins = box_yx - (box_hw / 2.)
        box_maxes = box_yx + (box_hw / 2.)
        boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], axis=-1)
        # Scale the normalized corners up to original-image pixels.
        boxes *= np.concatenate([image_shape, image_shape], axis=-1)
        return boxes

    def decode_boxes(self, mbox_loc, anchors, variances):
        """Decode per-anchor regression offsets into corner boxes in [0, 1]."""
        # Anchor widths and heights.
        anchor_width = anchors[:, 2] - anchors[:, 0]
        anchor_height = anchors[:, 3] - anchors[:, 1]
        # Anchor center points.
        anchor_center_x = 0.5 * (anchors[:, 2] + anchors[:, 0])
        anchor_center_y = 0.5 * (anchors[:, 3] + anchors[:, 1])
        # Predicted box center: anchor center plus scaled x/y offset.
        decode_bbox_center_x = mbox_loc[:, 0] * anchor_width * variances[0]
        decode_bbox_center_x += anchor_center_x
        decode_bbox_center_y = mbox_loc[:, 1] * anchor_height * variances[1]
        decode_bbox_center_y += anchor_center_y
        # Predicted box width/height: anchor size times exp of the offset.
        decode_bbox_width = np.exp(mbox_loc[:, 2] * variances[2])
        decode_bbox_width *= anchor_width
        decode_bbox_height = np.exp(mbox_loc[:, 3] * variances[3])
        decode_bbox_height *= anchor_height
        # Top-left and bottom-right corners of the predicted box.
        decode_bbox_xmin = decode_bbox_center_x - 0.5 * decode_bbox_width
        decode_bbox_ymin = decode_bbox_center_y - 0.5 * decode_bbox_height
        decode_bbox_xmax = decode_bbox_center_x + 0.5 * decode_bbox_width
        decode_bbox_ymax = decode_bbox_center_y + 0.5 * decode_bbox_height
        # Stack the corners into [num_anchors, 4].
        decode_bbox = np.concatenate((decode_bbox_xmin[:, None],
                                      decode_bbox_ymin[:, None],
                                      decode_bbox_xmax[:, None],
                                      decode_bbox_ymax[:, None]), axis=-1)
        # Clamp to the normalized [0, 1] range.
        decode_bbox = np.minimum(np.maximum(decode_bbox, 0.0), 1.0)
        return decode_bbox

    def decode_box(self, predictions, anchors, image_shape, input_shape, letterbox_image, variances=[0.1, 0.1, 0.2, 0.2], confidence=0.5):
        """Turn raw network predictions into per-image detection arrays.

        Args:
            predictions: [batch, num_anchors, 4 + num_classes] raw output;
                columns :4 are regression offsets, 4: are class scores.
            anchors: [num_anchors, 4] anchor boxes.
            image_shape, input_shape: original image and network input sizes.
            letterbox_image: whether letterbox padding was used at resize.
            variances: offset scale factors. NOTE(review): a mutable list
                default — harmless since it is only read, but a tuple would
                be the safer idiom.
            confidence: per-class score threshold.

        Returns:
            A list with one entry per batch image; each entry is either an
            empty list or an array of [ymin, xmin, ymax, xmax, label, conf].
        """
        #---------------------------------------------------#
        #   Columns :4 are the regression predictions.
        #---------------------------------------------------#
        mbox_loc = predictions[:, :, :4]
        #---------------------------------------------------#
        #   Columns 4: are the per-class confidences.
        #---------------------------------------------------#
        mbox_conf = predictions[:, :, 4:]
        results = []
        #----------------------------------------------------------------------------------------------------------------#
        #   Process every image in the batch; at prediction time only one
        #   image is fed in, so this loop usually runs once.
        #----------------------------------------------------------------------------------------------------------------#
        for i in range(len(mbox_loc)):
            results.append([])
            #--------------------------------#
            #   Decode the anchors with the regression output.
            #--------------------------------#
            decode_bbox = self.decode_boxes(mbox_loc[i], anchors, variances)
            # Class 0 is background, so start from class 1.
            for c in range(1, self.num_classes):
                #--------------------------------#
                #   Take the confidences of this class for all boxes
                #   and keep only those above the threshold.
                #--------------------------------#
                c_confs = mbox_conf[i, :, c]
                c_confs_m = c_confs > confidence
                if len(c_confs[c_confs_m]) > 0:
                    #-----------------------------------------#
                    #   Boxes and scores above the confidence threshold.
                    #-----------------------------------------#
                    boxes_to_process = decode_bbox[c_confs_m]
                    confs_to_process = c_confs[c_confs_m]
                    #-----------------------------------------#
                    #   IoU-based non-max suppression (eager TF, hence
                    #   the .numpy() on the returned indices).
                    #-----------------------------------------#
                    idx = tf.image.non_max_suppression(tf.cast(boxes_to_process, tf.float32), tf.cast(confs_to_process, tf.float32),
                                                       self._top_k, iou_threshold=self._nms_thresh).numpy()
                    #-----------------------------------------#
                    #   Keep the boxes that survived suppression.
                    #-----------------------------------------#
                    good_boxes = boxes_to_process[idx]
                    confs = confs_to_process[idx][:, None]
                    # Label is the class index shifted down past background.
                    labels = (c - 1) * np.ones((len(idx), 1))
                    #-----------------------------------------#
                    #   Stack box coordinates, label and confidence.
                    #-----------------------------------------#
                    c_pred = np.concatenate((good_boxes, labels, confs), axis=1)
                    # Accumulate into this image's result list.
                    results[-1].extend(c_pred)
            if len(results[-1]) > 0:
                results[-1] = np.array(results[-1])
                # Convert corners to (center, size) and map back to
                # original-image coordinates.
                box_xy, box_wh = (results[-1][:, 0:2] + results[-1][:, 2:4]) / 2, results[-1][:, 2:4] - results[-1][:, 0:2]
                results[-1][:, :4] = self.ssd_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)
        return results
Prev
1
…
3
4
5
6
7
8
9
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment