Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
dcuai
dlexamples
Commits
c320b6ef
Commit
c320b6ef
authored
Apr 15, 2022
by
zhenyi
Browse files
tf2 detection
parent
0fc002df
Changes
195
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
1037 additions
and
0 deletions
+1037
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/tools/tools/visualize_dataset.py
...eVision/Detection/YOLOv3/tools/tools/visualize_dataset.py
+56
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/tools/tools/voc2012.py
...w2x/ComputeVision/Detection/YOLOv3/tools/tools/voc2012.py
+110
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/train.py
TensorFlow2x/ComputeVision/Detection/YOLOv3/train.py
+217
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2.egg-info/PKG-INFO
...mputeVision/Detection/YOLOv3/yolov3_tf2.egg-info/PKG-INFO
+13
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2.egg-info/SOURCES.txt
...teVision/Detection/YOLOv3/yolov3_tf2.egg-info/SOURCES.txt
+11
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2.egg-info/dependency_links.txt
...Detection/YOLOv3/yolov3_tf2.egg-info/dependency_links.txt
+1
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2.egg-info/top_level.txt
...Vision/Detection/YOLOv3/yolov3_tf2.egg-info/top_level.txt
+1
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/__init__.py
...Vision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/__init__.py
+0
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/__pycache__/__init__.cpython-36.pyc
...yolov3_tf2/yolov3_tf2/__pycache__/__init__.cpython-36.pyc
+0
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/__pycache__/dataset.cpython-36.pyc
.../yolov3_tf2/yolov3_tf2/__pycache__/dataset.cpython-36.pyc
+0
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/__pycache__/models.cpython-36.pyc
...3/yolov3_tf2/yolov3_tf2/__pycache__/models.cpython-36.pyc
+0
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/__pycache__/utils.cpython-36.pyc
...v3/yolov3_tf2/yolov3_tf2/__pycache__/utils.cpython-36.pyc
+0
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/dataset.py
...eVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/dataset.py
+144
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/models.py
...teVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/models.py
+349
-0
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/utils.py
...uteVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/utils.py
+135
-0
No files found.
TensorFlow2x/ComputeVision/Detection/YOLOv3/tools/tools/visualize_dataset.py
0 → 100644
View file @
c320b6ef
import
time
from
absl
import
app
,
flags
,
logging
from
absl.flags
import
FLAGS
import
cv2
import
numpy
as
np
import
tensorflow
as
tf
from
yolov3_tf2.models
import
(
YoloV3
,
YoloV3Tiny
)
from
yolov3_tf2.dataset
import
load_tfrecord_dataset
,
transform_images
from
yolov3_tf2.utils
import
draw_outputs
# Command-line flags consumed by main() through absl FLAGS.
flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
flags.DEFINE_integer('size', 416, 'resize images to')
flags.DEFINE_string('dataset', './data/voc2012_train.tfrecord',
                    'path to dataset')
flags.DEFINE_string('output', './output.jpg', 'path to output image')
def main(_argv):
    """Visualize ground-truth boxes for one sample of a detection tfrecord.

    Reads the class-names file and the dataset path from absl FLAGS,
    draws the labelled boxes of the first (shuffled) sample on the image
    and writes the result to FLAGS.output.
    """
    # Fix: use a context manager so the class-names file handle is closed
    # (the original open(...).readlines() leaked it); iterating the file
    # object yields lines directly.
    with open(FLAGS.classes) as f:
        class_names = [c.strip() for c in f]
    logging.info('classes loaded')

    dataset = load_tfrecord_dataset(FLAGS.dataset, FLAGS.classes, FLAGS.size)
    dataset = dataset.shuffle(512)

    for image, labels in dataset.take(1):
        boxes = []
        scores = []
        classes = []
        for x1, y1, x2, y2, label in labels:
            # The loader pads labels with all-zero rows; skip them.
            if x1 == 0 and x2 == 0:
                continue
            boxes.append((x1, y1, x2, y2))
            scores.append(1)  # ground truth -> confidence is always 1
            classes.append(label)
        # draw_outputs expects batched (boxes, scores, classes, nums).
        nums = [len(boxes)]
        boxes = [boxes]
        scores = [scores]
        classes = [classes]

        logging.info('labels:')
        for i in range(nums[0]):
            logging.info('\t{}, {}, {}'.format(
                class_names[int(classes[0][i])],
                np.array(scores[0][i]),
                np.array(boxes[0][i])))

        # The dataset yields RGB tensors; OpenCV writes BGR.
        img = cv2.cvtColor(image.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite(FLAGS.output, img)
        logging.info('output saved to: {}'.format(FLAGS.output))
# Script entry point: absl parses FLAGS and then calls main(argv).
if __name__ == '__main__':
    app.run(main)
TensorFlow2x/ComputeVision/Detection/YOLOv3/tools/tools/voc2012.py
0 → 100644
View file @
c320b6ef
import
time
import
os
import
hashlib
from
absl
import
app
,
flags
,
logging
from
absl.flags
import
FLAGS
import
tensorflow
as
tf
import
lxml.etree
import
tqdm
# Command-line flags for the VOC2012 -> tfrecord conversion script.
flags.DEFINE_string('data_dir', './data/voc2012_raw/VOCdevkit/VOC2012/',
                    'path to raw PASCAL VOC dataset')
# Fix: corrected typo "spit" -> "split" in the user-visible help text.
flags.DEFINE_enum('split', 'train', ['train', 'val'],
                  'specify train or val split')
# Fix: corrected typo "outpot" -> "output" in the user-visible help text.
flags.DEFINE_string('output_file', './data/voc2012_train.tfrecord',
                    'output dataset')
flags.DEFINE_string('classes', './data/voc2012.names', 'classes file')
def _int64_feature(values):
    """Wrap a list of ints as a tf.train int64 Feature."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))


def _bytes_feature(values):
    """Wrap a list of byte strings as a tf.train bytes Feature."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))


def _float_feature(values):
    """Wrap a list of floats as a tf.train float Feature."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))


def build_example(annotation, class_map):
    """Build a tf.train.Example from one parsed VOC annotation dict.

    Args:
        annotation: nested dict produced by parse_xml() for one image
            (keys: 'filename', 'size', optionally 'object', ...).
        class_map: dict mapping class name -> integer label index.

    Returns:
        A tf.train.Example in the standard object-detection schema
        (image/encoded, image/object/bbox/*, ...).
    """
    img_path = os.path.join(
        FLAGS.data_dir, 'JPEGImages', annotation['filename'])
    # Fix: read the image inside a context manager so the file handle is
    # closed deterministically (the original open(...).read() leaked it).
    with open(img_path, 'rb') as img_file:
        img_raw = img_file.read()
    key = hashlib.sha256(img_raw).hexdigest()

    # Box coordinates are normalized by the image dimensions below.
    width = int(annotation['size']['width'])
    height = int(annotation['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    views = []
    difficult_obj = []
    # 'object' is absent when the image has no annotated boxes.
    if 'object' in annotation:
        for obj in annotation['object']:
            difficult = bool(int(obj['difficult']))
            difficult_obj.append(int(difficult))

            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(class_map[obj['name']])
            truncated.append(int(obj['truncated']))
            views.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': _int64_feature([height]),
        'image/width': _int64_feature([width]),
        'image/filename': _bytes_feature(
            [annotation['filename'].encode('utf8')]),
        'image/source_id': _bytes_feature(
            [annotation['filename'].encode('utf8')]),
        'image/key/sha256': _bytes_feature([key.encode('utf8')]),
        'image/encoded': _bytes_feature([img_raw]),
        'image/format': _bytes_feature(['jpeg'.encode('utf8')]),
        'image/object/bbox/xmin': _float_feature(xmin),
        'image/object/bbox/xmax': _float_feature(xmax),
        'image/object/bbox/ymin': _float_feature(ymin),
        'image/object/bbox/ymax': _float_feature(ymax),
        'image/object/class/text': _bytes_feature(classes_text),
        'image/object/class/label': _int64_feature(classes),
        'image/object/difficult': _int64_feature(difficult_obj),
        'image/object/truncated': _int64_feature(truncated),
        'image/object/view': _bytes_feature(views),
    }))
    return example
def parse_xml(xml):
    """Recursively convert an XML element into nested dicts.

    A leaf element maps its tag to its text. For interior elements,
    children are merged into one dict; repeated 'object' children are
    collected into a list, every other tag is assumed to occur once.
    Returns {xml.tag: parsed_children_or_text}.
    """
    if len(xml) == 0:
        return {xml.tag: xml.text}

    parsed = {}
    for node in xml:
        sub = parse_xml(node)[node.tag]
        if node.tag == 'object':
            # Multiple annotated objects per image -> accumulate a list.
            parsed.setdefault(node.tag, []).append(sub)
        else:
            parsed[node.tag] = sub
    return {xml.tag: parsed}
def main(_argv):
    """Convert one PASCAL VOC split (train/val) into a single tfrecord.

    Reads paths from absl FLAGS; writes FLAGS.output_file.
    """
    # Fix: every open() below now uses a context manager so the handles
    # are closed deterministically (the original leaked all of them).
    with open(FLAGS.classes) as f:
        class_map = {name: idx for idx, name in
                     enumerate(f.read().splitlines())}
    logging.info("Class mapping loaded: %s", class_map)

    list_path = os.path.join(
        FLAGS.data_dir, 'ImageSets', 'Main', '%s.txt' % FLAGS.split)
    with open(list_path) as f:
        image_list = f.read().splitlines()
    logging.info("Image list loaded: %d", len(image_list))

    # TFRecordWriter is a context manager: it flushes and closes even if
    # an annotation fails to parse mid-loop.
    with tf.io.TFRecordWriter(FLAGS.output_file) as writer:
        for name in tqdm.tqdm(image_list):
            annotation_path = os.path.join(
                FLAGS.data_dir, 'Annotations', name + '.xml')
            with open(annotation_path) as f:
                annotation_xml = lxml.etree.fromstring(f.read())
            annotation = parse_xml(annotation_xml)['annotation']
            tf_example = build_example(annotation, class_map)
            writer.write(tf_example.SerializeToString())
    logging.info("Done")
# Script entry point: absl parses FLAGS and then calls main(argv).
if __name__ == '__main__':
    app.run(main)
TensorFlow2x/ComputeVision/Detection/YOLOv3/train.py
0 → 100644
View file @
c320b6ef
from
absl
import
app
,
flags
,
logging
from
absl.flags
import
FLAGS
import
tensorflow
as
tf
import
numpy
as
np
import
cv2
import
time
from
tensorflow.keras.callbacks
import
(
ReduceLROnPlateau
,
EarlyStopping
,
ModelCheckpoint
,
TensorBoard
)
from
yolov3_tf2.models
import
(
YoloV3
,
YoloV3Tiny
,
YoloLoss
,
yolo_anchors
,
yolo_anchor_masks
,
yolo_tiny_anchors
,
yolo_tiny_anchor_masks
)
from
yolov3_tf2.utils
import
freeze_all
import
yolov3_tf2.dataset
as
dataset
# Training configuration flags, consumed by setup_model() and main().
flags.DEFINE_string('dataset', '', 'path to dataset')
flags.DEFINE_string('val_dataset', '', 'path to validation dataset')
flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
                    'path to weights file')
flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
flags.DEFINE_enum('mode', 'fit', ['fit', 'eager_fit', 'eager_tf'],
                  'fit: model.fit, '
                  'eager_fit: model.fit(run_eagerly=True), '
                  'eager_tf: custom GradientTape')
flags.DEFINE_enum('transfer', 'none',
                  ['none', 'darknet', 'no_output', 'frozen', 'fine_tune'],
                  'none: Training from scratch, '
                  'darknet: Transfer darknet, '
                  'no_output: Transfer all but output, '
                  'frozen: Transfer and freeze all, '
                  'fine_tune: Transfer all and freeze darknet only')
flags.DEFINE_integer('size', 416, 'image size')
flags.DEFINE_integer('epochs', 2, 'number of epochs')
flags.DEFINE_integer('batch_size', 8, 'batch size')
flags.DEFINE_float('learning_rate', 1e-3, 'learning rate')
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
flags.DEFINE_integer('weights_num_classes', None,
                     'specify num class for `weights` file if different, '
                     'useful in transfer learning with different number of classes')
flags.DEFINE_boolean('multi_gpu', False,
                     'Use if wishing to train with more than 1 GPU.')
def setup_model():
    """Build and compile the YOLOv3 model according to FLAGS.

    Handles the tiny/full variant, the transfer-learning modes
    (none/darknet/no_output/frozen/fine_tune) and compilation.

    Returns:
        (model, optimizer, loss, anchors, anchor_masks)
    """
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes
        # reset top layers
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True,
                classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            # Copy only the backbone weights, then freeze the backbone.
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            # Copy and freeze everything except the detection heads.
            for layer in model.layers:
                if not layer.name.startswith('yolo_output'):
                    layer.set_weights(
                        model_pretrained.get_layer(layer.name).get_weights())
                    freeze_all(layer)
    else:
        # All other transfer require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    # Fix: the `lr` keyword is a deprecated alias in tf.keras optimizers;
    # `learning_rate` is the supported spelling (identical behavior).
    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    # One YoloLoss per output scale, selected by its anchor mask.
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    model.compile(optimizer=optimizer, loss=loss,
                  run_eagerly=(FLAGS.mode == 'eager_fit'))

    return model, optimizer, loss, anchors, anchor_masks
def main(_argv):
    """Train YOLOv3: build the model, load datasets, run the chosen mode.

    FLAGS.mode selects either a hand-written eager GradientTape loop
    ('eager_tf') or Keras model.fit ('fit' / 'eager_fit').
    """
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    # Setup
    if FLAGS.multi_gpu:
        # Enable memory growth on every GPU, then replicate the model
        # with MirroredStrategy; the per-replica batch size is scaled up
        # by the number of replicas.
        for physical_device in physical_devices:
            tf.config.experimental.set_memory_growth(physical_device, True)
        strategy = tf.distribute.MirroredStrategy()
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        BATCH_SIZE = FLAGS.batch_size * strategy.num_replicas_in_sync
        FLAGS.batch_size = BATCH_SIZE
        with strategy.scope():
            model, optimizer, loss, anchors, anchor_masks = setup_model()
    else:
        model, optimizer, loss, anchors, anchor_masks = setup_model()

    # Training data: real tfrecord if given, else a built-in fake sample.
    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)
    else:
        train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    # Resize/normalize images and convert boxes to per-scale grid targets.
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    # Validation data: same pipeline, but no shuffle/prefetch.
    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    else:
        val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    # One loss term per output scale.
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            # Validation pass: forward only, no gradient updates.
            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            # Reset running means and checkpoint after every epoch.
            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(
                'checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        # Keras-driven training with standard callbacks.
        callbacks = [
            ReduceLROnPlateau(verbose=2),
            EarlyStopping(patience=3, verbose=2),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=2, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        start_time = time.time()
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
        end_time = time.time() - start_time
        print(f'Total Training Time: {end_time}')
# Script entry point. SystemExit is swallowed so absl's normal exit
# (e.g. after --help) does not surface as a traceback.
if __name__ == '__main__':
    try:
        app.run(main)
    except SystemExit:
        pass
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2.egg-info/PKG-INFO
0 → 100644
View file @
c320b6ef
Metadata-Version: 2.1
Name: yolov3-tf2
Version: 0.1
Summary: UNKNOWN
Home-page: https://github.com/zzh8829/yolov3-tf2
Author: Zihao Zhang
Author-email: zzh8829@gmail.com
License: UNKNOWN
Platform: UNKNOWN
License-File: LICENSE
UNKNOWN
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2.egg-info/SOURCES.txt
0 → 100644
View file @
c320b6ef
LICENSE
README.md
setup.py
yolov3_tf2/__init__.py
yolov3_tf2/dataset.py
yolov3_tf2/models.py
yolov3_tf2/utils.py
yolov3_tf2.egg-info/PKG-INFO
yolov3_tf2.egg-info/SOURCES.txt
yolov3_tf2.egg-info/dependency_links.txt
yolov3_tf2.egg-info/top_level.txt
\ No newline at end of file
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2.egg-info/dependency_links.txt
0 → 100644
View file @
c320b6ef
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2.egg-info/top_level.txt
0 → 100644
View file @
c320b6ef
yolov3_tf2
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/__init__.py
0 → 100644
View file @
c320b6ef
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/__pycache__/__init__.cpython-36.pyc
0 → 100644
View file @
c320b6ef
File added
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/__pycache__/dataset.cpython-36.pyc
0 → 100644
View file @
c320b6ef
File added
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/__pycache__/models.cpython-36.pyc
0 → 100644
View file @
c320b6ef
File added
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/__pycache__/utils.cpython-36.pyc
0 → 100644
View file @
c320b6ef
File added
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/dataset.py
0 → 100644
View file @
c320b6ef
import
tensorflow
as
tf
from
absl.flags
import
FLAGS
@tf.function
def transform_targets_for_output(y_true, grid_size, anchor_idxs):
    """Scatter ground-truth boxes into one output scale's target grid.

    Only boxes whose best anchor (y_true[..., 5]) belongs to this
    scale's anchor_idxs are written; all other cells stay zero.
    """
    # y_true: (N, boxes, (x1, y1, x2, y2, class, best_anchor))
    N = tf.shape(y_true)[0]

    # y_true_out: (N, grid, grid, anchors, [x1, y1, x2, y2, obj, class])
    y_true_out = tf.zeros(
        (N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6))

    anchor_idxs = tf.cast(anchor_idxs, tf.int32)

    # Collect scatter indices/values in TensorArrays (graph-mode safe).
    indexes = tf.TensorArray(tf.int32, 1, dynamic_size=True)
    updates = tf.TensorArray(tf.float32, 1, dynamic_size=True)
    idx = 0
    for i in tf.range(N):
        for j in tf.range(tf.shape(y_true)[1]):
            # Padded rows have x2 == 0; skip them.
            if tf.equal(y_true[i][j][2], 0):
                continue
            # Does this box's best anchor belong to this scale?
            anchor_eq = tf.equal(
                anchor_idxs, tf.cast(y_true[i][j][5], tf.int32))

            if tf.reduce_any(anchor_eq):
                box = y_true[i][j][0:4]
                # Box center in normalized coordinates.
                box_xy = (y_true[i][j][0:2] + y_true[i][j][2:4]) / 2

                anchor_idx = tf.cast(tf.where(anchor_eq), tf.int32)
                # Grid cell containing the box center.
                grid_xy = tf.cast(box_xy // (1 / grid_size), tf.int32)

                # grid[y][x][anchor] = (tx, ty, bw, bh, obj, class)
                indexes = indexes.write(
                    idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]])
                updates = updates.write(
                    idx, [box[0], box[1], box[2], box[3], 1,
                          y_true[i][j][4]])
                idx += 1

    # tf.print(indexes.stack())
    # tf.print(updates.stack())

    return tf.tensor_scatter_nd_update(
        y_true_out, indexes.stack(), updates.stack())
def transform_targets(y_train, anchors, anchor_masks, size):
    """Assign each ground-truth box to its best anchor and build the
    per-scale target grids.

    Returns a tuple with one target tensor per anchor mask (scale),
    from the coarsest grid (size // 32) doubling each scale.
    """
    y_outs = []
    grid_size = size // 32

    # calculate anchor index for true boxes
    anchors = tf.cast(anchors, tf.float32)
    anchor_area = anchors[..., 0] * anchors[..., 1]
    # Box width/height from the (x1, y1, x2, y2) corners.
    box_wh = y_train[..., 2:4] - y_train[..., 0:2]
    # Broadcast each box against every anchor.
    box_wh = tf.tile(tf.expand_dims(box_wh, -2),
                     (1, 1, tf.shape(anchors)[0], 1))
    box_area = box_wh[..., 0] * box_wh[..., 1]
    # IoU of axis-aligned boxes sharing a corner (width/height only).
    intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * \
        tf.minimum(box_wh[..., 1], anchors[..., 1])
    iou = intersection / (box_area + anchor_area - intersection)
    anchor_idx = tf.cast(tf.argmax(iou, axis=-1), tf.float32)
    anchor_idx = tf.expand_dims(anchor_idx, axis=-1)

    # Append best-anchor index as a 6th column of y_train.
    y_train = tf.concat([y_train, anchor_idx], axis=-1)

    for anchor_idxs in anchor_masks:
        y_outs.append(transform_targets_for_output(
            y_train, grid_size, anchor_idxs))
        grid_size *= 2

    return tuple(y_outs)
def transform_images(x_train, size):
    """Resize images to (size, size) and scale pixel values to [0, 1]."""
    resized = tf.image.resize(x_train, (size, size))
    return resized / 255
# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md#conversion-script-outline-conversion-script-outline
# Commented out fields are not required in our project
# Feature spec used by parse_tfrecord() to decode one Example.
IMAGE_FEATURE_MAP = {
    # 'image/width': tf.io.FixedLenFeature([], tf.int64),
    # 'image/height': tf.io.FixedLenFeature([], tf.int64),
    # 'image/filename': tf.io.FixedLenFeature([], tf.string),
    # 'image/source_id': tf.io.FixedLenFeature([], tf.string),
    # 'image/key/sha256': tf.io.FixedLenFeature([], tf.string),
    'image/encoded': tf.io.FixedLenFeature([], tf.string),
    # 'image/format': tf.io.FixedLenFeature([], tf.string),
    'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
    'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
    'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
    'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
    'image/object/class/text': tf.io.VarLenFeature(tf.string),
    # 'image/object/class/label': tf.io.VarLenFeature(tf.int64),
    # 'image/object/difficult': tf.io.VarLenFeature(tf.int64),
    # 'image/object/truncated': tf.io.VarLenFeature(tf.int64),
    # 'image/object/view': tf.io.VarLenFeature(tf.string),
}
def parse_tfrecord(tfrecord, class_table, size):
    """Decode one serialized Example into (image, boxes) tensors.

    Returns:
        x_train: image resized to (size, size, 3).
        y_train: (FLAGS.yolo_max_boxes, 5) rows of
            (xmin, ymin, xmax, ymax, class_id), zero-padded.
    """
    x = tf.io.parse_single_example(tfrecord, IMAGE_FEATURE_MAP)
    x_train = tf.image.decode_jpeg(x['image/encoded'], channels=3)
    x_train = tf.image.resize(x_train, (size, size))

    # Map class-name strings to numeric ids via the lookup table.
    class_text = tf.sparse.to_dense(
        x['image/object/class/text'], default_value='')
    labels = tf.cast(class_table.lookup(class_text), tf.float32)
    y_train = tf.stack([tf.sparse.to_dense(x['image/object/bbox/xmin']),
                        tf.sparse.to_dense(x['image/object/bbox/ymin']),
                        tf.sparse.to_dense(x['image/object/bbox/xmax']),
                        tf.sparse.to_dense(x['image/object/bbox/ymax']),
                        labels], axis=1)

    # Pad to a fixed number of rows so examples batch cleanly.
    # NOTE(review): assumes every image has at most FLAGS.yolo_max_boxes
    # boxes — a larger count would make the padding negative; verify.
    paddings = [[0, FLAGS.yolo_max_boxes - tf.shape(y_train)[0]], [0, 0]]
    y_train = tf.pad(y_train, paddings)

    return x_train, y_train
def load_tfrecord_dataset(file_pattern, class_file, size=416):
    """Build a tf.data pipeline of (image, boxes) pairs from tfrecords.

    Args:
        file_pattern: glob pattern of tfrecord shards.
        class_file: newline-separated class names; line number = class id.
        size: target image side length.
    """
    LINE_NUMBER = -1  # TODO: use tf.lookup.TextFileIndex.LINE_NUMBER
    # Static table mapping class-name string -> line index (-1 if missing).
    class_table = tf.lookup.StaticHashTable(tf.lookup.TextFileInitializer(
        class_file, tf.string, 0, tf.int64, LINE_NUMBER, delimiter="\n"), -1)

    files = tf.data.Dataset.list_files(file_pattern)
    dataset = files.flat_map(tf.data.TFRecordDataset)
    return dataset.map(lambda x: parse_tfrecord(x, class_table, size))
def load_fake_dataset():
    """Return a one-sample dataset (bundled test image + hardcoded boxes).

    Used as a fallback when no tfrecord dataset path is provided.
    """
    # Fix: close the file handle deterministically (the original
    # open(...).read() leaked it).
    # NOTE(review): the file is a .png but is decoded with decode_jpeg —
    # presumably works in this TF version; confirm, or switch to
    # tf.io.decode_image if it fails.
    with open('./data/girl.png', 'rb') as f:
        x_train = tf.image.decode_jpeg(f.read(), channels=3)
    x_train = tf.expand_dims(x_train, axis=0)

    # Normalized (x1, y1, x2, y2, class) rows, padded with zero rows.
    labels = [
        [0.18494931, 0.03049111, 0.9435849, 0.96302897, 0],
        [0.01586703, 0.35938117, 0.17582396, 0.6069674, 56],
        [0.09158827, 0.48252046, 0.26967454, 0.6403017, 67]
    ] + [[0, 0, 0, 0, 0]] * 5
    y_train = tf.convert_to_tensor(labels, tf.float32)
    y_train = tf.expand_dims(y_train, axis=0)

    return tf.data.Dataset.from_tensor_slices((x_train, y_train))
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/models.py
0 → 100644
View file @
c320b6ef
from
absl
import
flags
from
absl.flags
import
FLAGS
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.keras
import
Model
from
tensorflow.keras.layers
import
(
Add
,
Concatenate
,
Conv2D
,
Input
,
Lambda
,
LeakyReLU
,
MaxPool2D
,
UpSampling2D
,
ZeroPadding2D
,
BatchNormalization
,
)
from
tensorflow.keras.regularizers
import
l2
from
tensorflow.keras.losses
import
(
binary_crossentropy
,
sparse_categorical_crossentropy
)
from
.utils
import
broadcast_iou
# Detection post-processing knobs consumed by yolo_nms() and parse_tfrecord().
flags.DEFINE_integer('yolo_max_boxes', 100,
                     'maximum number of boxes per image')
flags.DEFINE_float('yolo_iou_threshold', 0.5, 'iou threshold')
flags.DEFINE_float('yolo_score_threshold', 0.5, 'score threshold')
# Anchor box (width, height) priors, normalized by the 416 training size.
yolo_anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
                        np.float32) / 416
# Which anchor indices each output scale uses (coarse -> fine).
yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])
# Tiny-variant anchors and masks (two scales instead of three).
yolo_tiny_anchors = np.array([(10, 14), (23, 27), (37, 58),
                              (81, 82), (135, 169), (344, 319)],
                             np.float32) / 416
yolo_tiny_anchor_masks = np.array([[3, 4, 5], [0, 1, 2]])
def DarknetConv(x, filters, size, strides=1, batch_norm=True):
    """Darknet conv block: Conv2D, then optional BatchNorm + LeakyReLU.

    Strided convolutions use explicit top-left half padding with a
    'valid' conv; stride-1 convolutions use 'same' padding. Bias is
    only used when batch norm is disabled.
    """
    if strides != 1:
        x = ZeroPadding2D(((1, 0), (1, 0)))(x)  # top left half-padding
    conv = Conv2D(filters=filters,
                  kernel_size=size,
                  strides=strides,
                  padding='same' if strides == 1 else 'valid',
                  use_bias=not batch_norm,
                  kernel_regularizer=l2(0.0005))
    x = conv(x)
    if batch_norm:
        x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.1)(x)
    return x
def DarknetResidual(x, filters):
    """Residual unit: 1x1 bottleneck then 3x3 conv, added to the input."""
    shortcut = x
    y = DarknetConv(x, filters // 2, 1)
    y = DarknetConv(y, filters, 3)
    return Add()([shortcut, y])
def DarknetBlock(x, filters, blocks):
    """Downsample by 2 with a strided conv, then stack residual units."""
    out = DarknetConv(x, filters, 3, strides=2)
    for _block in range(blocks):
        out = DarknetResidual(out, filters)
    return out
def Darknet(name=None):
    """Darknet-53 backbone; returns features at three scales."""
    x = inputs = Input([None, None, 3])
    x = DarknetConv(x, 32, 3)
    x = DarknetBlock(x, 64, 1)
    x = DarknetBlock(x, 128, 2)
    # skip connection
    x = x_36 = DarknetBlock(x, 256, 8)
    # skip connection
    x = x_61 = DarknetBlock(x, 512, 8)
    x = DarknetBlock(x, 1024, 4)
    # Expose the two intermediate feature maps for the FPN-style neck.
    return tf.keras.Model(inputs, (x_36, x_61, x), name=name)
def DarknetTiny(name=None):
    """Tiny Darknet backbone; returns two feature maps."""
    x = inputs = Input([None, None, 3])
    x = DarknetConv(x, 16, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 32, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 64, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 128, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = x_8 = DarknetConv(x, 256, 3)  # skip connection
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 512, 3)
    # Stride-1 pool keeps the spatial size here.
    x = MaxPool2D(2, 1, 'same')(x)
    x = DarknetConv(x, 1024, 3)
    return tf.keras.Model(inputs, (x_8, x), name=name)
def YoloConv(filters, name=None):
    """Build the neck conv stack for one scale as a named sub-model.

    When called with a tuple, the first tensor is upsampled and
    concatenated with the skip-connection tensor before the 5-conv stack.
    """
    def yolo_conv(x_in):
        if isinstance(x_in, tuple):
            inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
            x, x_skip = inputs

            # concat with skip connection
            x = DarknetConv(x, filters, 1)
            x = UpSampling2D(2)(x)
            x = Concatenate()([x, x_skip])
        else:
            x = inputs = Input(x_in.shape[1:])

        # Alternating 1x1 / 3x3 convolutions.
        x = DarknetConv(x, filters, 1)
        x = DarknetConv(x, filters * 2, 3)
        x = DarknetConv(x, filters, 1)
        x = DarknetConv(x, filters * 2, 3)
        x = DarknetConv(x, filters, 1)
        # Wrap in a named Model so the layer can be addressed by name
        # (used by the transfer-learning code), then apply it.
        return Model(inputs, x, name=name)(x_in)
    return yolo_conv
def YoloConvTiny(filters, name=None):
    """Tiny-variant neck: like YoloConv but with a single 1x1 conv."""
    def yolo_conv(x_in):
        if isinstance(x_in, tuple):
            inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
            x, x_skip = inputs

            # concat with skip connection
            x = DarknetConv(x, filters, 1)
            x = UpSampling2D(2)(x)
            x = Concatenate()([x, x_skip])
        else:
            x = inputs = Input(x_in.shape[1:])
            x = DarknetConv(x, filters, 1)

        # Named sub-model so transfer-learning code can look it up.
        return Model(inputs, x, name=name)(x_in)
    return yolo_conv
def YoloOutput(filters, anchors, classes, name=None):
    """Detection head: predict (classes + 5) values per anchor per cell.

    Output is reshaped to (batch, grid, grid, anchors, classes + 5).
    """
    def yolo_output(x_in):
        x = inputs = Input(x_in.shape[1:])
        x = DarknetConv(x, filters * 2, 3)
        # Raw predictions; no batch norm on the final projection.
        x = DarknetConv(x, anchors * (classes + 5), 1, batch_norm=False)
        x = Lambda(lambda x: tf.reshape(x, (-1, tf.shape(x)[1],
                                            tf.shape(x)[2],
                                            anchors, classes + 5)))(x)
        return tf.keras.Model(inputs, x, name=name)(x_in)
    return yolo_output
# As tensorflow lite doesn't support tf.size used in tf.meshgrid,
# we reimplemented a simple meshgrid function that use basic tf function.
def _meshgrid(n_a, n_b):
    """Return [X, Y] index grids of shape (n_b, n_a), like tf.meshgrid."""
    return [
        tf.reshape(tf.tile(tf.range(n_a), [n_b]), (n_b, n_a)),
        tf.reshape(tf.repeat(tf.range(n_b), n_a), (n_b, n_a))
    ]
def yolo_boxes(pred, anchors, classes):
    """Decode raw head output into boxes, objectness and class scores.

    Returns (bbox, objectness, class_probs, pred_box) where bbox is
    (x1, y1, x2, y2) in normalized image coordinates and pred_box is
    the raw (x, y, w, h) kept for the loss computation.
    """
    # pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...classes))
    grid_size = tf.shape(pred)[1:3]
    box_xy, box_wh, objectness, class_probs = tf.split(
        pred, (2, 2, 1, classes), axis=-1)

    box_xy = tf.sigmoid(box_xy)
    objectness = tf.sigmoid(objectness)
    class_probs = tf.sigmoid(class_probs)
    pred_box = tf.concat((box_xy, box_wh), axis=-1)  # original xywh for loss

    # !!! grid[x][y] == (y, x)
    grid = _meshgrid(grid_size[1], grid_size[0])
    grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)  # [gx, gy, 1, 2]

    # Cell-relative center offset -> absolute normalized coordinates.
    box_xy = (box_xy + tf.cast(grid, tf.float32)) / \
        tf.cast(grid_size, tf.float32)
    # Width/height are predicted as log-scale factors of the anchors.
    box_wh = tf.exp(box_wh) * anchors

    box_x1y1 = box_xy - box_wh / 2
    box_x2y2 = box_xy + box_wh / 2
    bbox = tf.concat([box_x1y1, box_x2y2], axis=-1)

    return bbox, objectness, class_probs, pred_box
def yolo_nms(outputs, anchors, masks, classes):
    """Merge per-scale outputs and run soft non-max suppression.

    Returns batched (boxes, scores, classes, valid_detections), each
    padded to FLAGS.yolo_max_boxes.
    NOTE(review): the squeeze(axis=0) below assumes batch size 1.
    """
    # boxes, conf, type
    b, c, t = [], [], []

    # Flatten every scale's (grid, grid, anchors) into one box axis.
    for o in outputs:
        b.append(tf.reshape(o[0], (tf.shape(o[0])[0], -1,
                                   tf.shape(o[0])[-1])))
        c.append(tf.reshape(o[1], (tf.shape(o[1])[0], -1,
                                   tf.shape(o[1])[-1])))
        t.append(tf.reshape(o[2], (tf.shape(o[2])[0], -1,
                                   tf.shape(o[2])[-1])))

    bbox = tf.concat(b, axis=1)
    confidence = tf.concat(c, axis=1)
    class_probs = tf.concat(t, axis=1)

    # If we only have one class, do not multiply by class_prob (always 0.5)
    if classes == 1:
        scores = confidence
    else:
        scores = confidence * class_probs

    # Per-box best score and best class.
    dscores = tf.squeeze(scores, axis=0)
    scores = tf.reduce_max(dscores, [1])
    bbox = tf.reshape(bbox, (-1, 4))
    classes = tf.argmax(dscores, 1)
    selected_indices, selected_scores = \
        tf.image.non_max_suppression_with_scores(
            boxes=bbox,
            scores=scores,
            max_output_size=FLAGS.yolo_max_boxes,
            iou_threshold=FLAGS.yolo_iou_threshold,
            score_threshold=FLAGS.yolo_score_threshold,
            soft_nms_sigma=0.5
        )

    num_valid_nms_boxes = tf.shape(selected_indices)[0]

    # Pad indices/scores out to the fixed yolo_max_boxes length.
    selected_indices = tf.concat(
        [selected_indices,
         tf.zeros(FLAGS.yolo_max_boxes - num_valid_nms_boxes, tf.int32)], 0)
    selected_scores = tf.concat(
        [selected_scores,
         tf.zeros(FLAGS.yolo_max_boxes - num_valid_nms_boxes,
                  tf.float32)], -1)

    # Re-add the batch dimension on every output.
    boxes = tf.gather(bbox, selected_indices)
    boxes = tf.expand_dims(boxes, axis=0)
    scores = selected_scores
    scores = tf.expand_dims(scores, axis=0)
    classes = tf.gather(classes, selected_indices)
    classes = tf.expand_dims(classes, axis=0)
    valid_detections = num_valid_nms_boxes
    valid_detections = tf.expand_dims(valid_detections, axis=0)

    return boxes, scores, classes, valid_detections
def YoloV3(size=None, channels=3, anchors=yolo_anchors,
           masks=yolo_anchor_masks, classes=80, training=False):
    """Build the full YOLOv3 Keras model.

    Args:
        size: square input resolution (None for variable size).
        channels: number of input channels.
        anchors: anchor boxes; masks: per-scale anchor index groups.
        classes: number of object classes.
        training: if True return the raw three-scale outputs (for loss);
            otherwise append box decoding + NMS for inference.

    Returns:
        tf.keras.Model named 'yolov3'.
    """
    features = inputs = Input([size, size, channels], name='input')

    # Backbone: returns two skip connections plus the deepest feature map.
    skip_36, skip_61, features = Darknet(name='yolo_darknet')(features)

    features = YoloConv(512, name='yolo_conv_0')(features)
    output_0 = YoloOutput(512, len(masks[0]), classes,
                          name='yolo_output_0')(features)

    features = YoloConv(256, name='yolo_conv_1')((features, skip_61))
    output_1 = YoloOutput(256, len(masks[1]), classes,
                          name='yolo_output_1')(features)

    features = YoloConv(128, name='yolo_conv_2')((features, skip_36))
    output_2 = YoloOutput(128, len(masks[2]), classes,
                          name='yolo_output_2')(features)

    if training:
        return Model(inputs, (output_0, output_1, output_2), name='yolov3')

    # Inference head: decode raw outputs to boxes, then fuse with NMS.
    boxes_0 = Lambda(lambda p: yolo_boxes(p, anchors[masks[0]], classes),
                     name='yolo_boxes_0')(output_0)
    boxes_1 = Lambda(lambda p: yolo_boxes(p, anchors[masks[1]], classes),
                     name='yolo_boxes_1')(output_1)
    boxes_2 = Lambda(lambda p: yolo_boxes(p, anchors[masks[2]], classes),
                     name='yolo_boxes_2')(output_2)

    outputs = Lambda(lambda p: yolo_nms(p, anchors, masks, classes),
                     name='yolo_nms')((boxes_0[:3], boxes_1[:3], boxes_2[:3]))

    return Model(inputs, outputs, name='yolov3')
def YoloV3Tiny(size=None, channels=3, anchors=yolo_tiny_anchors,
               masks=yolo_tiny_anchor_masks, classes=80, training=False):
    """Build the tiny YOLOv3 Keras model (two output scales).

    Args:
        size: square input resolution (None for variable size).
        channels: number of input channels.
        anchors: tiny anchor boxes; masks: per-scale anchor index groups.
        classes: number of object classes.
        training: if True return the raw two-scale outputs (for loss);
            otherwise append box decoding + NMS for inference.

    Returns:
        tf.keras.Model ('yolov3' in training mode, 'yolov3_tiny' otherwise).
    """
    features = inputs = Input([size, size, channels], name='input')

    # Tiny backbone: one skip connection plus the deepest feature map.
    skip_8, features = DarknetTiny(name='yolo_darknet')(features)

    features = YoloConvTiny(256, name='yolo_conv_0')(features)
    output_0 = YoloOutput(256, len(masks[0]), classes,
                          name='yolo_output_0')(features)

    features = YoloConvTiny(128, name='yolo_conv_1')((features, skip_8))
    output_1 = YoloOutput(128, len(masks[1]), classes,
                          name='yolo_output_1')(features)

    if training:
        return Model(inputs, (output_0, output_1), name='yolov3')

    # Inference head: decode raw outputs to boxes, then fuse with NMS.
    boxes_0 = Lambda(lambda p: yolo_boxes(p, anchors[masks[0]], classes),
                     name='yolo_boxes_0')(output_0)
    boxes_1 = Lambda(lambda p: yolo_boxes(p, anchors[masks[1]], classes),
                     name='yolo_boxes_1')(output_1)
    outputs = Lambda(lambda p: yolo_nms(p, anchors, masks, classes),
                     name='yolo_nms')((boxes_0[:3], boxes_1[:3]))
    return Model(inputs, outputs, name='yolov3_tiny')
def YoloLoss(anchors, classes=80, ignore_thresh=0.5):
    """Create the YOLOv3 loss function for one output scale.

    Args:
        anchors: anchor boxes assigned to this scale.
        classes: number of object classes.
        ignore_thresh: IoU above which an unmatched prediction is ignored
            (not penalized as a false positive).

    Returns:
        A ``yolo_loss(y_true, y_pred)`` callable usable as a Keras loss.
    """
    def yolo_loss(y_true, y_pred):
        """Per-scale YOLO loss: xy + wh + objectness + class terms,
        summed over grid cells and anchors to shape (batch,)."""
        # 1. transform all pred outputs
        # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
        pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes(
            y_pred, anchors, classes)
        pred_xy = pred_xywh[..., 0:2]
        pred_wh = pred_xywh[..., 2:4]

        # 2. transform all true outputs
        # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls))
        true_box, true_obj, true_class_idx = tf.split(
            y_true, (4, 1, 1), axis=-1)
        # corner coords -> center / width-height (relative units)
        true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2
        true_wh = true_box[..., 2:4] - true_box[..., 0:2]

        # give higher weights to small boxes (scale in [1, 2])
        box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1]

        # 3. inverting the pred box equations: map ground truth back into
        # the raw network-output space so it is comparable with pred_xywh
        grid_size = tf.shape(y_true)[1]
        grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size))
        grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)
        true_xy = true_xy * tf.cast(grid_size, tf.float32) - \
            tf.cast(grid, tf.float32)
        true_wh = tf.math.log(true_wh / anchors)
        # log(0) = -inf for empty cells; zero those entries out
        true_wh = tf.where(tf.math.is_inf(true_wh),
                           tf.zeros_like(true_wh), true_wh)

        # 4. calculate all masks
        obj_mask = tf.squeeze(true_obj, -1)
        # ignore false positive when iou is over threshold: a prediction
        # overlapping any true box strongly enough is neither rewarded
        # nor punished by the objectness term
        best_iou = tf.map_fn(
            lambda x: tf.reduce_max(broadcast_iou(x[0], tf.boolean_mask(
                x[1], tf.cast(x[2], tf.bool))), axis=-1),
            (pred_box, true_box, obj_mask),
            tf.float32)
        ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)

        # 5. calculate all losses
        xy_loss = obj_mask * box_loss_scale * \
            tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1)
        wh_loss = obj_mask * box_loss_scale * \
            tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1)
        obj_loss = binary_crossentropy(true_obj, pred_obj)
        # positives always count; negatives only when not ignored
        obj_loss = obj_mask * obj_loss + \
            (1 - obj_mask) * ignore_mask * obj_loss
        # TODO: use binary_crossentropy instead
        class_loss = obj_mask * sparse_categorical_crossentropy(
            true_class_idx, pred_class)

        # 6. sum over (batch, gridx, gridy, anchors) => (batch, 1)
        xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3))
        wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3))
        obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3))
        class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3))

        return xy_loss + wh_loss + obj_loss + class_loss
    return yolo_loss
TensorFlow2x/ComputeVision/Detection/YOLOv3/yolov3_tf2/yolov3_tf2/utils.py
0 → 100644
View file @
c320b6ef
from
absl
import
logging
import
numpy
as
np
import
tensorflow
as
tf
import
cv2
# Names of the composite sub-models inside the full YOLOv3 Keras model.
# load_darknet_weights() reads the darknet weight file sequentially, so
# this ordering must match the layer order in which weights were saved.
YOLOV3_LAYER_LIST = [
    'yolo_darknet',
    'yolo_conv_0',
    'yolo_output_0',
    'yolo_conv_1',
    'yolo_output_1',
    'yolo_conv_2',
    'yolo_output_2',
]

# Same contract for the tiny variant (two output scales instead of three).
YOLOV3_TINY_LAYER_LIST = [
    'yolo_darknet',
    'yolo_conv_0',
    'yolo_output_0',
    'yolo_conv_1',
    'yolo_output_1',
]
def load_darknet_weights(model, weights_file, tiny=False):
    """Load pretrained Darknet-format weights into a YOLOv3 Keras model.

    The darknet file stores a 5-int32 header followed by the conv/bn
    parameters of every convolution, in the fixed layer order given by
    YOLOV3_LAYER_LIST / YOLOV3_TINY_LAYER_LIST.

    Args:
        model: the Keras model built by YoloV3 / YoloV3Tiny.
        weights_file: path to the ``.weights`` file.
        tiny: True when loading into the tiny variant.

    Raises:
        AssertionError: if the file contains unread trailing data
            (layer/weight mismatch).
    """
    # `with` ensures the file is closed even if get_layer/set_weights or
    # the trailing assert raises (the original open()/close() leaked it).
    with open(weights_file, 'rb') as wf:
        # header: major, minor, revision, images-seen counter, padding
        major, minor, revision, seen, _ = np.fromfile(
            wf, dtype=np.int32, count=5)

        layers = YOLOV3_TINY_LAYER_LIST if tiny else YOLOV3_LAYER_LIST

        for layer_name in layers:
            sub_model = model.get_layer(layer_name)
            for i, layer in enumerate(sub_model.layers):
                if not layer.name.startswith('conv2d'):
                    continue
                # a conv followed immediately by batch_norm has no bias
                batch_norm = None
                if i + 1 < len(sub_model.layers) and \
                        sub_model.layers[i + 1].name.startswith('batch_norm'):
                    batch_norm = sub_model.layers[i + 1]

                logging.info("{}/{} {}".format(
                    sub_model.name, layer.name,
                    'bn' if batch_norm else 'bias'))

                filters = layer.filters
                size = layer.kernel_size[0]
                in_dim = layer.get_input_shape_at(0)[-1]

                if batch_norm is None:
                    conv_bias = np.fromfile(
                        wf, dtype=np.float32, count=filters)
                else:
                    # darknet [beta, gamma, mean, variance]
                    bn_weights = np.fromfile(
                        wf, dtype=np.float32, count=4 * filters)
                    # tf [gamma, beta, mean, variance]
                    bn_weights = bn_weights.reshape(
                        (4, filters))[[1, 0, 2, 3]]

                # darknet shape (out_dim, in_dim, height, width)
                conv_shape = (filters, in_dim, size, size)
                # np.prod: np.product is deprecated and removed in NumPy 2.0
                conv_weights = np.fromfile(
                    wf, dtype=np.float32, count=int(np.prod(conv_shape)))
                # tf shape (height, width, in_dim, out_dim)
                conv_weights = conv_weights.reshape(
                    conv_shape).transpose([2, 3, 1, 0])

                if batch_norm is None:
                    layer.set_weights([conv_weights, conv_bias])
                else:
                    layer.set_weights([conv_weights])
                    batch_norm.set_weights(bn_weights)

        assert len(wf.read()) == 0, 'failed to read all data'
def broadcast_iou(box_1, box_2):
    """IoU between every box in box_1 and every box in box_2, broadcast.

    Args:
        box_1: (..., (x1, y1, x2, y2))
        box_2: (N, (x1, y1, x2, y2))

    Returns:
        IoU tensor of shape (..., N).
    """
    # insert broadcastable axes: box_1 -> (..., 1, 4), box_2 -> (1, N, 4)
    box_1 = tf.expand_dims(box_1, -2)
    box_2 = tf.expand_dims(box_2, 0)
    # new_shape: (..., N, (x1, y1, x2, y2))
    new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2))
    box_1 = tf.broadcast_to(box_1, new_shape)
    box_2 = tf.broadcast_to(box_2, new_shape)

    # intersection extent, clamped at 0 for disjoint boxes
    overlap_w = tf.maximum(
        tf.minimum(box_1[..., 2], box_2[..., 2]) -
        tf.maximum(box_1[..., 0], box_2[..., 0]), 0)
    overlap_h = tf.maximum(
        tf.minimum(box_1[..., 3], box_2[..., 3]) -
        tf.maximum(box_1[..., 1], box_2[..., 1]), 0)
    overlap_area = overlap_w * overlap_h

    area_1 = (box_1[..., 2] - box_1[..., 0]) * \
        (box_1[..., 3] - box_1[..., 1])
    area_2 = (box_2[..., 2] - box_2[..., 0]) * \
        (box_2[..., 3] - box_2[..., 1])

    # IoU = intersection / union
    return overlap_area / (area_1 + area_2 - overlap_area)
def draw_outputs(img, outputs, class_names):
    """Draw detection boxes with class name and confidence onto an image.

    Args:
        img: HWC image array (mutated/reassigned via cv2 drawing calls).
        outputs: (boxes, objectness, classes, nums), each batched;
            only batch element 0 is drawn.
        class_names: list mapping class index -> display name.

    Returns:
        The annotated image.
    """
    # take batch element 0 of each output tensor
    boxes, objectness, classes, nums = [o[0] for o in outputs]
    # (w, h) scaling factors — image shape is (h, w, c)
    scale = np.flip(img.shape[0:2])
    for i in range(nums):
        top_left = tuple((np.array(boxes[i][0:2]) * scale).astype(np.int32))
        bottom_right = tuple(
            (np.array(boxes[i][2:4]) * scale).astype(np.int32))
        img = cv2.rectangle(img, top_left, bottom_right, (255, 0, 0), 2)
        label = '{} {:.4f}'.format(
            class_names[int(classes[i])], objectness[i])
        img = cv2.putText(img, label, top_left,
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
    return img
def draw_labels(x, y, class_names):
    """Render ground-truth boxes and class labels onto an image tensor.

    Args:
        x: image tensor (converted to numpy for drawing).
        y: label tensor with rows (x1, y1, x2, y2, class).
        class_names: list mapping class index -> display name.

    Returns:
        The annotated numpy image.
    """
    img = x.numpy()
    boxes, classes = tf.split(y, (4, 1), axis=-1)
    classes = classes[..., 0]
    # (w, h) scaling factors — image shape is (h, w, c)
    scale = np.flip(img.shape[0:2])
    for i, box in enumerate(boxes):
        top_left = tuple((np.array(box[0:2]) * scale).astype(np.int32))
        bottom_right = tuple((np.array(box[2:4]) * scale).astype(np.int32))
        img = cv2.rectangle(img, top_left, bottom_right, (255, 0, 0), 2)
        img = cv2.putText(img, class_names[classes[i]], top_left,
                          cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
    return img
def freeze_all(model, frozen=True):
    """Recursively set the trainable flag on a layer/model and its children.

    Args:
        model: a Keras layer or model.
        frozen: True to freeze (trainable=False), False to unfreeze.
    """
    model.trainable = not frozen
    # nested models carry their own layers; recurse into them
    if isinstance(model, tf.keras.Model):
        for child in model.layers:
            freeze_all(child, frozen)
Prev
1
…
6
7
8
9
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment