Commit d0d91e12 authored by huchen

Merge branch 'tf2' into 'main'

tf2 detection

See merge request dcutoolkit/deeplearing/dlexamples_new!2
parents 2795dc1f c320b6ef
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import numpy as np
import tensorflow as tf
from yolov3_tf2.models import (
    YoloV3, YoloV3Tiny
)
from yolov3_tf2.dataset import load_tfrecord_dataset, transform_images
from yolov3_tf2.utils import draw_outputs

flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
flags.DEFINE_integer('size', 416, 'resize images to')
flags.DEFINE_string(
    'dataset', './data/voc2012_train.tfrecord', 'path to dataset')
flags.DEFINE_string('output', './output.jpg', 'path to output image')
def main(_argv):
    class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
    logging.info('classes loaded')

    dataset = load_tfrecord_dataset(FLAGS.dataset, FLAGS.classes, FLAGS.size)
    dataset = dataset.shuffle(512)

    for image, labels in dataset.take(1):
        boxes = []
        scores = []
        classes = []
        for x1, y1, x2, y2, label in labels:
            if x1 == 0 and x2 == 0:
                continue
            boxes.append((x1, y1, x2, y2))
            scores.append(1)
            classes.append(label)
        nums = [len(boxes)]
        boxes = [boxes]
        scores = [scores]
        classes = [classes]

        logging.info('labels:')
        for i in range(nums[0]):
            logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
                                               np.array(scores[0][i]),
                                               np.array(boxes[0][i])))

        img = cv2.cvtColor(image.numpy(), cv2.COLOR_RGB2BGR)
        img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
        cv2.imwrite(FLAGS.output, img)
        logging.info('output saved to: {}'.format(FLAGS.output))


if __name__ == '__main__':
    app.run(main)
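# Example invocation (a minimal sketch; the script filename and data paths are
# assumptions, adjust them to this repo's layout):
#
#   python tools/visualize_dataset.py \
#       --classes ./data/voc2012.names \
#       --dataset ./data/voc2012_train.tfrecord \
#       --output ./output.jpg
#
# This draws the ground-truth boxes of one random record onto the image and
# writes it to --output, a quick sanity check that the TFRecord was built
# correctly.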
import time
import os
import hashlib

from absl import app, flags, logging
from absl.flags import FLAGS
import tensorflow as tf
import lxml.etree
import tqdm

flags.DEFINE_string('data_dir', './data/voc2012_raw/VOCdevkit/VOC2012/',
                    'path to raw PASCAL VOC dataset')
flags.DEFINE_enum('split', 'train', [
                  'train', 'val'], 'specify train or val split')
flags.DEFINE_string('output_file', './data/voc2012_train.tfrecord',
                    'output dataset')
flags.DEFINE_string('classes', './data/voc2012.names', 'classes file')
def build_example(annotation, class_map):
    img_path = os.path.join(
        FLAGS.data_dir, 'JPEGImages', annotation['filename'])
    img_raw = open(img_path, 'rb').read()
    key = hashlib.sha256(img_raw).hexdigest()

    width = int(annotation['size']['width'])
    height = int(annotation['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    views = []
    difficult_obj = []
    if 'object' in annotation:
        for obj in annotation['object']:
            difficult = bool(int(obj['difficult']))
            difficult_obj.append(int(difficult))

            xmin.append(float(obj['bndbox']['xmin']) / width)
            ymin.append(float(obj['bndbox']['ymin']) / height)
            xmax.append(float(obj['bndbox']['xmax']) / width)
            ymax.append(float(obj['bndbox']['ymax']) / height)
            classes_text.append(obj['name'].encode('utf8'))
            classes.append(class_map[obj['name']])
            truncated.append(int(obj['truncated']))
            views.append(obj['pose'].encode('utf8'))

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
        'image/width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
        'image/filename': tf.train.Feature(bytes_list=tf.train.BytesList(value=[
            annotation['filename'].encode('utf8')])),
        'image/source_id': tf.train.Feature(bytes_list=tf.train.BytesList(value=[
            annotation['filename'].encode('utf8')])),
        'image/key/sha256': tf.train.Feature(bytes_list=tf.train.BytesList(value=[key.encode('utf8')])),
        'image/encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
        'image/format': tf.train.Feature(bytes_list=tf.train.BytesList(value=['jpeg'.encode('utf8')])),
        'image/object/bbox/xmin': tf.train.Feature(float_list=tf.train.FloatList(value=xmin)),
        'image/object/bbox/xmax': tf.train.Feature(float_list=tf.train.FloatList(value=xmax)),
        'image/object/bbox/ymin': tf.train.Feature(float_list=tf.train.FloatList(value=ymin)),
        'image/object/bbox/ymax': tf.train.Feature(float_list=tf.train.FloatList(value=ymax)),
        'image/object/class/text': tf.train.Feature(bytes_list=tf.train.BytesList(value=classes_text)),
        'image/object/class/label': tf.train.Feature(int64_list=tf.train.Int64List(value=classes)),
        'image/object/difficult': tf.train.Feature(int64_list=tf.train.Int64List(value=difficult_obj)),
        'image/object/truncated': tf.train.Feature(int64_list=tf.train.Int64List(value=truncated)),
        'image/object/view': tf.train.Feature(bytes_list=tf.train.BytesList(value=views)),
    }))
    return example
def parse_xml(xml):
    if not len(xml):
        return {xml.tag: xml.text}
    result = {}
    for child in xml:
        child_result = parse_xml(child)
        if child.tag != 'object':
            result[child.tag] = child_result[child.tag]
        else:
            if child.tag not in result:
                result[child.tag] = []
            result[child.tag].append(child_result[child.tag])
    return {xml.tag: result}
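# For illustration, parse_xml turns a (truncated, hypothetical) VOC annotation
#
#   <annotation>
#     <filename>img1.jpg</filename>
#     <object><name>dog</name>...</object>
#     <object><name>cat</name>...</object>
#   </annotation>
#
# into nested dicts, collecting the repeated <object> tags into a list:
#
#   {'annotation': {'filename': 'img1.jpg',
#                   'object': [{'name': 'dog', ...}, {'name': 'cat', ...}]}}
#
# which is why build_example can iterate over annotation['object'].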
def main(_argv):
    class_map = {name: idx for idx, name in enumerate(
        open(FLAGS.classes).read().splitlines())}
    logging.info("Class mapping loaded: %s", class_map)

    writer = tf.io.TFRecordWriter(FLAGS.output_file)
    image_list = open(os.path.join(
        FLAGS.data_dir, 'ImageSets', 'Main', '%s.txt' % FLAGS.split)).read().splitlines()
    logging.info("Image list loaded: %d", len(image_list))
    for name in tqdm.tqdm(image_list):
        annotation_xml = os.path.join(
            FLAGS.data_dir, 'Annotations', name + '.xml')
        annotation_xml = lxml.etree.fromstring(open(annotation_xml).read())
        annotation = parse_xml(annotation_xml)['annotation']
        tf_example = build_example(annotation, class_map)
        writer.write(tf_example.SerializeToString())
    writer.close()
    logging.info("Done")


if __name__ == '__main__':
    app.run(main)
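# Example invocation (a sketch; assumes the VOC2012 tarball was extracted
# under ./data/voc2012_raw and that this script is saved as tools/voc2012.py):
#
#   python tools/voc2012.py \
#       --data_dir ./data/voc2012_raw/VOCdevkit/VOC2012/ \
#       --split train \
#       --output_file ./data/voc2012_train.tfrecord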
from absl import app, flags, logging
from absl.flags import FLAGS
import tensorflow as tf
import numpy as np
import cv2
import time
from tensorflow.keras.callbacks import (
    ReduceLROnPlateau,
    EarlyStopping,
    ModelCheckpoint,
    TensorBoard
)
from yolov3_tf2.models import (
    YoloV3, YoloV3Tiny, YoloLoss,
    yolo_anchors, yolo_anchor_masks,
    yolo_tiny_anchors, yolo_tiny_anchor_masks
)
from yolov3_tf2.utils import freeze_all
import yolov3_tf2.dataset as dataset

flags.DEFINE_string('dataset', '', 'path to dataset')
flags.DEFINE_string('val_dataset', '', 'path to validation dataset')
flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
                    'path to weights file')
flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
flags.DEFINE_enum('mode', 'fit', ['fit', 'eager_fit', 'eager_tf'],
                  'fit: model.fit, '
                  'eager_fit: model.fit(run_eagerly=True), '
                  'eager_tf: custom GradientTape')
flags.DEFINE_enum('transfer', 'none',
                  ['none', 'darknet', 'no_output', 'frozen', 'fine_tune'],
                  'none: Training from scratch, '
                  'darknet: Transfer darknet, '
                  'no_output: Transfer all but output, '
                  'frozen: Transfer and freeze all, '
                  'fine_tune: Transfer all and freeze darknet only')
flags.DEFINE_integer('size', 416, 'image size')
flags.DEFINE_integer('epochs', 2, 'number of epochs')
flags.DEFINE_integer('batch_size', 8, 'batch size')
flags.DEFINE_float('learning_rate', 1e-3, 'learning rate')
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
flags.DEFINE_integer('weights_num_classes', None, 'specify num class for `weights` file if different, '
                     'useful in transfer learning with different number of classes')
flags.DEFINE_boolean('multi_gpu', False, 'Use if wishing to train with more than 1 GPU.')
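# Example invocations illustrating the transfer modes above (sketches, not
# verified commands; the script name train.py and the converted weight file
# are assumptions):
#
#   # Train from scratch on a VOC TFRecord:
#   python train.py --dataset ./data/voc2012_train.tfrecord \
#       --classes ./data/voc2012.names --num_classes 20 --transfer none
#
#   # Transfer the pretrained 80-class darknet backbone and freeze it:
#   python train.py --dataset ./data/voc2012_train.tfrecord \
#       --classes ./data/voc2012.names --num_classes 20 \
#       --weights ./checkpoints/yolov3.tf --weights_num_classes 80 \
#       --transfer darknet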
def setup_model():
    if FLAGS.tiny:
        model = YoloV3Tiny(FLAGS.size, training=True,
                           classes=FLAGS.num_classes)
        anchors = yolo_tiny_anchors
        anchor_masks = yolo_tiny_anchor_masks
    else:
        model = YoloV3(FLAGS.size, training=True, classes=FLAGS.num_classes)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

    # Configure the model for transfer learning
    if FLAGS.transfer == 'none':
        pass  # Nothing to do
    elif FLAGS.transfer in ['darknet', 'no_output']:
        # Darknet transfer is a special case that works
        # with incompatible number of classes.
        # Reset top layers.
        if FLAGS.tiny:
            model_pretrained = YoloV3Tiny(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        else:
            model_pretrained = YoloV3(
                FLAGS.size, training=True, classes=FLAGS.weights_num_classes or FLAGS.num_classes)
        model_pretrained.load_weights(FLAGS.weights)

        if FLAGS.transfer == 'darknet':
            model.get_layer('yolo_darknet').set_weights(
                model_pretrained.get_layer('yolo_darknet').get_weights())
            freeze_all(model.get_layer('yolo_darknet'))
        elif FLAGS.transfer == 'no_output':
            for l in model.layers:
                if not l.name.startswith('yolo_output'):
                    l.set_weights(model_pretrained.get_layer(
                        l.name).get_weights())
                    freeze_all(l)
    else:
        # All other transfer modes require matching classes
        model.load_weights(FLAGS.weights)
        if FLAGS.transfer == 'fine_tune':
            # freeze darknet and fine tune other layers
            darknet = model.get_layer('yolo_darknet')
            freeze_all(darknet)
        elif FLAGS.transfer == 'frozen':
            # freeze everything
            freeze_all(model)

    optimizer = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
    loss = [YoloLoss(anchors[mask], classes=FLAGS.num_classes)
            for mask in anchor_masks]

    model.compile(optimizer=optimizer, loss=loss,
                  run_eagerly=(FLAGS.mode == 'eager_fit'))

    return model, optimizer, loss, anchors, anchor_masks
def main(_argv):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')

    # Setup
    if FLAGS.multi_gpu:
        for physical_device in physical_devices:
            tf.config.experimental.set_memory_growth(physical_device, True)

        strategy = tf.distribute.MirroredStrategy()
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        # Scale the global batch size with the number of replicas
        BATCH_SIZE = FLAGS.batch_size * strategy.num_replicas_in_sync
        FLAGS.batch_size = BATCH_SIZE

        with strategy.scope():
            model, optimizer, loss, anchors, anchor_masks = setup_model()
    else:
        model, optimizer, loss, anchors, anchor_masks = setup_model()

    if FLAGS.dataset:
        train_dataset = dataset.load_tfrecord_dataset(
            FLAGS.dataset, FLAGS.classes, FLAGS.size)
    else:
        train_dataset = dataset.load_fake_dataset()
    train_dataset = train_dataset.shuffle(buffer_size=512)
    train_dataset = train_dataset.batch(FLAGS.batch_size)
    train_dataset = train_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))
    train_dataset = train_dataset.prefetch(
        buffer_size=tf.data.experimental.AUTOTUNE)

    if FLAGS.val_dataset:
        val_dataset = dataset.load_tfrecord_dataset(
            FLAGS.val_dataset, FLAGS.classes, FLAGS.size)
    else:
        val_dataset = dataset.load_fake_dataset()
    val_dataset = val_dataset.batch(FLAGS.batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, FLAGS.size),
        dataset.transform_targets(y, anchors, anchor_masks, FLAGS.size)))

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
        avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

        for epoch in range(1, FLAGS.epochs + 1):
            for batch, (images, labels) in enumerate(train_dataset):
                with tf.GradientTape() as tape:
                    outputs = model(images, training=True)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                grads = tape.gradient(total_loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))

                logging.info("{}_train_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_loss.update_state(total_loss)

            for batch, (images, labels) in enumerate(val_dataset):
                outputs = model(images)
                regularization_loss = tf.reduce_sum(model.losses)
                pred_loss = []
                for output, label, loss_fn in zip(outputs, labels, loss):
                    pred_loss.append(loss_fn(label, output))
                total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                logging.info("{}_val_{}, {}, {}".format(
                    epoch, batch, total_loss.numpy(),
                    list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                avg_val_loss.update_state(total_loss)

            logging.info("{}, train: {}, val: {}".format(
                epoch,
                avg_loss.result().numpy(),
                avg_val_loss.result().numpy()))

            avg_loss.reset_states()
            avg_val_loss.reset_states()
            model.save_weights(
                'checkpoints/yolov3_train_{}.tf'.format(epoch))
    else:
        callbacks = [
            ReduceLROnPlateau(verbose=2),
            EarlyStopping(patience=3, verbose=2),
            ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                            verbose=2, save_weights_only=True),
            TensorBoard(log_dir='logs')
        ]

        start_time = time.time()
        history = model.fit(train_dataset,
                            epochs=FLAGS.epochs,
                            callbacks=callbacks,
                            validation_data=val_dataset)
        elapsed = time.time() - start_time
        print(f'Total Training Time: {elapsed:.2f} s')


if __name__ == '__main__':
    try:
        app.run(main)
    except SystemExit:
        pass
Metadata-Version: 2.1
Name: yolov3-tf2
Version: 0.1
Summary: UNKNOWN
Home-page: https://github.com/zzh8829/yolov3-tf2
Author: Zihao Zhang
Author-email: zzh8829@gmail.com
License: UNKNOWN
Platform: UNKNOWN
License-File: LICENSE
UNKNOWN
LICENSE
README.md
setup.py
yolov3_tf2/__init__.py
yolov3_tf2/dataset.py
yolov3_tf2/models.py
yolov3_tf2/utils.py
yolov3_tf2.egg-info/PKG-INFO
yolov3_tf2.egg-info/SOURCES.txt
yolov3_tf2.egg-info/dependency_links.txt
yolov3_tf2.egg-info/top_level.txt
import tensorflow as tf
from absl.flags import FLAGS


@tf.function
def transform_targets_for_output(y_true, grid_size, anchor_idxs):
    # y_true: (N, boxes, (x1, y1, x2, y2, class, best_anchor))
    N = tf.shape(y_true)[0]

    # y_true_out: (N, grid, grid, anchors, [x1, y1, x2, y2, obj, class])
    y_true_out = tf.zeros(
        (N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6))

    anchor_idxs = tf.cast(anchor_idxs, tf.int32)

    indexes = tf.TensorArray(tf.int32, 1, dynamic_size=True)
    updates = tf.TensorArray(tf.float32, 1, dynamic_size=True)
    idx = 0
    for i in tf.range(N):
        for j in tf.range(tf.shape(y_true)[1]):
            if tf.equal(y_true[i][j][2], 0):
                continue
            anchor_eq = tf.equal(
                anchor_idxs, tf.cast(y_true[i][j][5], tf.int32))

            if tf.reduce_any(anchor_eq):
                box = y_true[i][j][0:4]
                box_xy = (y_true[i][j][0:2] + y_true[i][j][2:4]) / 2

                anchor_idx = tf.cast(tf.where(anchor_eq), tf.int32)
                grid_xy = tf.cast(box_xy // (1/grid_size), tf.int32)

                # grid[y][x][anchor] = (tx, ty, bw, bh, obj, class)
                indexes = indexes.write(
                    idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]])
                updates = updates.write(
                    idx, [box[0], box[1], box[2], box[3], 1, y_true[i][j][4]])
                idx += 1

    # tf.print(indexes.stack())
    # tf.print(updates.stack())

    return tf.tensor_scatter_nd_update(
        y_true_out, indexes.stack(), updates.stack())
def transform_targets(y_train, anchors, anchor_masks, size):
    y_outs = []
    grid_size = size // 32

    # calculate anchor index for true boxes
    anchors = tf.cast(anchors, tf.float32)
    anchor_area = anchors[..., 0] * anchors[..., 1]
    box_wh = y_train[..., 2:4] - y_train[..., 0:2]
    box_wh = tf.tile(tf.expand_dims(box_wh, -2),
                     (1, 1, tf.shape(anchors)[0], 1))
    box_area = box_wh[..., 0] * box_wh[..., 1]
    intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * \
        tf.minimum(box_wh[..., 1], anchors[..., 1])
    iou = intersection / (box_area + anchor_area - intersection)
    anchor_idx = tf.cast(tf.argmax(iou, axis=-1), tf.float32)
    anchor_idx = tf.expand_dims(anchor_idx, axis=-1)

    y_train = tf.concat([y_train, anchor_idx], axis=-1)

    for anchor_idxs in anchor_masks:
        y_outs.append(transform_targets_for_output(
            y_train, grid_size, anchor_idxs))
        grid_size *= 2

    return tuple(y_outs)
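# Worked example: with size=416 and the full YOLOv3 anchor masks, the loop
# above produces targets at grid sizes 13 (stride 32), 26 (stride 16) and
# 52 (stride 8). A ground-truth box whose width/height best matches anchor 7
# falls in mask [6, 7, 8], so it is written only into the 13x13 output and
# stays all-zero in the 26x26 and 52x52 outputs.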
def transform_images(x_train, size):
    x_train = tf.image.resize(x_train, (size, size))
    x_train = x_train / 255
    return x_train


# https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md#conversion-script-outline-conversion-script-outline
# Commented out fields are not required in our project
IMAGE_FEATURE_MAP = {
    # 'image/width': tf.io.FixedLenFeature([], tf.int64),
    # 'image/height': tf.io.FixedLenFeature([], tf.int64),
    # 'image/filename': tf.io.FixedLenFeature([], tf.string),
    # 'image/source_id': tf.io.FixedLenFeature([], tf.string),
    # 'image/key/sha256': tf.io.FixedLenFeature([], tf.string),
    'image/encoded': tf.io.FixedLenFeature([], tf.string),
    # 'image/format': tf.io.FixedLenFeature([], tf.string),
    'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
    'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
    'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
    'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
    'image/object/class/text': tf.io.VarLenFeature(tf.string),
    # 'image/object/class/label': tf.io.VarLenFeature(tf.int64),
    # 'image/object/difficult': tf.io.VarLenFeature(tf.int64),
    # 'image/object/truncated': tf.io.VarLenFeature(tf.int64),
    # 'image/object/view': tf.io.VarLenFeature(tf.string),
}
def parse_tfrecord(tfrecord, class_table, size):
    x = tf.io.parse_single_example(tfrecord, IMAGE_FEATURE_MAP)
    x_train = tf.image.decode_jpeg(x['image/encoded'], channels=3)
    x_train = tf.image.resize(x_train, (size, size))

    class_text = tf.sparse.to_dense(
        x['image/object/class/text'], default_value='')
    labels = tf.cast(class_table.lookup(class_text), tf.float32)
    y_train = tf.stack([tf.sparse.to_dense(x['image/object/bbox/xmin']),
                        tf.sparse.to_dense(x['image/object/bbox/ymin']),
                        tf.sparse.to_dense(x['image/object/bbox/xmax']),
                        tf.sparse.to_dense(x['image/object/bbox/ymax']),
                        labels], axis=1)

    paddings = [[0, FLAGS.yolo_max_boxes - tf.shape(y_train)[0]], [0, 0]]
    y_train = tf.pad(y_train, paddings)

    return x_train, y_train


def load_tfrecord_dataset(file_pattern, class_file, size=416):
    LINE_NUMBER = -1  # TODO: use tf.lookup.TextFileIndex.LINE_NUMBER
    class_table = tf.lookup.StaticHashTable(tf.lookup.TextFileInitializer(
        class_file, tf.string, 0, tf.int64, LINE_NUMBER, delimiter="\n"), -1)

    files = tf.data.Dataset.list_files(file_pattern)
    dataset = files.flat_map(tf.data.TFRecordDataset)
    return dataset.map(lambda x: parse_tfrecord(x, class_table, size))


def load_fake_dataset():
    x_train = tf.image.decode_jpeg(
        open('./data/girl.png', 'rb').read(), channels=3)
    x_train = tf.expand_dims(x_train, axis=0)

    labels = [
        [0.18494931, 0.03049111, 0.9435849, 0.96302897, 0],
        [0.01586703, 0.35938117, 0.17582396, 0.6069674, 56],
        [0.09158827, 0.48252046, 0.26967454, 0.6403017, 67]
    ] + [[0, 0, 0, 0, 0]] * 5
    y_train = tf.convert_to_tensor(labels, tf.float32)
    y_train = tf.expand_dims(y_train, axis=0)

    return tf.data.Dataset.from_tensor_slices((x_train, y_train))
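# Minimal usage sketch (paths are assumptions; FLAGS must be parsed first,
# which importing yolov3_tf2.models and running under absl's app.run ensures):
#
#   import yolov3_tf2.dataset as dataset
#   ds = dataset.load_tfrecord_dataset(
#       './data/voc2012_train.tfrecord', './data/voc2012.names', 416)
#   for image, labels in ds.take(1):
#       # image: (416, 416, 3) float tensor; labels: (yolo_max_boxes, 5)
#       # with rows (xmin, ymin, xmax, ymax, class), zero-padded.
#       print(image.shape, labels.shape)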
from absl import flags
from absl.flags import FLAGS
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import (
    Add,
    Concatenate,
    Conv2D,
    Input,
    Lambda,
    LeakyReLU,
    MaxPool2D,
    UpSampling2D,
    ZeroPadding2D,
    BatchNormalization,
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.losses import (
    binary_crossentropy,
    sparse_categorical_crossentropy
)
from .utils import broadcast_iou

flags.DEFINE_integer('yolo_max_boxes', 100,
                     'maximum number of boxes per image')
flags.DEFINE_float('yolo_iou_threshold', 0.5, 'iou threshold')
flags.DEFINE_float('yolo_score_threshold', 0.5, 'score threshold')

yolo_anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                         (59, 119), (116, 90), (156, 198), (373, 326)],
                        np.float32) / 416
yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])

yolo_tiny_anchors = np.array([(10, 14), (23, 27), (37, 58),
                              (81, 82), (135, 169), (344, 319)],
                             np.float32) / 416
yolo_tiny_anchor_masks = np.array([[3, 4, 5], [0, 1, 2]])
def DarknetConv(x, filters, size, strides=1, batch_norm=True):
    if strides == 1:
        padding = 'same'
    else:
        x = ZeroPadding2D(((1, 0), (1, 0)))(x)  # top left half-padding
        padding = 'valid'
    x = Conv2D(filters=filters, kernel_size=size,
               strides=strides, padding=padding,
               use_bias=not batch_norm, kernel_regularizer=l2(0.0005))(x)
    if batch_norm:
        x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.1)(x)
    return x


def DarknetResidual(x, filters):
    prev = x
    x = DarknetConv(x, filters // 2, 1)
    x = DarknetConv(x, filters, 3)
    x = Add()([prev, x])
    return x


def DarknetBlock(x, filters, blocks):
    x = DarknetConv(x, filters, 3, strides=2)
    for _ in range(blocks):
        x = DarknetResidual(x, filters)
    return x


def Darknet(name=None):
    x = inputs = Input([None, None, 3])
    x = DarknetConv(x, 32, 3)
    x = DarknetBlock(x, 64, 1)
    x = DarknetBlock(x, 128, 2)  # skip connection
    x = x_36 = DarknetBlock(x, 256, 8)  # skip connection
    x = x_61 = DarknetBlock(x, 512, 8)
    x = DarknetBlock(x, 1024, 4)
    return tf.keras.Model(inputs, (x_36, x_61, x), name=name)


def DarknetTiny(name=None):
    x = inputs = Input([None, None, 3])
    x = DarknetConv(x, 16, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 32, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 64, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 128, 3)
    x = MaxPool2D(2, 2, 'same')(x)
    x = x_8 = DarknetConv(x, 256, 3)  # skip connection
    x = MaxPool2D(2, 2, 'same')(x)
    x = DarknetConv(x, 512, 3)
    x = MaxPool2D(2, 1, 'same')(x)
    x = DarknetConv(x, 1024, 3)
    return tf.keras.Model(inputs, (x_8, x), name=name)
def YoloConv(filters, name=None):
    def yolo_conv(x_in):
        if isinstance(x_in, tuple):
            inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
            x, x_skip = inputs

            # concat with skip connection
            x = DarknetConv(x, filters, 1)
            x = UpSampling2D(2)(x)
            x = Concatenate()([x, x_skip])
        else:
            x = inputs = Input(x_in.shape[1:])

        x = DarknetConv(x, filters, 1)
        x = DarknetConv(x, filters * 2, 3)
        x = DarknetConv(x, filters, 1)
        x = DarknetConv(x, filters * 2, 3)
        x = DarknetConv(x, filters, 1)
        return Model(inputs, x, name=name)(x_in)
    return yolo_conv


def YoloConvTiny(filters, name=None):
    def yolo_conv(x_in):
        if isinstance(x_in, tuple):
            inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
            x, x_skip = inputs

            # concat with skip connection
            x = DarknetConv(x, filters, 1)
            x = UpSampling2D(2)(x)
            x = Concatenate()([x, x_skip])
        else:
            x = inputs = Input(x_in.shape[1:])
            x = DarknetConv(x, filters, 1)

        return Model(inputs, x, name=name)(x_in)
    return yolo_conv


def YoloOutput(filters, anchors, classes, name=None):
    def yolo_output(x_in):
        x = inputs = Input(x_in.shape[1:])
        x = DarknetConv(x, filters * 2, 3)
        x = DarknetConv(x, anchors * (classes + 5), 1, batch_norm=False)
        x = Lambda(lambda x: tf.reshape(x, (-1, tf.shape(x)[1], tf.shape(x)[2],
                                            anchors, classes + 5)))(x)
        return tf.keras.Model(inputs, x, name=name)(x_in)
    return yolo_output
# As TensorFlow Lite doesn't support tf.size, which is used in tf.meshgrid,
# we reimplement a simple meshgrid function using only basic TF ops.
def _meshgrid(n_a, n_b):
    return [
        tf.reshape(tf.tile(tf.range(n_a), [n_b]), (n_b, n_a)),
        tf.reshape(tf.repeat(tf.range(n_b), n_a), (n_b, n_a))
    ]
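# For instance, _meshgrid(3, 2) returns the same grids as
# tf.meshgrid(tf.range(3), tf.range(2)):
#
#   [[0, 1, 2],      [[0, 0, 0],
#    [0, 1, 2]]  and  [1, 1, 1]]
#
# i.e. per-cell x indices and y indices, built only from tile/repeat/reshape
# so the graph stays TFLite-convertible.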
def yolo_boxes(pred, anchors, classes):
    # pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...classes))
    grid_size = tf.shape(pred)[1:3]
    box_xy, box_wh, objectness, class_probs = tf.split(
        pred, (2, 2, 1, classes), axis=-1)

    box_xy = tf.sigmoid(box_xy)
    objectness = tf.sigmoid(objectness)
    class_probs = tf.sigmoid(class_probs)
    pred_box = tf.concat((box_xy, box_wh), axis=-1)  # original xywh for loss

    # !!! grid[x][y] == (y, x)
    grid = _meshgrid(grid_size[1], grid_size[0])
    grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)  # [gx, gy, 1, 2]

    box_xy = (box_xy + tf.cast(grid, tf.float32)) / \
        tf.cast(grid_size, tf.float32)
    box_wh = tf.exp(box_wh) * anchors

    box_x1y1 = box_xy - box_wh / 2
    box_x2y2 = box_xy + box_wh / 2
    bbox = tf.concat([box_x1y1, box_x2y2], axis=-1)

    return bbox, objectness, class_probs, pred_box
def yolo_nms(outputs, anchors, masks, classes):
    # boxes, conf, type
    b, c, t = [], [], []

    for o in outputs:
        b.append(tf.reshape(o[0], (tf.shape(o[0])[0], -1, tf.shape(o[0])[-1])))
        c.append(tf.reshape(o[1], (tf.shape(o[1])[0], -1, tf.shape(o[1])[-1])))
        t.append(tf.reshape(o[2], (tf.shape(o[2])[0], -1, tf.shape(o[2])[-1])))

    bbox = tf.concat(b, axis=1)
    confidence = tf.concat(c, axis=1)
    class_probs = tf.concat(t, axis=1)

    # If we only have one class, do not multiply by class_prob (always 0.5)
    if classes == 1:
        scores = confidence
    else:
        scores = confidence * class_probs

    dscores = tf.squeeze(scores, axis=0)
    scores = tf.reduce_max(dscores, [1])
    bbox = tf.reshape(bbox, (-1, 4))
    classes = tf.argmax(dscores, 1)
    selected_indices, selected_scores = tf.image.non_max_suppression_with_scores(
        boxes=bbox,
        scores=scores,
        max_output_size=FLAGS.yolo_max_boxes,
        iou_threshold=FLAGS.yolo_iou_threshold,
        score_threshold=FLAGS.yolo_score_threshold,
        soft_nms_sigma=0.5
    )

    # Pad results out to yolo_max_boxes so the output shape is static
    num_valid_nms_boxes = tf.shape(selected_indices)[0]
    selected_indices = tf.concat(
        [selected_indices,
         tf.zeros(FLAGS.yolo_max_boxes - num_valid_nms_boxes, tf.int32)], 0)
    selected_scores = tf.concat(
        [selected_scores,
         tf.zeros(FLAGS.yolo_max_boxes - num_valid_nms_boxes, tf.float32)], -1)

    boxes = tf.gather(bbox, selected_indices)
    boxes = tf.expand_dims(boxes, axis=0)
    scores = tf.expand_dims(selected_scores, axis=0)
    classes = tf.gather(classes, selected_indices)
    classes = tf.expand_dims(classes, axis=0)
    valid_detections = tf.expand_dims(num_valid_nms_boxes, axis=0)

    return boxes, scores, classes, valid_detections
def YoloV3(size=None, channels=3, anchors=yolo_anchors,
           masks=yolo_anchor_masks, classes=80, training=False):
    x = inputs = Input([size, size, channels], name='input')

    x_36, x_61, x = Darknet(name='yolo_darknet')(x)

    x = YoloConv(512, name='yolo_conv_0')(x)
    output_0 = YoloOutput(512, len(masks[0]), classes, name='yolo_output_0')(x)

    x = YoloConv(256, name='yolo_conv_1')((x, x_61))
    output_1 = YoloOutput(256, len(masks[1]), classes, name='yolo_output_1')(x)

    x = YoloConv(128, name='yolo_conv_2')((x, x_36))
    output_2 = YoloOutput(128, len(masks[2]), classes, name='yolo_output_2')(x)

    if training:
        return Model(inputs, (output_0, output_1, output_2), name='yolov3')

    boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes),
                     name='yolo_boxes_0')(output_0)
    boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes),
                     name='yolo_boxes_1')(output_1)
    boxes_2 = Lambda(lambda x: yolo_boxes(x, anchors[masks[2]], classes),
                     name='yolo_boxes_2')(output_2)

    outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes),
                     name='yolo_nms')((boxes_0[:3], boxes_1[:3], boxes_2[:3]))

    return Model(inputs, outputs, name='yolov3')


def YoloV3Tiny(size=None, channels=3, anchors=yolo_tiny_anchors,
               masks=yolo_tiny_anchor_masks, classes=80, training=False):
    x = inputs = Input([size, size, channels], name='input')

    x_8, x = DarknetTiny(name='yolo_darknet')(x)

    x = YoloConvTiny(256, name='yolo_conv_0')(x)
    output_0 = YoloOutput(256, len(masks[0]), classes, name='yolo_output_0')(x)

    x = YoloConvTiny(128, name='yolo_conv_1')((x, x_8))
    output_1 = YoloOutput(128, len(masks[1]), classes, name='yolo_output_1')(x)

    if training:
        return Model(inputs, (output_0, output_1), name='yolov3')

    boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes),
                     name='yolo_boxes_0')(output_0)
    boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes),
                     name='yolo_boxes_1')(output_1)
    outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes),
                     name='yolo_nms')((boxes_0[:3], boxes_1[:3]))
    return Model(inputs, outputs, name='yolov3_tiny')
def YoloLoss(anchors, classes=80, ignore_thresh=0.5):
    def yolo_loss(y_true, y_pred):
        # 1. transform all pred outputs
        # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
        pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes(
            y_pred, anchors, classes)
        pred_xy = pred_xywh[..., 0:2]
        pred_wh = pred_xywh[..., 2:4]

        # 2. transform all true outputs
        # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls))
        true_box, true_obj, true_class_idx = tf.split(
            y_true, (4, 1, 1), axis=-1)
        true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2
        true_wh = true_box[..., 2:4] - true_box[..., 0:2]

        # give higher weights to small boxes
        box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1]

        # 3. inverting the pred box equations
        grid_size = tf.shape(y_true)[1]
        grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size))
        grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)
        true_xy = true_xy * tf.cast(grid_size, tf.float32) - \
            tf.cast(grid, tf.float32)
        true_wh = tf.math.log(true_wh / anchors)
        true_wh = tf.where(tf.math.is_inf(true_wh),
                           tf.zeros_like(true_wh), true_wh)

        # 4. calculate all masks
        obj_mask = tf.squeeze(true_obj, -1)
        # ignore false positive when iou is over threshold
        best_iou = tf.map_fn(
            lambda x: tf.reduce_max(broadcast_iou(x[0], tf.boolean_mask(
                x[1], tf.cast(x[2], tf.bool))), axis=-1),
            (pred_box, true_box, obj_mask),
            tf.float32)
        ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)

        # 5. calculate all losses
        xy_loss = obj_mask * box_loss_scale * \
            tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1)
        wh_loss = obj_mask * box_loss_scale * \
            tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1)
        obj_loss = binary_crossentropy(true_obj, pred_obj)
        obj_loss = obj_mask * obj_loss + \
            (1 - obj_mask) * ignore_mask * obj_loss
        # TODO: use binary_crossentropy instead
        class_loss = obj_mask * sparse_categorical_crossentropy(
            true_class_idx, pred_class)

        # 6. sum over (batch, gridx, gridy, anchors) => (batch, 1)
        xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3))
        wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3))
        obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3))
        class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3))

        return xy_loss + wh_loss + obj_loss + class_loss
    return yolo_loss
from absl import logging
import numpy as np
import tensorflow as tf
import cv2

YOLOV3_LAYER_LIST = [
    'yolo_darknet',
    'yolo_conv_0',
    'yolo_output_0',
    'yolo_conv_1',
    'yolo_output_1',
    'yolo_conv_2',
    'yolo_output_2',
]

YOLOV3_TINY_LAYER_LIST = [
    'yolo_darknet',
    'yolo_conv_0',
    'yolo_output_0',
    'yolo_conv_1',
    'yolo_output_1',
]
def load_darknet_weights(model, weights_file, tiny=False):
    wf = open(weights_file, 'rb')
    major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)

    if tiny:
        layers = YOLOV3_TINY_LAYER_LIST
    else:
        layers = YOLOV3_LAYER_LIST

    for layer_name in layers:
        sub_model = model.get_layer(layer_name)
        for i, layer in enumerate(sub_model.layers):
            if not layer.name.startswith('conv2d'):
                continue
            batch_norm = None
            if i + 1 < len(sub_model.layers) and \
                    sub_model.layers[i + 1].name.startswith('batch_norm'):
                batch_norm = sub_model.layers[i + 1]

            logging.info("{}/{} {}".format(
                sub_model.name, layer.name, 'bn' if batch_norm else 'bias'))

            filters = layer.filters
            size = layer.kernel_size[0]
            in_dim = layer.get_input_shape_at(0)[-1]

            if batch_norm is None:
                conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
            else:
                # darknet [beta, gamma, mean, variance]
                bn_weights = np.fromfile(
                    wf, dtype=np.float32, count=4 * filters)
                # tf [gamma, beta, mean, variance]
                bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]

            # darknet shape (out_dim, in_dim, height, width)
            conv_shape = (filters, in_dim, size, size)
            conv_weights = np.fromfile(
                wf, dtype=np.float32, count=np.prod(conv_shape))
            # tf shape (height, width, in_dim, out_dim)
            conv_weights = conv_weights.reshape(
                conv_shape).transpose([2, 3, 1, 0])

            if batch_norm is None:
                layer.set_weights([conv_weights, conv_bias])
            else:
                layer.set_weights([conv_weights])
                batch_norm.set_weights(bn_weights)

    assert len(wf.read()) == 0, 'failed to read all data'
    wf.close()
def broadcast_iou(box_1, box_2):
    # box_1: (..., (x1, y1, x2, y2))
    # box_2: (N, (x1, y1, x2, y2))

    # broadcast boxes
    box_1 = tf.expand_dims(box_1, -2)
    box_2 = tf.expand_dims(box_2, 0)
    # new_shape: (..., N, (x1, y1, x2, y2))
    new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2))
    box_1 = tf.broadcast_to(box_1, new_shape)
    box_2 = tf.broadcast_to(box_2, new_shape)

    int_w = tf.maximum(tf.minimum(box_1[..., 2], box_2[..., 2]) -
                       tf.maximum(box_1[..., 0], box_2[..., 0]), 0)
    int_h = tf.maximum(tf.minimum(box_1[..., 3], box_2[..., 3]) -
                       tf.maximum(box_1[..., 1], box_2[..., 1]), 0)
    int_area = int_w * int_h
    box_1_area = (box_1[..., 2] - box_1[..., 0]) * \
        (box_1[..., 3] - box_1[..., 1])
    box_2_area = (box_2[..., 2] - box_2[..., 0]) * \
        (box_2[..., 3] - box_2[..., 1])
    return int_area / (box_1_area + box_2_area - int_area)
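# Worked example: for box_1 = (0, 0, 2, 2) and box_2 = (1, 1, 3, 3), the
# intersection is 1x1 = 1 and the union is 4 + 4 - 1 = 7, so the IoU is
# 1/7 ≈ 0.143. broadcast_iou computes this for every pairing of a predicted
# box with the N ground-truth boxes in one vectorized pass.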
def draw_outputs(img, outputs, class_names):
    boxes, objectness, classes, nums = outputs
    boxes, objectness, classes, nums = boxes[0], objectness[0], classes[0], nums[0]
    wh = np.flip(img.shape[0:2])
    for i in range(nums):
        x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32))
        x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32))
        img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2)
        img = cv2.putText(img, '{} {:.4f}'.format(
            class_names[int(classes[i])], objectness[i]),
            x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
    return img
def draw_labels(x, y, class_names):
    img = x.numpy()
    boxes, classes = tf.split(y, (4, 1), axis=-1)
    classes = classes[..., 0]
    wh = np.flip(img.shape[0:2])
    for i in range(len(boxes)):
        x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32))
        x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32))
        img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2)
        # cast the class tensor to int so it can index the Python list
        img = cv2.putText(img, class_names[int(classes[i])],
                          x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL,
                          1, (0, 0, 255), 2)
    return img
def freeze_all(model, frozen=True):
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for l in model.layers:
            freeze_all(l, frozen)
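# Minimal usage sketch (the 'yolo_darknet' layer name matches the models in
# this repo):
#
#   model = YoloV3(416, training=True, classes=80)
#   freeze_all(model.get_layer('yolo_darknet'))                 # freeze backbone
#   freeze_all(model.get_layer('yolo_darknet'), frozen=False)   # unfreeze
#
# Because freeze_all recurses into nested tf.keras.Model layers, every
# sub-layer of the backbone gets trainable=False, not just the wrapper.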