Commit c320b6ef authored by zhenyi's avatar zhenyi
Browse files

tf2 detection

parent 0fc002df
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import numpy as np
import tensorflow as tf
from yolov3_tf2.models import (
YoloV3, YoloV3Tiny
)
from yolov3_tf2.dataset import transform_images, load_tfrecord_dataset
from yolov3_tf2.utils import draw_outputs
flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
'path to weights file')
flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
flags.DEFINE_integer('size', 416, 'resize images to')
flags.DEFINE_string('image', './data/girl.png', 'path to input image')
flags.DEFINE_string('tfrecord', None, 'tfrecord instead of image')
flags.DEFINE_string('output', './output.jpg', 'path to output image')
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
def main(_argv):
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for physical_device in physical_devices:
tf.config.experimental.set_memory_growth(physical_device, True)
if FLAGS.tiny:
yolo = YoloV3Tiny(classes=FLAGS.num_classes)
else:
yolo = YoloV3(classes=FLAGS.num_classes)
yolo.load_weights(FLAGS.weights).expect_partial()
logging.info('weights loaded')
class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
logging.info('classes loaded')
if FLAGS.tfrecord:
dataset = load_tfrecord_dataset(
FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
dataset = dataset.shuffle(512)
img_raw, _label = next(iter(dataset.take(1)))
else:
img_raw = tf.image.decode_image(
open(FLAGS.image, 'rb').read(), channels=3)
img = tf.expand_dims(img_raw, 0)
img = transform_images(img, FLAGS.size)
t1 = time.time()
boxes, scores, classes, nums = yolo(img)
t2 = time.time()
logging.info('time: {}'.format(t2 - t1))
logging.info('detections:')
for i in range(nums[0]):
logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
np.array(scores[0][i]),
np.array(boxes[0][i])))
img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
cv2.imwrite(FLAGS.output, img)
logging.info('output saved to: {}'.format(FLAGS.output))
if __name__ == '__main__':
try:
app.run(main)
except SystemExit:
pass
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import tensorflow as tf
from yolov3_tf2.models import (
YoloV3, YoloV3Tiny
)
from yolov3_tf2.dataset import transform_images
from yolov3_tf2.utils import draw_outputs
flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
'path to weights file')
flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
flags.DEFINE_integer('size', 416, 'resize images to')
flags.DEFINE_string('video', './data/video.mp4',
'path to video file or number for webcam)')
flags.DEFINE_string('output', None, 'path to output video')
flags.DEFINE_string('output_format', 'XVID', 'codec used in VideoWriter when saving video to file')
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
def main(_argv):
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for physical_device in physical_devices:
tf.config.experimental.set_memory_growth(physical_device, True)
if FLAGS.tiny:
yolo = YoloV3Tiny(classes=FLAGS.num_classes)
else:
yolo = YoloV3(classes=FLAGS.num_classes)
yolo.load_weights(FLAGS.weights)
logging.info('weights loaded')
class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
logging.info('classes loaded')
times = []
try:
vid = cv2.VideoCapture(int(FLAGS.video))
except:
vid = cv2.VideoCapture(FLAGS.video)
out = None
if FLAGS.output:
# by default VideoCapture returns float instead of int
width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(vid.get(cv2.CAP_PROP_FPS))
codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
while True:
_, img = vid.read()
if img is None:
logging.warning("Empty Frame")
time.sleep(0.1)
continue
img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_in = tf.expand_dims(img_in, 0)
img_in = transform_images(img_in, FLAGS.size)
t1 = time.time()
boxes, scores, classes, nums = yolo.predict(img_in)
t2 = time.time()
times.append(t2-t1)
times = times[-20:]
img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30),
cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
if FLAGS.output:
out.write(img)
cv2.imshow('output', img)
if cv2.waitKey(1) == ord('q'):
break
cv2.destroyAllWindows()
if __name__ == '__main__':
try:
app.run(main)
except SystemExit:
pass
# Training Instruction
## VOC 2012 Dataset from Scratch
Full instruction on how to train using VOC 2012 from scratch
Requirement:
1. Able to detect image using pretrained darknet model
2. Many Gigabytes of Disk Space
3. High Speed Internet Connection Preferred
4. GPU Preferred
### 1. Download Dataset
You can read the full description of dataset [here](http://host.robots.ox.ac.uk/pascal/VOC/)
```bash
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar -O ./data/voc2012_raw.tar
mkdir -p ./data/voc2012_raw
tar -xf ./data/voc2012_raw.tar -C ./data/voc2012_raw
ls ./data/voc2012_raw/VOCdevkit/VOC2012 # Explore the dataset
```
### 2. Transform Dataset
See tools/voc2012.py for implementation, this format is based on [tensorflow object detection API](https://github.com/tensorflow/models/tree/master/research/object_detection). Many fields
are not required, I left them there for compatibility with official API.
```bash
python tools/voc2012.py \
--data_dir './data/voc2012_raw/VOCdevkit/VOC2012' \
--split train \
--output_file ./data/voc2012_train.tfrecord
python tools/voc2012.py \
--data_dir './data/voc2012_raw/VOCdevkit/VOC2012' \
--split val \
--output_file ./data/voc2012_val.tfrecord
```
You can visualize the dataset using this tool
```
python tools/visualize_dataset.py --classes=./data/voc2012.names
```
It will output one random image with label to `output.jpg`
### 3. Training
You can adjust the parameters based on your setup
#### With Transfer Learning
This step requires loading the pretrained darknet (feature extractor) weights.
```
wget https://pjreddie.com/media/files/yolov3.weights -O data/yolov3.weights
python convert.py
python detect.py --image ./data/meme.jpg # Sanity check
python train.py \
--dataset ./data/voc2012_train.tfrecord \
--val_dataset ./data/voc2012_val.tfrecord \
--classes ./data/voc2012.names \
--num_classes 20 \
--mode fit --transfer darknet \
--batch_size 16 \
--epochs 10 \
--weights ./checkpoints/yolov3.tf \
--weights_num_classes 80
```
Original pretrained yolov3 has 80 classes, here we demonstrated how to
do transfer learning on 20 classes.
#### Training from random weights (NOT RECOMMENDED)
Training from scratch is very difficult to converge
The original paper trained darknet
on imagenet before training the whole network as well.
```bash
python train.py \
--dataset ./data/voc2012_train.tfrecord \
--val_dataset ./data/voc2012_val.tfrecord \
--classes ./data/voc2012.names \
--num_classes 20 \
--mode fit --transfer none \
--batch_size 16 \
--epochs 10 \
```
I have tested this works 100% with correct loss and converging over time.
Each epoch takes around 10 minutes on single AWS p2.xlarge (Nvidia K80 GPU) Instance.
You might see warnings or error messages during training, they are not critical dont' worry too much about them.
There might be a long wait time between each epoch becaues we are calculating validation loss.
### 4. Inference
```bash
# detect from images
python detect.py \
--classes ./data/voc2012.names \
--num_classes 20 \
--weights ./checkpoints/yolov3_train_5.tf \
--image ./data/street.jpg
# detect from validation set
python detect.py \
--classes ./data/voc2012.names \
--num_classes 20 \
--weights ./checkpoints/yolov3_train_5.tf \
--tfrecord ./data/voc2012_val.tfrecord
```
You should see some detect objects in the standard output and the visualization at `output.jpg`.
this is just a proof of concept, so it won't be as good as pretrained models.
In my experience, you might need lower score score thershold if you didn't train it enough.
tensorflow==2.5.1
opencv-python==4.2.0.32
lxml
tqdm
-e .
from setuptools import setup
setup(name='yolov3_tf2',
version='0.1',
url='https://github.com/zzh8829/yolov3-tf2',
author='Zihao Zhang',
author_email='zzh8829@gmail.com',
packages=['yolov3_tf2'])
\ No newline at end of file
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import numpy as np
import tensorflow as tf
from yolov3_tf2.models import (
YoloV3, YoloV3Tiny
)
from yolov3_tf2.dataset import transform_images
from tensorflow.python.eager import def_function
from tensorflow.python.framework import tensor_spec
from tensorflow.python.util import nest
flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
'path to weights file')
flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
flags.DEFINE_string('output', './checkpoints/yolov3.tflite',
'path to saved_model')
flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
flags.DEFINE_string('image', './data/girl.png', 'path to input image')
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
flags.DEFINE_integer('size', 416, 'image size')
def main(_argv):
if FLAGS.tiny:
yolo = YoloV3Tiny(size=FLAGS.size, classes=FLAGS.num_classes)
else:
yolo = YoloV3(size=FLAGS.size, classes=FLAGS.num_classes)
yolo.load_weights(FLAGS.weights)
logging.info('weights loaded')
converter = tf.lite.TFLiteConverter.from_keras_model(yolo)
# Fix from https://stackoverflow.com/questions/64490203/tf-lite-non-max-suppression
converter.experimental_new_converter = True
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
tflite_model = converter.convert()
open(FLAGS.output, 'wb').write(tflite_model)
logging.info("model saved to: {}".format(FLAGS.output))
interpreter = tf.lite.Interpreter(model_path=FLAGS.output)
interpreter.allocate_tensors()
logging.info('tflite model loaded')
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
logging.info('classes loaded')
img = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)
img = tf.expand_dims(img, 0)
img = transform_images(img, 416)
t1 = time.time()
outputs = interpreter.set_tensor(input_details[0]['index'], img)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])
print(output_data)
if __name__ == '__main__':
app.run(main)
import time
from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import numpy as np
import tensorflow as tf
from yolov3_tf2.models import (
YoloV3, YoloV3Tiny
)
from yolov3_tf2.dataset import transform_images
from tensorflow.python.eager import def_function
from tensorflow.python.framework import tensor_spec
from tensorflow.python.util import nest
flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
'path to weights file')
flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
flags.DEFINE_string('output', './serving/yolov3/1', 'path to saved_model')
flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
flags.DEFINE_string('image', './data/girl.png', 'path to input image')
flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
def main(_argv):
if FLAGS.tiny:
yolo = YoloV3Tiny(classes=FLAGS.num_classes)
else:
yolo = YoloV3(classes=FLAGS.num_classes)
yolo.load_weights(FLAGS.weights)
logging.info('weights loaded')
tf.saved_model.save(yolo, FLAGS.output)
logging.info("model saved to: {}".format(FLAGS.output))
model = tf.saved_model.load(FLAGS.output)
infer = model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
logging.info(infer.structured_outputs)
class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
logging.info('classes loaded')
img = tf.image.decode_image(open(FLAGS.image, 'rb').read(), channels=3)
img = tf.expand_dims(img, 0)
img = transform_images(img, 416)
t1 = time.time()
outputs = infer(img)
boxes, scores, classes, nums = outputs["yolo_nms"], outputs[
"yolo_nms_1"], outputs["yolo_nms_2"], outputs["yolo_nms_3"]
t2 = time.time()
logging.info('time: {}'.format(t2 - t1))
logging.info('detections:')
for i in range(nums[0]):
logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
scores[0][i].numpy(),
boxes[0][i].numpy()))
if __name__ == '__main__':
try:
app.run(main)
except SystemExit:
pass
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment