tf2 detection

c320b6ef · zhenyi · 0fc002df · c320b6ef · c320b6ef · c320b6ef
Commit c320b6ef authored Apr 15, 2022 by zhenyi
20 changed files
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/detect.py
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/detect.py
+import time
+from absl import app, flags, logging
+from absl.flags import FLAGS
+import cv2
+import numpy as np
+import tensorflow as tf
+from yolov3_tf2.models import (
+    YoloV3, YoloV3Tiny
+)
+from yolov3_tf2.dataset import transform_images, load_tfrecord_dataset
+from yolov3_tf2.utils import draw_outputs
+
+# Command-line flags for single-image detection; values are read through
+# FLAGS inside main().
+flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
+flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
+                    'path to weights file')
+flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
+flags.DEFINE_integer('size', 416, 'resize images to')
+flags.DEFINE_string('image', './data/girl.png', 'path to input image')
+# When set, main() takes its input from this TFRecord instead of --image.
+flags.DEFINE_string('tfrecord', None, 'tfrecord instead of image')
+flags.DEFINE_string('output', './output.jpg', 'path to output image')
+flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
+
+
+def main(_argv):
+    """Detect objects in a single image and save the annotated picture.
+
+    The input is either the file named by --image or, when --tfrecord is
+    given, one sample taken from that shuffled TFRecord dataset. Each
+    detection is logged and the boxes are drawn onto the image written to
+    --output.
+
+    Args:
+        _argv: positional command-line arguments handed over by absl (unused).
+    """
+    # Enable memory growth on every visible GPU.
+    physical_devices = tf.config.experimental.list_physical_devices('GPU')
+    for physical_device in physical_devices:
+        tf.config.experimental.set_memory_growth(physical_device, True)
+
+    if FLAGS.tiny:
+        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
+    else:
+        yolo = YoloV3(classes=FLAGS.num_classes)
+
+    yolo.load_weights(FLAGS.weights).expect_partial()
+    logging.info('weights loaded')
+
+    # Context managers close the file handles as soon as they are read.
+    with open(FLAGS.classes) as classes_file:
+        class_names = [c.strip() for c in classes_file]
+    logging.info('classes loaded')
+
+    if FLAGS.tfrecord:
+        dataset = load_tfrecord_dataset(
+            FLAGS.tfrecord, FLAGS.classes, FLAGS.size)
+        dataset = dataset.shuffle(512)
+        img_raw, _label = next(iter(dataset.take(1)))
+    else:
+        with open(FLAGS.image, 'rb') as image_file:
+            img_raw = tf.image.decode_image(image_file.read(), channels=3)
+
+    # Add a leading batch axis, then resize for the network.
+    img = tf.expand_dims(img_raw, 0)
+    img = transform_images(img, FLAGS.size)
+
+    t1 = time.time()
+    boxes, scores, classes, nums = yolo(img)
+    t2 = time.time()
+    logging.info('time: {}'.format(t2 - t1))
+
+    logging.info('detections:')
+    for i in range(nums[0]):
+        logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
+                                           np.array(scores[0][i]),
+                                           np.array(boxes[0][i])))
+
+    # Draw on the original (un-resized) image, converted from RGB to BGR.
+    img = cv2.cvtColor(img_raw.numpy(), cv2.COLOR_RGB2BGR)
+    img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
+    # cv2.imwrite signals failure through its return value, not an exception.
+    if cv2.imwrite(FLAGS.output, img):
+        logging.info('output saved to: {}'.format(FLAGS.output))
+    else:
+        logging.error('failed to write output to: {}'.format(FLAGS.output))
+
+
+if __name__ == '__main__':
+    # SystemExit coming out of app.run is swallowed here and does not
+    # propagate further.
+    # NOTE(review): presumably so the script can be launched from an
+    # interactive session without ending it - confirm.
+    try:
+        app.run(main)
+    except SystemExit:
+        pass
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/detect_video.py
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/detect_video.py
+import time
+from absl import app, flags, logging
+from absl.flags import FLAGS
+import cv2
+import tensorflow as tf
+from yolov3_tf2.models import (
+    YoloV3, YoloV3Tiny
+)
+from yolov3_tf2.dataset import transform_images
+from yolov3_tf2.utils import draw_outputs
+
+
+# Command-line flags for video / webcam detection; values are read through
+# FLAGS inside main().
+flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
+flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
+                    'path to weights file')
+flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
+flags.DEFINE_integer('size', 416, 'resize images to')
+flags.DEFINE_string('video', './data/video.mp4',
+                    'path to video file or number for webcam')
+# No output file is written unless --output is given.
+flags.DEFINE_string('output', None, 'path to output video')
+flags.DEFINE_string('output_format', 'XVID', 'codec used in VideoWriter when saving video to file')
+flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
+
+
+def main(_argv):
+    """Run YOLOv3 on a video file or webcam and display the annotated frames.
+
+    Frames are read from --video, passed through the detector, annotated
+    with the boxes and a rolling average inference time, shown in a window
+    and, when --output is set, appended to the output video. The loop ends
+    when 'q' is pressed or when the source stops delivering frames.
+
+    Args:
+        _argv: positional command-line arguments handed over by absl (unused).
+    """
+    # Enable memory growth on every visible GPU.
+    physical_devices = tf.config.experimental.list_physical_devices('GPU')
+    for physical_device in physical_devices:
+        tf.config.experimental.set_memory_growth(physical_device, True)
+
+    if FLAGS.tiny:
+        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
+    else:
+        yolo = YoloV3(classes=FLAGS.num_classes)
+
+    yolo.load_weights(FLAGS.weights)
+    logging.info('weights loaded')
+
+    with open(FLAGS.classes) as classes_file:
+        class_names = [c.strip() for c in classes_file]
+    logging.info('classes loaded')
+
+    times = []
+    # Give up after this many consecutive empty reads; otherwise the loop
+    # would spin forever once a video file has been fully consumed.
+    max_empty_frames = 10
+    empty_frames = 0
+
+    # A purely numeric --video value is a webcam index, anything else a path.
+    try:
+        vid = cv2.VideoCapture(int(FLAGS.video))
+    except ValueError:
+        vid = cv2.VideoCapture(FLAGS.video)
+
+    out = None
+
+    try:
+        if FLAGS.output:
+            # by default VideoCapture returns float instead of int
+            width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = int(vid.get(cv2.CAP_PROP_FPS))
+            codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
+            out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
+
+        while True:
+            _, img = vid.read()
+
+            if img is None:
+                logging.warning("Empty Frame")
+                empty_frames += 1
+                if empty_frames >= max_empty_frames:
+                    break
+                time.sleep(0.1)
+                continue
+            empty_frames = 0
+
+            # The frame is converted from BGR to RGB for the network input;
+            # the original BGR frame is kept for drawing and saving.
+            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+            img_in = tf.expand_dims(img_in, 0)
+            img_in = transform_images(img_in, FLAGS.size)
+
+            t1 = time.time()
+            boxes, scores, classes, nums = yolo.predict(img_in)
+            t2 = time.time()
+            # Keep only the latest 20 timings for the on-screen average.
+            times.append(t2-t1)
+            times = times[-20:]
+
+            img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
+            img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30),
+                              cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
+            if out is not None:
+                out.write(img)
+            cv2.imshow('output', img)
+            if cv2.waitKey(1) == ord('q'):
+                break
+    finally:
+        # Release the capture and writer even on error so the output file
+        # is finalised and the device is freed.
+        vid.release()
+        if out is not None:
+            out.release()
+        cv2.destroyAllWindows()
+
+
+if __name__ == '__main__':
+    # SystemExit coming out of app.run is swallowed here and does not
+    # propagate further.
+    # NOTE(review): presumably so the script can be launched from an
+    # interactive session without ending it - confirm.
+    try:
+        app.run(main)
+    except SystemExit:
+        pass
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/docs/docs/training_voc.md
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/docs/docs/training_voc.md
+# Training Instruction
+
+## VOC 2012 Dataset from Scratch
+
+Full instruction on how to train using VOC 2012 from scratch
+
+Requirement:
+  1. Able to run detection on an image using the pretrained darknet model
+  2. Many Gigabytes of Disk Space
+  3. High Speed Internet Connection Preferred
+  4. GPU Preferred
+
+
+### 1. Download Dataset
+
+You can read the full description of dataset [here](http://host.robots.ox.ac.uk/pascal/VOC/)
+```bash
+wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar -O ./data/voc2012_raw.tar
+mkdir -p ./data/voc2012_raw
+tar -xf ./data/voc2012_raw.tar -C ./data/voc2012_raw
+ls ./data/voc2012_raw/VOCdevkit/VOC2012 # Explore the dataset
+```
+
+### 2. Transform Dataset
+
+See tools/voc2012.py for the implementation. This format is based on the [tensorflow object detection API](https://github.com/tensorflow/models/tree/master/research/object_detection). Many fields
+are not required; I left them there for compatibility with the official API.
+
+```bash
+python tools/voc2012.py \
+  --data_dir './data/voc2012_raw/VOCdevkit/VOC2012' \
+  --split train \
+  --output_file ./data/voc2012_train.tfrecord
+
+python tools/voc2012.py \
+  --data_dir './data/voc2012_raw/VOCdevkit/VOC2012' \
+  --split val \
+  --output_file ./data/voc2012_val.tfrecord
+```
+
+You can visualize the dataset using this tool
+```
+python tools/visualize_dataset.py --classes=./data/voc2012.names
+```
+
+It will output one random image with label to `output.jpg`
+
+### 3. Training
+
+You can adjust the parameters based on your setup
+
+#### With Transfer Learning
+
+This step requires loading the pretrained darknet (feature extractor) weights.
+```
+wget https://pjreddie.com/media/files/yolov3.weights -O data/yolov3.weights
+python convert.py
+python detect.py --image ./data/meme.jpg # Sanity check
+
+python train.py \
+	--dataset ./data/voc2012_train.tfrecord \
+	--val_dataset ./data/voc2012_val.tfrecord \
+	--classes ./data/voc2012.names \
+	--num_classes 20 \
+	--mode fit --transfer darknet \
+	--batch_size 16 \
+	--epochs 10 \
+	--weights ./checkpoints/yolov3.tf \
+	--weights_num_classes 80 
+```
+
+The original pretrained yolov3 has 80 classes; here we demonstrate how to
+do transfer learning on 20 classes.
+
+#### Training from random weights (NOT RECOMMENDED)
+Training from scratch is very difficult to converge.
+The original paper also trained darknet
+on imagenet before training the whole network.
+
+```bash
+python train.py \
+	--dataset ./data/voc2012_train.tfrecord \
+	--val_dataset ./data/voc2012_val.tfrecord \
+	--classes ./data/voc2012.names \
+	--num_classes 20 \
+	--mode fit --transfer none \
+	--batch_size 16 \
+	--epochs 10 \
+```
+
+I have tested that this works, with the loss computed correctly and converging over time.
+Each epoch takes around 10 minutes on a single AWS p2.xlarge (Nvidia K80 GPU) instance.
+
+You might see warnings or error messages during training; they are not critical, so don't worry too much about them.
+There might be a long wait between epochs because we are calculating the validation loss.
+
+### 4. Inference
+
+```bash
+# detect from images
+python detect.py \
+	--classes ./data/voc2012.names \
+	--num_classes 20 \
+	--weights ./checkpoints/yolov3_train_5.tf \
+	--image ./data/street.jpg
+
+# detect from validation set
+python detect.py \
+	--classes ./data/voc2012.names \
+	--num_classes 20 \
+	--weights ./checkpoints/yolov3_train_5.tf \
+	--tfrecord ./data/voc2012_val.tfrecord
+```
+
+You should see some detected objects in the standard output and the visualization at `output.jpg`.
+This is just a proof of concept, so it won't be as good as pretrained models.
+In my experience, you might need a lower score threshold if you didn't train it enough.
+
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649755222.e09r2n12.4961.0.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649755222.e09r2n12.4961.0.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649761847.e09r2n12.6924.0.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649761847.e09r2n12.6924.0.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649770830.e14r2n08.18416.0.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649770830.e14r2n08.18416.0.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649774610.e14r2n08.17925.0.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649774610.e14r2n08.17925.0.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649777502.b20r3n18.6784.0.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649777502.b20r3n18.6784.0.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649779246.e09r2n15.10399.0.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/train/events.out.tfevents.1649779246.e09r2n15.10399.0.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649755456.e09r2n12.4961.1.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649755456.e09r2n12.4961.1.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649762231.e09r2n12.6924.1.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649762231.e09r2n12.6924.1.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649771067.e14r2n08.18416.1.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649771067.e14r2n08.18416.1.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649775053.e14r2n08.17925.1.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649775053.e14r2n08.17925.1.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649777831.b20r3n18.6784.1.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649777831.b20r3n18.6784.1.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649779571.e09r2n15.10399.1.v2
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/logs/logs/validation/events.out.tfevents.1649779571.e09r2n15.10399.1.v2
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/requirements-gpu.txt
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/requirements-gpu.txt
+opencv-python==4.2.0.32
+lxml
+tqdm
+
+-e .
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/requirements.txt
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/requirements.txt
+tensorflow==2.5.1
+opencv-python==4.2.0.32
+lxml
+tqdm
+
+-e .
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/setup.py
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/setup.py
+from setuptools import setup
+
+# Packaging metadata: declares the single package yolov3_tf2 at version 0.1.
+setup(name='yolov3_tf2',
+      version='0.1',
+      url='https://github.com/zzh8829/yolov3-tf2',
+      author='Zihao Zhang',
+      author_email='zzh8829@gmail.com',
+      packages=['yolov3_tf2'])
\ No newline at end of file
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/tools/tools/export_tflite.py
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/tools/tools/export_tflite.py
+import time
+from absl import app, flags, logging
+from absl.flags import FLAGS
+import cv2
+import numpy as np
+import tensorflow as tf
+from yolov3_tf2.models import (
+    YoloV3, YoloV3Tiny
+)
+from yolov3_tf2.dataset import transform_images
+
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import tensor_spec
+from tensorflow.python.util import nest
+
+# Command-line flags for the TFLite export; values are read through FLAGS
+# inside main().
+flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
+                    'path to weights file')
+flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
+# The script writes a .tflite flatbuffer here, not a SavedModel directory.
+flags.DEFINE_string('output', './checkpoints/yolov3.tflite',
+                    'path to output tflite model')
+flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
+flags.DEFINE_string('image', './data/girl.png', 'path to input image')
+flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
+flags.DEFINE_integer('size', 416, 'image size')
+
+
+def main(_argv):
+    """Convert a YOLOv3 Keras model to TFLite and smoke-test the result.
+
+    The model is built at --size, loaded from --weights, converted and
+    written to --output. The written file is then loaded into a TFLite
+    interpreter, run once on --image, and its first output tensor printed.
+
+    Args:
+        _argv: positional command-line arguments handed over by absl (unused).
+    """
+    if FLAGS.tiny:
+        yolo = YoloV3Tiny(size=FLAGS.size, classes=FLAGS.num_classes)
+    else:
+        yolo = YoloV3(size=FLAGS.size, classes=FLAGS.num_classes)
+
+    yolo.load_weights(FLAGS.weights)
+    logging.info('weights loaded')
+
+    converter = tf.lite.TFLiteConverter.from_keras_model(yolo)
+
+    # Fix from https://stackoverflow.com/questions/64490203/tf-lite-non-max-suppression
+    converter.experimental_new_converter = True
+    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
+
+    tflite_model = converter.convert()
+    # The context manager flushes and closes the file before it is re-read.
+    with open(FLAGS.output, 'wb') as model_file:
+        model_file.write(tflite_model)
+    logging.info("model saved to: {}".format(FLAGS.output))
+
+    interpreter = tf.lite.Interpreter(model_path=FLAGS.output)
+    interpreter.allocate_tensors()
+    logging.info('tflite model loaded')
+
+    input_details = interpreter.get_input_details()
+    output_details = interpreter.get_output_details()
+
+    with open(FLAGS.image, 'rb') as image_file:
+        img = tf.image.decode_image(image_file.read(), channels=3)
+    img = tf.expand_dims(img, 0)
+    # Resize to the size the model was built with, not a fixed 416, so the
+    # input matches the converted model for any --size.
+    img = transform_images(img, FLAGS.size)
+
+    interpreter.set_tensor(input_details[0]['index'], img)
+
+    interpreter.invoke()
+
+    output_data = interpreter.get_tensor(output_details[0]['index'])
+
+    print(output_data)
+
+if __name__ == '__main__':
+    # Script entry point: hand main over to absl's application runner.
+    app.run(main)
--- a/TensorFlow2x/ComputeVision/Detection/YOLOv3/tools/tools/export_tfserving.py
+++ b/TensorFlow2x/ComputeVision/Detection/YOLOv3/tools/tools/export_tfserving.py
+import time
+from absl import app, flags, logging
+from absl.flags import FLAGS
+import cv2
+import numpy as np
+import tensorflow as tf
+from yolov3_tf2.models import (
+    YoloV3, YoloV3Tiny
+)
+from yolov3_tf2.dataset import transform_images
+
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import tensor_spec
+from tensorflow.python.util import nest
+
+# Command-line flags for the SavedModel export; values are read through
+# FLAGS inside main().
+flags.DEFINE_string('weights', './checkpoints/yolov3.tf',
+                    'path to weights file')
+flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
+flags.DEFINE_string('output', './serving/yolov3/1', 'path to saved_model')
+flags.DEFINE_string('classes', './data/coco.names', 'path to classes file')
+flags.DEFINE_string('image', './data/girl.png', 'path to input image')
+flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
+
+
+def main(_argv):
+    """Export YOLOv3 as a SavedModel and smoke-test the serving signature.
+
+    The model is loaded from --weights, saved to --output, reloaded, and its
+    default serving signature is run once on --image. The detections and
+    the elapsed time are logged.
+
+    Args:
+        _argv: positional command-line arguments handed over by absl (unused).
+    """
+    if FLAGS.tiny:
+        yolo = YoloV3Tiny(classes=FLAGS.num_classes)
+    else:
+        yolo = YoloV3(classes=FLAGS.num_classes)
+
+    yolo.load_weights(FLAGS.weights)
+    logging.info('weights loaded')
+
+    tf.saved_model.save(yolo, FLAGS.output)
+    logging.info("model saved to: {}".format(FLAGS.output))
+
+    # Reload from disk so the check exercises the exported artefact.
+    model = tf.saved_model.load(FLAGS.output)
+    infer = model.signatures[tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
+    logging.info(infer.structured_outputs)
+
+    # Context managers close the file handles as soon as they are read.
+    with open(FLAGS.classes) as classes_file:
+        class_names = [c.strip() for c in classes_file]
+    logging.info('classes loaded')
+
+    with open(FLAGS.image, 'rb') as image_file:
+        img = tf.image.decode_image(image_file.read(), channels=3)
+    img = tf.expand_dims(img, 0)
+    img = transform_images(img, 416)
+
+    t1 = time.time()
+    outputs = infer(img)
+    # The signature returns a dict; the four results are picked out by key.
+    boxes, scores, classes, nums = outputs["yolo_nms"], outputs[
+        "yolo_nms_1"], outputs["yolo_nms_2"], outputs["yolo_nms_3"]
+    t2 = time.time()
+    logging.info('time: {}'.format(t2 - t1))
+
+    logging.info('detections:')
+    for i in range(nums[0]):
+        logging.info('\t{}, {}, {}'.format(class_names[int(classes[0][i])],
+                                           scores[0][i].numpy(),
+                                           boxes[0][i].numpy()))
+
+
+if __name__ == '__main__':
+    # SystemExit coming out of app.run is swallowed here and does not
+    # propagate further.
+    # NOTE(review): presumably so the script can be launched from an
+    # interactive session without ending it - confirm.
+    try:
+        app.run(main)
+    except SystemExit:
+        pass