Unverified Commit 8518d053 authored by pkulzc, committed by GitHub

Open source MnasFPN and minor fixes to OD API (#8484)

310447280  by lzc:

    Internal change

310420845  by Zhichao Lu:

    Open source the internal Context RCNN code.

--
310362339  by Zhichao Lu:

    Internal change

310259448  by lzc:

    Update required TF version for OD API.

--
310252159  by Zhichao Lu:

    Port patch_ops_test to TF1/TF2 and TPUs.

--
310247180  by Zhichao Lu:

    Ignore keypoint heatmap loss in the regions/bounding boxes with target keypoint
    class but no valid keypoint annotations.

--
310178294  by Zhichao Lu:

    Open source MnasFPN
    https://arxiv.org/abs/1912.01106

--
310094222  by lzc:

    Internal changes.

--
310085250  by lzc:

    Internal Change.

--
310016447  by huizhongc:

    Remove unrecognized classes from labeled_classes.

--
310009470  by rathodv:

    Mark batcher.py as TF1 only.

--
310001984  by rathodv:

    Update core/preprocessor.py to be compatible with TF1/TF2.

--
309455035  by Zhi...
parent ac5fff19
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow Requirement: 1.15](https://img.shields.io/badge/TensorFlow%20Requirement-1.15-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Tensorflow Object Detection API
......@@ -40,7 +40,6 @@ https://scholar.googleusercontent.com/scholar.bib?q=info:l291WsrB-hQJ:scholar.go
| Alireza Fathi | [afathi3](https://github.com/afathi3) |
| Zhichao Lu | [pkulzc](https://github.com/pkulzc) |
## Table of contents
Setup:
......@@ -105,6 +104,25 @@ reporting an issue.
## Release information
### May 7th, 2020
We have released a mobile model with the
[MnasFPN head](https://arxiv.org/abs/1912.01106).
* MnasFPN with MobileNet-V2 backbone is the most accurate (26.6 mAP at 183ms on
Pixel 1) mobile detection model we have released to date. With depth-multiplier,
MnasFPN with MobileNet-V2 backbone is 1.8 mAP higher than MobileNet-V3-Large
with SSDLite (23.8 mAP vs 22.0 mAP) at similar latency (120ms) on Pixel 1.
We have released the model definition, model checkpoints trained on
the COCO14 dataset, and a converted TFLite model.
<b>Thanks to contributors</b>: Bo Chen, Golnaz Ghiasi, Hanxiao Liu,
Tsung-Yi Lin, Dmitry Kalenichenko, Hartwig Adam, Quoc Le, Zhichao Lu,
Jonathan Huang.
### Nov 13th, 2019
We have released the MobileNetEdgeTPU SSDLite model.
......
......@@ -24,6 +24,7 @@ from object_detection.utils import test_case
class FlexibleGridAnchorGeneratorTest(test_case.TestCase):
def test_construct_single_anchor(self):
def graph_fn():
anchor_strides = [(32, 32),]
anchor_offsets = [(16, 16),]
base_sizes = [(128.0,)]
......@@ -31,22 +32,22 @@ class FlexibleGridAnchorGeneratorTest(test_case.TestCase):
im_height = 64
im_width = 64
feature_map_shape_list = [(2, 2)]
exp_anchor_corners = [[-48, -48, 80, 80],
[-48, -16, 80, 112],
[-16, -48, 112, 80],
[-16, -16, 112, 112]]
anchor_generator = fg.FlexibleGridAnchorGenerator(
base_sizes, aspect_ratios, anchor_strides, anchor_offsets,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
return anchor_corners
anchor_corners_out = self.execute(graph_fn, [])
exp_anchor_corners = [[-48, -48, 80, 80],
[-48, -16, 80, 112],
[-16, -48, 112, 80],
[-16, -16, 112, 112]]
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_unit_dimensions(self):
def graph_fn():
anchor_strides = [(32, 32),]
anchor_offsets = [(16, 16),]
base_sizes = [(32.0,)]
......@@ -54,21 +55,19 @@ class FlexibleGridAnchorGeneratorTest(test_case.TestCase):
im_height = 1
im_width = 1
feature_map_shape_list = [(2, 2)]
# Positive offsets are produced.
exp_anchor_corners = [[0, 0, 32, 32],
[0, 32, 32, 64],
[32, 0, 64, 32],
[32, 32, 64, 64]]
anchor_generator = fg.FlexibleGridAnchorGenerator(
base_sizes, aspect_ratios, anchor_strides, anchor_offsets,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
return anchor_corners
# Positive offsets are produced.
exp_anchor_corners = [[0, 0, 32, 32],
[0, 32, 32, 64],
[32, 0, 64, 32],
[32, 32, 64, 64]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_normalized_anchors_fails_with_unit_dimensions(self):
......@@ -80,6 +79,7 @@ class FlexibleGridAnchorGeneratorTest(test_case.TestCase):
feature_map_shape_list=[(2, 2)], im_height=1, im_width=1)
def test_construct_single_anchor_in_normalized_coordinates(self):
def graph_fn():
anchor_strides = [(32, 32),]
anchor_offsets = [(16, 16),]
base_sizes = [(128.0,)]
......@@ -87,19 +87,18 @@ class FlexibleGridAnchorGeneratorTest(test_case.TestCase):
im_height = 64
im_width = 128
feature_map_shape_list = [(2, 2)]
exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128],
[-48./64, -16./128, 80./64, 112./128],
[-16./64, -48./128, 112./64, 80./128],
[-16./64, -16./128, 112./64, 112./128]]
anchor_generator = fg.FlexibleGridAnchorGenerator(
base_sizes, aspect_ratios, anchor_strides, anchor_offsets,
normalize_coordinates=True)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
return anchor_corners
exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128],
[-48./64, -16./128, 80./64, 112./128],
[-16./64, -48./128, 112./64, 80./128],
[-16./64, -16./128, 112./64, 112./128]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_num_anchors_per_location(self):
......@@ -115,6 +114,7 @@ class FlexibleGridAnchorGeneratorTest(test_case.TestCase):
self.assertEqual(anchor_generator.num_anchors_per_location(), [6, 6])
def test_construct_single_anchor_dynamic_size(self):
def graph_fn():
anchor_strides = [(32, 32),]
anchor_offsets = [(0, 0),]
base_sizes = [(128.0,)]
......@@ -122,21 +122,19 @@ class FlexibleGridAnchorGeneratorTest(test_case.TestCase):
im_height = tf.constant(64)
im_width = tf.constant(64)
feature_map_shape_list = [(2, 2)]
# Zero offsets are used.
exp_anchor_corners = [[-64, -64, 64, 64],
[-64, -32, 64, 96],
[-32, -64, 96, 64],
[-32, -32, 96, 96]]
anchor_generator = fg.FlexibleGridAnchorGenerator(
base_sizes, aspect_ratios, anchor_strides, anchor_offsets,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
return anchor_corners
# Zero offsets are used.
exp_anchor_corners = [[-64, -64, 64, 64],
[-64, -32, 64, 96],
[-32, -64, 96, 64],
[-32, -32, 96, 96]]
anchor_corners_out = self.execute_cpu(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_with_odd_input_dimension(self):
......
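These test refactors all follow the same recipe: graph construction moves into a local graph_fn, and evaluation goes through the test_case.TestCase.execute helper instead of an explicit test_session. A minimal standalone sketch of the pattern (the execute helper comes from object_detection/utils/test_case.py; its TF1-session/TF2-eager dispatch is assumed here, not reproduced):

import numpy as np

from object_detection.utils import test_case


class ExecutePatternTest(test_case.TestCase):

  def test_add(self):
    def graph_fn(x, y):
      # Only build tensors here; no session or eager handling in the test.
      return x + y

    # execute runs graph_fn under a TF1 session or TF2 eager execution and
    # returns numpy arrays; inputs are passed as a list of numpy values.
    out = self.execute(graph_fn, [np.float32(1.0), np.float32(2.0)])
    self.assertAllClose(out, 3.0)

Note that the dynamic-shape test above calls execute_cpu rather than execute, presumably because accelerated backends require statically known shapes.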
......@@ -212,7 +212,7 @@ class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
min_im_shape = tf.minimum(im_height, im_width)
scale_height = min_im_shape / im_height
scale_width = min_im_shape / im_width
if not tf.contrib.framework.is_tensor(self._base_anchor_size):
if not tf.is_tensor(self._base_anchor_size):
base_anchor_size = [
scale_height * tf.constant(self._base_anchor_size[0],
dtype=tf.float32),
......
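The one-line change above swaps tf.contrib.framework.is_tensor for tf.is_tensor, which is available in both TF 1.15 and TF 2.x. A tiny standalone illustration of the check guarding the base-anchor-size scaling (sketch only, not repository code):

import tensorflow as tf

# Plain Python values can be scaled eagerly; tensors must stay in the graph.
print(tf.is_tensor([256.0, 256.0]))      # False: a plain Python list
print(tf.is_tensor(tf.constant(256.0)))  # True: a tf.Tensor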
......@@ -20,6 +20,8 @@ described in:
T.-Y. Lin, P. Goyal, R. Girshick, K. He, P. Dollar
"""
import tensorflow as tf
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.core import anchor_generator
from object_detection.core import box_list_ops
......@@ -85,8 +87,10 @@ class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
"""Generates a collection of bounding boxes to be used as anchors.
Currently we require the input image shape to be statically defined. That
is, im_height and im_width should be integers rather than tensors.
For training, we require the input image shape to be statically defined.
That is, im_height and im_width should be integers rather than tensors.
For inference, im_height and im_width can be either integers (for fixed
image size), or tensors (for arbitrary image size).
Args:
feature_map_shape_list: list of pairs of convnet layer resolutions in the
......@@ -124,6 +128,9 @@ class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
anchor_offset[0] = stride / 2.0
if im_width % 2.0**level == 0 or im_width == 1:
anchor_offset[1] = stride / 2.0
if tf.is_tensor(im_height) and tf.is_tensor(im_width):
anchor_offset[0] = stride / 2.0
anchor_offset[1] = stride / 2.0
ag = grid_anchor_generator.GridAnchorGenerator(
scales,
aspect_ratios,
......
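The added branch makes the anchor offset unconditional when im_height and im_width are tensors: divisibility by 2**level cannot be tested at graph-construction time, so the anchors are always centered on the stride. A standalone sketch of the resulting offset rule (anchor_offset_for_level is a hypothetical helper; the real logic is inline in MultiscaleGridAnchorGenerator._generate):

import tensorflow as tf

def anchor_offset_for_level(im_height, im_width, level):
  """Sketch: center anchors when alignment is provable or shapes are dynamic."""
  stride = 2**level
  if tf.is_tensor(im_height) and tf.is_tensor(im_width):
    # Dynamic image shape: divisibility is unknown, so always center.
    return [stride / 2.0, stride / 2.0]
  anchor_offset = [0, 0]
  if im_height % 2**level == 0 or im_height == 1:
    anchor_offset[0] = stride / 2.0
  if im_width % 2**level == 0 or im_width == 1:
    anchor_offset[1] = stride / 2.0
  return anchor_offset

This also explains the test change below: with tf.constant image dimensions, the expected corners now include a stride/2 shift (anchor_offset = 2.0**5 / 2.0 for level 5).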
......@@ -24,6 +24,7 @@ from object_detection.utils import test_case
class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
def test_construct_single_anchor(self):
def graph_fn():
min_level = 5
max_level = 5
anchor_scale = 4.0
......@@ -32,22 +33,23 @@ class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
im_height = 64
im_width = 64
feature_map_shape_list = [(2, 2)]
exp_anchor_corners = [[-48, -48, 80, 80],
[-48, -16, 80, 112],
[-16, -48, 112, 80],
[-16, -16, 112, 112]]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
return anchor_corners
with self.test_session():
anchor_corners_out = anchor_corners.eval()
exp_anchor_corners = [[-48, -48, 80, 80],
[-48, -16, 80, 112],
[-16, -48, 112, 80],
[-16, -16, 112, 112]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_unit_dimensions(self):
def graph_fn():
min_level = 5
max_level = 5
anchor_scale = 1.0
......@@ -56,21 +58,20 @@ class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
im_height = 1
im_width = 1
feature_map_shape_list = [(2, 2)]
# Positive offsets are produced.
exp_anchor_corners = [[0, 0, 32, 32],
[0, 32, 32, 64],
[32, 0, 64, 32],
[32, 32, 64, 64]]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
return anchor_corners
with self.test_session():
anchor_corners_out = anchor_corners.eval()
# Positive offsets are produced.
exp_anchor_corners = [[0, 0, 32, 32],
[0, 32, 32, 64],
[32, 0, 64, 32],
[32, 32, 64, 64]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_normalized_anchors_fails_with_unit_dimensions(self):
......@@ -82,6 +83,7 @@ class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
feature_map_shape_list=[(2, 2)], im_height=1, im_width=1)
def test_construct_single_anchor_in_normalized_coordinates(self):
def graph_fn():
min_level = 5
max_level = 5
anchor_scale = 4.0
......@@ -90,19 +92,19 @@ class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
im_height = 64
im_width = 128
feature_map_shape_list = [(2, 2)]
exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128],
[-48./64, -16./128, 80./64, 112./128],
[-16./64, -48./128, 112./64, 80./128],
[-16./64, -16./128, 112./64, 112./128]]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=True)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
return anchor_corners
with self.test_session():
anchor_corners_out = anchor_corners.eval()
exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128],
[-48./64, -16./128, 80./64, 112./128],
[-16./64, -48./128, 112./64, 80./128],
[-16./64, -16./128, 112./64, 112./128]]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_num_anchors_per_location(self):
......@@ -117,6 +119,7 @@ class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
self.assertEqual(anchor_generator.num_anchors_per_location(), [6, 6])
def test_construct_single_anchor_dynamic_size(self):
def graph_fn():
min_level = 5
max_level = 5
anchor_scale = 4.0
......@@ -125,21 +128,24 @@ class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
im_height = tf.constant(64)
im_width = tf.constant(64)
feature_map_shape_list = [(2, 2)]
# Zero offsets are used.
exp_anchor_corners = [[-64, -64, 64, 64],
[-64, -32, 64, 96],
[-32, -64, 96, 64],
[-32, -32, 96, 96]]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
return anchor_corners
with self.test_session():
anchor_corners_out = anchor_corners.eval()
exp_anchor_corners = [[-64, -64, 64, 64],
[-64, -32, 64, 96],
[-32, -64, 96, 64],
[-32, -32, 96, 96]]
# Add anchor offset.
anchor_offset = 2.0**5 / 2.0
exp_anchor_corners = [
[b + anchor_offset for b in a] for a in exp_anchor_corners
]
anchor_corners_out = self.execute(graph_fn, [])
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_with_odd_input_dimension(self):
......
......@@ -14,80 +14,99 @@
# ==============================================================================
"""Tests for object_detection.box_coder.faster_rcnn_box_coder."""
import numpy as np
import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import box_list
from object_detection.utils import test_case
class FasterRcnnBoxCoderTest(tf.test.TestCase):
class FasterRcnnBoxCoderTest(test_case.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
boxes = np.array([[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]],
np.float32)
anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]],
np.float32)
expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(boxes, anchors):
boxes = box_list.BoxList(boxes)
anchors = box_list.BoxList(anchors)
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
return rel_codes
rel_codes_out = self.execute(graph_fn, [boxes, anchors])
self.assertAllClose(rel_codes_out, expected_rel_codes, rtol=1e-04,
atol=1e-04)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
boxes = np.array([[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]],
np.float32)
anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]],
np.float32)
expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(boxes, anchors):
scale_factors = [2, 3, 4, 5]
boxes = box_list.BoxList(boxes)
anchors = box_list.BoxList(anchors)
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
return rel_codes
rel_codes_out = self.execute(graph_fn, [boxes, anchors])
self.assertAllClose(rel_codes_out, expected_rel_codes, rtol=1e-04,
atol=1e-04)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]]
anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]],
np.float32)
rel_codes = np.array([[-0.5, -0.416666, -0.405465, -0.182321],
[-0.083333, -0.222222, -0.693147, -1.098612]],
np.float32)
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(rel_codes, anchors):
anchors = box_list.BoxList(anchors)
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
return boxes.get()
boxes_out = self.execute(graph_fn, [rel_codes, anchors])
self.assertAllClose(boxes_out, expected_boxes, rtol=1e-04,
atol=1e-04)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]]
scale_factors = [2, 3, 4, 5]
anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]],
np.float32)
rel_codes = np.array([[-1., -1.25, -1.62186, -0.911608],
[-0.166667, -0.666667, -2.772588, -5.493062]],
np.float32)
expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(rel_codes, anchors):
scale_factors = [2, 3, 4, 5]
anchors = box_list.BoxList(anchors)
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
boxes = coder.decode(rel_codes, anchors).get()
return boxes
boxes_out = self.execute(graph_fn, [rel_codes, anchors])
self.assertAllClose(expected_boxes, boxes_out, rtol=1e-04,
atol=1e-04)
def test_very_small_Width_nan_after_encoding(self):
boxes = [[10.0, 10.0, 10.0000001, 20.0]]
anchors = [[15.0, 12.0, 30.0, 18.0]]
boxes = np.array([[10.0, 10.0, 10.0000001, 20.0]], np.float32)
anchors = np.array([[15.0, 12.0, 30.0, 18.0]], np.float32)
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(boxes, anchors):
boxes = box_list.BoxList(boxes)
anchors = box_list.BoxList(anchors)
coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
return rel_codes
rel_codes_out = self.execute(graph_fn, [boxes, anchors])
self.assertAllClose(rel_codes_out, expected_rel_codes, rtol=1e-04,
atol=1e-04)
if __name__ == '__main__':
......
......@@ -14,126 +14,137 @@
# ==============================================================================
"""Tests for object_detection.box_coder.keypoint_box_coder."""
import numpy as np
import tensorflow as tf
from object_detection.box_coders import keypoint_box_coder
from object_detection.core import box_list
from object_detection.core import standard_fields as fields
from object_detection.utils import test_case
class KeypointBoxCoderTest(tf.test.TestCase):
class KeypointBoxCoderTest(test_case.TestCase):
def test_get_correct_relative_codes_after_encoding(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
boxes = np.array([[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]], np.float32)
keypoints = np.array([[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]], np.float32)
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
anchors = np.array([[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]], np.float32)
expected_rel_codes = [
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(boxes, keypoints, anchors):
boxes = box_list.BoxList(boxes)
boxes.add_field(fields.BoxListFields.keypoints, keypoints)
anchors = box_list.BoxList(anchors)
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
return rel_codes
rel_codes_out = self.execute(graph_fn, [boxes, keypoints, anchors])
self.assertAllClose(rel_codes_out, expected_rel_codes, rtol=1e-04,
atol=1e-04)
def test_get_correct_relative_codes_after_encoding_with_scaling(self):
boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
boxes = np.array([[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]], np.float32)
keypoints = np.array([[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]], np.float32)
num_keypoints = len(keypoints[0])
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4, 5]
anchors = np.array([[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]], np.float32)
expected_rel_codes = [
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(boxes, keypoints, anchors):
scale_factors = [2, 3, 4, 5]
boxes = box_list.BoxList(boxes)
boxes.add_field(fields.BoxListFields.keypoints, keypoints)
anchors = box_list.BoxList(anchors)
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
return rel_codes
rel_codes_out = self.execute(graph_fn, [boxes, keypoints, anchors])
self.assertAllClose(rel_codes_out, expected_rel_codes, rtol=1e-04,
atol=1e-04)
def test_get_correct_boxes_after_decoding(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
anchors = np.array([[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]], np.float32)
rel_codes = np.array([
[-0.5, -0.416666, -0.405465, -0.182321,
-0.5, -0.5, -0.833333, 0.],
[-0.083333, -0.222222, -0.693147, -1.098612,
0.166667, -0.166667, -0.333333, -0.055556]
]
], np.float32)
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(rel_codes, anchors):
anchors = box_list.BoxList(anchors)
coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
return boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)
boxes_out, keypoints_out = self.execute(graph_fn, [rel_codes, anchors])
self.assertAllClose(keypoints_out, expected_keypoints, rtol=1e-04,
atol=1e-04)
self.assertAllClose(boxes_out, expected_boxes, rtol=1e-04,
atol=1e-04)
def test_get_correct_boxes_after_decoding_with_scaling(self):
anchors = [[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]]
rel_codes = [
anchors = np.array([[15., 12., 30., 18.],
[0.1, 0.0, 0.7, 0.9]], np.float32)
rel_codes = np.array([
[-1., -1.25, -1.62186, -0.911608,
-1.0, -1.5, -1.666667, 0.],
[-0.166667, -0.666667, -2.772588, -5.493062,
0.333333, -0.5, -0.666667, -0.166667]
]
scale_factors = [2, 3, 4, 5]
], np.float32)
expected_boxes = [[10., 10., 20., 15.],
[0.2, 0.1, 0.5, 0.4]]
expected_keypoints = [[[15., 12.], [10., 15.]],
[[0.5, 0.3], [0.2, 0.4]]]
num_keypoints = len(expected_keypoints[0])
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(rel_codes, anchors):
scale_factors = [2, 3, 4, 5]
anchors = box_list.BoxList(anchors)
coder = keypoint_box_coder.KeypointBoxCoder(
num_keypoints, scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
boxes_out, keypoints_out = sess.run(
[boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
self.assertAllClose(boxes_out, expected_boxes)
self.assertAllClose(keypoints_out, expected_keypoints)
return boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)
boxes_out, keypoints_out = self.execute(graph_fn, [rel_codes, anchors])
self.assertAllClose(keypoints_out, expected_keypoints, rtol=1e-04,
atol=1e-04)
self.assertAllClose(boxes_out, expected_boxes, rtol=1e-04,
atol=1e-04)
def test_very_small_width_nan_after_encoding(self):
boxes = [[10., 10., 10.0000001, 20.]]
keypoints = [[[10., 10.], [10.0000001, 20.]]]
anchors = [[15., 12., 30., 18.]]
boxes = np.array([[10., 10., 10.0000001, 20.]], np.float32)
keypoints = np.array([[[10., 10.], [10.0000001, 20.]]], np.float32)
anchors = np.array([[15., 12., 30., 18.]], np.float32)
expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826,
-0.833333, -0.833333, -0.833333, 0.833333]]
boxes = box_list.BoxList(tf.constant(boxes))
boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(boxes, keypoints, anchors):
boxes = box_list.BoxList(boxes)
boxes.add_field(fields.BoxListFields.keypoints, keypoints)
anchors = box_list.BoxList(anchors)
coder = keypoint_box_coder.KeypointBoxCoder(2)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
rel_codes_out, = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
return rel_codes
rel_codes_out = self.execute(graph_fn, [boxes, keypoints, anchors])
self.assertAllClose(rel_codes_out, expected_rel_codes, rtol=1e-04,
atol=1e-04)
if __name__ == '__main__':
......
......@@ -14,40 +14,47 @@
# ==============================================================================
"""Tests for object_detection.box_coder.mean_stddev_boxcoder."""
import numpy as np
import tensorflow as tf
from object_detection.box_coders import mean_stddev_box_coder
from object_detection.core import box_list
from object_detection.utils import test_case
class MeanStddevBoxCoderTest(tf.test.TestCase):
class MeanStddevBoxCoderTest(test_case.TestCase):
def testGetCorrectRelativeCodesAfterEncoding(self):
box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
boxes = box_list.BoxList(tf.constant(box_corners))
boxes = np.array([[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]], np.float32)
anchors = np.array([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]], np.float32)
expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
priors = box_list.BoxList(prior_means)
def graph_fn(boxes, anchors):
anchors = box_list.BoxList(anchors)
boxes = box_list.BoxList(boxes)
coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
rel_codes = coder.encode(boxes, priors)
with self.test_session() as sess:
rel_codes_out = sess.run(rel_codes)
self.assertAllClose(rel_codes_out, expected_rel_codes)
rel_codes = coder.encode(boxes, anchors)
return rel_codes
rel_codes_out = self.execute(graph_fn, [boxes, anchors])
self.assertAllClose(rel_codes_out, expected_rel_codes, rtol=1e-04,
atol=1e-04)
def testGetCorrectBoxesAfterDecoding(self):
rel_codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]])
rel_codes = np.array([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]],
np.float32)
expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
priors = box_list.BoxList(prior_means)
anchors = np.array([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]], np.float32)
def graph_fn(rel_codes, anchors):
anchors = box_list.BoxList(anchors)
coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
decoded_boxes = coder.decode(rel_codes, priors)
decoded_box_corners = decoded_boxes.get()
with self.test_session() as sess:
decoded_out = sess.run(decoded_box_corners)
self.assertAllClose(decoded_out, expected_box_corners)
decoded_boxes = coder.decode(rel_codes, anchors).get()
return decoded_boxes
decoded_boxes_out = self.execute(graph_fn, [rel_codes, anchors])
self.assertAllClose(decoded_boxes_out, expected_box_corners, rtol=1e-04,
atol=1e-04)
if __name__ == '__main__':
......
......@@ -14,83 +14,100 @@
# ==============================================================================
"""Tests for object_detection.box_coder.square_box_coder."""
import numpy as np
import tensorflow as tf
from object_detection.box_coders import square_box_coder
from object_detection.core import box_list
from object_detection.utils import test_case
class SquareBoxCoderTest(tf.test.TestCase):
class SquareBoxCoderTest(test_case.TestCase):
def test_correct_relative_codes_with_default_scale(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = None
boxes = np.array([[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]],
np.float32)
anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]],
np.float32)
expected_rel_codes = [[-0.790569, -0.263523, -0.293893],
[-0.068041, -0.272166, -0.89588]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(boxes, anchors):
scale_factors = None
boxes = box_list.BoxList(boxes)
anchors = box_list.BoxList(anchors)
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
(rel_codes_out,) = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
return rel_codes
rel_codes_out = self.execute(graph_fn, [boxes, anchors])
self.assertAllClose(rel_codes_out, expected_rel_codes, rtol=1e-04,
atol=1e-04)
def test_correct_relative_codes_with_non_default_scale(self):
boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
scale_factors = [2, 3, 4]
boxes = np.array([[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]],
np.float32)
anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]],
np.float32)
expected_rel_codes = [[-1.581139, -0.790569, -1.175573],
[-0.136083, -0.816497, -3.583519]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(boxes, anchors):
scale_factors = [2, 3, 4]
boxes = box_list.BoxList(boxes)
anchors = box_list.BoxList(anchors)
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
(rel_codes_out,) = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
return rel_codes
rel_codes_out = self.execute(graph_fn, [boxes, anchors])
self.assertAllClose(rel_codes_out, expected_rel_codes, rtol=1e-03,
atol=1e-03)
def test_correct_relative_codes_with_small_width(self):
boxes = [[10.0, 10.0, 10.0000001, 20.0]]
anchors = [[15.0, 12.0, 30.0, 18.0]]
scale_factors = None
boxes = np.array([[10.0, 10.0, 10.0000001, 20.0]], np.float32)
anchors = np.array([[15.0, 12.0, 30.0, 18.0]], np.float32)
expected_rel_codes = [[-1.317616, 0., -20.670586]]
boxes = box_list.BoxList(tf.constant(boxes))
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(boxes, anchors):
scale_factors = None
boxes = box_list.BoxList(boxes)
anchors = box_list.BoxList(anchors)
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
rel_codes = coder.encode(boxes, anchors)
with self.test_session() as sess:
(rel_codes_out,) = sess.run([rel_codes])
self.assertAllClose(rel_codes_out, expected_rel_codes)
return rel_codes
rel_codes_out = self.execute(graph_fn, [boxes, anchors])
self.assertAllClose(rel_codes_out, expected_rel_codes, rtol=1e-04,
atol=1e-04)
def test_correct_boxes_with_default_scale(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-0.5, -0.416666, -0.405465],
[-0.083333, -0.222222, -0.693147]]
scale_factors = None
anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]],
np.float32)
rel_codes = np.array([[-0.5, -0.416666, -0.405465],
[-0.083333, -0.222222, -0.693147]], np.float32)
expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
[0.155051, 0.102989, 0.522474, 0.470412]]
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(rel_codes, anchors):
scale_factors = None
anchors = box_list.BoxList(anchors)
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
(boxes_out,) = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
boxes = coder.decode(rel_codes, anchors).get()
return boxes
boxes_out = self.execute(graph_fn, [rel_codes, anchors])
self.assertAllClose(boxes_out, expected_boxes, rtol=1e-04,
atol=1e-04)
def test_correct_boxes_with_non_default_scale(self):
anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
rel_codes = [[-1., -1.25, -1.62186], [-0.166667, -0.666667, -2.772588]]
scale_factors = [2, 3, 4]
anchors = np.array([[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]],
np.float32)
rel_codes = np.array(
[[-1., -1.25, -1.62186], [-0.166667, -0.666667, -2.772588]], np.float32)
expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
[0.155051, 0.102989, 0.522474, 0.470412]]
anchors = box_list.BoxList(tf.constant(anchors))
def graph_fn(rel_codes, anchors):
scale_factors = [2, 3, 4]
anchors = box_list.BoxList(anchors)
coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
boxes = coder.decode(rel_codes, anchors)
with self.test_session() as sess:
(boxes_out,) = sess.run([boxes.get()])
self.assertAllClose(boxes_out, expected_boxes)
boxes = coder.decode(rel_codes, anchors).get()
return boxes
boxes_out = self.execute(graph_fn, [rel_codes, anchors])
self.assertAllClose(boxes_out, expected_boxes, rtol=1e-04,
atol=1e-04)
if __name__ == '__main__':
......
# Lint as: python2, python3
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -15,6 +16,10 @@
"""A function to build an object detection anchor generator from config."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import zip
from object_detection.anchor_generators import flexible_grid_anchor_generator
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.anchor_generators import multiple_grid_anchor_generator
......@@ -58,12 +63,14 @@ def build(anchor_generator_config):
ssd_anchor_generator_config = anchor_generator_config.ssd_anchor_generator
anchor_strides = None
if ssd_anchor_generator_config.height_stride:
anchor_strides = zip(ssd_anchor_generator_config.height_stride,
ssd_anchor_generator_config.width_stride)
anchor_strides = list(
zip(ssd_anchor_generator_config.height_stride,
ssd_anchor_generator_config.width_stride))
anchor_offsets = None
if ssd_anchor_generator_config.height_offset:
anchor_offsets = zip(ssd_anchor_generator_config.height_offset,
ssd_anchor_generator_config.width_offset)
anchor_offsets = list(
zip(ssd_anchor_generator_config.height_offset,
ssd_anchor_generator_config.width_offset))
return multiple_grid_anchor_generator.create_ssd_anchors(
num_layers=ssd_anchor_generator_config.num_layers,
min_scale=ssd_anchor_generator_config.min_scale,
......
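The list(...) wrappers matter because six.moves.zip returns a single-use iterator under Python 3, while the downstream anchor-generator code may iterate the strides and offsets more than once. A standalone sketch of the failure mode being fixed:

from six.moves import zip  # iterator semantics on Python 2 and 3

height_stride = [8, 16, 32]
width_stride = [8, 16, 32]

anchor_strides = zip(height_stride, width_stride)
print(list(anchor_strides))  # [(8, 8), (16, 16), (32, 32)]
print(list(anchor_strides))  # []  -- iterator exhausted on reuse

anchor_strides = list(zip(height_stride, width_stride))
print(anchor_strides)  # a reusable list, safe to iterate repeatedly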
# Lint as: python2, python3
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -15,8 +16,14 @@
"""Tests for anchor_generator_builder."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from six.moves import range
from six.moves import zip
import tensorflow as tf
from google.protobuf import text_format
......
# Lint as: python2, python3
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -487,8 +488,8 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
self.assertEqual(box_predictor.num_classes, 90)
self.assertTrue(box_predictor._is_training)
self.assertEqual(box_head._box_code_size, 4)
self.assertTrue(
mask_rcnn_box_predictor.MASK_PREDICTIONS in third_stage_heads)
self.assertIn(
mask_rcnn_box_predictor.MASK_PREDICTIONS, third_stage_heads)
self.assertEqual(
third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
._mask_prediction_conv_depth, 512)
......@@ -527,8 +528,8 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
self.assertEqual(box_predictor.num_classes, 90)
self.assertTrue(box_predictor._is_training)
self.assertEqual(box_head._box_code_size, 4)
self.assertTrue(
mask_rcnn_box_predictor.MASK_PREDICTIONS in third_stage_heads)
self.assertIn(
mask_rcnn_box_predictor.MASK_PREDICTIONS, third_stage_heads)
self.assertEqual(
third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
._mask_prediction_conv_depth, 512)
......
# Lint as: python2, python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -15,8 +16,12 @@
"""Tests for calibration_builder."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from scipy import interpolate
from six.moves import zip
import tensorflow as tf
from object_detection.builders import calibration_builder
from object_detection.protos import calibration_pb2
......
# Lint as: python2, python3
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -21,10 +22,15 @@ Note: If users wish to also use their own InputReaders with the Object
Detection configuration framework, they should define their own builder function
that wraps the build function.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import tensorflow as tf
from object_detection.data_decoders import tf_example_decoder
from tensorflow.contrib import data as tf_data
from object_detection.builders import decoder_builder
from object_detection.protos import input_reader_pb2
......@@ -45,14 +51,20 @@ def make_initializable_iterator(dataset):
return iterator
def read_dataset(file_read_func, input_files, config):
def read_dataset(file_read_func, input_files, config,
filename_shard_fn=None):
"""Reads a dataset, and handles repetition and shuffling.
Args:
file_read_func: Function to use in tf.contrib.data.parallel_interleave, to
file_read_func: Function to use in tf_data.parallel_interleave, to
read every individual file into a tf.data.Dataset.
input_files: A list of file paths to read.
config: An input_reader_builder.InputReader object.
filename_shard_fn: optional, A function used to shard filenames across
replicas. This function takes as input a TF dataset of filenames and
is expected to return its sharded version. It is useful when the
dataset is being loaded on one of possibly many replicas and we want
to evenly shard the files between the replicas.
Returns:
A tf.data.Dataset of (undecoded) tf-records based on config.
......@@ -77,9 +89,12 @@ def read_dataset(file_read_func, input_files, config):
elif num_readers > 1:
tf.logging.warning('`shuffle` is false, but the input data stream is '
'still slightly shuffled since `num_readers` > 1.')
if filename_shard_fn:
filename_dataset = filename_shard_fn(filename_dataset)
filename_dataset = filename_dataset.repeat(config.num_epochs or None)
records_dataset = filename_dataset.apply(
tf.contrib.data.parallel_interleave(
tf_data.parallel_interleave(
file_read_func,
cycle_length=num_readers,
block_length=config.read_block_length,
......@@ -89,7 +104,21 @@ def read_dataset(file_read_func, input_files, config):
return records_dataset
def build(input_reader_config, batch_size=None, transform_input_data_fn=None):
def shard_function_for_context(input_context):
"""Returns a function that shards filenames based on the input context."""
if input_context is None:
return None
def shard_fn(dataset):
return dataset.shard(
input_context.num_input_pipelines, input_context.input_pipeline_id)
return shard_fn
def build(input_reader_config, batch_size=None, transform_input_data_fn=None,
input_context=None):
"""Builds a tf.data.Dataset.
Builds a tf.data.Dataset by applying the `transform_input_data_fn` on all
......@@ -100,6 +129,9 @@ def build(input_reader_config, batch_size=None, transform_input_data_fn=None):
batch_size: Batch size. If batch size is None, no batching is performed.
transform_input_data_fn: Function to apply transformation to all records,
or None if no extra decoding is required.
input_context: optional, A tf.distribute.InputContext object used to
shard filenames and compute per-replica batch_size when this function
is being called per-replica.
Returns:
A tf.data.Dataset based on the input_reader_config.
......@@ -112,23 +144,14 @@ def build(input_reader_config, batch_size=None, transform_input_data_fn=None):
raise ValueError('input_reader_config not of type '
'input_reader_pb2.InputReader.')
decoder = decoder_builder.build(input_reader_config)
if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader':
config = input_reader_config.tf_record_input_reader
if not config.input_path:
raise ValueError('At least one input path must be specified in '
'`input_reader_config`.')
label_map_proto_file = None
if input_reader_config.HasField('label_map_path'):
label_map_proto_file = input_reader_config.label_map_path
decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=input_reader_config.load_instance_masks,
load_multiclass_scores=input_reader_config.load_multiclass_scores,
instance_mask_type=input_reader_config.mask_type,
label_map_proto_file=label_map_proto_file,
use_display_name=input_reader_config.use_display_name,
num_additional_channels=input_reader_config.num_additional_channels)
def process_fn(value):
"""Sets up tf graph that decodes, transforms and pads input data."""
processed_tensors = decoder.decode(value)
......@@ -136,9 +159,13 @@ def build(input_reader_config, batch_size=None, transform_input_data_fn=None):
processed_tensors = transform_input_data_fn(processed_tensors)
return processed_tensors
shard_fn = shard_function_for_context(input_context)
if input_context is not None:
batch_size = input_context.get_per_replica_batch_size(batch_size)
dataset = read_dataset(
functools.partial(tf.data.TFRecordDataset, buffer_size=8 * 1000 * 1000),
config.input_path[:], input_reader_config)
config.input_path[:], input_reader_config, filename_shard_fn=shard_fn)
if input_reader_config.sample_1_of_n_examples > 1:
dataset = dataset.shard(input_reader_config.sample_1_of_n_examples, 0)
# TODO(rathodv): make batch size a required argument once the old binaries
......@@ -155,7 +182,7 @@ def build(input_reader_config, batch_size=None, transform_input_data_fn=None):
dataset = data_map_fn(process_fn, num_parallel_calls=num_parallel_calls)
if batch_size:
dataset = dataset.apply(
tf.contrib.data.batch_and_drop_remainder(batch_size))
tf_data.batch_and_drop_remainder(batch_size))
dataset = dataset.prefetch(input_reader_config.num_prefetch_batches)
return dataset
......
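Taken together, the dataset-builder changes let build be invoked once per replica with a tf.distribute.InputContext, which shards filenames across input pipelines (via shard_function_for_context) and converts the global batch size to a per-replica one. A hedged usage sketch (make_dataset_fn is a hypothetical wrapper; input_reader_proto is assumed to be a populated input_reader_pb2.InputReader):

import tensorflow as tf

from object_detection.builders import dataset_builder


def make_dataset_fn(input_reader_proto, global_batch_size):
  """Hypothetical helper returning a per-replica dataset function."""

  def dataset_fn(input_context):
    # build() shards filenames using the context and calls
    # input_context.get_per_replica_batch_size(global_batch_size) internally.
    return dataset_builder.build(
        input_reader_proto,
        batch_size=global_batch_size,
        input_context=input_context)

  return dataset_fn

# Typical TF2 wiring (sketch):
# strategy = tf.distribute.MirroredStrategy()
# dist_dataset = strategy.experimental_distribute_datasets_from_function(
#     make_dataset_fn(input_reader_proto, global_batch_size=64))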
# Lint as: python2, python3
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -14,8 +15,13 @@
# ==============================================================================
"""Tests for dataset_builder."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
from six.moves import range
import tensorflow as tf
from google.protobuf import text_format
......@@ -24,25 +30,63 @@ from object_detection.builders import dataset_builder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import dataset_util
from object_detection.utils import test_case
class DatasetBuilderTest(tf.test.TestCase):
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import lookup as contrib_lookup
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
def create_tf_record(self, has_additional_channels=False, num_examples=1):
path = os.path.join(self.get_temp_dir(), 'tfrecord')
writer = tf.python_io.TFRecordWriter(path)
def get_iterator_next_for_testing(dataset, is_tf2):
# In TF2, lookup tables are not supported in one shot iterators, but
# initialization is implicit.
if is_tf2:
return dataset.make_initializable_iterator().get_next()
# In TF1, we use one shot iterator because it does not require running
# a separate init op.
else:
return dataset.make_one_shot_iterator().get_next()
class DatasetBuilderTest(test_case.TestCase):
def create_tf_record(self, has_additional_channels=False, num_shards=1,
num_examples_per_shard=1):
def dummy_jpeg_fn():
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
additional_channels_tensor = np.random.randint(
255, size=(4, 5, 1)).astype(np.uint8)
flat_mask = (4 * 5) * [1.0]
with self.test_session():
encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
encoded_jpeg = tf.image.encode_jpeg(image_tensor)
encoded_additional_channels_jpeg = tf.image.encode_jpeg(
tf.constant(additional_channels_tensor)).eval()
for i in range(num_examples):
additional_channels_tensor)
return encoded_jpeg, encoded_additional_channels_jpeg
encoded_jpeg, encoded_additional_channels_jpeg = self.execute(
dummy_jpeg_fn, [])
tmp_dir = self.get_temp_dir()
flat_mask = (4 * 5) * [1.0]
for i in range(num_shards):
path = os.path.join(tmp_dir, '%05d.tfrecord' % i)
writer = tf.python_io.TFRecordWriter(path)
for j in range(num_examples_per_shard):
if num_shards > 1:
source_id = (str(i) + '_' + str(j)).encode()
else:
source_id = str(j).encode()
features = {
'image/source_id': dataset_util.bytes_feature(str(i)),
'image/source_id': dataset_util.bytes_feature(source_id),
'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
'image/height': dataset_util.int64_feature(4),
......@@ -54,15 +98,18 @@ class DatasetBuilderTest(tf.test.TestCase):
'image/object/class/label': dataset_util.int64_list_feature([2]),
'image/object/mask': dataset_util.float_list_feature(flat_mask),
}
if has_additional_channels:
additional_channels_key = 'image/additional_channels/encoded'
features[additional_channels_key] = dataset_util.bytes_list_feature(
[encoded_additional_channels_jpeg] * 2)
example = tf.train.Example(features=tf.train.Features(feature=features))
writer.write(example.SerializeToString())
writer.close()
return path
return os.path.join(self.get_temp_dir(), '?????.tfrecord')
def test_build_tf_record_input_reader(self):
tf_record_path = self.create_tf_record()
......@@ -76,19 +123,21 @@ class DatasetBuilderTest(tf.test.TestCase):
""".format(tf_record_path)
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
tensor_dict = dataset_builder.make_initializable_iterator(
dataset_builder.build(input_reader_proto, batch_size=1)).get_next()
with tf.train.MonitoredSession() as sess:
output_dict = sess.run(tensor_dict)
def graph_fn():
return get_iterator_next_for_testing(
dataset_builder.build(input_reader_proto, batch_size=1),
self.is_tf2())
self.assertTrue(
fields.InputDataFields.groundtruth_instance_masks not in output_dict)
self.assertEquals((1, 4, 5, 3),
output_dict = self.execute(graph_fn, [])
self.assertNotIn(
fields.InputDataFields.groundtruth_instance_masks, output_dict)
self.assertEqual((1, 4, 5, 3),
output_dict[fields.InputDataFields.image].shape)
self.assertAllEqual([[2]],
output_dict[fields.InputDataFields.groundtruth_classes])
self.assertEquals(
self.assertEqual(
(1, 1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
self.assertAllEqual(
[0.0, 0.0, 1.0, 1.0],
......@@ -107,11 +156,14 @@ class DatasetBuilderTest(tf.test.TestCase):
""".format(tf_record_path)
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
tensor_dict = dataset_builder.make_initializable_iterator(
dataset_builder.build(input_reader_proto, batch_size=1)).get_next()
with tf.train.MonitoredSession() as sess:
output_dict = sess.run(tensor_dict)
def graph_fn():
return get_iterator_next_for_testing(
dataset_builder.build(input_reader_proto, batch_size=1),
self.is_tf2()
)
output_dict = self.execute(graph_fn, [])
self.assertAllEqual(
(1, 1, 4, 5),
output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
......@@ -134,14 +186,14 @@ class DatasetBuilderTest(tf.test.TestCase):
tensor_dict[fields.InputDataFields.groundtruth_classes] - 1, depth=3)
return tensor_dict
tensor_dict = dataset_builder.make_initializable_iterator(
def graph_fn():
return dataset_builder.make_initializable_iterator(
dataset_builder.build(
input_reader_proto,
transform_input_data_fn=one_hot_class_encoding_fn,
batch_size=2)).get_next()
with tf.train.MonitoredSession() as sess:
output_dict = sess.run(tensor_dict)
output_dict = self.execute(graph_fn, [])
self.assertAllEqual([2, 4, 5, 3],
output_dict[fields.InputDataFields.image].shape)
......@@ -172,14 +224,14 @@ class DatasetBuilderTest(tf.test.TestCase):
tensor_dict[fields.InputDataFields.groundtruth_classes] - 1, depth=3)
return tensor_dict
tensor_dict = dataset_builder.make_initializable_iterator(
def graph_fn():
return dataset_builder.make_initializable_iterator(
dataset_builder.build(
input_reader_proto,
transform_input_data_fn=one_hot_class_encoding_fn,
batch_size=2)).get_next()
with tf.train.MonitoredSession() as sess:
output_dict = sess.run(tensor_dict)
output_dict = self.execute(graph_fn, [])
self.assertAllEqual(
[2, 1, 4, 5],
......@@ -197,7 +249,7 @@ class DatasetBuilderTest(tf.test.TestCase):
dataset_builder.build(input_reader_proto, batch_size=1)
def test_sample_all_data(self):
tf_record_path = self.create_tf_record(num_examples=2)
tf_record_path = self.create_tf_record(num_examples_per_shard=2)
input_reader_text_proto = """
shuffle: false
......@@ -209,17 +261,22 @@ class DatasetBuilderTest(tf.test.TestCase):
""".format(tf_record_path)
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
tensor_dict = dataset_builder.make_initializable_iterator(
dataset_builder.build(input_reader_proto, batch_size=1)).get_next()
with tf.train.MonitoredSession() as sess:
output_dict = sess.run(tensor_dict)
self.assertAllEqual(['0'], output_dict[fields.InputDataFields.source_id])
output_dict = sess.run(tensor_dict)
self.assertEquals(['1'], output_dict[fields.InputDataFields.source_id])
def graph_fn():
dataset = dataset_builder.build(input_reader_proto, batch_size=1)
sample1_ds = dataset.take(1)
sample2_ds = dataset.skip(1)
iter1 = dataset_builder.make_initializable_iterator(sample1_ds)
iter2 = dataset_builder.make_initializable_iterator(sample2_ds)
return iter1.get_next(), iter2.get_next()
output_dict1, output_dict2 = self.execute(graph_fn, [])
self.assertAllEqual(['0'], output_dict1[fields.InputDataFields.source_id])
self.assertEqual([b'1'], output_dict2[fields.InputDataFields.source_id])
def test_sample_one_of_n_shards(self):
tf_record_path = self.create_tf_record(num_examples=4)
tf_record_path = self.create_tf_record(num_examples_per_shard=4)
input_reader_text_proto = """
shuffle: false
......@@ -231,17 +288,99 @@ class DatasetBuilderTest(tf.test.TestCase):
""".format(tf_record_path)
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
tensor_dict = dataset_builder.make_initializable_iterator(
dataset_builder.build(input_reader_proto, batch_size=1)).get_next()
with tf.train.MonitoredSession() as sess:
output_dict = sess.run(tensor_dict)
self.assertAllEqual(['0'], output_dict[fields.InputDataFields.source_id])
output_dict = sess.run(tensor_dict)
self.assertEquals(['2'], output_dict[fields.InputDataFields.source_id])
def graph_fn():
dataset = dataset_builder.build(input_reader_proto, batch_size=1)
sample1_ds = dataset.take(1)
sample2_ds = dataset.skip(1)
iter1 = dataset_builder.make_initializable_iterator(sample1_ds)
iter2 = dataset_builder.make_initializable_iterator(sample2_ds)
return iter1.get_next(), iter2.get_next()
output_dict1, output_dict2 = self.execute(graph_fn, [])
self.assertAllEqual([b'0'], output_dict1[fields.InputDataFields.source_id])
self.assertEqual([b'2'], output_dict2[fields.InputDataFields.source_id])
def test_no_input_context(self):
"""Test that all samples are read with no input context given."""
tf_record_path = self.create_tf_record(num_examples_per_shard=16,
num_shards=2)
input_reader_text_proto = """
shuffle: false
num_readers: 1
num_epochs: 1
tf_record_input_reader {{
input_path: '{0}'
}}
""".format(tf_record_path)
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
class ReadDatasetTest(tf.test.TestCase):
for i in range(4):
# pylint:disable=cell-var-from-loop
def graph_fn():
dataset = dataset_builder.build(input_reader_proto, batch_size=8)
dataset = dataset.skip(i)
return get_iterator_next_for_testing(dataset, self.is_tf2())
batch = self.execute(graph_fn, [])
self.assertEqual(batch['image'].shape, (8, 4, 5, 3))
def graph_fn_last_batch():
dataset = dataset_builder.build(input_reader_proto, batch_size=8)
dataset = dataset.skip(4)
return get_iterator_next_for_testing(dataset, self.is_tf2())
self.assertRaises(tf.errors.OutOfRangeError, self.execute,
compute_fn=graph_fn_last_batch, inputs=[])
def test_with_input_context(self):
"""Test that a subset is read with input context given."""
tf_record_path = self.create_tf_record(num_examples_per_shard=16,
num_shards=2)
input_reader_text_proto = """
shuffle: false
num_readers: 1
num_epochs: 1
tf_record_input_reader {{
input_path: '{0}'
}}
""".format(tf_record_path)
input_reader_proto = input_reader_pb2.InputReader()
text_format.Merge(input_reader_text_proto, input_reader_proto)
input_context = tf.distribute.InputContext(
num_input_pipelines=2, input_pipeline_id=0, num_replicas_in_sync=4
)
for i in range(8):
# pylint:disable=cell-var-from-loop
def graph_fn():
dataset = dataset_builder.build(input_reader_proto, batch_size=8,
input_context=input_context)
dataset = dataset.skip(i)
return get_iterator_next_for_testing(dataset, self.is_tf2())
batch = self.execute(graph_fn, [])
self.assertEqual(batch['image'].shape, (2, 4, 5, 3))
def graph_fn_last_batch():
dataset = dataset_builder.build(input_reader_proto, batch_size=8,
input_context=input_context)
dataset = dataset.skip(8)
return get_iterator_next_for_testing(dataset, self.is_tf2())
self.assertRaises(tf.errors.OutOfRangeError, self.execute,
compute_fn=graph_fn_last_batch, inputs=[])
class ReadDatasetTest(test_case.TestCase):
def setUp(self):
self._path_template = os.path.join(self.get_temp_dir(), 'examples_%s.txt')
......@@ -258,7 +397,9 @@ class ReadDatasetTest(tf.test.TestCase):
with tf.gfile.Open(path, 'wb') as f:
f.write('\n'.join([str(i)] * 5))
def _get_dataset_next(self, files, config, batch_size):
super(ReadDatasetTest, self).setUp()
def _get_dataset_next(self, files, config, batch_size, num_batches_skip=0):
def decode_func(value):
return [tf.string_to_number(value, out_type=tf.int32)]
......@@ -267,50 +408,62 @@ class ReadDatasetTest(tf.test.TestCase):
config)
dataset = dataset.map(decode_func)
    dataset = dataset.batch(batch_size)
    if num_batches_skip > 0:
      dataset = dataset.skip(num_batches_skip)
    return get_iterator_next_for_testing(dataset, self.is_tf2())
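  # The num_batches_skip argument lets tests advance past the first batch so
  # that a subsequent read deterministically raises tf.errors.OutOfRangeError
  # in both TF1 and TF2 modes (see test_read_dataset_single_epoch below).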
  def test_make_initializable_iterator_with_hashTable(self):

    def graph_fn():
      keys = [1, 0, -1]
      dataset = tf.data.Dataset.from_tensor_slices([[1, 2, -1, 5]])
      table = contrib_lookup.HashTable(
          initializer=contrib_lookup.KeyValueTensorInitializer(
              keys=keys, values=list(reversed(keys))),
          default_value=100)
      dataset = dataset.map(table.lookup)
      # make_initializable_iterator registers the iterator initializer in the
      # TABLE_INITIALIZERS collection, so self.execute can initialize both the
      # lookup table and the iterator before fetching values.
      return dataset_builder.make_initializable_iterator(dataset).get_next()

    result = self.execute(graph_fn, [])
    self.assertAllEqual(result, [-1, 100, 1, 100])
def test_read_dataset(self):
config = input_reader_pb2.InputReader()
config.num_readers = 1
config.shuffle = False
    def graph_fn():
      return self._get_dataset_next(
          [self._path_template % '*'], config, batch_size=20)

    data = self.execute(graph_fn, [])
    # Note that the execute function extracts single outputs if the return
    # value is of size 1.
    self.assertAllEqual(
        data, [
            1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5,
            50
        ])
def test_reduce_num_reader(self):
config = input_reader_pb2.InputReader()
config.num_readers = 10
config.shuffle = False
    def graph_fn():
      return self._get_dataset_next(
          [self._path_template % '*'], config, batch_size=20)

    data = self.execute(graph_fn, [])
    # Note that the execute function extracts single outputs if the return
    # value is of size 1.
    self.assertAllEqual(
        data, [
            1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5,
            50
        ])
def test_enable_shuffle(self):
config = input_reader_pb2.InputReader()
......@@ -318,25 +471,30 @@ class ReadDatasetTest(tf.test.TestCase):
config.shuffle = True
tf.set_random_seed(1) # Set graph level seed.
    def graph_fn():
      return self._get_dataset_next(
          [self._shuffle_path_template % '*'], config, batch_size=10)

    expected_non_shuffle_output = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

    data = self.execute(graph_fn, [])
    self.assertTrue(
        np.any(np.not_equal(data, expected_non_shuffle_output)))
def test_disable_shuffle_(self):
config = input_reader_pb2.InputReader()
config.num_readers = 1
config.shuffle = False
    def graph_fn():
      return self._get_dataset_next(
          [self._shuffle_path_template % '*'], config, batch_size=10)

    expected_non_shuffle_output = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

    # Note that the execute function extracts single outputs if the return
    # value is of size 1.
    data = self.execute(graph_fn, [])
    self.assertAllEqual(data, expected_non_shuffle_output)
def test_read_dataset_single_epoch(self):
config = input_reader_pb2.InputReader()
......@@ -344,12 +502,24 @@ class ReadDatasetTest(tf.test.TestCase):
config.num_readers = 1
config.shuffle = False
    def graph_fn():
      return self._get_dataset_next(
          [self._path_template % '0'], config, batch_size=30)

    data = self.execute(graph_fn, [])
    # Note that the execute function extracts single outputs if the return
    # value is of size 1.
    self.assertAllEqual(data, [1, 10])

    # First batch will retrieve as much as it can, second batch will fail.
    def graph_fn_second_batch():
      return self._get_dataset_next(
          [self._path_template % '0'], config, batch_size=30,
          num_batches_skip=1)

    self.assertRaises(tf.errors.OutOfRangeError, self.execute,
                      compute_fn=graph_fn_second_batch, inputs=[])
if __name__ == '__main__':
......
# Lint as: python2, python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DataDecoder builder.
Creates DataDecoders from InputReader configs.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from object_detection.data_decoders import tf_example_decoder
from object_detection.protos import input_reader_pb2
def build(input_reader_config):
"""Builds a DataDecoder based only on the open source config proto.
Args:
input_reader_config: An input_reader_pb2.InputReader object.
Returns:
A DataDecoder based on the input_reader_config.
Raises:
ValueError: On invalid input reader proto.
"""
if not isinstance(input_reader_config, input_reader_pb2.InputReader):
raise ValueError('input_reader_config not of type '
'input_reader_pb2.InputReader.')
if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader':
label_map_proto_file = None
if input_reader_config.HasField('label_map_path'):
label_map_proto_file = input_reader_config.label_map_path
decoder = tf_example_decoder.TfExampleDecoder(
load_instance_masks=input_reader_config.load_instance_masks,
load_multiclass_scores=input_reader_config.load_multiclass_scores,
load_context_features=input_reader_config.load_context_features,
instance_mask_type=input_reader_config.mask_type,
label_map_proto_file=label_map_proto_file,
use_display_name=input_reader_config.use_display_name,
num_additional_channels=input_reader_config.num_additional_channels,
num_keypoints=input_reader_config.num_keypoints)
return decoder
raise ValueError('Unsupported input_reader_config.')
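For reference, a hedged usage sketch of the new builder. It simply mirrors
test_build_tf_record_input_reader below; `serialized_example` is an assumed
stand-in for a serialized tf.train.Example, not a name from this change:

from google.protobuf import text_format

from object_detection.builders import decoder_builder
from object_detection.protos import input_reader_pb2

config = input_reader_pb2.InputReader()
text_format.Parse('tf_record_input_reader {}', config)
decoder = decoder_builder.build(config)
# serialized_example: assumed serialized tf.train.Example bytes/tensor.
tensor_dict = decoder.decode(serialized_example)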
# Lint as: python2, python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for decoder_builder."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import decoder_builder
from object_detection.core import standard_fields as fields
from object_detection.protos import input_reader_pb2
from object_detection.utils import dataset_util
class DecoderBuilderTest(tf.test.TestCase):
def _make_serialized_tf_example(self, has_additional_channels=False):
image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
additional_channels_tensor = np.random.randint(
255, size=(4, 5, 1)).astype(np.uint8)
flat_mask = (4 * 5) * [1.0]
with self.test_session():
encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
encoded_additional_channels_jpeg = tf.image.encode_jpeg(
tf.constant(additional_channels_tensor)).eval()
features = {
'image/source_id': dataset_util.bytes_feature('0'.encode()),
'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
'image/height': dataset_util.int64_feature(4),
'image/width': dataset_util.int64_feature(5),
'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
'image/object/class/label': dataset_util.int64_list_feature([2]),
'image/object/mask': dataset_util.float_list_feature(flat_mask),
}
if has_additional_channels:
additional_channels_key = 'image/additional_channels/encoded'
features[additional_channels_key] = dataset_util.bytes_list_feature(
[encoded_additional_channels_jpeg] * 2)
example = tf.train.Example(features=tf.train.Features(feature=features))
return example.SerializeToString()
def test_build_tf_record_input_reader(self):
input_reader_text_proto = 'tf_record_input_reader {}'
input_reader_proto = input_reader_pb2.InputReader()
text_format.Parse(input_reader_text_proto, input_reader_proto)
decoder = decoder_builder.build(input_reader_proto)
tensor_dict = decoder.decode(self._make_serialized_tf_example())
with tf.train.MonitoredSession() as sess:
output_dict = sess.run(tensor_dict)
self.assertNotIn(
fields.InputDataFields.groundtruth_instance_masks, output_dict)
self.assertEqual((4, 5, 3), output_dict[fields.InputDataFields.image].shape)
self.assertAllEqual([2],
output_dict[fields.InputDataFields.groundtruth_classes])
self.assertEqual(
(1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
self.assertAllEqual(
[0.0, 0.0, 1.0, 1.0],
output_dict[fields.InputDataFields.groundtruth_boxes][0])
def test_build_tf_record_input_reader_and_load_instance_masks(self):
input_reader_text_proto = """
load_instance_masks: true
tf_record_input_reader {}
"""
input_reader_proto = input_reader_pb2.InputReader()
text_format.Parse(input_reader_text_proto, input_reader_proto)
decoder = decoder_builder.build(input_reader_proto)
tensor_dict = decoder.decode(self._make_serialized_tf_example())
with tf.train.MonitoredSession() as sess:
output_dict = sess.run(tensor_dict)
self.assertAllEqual(
(1, 4, 5),
output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
if __name__ == '__main__':
tf.test.main()
......@@ -16,6 +16,15 @@
import tensorflow as tf
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import layers as contrib_layers
from tensorflow.contrib import quantize as contrib_quantize
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
def build(graph_rewriter_config, is_training):
"""Returns a function that modifies default graph based on options.
......@@ -32,14 +41,15 @@ def build(graph_rewriter_config, is_training):
# Quantize the graph by inserting quantize ops for weights and activations
if is_training:
      contrib_quantize.experimental_create_training_graph(
          input_graph=tf.get_default_graph(),
          quant_delay=graph_rewriter_config.quantization.delay
      )
    else:
      contrib_quantize.experimental_create_eval_graph(
          input_graph=tf.get_default_graph()
      )
    contrib_layers.summarize_collection('quant_vars')
return graph_rewrite_fn
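A hedged usage sketch of the rewriter builder, mirroring the tests below.
This path is TF1-only, since it mutates the default graph:

from object_detection.builders import graph_rewriter_builder
from object_detection.protos import graph_rewriter_pb2

rewriter_proto = graph_rewriter_pb2.GraphRewriter()
rewriter_proto.quantization.delay = 10  # steps before fake-quant activates
graph_rewrite_fn = graph_rewriter_builder.build(rewriter_proto,
                                                is_training=True)
graph_rewrite_fn()  # inserts quantization ops into the default graph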
......@@ -18,14 +18,23 @@ import tensorflow as tf
from object_detection.builders import graph_rewriter_builder
from object_detection.protos import graph_rewriter_pb2
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import layers as contrib_layers
from tensorflow.contrib import quantize as contrib_quantize
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
class QuantizationBuilderTest(tf.test.TestCase):
def testQuantizationBuilderSetsUpCorrectTrainArguments(self):
with mock.patch.object(
        contrib_quantize,
'experimental_create_training_graph') as mock_quant_fn:
      with mock.patch.object(contrib_layers,
'summarize_collection') as mock_summarize_col:
graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
graph_rewriter_proto.quantization.delay = 10
......@@ -40,9 +49,9 @@ class QuantizationBuilderTest(tf.test.TestCase):
mock_summarize_col.assert_called_with('quant_vars')
def testQuantizationBuilderSetsUpCorrectEvalArguments(self):
    with mock.patch.object(contrib_quantize,
'experimental_create_eval_graph') as mock_quant_fn:
      with mock.patch.object(contrib_layers,
'summarize_collection') as mock_summarize_col:
graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
graph_rewriter_proto.quantization.delay = 10
......
......@@ -20,7 +20,14 @@ from object_detection.core import freezable_batch_norm
from object_detection.protos import hyperparams_pb2
from object_detection.utils import context_manager
# pylint: disable=g-import-not-at-top
try:
  from tensorflow.contrib import slim
  from tensorflow.contrib import layers as contrib_layers
except ImportError:
  # TF 2.0 doesn't ship with contrib.
  pass
# pylint: enable=g-import-not-at-top
class KerasLayerHyperparams(object):
......@@ -216,7 +223,7 @@ def build(hyperparams_config, is_training):
batch_norm_params = _build_batch_norm_params(
hyperparams_config.batch_norm, is_training)
if hyperparams_config.HasField('group_norm'):
    normalizer_fn = contrib_layers.group_norm
affected_ops = [slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose]
if hyperparams_config.HasField('op') and (
hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
......@@ -256,6 +263,8 @@ def _build_activation_fn(activation_fn):
return tf.nn.relu
if activation_fn == hyperparams_pb2.Hyperparams.RELU_6:
return tf.nn.relu6
if activation_fn == hyperparams_pb2.Hyperparams.SWISH:
return tf.nn.swish
raise ValueError('Unknown activation function: {}'.format(activation_fn))
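  # Hedged illustration (not in this change): with the new branch above, a
  # Hyperparams proto carrying `activation: SWISH` resolves to tf.nn.swish,
  # i.e. f(x) = x * tf.sigmoid(x), mirroring the RELU and RELU_6 mappings.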
......@@ -301,6 +310,8 @@ def _build_keras_regularizer(regularizer):
# weight by a factor of 2
return tf.keras.regularizers.l2(
float(regularizer.l2_regularizer.weight * 0.5))
if regularizer_oneof is None:
return None
raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))
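  # Note (reviewer sketch): returning None when the regularizer oneof is unset
  # lets callers hand the result straight to Keras layers, which treat
  # kernel_regularizer=None as "no regularization".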
......@@ -369,6 +380,8 @@ def _build_initializer(initializer, build_for_keras=False):
factor=initializer.variance_scaling_initializer.factor,
mode=mode,
uniform=initializer.variance_scaling_initializer.uniform)
if initializer_oneof is None:
return None
raise ValueError('Unknown initializer function: {}'.format(
initializer_oneof))
......