Unverified commit 451906e4, authored by pkulzc and committed by GitHub

Release MobileDet code and model, and require tf_slim installation for OD API. (#8562)



* Merged commit includes the following changes:
311933687  by Sergio Guadarrama:

Removes spurious use of tf.compat.v2, which results in spurious tf.compat.v1.compat.v2. Adds a basic test to nasnet_utils.
Replaces all remaining "import tensorflow as tf" with "import tensorflow.compat.v1 as tf".
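
    For reference, this is the import-level change applied throughout (a minimal sketch; the alias keeps existing tf.* call sites working against the TF1-compatible API):

        # Before (resolves to TF2 symbols under TensorFlow 2.x):
        # import tensorflow as tf
        # After (pins the module to the v1-compatible surface):
        import tensorflow.compat.v1 as tf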

--
311766063  by Sergio Guadarrama:

    Removes explicit tf.compat.v1 at all call sites (we already import tf.compat.v1, so this code was effectively doing tf.compat.v1.compat.v1). The existing code worked in the latest version of TensorFlow, 2.2 (and in 1.15), but not in 1.14 or 2.0.0a; this CL fixes it.

--
311624958  by Sergio Guadarrama:

    Updates a README that doesn't render properly in GitHub documentation.

--
310980959  by Sergio Guadarrama:

    Moves research_models/slim off tf.contrib.slim/layers/framework to tf_slim
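
    The corresponding dependency change for downstream users is roughly as follows (a sketch, assuming the tf-slim package on PyPI, which provides the tf_slim module):

        # pip install tf-slim
        # Before: from tensorflow.contrib import slim
        import tf_slim as slim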

--
310263156  by Sergio Guadarrama:

    Adds model breakdown for MobilenetV3

--
308640516  by Sergio Guadarrama:

    Internal change

308244396  by Sergio Guadarrama:

    GroupNormalization support for MobilenetV3.

--
307475800  by Sergio Guadarrama:

    Internal change

--
302077708  by Sergio Guadarrama:

    Remove `disable_tf2` behavior from slim py_library targets

--
301208453  by Sergio Guadarrama:

    Automated refactoring to make code Python 3 compatible.
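
    As the diffs below illustrate, this largely means routing Python-2-only constructs through six, e.g.:

        import six
        from six.moves import range  # py2/py3-compatible range

        video_id = six.ensure_binary('video_000')  # bytes on both Python 2 and 3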

--
300816672  by Sergio Guadarrama:

    Internal change

299433840  by Sergio Guadarrama:

    Internal change

299221609  by Sergio Guadarrama:

    Explicitly disable Tensorflow v2 behaviors for all TF1.x binaries and tests
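
    A minimal sketch of what this typically looks like at the top of a TF1.x binary or test (tf.disable_v2_behavior() is the standard switch; exact placement varies per target):

        import tensorflow.compat.v1 as tf
        tf.disable_v2_behavior()  # keep graph mode and other TF1.x semantics

        if __name__ == '__main__':
          tf.test.main()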

--
299179617  by Sergio Guadarrama:

    Internal change

299040784  by Sergio Guadarrama:

    Internal change

299036699  by Sergio Guadarrama:

    Internal change

298736510  by Sergio Guadarrama:

    Internal change

298732599  by Sergio Guadarrama:

    Internal change

298729507  by Sergio Guadarrama:

    Internal change

298253328  by Sergio Guadarrama:

    Internal change

297788346  by Sergio Guadarrama:

    Internal change

297785278  by Sergio Guadarrama:

    Internal change

297783127  by Sergio Guadarrama:

    Internal change

297725870  by Sergio Guadarrama:

    Internal change

297721811  by Sergio Guadarrama:

    Internal change

297711347  by Sergio Guadarrama:

    Internal change

297708059  by Sergio Guadarrama:

    Internal change

297701831  by Sergio Guadarrama:

    Internal change

297700038  by Sergio Guadarrama:

    Internal change

297670468  by Sergio Guadarrama:

    Internal change.

--
297350326  by Sergio Guadarrama:

    Explicitly replace "import tensorflow" with "tensorflow.compat.v1" for TF2.x migration

--
297201668  by Sergio Guadarrama:

    Explicitly replace "import tensorflow" with "tensorflow.compat.v1" for TF2.x migration

--
294483372  by Sergio Guadarrama:

    Internal change

PiperOrigin-RevId: 311933687

* Merged commit includes the following changes:
312578615  by Menglong Zhu:

    Modify the LSTM feature extractors to be python 3 compatible.

--
311264357  by Menglong Zhu:

    Removes contrib.slim

--
308957207  by Menglong Zhu:

    Automated refactoring to make code Python 3 compatible.

--
306976470  by yongzhe:

    Internal change

306777559  by Menglong Zhu:

    Internal change

--
299232507  by lzyuan:

    Internal update.

--
299221735  by lzyuan:

    Add small epsilon on max_range for quantize_op to prevent range collapse.
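
    The changed op is not shown in this commit view; purely as an illustration, the guard amounts to something like:

        # Hypothetical sketch: keep max_range strictly above min_range so the
        # quantization scale (max_range - min_range) / (2**num_bits - 1) cannot
        # collapse to zero.
        epsilon = 1e-6
        max_range = max(max_range, min_range + epsilon)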

--

PiperOrigin-RevId: 312578615

* Merged commit includes the following changes:
310447280  by lzc:

    Internal changes.

--

PiperOrigin-RevId: 310447280
Co-authored-by: Sergio Guadarrama <sguada@google.com>
Co-authored-by: Menglong Zhu <menglong@google.com>
parent 73b5be67
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf_sequence_example_decoder.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_sequence_example_decoder
from object_detection.dataset_tools import seq_example_util
from object_detection.utils import test_case
class TfSequenceExampleDecoderTest(test_case.TestCase):
def _create_label_map(self, path):
label_map_text = """
item {
name: "dog"
id: 1
}
item {
name: "cat"
id: 2
}
item {
name: "panda"
id: 4
}
"""
with tf.gfile.Open(path, 'wb') as f:
f.write(label_map_text)
def _make_random_serialized_jpeg_images(self, num_frames, image_height,
image_width):
def graph_fn():
images = tf.cast(tf.random.uniform(
[num_frames, image_height, image_width, 3],
maxval=256,
dtype=tf.int32), dtype=tf.uint8)
images_list = tf.unstack(images, axis=0)
return [tf.io.encode_jpeg(image) for image in images_list]
encoded_images = self.execute(graph_fn, [])
return encoded_images
def test_decode_sequence_example(self):
num_frames = 4
image_height = 20
image_width = 30
expected_groundtruth_boxes = [
[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]],
[[0.2, 0.2, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
[[0.0, 0.0, 1.0, 1.0], [0.1, 0.1, 0.2, 0.2]],
[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
]
expected_groundtruth_classes = [
[-1, -1],
[-1, 1],
[1, 2],
[-1, -1]
]
flds = fields.InputDataFields
encoded_images = self._make_random_serialized_jpeg_images(
num_frames, image_height, image_width)
def graph_fn():
label_map_proto_file = os.path.join(self.get_temp_dir(), 'labelmap.pbtxt')
self._create_label_map(label_map_proto_file)
decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder(
label_map_proto_file=label_map_proto_file)
sequence_example_serialized = seq_example_util.make_sequence_example(
dataset_name='video_dataset',
video_id='video',
encoded_images=encoded_images,
image_height=image_height,
image_width=image_width,
image_format='JPEG',
image_source_ids=[str(i) for i in range(num_frames)],
is_annotated=[[1], [1], [1], [1]],
bboxes=[
[[0., 0., 1., 1.]], # Frame 0.
[[0.2, 0.2, 1., 1.],
[0., 0., 1., 1.]], # Frame 1.
[[0., 0., 1., 1.], # Frame 2.
[0.1, 0.1, 0.2, 0.2]],
[[]], # Frame 3.
],
label_strings=[
['fox'], # Frame 0. Fox will be filtered out.
['fox', 'dog'], # Frame 1. Fox will be filtered out.
['dog', 'cat'], # Frame 2.
[], # Frame 3
]).SerializeToString()
example_string_tensor = tf.convert_to_tensor(sequence_example_serialized)
return decoder.decode(example_string_tensor)
tensor_dict_out = self.execute(graph_fn, [])
self.assertAllClose(expected_groundtruth_boxes,
tensor_dict_out[flds.groundtruth_boxes])
self.assertAllEqual(expected_groundtruth_classes,
tensor_dict_out[flds.groundtruth_classes])
def test_decode_sequence_example_negative_clip(self):
num_frames = 4
image_height = 20
image_width = 30
expected_groundtruth_boxes = -1 * np.ones((4, 0, 4))
expected_groundtruth_classes = -1 * np.ones((4, 0))
flds = fields.InputDataFields
encoded_images = self._make_random_serialized_jpeg_images(
num_frames, image_height, image_width)
def graph_fn():
sequence_example_serialized = seq_example_util.make_sequence_example(
dataset_name='video_dataset',
video_id='video',
encoded_images=encoded_images,
image_height=image_height,
image_width=image_width,
image_format='JPEG',
image_source_ids=[str(i) for i in range(num_frames)],
bboxes=[
[[]],
[[]],
[[]],
[[]]
],
label_strings=[
[],
[],
[],
[]
]).SerializeToString()
example_string_tensor = tf.convert_to_tensor(sequence_example_serialized)
label_map_proto_file = os.path.join(self.get_temp_dir(), 'labelmap.pbtxt')
self._create_label_map(label_map_proto_file)
decoder = tf_sequence_example_decoder.TfSequenceExampleDecoder(
label_map_proto_file=label_map_proto_file)
return decoder.decode(example_string_tensor)
tensor_dict_out = self.execute(graph_fn, [])
self.assertAllClose(expected_groundtruth_boxes,
tensor_dict_out[flds.groundtruth_boxes])
self.assertAllEqual(expected_groundtruth_classes,
tensor_dict_out[flds.groundtruth_classes])
if __name__ == '__main__':
tf.test.main()
@@ -40,7 +40,7 @@ import numpy as np
import PIL.Image
from pycocotools import mask
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util
......
@@ -21,7 +21,7 @@ import os
import numpy as np
import PIL.Image
import six
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import create_coco_tf_record
......
@@ -42,7 +42,7 @@ import os
import numpy as np
import PIL.Image as pil
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
......
@@ -20,7 +20,7 @@ import os
import numpy as np
import PIL.Image
import six
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import create_kitti_tf_record
......
@@ -40,7 +40,7 @@ import os
import contextlib2
import pandas as pd
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import oid_tfrecord_creation
from object_detection.dataset_tools import tf_record_creation_util
......
@@ -32,7 +32,7 @@ import os
from lxml import etree
import PIL.Image
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
......
@@ -20,7 +20,7 @@ import os
import numpy as np
import PIL.Image
import six
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import create_pascal_tf_record
......
@@ -37,7 +37,7 @@ import contextlib2
from lxml import etree
import numpy as np
import PIL.Image
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util
......
@@ -18,7 +18,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import oid_hierarchical_labels_expansion
......
@@ -19,7 +19,7 @@ from __future__ import division
from __future__ import print_function
import six
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.core import standard_fields
from object_detection.utils import dataset_util
......
@@ -16,7 +16,7 @@
import pandas as pd
import six
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import oid_tfrecord_creation
......
# Lint as: python2, python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common utility for object detection tf.train.SequenceExamples."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow.compat.v1 as tf
def context_float_feature(ndarray):
"""Converts a numpy float array to a context float feature.
Args:
ndarray: A numpy float array.
Returns:
A context float feature.
"""
feature = tf.train.Feature()
for val in ndarray:
feature.float_list.value.append(val)
return feature
def context_int64_feature(ndarray):
"""Converts a numpy array to a context int64 feature.
Args:
ndarray: A numpy int64 array.
Returns:
A context int64 feature.
"""
feature = tf.train.Feature()
for val in ndarray:
feature.int64_list.value.append(val)
return feature
def context_bytes_feature(ndarray):
"""Converts a numpy bytes array to a context bytes feature.
Args:
ndarray: A numpy bytes array.
Returns:
A context bytes feature.
"""
feature = tf.train.Feature()
for val in ndarray:
if isinstance(val, np.ndarray):
val = val.tolist()
feature.bytes_list.value.append(tf.compat.as_bytes(val))
return feature
def sequence_float_feature(ndarray):
"""Converts a numpy float array to a sequence float feature.
Args:
ndarray: A numpy float array.
Returns:
A sequence float feature.
"""
feature_list = tf.train.FeatureList()
for row in ndarray:
feature = feature_list.feature.add()
if row.size:
feature.float_list.value[:] = row
return feature_list
def sequence_int64_feature(ndarray):
"""Converts a numpy int64 array to a sequence int64 feature.
Args:
ndarray: A numpy int64 array.
Returns:
A sequence int64 feature.
"""
feature_list = tf.train.FeatureList()
for row in ndarray:
feature = feature_list.feature.add()
if row.size:
feature.int64_list.value[:] = row
return feature_list
def sequence_bytes_feature(ndarray):
"""Converts a bytes float array to a sequence bytes feature.
Args:
ndarray: A numpy bytes array.
Returns:
A sequence bytes feature.
"""
feature_list = tf.train.FeatureList()
for row in ndarray:
if isinstance(row, np.ndarray):
row = row.tolist()
feature = feature_list.feature.add()
if row:
row = [tf.compat.as_bytes(val) for val in row]
feature.bytes_list.value[:] = row
return feature_list
def boxes_to_box_components(bboxes):
"""Converts a list of numpy arrays (boxes) to box components.
Args:
bboxes: A list of numpy arrays holding bounding boxes for each frame.
Returns:
Bounding box component lists.
"""
ymin_list = []
xmin_list = []
ymax_list = []
xmax_list = []
for bbox in bboxes:
bbox = np.array(bbox).astype(np.float32)
ymin, xmin, ymax, xmax = np.split(bbox, 4, axis=1)
ymin_list.append(np.reshape(ymin, [-1]))
xmin_list.append(np.reshape(xmin, [-1]))
ymax_list.append(np.reshape(ymax, [-1]))
xmax_list.append(np.reshape(xmax, [-1]))
return ymin_list, xmin_list, ymax_list, xmax_list
def make_sequence_example(dataset_name,
video_id,
encoded_images,
image_height,
image_width,
image_format=None,
image_source_ids=None,
timestamps=None,
is_annotated=None,
bboxes=None,
label_strings=None,
detection_bboxes=None,
detection_classes=None,
detection_scores=None):
"""Constructs tf.SequenceExamples.
Args:
dataset_name: String with dataset name.
video_id: String with video id.
encoded_images: A [num_frames] list (or numpy array) of encoded image
frames.
image_height: Height of the images.
image_width: Width of the images.
image_format: Format of encoded images.
image_source_ids: (Optional) A [num_frames] list of unique string ids for
each image.
timestamps: (Optional) A [num_frames] list (or numpy array) with image
timestamps.
is_annotated: (Optional) A [num_frames] list (or numpy array) in which each
element indicates whether the frame has been annotated (1) or not (0).
bboxes: (Optional) A list (with num_frames elements) of [num_boxes_i, 4]
numpy float32 arrays holding boxes for each frame.
label_strings: (Optional) A list (with num_frames elements) of [num_boxes_i]
numpy string arrays holding object string labels for each frame.
detection_bboxes: (Optional) A list (with num_frames elements) of
[num_boxes_i, 4] numpy float32 arrays holding prediction boxes for each
frame.
detection_classes: (Optional) A list (with num_frames elements) of
[num_boxes_i] numpy int64 arrays holding predicted classes for each frame.
detection_scores: (Optional) A list (with num_frames elements) of
[num_boxes_i] numpy float32 arrays holding predicted object scores for
each frame.
Returns:
A tf.train.SequenceExample.
"""
num_frames = len(encoded_images)
image_encoded = np.expand_dims(encoded_images, axis=-1)
if timestamps is None:
timestamps = np.arange(num_frames)
image_timestamps = np.expand_dims(timestamps, axis=-1)
# Context fields.
context_dict = {
'example/dataset_name': context_bytes_feature([dataset_name]),
'clip/start/timestamp': context_int64_feature([image_timestamps[0][0]]),
'clip/end/timestamp': context_int64_feature([image_timestamps[-1][0]]),
'clip/frames': context_int64_feature([num_frames]),
'image/channels': context_int64_feature([3]),
'image/height': context_int64_feature([image_height]),
'image/width': context_int64_feature([image_width]),
'clip/media_id': context_bytes_feature([video_id])
}
# Sequence fields.
feature_list = {
'image/encoded': sequence_bytes_feature(image_encoded),
'image/timestamp': sequence_int64_feature(image_timestamps),
}
# Add optional fields.
if image_format is not None:
context_dict['image/format'] = context_bytes_feature([image_format])
if image_source_ids is not None:
feature_list['image/source_id'] = sequence_bytes_feature(image_source_ids)
if bboxes is not None:
bbox_ymin, bbox_xmin, bbox_ymax, bbox_xmax = boxes_to_box_components(bboxes)
feature_list['region/bbox/xmin'] = sequence_float_feature(bbox_xmin)
feature_list['region/bbox/xmax'] = sequence_float_feature(bbox_xmax)
feature_list['region/bbox/ymin'] = sequence_float_feature(bbox_ymin)
feature_list['region/bbox/ymax'] = sequence_float_feature(bbox_ymax)
if is_annotated is None:
is_annotated = np.ones(num_frames, dtype=np.int64)
is_annotated = np.expand_dims(is_annotated, axis=-1)
feature_list['region/is_annotated'] = sequence_int64_feature(is_annotated)
if label_strings is not None:
feature_list['region/label/string'] = sequence_bytes_feature(
label_strings)
if detection_bboxes is not None:
det_bbox_ymin, det_bbox_xmin, det_bbox_ymax, det_bbox_xmax = (
boxes_to_box_components(detection_bboxes))
feature_list['predicted/region/bbox/xmin'] = sequence_float_feature(
det_bbox_xmin)
feature_list['predicted/region/bbox/xmax'] = sequence_float_feature(
det_bbox_xmax)
feature_list['predicted/region/bbox/ymin'] = sequence_float_feature(
det_bbox_ymin)
feature_list['predicted/region/bbox/ymax'] = sequence_float_feature(
det_bbox_ymax)
if detection_classes is not None:
feature_list['predicted/region/label/index'] = sequence_int64_feature(
detection_classes)
if detection_scores is not None:
feature_list['predicted/region/label/confidence'] = sequence_float_feature(
detection_scores)
context = tf.train.Features(feature=context_dict)
feature_lists = tf.train.FeatureLists(feature_list=feature_list)
sequence_example = tf.train.SequenceExample(
context=context,
feature_lists=feature_lists)
return sequence_example
# Lint as: python2, python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.seq_example_util."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import six
import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import seq_example_util
class SeqExampleUtilTest(tf.test.TestCase):
def test_make_unlabeled_example(self):
num_frames = 5
image_height = 100
image_width = 200
dataset_name = b'unlabeled_dataset'
video_id = b'video_000'
images = tf.cast(tf.random.uniform(
[num_frames, image_height, image_width, 3],
maxval=256,
dtype=tf.int32), dtype=tf.uint8)
image_source_ids = [str(idx) for idx in range(num_frames)]
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
seq_example = seq_example_util.make_sequence_example(
dataset_name=dataset_name,
video_id=video_id,
encoded_images=encoded_images,
image_height=image_height,
image_width=image_width,
image_format='JPEG',
image_source_ids=image_source_ids)
context_feature_dict = seq_example.context.feature
self.assertEqual(
dataset_name,
context_feature_dict['example/dataset_name'].bytes_list.value[0])
self.assertEqual(
0,
context_feature_dict['clip/start/timestamp'].int64_list.value[0])
self.assertEqual(
num_frames - 1,
context_feature_dict['clip/end/timestamp'].int64_list.value[0])
self.assertEqual(
num_frames,
context_feature_dict['clip/frames'].int64_list.value[0])
self.assertEqual(
3,
context_feature_dict['image/channels'].int64_list.value[0])
self.assertEqual(
b'JPEG',
context_feature_dict['image/format'].bytes_list.value[0])
self.assertEqual(
image_height,
context_feature_dict['image/height'].int64_list.value[0])
self.assertEqual(
image_width,
context_feature_dict['image/width'].int64_list.value[0])
self.assertEqual(
video_id,
context_feature_dict['clip/media_id'].bytes_list.value[0])
seq_feature_dict = seq_example.feature_lists.feature_list
self.assertLen(
seq_feature_dict['image/encoded'].feature[:],
num_frames)
timestamps = [
feature.int64_list.value[0] for feature
in seq_feature_dict['image/timestamp'].feature]
self.assertAllEqual(list(range(num_frames)), timestamps)
source_ids = [
feature.bytes_list.value[0] for feature
in seq_feature_dict['image/source_id'].feature]
self.assertAllEqual(
[six.ensure_binary(str(idx)) for idx in range(num_frames)],
source_ids)
def test_make_labeled_example(self):
num_frames = 2
image_height = 100
image_width = 200
dataset_name = b'unlabeled_dataset'
video_id = b'video_000'
labels = [b'dog', b'cat']
images = tf.cast(tf.random.uniform(
[num_frames, image_height, image_width, 3],
maxval=256,
dtype=tf.int32), dtype=tf.uint8)
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
timestamps = [100000, 110000]
is_annotated = [1, 0]
bboxes = [
np.array([[0., 0., 0., 0.],
[0., 0., 1., 1.]], dtype=np.float32),
np.zeros([0, 4], dtype=np.float32)
]
label_strings = [
np.array(labels),
np.array([])
]
seq_example = seq_example_util.make_sequence_example(
dataset_name=dataset_name,
video_id=video_id,
encoded_images=encoded_images,
image_height=image_height,
image_width=image_width,
timestamps=timestamps,
is_annotated=is_annotated,
bboxes=bboxes,
label_strings=label_strings)
context_feature_dict = seq_example.context.feature
self.assertEqual(
dataset_name,
context_feature_dict['example/dataset_name'].bytes_list.value[0])
self.assertEqual(
timestamps[0],
context_feature_dict['clip/start/timestamp'].int64_list.value[0])
self.assertEqual(
timestamps[-1],
context_feature_dict['clip/end/timestamp'].int64_list.value[0])
self.assertEqual(
num_frames,
context_feature_dict['clip/frames'].int64_list.value[0])
seq_feature_dict = seq_example.feature_lists.feature_list
self.assertLen(
seq_feature_dict['image/encoded'].feature[:],
num_frames)
actual_timestamps = [
feature.int64_list.value[0] for feature
in seq_feature_dict['image/timestamp'].feature]
self.assertAllEqual(timestamps, actual_timestamps)
# Frame 0.
self.assertAllEqual(
is_annotated[0],
seq_feature_dict['region/is_annotated'].feature[0].int64_list.value[0])
self.assertAllClose(
[0., 0.],
seq_feature_dict['region/bbox/ymin'].feature[0].float_list.value[:])
self.assertAllClose(
[0., 0.],
seq_feature_dict['region/bbox/xmin'].feature[0].float_list.value[:])
self.assertAllClose(
[0., 1.],
seq_feature_dict['region/bbox/ymax'].feature[0].float_list.value[:])
self.assertAllClose(
[0., 1.],
seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:])
self.assertAllEqual(
labels,
seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:])
# Frame 1.
self.assertAllEqual(
is_annotated[1],
seq_feature_dict['region/is_annotated'].feature[1].int64_list.value[0])
self.assertAllClose(
[],
seq_feature_dict['region/bbox/ymin'].feature[1].float_list.value[:])
self.assertAllClose(
[],
seq_feature_dict['region/bbox/xmin'].feature[1].float_list.value[:])
self.assertAllClose(
[],
seq_feature_dict['region/bbox/ymax'].feature[1].float_list.value[:])
self.assertAllClose(
[],
seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:])
self.assertAllEqual(
[],
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
def test_make_labeled_example_with_predictions(self):
num_frames = 2
image_height = 100
image_width = 200
dataset_name = b'unlabeled_dataset'
video_id = b'video_000'
images = tf.cast(tf.random.uniform(
[num_frames, image_height, image_width, 3],
maxval=256,
dtype=tf.int32), dtype=tf.uint8)
images_list = tf.unstack(images, axis=0)
encoded_images_list = [tf.io.encode_jpeg(image) for image in images_list]
with tf.Session() as sess:
encoded_images = sess.run(encoded_images_list)
bboxes = [
np.array([[0., 0., 0.75, 0.75],
[0., 0., 1., 1.]], dtype=np.float32),
np.array([[0., 0.25, 0.5, 0.75]], dtype=np.float32)
]
label_strings = [
np.array(['cat', 'frog']),
np.array(['cat'])
]
detection_bboxes = [
np.array([[0., 0., 0.75, 0.75]], dtype=np.float32),
np.zeros([0, 4], dtype=np.float32)
]
detection_classes = [
np.array([5], dtype=np.int64),
np.array([], dtype=np.int64)
]
detection_scores = [
np.array([0.9], dtype=np.float32),
np.array([], dtype=np.float32)
]
seq_example = seq_example_util.make_sequence_example(
dataset_name=dataset_name,
video_id=video_id,
encoded_images=encoded_images,
image_height=image_height,
image_width=image_width,
bboxes=bboxes,
label_strings=label_strings,
detection_bboxes=detection_bboxes,
detection_classes=detection_classes,
detection_scores=detection_scores)
context_feature_dict = seq_example.context.feature
self.assertEqual(
dataset_name,
context_feature_dict['example/dataset_name'].bytes_list.value[0])
self.assertEqual(
0,
context_feature_dict['clip/start/timestamp'].int64_list.value[0])
self.assertEqual(
1,
context_feature_dict['clip/end/timestamp'].int64_list.value[0])
self.assertEqual(
num_frames,
context_feature_dict['clip/frames'].int64_list.value[0])
seq_feature_dict = seq_example.feature_lists.feature_list
self.assertLen(
seq_feature_dict['image/encoded'].feature[:],
num_frames)
actual_timestamps = [
feature.int64_list.value[0] for feature
in seq_feature_dict['image/timestamp'].feature]
self.assertAllEqual([0, 1], actual_timestamps)
# Frame 0.
self.assertAllEqual(
1,
seq_feature_dict['region/is_annotated'].feature[0].int64_list.value[0])
self.assertAllClose(
[0., 0.],
seq_feature_dict['region/bbox/ymin'].feature[0].float_list.value[:])
self.assertAllClose(
[0., 0.],
seq_feature_dict['region/bbox/xmin'].feature[0].float_list.value[:])
self.assertAllClose(
[0.75, 1.],
seq_feature_dict['region/bbox/ymax'].feature[0].float_list.value[:])
self.assertAllClose(
[0.75, 1.],
seq_feature_dict['region/bbox/xmax'].feature[0].float_list.value[:])
self.assertAllEqual(
['cat', 'frog'],
seq_feature_dict['region/label/string'].feature[0].bytes_list.value[:])
self.assertAllClose(
[0.],
seq_feature_dict[
'predicted/region/bbox/ymin'].feature[0].float_list.value[:])
self.assertAllClose(
[0.],
seq_feature_dict[
'predicted/region/bbox/xmin'].feature[0].float_list.value[:])
self.assertAllClose(
[0.75],
seq_feature_dict[
'predicted/region/bbox/ymax'].feature[0].float_list.value[:])
self.assertAllClose(
[0.75],
seq_feature_dict[
'predicted/region/bbox/xmax'].feature[0].float_list.value[:])
self.assertAllEqual(
[5],
seq_feature_dict[
'predicted/region/label/index'].feature[0].int64_list.value[:])
self.assertAllClose(
[0.9],
seq_feature_dict[
'predicted/region/label/confidence'].feature[0].float_list.value[:])
# Frame 1.
self.assertAllEqual(
1,
seq_feature_dict['region/is_annotated'].feature[1].int64_list.value[0])
self.assertAllClose(
[0.0],
seq_feature_dict['region/bbox/ymin'].feature[1].float_list.value[:])
self.assertAllClose(
[0.25],
seq_feature_dict['region/bbox/xmin'].feature[1].float_list.value[:])
self.assertAllClose(
[0.5],
seq_feature_dict['region/bbox/ymax'].feature[1].float_list.value[:])
self.assertAllClose(
[0.75],
seq_feature_dict['region/bbox/xmax'].feature[1].float_list.value[:])
self.assertAllEqual(
['cat'],
seq_feature_dict['region/label/string'].feature[1].bytes_list.value[:])
self.assertAllClose(
[],
seq_feature_dict[
'predicted/region/bbox/ymin'].feature[1].float_list.value[:])
self.assertAllClose(
[],
seq_feature_dict[
'predicted/region/bbox/xmin'].feature[1].float_list.value[:])
self.assertAllClose(
[],
seq_feature_dict[
'predicted/region/bbox/ymax'].feature[1].float_list.value[:])
self.assertAllClose(
[],
seq_feature_dict[
'predicted/region/bbox/xmax'].feature[1].float_list.value[:])
self.assertAllEqual(
[],
seq_feature_dict[
'predicted/region/label/index'].feature[1].int64_list.value[:])
self.assertAllClose(
[],
seq_feature_dict[
'predicted/region/label/confidence'].feature[1].float_list.value[:])
if __name__ == '__main__':
tf.test.main()
@@ -20,7 +20,7 @@ from __future__ import division
from __future__ import print_function
from six.moves import range
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
......
@@ -23,7 +23,7 @@ import os
import contextlib2
import six
from six.moves import range
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection.dataset_tools import tf_record_creation_util
......
@@ -24,9 +24,10 @@ import time
import numpy as np
from six.moves import range
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
+ import tf_slim as slim
- from tensorflow.contrib import slim
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.core import keypoint_ops
@@ -553,11 +554,15 @@ def _resize_detection_masks(args):
def _resize_groundtruth_masks(args):
- mask, image_shape = args
+ """Resizes groundtruth masks to the original image size."""
+ mask, true_image_shape, original_image_shape = args
+ true_height = true_image_shape[0]
+ true_width = true_image_shape[1]
+ mask = mask[:, :true_height, :true_width]
mask = tf.expand_dims(mask, 3)
mask = tf.image.resize_images(
mask,
- image_shape,
+ original_image_shape,
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
align_corners=True)
return tf.cast(tf.squeeze(mask, 3), tf.uint8)
@@ -689,7 +694,7 @@ def result_dict_for_batched_example(images,
Args:
images: A single 4D uint8 image tensor of shape [batch_size, H, W, C].
- keys: A [batch_size] string tensor with image identifier.
+ keys: A [batch_size] string/int tensor with image identifier.
detections: A dictionary of detections, returned from
DetectionModel.postprocess().
groundtruth: (Optional) Dictionary of groundtruth items, with fields:
@@ -711,6 +716,8 @@
2] float32 tensor with keypoints (Optional).
'groundtruth_keypoint_visibilities': [batch_size, max_number_of_boxes,
num_keypoints] bool tensor with keypoint visibilities (Optional).
+ 'groundtruth_labeled_classes': [batch_size, num_classes] int64
+ tensor of 1-indexed classes. (Optional)
class_agnostic: Boolean indicating whether the detections are class-agnostic
(i.e. binary). Default False.
scale_to_absolute: Boolean indicating whether boxes and keypoints should be
@@ -762,6 +769,8 @@
tensor with keypoints (Optional).
'groundtruth_keypoint_visibilities': [batch_size, num_boxes, num_keypoints]
bool tensor with keypoint visibilities (Optional).
+ 'groundtruth_labeled_classes': [batch_size, num_classes] int64 tensor
+ of 1-indexed classes. (Optional)
'num_groundtruth_boxes': [batch_size] tensor containing the maximum number
of groundtruth boxes per image.
@@ -871,7 +880,7 @@
groundtruth[input_data_fields.groundtruth_instance_masks] = (
shape_utils.static_or_dynamic_map_fn(
_resize_groundtruth_masks,
- elems=[masks, original_image_spatial_shapes],
+ elems=[masks, true_image_shapes, original_image_spatial_shapes],
dtype=tf.uint8))
output_dict.update(groundtruth)
......
@@ -23,7 +23,7 @@ from absl.testing import parameterized
import numpy as np
import six
from six.moves import range
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from object_detection import eval_util
from object_detection.core import standard_fields as fields
......
@@ -104,7 +104,7 @@ python export_inference_graph.py \
} \
}"
"""
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection import exporter
from object_detection.protos import pipeline_pb2
......
@@ -92,7 +92,7 @@ python object_detection/export_tflite_ssd_graph.py \
"
"""
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection import export_tflite_ssd_graph_lib
from object_detection.protos import pipeline_pb2
......