ModelZoo / ResNet50_tensorflow / Commits / 2e9bb539

Commit 2e9bb539, authored Feb 25, 2021 by stephenwu

    Merge branch 'master' of https://github.com/tensorflow/models into RTESuperGLUE

Parents: 7bae5317, 8fba84f8
Changes: 121. Showing 20 changed files with 1458 additions and 76 deletions (+1458 −76).
official/vision/beta/projects/yolo/ops/box_ops.py                     +297  −0
official/vision/beta/projects/yolo/ops/box_ops_test.py                +56   −0
official/vision/beta/projects/yolo/ops/preprocess_ops.py              +524  −0
official/vision/beta/projects/yolo/ops/preprocess_ops_test.py         +67   −0
official/vision/beta/serving/detection.py                             +9    −8
official/vision/beta/serving/detection_test.py                        +3    −2
official/vision/beta/serving/export_base.py                           +72   −25
official/vision/beta/serving/export_saved_model_lib.py                +45   −20
official/vision/beta/serving/export_tfhub.py                          +101  −0
official/vision/beta/serving/image_classification.py                  +3    −2
official/vision/beta/tasks/image_classification.py                    +4    −2
official/vision/beta/tasks/maskrcnn.py                                +2    −2
official/vision/beta/tasks/retinanet.py                               +2    −2
official/vision/beta/tasks/semantic_segmentation.py                   +19   −11
official/vision/beta/tasks/video_classification.py                    +2    −2
research/delf/delf/python/normalization_layers/__init__.py            +14   −0
research/delf/delf/python/normalization_layers/normalization.py       +40   −0
research/delf/delf/python/normalization_layers/normalization_test.py  +36   −0
research/delf/delf/python/pooling_layers/__init__.py                  +14   −0
research/delf/delf/python/pooling_layers/pooling.py                   +148  −0
official/vision/beta/projects/yolo/ops/box_ops.py (new file, mode 100644)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding box utils."""
import
math
import
tensorflow
as
tf
def
yxyx_to_xcycwh
(
box
:
tf
.
Tensor
):
"""Converts boxes from ymin, xmin, ymax, xmax.
to x_center, y_center, width, height.
Args:
box: `Tensor` whose shape is [..., 4] and represents the coordinates
of boxes in ymin, xmin, ymax, xmax.
Returns:
`Tensor` whose shape is [..., 4] and contains the new format.
Raises:
ValueError: If the last dimension of box is not 4 or if box's dtype isn't
a floating point type.
"""
with
tf
.
name_scope
(
'yxyx_to_xcycwh'
):
ymin
,
xmin
,
ymax
,
xmax
=
tf
.
split
(
box
,
4
,
axis
=-
1
)
x_center
=
(
xmax
+
xmin
)
/
2
y_center
=
(
ymax
+
ymin
)
/
2
width
=
xmax
-
xmin
height
=
ymax
-
ymin
box
=
tf
.
concat
([
x_center
,
y_center
,
width
,
height
],
axis
=-
1
)
return
box
def xcycwh_to_yxyx(box: tf.Tensor, split_min_max: bool = False):
  """Converts boxes from x_center, y_center, width, height to ymin, xmin, ymax, xmax.

  Args:
    box: a `Tensor` whose shape is [..., 4] and represents the coordinates of
      boxes in x_center, y_center, width, height.
    split_min_max: bool, whether or not to split x, y min and max values.

  Returns:
    box: a `Tensor` whose shape is [..., 4] and contains the new format.

  Raises:
    ValueError: If the last dimension of box is not 4 or if box's dtype isn't
      a floating point type.
  """
  with tf.name_scope('xcycwh_to_yxyx'):
    xy, wh = tf.split(box, 2, axis=-1)
    xy_min = xy - wh / 2
    xy_max = xy + wh / 2
    x_min, y_min = tf.split(xy_min, 2, axis=-1)
    x_max, y_max = tf.split(xy_max, 2, axis=-1)
    box = tf.concat([y_min, x_min, y_max, x_max], axis=-1)
    if split_min_max:
      box = tf.split(box, 2, axis=-1)
  return box
def xcycwh_to_xyxy(box: tf.Tensor, split_min_max: bool = False):
  """Converts boxes from x_center, y_center, width, height to xmin, ymin, xmax, ymax.

  Args:
    box: a `Tensor` whose shape is [..., 4] and represents the coordinates of
      boxes in x_center, y_center, width, height.
    split_min_max: bool, whether or not to split x, y min and max values.

  Returns:
    box: a `Tensor` whose shape is [..., 4] and contains the new format.

  Raises:
    ValueError: If the last dimension of box is not 4 or if box's dtype isn't
      a floating point type.
  """
  with tf.name_scope('xcycwh_to_xyxy'):
    xy, wh = tf.split(box, 2, axis=-1)
    xy_min = xy - wh / 2
    xy_max = xy + wh / 2
    box = (xy_min, xy_max)
    if not split_min_max:
      box = tf.concat(box, axis=-1)
  return box
def center_distance(center_1: tf.Tensor, center_2: tf.Tensor):
  """Calculates the squared distance between two points.

  This function is mathematically equivalent to the following code, but has
  smaller rounding errors:

    tf.norm(center_1 - center_2, axis=-1)**2

  Args:
    center_1: a `Tensor` whose shape is [..., 2] and represents a point.
    center_2: a `Tensor` whose shape is [..., 2] and represents a point.

  Returns:
    dist: a `Tensor` whose shape is [...] and value represents the squared
      distance between center_1 and center_2.

  Raises:
    ValueError: If the last dimension of either center_1 or center_2 is not 2.
  """
  with tf.name_scope('center_distance'):
    dist = (center_1[..., 0] - center_2[..., 0])**2 + (
        center_1[..., 1] - center_2[..., 1])**2
  return dist
def compute_iou(box1, box2, yxyx=False):
  """Calculates the intersection over union between box1 and box2.

  Args:
    box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
      boxes in x_center, y_center, width, height.
    box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
      boxes in x_center, y_center, width, height.
    yxyx: `bool`, whether or not box1 and box2 are in yxyx format.

  Returns:
    iou: a `Tensor` whose shape is [...] and value represents the intersection
      over union.

  Raises:
    ValueError: If the last dimension of either box1 or box2 is not 4.
  """
  with tf.name_scope('iou'):
    # Get box corners.
    if not yxyx:
      box1 = xcycwh_to_yxyx(box1)
      box2 = xcycwh_to_yxyx(box2)

    b1mi, b1ma = tf.split(box1, 2, axis=-1)
    b2mi, b2ma = tf.split(box2, 2, axis=-1)
    intersect_mins = tf.math.maximum(b1mi, b2mi)
    intersect_maxes = tf.math.minimum(b1ma, b2ma)
    intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
                                   tf.zeros_like(intersect_mins))
    intersection = tf.reduce_prod(
        intersect_wh, axis=-1)  # intersect_wh[..., 0] * intersect_wh[..., 1]

    box1_area = tf.math.abs(tf.reduce_prod(b1ma - b1mi, axis=-1))
    box2_area = tf.math.abs(tf.reduce_prod(b2ma - b2mi, axis=-1))
    union = box1_area + box2_area - intersection

    iou = intersection / (union + 1e-7)
    iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
  return iou
def compute_giou(box1, box2):
  """Calculates the generalized intersection over union between box1 and box2.

  Args:
    box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
      boxes in x_center, y_center, width, height.
    box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
      boxes in x_center, y_center, width, height.

  Returns:
    iou: a `Tensor` whose shape is [...] and value represents the intersection
      over union.
    giou: a `Tensor` whose shape is [...] and value represents the generalized
      intersection over union.

  Raises:
    ValueError: If the last dimension of either box1 or box2 is not 4.
  """
  with tf.name_scope('giou'):
    # Get box corners.
    box1 = xcycwh_to_yxyx(box1)
    box2 = xcycwh_to_yxyx(box2)

    # Compute IOU.
    intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
    intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
    intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
                                   tf.zeros_like(intersect_mins))
    intersection = intersect_wh[..., 0] * intersect_wh[..., 1]

    box1_area = tf.math.abs(
        tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
    box2_area = tf.math.abs(
        tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
    union = box1_area + box2_area - intersection

    iou = tf.math.divide_no_nan(intersection, union)
    iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)

    # Find the smallest box that encompasses both box1 and box2.
    c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
    c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])
    c = tf.math.abs(tf.reduce_prod(c_mins - c_maxes, axis=-1))

    # Compute giou.
    giou = iou - tf.math.divide_no_nan((c - union), c)
  return iou, giou
def compute_diou(box1, box2):
  """Calculates the distance intersection over union between box1 and box2.

  Args:
    box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
      boxes in x_center, y_center, width, height.
    box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
      boxes in x_center, y_center, width, height.

  Returns:
    iou: a `Tensor` whose shape is [...] and value represents the intersection
      over union.
    diou: a `Tensor` whose shape is [...] and value represents the distance
      intersection over union.

  Raises:
    ValueError: If the last dimension of either box1 or box2 is not 4.
  """
  with tf.name_scope('diou'):
    # Compute the distance between the box centers.
    dist = center_distance(box1[..., 0:2], box2[..., 0:2])

    # Get box corners.
    box1 = xcycwh_to_yxyx(box1)
    box2 = xcycwh_to_yxyx(box2)

    # Compute IOU.
    intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
    intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
    intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
                                   tf.zeros_like(intersect_mins))
    intersection = intersect_wh[..., 0] * intersect_wh[..., 1]

    box1_area = tf.math.abs(
        tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
    box2_area = tf.math.abs(
        tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
    union = box1_area + box2_area - intersection

    iou = tf.math.divide_no_nan(intersection, union)
    iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)

    # Compute the max diagonal of the smallest enclosing box.
    c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
    c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])
    diag_dist = tf.reduce_sum((c_maxes - c_mins)**2, axis=-1)

    regularization = tf.math.divide_no_nan(dist, diag_dist)
    diou = iou + regularization
  return iou, diou
def compute_ciou(box1, box2):
  """Calculates the complete intersection over union between box1 and box2.

  Args:
    box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
      boxes in x_center, y_center, width, height.
    box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
      boxes in x_center, y_center, width, height.

  Returns:
    iou: a `Tensor` whose shape is [...] and value represents the intersection
      over union.
    ciou: a `Tensor` whose shape is [...] and value represents the complete
      intersection over union.

  Raises:
    ValueError: If the last dimension of either box1 or box2 is not 4.
  """
  with tf.name_scope('ciou'):
    # Compute DIOU and IOU.
    iou, diou = compute_diou(box1, box2)

    # Compute the aspect ratio consistency term.
    arcterm = (
        tf.math.atan(tf.math.divide_no_nan(box1[..., 2], box1[..., 3])) -
        tf.math.atan(tf.math.divide_no_nan(box2[..., 2], box2[..., 3])))**2
    v = 4 * arcterm / (math.pi)**2

    # Compute the IOU regularization.
    a = tf.math.divide_no_nan(v, ((1 - iou) + v))
    ciou = diou + v * a
  return iou, ciou
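As a quick sanity check, the conversion and IoU helpers above can be exercised directly in eager mode. A minimal sketch; the box values below are made up purely for illustration:

import tensorflow as tf

from official.vision.beta.projects.yolo.ops import box_ops

# Two boxes in (x_center, y_center, width, height), normalized coordinates.
boxes_a = tf.constant([[0.5, 0.5, 0.4, 0.4]], dtype=tf.float32)
boxes_b = tf.constant([[0.6, 0.6, 0.4, 0.4]], dtype=tf.float32)

# Round-trip between the centroid and corner formats.
corners = box_ops.xcycwh_to_yxyx(boxes_a)
centroids = box_ops.yxyx_to_xcycwh(corners)

# compute_iou expects centroid boxes by default; pass yxyx=True for corners.
iou = box_ops.compute_iou(boxes_a, boxes_b)
_, giou = box_ops.compute_giou(boxes_a, boxes_b)
_, ciou = box_ops.compute_ciou(boxes_a, boxes_b)
print(iou.numpy(), giou.numpy(), ciou.numpy())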
official/vision/beta/projects/yolo/ops/box_ops_test.py (new file, mode 100644)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from official.vision.beta.projects.yolo.ops import box_ops


class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters((1), (4))
  def test_box_conversions(self, num_boxes):
    boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
    expected_shape = np.array([num_boxes, 4])
    xywh_box = box_ops.yxyx_to_xcycwh(boxes)
    yxyx_box = box_ops.xcycwh_to_yxyx(boxes)
    xyxy_box = box_ops.xcycwh_to_xyxy(boxes)
    self.assertAllEqual(tf.shape(xywh_box).numpy(), expected_shape)
    self.assertAllEqual(tf.shape(yxyx_box).numpy(), expected_shape)
    self.assertAllEqual(tf.shape(xyxy_box).numpy(), expected_shape)

  @parameterized.parameters((1), (5), (7))
  def test_ious(self, num_boxes):
    boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
    expected_shape = np.array([num_boxes,])
    expected_iou = np.ones([num_boxes,])
    iou = box_ops.compute_iou(boxes, boxes)
    _, giou = box_ops.compute_giou(boxes, boxes)
    _, ciou = box_ops.compute_ciou(boxes, boxes)
    _, diou = box_ops.compute_diou(boxes, boxes)
    self.assertAllEqual(tf.shape(iou).numpy(), expected_shape)
    self.assertArrayNear(iou, expected_iou, 0.001)
    self.assertArrayNear(giou, expected_iou, 0.001)
    self.assertArrayNear(ciou, expected_iou, 0.001)
    self.assertArrayNear(diou, expected_iou, 0.001)


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/projects/yolo/ops/preprocess_ops.py (new file, mode 100644)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Yolo preprocess ops."""
import
tensorflow
as
tf
import
tensorflow_addons
as
tfa
from
official.vision.beta.projects.yolo.ops
import
box_ops
def
resize_crop_filter
(
image
,
boxes
,
default_width
,
default_height
,
target_width
,
target_height
):
"""Apply zooming to the image and boxes.
Args:
image: a `Tensor` representing the image.
boxes: a `Tensor` represeting the boxes.
default_width: a `Tensor` representing the width of the image.
default_height: a `Tensor` representing the height of the image.
target_width: a `Tensor` representing the desired width of the image.
target_height: a `Tensor` representing the desired height of the image.
Returns:
images: a `Tensor` representing the augmented image.
boxes: a `Tensor` representing the augmented boxes.
"""
with
tf
.
name_scope
(
'resize_crop_filter'
):
image
=
tf
.
image
.
resize
(
image
,
(
target_width
,
target_height
))
image
=
tf
.
image
.
resize_with_crop_or_pad
(
image
,
target_height
=
default_height
,
target_width
=
default_width
)
default_width
=
tf
.
cast
(
default_width
,
boxes
.
dtype
)
default_height
=
tf
.
cast
(
default_height
,
boxes
.
dtype
)
target_width
=
tf
.
cast
(
target_width
,
boxes
.
dtype
)
target_height
=
tf
.
cast
(
target_height
,
boxes
.
dtype
)
aspect_change_width
=
target_width
/
default_width
aspect_change_height
=
target_height
/
default_height
x
,
y
,
width
,
height
=
tf
.
split
(
boxes
,
4
,
axis
=-
1
)
x
=
(
x
-
0.5
)
*
target_width
/
default_width
+
0.5
y
=
(
y
-
0.5
)
*
target_height
/
default_height
+
0.5
width
=
width
*
aspect_change_width
height
=
height
*
aspect_change_height
boxes
=
tf
.
concat
([
x
,
y
,
width
,
height
],
axis
=-
1
)
return
image
,
boxes
def random_translate(image, box, t, seed=None):
  """Randomly translates the image and boxes.

  Args:
    image: a `Tensor` representing the image.
    box: a `Tensor` representing the boxes.
    t: an `int` representing the translation factor.
    seed: an optional seed for tf.random operations.

  Returns:
    image: a `Tensor` representing the augmented image.
    box: a `Tensor` representing the augmented boxes.
  """
  t_x = tf.random.uniform(
      minval=-t, maxval=t, shape=(), dtype=tf.float32, seed=seed)
  t_y = tf.random.uniform(
      minval=-t, maxval=t, shape=(), dtype=tf.float32, seed=seed)
  box = translate_boxes(box, t_x, t_y)
  image = translate_image(image, t_x, t_y)
  return image, box
def translate_boxes(box, translate_x, translate_y):
  """Translates the boxes by the given offsets.

  Args:
    box: a `Tensor` representing the boxes.
    translate_x: a `Tensor` representing the translation on the x-axis.
    translate_y: a `Tensor` representing the translation on the y-axis.

  Returns:
    box: a `Tensor` representing the translated boxes.
  """
  with tf.name_scope('translate_boxes'):
    x = box[..., 0] + translate_x
    y = box[..., 1] + translate_y
    box = tf.stack([x, y, box[..., 2], box[..., 3]], axis=-1)
    box.set_shape([None, 4])
  return box
def translate_image(image, translate_x, translate_y):
  """Translates the image by the given offsets.

  Args:
    image: a `Tensor` representing the image.
    translate_x: a `Tensor` representing the translation on the x-axis.
    translate_y: a `Tensor` representing the translation on the y-axis.

  Returns:
    image: a `Tensor` representing the translated image.
  """
  with tf.name_scope('translate_image'):
    if (translate_x != 0 and translate_y != 0):
      image_jitter = tf.convert_to_tensor([translate_x, translate_y])
      image_jitter.set_shape([2])
      image = tfa.image.translate(
          image, image_jitter * tf.cast(tf.shape(image)[1], tf.float32))
  return image
def pad_max_instances(value, instances, pad_value=0, pad_axis=0):
  """Pads tensors to a fixed maximum number of instances along an axis."""
  shape = tf.shape(value)
  dim1 = shape[pad_axis]
  take = tf.math.reduce_min([instances, dim1])
  value, _ = tf.split(value, [take, -1], axis=pad_axis)  # value[:instances, ...]
  pad = tf.convert_to_tensor([tf.math.reduce_max([instances - dim1, 0])])
  nshape = tf.concat([shape[:pad_axis], pad, shape[(pad_axis + 1):]], axis=0)
  pad_tensor = tf.fill(nshape, tf.cast(pad_value, dtype=value.dtype))
  value = tf.concat([value, pad_tensor], axis=pad_axis)
  return value
def fit_preserve_aspect_ratio(image, boxes, width=None, height=None,
                              target_dim=None):
  """Resizes the image while preserving the image aspect ratio.

  Args:
    image: a `Tensor` representing the image.
    boxes: a `Tensor` representing the boxes.
    width: int for the image width.
    height: int for the image height.
    target_dim: list or a Tensor of height and width.

  Returns:
    image: a `Tensor` representing the image.
    boxes: a `Tensor` representing the boxes.
  """
  if width is None or height is None:
    shape = tf.shape(image)
    if tf.shape(shape)[0] == 4:
      width = shape[1]
      height = shape[2]
    else:
      width = shape[0]
      height = shape[1]

  clipper = tf.math.maximum(width, height)
  if target_dim is None:
    target_dim = clipper

  pad_width = clipper - width
  pad_height = clipper - height
  image = tf.image.pad_to_bounding_box(image, pad_width // 2, pad_height // 2,
                                       clipper, clipper)

  boxes = box_ops.yxyx_to_xcycwh(boxes)
  x, y, w, h = tf.split(boxes, 4, axis=-1)

  y *= tf.cast(width / clipper, tf.float32)
  x *= tf.cast(height / clipper, tf.float32)

  y += tf.cast((pad_width / clipper) / 2, tf.float32)
  x += tf.cast((pad_height / clipper) / 2, tf.float32)

  h *= tf.cast(width / clipper, tf.float32)
  w *= tf.cast(height / clipper, tf.float32)

  boxes = tf.concat([x, y, w, h], axis=-1)
  boxes = box_ops.xcycwh_to_yxyx(boxes)
  image = tf.image.resize(image, (target_dim, target_dim))
  return image, boxes
def get_best_anchor(y_true, anchors, width=1, height=1):
  """Gets the correct anchor that is associated with each box using IOU.

  Args:
    y_true: tf.Tensor[] for the list of bounding boxes in the yolo format.
    anchors: list or tensor for the anchor boxes to be used in prediction,
      found via Kmeans.
    width: int for the image width.
    height: int for the image height.

  Returns:
    tf.Tensor: y_true with the anchor associated with each ground truth
      box known.
  """
  with tf.name_scope('get_anchor'):
    width = tf.cast(width, dtype=tf.float32)
    height = tf.cast(height, dtype=tf.float32)

    # Split the boxes into the center coordinates and the width/height.
    anchor_xy = y_true[..., 0:2]

    # Scale the anchor boxes to normalized coordinates.
    anchors = tf.convert_to_tensor(anchors, dtype=tf.float32)
    anchors_x = anchors[..., 0] / width
    anchors_y = anchors[..., 1] / height
    anchors = tf.stack([anchors_x, anchors_y], axis=-1)
    k = tf.shape(anchors)[0]

    # Build a matrix of anchor boxes of shape [num_anchors, num_boxes, 4].
    anchors = tf.transpose(anchors, perm=[1, 0])
    anchor_xy = tf.tile(
        tf.expand_dims(anchor_xy, axis=-1), [1, 1, tf.shape(anchors)[-1]])
    anchors = tf.tile(
        tf.expand_dims(anchors, axis=0), [tf.shape(anchor_xy)[0], 1, 1])

    # Stack the xy so each anchor is associated once with each center from
    # the ground truth input.
    anchors = tf.concat([anchor_xy, anchors], axis=1)
    anchors = tf.transpose(anchors, perm=[2, 0, 1])

    # Copy the ground truth n times so that each anchor from above can be
    # compared to the input ground truth; shape: [num_anchors, num_boxes, 4].
    truth_comp = tf.tile(
        tf.expand_dims(y_true[..., 0:4], axis=-1),
        [1, 1, tf.shape(anchors)[0]])
    truth_comp = tf.transpose(truth_comp, perm=[2, 0, 1])

    # Compute intersection over union of the boxes and take the argmax of the
    # computed iou for each box, so each box is associated with the largest
    # intersection over union.
    iou_raw = box_ops.compute_iou(truth_comp, anchors)
    values, indexes = tf.math.top_k(
        tf.transpose(iou_raw, perm=[1, 0]),
        k=tf.cast(k, dtype=tf.int32),
        sorted=True)
    ind_mask = tf.cast(values > 0.213, dtype=indexes.dtype)

    # Pad the indexes such that all values less than the threshold are -1:
    # add one, multiply by the mask to zero out the bad locations, then
    # subtract 1, making all the bad locations -1.
    iou_index = tf.concat([
        tf.keras.backend.expand_dims(indexes[..., 0], axis=-1),
        ((indexes[..., 1:] + 1) * ind_mask[..., 1:]) - 1
    ], axis=-1)
    iou_index = iou_index[..., :6]

  return tf.cast(iou_index, dtype=tf.float32)
def build_grided_gt(y_true, mask, size, dtype, use_tie_breaker):
  """Converts ground truth for use in loss functions.

  Args:
    y_true: tf.Tensor[] ground truth
      [box coords[0:4], classes_onehot[0:-1], best_fit_anchor_box].
    mask: list of the anchor boxes corresponding to the output,
      ex. [1, 2, 3] tells this layer to predict only the first 3
      anchors in the total.
    size: the dimensions of this output; for regular YOLO it progresses
      from 13, to 26, to 52.
    dtype: the expected output dtype.
    use_tie_breaker: boolean value for whether or not to use the tie breaker.

  Returns:
    tf.Tensor[] of shape [size, size, #of_anchors, 4, 1, num_classes].
  """
  # Unpack the required components from the input ground truth.
  boxes = tf.cast(y_true['bbox'], dtype)
  classes = tf.expand_dims(tf.cast(y_true['classes'], dtype=dtype), axis=-1)
  anchors = tf.cast(y_true['best_anchors'], dtype)

  # Get the number of boxes in the ground truth boxes.
  num_boxes = tf.shape(boxes)[0]
  # Get the number of anchor boxes used for this anchor scale.
  len_masks = tf.shape(mask)[0]

  # Init a fixed-memory-size grid for this prediction scale:
  # [size, size, # of anchors, 1 + 1 + number of anchors per scale].
  full = tf.zeros([size, size, len_masks, 6], dtype=dtype)
  # Init a grid used to track which locations have already been used
  # (for the tie breaker).
  depth_track = tf.zeros((size, size, len_masks), dtype=tf.int32)

  # Rescale the x and y centers to the size of the grid [size, size].
  x = tf.cast(boxes[..., 0] * tf.cast(size, dtype=dtype), dtype=tf.int32)
  y = tf.cast(boxes[..., 1] * tf.cast(size, dtype=dtype), dtype=tf.int32)

  # Init the TensorArrays used to store the indexes and the values used to
  # update both depth_track and full.
  update_index = tf.TensorArray(tf.int32, size=0, dynamic_size=True)
  update = tf.TensorArray(dtype, size=0, dynamic_size=True)

  # Init constants and match data types before entering the loop.
  i = 0
  anchor_id = 0
  const = tf.cast(tf.convert_to_tensor([1.]), dtype=dtype)
  mask = tf.cast(mask, dtype=dtype)
  rand_update = 0.0

  for box_id in range(num_boxes):
    # If the width or height of the box is zero, skip it. After
    # preprocessing, if the box is no longer inside the image bounds,
    # skip it as well.
    if (tf.keras.backend.all(tf.math.equal(boxes[box_id, 2:4], 0)) or
        tf.keras.backend.any(tf.math.less(boxes[box_id, 0:2], 0.0)) or
        tf.keras.backend.any(tf.math.greater_equal(boxes[box_id, 0:2], 1.0))):
      continue
    if use_tie_breaker:
      for anchor_id in range(tf.shape(anchors)[-1]):
        index = tf.math.equal(anchors[box_id, anchor_id], mask)
        if tf.keras.backend.any(index):
          # Use the boolean index mask to determine exactly which anchor
          # box was used.
          p = tf.cast(
              tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
              dtype=tf.int32)
          # Determine whether this cell was already used.
          used = depth_track[y[box_id], x[box_id], p]
          # Default "used" update value.
          uid = 1

          # If anchor_id is 0, this is the best matched anchor for this box
          # (the one with the highest IOU).
          if anchor_id == 0:
            # Create a random number to trigger a replacement if the cell
            # is already used.
            if tf.math.equal(used, 1):
              rand_update = tf.random.uniform([], maxval=1)
            else:
              rand_update = 1.0

            if rand_update > 0.5:
              # Write the box to the update list.
              update_index = update_index.write(i, [y[box_id], x[box_id], p])
              value = tf.concat([boxes[box_id], const, classes[box_id]],
                                axis=-1)
              update = update.write(i, value)

          # If used is 2, this cell is filled with a non-optimal box.
          # If used is 0, the cell in the ground truth is not yet consumed.
          # In either case you can replace that cell with a new box, as long
          # as it is not consumed by an optimal box with anchor_id = 0.
          elif tf.math.equal(used, 2) or tf.math.equal(used, 0):
            uid = 2
            # Write the box to the update list.
            update_index = update_index.write(i, [y[box_id], x[box_id], p])
            value = tf.concat([boxes[box_id], const, classes[box_id]],
                              axis=-1)
            update = update.write(i, value)

          depth_track = tf.tensor_scatter_nd_update(
              depth_track, [(y[box_id], x[box_id], p)], [uid])
          i += 1
    else:
      index = tf.math.equal(anchors[box_id, 0], mask)
      # If there is an index match.
      if tf.keras.backend.any(index):
        # Find the index.
        p = tf.cast(
            tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
            dtype=tf.int32)
        # Update the list of used boxes.
        update_index = update_index.write(i, [y[box_id], x[box_id], p])
        value = tf.concat([boxes[box_id], const, classes[box_id]], axis=-1)
        update = update.write(i, value)
        i += 1

  # If the size of the update list is not 0, do an update; otherwise there
  # are no boxes and an empty grid is returned.
  if tf.math.greater(update_index.size(), 0):
    update_index = update_index.stack()
    update = update.stack()
    full = tf.tensor_scatter_nd_update(full, update_index, update)
  return full
def build_batch_grided_gt(y_true, mask, size, dtype, use_tie_breaker):
  """Converts ground truth for use in loss functions.

  Args:
    y_true: tf.Tensor[] ground truth
      [batch, box coords[0:4], classes_onehot[0:-1], best_fit_anchor_box].
    mask: list of the anchor boxes corresponding to the output,
      ex. [1, 2, 3] tells this layer to predict only the first 3 anchors
      in the total.
    size: the dimensions of this output; for regular YOLO it progresses from
      13, to 26, to 52.
    dtype: expected output datatype.
    use_tie_breaker: boolean value for whether or not to use the tie breaker.

  Returns:
    tf.Tensor[] of shape [batch, size, size, #of_anchors, 4, 1, num_classes].
  """
  # Unpack the required components from the input ground truth.
  boxes = tf.cast(y_true['bbox'], dtype)
  classes = tf.expand_dims(tf.cast(y_true['classes'], dtype=dtype), axis=-1)
  anchors = tf.cast(y_true['best_anchors'], dtype)

  # Get the batch size.
  batches = tf.shape(boxes)[0]
  # Get the number of boxes in the ground truth boxes.
  num_boxes = tf.shape(boxes)[1]
  # Get the number of anchor boxes used for this anchor scale.
  len_masks = tf.shape(mask)[0]

  # Init a fixed-memory-size grid for this prediction scale:
  # [batch, size, size, # of anchors, 1 + 1 + number of anchors per scale].
  full = tf.zeros([batches, size, size, len_masks, 1 + 4 + 1], dtype=dtype)
  # Init a grid used to track which locations have already been used
  # (for the tie breaker).
  depth_track = tf.zeros((batches, size, size, len_masks), dtype=tf.int32)

  # Rescale the x and y centers to the size of the grid [size, size].
  x = tf.cast(boxes[..., 0] * tf.cast(size, dtype=dtype), dtype=tf.int32)
  y = tf.cast(boxes[..., 1] * tf.cast(size, dtype=dtype), dtype=tf.int32)

  # Init the TensorArrays used to store the indexes and the values used to
  # update both depth_track and full.
  update_index = tf.TensorArray(tf.int32, size=0, dynamic_size=True)
  update = tf.TensorArray(dtype, size=0, dynamic_size=True)

  # Init constants and match data types before entering the loop.
  i = 0
  anchor_id = 0
  const = tf.cast(tf.convert_to_tensor([1.]), dtype=dtype)
  mask = tf.cast(mask, dtype=dtype)
  rand_update = 0.0

  for batch in range(batches):
    for box_id in range(num_boxes):
      # If the width or height of the box is zero, skip it.
      if tf.keras.backend.all(tf.math.equal(boxes[batch, box_id, 2:4], 0)):
        continue
      # After preprocessing, if the box is no longer inside the image bounds,
      # skip the box.
      if (tf.keras.backend.any(tf.math.less(boxes[batch, box_id, 0:2], 0.0)) or
          tf.keras.backend.any(
              tf.math.greater_equal(boxes[batch, box_id, 0:2], 1.0))):
        continue
      if use_tie_breaker:
        for anchor_id in range(tf.shape(anchors)[-1]):
          index = tf.math.equal(anchors[batch, box_id, anchor_id], mask)
          if tf.keras.backend.any(index):
            # Use the boolean index mask to determine exactly which anchor
            # box was used.
            p = tf.cast(
                tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
                dtype=tf.int32)
            # Determine whether this cell was already used.
            used = depth_track[batch, y[batch, box_id], x[batch, box_id], p]
            # Default "used" update value.
            uid = 1

            # If anchor_id is 0, this is the best matched anchor for this box
            # (the one with the highest IOU).
            if anchor_id == 0:
              # Create a random number to trigger a replacement if the cell
              # is already used.
              if tf.math.equal(used, 1):
                rand_update = tf.random.uniform([], maxval=1)
              else:
                rand_update = 1.0

              if rand_update > 0.5:
                # Write the box to the update list.
                update_index = update_index.write(
                    i, [batch, y[batch, box_id], x[batch, box_id], p])
                value = tf.concat(
                    [boxes[batch, box_id], const, classes[batch, box_id]],
                    axis=-1)
                update = update.write(i, value)

            # If used is 2, this cell is filled with a non-optimal box.
            # If used is 0, the cell in the ground truth is not yet consumed.
            # In either case you can replace that cell with a new box, as long
            # as it is not consumed by an optimal box with anchor_id = 0.
            elif tf.math.equal(used, 2) or tf.math.equal(used, 0):
              uid = 2
              # Write the box to the update list.
              update_index = update_index.write(
                  i, [batch, y[batch, box_id], x[batch, box_id], p])
              value = tf.concat(
                  [boxes[batch, box_id], const, classes[batch, box_id]],
                  axis=-1)
              update = update.write(i, value)

            # Update the used index for where and how the box was placed.
            depth_track = tf.tensor_scatter_nd_update(
                depth_track,
                [(batch, y[batch, box_id], x[batch, box_id], p)], [uid])
            i += 1
      else:
        index = tf.math.equal(anchors[batch, box_id, 0], mask)
        if tf.keras.backend.any(index):
          # If there is an index match, find the anchor index.
          p = tf.cast(
              tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
              dtype=tf.int32)
          # Write the box to the update list.
          update_index = update_index.write(
              i, [batch, y[batch, box_id], x[batch, box_id], p])
          value = tf.concat(
              [boxes[batch, box_id], const, classes[batch, box_id]], axis=-1)
          update = update.write(i, value)
          i += 1

  # If the size of the update list is not 0, do an update; otherwise there
  # are no boxes and an empty grid is returned.
  if tf.math.greater(update_index.size(), 0):
    update_index = update_index.stack()
    update = update.stack()
    full = tf.tensor_scatter_nd_update(full, update_index, update)
  return full
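A minimal eager-mode sketch of two of the ops above; the box values, the instance budget, and the anchor pixel sizes are illustrative only:

import numpy as np
import tensorflow as tf

from official.vision.beta.projects.yolo.ops import preprocess_ops

# Pad a variable-length set of 3 boxes out to a fixed budget of 10 instances.
boxes = tf.convert_to_tensor(np.random.rand(3, 4), dtype=tf.float32)
padded = preprocess_ops.pad_max_instances(boxes, 10, pad_value=0, pad_axis=0)
print(padded.shape)  # (10, 4)

# Associate each normalized (xc, yc, w, h) box with its best-matching anchors.
y_true = tf.constant([[0.5, 0.5, 0.2, 0.3]], dtype=tf.float32)
anchors = [(10., 13.), (33., 23.), (62., 45.)]  # anchor sizes in pixels
best = preprocess_ops.get_best_anchor(y_true, anchors, width=416, height=416)
print(best)  # anchor indexes per box; -1 where the IOU falls below the threshold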
official/vision/beta/projects/yolo/ops/preprocess_ops_test.py (new file, mode 100644)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from official.vision.beta.projects.yolo.ops import preprocess_ops


class PreprocessOpsTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters((416, 416, 5, 300, 300), (100, 200, 6, 50, 50))
  def test_resize_crop_filter(self, default_width, default_height, num_boxes,
                              target_width, target_height):
    image = tf.convert_to_tensor(
        np.random.rand(default_width, default_height, 3))
    boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
    resized_image, resized_boxes = preprocess_ops.resize_crop_filter(
        image, boxes, default_width, default_height, target_width,
        target_height)
    resized_image_shape = tf.shape(resized_image)
    resized_boxes_shape = tf.shape(resized_boxes)
    self.assertAllEqual([default_height, default_width, 3],
                        resized_image_shape.numpy())
    self.assertAllEqual([num_boxes, 4], resized_boxes_shape.numpy())

  @parameterized.parameters((7, 7., 5.), (25, 35., 45.))
  def test_translate_boxes(self, num_boxes, translate_x, translate_y):
    boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
    translated_boxes = preprocess_ops.translate_boxes(boxes, translate_x,
                                                      translate_y)
    translated_boxes_shape = tf.shape(translated_boxes)
    self.assertAllEqual([num_boxes, 4], translated_boxes_shape.numpy())

  @parameterized.parameters((100, 200, 75., 25.), (400, 600, 25., 75.))
  def test_translate_image(self, image_height, image_width, translate_x,
                           translate_y):
    image = tf.convert_to_tensor(np.random.rand(image_height, image_width, 4))
    translated_image = preprocess_ops.translate_image(image, translate_x,
                                                      translate_y)
    translated_image_shape = tf.shape(translated_image)
    self.assertAllEqual([image_height, image_width, 4],
                        translated_image_shape.numpy())

  @parameterized.parameters(([1, 2], 20, 0), ([13, 2, 4], 15, 0))
  def test_pad_max_instances(self, input_shape, instances, pad_axis):
    expected_output_shape = input_shape
    expected_output_shape[pad_axis] = instances
    output = preprocess_ops.pad_max_instances(
        np.ones(input_shape), instances, pad_axis=pad_axis)
    self.assertAllEqual(expected_output_shape, tf.shape(output).numpy())


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/serving/detection.py

@@ -70,8 +70,6 @@ class DetectionModule(export_base.ExportModule):
         aug_scale_min=1.0,
         aug_scale_max=1.0)
 
-    image_shape = image_info[1, :]  # Shape of original image.
-
     input_anchor = anchor.build_anchor_generator(
         min_level=model_params.min_level,
         max_level=model_params.max_level,

@@ -81,7 +79,7 @@ class DetectionModule(export_base.ExportModule):
     anchor_boxes = input_anchor(
         image_size=(self._input_image_size[0], self._input_image_size[1]))
 
-    return image, anchor_boxes, image_shape
+    return image, anchor_boxes, image_info
 
   def _run_inference_on_image_tensors(self, images: tf.Tensor):
     """Cast image to float and run inference.

@@ -111,20 +109,22 @@ class DetectionModule(export_base.ExportModule):
           dtype=tf.float32)
       anchor_shapes.append((str(level), anchor_level_spec))
 
-    image_shape_spec = tf.TensorSpec(shape=[2,], dtype=tf.float32)
+    image_info_spec = tf.TensorSpec(shape=[4, 2], dtype=tf.float32)
 
-    images, anchor_boxes, image_shape = tf.nest.map_structure(
+    images, anchor_boxes, image_info = tf.nest.map_structure(
         tf.identity,
         tf.map_fn(
             self._build_inputs,
             elems=images,
             fn_output_signature=(images_spec, dict(anchor_shapes),
-                                 image_shape_spec),
+                                 image_info_spec),
             parallel_iterations=32))
 
+    input_image_shape = image_info[:, 1, :]
+
     detections = self._model.call(
         images=images,
-        image_shape=image_shape,
+        image_shape=input_image_shape,
         anchor_boxes=anchor_boxes,
         training=False)

@@ -132,7 +132,8 @@ class DetectionModule(export_base.ExportModule):
         'detection_boxes': detections['detection_boxes'],
         'detection_scores': detections['detection_scores'],
         'detection_classes': detections['detection_classes'],
-        'num_detections': detections['num_detections']
+        'num_detections': detections['num_detections'],
+        'image_info': image_info
     }
 
     if 'detection_masks' in detections.keys():
       final_outputs['detection_masks'] = detections['detection_masks']
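With this change the exported detection signature also returns the `image_info` tensor produced during preprocessing, alongside the detections. A small sketch of inspecting an exported SavedModel; the export directory, input size, and input keyword below are placeholders/assumptions:

import tensorflow as tf

saved_model_dir = '/tmp/retinanet_export/saved_model'  # placeholder path
loaded = tf.saved_model.load(saved_model_dir)
serving_fn = loaded.signatures['serving_default']

# Inspect the signature to find the actual input name and shape.
print(serving_fn.structured_input_signature)

dummy = tf.zeros([1, 640, 640, 3], dtype=tf.uint8)  # assumed image_tensor input
outputs = serving_fn(inputs=dummy)  # 'inputs' is an assumed keyword name
# Expect detection_boxes / scores / classes / num_detections plus image_info.
print(sorted(outputs.keys()))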
official/vision/beta/serving/detection_test.py

@@ -125,10 +125,11 @@ class DetectionExportTest(tf.test.TestCase, parameterized.TestCase):
     images = self._get_dummy_input(input_type, batch_size, image_size)
 
-    processed_images, anchor_boxes, image_shape = module._build_inputs(
+    processed_images, anchor_boxes, image_info = module._build_inputs(
         tf.zeros((224, 224, 3), dtype=tf.uint8))
-    processed_images = tf.expand_dims(processed_images, 0)
+    image_shape = image_info[1, :]
     image_shape = tf.expand_dims(image_shape, 0)
+    processed_images = tf.expand_dims(processed_images, 0)
     for l, l_boxes in anchor_boxes.items():
       anchor_boxes[l] = tf.expand_dims(l_boxes, 0)
official/vision/beta/serving/export_base.py

@@ -16,33 +16,30 @@
 """Base class for model export."""
 
 import abc
-import tensorflow as tf
-
-
-def _decode_image(encoded_image_bytes):
-  image_tensor = tf.image.decode_image(encoded_image_bytes, channels=3)
-  image_tensor.set_shape((None, None, 3))
-  return image_tensor
+from typing import Optional, Sequence, Mapping
 
+import tensorflow as tf
 
-def _decode_tf_example(tf_example_string_tensor):
-  keys_to_features = {'image/encoded': tf.io.FixedLenFeature((), tf.string)}
-  parsed_tensors = tf.io.parse_single_example(
-      serialized=tf_example_string_tensor, features=keys_to_features)
-  image_tensor = _decode_image(parsed_tensors['image/encoded'])
-  return image_tensor
+from official.modeling.hyperparams import config_definitions as cfg
 
 
 class ExportModule(tf.Module, metaclass=abc.ABCMeta):
   """Base Export Module."""
 
-  def __init__(self, params, batch_size, input_image_size, model=None):
+  def __init__(self,
+               params: cfg.ExperimentConfig,
+               batch_size: int,
+               input_image_size: Sequence[int],
+               num_channels: int = 3,
+               model: Optional[tf.keras.Model] = None):
     """Initializes a module for export.
 
     Args:
       params: Experiment params.
-      batch_size: Int or None.
-      input_image_size: List or Tuple of height, width of the input image.
+      batch_size: The batch size of the model input. Can be `int` or None.
+      input_image_size: List or Tuple of size of the input image. For 2D image,
+        it is [height, width].
+      num_channels: The number of the image channels.
       model: A tf.keras.Model instance to be exported.
     """

@@ -50,48 +47,98 @@ class ExportModule(tf.Module, metaclass=abc.ABCMeta):
     self._params = params
     self._batch_size = batch_size
     self._input_image_size = input_image_size
+    self._num_channels = num_channels
     self._model = model
 
+  def _decode_image(self, encoded_image_bytes: str) -> tf.Tensor:
+    """Decodes an image bytes to an image tensor.
+
+    Use `tf.image.decode_image` to decode an image if input is expected to be
+    2D image; otherwise use `tf.io.decode_raw` to convert the raw bytes to
+    tensor and reshape it to desire shape.
+
+    Args:
+      encoded_image_bytes: An encoded image string to be decoded.
+
+    Returns:
+      A decoded image tensor.
+    """
+    if len(self._input_image_size) == 2:
+      # Decode an image if 2D input is expected.
+      image_tensor = tf.image.decode_image(
+          encoded_image_bytes, channels=self._num_channels)
+      image_tensor.set_shape((None, None, self._num_channels))
+    else:
+      # Convert raw bytes into a tensor and reshape it, if not 2D input.
+      image_tensor = tf.io.decode_raw(encoded_image_bytes, out_type=tf.uint8)
+      image_tensor = tf.reshape(image_tensor,
+                                self._input_image_size + [self._num_channels])
+    return image_tensor
+
+  def _decode_tf_example(
+      self, tf_example_string_tensor: tf.train.Example) -> tf.Tensor:
+    """Decodes a TF Example to an image tensor.
+
+    Args:
+      tf_example_string_tensor: A tf.train.Example of encoded image and other
+        information.
+
+    Returns:
+      A decoded image tensor.
+    """
+    keys_to_features = {'image/encoded': tf.io.FixedLenFeature((), tf.string)}
+    parsed_tensors = tf.io.parse_single_example(
+        serialized=tf_example_string_tensor, features=keys_to_features)
+    image_tensor = self._decode_image(parsed_tensors['image/encoded'])
+    return image_tensor
+
   @abc.abstractmethod
-  def build_model(self):
+  def build_model(self, **kwargs):
     """Builds model and sets self._model."""
 
   @abc.abstractmethod
-  def _run_inference_on_image_tensors(self, images):
+  def _run_inference_on_image_tensors(
+      self, images: tf.Tensor) -> Mapping[str, tf.Tensor]:
     """Runs inference on images."""
 
   @tf.function
-  def inference_from_image_tensors(self, input_tensor):
+  def inference_from_image_tensors(
+      self, input_tensor: tf.Tensor) -> Mapping[str, tf.Tensor]:
    return self._run_inference_on_image_tensors(input_tensor)
 
   @tf.function
-  def inference_from_image_bytes(self, input_tensor):
+  def inference_from_image_bytes(self, input_tensor: str):
     with tf.device('cpu:0'):
       images = tf.nest.map_structure(
           tf.identity,
           tf.map_fn(
-              _decode_image,
+              self._decode_image,
               elems=input_tensor,
               fn_output_signature=tf.TensorSpec(
-                  shape=[None, None, 3], dtype=tf.uint8),
+                  shape=[None] * len(self._input_image_size) +
+                  [self._num_channels],
+                  dtype=tf.uint8),
               parallel_iterations=32))
       images = tf.stack(images)
     return self._run_inference_on_image_tensors(images)
 
   @tf.function
-  def inference_from_tf_example(self, input_tensor):
+  def inference_from_tf_example(
+      self, input_tensor: tf.train.Example) -> Mapping[str, tf.Tensor]:
     with tf.device('cpu:0'):
       images = tf.nest.map_structure(
           tf.identity,
           tf.map_fn(
-              _decode_tf_example,
+              self._decode_tf_example,
              elems=input_tensor,
              # Height/width of the shape of input images is unspecified (None)
              # at the time of decoding the example, but the shape will
              # be adjusted to conform to the input layer of the model,
              # by _run_inference_on_image_tensors() below.
-              fn_output_signature=tf.TensorSpec(
-                  shape=[None, None, 3], dtype=tf.uint8),
-              dtype=tf.uint8,
+              fn_output_signature=tf.TensorSpec(
+                  shape=[None] * len(self._input_image_size) +
+                  [self._num_channels],
+                  dtype=tf.uint8),
              parallel_iterations=32))
       images = tf.stack(images)
official/vision/beta/serving/export_saved_model_lib.py

@@ -17,19 +17,30 @@ r"""Vision models export utility function for serving/inference."""
 
 import os
+from typing import Optional, List
 
 import tensorflow as tf
 
 from official.core import config_definitions as cfg
 from official.core import train_utils
 from official.vision.beta import configs
 from official.vision.beta.serving import detection
+from official.vision.beta.serving import export_base
 from official.vision.beta.serving import image_classification
 from official.vision.beta.serving import semantic_segmentation
 
 
-def export_inference_graph(input_type, batch_size, input_image_size, params,
-                           checkpoint_path, export_dir,
-                           export_checkpoint_subdir=None,
-                           export_saved_model_subdir=None):
+def export_inference_graph(
+    input_type: str,
+    batch_size: Optional[int],
+    input_image_size: List[int],
+    params: cfg.ExperimentConfig,
+    checkpoint_path: str,
+    export_dir: str,
+    num_channels: Optional[int] = 3,
+    export_module: Optional[export_base.ExportModule] = None,
+    export_checkpoint_subdir: Optional[str] = None,
+    export_saved_model_subdir: Optional[str] = None):
   """Exports inference graph for the model specified in the exp config.
 
   Saved model is stored at export_dir/saved_model, checkpoint is saved

@@ -42,6 +53,10 @@ def export_inference_graph(input_type, batch_size, input_image_size, params,
     params: Experiment params.
     checkpoint_path: Trained checkpoint path or directory.
     export_dir: Export directory path.
+    num_channels: The number of input image channels.
+    export_module: Optional export module to be used instead of using params
+      to create one. If None, the params will be used to create an export
+      module.
     export_checkpoint_subdir: Optional subdirectory under export_dir
       to store checkpoint.
     export_saved_model_subdir: Optional subdirectory under export_dir

@@ -60,21 +75,31 @@ def export_inference_graph(input_type, batch_size, input_image_size, params,
   else:
     output_saved_model_directory = export_dir
 
-  if isinstance(params.task,
-                configs.image_classification.ImageClassificationTask):
-    export_module = image_classification.ClassificationModule(
-        params=params, batch_size=batch_size,
-        input_image_size=input_image_size)
-  elif isinstance(params.task, configs.retinanet.RetinaNetTask) or isinstance(
-      params.task, configs.maskrcnn.MaskRCNNTask):
-    export_module = detection.DetectionModule(
-        params=params, batch_size=batch_size,
-        input_image_size=input_image_size)
-  elif isinstance(params.task,
-                  configs.semantic_segmentation.SemanticSegmentationTask):
-    export_module = semantic_segmentation.SegmentationModule(
-        params=params, batch_size=batch_size,
-        input_image_size=input_image_size)
-  else:
-    raise ValueError('Export module not implemented for {} task.'.format(
-        type(params.task)))
+  if not export_module:
+    if isinstance(params.task,
+                  configs.image_classification.ImageClassificationTask):
+      export_module = image_classification.ClassificationModule(
+          params=params,
+          batch_size=batch_size,
+          input_image_size=input_image_size,
+          num_channels=num_channels)
+    elif isinstance(params.task, configs.retinanet.RetinaNetTask) or isinstance(
+        params.task, configs.maskrcnn.MaskRCNNTask):
+      export_module = detection.DetectionModule(
+          params=params,
+          batch_size=batch_size,
+          input_image_size=input_image_size,
+          num_channels=num_channels)
+    elif isinstance(params.task,
+                    configs.semantic_segmentation.SemanticSegmentationTask):
+      export_module = semantic_segmentation.SegmentationModule(
+          params=params,
+          batch_size=batch_size,
+          input_image_size=input_image_size,
+          num_channels=num_channels)
+    else:
+      raise ValueError('Export module not implemented for {} task.'.format(
+          type(params.task)))
 
   model = export_module.build_model()

@@ -87,7 +112,7 @@ def export_inference_graph(input_type, batch_size, input_image_size, params,
   if input_type == 'image_tensor':
     input_signature = tf.TensorSpec(
-        shape=[batch_size, None, None, 3],
+        shape=[batch_size] + [None] * len(input_image_size) + [num_channels],
         dtype=tf.uint8)
     signatures = {
         'serving_default':
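A sketch of calling the extended `export_inference_graph` API from Python; the experiment name, checkpoint path, and export directory below are placeholders:

from official.core import exp_factory
from official.vision.beta.serving import export_saved_model_lib

params = exp_factory.get_exp_config('retinanet_resnetfpn_coco')  # example experiment name
export_saved_model_lib.export_inference_graph(
    input_type='image_tensor',
    batch_size=1,
    input_image_size=[640, 640],
    params=params,
    checkpoint_path='/path/to/ckpt',   # placeholder
    export_dir='/tmp/retinanet_export',
    num_channels=3,                    # new optional argument
    export_module=None)                # or pass a prebuilt ExportModule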
official/vision/beta/serving/export_tfhub.py (new file, mode 100644)
# Lint as: python3
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A script to export the image classification as a TF-Hub SavedModel."""
# Import libraries
from absl import app
from absl import flags
import tensorflow as tf

from official.common import registry_imports  # pylint: disable=unused-import
from official.core import exp_factory
from official.modeling import hyperparams
from official.vision.beta.serving import image_classification

FLAGS = flags.FLAGS

flags.DEFINE_string('experiment', None,
                    'experiment type, e.g. resnet_imagenet')
flags.DEFINE_string('checkpoint_path', None, 'Checkpoint path.')
flags.DEFINE_string('export_path', None, 'The export directory.')
flags.DEFINE_multi_string(
    'config_file',
    None,
    'A YAML/JSON files which specifies overrides. The override order '
    'follows the order of args. Note that each file '
    'can be used as an override template to override the default parameters '
    'specified in Python. If the same parameter is specified in both '
    '`--config_file` and `--params_override`, `config_file` will be used '
    'first, followed by params_override.')
flags.DEFINE_string(
    'params_override', '',
    'The JSON/YAML file or string which specifies the parameter to be overriden'
    ' on top of `config_file` template.')
flags.DEFINE_integer('batch_size', None, 'The batch size.')
flags.DEFINE_string(
    'input_image_size', '224,224',
    'The comma-separated string of two integers representing the height,width '
    'of the input to the model.')
flags.DEFINE_boolean(
    'skip_logits_layer', False,
    'Whether to skip the prediction layer and only output the feature vector.')


def export_model_to_tfhub(params, batch_size, input_image_size,
                          skip_logits_layer, checkpoint_path, export_path):
  """Export an image classification model to TF-Hub."""
  export_module = image_classification.ClassificationModule(
      params=params, batch_size=batch_size, input_image_size=input_image_size)
  model = export_module.build_model(skip_logits_layer=skip_logits_layer)
  checkpoint = tf.train.Checkpoint(model=model)
  checkpoint.restore(checkpoint_path).assert_existing_objects_matched()
  model.save(export_path, include_optimizer=False, save_format='tf')


def main(_):
  params = exp_factory.get_exp_config(FLAGS.experiment)
  for config_file in FLAGS.config_file or []:
    params = hyperparams.override_params_dict(
        params, config_file, is_strict=True)
  if FLAGS.params_override:
    params = hyperparams.override_params_dict(
        params, FLAGS.params_override, is_strict=True)

  params.validate()
  params.lock()

  export_model_to_tfhub(
      params=params,
      batch_size=FLAGS.batch_size,
      input_image_size=[int(x) for x in FLAGS.input_image_size.split(',')],
      skip_logits_layer=FLAGS.skip_logits_layer,
      checkpoint_path=FLAGS.checkpoint_path,
      export_path=FLAGS.export_path)


if __name__ == '__main__':
  app.run(main)
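The script is normally driven through the flags above; a minimal sketch of calling the helper directly from Python instead, with placeholder paths and the example experiment name mentioned in the flag help text:

from official.core import exp_factory
from official.vision.beta.serving import export_tfhub

params = exp_factory.get_exp_config('resnet_imagenet')  # example experiment name
params.validate()
params.lock()

export_tfhub.export_model_to_tfhub(
    params=params,
    batch_size=None,
    input_image_size=[224, 224],
    skip_logits_layer=True,            # export only the feature vector
    checkpoint_path='/path/to/ckpt',   # placeholder
    export_path='/tmp/resnet_tfhub')   # placeholder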
official/vision/beta/serving/image_classification.py

@@ -29,14 +29,15 @@ STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)
 class ClassificationModule(export_base.ExportModule):
   """classification Module."""
 
-  def build_model(self):
+  def build_model(self, skip_logits_layer=False):
     input_specs = tf.keras.layers.InputSpec(
         shape=[self._batch_size] + self._input_image_size + [3])
 
     self._model = factory.build_classification_model(
         input_specs=input_specs,
         model_config=self._params.task.model,
-        l2_regularizer=None)
+        l2_regularizer=None,
+        skip_logits_layer=skip_logits_layer)
 
     return self._model
official/vision/beta/tasks/image_classification.py

@@ -16,13 +16,14 @@
 """Image classification task definition."""
 from absl import logging
 import tensorflow as tf
 from official.common import dataset_fn
 from official.core import base_task
 from official.core import input_reader
 from official.core import task_factory
 from official.modeling import tf_utils
 from official.vision.beta.configs import image_classification as exp_cfg
 from official.vision.beta.dataloaders import classification_input
+from official.vision.beta.dataloaders import input_reader_factory
 from official.vision.beta.dataloaders import tfds_classification_decoders
 from official.vision.beta.modeling import factory

@@ -93,9 +94,10 @@ class ImageClassificationTask(base_task.Task):
         output_size=input_size[:2],
         num_classes=num_classes,
         aug_policy=params.aug_policy,
         randaug_magnitude=params.randaug_magnitude,
         dtype=params.dtype)
 
-    reader = input_reader.InputReader(
+    reader = input_reader_factory.input_reader_generator(
         params,
         dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
         decoder_fn=decoder.decode,
official/vision/beta/tasks/maskrcnn.py
View file @
2e9bb539
...
...
@@ -19,9 +19,9 @@ from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.vision.beta.configs import maskrcnn as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import maskrcnn_input
from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.dataloaders import tf_example_label_map_decoder
...
...
@@ -143,7 +143,7 @@ class MaskRCNNTask(base_task.Task):
        include_mask=self._task_config.model.include_mask,
        mask_crop_size=params.parser.mask_crop_size)

-    reader = input_reader.InputReader(
+    reader = input_reader_factory.input_reader_generator(
        params,
        dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
        decoder_fn=decoder.decode,
...
...
official/vision/beta/tasks/retinanet.py
View file @
2e9bb539
...
...
@@ -19,10 +19,10 @@ from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.vision import keras_cv
from official.vision.beta.configs import retinanet as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import retinanet_input
from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.dataloaders import tfds_detection_decoders
...
...
@@ -122,7 +122,7 @@ class RetinaNetTask(base_task.Task):
        skip_crowd_during_training=params.parser.skip_crowd_during_training,
        max_num_instances=params.parser.max_num_instances)

-    reader = input_reader.InputReader(
+    reader = input_reader_factory.input_reader_generator(
        params,
        dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
        decoder_fn=decoder.decode,
...
...
official/vision/beta/tasks/semantic_segmentation.py
View file @
2e9bb539
...
...
@@ -19,9 +19,9 @@ from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.vision.beta.configs import semantic_segmentation as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import segmentation_input
from official.vision.beta.dataloaders import tfds_segmentation_decoders
from official.vision.beta.evaluation import segmentation_metrics
...
...
@@ -104,7 +104,7 @@ class SemanticSegmentationTask(base_task.Task):
        aug_rand_hflip=params.aug_rand_hflip,
        dtype=params.dtype)

-    reader = input_reader.InputReader(
+    reader = input_reader_factory.input_reader_generator(
        params,
        dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
        decoder_fn=decoder.decode,
...
...
@@ -143,15 +143,15 @@ class SemanticSegmentationTask(base_task.Task):
  def build_metrics(self, training=True):
    """Gets streaming metrics for training/validation."""
    metrics = []
-    if training:
+    if training and self.task_config.evaluation.report_train_mean_iou:
      metrics.append(
          segmentation_metrics.MeanIoU(
              name='mean_iou',
              num_classes=self.task_config.model.num_classes,
              rescale_predictions=False,
              dtype=tf.float32))
    else:
-      self.miou_metric = segmentation_metrics.MeanIoU(
-          name='val_mean_iou',
+      self.iou_metric = segmentation_metrics.PerClassIoU(
+          name='per_class_iou',
          num_classes=self.task_config.model.num_classes,
          rescale_predictions=not
          self.task_config.validation_data.resize_eval_groundtruth,
...
...
@@ -243,7 +243,7 @@ class SemanticSegmentationTask(base_task.Task):
      loss = 0
    logs = {self.loss: loss}

-    logs.update({self.miou_metric.name: (labels, outputs)})
+    logs.update({self.iou_metric.name: (labels, outputs)})

    if metrics:
      self.process_metrics(metrics, labels, outputs)
...
...
@@ -257,11 +257,19 @@ class SemanticSegmentationTask(base_task.Task):
  def aggregate_logs(self, state=None, step_outputs=None):
    if state is None:
-      self.miou_metric.reset_states()
-      state = self.miou_metric
-    self.miou_metric.update_state(step_outputs[self.miou_metric.name][0],
-                                  step_outputs[self.miou_metric.name][1])
+      self.iou_metric.reset_states()
+      state = self.iou_metric
+    self.iou_metric.update_state(step_outputs[self.iou_metric.name][0],
+                                 step_outputs[self.iou_metric.name][1])
    return state

  def reduce_aggregated_logs(self, aggregated_logs):
-    return {self.miou_metric.name: self.miou_metric.result().numpy()}
+    result = {}
+    ious = self.iou_metric.result()
+    # TODO(arashwan): support loading class name from a label map file.
+    if self.task_config.evaluation.report_per_class_iou:
+      for i, value in enumerate(ious.numpy()):
+        result.update({'iou/{}'.format(i): value})
+    # Computes mean IoU
+    result.update({'mean_iou': tf.reduce_mean(ious).numpy()})
+    return result
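A small numeric illustration (not part of the commit) of how the per-class IoUs returned by PerClassIoU roll up into the reported dictionary, mirroring reduce_aggregated_logs() above; the IoU values are made up.

# Hedged illustration: per-class IoUs reduced to the logged dictionary.
import tensorflow as tf

ious = tf.constant([0.90, 0.55, 0.70])  # stand-in for self.iou_metric.result()
result = {'iou/{}'.format(i): value for i, value in enumerate(ious.numpy())}
result['mean_iou'] = tf.reduce_mean(ious).numpy()
print(result)  # mean_iou is approximately 0.7167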
official/vision/beta/tasks/video_classification.py
View file @
2e9bb539
...
...
@@ -17,10 +17,10 @@
from absl import logging
import tensorflow as tf
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.modeling import tf_utils
from official.vision.beta.configs import video_classification as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import video_input
from official.vision.beta.modeling import factory_3d
...
...
@@ -74,7 +74,7 @@ class VideoClassificationTask(base_task.Task):
    parser = video_input.Parser(input_params=params)
    postprocess_fn = video_input.PostBatchProcessor(params)

-    reader = input_reader.InputReader(
+    reader = input_reader_factory.input_reader_generator(
        params,
        dataset_fn=self._get_dataset_fn(params),
        decoder_fn=self._get_decoder_fn(params),
...
...
research/delf/delf/python/normalization_layers/__init__.py
0 → 100644
View file @
2e9bb539
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
\ No newline at end of file
research/delf/delf/python/normalization_layers/normalization.py
0 → 100644
View file @
2e9bb539
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Normalization layer definitions."""
import tensorflow as tf


class L2Normalization(tf.keras.layers.Layer):
  """Normalization layer using L2 norm."""

  def __init__(self):
    """Initialization of the L2Normalization layer."""
    super(L2Normalization, self).__init__()
    # A lower bound value for the norm.
    self.eps = 1e-6

  def call(self, x, axis=1):
    """Invokes the L2Normalization instance.

    Args:
      x: A Tensor.
      axis: Dimension along which to normalize. A scalar or a vector of
        integers.

    Returns:
      norm: A Tensor with the same shape as `x`.
    """
    return tf.nn.l2_normalize(x, axis, epsilon=self.eps)
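A short usage sketch (not part of the commit), following the import style of the unit test below; the embedding values are arbitrary.

# Hedged sketch: L2-normalize a batch of embeddings along the feature axis.
import tensorflow as tf
from delf.python.normalization_layers import normalization

embeddings = tf.constant([[3.0, 4.0], [0.0, 2.0]])
layer = normalization.L2Normalization()
normalized = layer(embeddings, axis=1)
print(normalized.numpy())           # [[0.6, 0.8], [0.0, 1.0]]
print(tf.norm(normalized, axis=1))  # each row has (approximately) unit norm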
official/utils/docs/build_docs_test.py → research/delf/delf/python/normalization_layers/normalization_test.py
View file @
2e9bb539
-# Lint as: python3
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2021 The TensorFlow Authors All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
...
...
@@ -13,40 +12,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for official.tools.build_docs."""
-
-import os
-import shutil
+"""Tests for normalization layers."""

 import tensorflow as tf

-from official.utils.docs import build_docs
-
-
-class BuildDocsTest(tf.test.TestCase):
+from delf.python.normalization_layers import normalization

-  def setUp(self):
-    super(BuildDocsTest, self).setUp()
-    self.workdir = self.get_temp_dir()
-    if os.path.exists(self.workdir):
-      shutil.rmtree(self.workdir)
-    os.makedirs(self.workdir)
-
-  def test_api_gen(self):
-    build_docs.gen_api_docs(
-        code_url_prefix="http://official/nlp/modeling/",
-        site_path="tf_nlp_modeling/api_docs/python",
-        output_dir=self.workdir,
-        gen_report=False,
-        project_short_name="tf_nlp_modeling",
-        project_full_name="TensorFlow Modeling - NLP Library",
-        search_hints=True)
+class NormalizationsTest(tf.test.TestCase):

-    # Check that the "defined in" section is working
-    with open(os.path.join(self.workdir, "tf_nlp_modeling.md")) as f:
-      content = f.read()
-    self.assertIn("__init__.py", content)
+  def testL2Normalization(self):
+    x = tf.constant([-4.0, 0.0, 4.0])
+    layer = normalization.L2Normalization()
+    # Run tested function.
+    result = layer(x, axis=0)
+    # Define expected result.
+    exp_output = [-0.70710677, 0.0, 0.70710677]
+    # Compare actual and expected.
+    self.assertAllClose(exp_output, result)

-if __name__ == "__main__":
+if __name__ == '__main__':
   tf.test.main()
research/delf/delf/python/pooling_layers/__init__.py
0 → 100644
View file @
2e9bb539
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
\ No newline at end of file
research/delf/delf/python/pooling_layers/pooling.py
0 → 100644
View file @
2e9bb539
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Pooling layers definitions."""
import tensorflow as tf


class MAC(tf.keras.layers.Layer):
  """Global max pooling (MAC) layer.

  Maximum Activations of Convolutions (MAC) is simply constructed by
  max-pooling over all dimensions per feature map. See
  https://arxiv.org/abs/1511.05879 for a reference.
  """

  def call(self, x, axis=None):
    """Invokes the MAC pooling instance.

    Args:
      x: [B, H, W, D] A float32 Tensor.
      axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.

    Returns:
      output: [B, D] A float32 Tensor.
    """
    if axis is None:
      axis = [1, 2]
    return mac(x, axis=axis)


class SPoC(tf.keras.layers.Layer):
  """Average pooling (SPoC) layer.

  Sum-pooled convolutional features (SPoC) is based on the sum pooling of the
  deep features. See https://arxiv.org/pdf/1510.07493.pdf for a reference.
  """

  def call(self, x, axis=None):
    """Invokes the SPoC instance.

    Args:
      x: [B, H, W, D] A float32 Tensor.
      axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.

    Returns:
      output: [B, D] A float32 Tensor.
    """
    if axis is None:
      axis = [1, 2]
    return spoc(x, axis)


class GeM(tf.keras.layers.Layer):
  """Generalized mean pooling (GeM) layer.

  Generalized Mean Pooling (GeM) computes the generalized mean of each
  channel in a tensor. See https://arxiv.org/abs/1711.02512 for a reference.
  """

  def __init__(self, power=3.):
    """Initialization of the generalized mean pooling (GeM) layer.

    Args:
      power: Float power > 0 is an inverse exponent parameter, used during the
        generalized mean pooling computation. Setting this exponent as
        power > 1 increases the contrast of the pooled feature map and focuses
        on the salient features of the image. GeM is a generalization of the
        average pooling commonly used in classification networks (power = 1)
        and of spatial max-pooling layer (power = inf).
    """
    super(GeM, self).__init__()
    self.power = power
    self.eps = 1e-6

  def call(self, x, axis=None):
    """Invokes the GeM instance.

    Args:
      x: [B, H, W, D] A float32 Tensor.
      axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.

    Returns:
      output: [B, D] A float32 Tensor.
    """
    if axis is None:
      axis = [1, 2]
    return gem(x, power=self.power, eps=self.eps, axis=axis)


def mac(x, axis=None):
  """Performs global max pooling (MAC).

  Args:
    x: [B, H, W, D] A float32 Tensor.
    axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.

  Returns:
    output: [B, D] A float32 Tensor.
  """
  if axis is None:
    axis = [1, 2]
  return tf.reduce_max(x, axis=axis, keepdims=False)


def spoc(x, axis=None):
  """Performs average pooling (SPoC).

  Args:
    x: [B, H, W, D] A float32 Tensor.
    axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.

  Returns:
    output: [B, D] A float32 Tensor.
  """
  if axis is None:
    axis = [1, 2]
  return tf.reduce_mean(x, axis=axis, keepdims=False)


def gem(x, axis=None, power=3., eps=1e-6):
  """Performs generalized mean pooling (GeM).

  Args:
    x: [B, H, W, D] A float32 Tensor.
    axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
    power: Float, power > 0 is an inverse exponent parameter (GeM power).
    eps: Float, parameter for numerical stability.

  Returns:
    output: [B, D] A float32 Tensor.
  """
  if axis is None:
    axis = [1, 2]
  tmp = tf.pow(tf.maximum(x, eps), power)
  out = tf.pow(tf.reduce_mean(tmp, axis=axis, keepdims=False), 1. / power)
  return out
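A hedged usage sketch (not part of the commit) comparing the three pooling layers on one feature map; the import path mirrors the normalization test above, and the tensor sizes are arbitrary.

# Hedged sketch: MAC, SPoC and GeM descriptors from a [B, H, W, D] feature map.
import tensorflow as tf
from delf.python.pooling_layers import pooling

features = tf.random.uniform([2, 7, 7, 128])   # e.g. a backbone output

mac_desc = pooling.MAC()(features)             # [2, 128], max-pooled
spoc_desc = pooling.SPoC()(features)           # [2, 128], average-pooled
gem_desc = pooling.GeM(power=3.)(features)     # [2, 128], generalized mean

# As the GeM docstring notes, power=1 recovers average pooling (up to the eps
# clamp) and a large power approaches max pooling for positive activations.
print(tf.reduce_max(tf.abs(pooling.GeM(power=1.)(features) - spoc_desc)))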