Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
88253ce5
Commit
88253ce5
authored
Aug 12, 2020
by
Hongkun Yu
Committed by
A. Unique TensorFlower
Aug 12, 2020
Browse files
Internal change
PiperOrigin-RevId: 326286926
parent
52371ffe
Changes
205
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
433 additions
and
357 deletions
+433
-357
official/utils/misc/distribution_utils.py
official/utils/misc/distribution_utils.py
+17
-13
official/utils/misc/distribution_utils_test.py
official/utils/misc/distribution_utils_test.py
+2
-1
official/utils/misc/keras_utils.py
official/utils/misc/keras_utils.py
+7
-11
official/utils/misc/model_helpers.py
official/utils/misc/model_helpers.py
+6
-3
official/utils/misc/model_helpers_test.py
official/utils/misc/model_helpers_test.py
+19
-13
official/utils/registry_test.py
official/utils/registry_test.py
+10
-2
official/utils/testing/integration.py
official/utils/testing/integration.py
+6
-3
official/vision/detection/dataloader/anchor.py
official/vision/detection/dataloader/anchor.py
+40
-40
official/vision/detection/dataloader/input_reader.py
official/vision/detection/dataloader/input_reader.py
+2
-1
official/vision/detection/dataloader/retinanet_parser.py
official/vision/detection/dataloader/retinanet_parser.py
+57
-54
official/vision/detection/dataloader/shapemask_parser.py
official/vision/detection/dataloader/shapemask_parser.py
+0
-1
official/vision/detection/evaluation/coco_evaluator.py
official/vision/detection/evaluation/coco_evaluator.py
+15
-16
official/vision/detection/main.py
official/vision/detection/main.py
+1
-0
official/vision/detection/modeling/architecture/heads.py
official/vision/detection/modeling/architecture/heads.py
+128
-123
official/vision/detection/modeling/architecture/nn_ops.py
official/vision/detection/modeling/architecture/nn_ops.py
+2
-1
official/vision/detection/modeling/architecture/resnet.py
official/vision/detection/modeling/architecture/resnet.py
+93
-48
official/vision/detection/modeling/base_model.py
official/vision/detection/modeling/base_model.py
+4
-4
official/vision/detection/modeling/checkpoint_utils.py
official/vision/detection/modeling/checkpoint_utils.py
+16
-13
official/vision/detection/modeling/learning_rates.py
official/vision/detection/modeling/learning_rates.py
+4
-2
official/vision/detection/modeling/maskrcnn_model.py
official/vision/detection/modeling/maskrcnn_model.py
+4
-8
No files found.
official/utils/misc/distribution_utils.py
View file @
88253ce5
...
...
@@ -93,9 +93,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
Args:
distribution_strategy: a string specifying which distribution strategy to
use. Accepted values are "off", "one_device", "mirrored",
"parameter_server", "multi_worker_mirrored", and "tpu" -- case
insensitive.
"off" means not to use Distribution Strategy; "tpu" means to
use
TPUStrategy using `tpu_address`.
"parameter_server", "multi_worker_mirrored", and "tpu" -- case
insensitive.
"off" means not to use Distribution Strategy; "tpu" means to
use
TPUStrategy using `tpu_address`.
num_gpus: Number of GPUs to run this model.
all_reduce_alg: Optional. Specifies which algorithm to use when performing
all-reduce. For `MirroredStrategy`, valid values are "nccl" and
...
...
@@ -104,8 +104,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
device topology.
num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce`
or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`.
tpu_address: Optional. String that represents TPU to connect to. Must not
be None if `distribution_strategy` is set to `tpu`.
tpu_address: Optional. String that represents TPU to connect to. Must not be
None if `distribution_strategy` is set to `tpu`.
Returns:
tf.distribute.DistributionStrategy object.
Raises:
...
...
@@ -119,9 +120,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
distribution_strategy
=
distribution_strategy
.
lower
()
if
distribution_strategy
==
"off"
:
if
num_gpus
>
1
:
raise
ValueError
(
"When {} GPUs are specified, distribution_strategy "
"flag cannot be set to `off`."
.
format
(
num_gpus
))
raise
ValueError
(
"When {} GPUs are specified, distribution_strategy "
"flag cannot be set to `off`."
.
format
(
num_gpus
))
return
None
if
distribution_strategy
==
"tpu"
:
...
...
@@ -153,8 +153,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
if
distribution_strategy
==
"parameter_server"
:
return
tf
.
distribute
.
experimental
.
ParameterServerStrategy
()
raise
ValueError
(
"Unrecognized Distribution Strategy: %r"
%
distribution_strategy
)
raise
ValueError
(
"Unrecognized Distribution Strategy: %r"
%
distribution_strategy
)
def
configure_cluster
(
worker_hosts
=
None
,
task_index
=-
1
):
...
...
@@ -168,8 +168,9 @@ def configure_cluster(worker_hosts=None, task_index=-1):
"""
tf_config
=
json
.
loads
(
os
.
environ
.
get
(
"TF_CONFIG"
,
"{}"
))
if
tf_config
:
num_workers
=
(
len
(
tf_config
[
"cluster"
].
get
(
"chief"
,
[]))
+
len
(
tf_config
[
"cluster"
].
get
(
"worker"
,
[])))
num_workers
=
(
len
(
tf_config
[
"cluster"
].
get
(
"chief"
,
[]))
+
len
(
tf_config
[
"cluster"
].
get
(
"worker"
,
[])))
elif
worker_hosts
:
workers
=
worker_hosts
.
split
(
","
)
num_workers
=
len
(
workers
)
...
...
@@ -180,7 +181,10 @@ def configure_cluster(worker_hosts=None, task_index=-1):
"cluster"
:
{
"worker"
:
workers
},
"task"
:
{
"type"
:
"worker"
,
"index"
:
task_index
}
"task"
:
{
"type"
:
"worker"
,
"index"
:
task_index
}
})
else
:
num_workers
=
1
...
...
official/utils/misc/distribution_utils_test.py
View file @
88253ce5
...
...
@@ -25,6 +25,7 @@ from official.utils.misc import distribution_utils
class
GetDistributionStrategyTest
(
tf
.
test
.
TestCase
):
"""Tests for get_distribution_strategy."""
def
test_one_device_strategy_cpu
(
self
):
ds
=
distribution_utils
.
get_distribution_strategy
(
num_gpus
=
0
)
self
.
assertEquals
(
ds
.
num_replicas_in_sync
,
1
)
...
...
@@ -45,5 +46,5 @@ class GetDistributionStrategyTest(tf.test.TestCase):
self
.
assertIn
(
'GPU'
,
device
)
if
__name__
==
"
__main__
"
:
if
__name__
==
'
__main__
'
:
tf
.
test
.
main
()
official/utils/misc/keras_utils.py
View file @
88253ce5
...
...
@@ -25,7 +25,6 @@ import time
from
absl
import
logging
import
tensorflow
as
tf
from
tensorflow.python.eager
import
monitoring
global_batch_size_gauge
=
monitoring
.
IntGauge
(
...
...
@@ -121,8 +120,8 @@ class TimeHistory(tf.keras.callbacks.Callback):
# Record the timestamp of the first global step
if
not
self
.
timestamp_log
:
self
.
timestamp_log
.
append
(
BatchTimestamp
(
self
.
global_steps
,
self
.
start_time
))
self
.
timestamp_log
.
append
(
BatchTimestamp
(
self
.
global_steps
,
self
.
start_time
))
def
on_batch_end
(
self
,
batch
,
logs
=
None
):
"""Records elapsed time of the batch and calculates examples per second."""
...
...
@@ -175,12 +174,12 @@ def set_session_config(enable_xla=False):
if
enable_xla
:
tf
.
config
.
optimizer
.
set_jit
(
True
)
# TODO(hongkuny): remove set_config_v2 globally.
set_config_v2
=
set_session_config
def
set_gpu_thread_mode_and_count
(
gpu_thread_mode
,
datasets_num_private_threads
,
def
set_gpu_thread_mode_and_count
(
gpu_thread_mode
,
datasets_num_private_threads
,
num_gpus
,
per_gpu_thread_count
):
"""Set GPU thread mode and count, and adjust dataset threads count."""
cpu_count
=
multiprocessing
.
cpu_count
()
...
...
@@ -190,10 +189,8 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
per_gpu_thread_count
=
per_gpu_thread_count
or
2
os
.
environ
[
'TF_GPU_THREAD_MODE'
]
=
gpu_thread_mode
os
.
environ
[
'TF_GPU_THREAD_COUNT'
]
=
str
(
per_gpu_thread_count
)
logging
.
info
(
'TF_GPU_THREAD_COUNT: %s'
,
os
.
environ
[
'TF_GPU_THREAD_COUNT'
])
logging
.
info
(
'TF_GPU_THREAD_MODE: %s'
,
os
.
environ
[
'TF_GPU_THREAD_MODE'
])
logging
.
info
(
'TF_GPU_THREAD_COUNT: %s'
,
os
.
environ
[
'TF_GPU_THREAD_COUNT'
])
logging
.
info
(
'TF_GPU_THREAD_MODE: %s'
,
os
.
environ
[
'TF_GPU_THREAD_MODE'
])
# Limit data preprocessing threadpool to CPU cores minus number of total GPU
# private threads and memory copy threads.
...
...
@@ -201,7 +198,6 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
num_runtime_threads
=
num_gpus
if
not
datasets_num_private_threads
:
datasets_num_private_threads
=
min
(
cpu_count
-
total_gpu_thread_count
-
num_runtime_threads
,
num_gpus
*
8
)
cpu_count
-
total_gpu_thread_count
-
num_runtime_threads
,
num_gpus
*
8
)
logging
.
info
(
'Set datasets_num_private_threads to %s'
,
datasets_num_private_threads
)
official/utils/misc/model_helpers.py
View file @
88253ce5
...
...
@@ -58,9 +58,12 @@ def past_stop_threshold(stop_threshold, eval_metric):
return
False
def
generate_synthetic_data
(
input_shape
,
input_value
=
0
,
input_dtype
=
None
,
label_shape
=
None
,
label_value
=
0
,
label_dtype
=
None
):
def
generate_synthetic_data
(
input_shape
,
input_value
=
0
,
input_dtype
=
None
,
label_shape
=
None
,
label_value
=
0
,
label_dtype
=
None
):
"""Create a repeating dataset with constant values.
Args:
...
...
official/utils/misc/model_helpers_test.py
View file @
88253ce5
...
...
@@ -51,19 +51,19 @@ class PastStopThresholdTest(tf.test.TestCase):
def
test_past_stop_threshold_not_number
(
self
):
"""Tests for error conditions."""
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
"
str
"
,
1
)
model_helpers
.
past_stop_threshold
(
'
str
'
,
1
)
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
"
str
"
,
tf
.
constant
(
5
))
model_helpers
.
past_stop_threshold
(
'
str
'
,
tf
.
constant
(
5
))
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
"
str
"
,
"
another
"
)
model_helpers
.
past_stop_threshold
(
'
str
'
,
'
another
'
)
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
0
,
None
)
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
0.7
,
"
str
"
)
model_helpers
.
past_stop_threshold
(
0.7
,
'
str
'
)
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
tf
.
constant
(
4
),
None
)
...
...
@@ -74,12 +74,13 @@ class SyntheticDataTest(tf.test.TestCase):
def
test_generate_synethetic_data
(
self
):
input_element
,
label_element
=
tf
.
compat
.
v1
.
data
.
make_one_shot_iterator
(
model_helpers
.
generate_synthetic_data
(
input_shape
=
tf
.
TensorShape
([
5
]),
input_value
=
123
,
input_dtype
=
tf
.
float32
,
label_shape
=
tf
.
TensorShape
([]),
label_value
=
456
,
label_dtype
=
tf
.
int32
)).
get_next
()
model_helpers
.
generate_synthetic_data
(
input_shape
=
tf
.
TensorShape
([
5
]),
input_value
=
123
,
input_dtype
=
tf
.
float32
,
label_shape
=
tf
.
TensorShape
([]),
label_value
=
456
,
label_dtype
=
tf
.
int32
)).
get_next
()
with
self
.
session
()
as
sess
:
for
n
in
range
(
5
):
...
...
@@ -102,8 +103,13 @@ class SyntheticDataTest(tf.test.TestCase):
def
test_generate_nested_data
(
self
):
d
=
model_helpers
.
generate_synthetic_data
(
input_shape
=
{
'a'
:
tf
.
TensorShape
([
2
]),
'b'
:
{
'c'
:
tf
.
TensorShape
([
3
]),
'd'
:
tf
.
TensorShape
([])}},
input_shape
=
{
'a'
:
tf
.
TensorShape
([
2
]),
'b'
:
{
'c'
:
tf
.
TensorShape
([
3
]),
'd'
:
tf
.
TensorShape
([])
}
},
input_value
=
1.1
)
element
=
tf
.
compat
.
v1
.
data
.
make_one_shot_iterator
(
d
).
get_next
()
...
...
@@ -121,5 +127,5 @@ class SyntheticDataTest(tf.test.TestCase):
self
.
assertAllClose
(
inp
[
'b'
][
'd'
],
1.1
)
if
__name__
==
"
__main__
"
:
if
__name__
==
'
__main__
'
:
tf
.
test
.
main
()
official/utils/registry_test.py
View file @
88253ce5
...
...
@@ -31,18 +31,20 @@ class RegistryTest(tf.test.TestCase):
@
registry
.
register
(
collection
,
'functions/func_0'
)
def
func_test
():
pass
self
.
assertEqual
(
registry
.
lookup
(
collection
,
'functions/func_0'
),
func_test
)
self
.
assertEqual
(
registry
.
lookup
(
collection
,
'functions/func_0'
),
func_test
)
@
registry
.
register
(
collection
,
'classes/cls_0'
)
class
ClassRegistryKey
:
pass
self
.
assertEqual
(
registry
.
lookup
(
collection
,
'classes/cls_0'
),
ClassRegistryKey
)
@
registry
.
register
(
collection
,
ClassRegistryKey
)
class
ClassRegistryValue
:
pass
self
.
assertEqual
(
registry
.
lookup
(
collection
,
ClassRegistryKey
),
ClassRegistryValue
)
...
...
@@ -52,12 +54,15 @@ class RegistryTest(tf.test.TestCase):
@
registry
.
register
(
collection
,
'functions/func_0'
)
def
func_test0
():
pass
@
registry
.
register
(
collection
,
'func_1'
)
def
func_test1
():
pass
@
registry
.
register
(
collection
,
func_test1
)
def
func_test2
():
pass
expected_collection
=
{
'functions'
:
{
'func_0'
:
func_test0
,
...
...
@@ -73,10 +78,13 @@ class RegistryTest(tf.test.TestCase):
@
registry
.
register
(
collection
,
'functions/func_0'
)
def
func_test0
():
# pylint: disable=unused-variable
pass
with
self
.
assertRaises
(
KeyError
):
@
registry
.
register
(
collection
,
'functions/func_0/sub_func'
)
def
func_test1
():
# pylint: disable=unused-variable
pass
with
self
.
assertRaises
(
LookupError
):
registry
.
lookup
(
collection
,
'non-exist'
)
...
...
official/utils/testing/integration.py
View file @
88253ce5
...
...
@@ -12,8 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper code to run complete models from within python.
"""
"""Helper code to run complete models from within python."""
from
__future__
import
absolute_import
from
__future__
import
division
...
...
@@ -31,7 +30,11 @@ from official.utils.flags import core as flags_core
@
flagsaver
.
flagsaver
def
run_synthetic
(
main
,
tmp_root
,
extra_flags
=
None
,
synth
=
True
,
train_epochs
=
1
,
def
run_synthetic
(
main
,
tmp_root
,
extra_flags
=
None
,
synth
=
True
,
train_epochs
=
1
,
epochs_between_evals
=
1
):
"""Performs a minimal run of a model.
...
...
official/vision/detection/dataloader/anchor.py
View file @
88253ce5
...
...
@@ -19,6 +19,7 @@ from __future__ import division
from
__future__
import
print_function
import
collections
import
tensorflow
as
tf
from
official.vision.detection.utils.object_detection
import
argmax_matcher
from
official.vision.detection.utils.object_detection
import
balanced_positive_negative_sampler
...
...
@@ -31,30 +32,25 @@ from official.vision.detection.utils.object_detection import target_assigner
class
Anchor
(
object
):
"""Anchor class for anchor-based object detectors."""
def
__init__
(
self
,
min_level
,
max_level
,
num_scales
,
aspect_ratios
,
anchor_size
,
image_size
):
def
__init__
(
self
,
min_level
,
max_level
,
num_scales
,
aspect_ratios
,
anchor_size
,
image_size
):
"""Constructs multiscale anchors.
Args:
min_level: integer number of minimum level of the output feature pyramid.
max_level: integer number of maximum level of the output feature pyramid.
num_scales: integer number representing intermediate scales added
on each
level. For instance, num_scales=2 adds one additional
intermediate
anchor scales [2^0, 2^0.5] on each level.
num_scales: integer number representing intermediate scales added
on each
level. For instance, num_scales=2 adds one additional
intermediate
anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: list of float numbers representing the aspect ratio anchors
added on each level. The number indicates the ratio of width to height.
For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each
scale level.
anchor_size: float number representing the scale of size of the base
anchor to the feature stride 2^level.
image_size: a list of integer numbers or Tensors representing
[height,
width] of the input image size. The image_size should be
divisible by the
largest feature stride 2^max_level.
image_size: a list of integer numbers or Tensors representing
[height,
width] of the input image size. The image_size should be
divisible by the
largest feature stride 2^max_level.
"""
self
.
min_level
=
min_level
self
.
max_level
=
max_level
...
...
@@ -76,11 +72,11 @@ class Anchor(object):
boxes_l
=
[]
for
scale
in
range
(
self
.
num_scales
):
for
aspect_ratio
in
self
.
aspect_ratios
:
stride
=
2
**
level
intermediate_scale
=
2
**
(
scale
/
float
(
self
.
num_scales
))
stride
=
2
**
level
intermediate_scale
=
2
**
(
scale
/
float
(
self
.
num_scales
))
base_anchor_size
=
self
.
anchor_size
*
stride
*
intermediate_scale
aspect_x
=
aspect_ratio
**
0.5
aspect_y
=
aspect_ratio
**
-
0.5
aspect_x
=
aspect_ratio
**
0.5
aspect_y
=
aspect_ratio
**-
0.5
half_anchor_size_x
=
base_anchor_size
*
aspect_x
/
2.0
half_anchor_size_y
=
base_anchor_size
*
aspect_y
/
2.0
x
=
tf
.
range
(
stride
/
2
,
self
.
image_size
[
1
],
stride
)
...
...
@@ -89,8 +85,10 @@ class Anchor(object):
xv
=
tf
.
cast
(
tf
.
reshape
(
xv
,
[
-
1
]),
dtype
=
tf
.
float32
)
yv
=
tf
.
cast
(
tf
.
reshape
(
yv
,
[
-
1
]),
dtype
=
tf
.
float32
)
# Tensor shape Nx4.
boxes
=
tf
.
stack
([
yv
-
half_anchor_size_y
,
xv
-
half_anchor_size_x
,
yv
+
half_anchor_size_y
,
xv
+
half_anchor_size_x
],
boxes
=
tf
.
stack
([
yv
-
half_anchor_size_y
,
xv
-
half_anchor_size_x
,
yv
+
half_anchor_size_y
,
xv
+
half_anchor_size_x
],
axis
=
1
)
boxes_l
.
append
(
boxes
)
# Concat anchors on the same level to tensor shape NxAx4.
...
...
@@ -104,11 +102,11 @@ class Anchor(object):
unpacked_labels
=
collections
.
OrderedDict
()
count
=
0
for
level
in
range
(
self
.
min_level
,
self
.
max_level
+
1
):
feat_size_y
=
tf
.
cast
(
self
.
image_size
[
0
]
/
2
**
level
,
tf
.
int32
)
feat_size_x
=
tf
.
cast
(
self
.
image_size
[
1
]
/
2
**
level
,
tf
.
int32
)
feat_size_y
=
tf
.
cast
(
self
.
image_size
[
0
]
/
2
**
level
,
tf
.
int32
)
feat_size_x
=
tf
.
cast
(
self
.
image_size
[
1
]
/
2
**
level
,
tf
.
int32
)
steps
=
feat_size_y
*
feat_size_x
*
self
.
anchors_per_location
unpacked_labels
[
level
]
=
tf
.
reshape
(
labels
[
count
:
count
+
steps
],
[
feat_size_y
,
feat_size_x
,
-
1
])
unpacked_labels
[
level
]
=
tf
.
reshape
(
labels
[
count
:
count
+
steps
],
[
feat_size_y
,
feat_size_x
,
-
1
])
count
+=
steps
return
unpacked_labels
...
...
@@ -124,10 +122,7 @@ class Anchor(object):
class
AnchorLabeler
(
object
):
"""Labeler for dense object detector."""
def
__init__
(
self
,
anchor
,
match_threshold
=
0.5
,
unmatched_threshold
=
0.5
):
def
__init__
(
self
,
anchor
,
match_threshold
=
0.5
,
unmatched_threshold
=
0.5
):
"""Constructs anchor labeler to assign labels to anchors.
Args:
...
...
@@ -161,6 +156,7 @@ class AnchorLabeler(object):
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: An integer tensor with shape [N, 1] representing groundtruth
classes.
Returns:
cls_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
...
...
@@ -205,11 +201,14 @@ class AnchorLabeler(object):
class
RpnAnchorLabeler
(
AnchorLabeler
):
"""Labeler for Region Proposal Network."""
def
__init__
(
self
,
anchor
,
match_threshold
=
0.7
,
unmatched_threshold
=
0.3
,
rpn_batch_size_per_im
=
256
,
def
__init__
(
self
,
anchor
,
match_threshold
=
0.7
,
unmatched_threshold
=
0.3
,
rpn_batch_size_per_im
=
256
,
rpn_fg_fraction
=
0.5
):
AnchorLabeler
.
__init__
(
self
,
anchor
,
match_threshold
=
0.7
,
unmatched_threshold
=
0.3
)
AnchorLabeler
.
__init__
(
self
,
anchor
,
match_threshold
=
0.7
,
unmatched_threshold
=
0.3
)
self
.
_rpn_batch_size_per_im
=
rpn_batch_size_per_im
self
.
_rpn_fg_fraction
=
rpn_fg_fraction
...
...
@@ -219,11 +218,12 @@ class RpnAnchorLabeler(AnchorLabeler):
This function performs subsampling for foreground (fg) and background (bg)
anchors.
Args:
match_results: An integer tensor with shape [N] representing the
matching results of anchors. (1) match_results[i]>=0,
meaning that column i is matched with row match_results[i].
(2) match_results[i]=-1, meaning that column i is not matched.
(3) match_results[i]=-2, meaning that column i is ignored.
match_results: An integer tensor with shape [N] representing the matching
results of anchors. (1) match_results[i]>=0, meaning that column i is
matched with row match_results[i]. (2) match_results[i]=-1, meaning that
column i is not matched. (3) match_results[i]=-2, meaning that column i
is ignored.
Returns:
score_targets: an integer tensor with a shape of [N].
(1) score_targets[i]=1, the anchor is a positive sample.
...
...
@@ -241,8 +241,7 @@ class RpnAnchorLabeler(AnchorLabeler):
indicator
=
tf
.
greater
(
match_results
,
-
2
)
labels
=
tf
.
greater
(
match_results
,
-
1
)
samples
=
sampler
.
subsample
(
indicator
,
self
.
_rpn_batch_size_per_im
,
labels
)
samples
=
sampler
.
subsample
(
indicator
,
self
.
_rpn_batch_size_per_im
,
labels
)
positive_labels
=
tf
.
where
(
tf
.
logical_and
(
samples
,
labels
),
tf
.
constant
(
2
,
dtype
=
tf
.
int32
,
shape
=
match_results
.
shape
),
...
...
@@ -253,8 +252,8 @@ class RpnAnchorLabeler(AnchorLabeler):
tf
.
constant
(
0
,
dtype
=
tf
.
int32
,
shape
=
match_results
.
shape
))
ignore_labels
=
tf
.
fill
(
match_results
.
shape
,
-
1
)
return
(
ignore_labels
+
positive_labels
+
negative_labels
,
positive_labels
,
negative_labels
)
return
(
ignore_labels
+
positive_labels
+
negative_labels
,
positive_labels
,
negative_labels
)
def
label_anchors
(
self
,
gt_boxes
,
gt_labels
):
"""Labels anchors with ground truth inputs.
...
...
@@ -264,6 +263,7 @@ class RpnAnchorLabeler(AnchorLabeler):
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: An integer tensor with shape [N, 1] representing groundtruth
classes.
Returns:
score_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
...
...
official/vision/detection/dataloader/input_reader.py
View file @
88253ce5
...
...
@@ -91,7 +91,8 @@ class InputFn(object):
dataset
=
dataset
.
repeat
()
dataset
=
dataset
.
interleave
(
map_func
=
self
.
_dataset_fn
,
cycle_length
=
32
,
map_func
=
self
.
_dataset_fn
,
cycle_length
=
32
,
num_parallel_calls
=
tf
.
data
.
experimental
.
AUTOTUNE
)
if
self
.
_is_training
:
...
...
official/vision/detection/dataloader/retinanet_parser.py
View file @
88253ce5
...
...
@@ -79,9 +79,9 @@ class Parser(object):
output_size should be divided by the largest feature stride 2^max_level.
min_level: `int` number of minimum level of the output feature pyramid.
max_level: `int` number of maximum level of the output feature pyramid.
num_scales: `int` number representing intermediate scales added
on each
level. For instance, num_scales=2 adds one additional
intermediate
anchor scales [2^0, 2^0.5] on each level.
num_scales: `int` number representing intermediate scales added
on each
level. For instance, num_scales=2 adds one additional
intermediate
anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: `list` of float numbers representing the aspect ratio
anchors added on each level. The number indicates the ratio of width to
height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
...
...
@@ -94,8 +94,8 @@ class Parser(object):
unmatched_threshold: `float` number between 0 and 1 representing the
upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative.
aug_rand_hflip: `bool`, if True, augment training with random
horizontal
flip.
aug_rand_hflip: `bool`, if True, augment training with random
horizontal
flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
data augmentation during training.
aug_scale_max: `float`, the maximum scale applied to `output_size` for
...
...
@@ -109,8 +109,8 @@ class Parser(object):
max_num_instances: `int` number of maximum number of instances in an
image. The groundtruth data will be padded to `max_num_instances`.
use_bfloat16: `bool`, if True, cast output image to tf.bfloat16.
mode: a ModeKeys. Specifies if this is training, evaluation, prediction
or
prediction with groundtruths in the outputs.
mode: a ModeKeys. Specifies if this is training, evaluation, prediction
or
prediction with groundtruths in the outputs.
"""
self
.
_mode
=
mode
self
.
_max_num_instances
=
max_num_instances
...
...
@@ -232,8 +232,8 @@ class Parser(object):
image
,
image_info
=
input_utils
.
resize_and_crop_image
(
image
,
self
.
_output_size
,
padded_size
=
input_utils
.
compute_padded_size
(
self
.
_output_size
,
2
**
self
.
_max_level
),
padded_size
=
input_utils
.
compute_padded_size
(
self
.
_output_size
,
2
**
self
.
_max_level
),
aug_scale_min
=
self
.
_aug_scale_min
,
aug_scale_max
=
self
.
_aug_scale_max
)
image_height
,
image_width
,
_
=
image
.
get_shape
().
as_list
()
...
...
@@ -241,22 +241,21 @@ class Parser(object):
# Resizes and crops boxes.
image_scale
=
image_info
[
2
,
:]
offset
=
image_info
[
3
,
:]
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
,
image_scale
,
image_info
[
1
,
:],
offset
)
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
,
image_scale
,
image_info
[
1
,
:],
offset
)
# Filters out ground truth boxes that are all zeros.
indices
=
box_utils
.
get_non_empty_box_indices
(
boxes
)
boxes
=
tf
.
gather
(
boxes
,
indices
)
classes
=
tf
.
gather
(
classes
,
indices
)
# Assigns anchors.
input_anchor
=
anchor
.
Anchor
(
self
.
_min_level
,
self
.
_max_level
,
self
.
_num_scale
s
,
self
.
_aspect_ratios
,
self
.
_anchor_size
,
(
image_height
,
image_width
))
anchor_labeler
=
anchor
.
AnchorLabeler
(
input_anchor
,
self
.
_match_threshold
,
self
.
_unmatched_threshold
)
input_anchor
=
anchor
.
Anchor
(
self
.
_min_level
,
self
.
_max_level
,
self
.
_num_scales
,
self
.
_aspect_ratio
s
,
self
.
_anchor_size
,
(
image_height
,
image_width
))
anchor_labeler
=
anchor
.
AnchorLabeler
(
input_anchor
,
self
.
_match_threshold
,
self
.
_unmatched_threshold
)
(
cls_targets
,
box_targets
,
num_positives
)
=
anchor_labeler
.
label_anchors
(
boxes
,
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
boxes
,
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
# If bfloat16 is used, casts input image to tf.bfloat16.
if
self
.
_use_bfloat16
:
...
...
@@ -292,8 +291,8 @@ class Parser(object):
image
,
image_info
=
input_utils
.
resize_and_crop_image
(
image
,
self
.
_output_size
,
padded_size
=
input_utils
.
compute_padded_size
(
self
.
_output_size
,
2
**
self
.
_max_level
),
padded_size
=
input_utils
.
compute_padded_size
(
self
.
_output_size
,
2
**
self
.
_max_level
),
aug_scale_min
=
1.0
,
aug_scale_max
=
1.0
)
image_height
,
image_width
,
_
=
image
.
get_shape
().
as_list
()
...
...
@@ -301,22 +300,21 @@ class Parser(object):
# Resizes and crops boxes.
image_scale
=
image_info
[
2
,
:]
offset
=
image_info
[
3
,
:]
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
,
image_scale
,
image_info
[
1
,
:],
offset
)
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
,
image_scale
,
image_info
[
1
,
:],
offset
)
# Filters out ground truth boxes that are all zeros.
indices
=
box_utils
.
get_non_empty_box_indices
(
boxes
)
boxes
=
tf
.
gather
(
boxes
,
indices
)
classes
=
tf
.
gather
(
classes
,
indices
)
# Assigns anchors.
input_anchor
=
anchor
.
Anchor
(
self
.
_min_level
,
self
.
_max_level
,
self
.
_num_scale
s
,
self
.
_aspect_ratios
,
self
.
_anchor_size
,
(
image_height
,
image_width
))
anchor_labeler
=
anchor
.
AnchorLabeler
(
input_anchor
,
self
.
_match_threshold
,
self
.
_unmatched_threshold
)
input_anchor
=
anchor
.
Anchor
(
self
.
_min_level
,
self
.
_max_level
,
self
.
_num_scales
,
self
.
_aspect_ratio
s
,
self
.
_anchor_size
,
(
image_height
,
image_width
))
anchor_labeler
=
anchor
.
AnchorLabeler
(
input_anchor
,
self
.
_match_threshold
,
self
.
_unmatched_threshold
)
(
cls_targets
,
box_targets
,
num_positives
)
=
anchor_labeler
.
label_anchors
(
boxes
,
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
boxes
,
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
# If bfloat16 is used, casts input image to tf.bfloat16.
if
self
.
_use_bfloat16
:
...
...
@@ -324,18 +322,24 @@ class Parser(object):
# Sets up groundtruth data for evaluation.
groundtruths
=
{
'source_id'
:
data
[
'source_id'
],
'num_groundtrtuhs'
:
tf
.
shape
(
data
[
'groundtruth_classes'
]),
'image_info'
:
image_info
,
'boxes'
:
box_utils
.
denormalize_boxes
(
data
[
'groundtruth_boxes'
],
image_shape
),
'classes'
:
data
[
'groundtruth_classes'
],
'areas'
:
data
[
'groundtruth_area'
],
'is_crowds'
:
tf
.
cast
(
data
[
'groundtruth_is_crowd'
],
tf
.
int32
),
'source_id'
:
data
[
'source_id'
],
'num_groundtrtuhs'
:
tf
.
shape
(
data
[
'groundtruth_classes'
]),
'image_info'
:
image_info
,
'boxes'
:
box_utils
.
denormalize_boxes
(
data
[
'groundtruth_boxes'
],
image_shape
),
'classes'
:
data
[
'groundtruth_classes'
],
'areas'
:
data
[
'groundtruth_area'
],
'is_crowds'
:
tf
.
cast
(
data
[
'groundtruth_is_crowd'
],
tf
.
int32
),
}
groundtruths
[
'source_id'
]
=
process_source_id
(
groundtruths
[
'source_id'
])
groundtruths
=
pad_groundtruths_to_fixed_size
(
groundtruths
,
self
.
_max_num_instances
)
groundtruths
=
pad_groundtruths_to_fixed_size
(
groundtruths
,
self
.
_max_num_instances
)
# Packs labels for model_fn outputs.
labels
=
{
...
...
@@ -361,8 +365,8 @@ class Parser(object):
image
,
image_info
=
input_utils
.
resize_and_crop_image
(
image
,
self
.
_output_size
,
padded_size
=
input_utils
.
compute_padded_size
(
self
.
_output_size
,
2
**
self
.
_max_level
),
padded_size
=
input_utils
.
compute_padded_size
(
self
.
_output_size
,
2
**
self
.
_max_level
),
aug_scale_min
=
1.0
,
aug_scale_max
=
1.0
)
image_height
,
image_width
,
_
=
image
.
get_shape
().
as_list
()
...
...
@@ -372,9 +376,9 @@ class Parser(object):
image
=
tf
.
cast
(
image
,
dtype
=
tf
.
bfloat16
)
# Compute Anchor boxes.
input_anchor
=
anchor
.
Anchor
(
self
.
_min_level
,
self
.
_max_level
,
self
.
_num_scale
s
,
self
.
_aspect_ratios
,
self
.
_anchor_size
,
(
image_height
,
image_width
))
input_anchor
=
anchor
.
Anchor
(
self
.
_min_level
,
self
.
_max_level
,
self
.
_num_scales
,
self
.
_aspect_ratio
s
,
self
.
_anchor_size
,
(
image_height
,
image_width
))
labels
=
{
'anchor_boxes'
:
input_anchor
.
multilevel_boxes
,
...
...
@@ -384,8 +388,8 @@ class Parser(object):
# in labels.
if
self
.
_mode
==
ModeKeys
.
PREDICT_WITH_GT
:
# Converts boxes from normalized coordinates to pixel coordinates.
boxes
=
box_utils
.
denormalize_boxes
(
data
[
'groundtruth_boxes'
],
image_shape
)
boxes
=
box_utils
.
denormalize_boxes
(
data
[
'groundtruth_boxes'
],
image_shape
)
groundtruths
=
{
'source_id'
:
data
[
'source_id'
],
'num_detections'
:
tf
.
shape
(
data
[
'groundtruth_classes'
]),
...
...
@@ -395,8 +399,8 @@ class Parser(object):
'is_crowds'
:
tf
.
cast
(
data
[
'groundtruth_is_crowd'
],
tf
.
int32
),
}
groundtruths
[
'source_id'
]
=
process_source_id
(
groundtruths
[
'source_id'
])
groundtruths
=
pad_groundtruths_to_fixed_size
(
groundtruths
,
self
.
_max_num_instances
)
groundtruths
=
pad_groundtruths_to_fixed_size
(
groundtruths
,
self
.
_max_num_instances
)
labels
[
'groundtruths'
]
=
groundtruths
# Computes training objective for evaluation loss.
...
...
@@ -404,18 +408,17 @@ class Parser(object):
image_scale
=
image_info
[
2
,
:]
offset
=
image_info
[
3
,
:]
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
,
image_scale
,
image_info
[
1
,
:],
offset
)
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
,
image_scale
,
image_info
[
1
,
:],
offset
)
# Filters out ground truth boxes that are all zeros.
indices
=
box_utils
.
get_non_empty_box_indices
(
boxes
)
boxes
=
tf
.
gather
(
boxes
,
indices
)
# Assigns anchors.
anchor_labeler
=
anchor
.
AnchorLabeler
(
input_anchor
,
self
.
_match_threshold
,
self
.
_unmatched_threshold
)
anchor_labeler
=
anchor
.
AnchorLabeler
(
input_anchor
,
self
.
_match_threshold
,
self
.
_unmatched_threshold
)
(
cls_targets
,
box_targets
,
num_positives
)
=
anchor_labeler
.
label_anchors
(
boxes
,
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
boxes
,
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
labels
[
'cls_targets'
]
=
cls_targets
labels
[
'box_targets'
]
=
box_targets
labels
[
'num_positives'
]
=
num_positives
...
...
official/vision/detection/dataloader/shapemask_parser.py
View file @
88253ce5
...
...
@@ -21,7 +21,6 @@ Weicheng Kuo, Anelia Angelova, Jitendra Malik, Tsung-Yi Lin
ShapeMask: Learning to Segment Novel Objects by Refining Shape Priors.
arXiv:1904.03239.
"""
import
tensorflow
as
tf
from
official.vision.detection.dataloader
import
anchor
...
...
official/vision/detection/evaluation/coco_evaluator.py
View file @
88253ce5
...
...
@@ -32,6 +32,7 @@ from __future__ import print_function
import
atexit
import
tempfile
import
numpy
as
np
from
absl
import
logging
from
pycocotools
import
cocoeval
...
...
@@ -197,22 +198,21 @@ class COCOEvaluator(object):
"""Update and aggregate detection results and groundtruth data.
Args:
predictions: a dictionary of numpy arrays including the fields below.
See
different parsers under `../dataloader` for more details.
predictions: a dictionary of numpy arrays including the fields below.
See
different parsers under `../dataloader` for more details.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- image_info [if `need_rescale_bboxes` is True]: a numpy array of
float of shape [batch_size, 4, 2].
- num_detections: a numpy array of
int of shape [batch_size].
- num_detections: a numpy array of int of shape [batch_size].
- detection_boxes: a numpy array of float of shape [batch_size, K, 4].
- detection_classes: a numpy array of int of shape [batch_size, K].
- detection_scores: a numpy array of float of shape [batch_size, K].
Optional fields:
- detection_masks: a numpy array of float of shape
[batch_size, K,
mask_height, mask_width].
groundtruths: a dictionary of numpy arrays including the fields below.
See
also different parsers under `../dataloader` for more details.
- detection_masks: a numpy array of float of shape
[batch_size, K,
mask_height, mask_width].
groundtruths: a dictionary of numpy arrays including the fields below.
See
also different parsers under `../dataloader` for more details.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- height: a numpy array of int of shape [batch_size].
...
...
@@ -222,12 +222,12 @@ class COCOEvaluator(object):
- classes: a numpy array of int of shape [batch_size, K].
Optional fields:
- is_crowds: a numpy array of int of shape [batch_size, K]. If the
field is absent, it is assumed that this instance is not crowd.
- areas: a numpy array of float of shape [batch_size, K]. If the
field
is absent, the area is calculated using either boxes or
masks
depending on which one is available.
- masks: a numpy array of float of shape
[batch_size, K, mask_height,
mask_width],
field is absent, it is assumed that this instance is not crowd.
- areas: a numpy array of float of shape [batch_size, K]. If the
field
is absent, the area is calculated using either boxes or
masks
depending on which one is available.
- masks: a numpy array of float of shape
[batch_size, K, mask_height,
mask_width],
Raises:
ValueError: if the required prediction or groundtruth fields are not
...
...
@@ -318,8 +318,7 @@ class ShapeMaskCOCOEvaluator(COCOEvaluator):
metrics
=
np
.
hstack
((
coco_metrics
,
mcoco_eval
.
stats
))
else
:
mask_coco_metrics
=
mcoco_eval
.
category_stats
val_catg_idx
=
np
.
isin
(
mcoco_eval
.
params
.
catIds
,
self
.
_eval_categories
)
val_catg_idx
=
np
.
isin
(
mcoco_eval
.
params
.
catIds
,
self
.
_eval_categories
)
# Gather the valid evaluation of the eval categories.
if
np
.
any
(
val_catg_idx
):
mean_val_metrics
=
[]
...
...
official/vision/detection/main.py
View file @
88253ce5
...
...
@@ -23,6 +23,7 @@ import functools
import
pprint
# pylint: disable=g-bad-import-order
# Import libraries
import
tensorflow
as
tf
from
absl
import
app
...
...
official/vision/detection/modeling/architecture/heads.py
View file @
88253ce5
...
...
@@ -31,17 +31,17 @@ from official.vision.detection.ops import spatial_transform_ops
class
RpnHead
(
tf
.
keras
.
layers
.
Layer
):
"""Region Proposal Network head."""
def
__init__
(
self
,
min_lev
el
,
m
ax
_level
,
anchors_per_location
,
num_convs
=
2
,
num_
filter
s
=
2
56
,
use_separable_conv
=
False
,
activation
=
'relu'
,
use_batch_norm
=
True
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
def
__init__
(
s
el
f
,
m
in
_level
,
max_level
,
anchors_per_location
,
num_
conv
s
=
2
,
num_filters
=
256
,
use_separable_conv
=
False
,
activation
=
'relu'
,
use_batch_norm
=
True
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
"""Initialize params to build Region Proposal Network head.
Args:
...
...
@@ -57,8 +57,8 @@ class RpnHead(tf.keras.layers.Layer):
is used.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
"""
self
.
_min_level
=
min_level
self
.
_max_level
=
max_level
...
...
@@ -140,17 +140,17 @@ class RpnHead(tf.keras.layers.Layer):
class
FastrcnnHead
(
tf
.
keras
.
layers
.
Layer
):
"""Fast R-CNN box head."""
def
__init__
(
self
,
num_classes
,
num_c
onvs
=
0
,
num_
filters
=
256
,
use_separable_conv
=
False
,
num_fcs
=
2
,
fc_dims
=
1024
,
activation
=
'relu'
,
use_batch_norm
=
True
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
def
__init__
(
self
,
num_c
lasses
,
num_
convs
=
0
,
num_filters
=
256
,
use_separable_conv
=
False
,
num_fcs
=
2
,
fc_dims
=
1024
,
activation
=
'relu'
,
use_batch_norm
=
True
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
"""Initialize params to build Fast R-CNN box head.
Args:
...
...
@@ -167,8 +167,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
layers.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
"""
self
.
_num_classes
=
num_classes
...
...
@@ -207,7 +207,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
strides
=
(
1
,
1
),
padding
=
'same'
,
dilation_rate
=
(
1
,
1
),
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
name
=
'conv_{}'
.
format
(
i
)))
if
self
.
_use_batch_norm
:
self
.
_conv_bn_ops
.
append
(
self
.
_norm_activation
())
...
...
@@ -218,7 +219,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
self
.
_fc_ops
.
append
(
tf
.
keras
.
layers
.
Dense
(
units
=
self
.
_fc_dims
,
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
name
=
'fc{}'
.
format
(
i
)))
if
self
.
_use_batch_norm
:
self
.
_fc_bn_ops
.
append
(
self
.
_norm_activation
(
fused
=
False
))
...
...
@@ -238,8 +240,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
"""Box and class branches for the Mask-RCNN model.
Args:
roi_features: A ROI feature tensor of shape
[batch_size, num_rois,
height_l, width_l, num_filters].
roi_features: A ROI feature tensor of shape
[batch_size, num_rois,
height_l, width_l, num_filters].
is_training: `boolean`, True if model is in training mode.
Returns:
...
...
@@ -277,16 +279,16 @@ class FastrcnnHead(tf.keras.layers.Layer):
class
MaskrcnnHead
(
tf
.
keras
.
layers
.
Layer
):
"""Mask R-CNN head."""
def
__init__
(
self
,
num_classes
,
mask_target_size
,
num_convs
=
4
,
num_
filters
=
256
,
use_separable_conv
=
False
,
activation
=
'relu'
,
use_batch_norm
=
True
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
def
__init__
(
self
,
num_classes
,
mask_target_size
,
num_
convs
=
4
,
num_filters
=
256
,
use_separable_conv
=
False
,
activation
=
'relu'
,
use_batch_norm
=
True
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
"""Initialize params to build Mask R-CNN head.
Args:
...
...
@@ -300,8 +302,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
is used.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
"""
self
.
_num_classes
=
num_classes
self
.
_mask_target_size
=
mask_target_size
...
...
@@ -336,7 +338,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
strides
=
(
1
,
1
),
padding
=
'same'
,
dilation_rate
=
(
1
,
1
),
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
name
=
'mask-conv-l%d'
%
i
))
self
.
_mask_conv_transpose
=
tf
.
keras
.
layers
.
Conv2DTranspose
(
self
.
_num_filters
,
...
...
@@ -353,10 +356,10 @@ class MaskrcnnHead(tf.keras.layers.Layer):
"""Mask branch for the Mask-RCNN model.
Args:
roi_features: A ROI feature tensor of shape
[batch_size, num_rois,
height_l, width_l, num_filters].
class_indices: a Tensor of shape [batch_size, num_rois], indicating
which
class the ROI is.
roi_features: A ROI feature tensor of shape
[batch_size, num_rois,
height_l, width_l, num_filters].
class_indices: a Tensor of shape [batch_size, num_rois], indicating
which
class the ROI is.
is_training: `boolean`, True if model is in training mode.
Returns:
...
...
@@ -415,16 +418,16 @@ class MaskrcnnHead(tf.keras.layers.Layer):
class
RetinanetHead
(
object
):
"""RetinaNet head."""
def
__init__
(
self
,
min_lev
el
,
m
ax
_level
,
num_classes
,
anchors_per_location
,
num_convs
=
4
,
num_
filters
=
256
,
use_separable_conv
=
False
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
def
__init__
(
s
el
f
,
m
in
_level
,
max_level
,
num_classes
,
anchors_per_location
,
num_
convs
=
4
,
num_filters
=
256
,
use_separable_conv
=
False
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
"""Initialize params to build RetinaNet head.
Args:
...
...
@@ -437,8 +440,8 @@ class RetinanetHead(object):
num_filters: `int` number of filters used in the head architecture.
use_separable_conv: `bool` to indicate whether to use separable
convolution.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
"""
self
.
_min_level
=
min_level
self
.
_max_level
=
max_level
...
...
@@ -600,12 +603,8 @@ class RetinanetHead(object):
class
ShapemaskPriorHead
(
object
):
"""ShapeMask Prior head."""
def
__init__
(
self
,
num_classes
,
num_downsample_channels
,
mask_crop_size
,
use_category_for_mask
,
shape_prior_path
):
def
__init__
(
self
,
num_classes
,
num_downsample_channels
,
mask_crop_size
,
use_category_for_mask
,
shape_prior_path
):
"""Initialize params to build ShapeMask Prior head.
Args:
...
...
@@ -632,12 +631,12 @@ class ShapemaskPriorHead(object):
Args:
fpn_features: a dictionary of FPN features.
boxes: a float tensor of shape [batch_size, num_instances, 4]
representing
the tight gt boxes from dataloader/detection.
boxes: a float tensor of shape [batch_size, num_instances, 4]
representing
the tight gt boxes from dataloader/detection.
outer_boxes: a float tensor of shape [batch_size, num_instances, 4]
representing the loose gt boxes from dataloader/detection.
classes: a int Tensor of shape [batch_size, num_instances]
of instance
classes.
classes: a int Tensor of shape [batch_size, num_instances]
of instance
classes.
is_training: training mode or not.
Returns:
...
...
@@ -658,8 +657,9 @@ class ShapemaskPriorHead(object):
shape_priors
=
self
.
_get_priors
()
# Get uniform priors for each outer box.
uniform_priors
=
tf
.
ones
([
batch_size
,
num_instances
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
])
uniform_priors
=
tf
.
ones
([
batch_size
,
num_instances
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
])
uniform_priors
=
spatial_transform_ops
.
crop_mask_in_target_box
(
uniform_priors
,
boxes
,
outer_boxes
,
self
.
_mask_crop_size
)
...
...
@@ -668,8 +668,9 @@ class ShapemaskPriorHead(object):
tf
.
cast
(
instance_features
,
tf
.
float32
),
uniform_priors
,
classes
)
instance_priors
=
tf
.
gather
(
shape_priors
,
classes
)
instance_priors
*=
tf
.
expand_dims
(
tf
.
expand_dims
(
tf
.
cast
(
prior_distribution
,
tf
.
float32
),
axis
=-
1
),
axis
=-
1
)
instance_priors
*=
tf
.
expand_dims
(
tf
.
expand_dims
(
tf
.
cast
(
prior_distribution
,
tf
.
float32
),
axis
=-
1
),
axis
=-
1
)
instance_priors
=
tf
.
reduce_sum
(
instance_priors
,
axis
=
2
)
detection_priors
=
spatial_transform_ops
.
crop_mask_in_target_box
(
instance_priors
,
boxes
,
outer_boxes
,
self
.
_mask_crop_size
)
...
...
@@ -688,8 +689,10 @@ class ShapemaskPriorHead(object):
# If prior path does not exist, do not use priors, i.e., priors equal to
# uniform empty 32x32 patch.
self
.
_num_clusters
=
1
priors
=
tf
.
zeros
([
self
.
_mask_num_classes
,
self
.
_num_clusters
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
])
priors
=
tf
.
zeros
([
self
.
_mask_num_classes
,
self
.
_num_clusters
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
])
return
priors
def
_classify_shape_priors
(
self
,
features
,
uniform_priors
,
classes
):
...
...
@@ -699,12 +702,12 @@ class ShapemaskPriorHead(object):
category.
Args:
features: A float Tensor of shape [batch_size, num_instances,
mask_size,
mask_size,
num_channels].
features: A float Tensor of shape [batch_size, num_instances,
mask_size,
mask_size, num_channels].
uniform_priors: A float Tensor of shape [batch_size, num_instances,
mask_size, mask_size] representing the uniform detection priors.
classes: A int Tensor of shape [batch_size, num_instances]
of detection
class ids.
classes: A int Tensor of shape [batch_size, num_instances]
of detection
class ids.
Returns:
prior_distribution: A float Tensor of shape
...
...
@@ -719,10 +722,11 @@ class ShapemaskPriorHead(object):
features
=
tf
.
reduce_mean
(
features
,
axis
=
(
2
,
3
))
logits
=
tf
.
keras
.
layers
.
Dense
(
self
.
_mask_num_classes
*
self
.
_num_clusters
,
kernel_initializer
=
tf
.
random_normal_initializer
(
stddev
=
0.01
))(
features
)
logits
=
tf
.
reshape
(
logits
,
[
batch_size
,
num_instances
,
self
.
_mask_num_classes
,
self
.
_num_clusters
])
kernel_initializer
=
tf
.
random_normal_initializer
(
stddev
=
0.01
))(
features
)
logits
=
tf
.
reshape
(
logits
,
[
batch_size
,
num_instances
,
self
.
_mask_num_classes
,
self
.
_num_clusters
])
if
self
.
_use_category_for_mask
:
logits
=
tf
.
gather
(
logits
,
tf
.
expand_dims
(
classes
,
axis
=-
1
),
batch_dims
=
2
)
logits
=
tf
.
squeeze
(
logits
,
axis
=
2
)
...
...
@@ -752,8 +756,8 @@ class ShapemaskCoarsemaskHead(object):
use_category_for_mask: use class information in mask branch.
num_convs: `int` number of stacked convolution before the last prediction
layer.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
"""
self
.
_mask_num_classes
=
num_classes
if
use_category_for_mask
else
1
self
.
_use_category_for_mask
=
use_category_for_mask
...
...
@@ -769,13 +773,15 @@ class ShapemaskCoarsemaskHead(object):
self
.
_class_norm_activation
=
[]
for
i
in
range
(
self
.
_num_convs
):
self
.
_class_conv
.
append
(
tf
.
keras
.
layers
.
Conv2D
(
self
.
_num_downsample_channels
,
kernel_size
=
(
3
,
3
),
bias_initializer
=
tf
.
zeros_initializer
(),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
),
padding
=
'same'
,
name
=
'coarse-mask-class-%d'
%
i
))
self
.
_class_conv
.
append
(
tf
.
keras
.
layers
.
Conv2D
(
self
.
_num_downsample_channels
,
kernel_size
=
(
3
,
3
),
bias_initializer
=
tf
.
zeros_initializer
(),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
),
padding
=
'same'
,
name
=
'coarse-mask-class-%d'
%
i
))
self
.
_class_norm_activation
.
append
(
norm_activation
(
name
=
'coarse-mask-class-%d-bn'
%
i
))
...
...
@@ -800,10 +806,10 @@ class ShapemaskCoarsemaskHead(object):
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
detection_priors: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size, 1]. This is the detection prior for
the
instance.
classes: a int Tensor of shape [batch_size, num_instances]
of instance
classes.
mask_crop_size, mask_crop_size, 1]. This is the detection prior for
the
instance.
classes: a int Tensor of shape [batch_size, num_instances]
of instance
classes.
is_training: a bool indicating whether in training mode.
Returns:
...
...
@@ -820,8 +826,8 @@ class ShapemaskCoarsemaskHead(object):
# Gather the logits with right input class.
if
self
.
_use_category_for_mask
:
mask_logits
=
tf
.
transpose
(
mask_logits
,
[
0
,
1
,
4
,
2
,
3
])
mask_logits
=
tf
.
gather
(
mask_logits
,
tf
.
expand_dims
(
classes
,
-
1
),
batch_dims
=
2
)
mask_logits
=
tf
.
gather
(
mask_logits
,
tf
.
expand_dims
(
classes
,
-
1
),
batch_dims
=
2
)
mask_logits
=
tf
.
squeeze
(
mask_logits
,
axis
=
2
)
else
:
mask_logits
=
mask_logits
[...,
0
]
...
...
@@ -841,16 +847,17 @@ class ShapemaskCoarsemaskHead(object):
"""
(
batch_size
,
num_instances
,
height
,
width
,
num_channels
)
=
features
.
get_shape
().
as_list
()
features
=
tf
.
reshape
(
features
,
[
batch_size
*
num_instances
,
height
,
width
,
num_channels
])
features
=
tf
.
reshape
(
features
,
[
batch_size
*
num_instances
,
height
,
width
,
num_channels
])
for
i
in
range
(
self
.
_num_convs
):
features
=
self
.
_class_conv
[
i
](
features
)
features
=
self
.
_class_norm_activation
[
i
](
features
,
is_training
=
is_training
)
features
=
self
.
_class_norm_activation
[
i
](
features
,
is_training
=
is_training
)
mask_logits
=
self
.
_class_predict
(
features
)
mask_logits
=
tf
.
reshape
(
mask_logits
,
[
batch_size
,
num_instances
,
height
,
width
,
self
.
_mask_num_classes
])
mask_logits
=
tf
.
reshape
(
mask_logits
,
[
batch_size
,
num_instances
,
height
,
width
,
self
.
_mask_num_classes
])
return
mask_logits
...
...
@@ -907,8 +914,8 @@ class ShapemaskFinemaskHead(object):
activation
=
None
,
padding
=
'same'
,
name
=
'fine-mask-class-%d'
%
i
))
self
.
_fine_class_bn
.
append
(
norm_activation
(
name
=
'fine-mask-class-%d-bn'
%
i
))
self
.
_fine_class_bn
.
append
(
norm_activation
(
name
=
'fine-mask-class-%d-bn'
%
i
))
self
.
_class_predict_conv
=
tf
.
keras
.
layers
.
Conv2D
(
self
.
_mask_num_classes
,
...
...
@@ -926,14 +933,13 @@ class ShapemaskFinemaskHead(object):
https://arxiv.org/pdf/1904.03239.pdf
Args:
features: a float Tensor of shape
[batch_size, num_instances, mask_crop_size, mask_crop_size,
num_downsample_channels]. This is the instance feature crop.
mask_logits: a float Tensor of shape
[batch_size, num_instances, mask_crop_size, mask_crop_size] indicating
predicted mask logits.
classes: a int Tensor of shape [batch_size, num_instances]
of instance classes.
features: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
mask_logits: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size] indicating predicted mask logits.
classes: a int Tensor of shape [batch_size, num_instances] of instance
classes.
is_training: a bool indicating whether in training mode.
Returns:
...
...
@@ -960,8 +966,8 @@ class ShapemaskFinemaskHead(object):
mask_logits
=
self
.
decoder_net
(
features
,
is_training
)
if
self
.
_use_category_for_mask
:
mask_logits
=
tf
.
transpose
(
mask_logits
,
[
0
,
1
,
4
,
2
,
3
])
mask_logits
=
tf
.
gather
(
mask_logits
,
tf
.
expand_dims
(
classes
,
-
1
),
batch_dims
=
2
)
mask_logits
=
tf
.
gather
(
mask_logits
,
tf
.
expand_dims
(
classes
,
-
1
),
batch_dims
=
2
)
mask_logits
=
tf
.
squeeze
(
mask_logits
,
axis
=
2
)
else
:
mask_logits
=
mask_logits
[...,
0
]
...
...
@@ -982,8 +988,8 @@ class ShapemaskFinemaskHead(object):
"""
(
batch_size
,
num_instances
,
height
,
width
,
num_channels
)
=
features
.
get_shape
().
as_list
()
features
=
tf
.
reshape
(
features
,
[
batch_size
*
num_instances
,
height
,
width
,
num_channels
])
features
=
tf
.
reshape
(
features
,
[
batch_size
*
num_instances
,
height
,
width
,
num_channels
])
for
i
in
range
(
self
.
_num_convs
):
features
=
self
.
_fine_class_conv
[
i
](
features
)
features
=
self
.
_fine_class_bn
[
i
](
features
,
is_training
=
is_training
)
...
...
@@ -994,9 +1000,8 @@ class ShapemaskFinemaskHead(object):
# Predict per-class instance masks.
mask_logits
=
self
.
_class_predict_conv
(
features
)
mask_logits
=
tf
.
reshape
(
mask_logits
,
[
batch_size
,
num_instances
,
height
*
self
.
up_sample_factor
,
width
*
self
.
up_sample_factor
,
self
.
_mask_num_classes
])
mask_logits
=
tf
.
reshape
(
mask_logits
,
[
batch_size
,
num_instances
,
height
*
self
.
up_sample_factor
,
width
*
self
.
up_sample_factor
,
self
.
_mask_num_classes
])
return
mask_logits
official/vision/detection/modeling/architecture/nn_ops.py
View file @
88253ce5
...
...
@@ -19,6 +19,7 @@ from __future__ import division
from
__future__
import
print_function
import
functools
import
tensorflow
as
tf
...
...
@@ -43,7 +44,7 @@ class NormActivation(tf.keras.layers.Layer):
GraphKeys.TRAINABLE_VARIABLES. If False, freeze batch normalization
layer.
init_zero: `bool` if True, initializes scale parameter of batch
normalization with 0. If False, initialize it with 1.
normalization with 0. If False, initialize it with 1.
fused: `bool` fused option in batch normalization.
use_activation: `bool`, whether to add the optional activation layer after
the batch normalization layer.
...
...
official/vision/detection/modeling/architecture/resnet.py
View file @
88253ce5
...
...
@@ -28,22 +28,23 @@ import tensorflow as tf
from
official.vision.detection.modeling.architecture
import
keras_utils
from
official.vision.detection.modeling.architecture
import
nn_ops
# TODO(b/140112644): Refactor the code with Keras style, i.e. build and call.
class
Resnet
(
object
):
"""Class to build ResNet family model."""
def
__init__
(
self
,
resnet_depth
,
activation
=
'relu'
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
),
data_format
=
'channels_last'
):
def
__init__
(
self
,
resnet_depth
,
activation
=
'relu'
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
),
data_format
=
'channels_last'
):
"""ResNet initialization function.
Args:
resnet_depth: `int` depth of ResNet backbone model.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
norm_activation: an operation that includes a normalization layer
followed
by an optional activation layer.
data_format: `str` either "channels_first" for `[batch, channels, height,
width]` or "channels_last" for `[batch, height, width, channels]`.
"""
...
...
@@ -58,24 +59,45 @@ class Resnet(object):
self
.
_data_format
=
data_format
model_params
=
{
10
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
1
,
1
,
1
,
1
]},
18
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
2
,
2
,
2
,
2
]},
34
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
3
,
4
,
6
,
3
]},
50
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
4
,
6
,
3
]},
101
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
4
,
23
,
3
]},
152
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
8
,
36
,
3
]},
200
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
24
,
36
,
3
]}
10
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
1
,
1
,
1
,
1
]
},
18
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
2
,
2
,
2
,
2
]
},
34
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
3
,
4
,
6
,
3
]
},
50
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
4
,
6
,
3
]
},
101
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
4
,
23
,
3
]
},
152
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
8
,
36
,
3
]
},
200
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
24
,
36
,
3
]
}
}
if
resnet_depth
not
in
model_params
:
valid_resnet_depths
=
', '
.
join
(
[
str
(
depth
)
for
depth
in
sorted
(
model_params
.
keys
())])
raise
ValueError
(
'The resnet_depth should be in [%s]. Not a valid resnet_depth:'
%
(
valid_resnet_depths
),
self
.
_resnet_depth
)
'The resnet_depth should be in [%s]. Not a valid resnet_depth:'
%
(
valid_resnet_depths
),
self
.
_resnet_depth
)
params
=
model_params
[
resnet_depth
]
self
.
_resnet_fn
=
self
.
resnet_v1_generator
(
params
[
'block'
],
params
[
'layers'
])
self
.
_resnet_fn
=
self
.
resnet_v1_generator
(
params
[
'block'
],
params
[
'layers'
])
def
__call__
(
self
,
inputs
,
is_training
=
None
):
"""Returns the ResNet model for a given size and number of output classes.
...
...
@@ -98,10 +120,10 @@ class Resnet(object):
"""Pads the input along the spatial dimensions independently of input size.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]` or
`[batch,
height, width, channels]` depending on `data_format`.
inputs: `Tensor` of size `[batch, channels, height, width]` or
`[batch,
height, width, channels]` depending on `data_format`.
kernel_size: `int` kernel size to be used for `conv2d` or `max_pool2d`
operations. Should be a positive integer.
operations. Should be a positive integer.
Returns:
A padded `Tensor` of the same `data_format` with size either intact
...
...
@@ -160,14 +182,15 @@ class Resnet(object):
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually
`True`
for the first block of a block group, which may change the
number of
filters and the resolution.
shortcut (versus the default identity shortcut). This is usually
`True`
for the first block of a block group, which may change the
number of
filters and the resolution.
is_training: `bool` if True, the model is in training mode.
Returns:
The output `Tensor` of the block.
"""
...
...
@@ -185,8 +208,9 @@ class Resnet(object):
inputs
=
self
.
conv2d_fixed_padding
(
inputs
=
inputs
,
filters
=
filters
,
kernel_size
=
3
,
strides
=
1
)
inputs
=
self
.
_norm_activation
(
use_activation
=
False
,
init_zero
=
True
)(
inputs
,
is_training
=
is_training
)
inputs
=
self
.
_norm_activation
(
use_activation
=
False
,
init_zero
=
True
)(
inputs
,
is_training
=
is_training
)
return
self
.
_activation_op
(
inputs
+
shortcut
)
...
...
@@ -201,13 +225,13 @@ class Resnet(object):
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually
`True`
for the first block of a block group, which may change the
number of
filters and the resolution.
shortcut (versus the default identity shortcut). This is usually
`True`
for the first block of a block group, which may change the
number of
filters and the resolution.
is_training: `bool` if True, the model is in training mode.
Returns:
...
...
@@ -233,8 +257,9 @@ class Resnet(object):
inputs
=
self
.
conv2d_fixed_padding
(
inputs
=
inputs
,
filters
=
4
*
filters
,
kernel_size
=
1
,
strides
=
1
)
inputs
=
self
.
_norm_activation
(
use_activation
=
False
,
init_zero
=
True
)(
inputs
,
is_training
=
is_training
)
inputs
=
self
.
_norm_activation
(
use_activation
=
False
,
init_zero
=
True
)(
inputs
,
is_training
=
is_training
)
return
self
.
_activation_op
(
inputs
+
shortcut
)
...
...
@@ -248,7 +273,7 @@ class Resnet(object):
block_fn: `function` for the block to use within the model
blocks: `int` number of blocks contained in the layer.
strides: `int` stride to use for the first convolution of the layer. If
greater than 1, this layer will downsample the input.
greater than 1, this layer will downsample the input.
name: `str` name for the Tensor output of the block layer.
is_training: `bool` if True, the model is in training mode.
...
...
@@ -256,8 +281,8 @@ class Resnet(object):
The output `Tensor` of the block layer.
"""
# Only the first block per block_group uses projection shortcut and strides.
inputs
=
block_fn
(
inputs
,
filters
,
strides
,
use_projection
=
True
,
is_training
=
is_training
)
inputs
=
block_fn
(
inputs
,
filters
,
strides
,
use_projection
=
True
,
is_training
=
is_training
)
for
_
in
range
(
1
,
blocks
):
inputs
=
block_fn
(
inputs
,
filters
,
1
,
is_training
=
is_training
)
...
...
@@ -269,7 +294,7 @@ class Resnet(object):
Args:
block_fn: `function` for the block to use within the model. Either
`residual_block` or `bottleneck_block`.
`residual_block` or `bottleneck_block`.
layers: list of 4 `int`s denoting the number of blocks to include in each
of the 4 block groups. Each group consists of blocks that take inputs of
the same resolution.
...
...
@@ -293,17 +318,37 @@ class Resnet(object):
inputs
=
tf
.
identity
(
inputs
,
'initial_max_pool'
)
c2
=
self
.
block_group
(
inputs
=
inputs
,
filters
=
64
,
block_fn
=
block_fn
,
blocks
=
layers
[
0
],
strides
=
1
,
name
=
'block_group1'
,
is_training
=
is_training
)
inputs
=
inputs
,
filters
=
64
,
block_fn
=
block_fn
,
blocks
=
layers
[
0
],
strides
=
1
,
name
=
'block_group1'
,
is_training
=
is_training
)
c3
=
self
.
block_group
(
inputs
=
c2
,
filters
=
128
,
block_fn
=
block_fn
,
blocks
=
layers
[
1
],
strides
=
2
,
name
=
'block_group2'
,
is_training
=
is_training
)
inputs
=
c2
,
filters
=
128
,
block_fn
=
block_fn
,
blocks
=
layers
[
1
],
strides
=
2
,
name
=
'block_group2'
,
is_training
=
is_training
)
c4
=
self
.
block_group
(
inputs
=
c3
,
filters
=
256
,
block_fn
=
block_fn
,
blocks
=
layers
[
2
],
strides
=
2
,
name
=
'block_group3'
,
is_training
=
is_training
)
inputs
=
c3
,
filters
=
256
,
block_fn
=
block_fn
,
blocks
=
layers
[
2
],
strides
=
2
,
name
=
'block_group3'
,
is_training
=
is_training
)
c5
=
self
.
block_group
(
inputs
=
c4
,
filters
=
512
,
block_fn
=
block_fn
,
blocks
=
layers
[
3
],
strides
=
2
,
name
=
'block_group4'
,
is_training
=
is_training
)
inputs
=
c4
,
filters
=
512
,
block_fn
=
block_fn
,
blocks
=
layers
[
3
],
strides
=
2
,
name
=
'block_group4'
,
is_training
=
is_training
)
return
{
2
:
c2
,
3
:
c3
,
4
:
c4
,
5
:
c5
}
return
model
official/vision/detection/modeling/base_model.py
View file @
88253ce5
...
...
@@ -21,6 +21,7 @@ from __future__ import print_function
import
abc
import
functools
import
re
import
tensorflow
as
tf
from
official.vision.detection.modeling
import
checkpoint_utils
from
official.vision.detection.modeling
import
learning_rates
...
...
@@ -42,8 +43,7 @@ def _make_filter_trainable_variables_fn(frozen_variable_prefix):
# frozen_variable_prefix: a regex string specifying the prefix pattern of
# the frozen variables' names.
filtered_variables
=
[
v
for
v
in
variables
if
not
frozen_variable_prefix
or
v
for
v
in
variables
if
not
frozen_variable_prefix
or
not
re
.
match
(
frozen_variable_prefix
,
v
.
name
)
]
return
filtered_variables
...
...
@@ -115,8 +115,8 @@ class Model(object):
def
weight_decay_loss
(
self
,
trainable_variables
):
reg_variables
=
[
v
for
v
in
trainable_variables
if
self
.
_regularization_var_regex
is
None
or
re
.
match
(
self
.
_regularization_var_regex
,
v
.
name
)
if
self
.
_regularization_var_regex
is
None
or
re
.
match
(
self
.
_regularization_var_regex
,
v
.
name
)
]
return
self
.
_l2_weight_decay
*
tf
.
add_n
(
...
...
official/vision/detection/modeling/checkpoint_utils.py
View file @
88253ce5
...
...
@@ -12,7 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Util functions for loading checkpoints. Especially for loading Tensorflow 1.x
"""Util functions for loading checkpoints.
Especially for loading Tensorflow 1.x
checkpoint to Tensorflow 2.x (keras) model.
"""
...
...
@@ -20,18 +22,19 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
re
from
absl
import
logging
import
tensorflow
as
tf
def
_build_assignment_map
(
keras_model
,
prefix
=
''
,
skip_variables_regex
=
None
,
var_to_shape_map
=
None
):
prefix
=
''
,
skip_variables_regex
=
None
,
var_to_shape_map
=
None
):
"""Compute an assignment mapping for loading older checkpoints into a Keras
model. Variable names are remapped from the original TPUEstimator model to
the new Keras name.
...
...
@@ -48,12 +51,12 @@ def _build_assignment_map(keras_model,
"""
assignment_map
=
{}
checkpoint_names
=
None
if
var_to_shape_map
:
checkpoint_names
=
list
(
filter
(
lambda
x
:
not
x
.
endswith
(
'Momentum'
)
and
not
x
.
endswith
(
'global_step'
),
var_to_shape_map
.
keys
()))
checkpoint_names
=
list
(
filter
(
lambda
x
:
not
x
.
endswith
(
'Momentum'
)
and
not
x
.
endswith
(
'global_step'
),
var_to_shape_map
.
keys
()))
for
var
in
keras_model
.
variables
:
var_name
=
var
.
name
...
...
@@ -95,14 +98,15 @@ def _get_checkpoint_map(checkpoint_path):
def
make_restore_checkpoint_fn
(
checkpoint_path
,
prefix
=
''
,
skip_regex
=
None
):
"""Returns scaffold function to restore parameters from v1 checkpoint.
Args:
checkpoint_path: path of the checkpoint folder or file.
Example 1: '/path/to/model_dir/'
Example 2: '/path/to/model.ckpt-22500'
prefix: prefix in the variable name to be removed for alignment with names in
the checkpoint.
skip_regex: regular expression to match the names of variables that
do not
need to be assigned.
skip_regex: regular expression to match the names of variables that
do not
need to be assigned.
Returns:
Callable[tf.keras.Model] -> void. Fn to load v1 checkpoint to keras model.
...
...
@@ -125,7 +129,6 @@ def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None):
var_to_shape_map
=
var_to_shape_map
)
if
not
vars_to_load
:
raise
ValueError
(
'Variables to load is empty.'
)
tf
.
compat
.
v1
.
train
.
init_from_checkpoint
(
checkpoint_path
,
vars_to_load
)
tf
.
compat
.
v1
.
train
.
init_from_checkpoint
(
checkpoint_path
,
vars_to_load
)
return
_restore_checkpoint_fn
official/vision/detection/modeling/learning_rates.py
View file @
88253ce5
...
...
@@ -25,7 +25,8 @@ import tensorflow as tf
from
official.modeling.hyperparams
import
params_dict
class
StepLearningRateWithLinearWarmup
(
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
):
class
StepLearningRateWithLinearWarmup
(
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
):
"""Class to generate learning rate tensor."""
def
__init__
(
self
,
total_steps
,
params
):
...
...
@@ -57,7 +58,8 @@ class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRat
return
{
'_params'
:
self
.
_params
.
as_dict
()}
class
CosineLearningRateWithLinearWarmup
(
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
):
class
CosineLearningRateWithLinearWarmup
(
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
):
"""Class to generate learning rate tensor."""
def
__init__
(
self
,
total_steps
,
params
):
...
...
official/vision/detection/modeling/maskrcnn_model.py
View file @
88253ce5
...
...
@@ -118,9 +118,7 @@ class MaskrcnnModel(base_model.Model):
box_targets
=
tf
.
where
(
tf
.
tile
(
tf
.
expand_dims
(
tf
.
equal
(
matched_gt_classes
,
0
),
axis
=-
1
),
[
1
,
1
,
4
]),
tf
.
zeros_like
(
box_targets
),
box_targets
)
[
1
,
1
,
4
]),
tf
.
zeros_like
(
box_targets
),
box_targets
)
model_outputs
.
update
({
'class_targets'
:
matched_gt_classes
,
'box_targets'
:
box_targets
,
...
...
@@ -183,9 +181,7 @@ class MaskrcnnModel(base_model.Model):
mask_outputs
),
})
else
:
model_outputs
.
update
({
'detection_masks'
:
tf
.
nn
.
sigmoid
(
mask_outputs
)
})
model_outputs
.
update
({
'detection_masks'
:
tf
.
nn
.
sigmoid
(
mask_outputs
)})
return
model_outputs
...
...
@@ -312,8 +308,8 @@ class MaskrcnnModel(base_model.Model):
required_output_fields
=
[
'class_outputs'
,
'box_outputs'
]
for
field
in
required_output_fields
:
if
field
not
in
outputs
:
raise
ValueError
(
'"%s" is missing in outputs, requried %s found %s'
%
(
field
,
required_output_fields
,
outputs
.
keys
()))
raise
ValueError
(
'"%s" is missing in outputs, requried %s found %s'
%
(
field
,
required_output_fields
,
outputs
.
keys
()))
predictions
=
{
'image_info'
:
labels
[
'image_info'
],
'num_detections'
:
outputs
[
'num_detections'
],
...
...
Prev
1
…
4
5
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment