Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
88253ce5
Commit
88253ce5
authored
Aug 12, 2020
by
Hongkun Yu
Committed by
A. Unique TensorFlower
Aug 12, 2020
Browse files
Internal change
PiperOrigin-RevId: 326286926
parent
52371ffe
Changes
205
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
433 additions
and
357 deletions
+433
-357
official/utils/misc/distribution_utils.py
official/utils/misc/distribution_utils.py
+17
-13
official/utils/misc/distribution_utils_test.py
official/utils/misc/distribution_utils_test.py
+2
-1
official/utils/misc/keras_utils.py
official/utils/misc/keras_utils.py
+7
-11
official/utils/misc/model_helpers.py
official/utils/misc/model_helpers.py
+6
-3
official/utils/misc/model_helpers_test.py
official/utils/misc/model_helpers_test.py
+19
-13
official/utils/registry_test.py
official/utils/registry_test.py
+10
-2
official/utils/testing/integration.py
official/utils/testing/integration.py
+6
-3
official/vision/detection/dataloader/anchor.py
official/vision/detection/dataloader/anchor.py
+40
-40
official/vision/detection/dataloader/input_reader.py
official/vision/detection/dataloader/input_reader.py
+2
-1
official/vision/detection/dataloader/retinanet_parser.py
official/vision/detection/dataloader/retinanet_parser.py
+57
-54
official/vision/detection/dataloader/shapemask_parser.py
official/vision/detection/dataloader/shapemask_parser.py
+0
-1
official/vision/detection/evaluation/coco_evaluator.py
official/vision/detection/evaluation/coco_evaluator.py
+15
-16
official/vision/detection/main.py
official/vision/detection/main.py
+1
-0
official/vision/detection/modeling/architecture/heads.py
official/vision/detection/modeling/architecture/heads.py
+128
-123
official/vision/detection/modeling/architecture/nn_ops.py
official/vision/detection/modeling/architecture/nn_ops.py
+2
-1
official/vision/detection/modeling/architecture/resnet.py
official/vision/detection/modeling/architecture/resnet.py
+93
-48
official/vision/detection/modeling/base_model.py
official/vision/detection/modeling/base_model.py
+4
-4
official/vision/detection/modeling/checkpoint_utils.py
official/vision/detection/modeling/checkpoint_utils.py
+16
-13
official/vision/detection/modeling/learning_rates.py
official/vision/detection/modeling/learning_rates.py
+4
-2
official/vision/detection/modeling/maskrcnn_model.py
official/vision/detection/modeling/maskrcnn_model.py
+4
-8
No files found.
official/utils/misc/distribution_utils.py
View file @
88253ce5
...
@@ -93,9 +93,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
...
@@ -93,9 +93,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
Args:
Args:
distribution_strategy: a string specifying which distribution strategy to
distribution_strategy: a string specifying which distribution strategy to
use. Accepted values are "off", "one_device", "mirrored",
use. Accepted values are "off", "one_device", "mirrored",
"parameter_server", "multi_worker_mirrored", and "tpu" -- case
insensitive.
"parameter_server", "multi_worker_mirrored", and "tpu" -- case
"off" means not to use Distribution Strategy; "tpu" means to
use
insensitive.
"off" means not to use Distribution Strategy; "tpu" means to
TPUStrategy using `tpu_address`.
use
TPUStrategy using `tpu_address`.
num_gpus: Number of GPUs to run this model.
num_gpus: Number of GPUs to run this model.
all_reduce_alg: Optional. Specifies which algorithm to use when performing
all_reduce_alg: Optional. Specifies which algorithm to use when performing
all-reduce. For `MirroredStrategy`, valid values are "nccl" and
all-reduce. For `MirroredStrategy`, valid values are "nccl" and
...
@@ -104,8 +104,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
...
@@ -104,8 +104,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
device topology.
device topology.
num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce`
num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce`
or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`.
or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`.
tpu_address: Optional. String that represents TPU to connect to. Must not
tpu_address: Optional. String that represents TPU to connect to. Must not be
be None if `distribution_strategy` is set to `tpu`.
None if `distribution_strategy` is set to `tpu`.
Returns:
Returns:
tf.distribute.DistibutionStrategy object.
tf.distribute.DistibutionStrategy object.
Raises:
Raises:
...
@@ -119,9 +120,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
...
@@ -119,9 +120,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
distribution_strategy
=
distribution_strategy
.
lower
()
distribution_strategy
=
distribution_strategy
.
lower
()
if
distribution_strategy
==
"off"
:
if
distribution_strategy
==
"off"
:
if
num_gpus
>
1
:
if
num_gpus
>
1
:
raise
ValueError
(
raise
ValueError
(
"When {} GPUs are specified, distribution_strategy "
"When {} GPUs are specified, distribution_strategy "
"flag cannot be set to `off`."
.
format
(
num_gpus
))
"flag cannot be set to `off`."
.
format
(
num_gpus
))
return
None
return
None
if
distribution_strategy
==
"tpu"
:
if
distribution_strategy
==
"tpu"
:
...
@@ -153,8 +153,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
...
@@ -153,8 +153,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
if
distribution_strategy
==
"parameter_server"
:
if
distribution_strategy
==
"parameter_server"
:
return
tf
.
distribute
.
experimental
.
ParameterServerStrategy
()
return
tf
.
distribute
.
experimental
.
ParameterServerStrategy
()
raise
ValueError
(
raise
ValueError
(
"Unrecognized Distribution Strategy: %r"
%
"Unrecognized Distribution Strategy: %r"
%
distribution_strategy
)
distribution_strategy
)
def
configure_cluster
(
worker_hosts
=
None
,
task_index
=-
1
):
def
configure_cluster
(
worker_hosts
=
None
,
task_index
=-
1
):
...
@@ -168,8 +168,9 @@ def configure_cluster(worker_hosts=None, task_index=-1):
...
@@ -168,8 +168,9 @@ def configure_cluster(worker_hosts=None, task_index=-1):
"""
"""
tf_config
=
json
.
loads
(
os
.
environ
.
get
(
"TF_CONFIG"
,
"{}"
))
tf_config
=
json
.
loads
(
os
.
environ
.
get
(
"TF_CONFIG"
,
"{}"
))
if
tf_config
:
if
tf_config
:
num_workers
=
(
len
(
tf_config
[
"cluster"
].
get
(
"chief"
,
[]))
+
num_workers
=
(
len
(
tf_config
[
"cluster"
].
get
(
"worker"
,
[])))
len
(
tf_config
[
"cluster"
].
get
(
"chief"
,
[]))
+
len
(
tf_config
[
"cluster"
].
get
(
"worker"
,
[])))
elif
worker_hosts
:
elif
worker_hosts
:
workers
=
worker_hosts
.
split
(
","
)
workers
=
worker_hosts
.
split
(
","
)
num_workers
=
len
(
workers
)
num_workers
=
len
(
workers
)
...
@@ -180,7 +181,10 @@ def configure_cluster(worker_hosts=None, task_index=-1):
...
@@ -180,7 +181,10 @@ def configure_cluster(worker_hosts=None, task_index=-1):
"cluster"
:
{
"cluster"
:
{
"worker"
:
workers
"worker"
:
workers
},
},
"task"
:
{
"type"
:
"worker"
,
"index"
:
task_index
}
"task"
:
{
"type"
:
"worker"
,
"index"
:
task_index
}
})
})
else
:
else
:
num_workers
=
1
num_workers
=
1
...
...
official/utils/misc/distribution_utils_test.py
View file @
88253ce5
...
@@ -25,6 +25,7 @@ from official.utils.misc import distribution_utils
...
@@ -25,6 +25,7 @@ from official.utils.misc import distribution_utils
class
GetDistributionStrategyTest
(
tf
.
test
.
TestCase
):
class
GetDistributionStrategyTest
(
tf
.
test
.
TestCase
):
"""Tests for get_distribution_strategy."""
"""Tests for get_distribution_strategy."""
def
test_one_device_strategy_cpu
(
self
):
def
test_one_device_strategy_cpu
(
self
):
ds
=
distribution_utils
.
get_distribution_strategy
(
num_gpus
=
0
)
ds
=
distribution_utils
.
get_distribution_strategy
(
num_gpus
=
0
)
self
.
assertEquals
(
ds
.
num_replicas_in_sync
,
1
)
self
.
assertEquals
(
ds
.
num_replicas_in_sync
,
1
)
...
@@ -45,5 +46,5 @@ class GetDistributionStrategyTest(tf.test.TestCase):
...
@@ -45,5 +46,5 @@ class GetDistributionStrategyTest(tf.test.TestCase):
self
.
assertIn
(
'GPU'
,
device
)
self
.
assertIn
(
'GPU'
,
device
)
if
__name__
==
"
__main__
"
:
if
__name__
==
'
__main__
'
:
tf
.
test
.
main
()
tf
.
test
.
main
()
official/utils/misc/keras_utils.py
View file @
88253ce5
...
@@ -25,7 +25,6 @@ import time
...
@@ -25,7 +25,6 @@ import time
from
absl
import
logging
from
absl
import
logging
import
tensorflow
as
tf
import
tensorflow
as
tf
from
tensorflow.python.eager
import
monitoring
from
tensorflow.python.eager
import
monitoring
global_batch_size_gauge
=
monitoring
.
IntGauge
(
global_batch_size_gauge
=
monitoring
.
IntGauge
(
...
@@ -121,8 +120,8 @@ class TimeHistory(tf.keras.callbacks.Callback):
...
@@ -121,8 +120,8 @@ class TimeHistory(tf.keras.callbacks.Callback):
# Record the timestamp of the first global step
# Record the timestamp of the first global step
if
not
self
.
timestamp_log
:
if
not
self
.
timestamp_log
:
self
.
timestamp_log
.
append
(
BatchTimestamp
(
self
.
global_steps
,
self
.
timestamp_log
.
append
(
self
.
start_time
))
BatchTimestamp
(
self
.
global_steps
,
self
.
start_time
))
def
on_batch_end
(
self
,
batch
,
logs
=
None
):
def
on_batch_end
(
self
,
batch
,
logs
=
None
):
"""Records elapse time of the batch and calculates examples per second."""
"""Records elapse time of the batch and calculates examples per second."""
...
@@ -175,12 +174,12 @@ def set_session_config(enable_xla=False):
...
@@ -175,12 +174,12 @@ def set_session_config(enable_xla=False):
if
enable_xla
:
if
enable_xla
:
tf
.
config
.
optimizer
.
set_jit
(
True
)
tf
.
config
.
optimizer
.
set_jit
(
True
)
# TODO(hongkuny): remove set_config_v2 globally.
# TODO(hongkuny): remove set_config_v2 globally.
set_config_v2
=
set_session_config
set_config_v2
=
set_session_config
def
set_gpu_thread_mode_and_count
(
gpu_thread_mode
,
def
set_gpu_thread_mode_and_count
(
gpu_thread_mode
,
datasets_num_private_threads
,
datasets_num_private_threads
,
num_gpus
,
per_gpu_thread_count
):
num_gpus
,
per_gpu_thread_count
):
"""Set GPU thread mode and count, and adjust dataset threads count."""
"""Set GPU thread mode and count, and adjust dataset threads count."""
cpu_count
=
multiprocessing
.
cpu_count
()
cpu_count
=
multiprocessing
.
cpu_count
()
...
@@ -190,10 +189,8 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
...
@@ -190,10 +189,8 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
per_gpu_thread_count
=
per_gpu_thread_count
or
2
per_gpu_thread_count
=
per_gpu_thread_count
or
2
os
.
environ
[
'TF_GPU_THREAD_MODE'
]
=
gpu_thread_mode
os
.
environ
[
'TF_GPU_THREAD_MODE'
]
=
gpu_thread_mode
os
.
environ
[
'TF_GPU_THREAD_COUNT'
]
=
str
(
per_gpu_thread_count
)
os
.
environ
[
'TF_GPU_THREAD_COUNT'
]
=
str
(
per_gpu_thread_count
)
logging
.
info
(
'TF_GPU_THREAD_COUNT: %s'
,
logging
.
info
(
'TF_GPU_THREAD_COUNT: %s'
,
os
.
environ
[
'TF_GPU_THREAD_COUNT'
])
os
.
environ
[
'TF_GPU_THREAD_COUNT'
])
logging
.
info
(
'TF_GPU_THREAD_MODE: %s'
,
os
.
environ
[
'TF_GPU_THREAD_MODE'
])
logging
.
info
(
'TF_GPU_THREAD_MODE: %s'
,
os
.
environ
[
'TF_GPU_THREAD_MODE'
])
# Limit data preprocessing threadpool to CPU cores minus number of total GPU
# Limit data preprocessing threadpool to CPU cores minus number of total GPU
# private threads and memory copy threads.
# private threads and memory copy threads.
...
@@ -201,7 +198,6 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
...
@@ -201,7 +198,6 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
num_runtime_threads
=
num_gpus
num_runtime_threads
=
num_gpus
if
not
datasets_num_private_threads
:
if
not
datasets_num_private_threads
:
datasets_num_private_threads
=
min
(
datasets_num_private_threads
=
min
(
cpu_count
-
total_gpu_thread_count
-
num_runtime_threads
,
cpu_count
-
total_gpu_thread_count
-
num_runtime_threads
,
num_gpus
*
8
)
num_gpus
*
8
)
logging
.
info
(
'Set datasets_num_private_threads to %s'
,
logging
.
info
(
'Set datasets_num_private_threads to %s'
,
datasets_num_private_threads
)
datasets_num_private_threads
)
official/utils/misc/model_helpers.py
View file @
88253ce5
...
@@ -58,9 +58,12 @@ def past_stop_threshold(stop_threshold, eval_metric):
...
@@ -58,9 +58,12 @@ def past_stop_threshold(stop_threshold, eval_metric):
return
False
return
False
def
generate_synthetic_data
(
def
generate_synthetic_data
(
input_shape
,
input_shape
,
input_value
=
0
,
input_dtype
=
None
,
label_shape
=
None
,
input_value
=
0
,
label_value
=
0
,
label_dtype
=
None
):
input_dtype
=
None
,
label_shape
=
None
,
label_value
=
0
,
label_dtype
=
None
):
"""Create a repeating dataset with constant values.
"""Create a repeating dataset with constant values.
Args:
Args:
...
...
official/utils/misc/model_helpers_test.py
View file @
88253ce5
...
@@ -51,19 +51,19 @@ class PastStopThresholdTest(tf.test.TestCase):
...
@@ -51,19 +51,19 @@ class PastStopThresholdTest(tf.test.TestCase):
def
test_past_stop_threshold_not_number
(
self
):
def
test_past_stop_threshold_not_number
(
self
):
"""Tests for error conditions."""
"""Tests for error conditions."""
with
self
.
assertRaises
(
ValueError
):
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
"
str
"
,
1
)
model_helpers
.
past_stop_threshold
(
'
str
'
,
1
)
with
self
.
assertRaises
(
ValueError
):
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
"
str
"
,
tf
.
constant
(
5
))
model_helpers
.
past_stop_threshold
(
'
str
'
,
tf
.
constant
(
5
))
with
self
.
assertRaises
(
ValueError
):
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
"
str
"
,
"
another
"
)
model_helpers
.
past_stop_threshold
(
'
str
'
,
'
another
'
)
with
self
.
assertRaises
(
ValueError
):
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
0
,
None
)
model_helpers
.
past_stop_threshold
(
0
,
None
)
with
self
.
assertRaises
(
ValueError
):
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
0.7
,
"
str
"
)
model_helpers
.
past_stop_threshold
(
0.7
,
'
str
'
)
with
self
.
assertRaises
(
ValueError
):
with
self
.
assertRaises
(
ValueError
):
model_helpers
.
past_stop_threshold
(
tf
.
constant
(
4
),
None
)
model_helpers
.
past_stop_threshold
(
tf
.
constant
(
4
),
None
)
...
@@ -74,12 +74,13 @@ class SyntheticDataTest(tf.test.TestCase):
...
@@ -74,12 +74,13 @@ class SyntheticDataTest(tf.test.TestCase):
def
test_generate_synethetic_data
(
self
):
def
test_generate_synethetic_data
(
self
):
input_element
,
label_element
=
tf
.
compat
.
v1
.
data
.
make_one_shot_iterator
(
input_element
,
label_element
=
tf
.
compat
.
v1
.
data
.
make_one_shot_iterator
(
model_helpers
.
generate_synthetic_data
(
input_shape
=
tf
.
TensorShape
([
5
]),
model_helpers
.
generate_synthetic_data
(
input_value
=
123
,
input_shape
=
tf
.
TensorShape
([
5
]),
input_dtype
=
tf
.
float32
,
input_value
=
123
,
label_shape
=
tf
.
TensorShape
([]),
input_dtype
=
tf
.
float32
,
label_value
=
456
,
label_shape
=
tf
.
TensorShape
([]),
label_dtype
=
tf
.
int32
)).
get_next
()
label_value
=
456
,
label_dtype
=
tf
.
int32
)).
get_next
()
with
self
.
session
()
as
sess
:
with
self
.
session
()
as
sess
:
for
n
in
range
(
5
):
for
n
in
range
(
5
):
...
@@ -102,8 +103,13 @@ class SyntheticDataTest(tf.test.TestCase):
...
@@ -102,8 +103,13 @@ class SyntheticDataTest(tf.test.TestCase):
def
test_generate_nested_data
(
self
):
def
test_generate_nested_data
(
self
):
d
=
model_helpers
.
generate_synthetic_data
(
d
=
model_helpers
.
generate_synthetic_data
(
input_shape
=
{
'a'
:
tf
.
TensorShape
([
2
]),
input_shape
=
{
'b'
:
{
'c'
:
tf
.
TensorShape
([
3
]),
'd'
:
tf
.
TensorShape
([])}},
'a'
:
tf
.
TensorShape
([
2
]),
'b'
:
{
'c'
:
tf
.
TensorShape
([
3
]),
'd'
:
tf
.
TensorShape
([])
}
},
input_value
=
1.1
)
input_value
=
1.1
)
element
=
tf
.
compat
.
v1
.
data
.
make_one_shot_iterator
(
d
).
get_next
()
element
=
tf
.
compat
.
v1
.
data
.
make_one_shot_iterator
(
d
).
get_next
()
...
@@ -121,5 +127,5 @@ class SyntheticDataTest(tf.test.TestCase):
...
@@ -121,5 +127,5 @@ class SyntheticDataTest(tf.test.TestCase):
self
.
assertAllClose
(
inp
[
'b'
][
'd'
],
1.1
)
self
.
assertAllClose
(
inp
[
'b'
][
'd'
],
1.1
)
if
__name__
==
"
__main__
"
:
if
__name__
==
'
__main__
'
:
tf
.
test
.
main
()
tf
.
test
.
main
()
official/utils/registry_test.py
View file @
88253ce5
...
@@ -31,18 +31,20 @@ class RegistryTest(tf.test.TestCase):
...
@@ -31,18 +31,20 @@ class RegistryTest(tf.test.TestCase):
@
registry
.
register
(
collection
,
'functions/func_0'
)
@
registry
.
register
(
collection
,
'functions/func_0'
)
def
func_test
():
def
func_test
():
pass
pass
self
.
assertEqual
(
registry
.
lookup
(
collection
,
'functions/func_0'
),
func_test
)
self
.
assertEqual
(
registry
.
lookup
(
collection
,
'functions/func_0'
),
func_test
)
@
registry
.
register
(
collection
,
'classes/cls_0'
)
@
registry
.
register
(
collection
,
'classes/cls_0'
)
class
ClassRegistryKey
:
class
ClassRegistryKey
:
pass
pass
self
.
assertEqual
(
self
.
assertEqual
(
registry
.
lookup
(
collection
,
'classes/cls_0'
),
ClassRegistryKey
)
registry
.
lookup
(
collection
,
'classes/cls_0'
),
ClassRegistryKey
)
@
registry
.
register
(
collection
,
ClassRegistryKey
)
@
registry
.
register
(
collection
,
ClassRegistryKey
)
class
ClassRegistryValue
:
class
ClassRegistryValue
:
pass
pass
self
.
assertEqual
(
self
.
assertEqual
(
registry
.
lookup
(
collection
,
ClassRegistryKey
),
ClassRegistryValue
)
registry
.
lookup
(
collection
,
ClassRegistryKey
),
ClassRegistryValue
)
...
@@ -52,12 +54,15 @@ class RegistryTest(tf.test.TestCase):
...
@@ -52,12 +54,15 @@ class RegistryTest(tf.test.TestCase):
@
registry
.
register
(
collection
,
'functions/func_0'
)
@
registry
.
register
(
collection
,
'functions/func_0'
)
def
func_test0
():
def
func_test0
():
pass
pass
@
registry
.
register
(
collection
,
'func_1'
)
@
registry
.
register
(
collection
,
'func_1'
)
def
func_test1
():
def
func_test1
():
pass
pass
@
registry
.
register
(
collection
,
func_test1
)
@
registry
.
register
(
collection
,
func_test1
)
def
func_test2
():
def
func_test2
():
pass
pass
expected_collection
=
{
expected_collection
=
{
'functions'
:
{
'functions'
:
{
'func_0'
:
func_test0
,
'func_0'
:
func_test0
,
...
@@ -73,10 +78,13 @@ class RegistryTest(tf.test.TestCase):
...
@@ -73,10 +78,13 @@ class RegistryTest(tf.test.TestCase):
@
registry
.
register
(
collection
,
'functions/func_0'
)
@
registry
.
register
(
collection
,
'functions/func_0'
)
def
func_test0
():
# pylint: disable=unused-variable
def
func_test0
():
# pylint: disable=unused-variable
pass
pass
with
self
.
assertRaises
(
KeyError
):
with
self
.
assertRaises
(
KeyError
):
@
registry
.
register
(
collection
,
'functions/func_0/sub_func'
)
@
registry
.
register
(
collection
,
'functions/func_0/sub_func'
)
def
func_test1
():
# pylint: disable=unused-variable
def
func_test1
():
# pylint: disable=unused-variable
pass
pass
with
self
.
assertRaises
(
LookupError
):
with
self
.
assertRaises
(
LookupError
):
registry
.
lookup
(
collection
,
'non-exist'
)
registry
.
lookup
(
collection
,
'non-exist'
)
...
...
official/utils/testing/integration.py
View file @
88253ce5
...
@@ -12,8 +12,7 @@
...
@@ -12,8 +12,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Helper code to run complete models from within python.
"""Helper code to run complete models from within python."""
"""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
...
@@ -31,7 +30,11 @@ from official.utils.flags import core as flags_core
...
@@ -31,7 +30,11 @@ from official.utils.flags import core as flags_core
@
flagsaver
.
flagsaver
@
flagsaver
.
flagsaver
def
run_synthetic
(
main
,
tmp_root
,
extra_flags
=
None
,
synth
=
True
,
train_epochs
=
1
,
def
run_synthetic
(
main
,
tmp_root
,
extra_flags
=
None
,
synth
=
True
,
train_epochs
=
1
,
epochs_between_evals
=
1
):
epochs_between_evals
=
1
):
"""Performs a minimal run of a model.
"""Performs a minimal run of a model.
...
...
official/vision/detection/dataloader/anchor.py
View file @
88253ce5
...
@@ -19,6 +19,7 @@ from __future__ import division
...
@@ -19,6 +19,7 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
collections
import
collections
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.vision.detection.utils.object_detection
import
argmax_matcher
from
official.vision.detection.utils.object_detection
import
argmax_matcher
from
official.vision.detection.utils.object_detection
import
balanced_positive_negative_sampler
from
official.vision.detection.utils.object_detection
import
balanced_positive_negative_sampler
...
@@ -31,30 +32,25 @@ from official.vision.detection.utils.object_detection import target_assigner
...
@@ -31,30 +32,25 @@ from official.vision.detection.utils.object_detection import target_assigner
class
Anchor
(
object
):
class
Anchor
(
object
):
"""Anchor class for anchor-based object detectors."""
"""Anchor class for anchor-based object detectors."""
def
__init__
(
self
,
def
__init__
(
self
,
min_level
,
max_level
,
num_scales
,
aspect_ratios
,
min_level
,
anchor_size
,
image_size
):
max_level
,
num_scales
,
aspect_ratios
,
anchor_size
,
image_size
):
"""Constructs multiscale anchors.
"""Constructs multiscale anchors.
Args:
Args:
min_level: integer number of minimum level of the output feature pyramid.
min_level: integer number of minimum level of the output feature pyramid.
max_level: integer number of maximum level of the output feature pyramid.
max_level: integer number of maximum level of the output feature pyramid.
num_scales: integer number representing intermediate scales added
num_scales: integer number representing intermediate scales added
on each
on each
level. For instances, num_scales=2 adds one additional
level. For instances, num_scales=2 adds one additional
intermediate
intermediate
anchor scales [2^0, 2^0.5] on each level.
anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: list of float numbers representing the aspect ratio anchors
aspect_ratios: list of float numbers representing the aspect ratio anchors
added on each level. The number indicates the ratio of width to height.
added on each level. The number indicates the ratio of width to height.
For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each
For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each
scale level.
scale level.
anchor_size: float number representing the scale of size of the base
anchor_size: float number representing the scale of size of the base
anchor to the feature stride 2^level.
anchor to the feature stride 2^level.
image_size: a list of integer numbers or Tensors representing
image_size: a list of integer numbers or Tensors representing
[height,
[height,
width] of the input image size.The image_size should be
width] of the input image size.The image_size should be
divisible by the
divisible by the
largest feature stride 2^max_level.
largest feature stride 2^max_level.
"""
"""
self
.
min_level
=
min_level
self
.
min_level
=
min_level
self
.
max_level
=
max_level
self
.
max_level
=
max_level
...
@@ -76,11 +72,11 @@ class Anchor(object):
...
@@ -76,11 +72,11 @@ class Anchor(object):
boxes_l
=
[]
boxes_l
=
[]
for
scale
in
range
(
self
.
num_scales
):
for
scale
in
range
(
self
.
num_scales
):
for
aspect_ratio
in
self
.
aspect_ratios
:
for
aspect_ratio
in
self
.
aspect_ratios
:
stride
=
2
**
level
stride
=
2
**
level
intermediate_scale
=
2
**
(
scale
/
float
(
self
.
num_scales
))
intermediate_scale
=
2
**
(
scale
/
float
(
self
.
num_scales
))
base_anchor_size
=
self
.
anchor_size
*
stride
*
intermediate_scale
base_anchor_size
=
self
.
anchor_size
*
stride
*
intermediate_scale
aspect_x
=
aspect_ratio
**
0.5
aspect_x
=
aspect_ratio
**
0.5
aspect_y
=
aspect_ratio
**
-
0.5
aspect_y
=
aspect_ratio
**-
0.5
half_anchor_size_x
=
base_anchor_size
*
aspect_x
/
2.0
half_anchor_size_x
=
base_anchor_size
*
aspect_x
/
2.0
half_anchor_size_y
=
base_anchor_size
*
aspect_y
/
2.0
half_anchor_size_y
=
base_anchor_size
*
aspect_y
/
2.0
x
=
tf
.
range
(
stride
/
2
,
self
.
image_size
[
1
],
stride
)
x
=
tf
.
range
(
stride
/
2
,
self
.
image_size
[
1
],
stride
)
...
@@ -89,8 +85,10 @@ class Anchor(object):
...
@@ -89,8 +85,10 @@ class Anchor(object):
xv
=
tf
.
cast
(
tf
.
reshape
(
xv
,
[
-
1
]),
dtype
=
tf
.
float32
)
xv
=
tf
.
cast
(
tf
.
reshape
(
xv
,
[
-
1
]),
dtype
=
tf
.
float32
)
yv
=
tf
.
cast
(
tf
.
reshape
(
yv
,
[
-
1
]),
dtype
=
tf
.
float32
)
yv
=
tf
.
cast
(
tf
.
reshape
(
yv
,
[
-
1
]),
dtype
=
tf
.
float32
)
# Tensor shape Nx4.
# Tensor shape Nx4.
boxes
=
tf
.
stack
([
yv
-
half_anchor_size_y
,
xv
-
half_anchor_size_x
,
boxes
=
tf
.
stack
([
yv
+
half_anchor_size_y
,
xv
+
half_anchor_size_x
],
yv
-
half_anchor_size_y
,
xv
-
half_anchor_size_x
,
yv
+
half_anchor_size_y
,
xv
+
half_anchor_size_x
],
axis
=
1
)
axis
=
1
)
boxes_l
.
append
(
boxes
)
boxes_l
.
append
(
boxes
)
# Concat anchors on the same level to tensor shape NxAx4.
# Concat anchors on the same level to tensor shape NxAx4.
...
@@ -104,11 +102,11 @@ class Anchor(object):
...
@@ -104,11 +102,11 @@ class Anchor(object):
unpacked_labels
=
collections
.
OrderedDict
()
unpacked_labels
=
collections
.
OrderedDict
()
count
=
0
count
=
0
for
level
in
range
(
self
.
min_level
,
self
.
max_level
+
1
):
for
level
in
range
(
self
.
min_level
,
self
.
max_level
+
1
):
feat_size_y
=
tf
.
cast
(
self
.
image_size
[
0
]
/
2
**
level
,
tf
.
int32
)
feat_size_y
=
tf
.
cast
(
self
.
image_size
[
0
]
/
2
**
level
,
tf
.
int32
)
feat_size_x
=
tf
.
cast
(
self
.
image_size
[
1
]
/
2
**
level
,
tf
.
int32
)
feat_size_x
=
tf
.
cast
(
self
.
image_size
[
1
]
/
2
**
level
,
tf
.
int32
)
steps
=
feat_size_y
*
feat_size_x
*
self
.
anchors_per_location
steps
=
feat_size_y
*
feat_size_x
*
self
.
anchors_per_location
unpacked_labels
[
level
]
=
tf
.
reshape
(
unpacked_labels
[
level
]
=
tf
.
reshape
(
labels
[
count
:
count
+
steps
],
labels
[
count
:
count
+
steps
],
[
feat_size_y
,
feat_size_x
,
-
1
])
[
feat_size_y
,
feat_size_x
,
-
1
])
count
+=
steps
count
+=
steps
return
unpacked_labels
return
unpacked_labels
...
@@ -124,10 +122,7 @@ class Anchor(object):
...
@@ -124,10 +122,7 @@ class Anchor(object):
class
AnchorLabeler
(
object
):
class
AnchorLabeler
(
object
):
"""Labeler for dense object detector."""
"""Labeler for dense object detector."""
def
__init__
(
self
,
def
__init__
(
self
,
anchor
,
match_threshold
=
0.5
,
unmatched_threshold
=
0.5
):
anchor
,
match_threshold
=
0.5
,
unmatched_threshold
=
0.5
):
"""Constructs anchor labeler to assign labels to anchors.
"""Constructs anchor labeler to assign labels to anchors.
Args:
Args:
...
@@ -161,6 +156,7 @@ class AnchorLabeler(object):
...
@@ -161,6 +156,7 @@ class AnchorLabeler(object):
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: A integer tensor with shape [N, 1] representing groundtruth
gt_labels: A integer tensor with shape [N, 1] representing groundtruth
classes.
classes.
Returns:
Returns:
cls_targets_dict: ordered dictionary with keys
cls_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
[min_level, min_level+1, ..., max_level]. The values are tensor with
...
@@ -205,11 +201,14 @@ class AnchorLabeler(object):
...
@@ -205,11 +201,14 @@ class AnchorLabeler(object):
class
RpnAnchorLabeler
(
AnchorLabeler
):
class
RpnAnchorLabeler
(
AnchorLabeler
):
"""Labeler for Region Proposal Network."""
"""Labeler for Region Proposal Network."""
def
__init__
(
self
,
anchor
,
match_threshold
=
0.7
,
def
__init__
(
self
,
unmatched_threshold
=
0.3
,
rpn_batch_size_per_im
=
256
,
anchor
,
match_threshold
=
0.7
,
unmatched_threshold
=
0.3
,
rpn_batch_size_per_im
=
256
,
rpn_fg_fraction
=
0.5
):
rpn_fg_fraction
=
0.5
):
AnchorLabeler
.
__init__
(
self
,
anchor
,
match_threshold
=
0.7
,
AnchorLabeler
.
__init__
(
unmatched_threshold
=
0.3
)
self
,
anchor
,
match_threshold
=
0.7
,
unmatched_threshold
=
0.3
)
self
.
_rpn_batch_size_per_im
=
rpn_batch_size_per_im
self
.
_rpn_batch_size_per_im
=
rpn_batch_size_per_im
self
.
_rpn_fg_fraction
=
rpn_fg_fraction
self
.
_rpn_fg_fraction
=
rpn_fg_fraction
...
@@ -219,11 +218,12 @@ class RpnAnchorLabeler(AnchorLabeler):
...
@@ -219,11 +218,12 @@ class RpnAnchorLabeler(AnchorLabeler):
This function performs subsampling for foreground (fg) and background (bg)
This function performs subsampling for foreground (fg) and background (bg)
anchors.
anchors.
Args:
Args:
match_results: A integer tensor with shape [N] representing the
match_results: A integer tensor with shape [N] representing the matching
matching results of anchors. (1) match_results[i]>=0,
results of anchors. (1) match_results[i]>=0, meaning that column i is
meaning that column i is matched with row match_results[i].
matched with row match_results[i]. (2) match_results[i]=-1, meaning that
(2) match_results[i]=-1, meaning that column i is not matched.
column i is not matched. (3) match_results[i]=-2, meaning that column i
(3) match_results[i]=-2, meaning that column i is ignored.
is ignored.
Returns:
Returns:
score_targets: a integer tensor with the a shape of [N].
score_targets: a integer tensor with the a shape of [N].
(1) score_targets[i]=1, the anchor is a positive sample.
(1) score_targets[i]=1, the anchor is a positive sample.
...
@@ -241,8 +241,7 @@ class RpnAnchorLabeler(AnchorLabeler):
...
@@ -241,8 +241,7 @@ class RpnAnchorLabeler(AnchorLabeler):
indicator
=
tf
.
greater
(
match_results
,
-
2
)
indicator
=
tf
.
greater
(
match_results
,
-
2
)
labels
=
tf
.
greater
(
match_results
,
-
1
)
labels
=
tf
.
greater
(
match_results
,
-
1
)
samples
=
sampler
.
subsample
(
samples
=
sampler
.
subsample
(
indicator
,
self
.
_rpn_batch_size_per_im
,
labels
)
indicator
,
self
.
_rpn_batch_size_per_im
,
labels
)
positive_labels
=
tf
.
where
(
positive_labels
=
tf
.
where
(
tf
.
logical_and
(
samples
,
labels
),
tf
.
logical_and
(
samples
,
labels
),
tf
.
constant
(
2
,
dtype
=
tf
.
int32
,
shape
=
match_results
.
shape
),
tf
.
constant
(
2
,
dtype
=
tf
.
int32
,
shape
=
match_results
.
shape
),
...
@@ -253,8 +252,8 @@ class RpnAnchorLabeler(AnchorLabeler):
...
@@ -253,8 +252,8 @@ class RpnAnchorLabeler(AnchorLabeler):
tf
.
constant
(
0
,
dtype
=
tf
.
int32
,
shape
=
match_results
.
shape
))
tf
.
constant
(
0
,
dtype
=
tf
.
int32
,
shape
=
match_results
.
shape
))
ignore_labels
=
tf
.
fill
(
match_results
.
shape
,
-
1
)
ignore_labels
=
tf
.
fill
(
match_results
.
shape
,
-
1
)
return
(
ignore_labels
+
positive_labels
+
negative_labels
,
return
(
ignore_labels
+
positive_labels
+
negative_labels
,
positive_labels
,
positive_labels
,
negative_labels
)
negative_labels
)
def
label_anchors
(
self
,
gt_boxes
,
gt_labels
):
def
label_anchors
(
self
,
gt_boxes
,
gt_labels
):
"""Labels anchors with ground truth inputs.
"""Labels anchors with ground truth inputs.
...
@@ -264,6 +263,7 @@ class RpnAnchorLabeler(AnchorLabeler):
...
@@ -264,6 +263,7 @@ class RpnAnchorLabeler(AnchorLabeler):
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: A integer tensor with shape [N, 1] representing groundtruth
gt_labels: A integer tensor with shape [N, 1] representing groundtruth
classes.
classes.
Returns:
Returns:
score_targets_dict: ordered dictionary with keys
score_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
[min_level, min_level+1, ..., max_level]. The values are tensor with
...
...
official/vision/detection/dataloader/input_reader.py
View file @
88253ce5
...
@@ -91,7 +91,8 @@ class InputFn(object):
...
@@ -91,7 +91,8 @@ class InputFn(object):
dataset
=
dataset
.
repeat
()
dataset
=
dataset
.
repeat
()
dataset
=
dataset
.
interleave
(
dataset
=
dataset
.
interleave
(
map_func
=
self
.
_dataset_fn
,
cycle_length
=
32
,
map_func
=
self
.
_dataset_fn
,
cycle_length
=
32
,
num_parallel_calls
=
tf
.
data
.
experimental
.
AUTOTUNE
)
num_parallel_calls
=
tf
.
data
.
experimental
.
AUTOTUNE
)
if
self
.
_is_training
:
if
self
.
_is_training
:
...
...
official/vision/detection/dataloader/retinanet_parser.py
View file @
88253ce5
...
@@ -79,9 +79,9 @@ class Parser(object):
...
@@ -79,9 +79,9 @@ class Parser(object):
output_size should be divided by the largest feature stride 2^max_level.
output_size should be divided by the largest feature stride 2^max_level.
min_level: `int` number of minimum level of the output feature pyramid.
min_level: `int` number of minimum level of the output feature pyramid.
max_level: `int` number of maximum level of the output feature pyramid.
max_level: `int` number of maximum level of the output feature pyramid.
num_scales: `int` number representing intermediate scales added
num_scales: `int` number representing intermediate scales added
on each
on each
level. For instances, num_scales=2 adds one additional
level. For instances, num_scales=2 adds one additional
intermediate
intermediate
anchor scales [2^0, 2^0.5] on each level.
anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: `list` of float numbers representing the aspect raito
aspect_ratios: `list` of float numbers representing the aspect raito
anchors added on each level. The number indicates the ratio of width to
anchors added on each level. The number indicates the ratio of width to
height. For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
height. For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
...
@@ -94,8 +94,8 @@ class Parser(object):
...
@@ -94,8 +94,8 @@ class Parser(object):
unmatched_threshold: `float` number between 0 and 1 representing the
unmatched_threshold: `float` number between 0 and 1 representing the
upper-bound threshold to assign negative labels for anchors. An anchor
upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative.
with a score below the threshold is labeled negative.
aug_rand_hflip: `bool`, if True, augment training with random
aug_rand_hflip: `bool`, if True, augment training with random
horizontal
horizontal
flip.
flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
aug_scale_min: `float`, the minimum scale applied to `output_size` for
data augmentation during training.
data augmentation during training.
aug_scale_max: `float`, the maximum scale applied to `output_size` for
aug_scale_max: `float`, the maximum scale applied to `output_size` for
...
@@ -109,8 +109,8 @@ class Parser(object):
...
@@ -109,8 +109,8 @@ class Parser(object):
max_num_instances: `int` number of maximum number of instances in an
max_num_instances: `int` number of maximum number of instances in an
image. The groundtruth data will be padded to `max_num_instances`.
image. The groundtruth data will be padded to `max_num_instances`.
use_bfloat16: `bool`, if True, cast output image to tf.bfloat16.
use_bfloat16: `bool`, if True, cast output image to tf.bfloat16.
mode: a ModeKeys. Specifies if this is training, evaluation, prediction
mode: a ModeKeys. Specifies if this is training, evaluation, prediction
or
or
prediction with groundtruths in the outputs.
prediction with groundtruths in the outputs.
"""
"""
self
.
_mode
=
mode
self
.
_mode
=
mode
self
.
_max_num_instances
=
max_num_instances
self
.
_max_num_instances
=
max_num_instances
...
@@ -232,8 +232,8 @@ class Parser(object):
...
@@ -232,8 +232,8 @@ class Parser(object):
image
,
image_info
=
input_utils
.
resize_and_crop_image
(
image
,
image_info
=
input_utils
.
resize_and_crop_image
(
image
,
image
,
self
.
_output_size
,
self
.
_output_size
,
padded_size
=
input_utils
.
compute_padded_size
(
padded_size
=
input_utils
.
compute_padded_size
(
self
.
_output_size
,
self
.
_output_size
,
2
**
self
.
_max_level
),
2
**
self
.
_max_level
),
aug_scale_min
=
self
.
_aug_scale_min
,
aug_scale_min
=
self
.
_aug_scale_min
,
aug_scale_max
=
self
.
_aug_scale_max
)
aug_scale_max
=
self
.
_aug_scale_max
)
image_height
,
image_width
,
_
=
image
.
get_shape
().
as_list
()
image_height
,
image_width
,
_
=
image
.
get_shape
().
as_list
()
...
@@ -241,22 +241,21 @@ class Parser(object):
...
@@ -241,22 +241,21 @@ class Parser(object):
# Resizes and crops boxes.
# Resizes and crops boxes.
image_scale
=
image_info
[
2
,
:]
image_scale
=
image_info
[
2
,
:]
offset
=
image_info
[
3
,
:]
offset
=
image_info
[
3
,
:]
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
,
image_scale
,
boxes
,
image_scale
,
image_info
[
1
,
:],
offset
)
image_info
[
1
,
:],
offset
)
# Filters out ground truth boxes that are all zeros.
# Filters out ground truth boxes that are all zeros.
indices
=
box_utils
.
get_non_empty_box_indices
(
boxes
)
indices
=
box_utils
.
get_non_empty_box_indices
(
boxes
)
boxes
=
tf
.
gather
(
boxes
,
indices
)
boxes
=
tf
.
gather
(
boxes
,
indices
)
classes
=
tf
.
gather
(
classes
,
indices
)
classes
=
tf
.
gather
(
classes
,
indices
)
# Assigns anchors.
# Assigns anchors.
input_anchor
=
anchor
.
Anchor
(
input_anchor
=
anchor
.
Anchor
(
self
.
_min_level
,
self
.
_max_level
,
self
.
_min_level
,
self
.
_max_level
,
self
.
_num_scale
s
,
self
.
_num_scales
,
self
.
_aspect_ratio
s
,
self
.
_aspect_ratios
,
self
.
_anchor_size
,
(
image_height
,
image_width
))
self
.
_anchor_size
,
(
image_height
,
image_width
))
anchor_labeler
=
anchor
.
AnchorLabeler
(
anchor_labeler
=
anchor
.
AnchorLabeler
(
input_anchor
,
self
.
_match_threshold
,
input_anchor
,
self
.
_match_threshold
,
self
.
_unmatched_threshold
)
self
.
_unmatched_threshold
)
(
cls_targets
,
box_targets
,
num_positives
)
=
anchor_labeler
.
label_anchors
(
(
cls_targets
,
box_targets
,
num_positives
)
=
anchor_labeler
.
label_anchors
(
boxes
,
boxes
,
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
# If bfloat16 is used, casts input image to tf.bfloat16.
# If bfloat16 is used, casts input image to tf.bfloat16.
if
self
.
_use_bfloat16
:
if
self
.
_use_bfloat16
:
...
@@ -292,8 +291,8 @@ class Parser(object):
...
@@ -292,8 +291,8 @@ class Parser(object):
image
,
image_info
=
input_utils
.
resize_and_crop_image
(
image
,
image_info
=
input_utils
.
resize_and_crop_image
(
image
,
image
,
self
.
_output_size
,
self
.
_output_size
,
padded_size
=
input_utils
.
compute_padded_size
(
padded_size
=
input_utils
.
compute_padded_size
(
self
.
_output_size
,
self
.
_output_size
,
2
**
self
.
_max_level
),
2
**
self
.
_max_level
),
aug_scale_min
=
1.0
,
aug_scale_min
=
1.0
,
aug_scale_max
=
1.0
)
aug_scale_max
=
1.0
)
image_height
,
image_width
,
_
=
image
.
get_shape
().
as_list
()
image_height
,
image_width
,
_
=
image
.
get_shape
().
as_list
()
...
@@ -301,22 +300,21 @@ class Parser(object):
...
@@ -301,22 +300,21 @@ class Parser(object):
# Resizes and crops boxes.
# Resizes and crops boxes.
image_scale
=
image_info
[
2
,
:]
image_scale
=
image_info
[
2
,
:]
offset
=
image_info
[
3
,
:]
offset
=
image_info
[
3
,
:]
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
,
image_scale
,
boxes
,
image_scale
,
image_info
[
1
,
:],
offset
)
image_info
[
1
,
:],
offset
)
# Filters out ground truth boxes that are all zeros.
# Filters out ground truth boxes that are all zeros.
indices
=
box_utils
.
get_non_empty_box_indices
(
boxes
)
indices
=
box_utils
.
get_non_empty_box_indices
(
boxes
)
boxes
=
tf
.
gather
(
boxes
,
indices
)
boxes
=
tf
.
gather
(
boxes
,
indices
)
classes
=
tf
.
gather
(
classes
,
indices
)
classes
=
tf
.
gather
(
classes
,
indices
)
# Assigns anchors.
# Assigns anchors.
input_anchor
=
anchor
.
Anchor
(
input_anchor
=
anchor
.
Anchor
(
self
.
_min_level
,
self
.
_max_level
,
self
.
_min_level
,
self
.
_max_level
,
self
.
_num_scale
s
,
self
.
_num_scales
,
self
.
_aspect_ratio
s
,
self
.
_aspect_ratios
,
self
.
_anchor_size
,
(
image_height
,
image_width
))
self
.
_anchor_size
,
(
image_height
,
image_width
))
anchor_labeler
=
anchor
.
AnchorLabeler
(
anchor_labeler
=
anchor
.
AnchorLabeler
(
input_anchor
,
self
.
_match_threshold
,
input_anchor
,
self
.
_match_threshold
,
self
.
_unmatched_threshold
)
self
.
_unmatched_threshold
)
(
cls_targets
,
box_targets
,
num_positives
)
=
anchor_labeler
.
label_anchors
(
(
cls_targets
,
box_targets
,
num_positives
)
=
anchor_labeler
.
label_anchors
(
boxes
,
boxes
,
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
# If bfloat16 is used, casts input image to tf.bfloat16.
# If bfloat16 is used, casts input image to tf.bfloat16.
if
self
.
_use_bfloat16
:
if
self
.
_use_bfloat16
:
...
@@ -324,18 +322,24 @@ class Parser(object):
...
@@ -324,18 +322,24 @@ class Parser(object):
# Sets up groundtruth data for evaluation.
# Sets up groundtruth data for evaluation.
groundtruths
=
{
groundtruths
=
{
'source_id'
:
data
[
'source_id'
],
'source_id'
:
'num_groundtrtuhs'
:
tf
.
shape
(
data
[
'groundtruth_classes'
]),
data
[
'source_id'
],
'image_info'
:
image_info
,
'num_groundtrtuhs'
:
'boxes'
:
box_utils
.
denormalize_boxes
(
tf
.
shape
(
data
[
'groundtruth_classes'
]),
data
[
'groundtruth_boxes'
],
image_shape
),
'image_info'
:
'classes'
:
data
[
'groundtruth_classes'
],
image_info
,
'areas'
:
data
[
'groundtruth_area'
],
'boxes'
:
'is_crowds'
:
tf
.
cast
(
data
[
'groundtruth_is_crowd'
],
tf
.
int32
),
box_utils
.
denormalize_boxes
(
data
[
'groundtruth_boxes'
],
image_shape
),
'classes'
:
data
[
'groundtruth_classes'
],
'areas'
:
data
[
'groundtruth_area'
],
'is_crowds'
:
tf
.
cast
(
data
[
'groundtruth_is_crowd'
],
tf
.
int32
),
}
}
groundtruths
[
'source_id'
]
=
process_source_id
(
groundtruths
[
'source_id'
])
groundtruths
[
'source_id'
]
=
process_source_id
(
groundtruths
[
'source_id'
])
groundtruths
=
pad_groundtruths_to_fixed_size
(
groundtruths
=
pad_groundtruths_to_fixed_size
(
groundtruths
,
groundtruths
,
self
.
_max_num_instances
)
self
.
_max_num_instances
)
# Packs labels for model_fn outputs.
# Packs labels for model_fn outputs.
labels
=
{
labels
=
{
...
@@ -361,8 +365,8 @@ class Parser(object):
...
@@ -361,8 +365,8 @@ class Parser(object):
image
,
image_info
=
input_utils
.
resize_and_crop_image
(
image
,
image_info
=
input_utils
.
resize_and_crop_image
(
image
,
image
,
self
.
_output_size
,
self
.
_output_size
,
padded_size
=
input_utils
.
compute_padded_size
(
padded_size
=
input_utils
.
compute_padded_size
(
self
.
_output_size
,
self
.
_output_size
,
2
**
self
.
_max_level
),
2
**
self
.
_max_level
),
aug_scale_min
=
1.0
,
aug_scale_min
=
1.0
,
aug_scale_max
=
1.0
)
aug_scale_max
=
1.0
)
image_height
,
image_width
,
_
=
image
.
get_shape
().
as_list
()
image_height
,
image_width
,
_
=
image
.
get_shape
().
as_list
()
...
@@ -372,9 +376,9 @@ class Parser(object):
...
@@ -372,9 +376,9 @@ class Parser(object):
image
=
tf
.
cast
(
image
,
dtype
=
tf
.
bfloat16
)
image
=
tf
.
cast
(
image
,
dtype
=
tf
.
bfloat16
)
# Compute Anchor boxes.
# Compute Anchor boxes.
input_anchor
=
anchor
.
Anchor
(
input_anchor
=
anchor
.
Anchor
(
self
.
_min_level
,
self
.
_max_level
,
self
.
_min_level
,
self
.
_max_level
,
self
.
_num_scale
s
,
self
.
_num_scales
,
self
.
_aspect_ratio
s
,
self
.
_aspect_ratios
,
self
.
_anchor_size
,
(
image_height
,
image_width
))
self
.
_anchor_size
,
(
image_height
,
image_width
))
labels
=
{
labels
=
{
'anchor_boxes'
:
input_anchor
.
multilevel_boxes
,
'anchor_boxes'
:
input_anchor
.
multilevel_boxes
,
...
@@ -384,8 +388,8 @@ class Parser(object):
...
@@ -384,8 +388,8 @@ class Parser(object):
# in labels.
# in labels.
if
self
.
_mode
==
ModeKeys
.
PREDICT_WITH_GT
:
if
self
.
_mode
==
ModeKeys
.
PREDICT_WITH_GT
:
# Converts boxes from normalized coordinates to pixel coordinates.
# Converts boxes from normalized coordinates to pixel coordinates.
boxes
=
box_utils
.
denormalize_boxes
(
boxes
=
box_utils
.
denormalize_boxes
(
data
[
'groundtruth_boxes'
],
data
[
'groundtruth_boxes'
],
image_shape
)
image_shape
)
groundtruths
=
{
groundtruths
=
{
'source_id'
:
data
[
'source_id'
],
'source_id'
:
data
[
'source_id'
],
'num_detections'
:
tf
.
shape
(
data
[
'groundtruth_classes'
]),
'num_detections'
:
tf
.
shape
(
data
[
'groundtruth_classes'
]),
...
@@ -395,8 +399,8 @@ class Parser(object):
...
@@ -395,8 +399,8 @@ class Parser(object):
'is_crowds'
:
tf
.
cast
(
data
[
'groundtruth_is_crowd'
],
tf
.
int32
),
'is_crowds'
:
tf
.
cast
(
data
[
'groundtruth_is_crowd'
],
tf
.
int32
),
}
}
groundtruths
[
'source_id'
]
=
process_source_id
(
groundtruths
[
'source_id'
])
groundtruths
[
'source_id'
]
=
process_source_id
(
groundtruths
[
'source_id'
])
groundtruths
=
pad_groundtruths_to_fixed_size
(
groundtruths
=
pad_groundtruths_to_fixed_size
(
groundtruths
,
groundtruths
,
self
.
_max_num_instances
)
self
.
_max_num_instances
)
labels
[
'groundtruths'
]
=
groundtruths
labels
[
'groundtruths'
]
=
groundtruths
# Computes training objective for evaluation loss.
# Computes training objective for evaluation loss.
...
@@ -404,18 +408,17 @@ class Parser(object):
...
@@ -404,18 +408,17 @@ class Parser(object):
image_scale
=
image_info
[
2
,
:]
image_scale
=
image_info
[
2
,
:]
offset
=
image_info
[
3
,
:]
offset
=
image_info
[
3
,
:]
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
=
input_utils
.
resize_and_crop_boxes
(
boxes
,
image_scale
,
boxes
,
image_scale
,
image_info
[
1
,
:],
offset
)
image_info
[
1
,
:],
offset
)
# Filters out ground truth boxes that are all zeros.
# Filters out ground truth boxes that are all zeros.
indices
=
box_utils
.
get_non_empty_box_indices
(
boxes
)
indices
=
box_utils
.
get_non_empty_box_indices
(
boxes
)
boxes
=
tf
.
gather
(
boxes
,
indices
)
boxes
=
tf
.
gather
(
boxes
,
indices
)
# Assigns anchors.
# Assigns anchors.
anchor_labeler
=
anchor
.
AnchorLabeler
(
anchor_labeler
=
anchor
.
AnchorLabeler
(
input_anchor
,
self
.
_match_threshold
,
input_anchor
,
self
.
_match_threshold
,
self
.
_unmatched_threshold
)
self
.
_unmatched_threshold
)
(
cls_targets
,
box_targets
,
num_positives
)
=
anchor_labeler
.
label_anchors
(
(
cls_targets
,
box_targets
,
num_positives
)
=
anchor_labeler
.
label_anchors
(
boxes
,
boxes
,
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
tf
.
cast
(
tf
.
expand_dims
(
classes
,
axis
=
1
),
tf
.
float32
))
labels
[
'cls_targets'
]
=
cls_targets
labels
[
'cls_targets'
]
=
cls_targets
labels
[
'box_targets'
]
=
box_targets
labels
[
'box_targets'
]
=
box_targets
labels
[
'num_positives'
]
=
num_positives
labels
[
'num_positives'
]
=
num_positives
...
...
official/vision/detection/dataloader/shapemask_parser.py
View file @
88253ce5
...
@@ -21,7 +21,6 @@ Weicheng Kuo, Anelia Angelova, Jitendra Malik, Tsung-Yi Lin
...
@@ -21,7 +21,6 @@ Weicheng Kuo, Anelia Angelova, Jitendra Malik, Tsung-Yi Lin
ShapeMask: Learning to Segment Novel Objects by Refining Shape Priors.
ShapeMask: Learning to Segment Novel Objects by Refining Shape Priors.
arXiv:1904.03239.
arXiv:1904.03239.
"""
"""
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.vision.detection.dataloader
import
anchor
from
official.vision.detection.dataloader
import
anchor
...
...
official/vision/detection/evaluation/coco_evaluator.py
View file @
88253ce5
...
@@ -32,6 +32,7 @@ from __future__ import print_function
...
@@ -32,6 +32,7 @@ from __future__ import print_function
import
atexit
import
atexit
import
tempfile
import
tempfile
import
numpy
as
np
import
numpy
as
np
from
absl
import
logging
from
absl
import
logging
from
pycocotools
import
cocoeval
from
pycocotools
import
cocoeval
...
@@ -197,22 +198,21 @@ class COCOEvaluator(object):
...
@@ -197,22 +198,21 @@ class COCOEvaluator(object):
"""Update and aggregate detection results and groundtruth data.
"""Update and aggregate detection results and groundtruth data.
Args:
Args:
predictions: a dictionary of numpy arrays including the fields below.
predictions: a dictionary of numpy arrays including the fields below.
See
See
different parsers under `../dataloader` for more details.
different parsers under `../dataloader` for more details.
Required fields:
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- source_id: a numpy array of int or string of shape [batch_size].
- image_info [if `need_rescale_bboxes` is True]: a numpy array of
- image_info [if `need_rescale_bboxes` is True]: a numpy array of
float of shape [batch_size, 4, 2].
float of shape [batch_size, 4, 2].
- num_detections: a numpy array of
- num_detections: a numpy array of int of shape [batch_size].
int of shape [batch_size].
- detection_boxes: a numpy array of float of shape [batch_size, K, 4].
- detection_boxes: a numpy array of float of shape [batch_size, K, 4].
- detection_classes: a numpy array of int of shape [batch_size, K].
- detection_classes: a numpy array of int of shape [batch_size, K].
- detection_scores: a numpy array of float of shape [batch_size, K].
- detection_scores: a numpy array of float of shape [batch_size, K].
Optional fields:
Optional fields:
- detection_masks: a numpy array of float of shape
- detection_masks: a numpy array of float of shape
[batch_size, K,
[batch_size, K,
mask_height, mask_width].
mask_height, mask_width].
groundtruths: a dictionary of numpy arrays including the fields below.
groundtruths: a dictionary of numpy arrays including the fields below.
See
See
also different parsers under `../dataloader` for more details.
also different parsers under `../dataloader` for more details.
Required fields:
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- source_id: a numpy array of int or string of shape [batch_size].
- height: a numpy array of int of shape [batch_size].
- height: a numpy array of int of shape [batch_size].
...
@@ -222,12 +222,12 @@ class COCOEvaluator(object):
...
@@ -222,12 +222,12 @@ class COCOEvaluator(object):
- classes: a numpy array of int of shape [batch_size, K].
- classes: a numpy array of int of shape [batch_size, K].
Optional fields:
Optional fields:
- is_crowds: a numpy array of int of shape [batch_size, K]. If the
- is_crowds: a numpy array of int of shape [batch_size, K]. If the
field is absent, it is assumed that this instance is not crowd.
field is absent, it is assumed that this instance is not crowd.
- areas: a numy array of float of shape [batch_size, K]. If the
- areas: a numy array of float of shape [batch_size, K]. If the
field
field
is absent, the area is calculated using either boxes or
is absent, the area is calculated using either boxes or
masks
masks
depending on which one is available.
depending on which one is available.
- masks: a numpy array of float of shape
- masks: a numpy array of float of shape
[batch_size, K, mask_height,
[batch_size, K, mask_height,
mask_width],
mask_width],
Raises:
Raises:
ValueError: if the required prediction or groundtruth fields are not
ValueError: if the required prediction or groundtruth fields are not
...
@@ -318,8 +318,7 @@ class ShapeMaskCOCOEvaluator(COCOEvaluator):
...
@@ -318,8 +318,7 @@ class ShapeMaskCOCOEvaluator(COCOEvaluator):
metrics
=
np
.
hstack
((
coco_metrics
,
mcoco_eval
.
stats
))
metrics
=
np
.
hstack
((
coco_metrics
,
mcoco_eval
.
stats
))
else
:
else
:
mask_coco_metrics
=
mcoco_eval
.
category_stats
mask_coco_metrics
=
mcoco_eval
.
category_stats
val_catg_idx
=
np
.
isin
(
mcoco_eval
.
params
.
catIds
,
val_catg_idx
=
np
.
isin
(
mcoco_eval
.
params
.
catIds
,
self
.
_eval_categories
)
self
.
_eval_categories
)
# Gather the valid evaluation of the eval categories.
# Gather the valid evaluation of the eval categories.
if
np
.
any
(
val_catg_idx
):
if
np
.
any
(
val_catg_idx
):
mean_val_metrics
=
[]
mean_val_metrics
=
[]
...
...
official/vision/detection/main.py
View file @
88253ce5
...
@@ -23,6 +23,7 @@ import functools
...
@@ -23,6 +23,7 @@ import functools
import
pprint
import
pprint
# pylint: disable=g-bad-import-order
# pylint: disable=g-bad-import-order
# Import libraries
import
tensorflow
as
tf
import
tensorflow
as
tf
from
absl
import
app
from
absl
import
app
...
...
official/vision/detection/modeling/architecture/heads.py
View file @
88253ce5
...
@@ -31,17 +31,17 @@ from official.vision.detection.ops import spatial_transform_ops
...
@@ -31,17 +31,17 @@ from official.vision.detection.ops import spatial_transform_ops
class
RpnHead
(
tf
.
keras
.
layers
.
Layer
):
class
RpnHead
(
tf
.
keras
.
layers
.
Layer
):
"""Region Proposal Network head."""
"""Region Proposal Network head."""
def
__init__
(
self
,
def
__init__
(
min_lev
el
,
s
el
f
,
m
ax
_level
,
m
in
_level
,
anchors_per_location
,
max_level
,
num_convs
=
2
,
anchors_per_location
,
num_
filter
s
=
2
56
,
num_
conv
s
=
2
,
use_separable_conv
=
False
,
num_filters
=
256
,
activation
=
'relu'
,
use_separable_conv
=
False
,
use_batch_norm
=
True
,
activation
=
'relu'
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
use_batch_norm
=
True
,
activation
=
'relu'
)):
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
"""Initialize params to build Region Proposal Network head.
"""Initialize params to build Region Proposal Network head.
Args:
Args:
...
@@ -57,8 +57,8 @@ class RpnHead(tf.keras.layers.Layer):
...
@@ -57,8 +57,8 @@ class RpnHead(tf.keras.layers.Layer):
is used.
is used.
activation: activation function. Support 'relu' and 'swish'.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
norm_activation: an operation that includes a normalization layer
followed
followed
by an optional activation layer.
by an optional activation layer.
"""
"""
self
.
_min_level
=
min_level
self
.
_min_level
=
min_level
self
.
_max_level
=
max_level
self
.
_max_level
=
max_level
...
@@ -140,17 +140,17 @@ class RpnHead(tf.keras.layers.Layer):
...
@@ -140,17 +140,17 @@ class RpnHead(tf.keras.layers.Layer):
class
FastrcnnHead
(
tf
.
keras
.
layers
.
Layer
):
class
FastrcnnHead
(
tf
.
keras
.
layers
.
Layer
):
"""Fast R-CNN box head."""
"""Fast R-CNN box head."""
def
__init__
(
self
,
def
__init__
(
num_classes
,
self
,
num_c
onvs
=
0
,
num_c
lasses
,
num_
filters
=
256
,
num_
convs
=
0
,
use_separable_conv
=
False
,
num_filters
=
256
,
num_fcs
=
2
,
use_separable_conv
=
False
,
fc_dims
=
1024
,
num_fcs
=
2
,
activation
=
'relu'
,
fc_dims
=
1024
,
use_batch_norm
=
True
,
activation
=
'relu'
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
use_batch_norm
=
True
,
activation
=
'relu'
)):
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
"""Initialize params to build Fast R-CNN box head.
"""Initialize params to build Fast R-CNN box head.
Args:
Args:
...
@@ -167,8 +167,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
...
@@ -167,8 +167,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
layers.
layers.
activation: activation function. Support 'relu' and 'swish'.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
norm_activation: an operation that includes a normalization layer
followed
followed
by an optional activation layer.
by an optional activation layer.
"""
"""
self
.
_num_classes
=
num_classes
self
.
_num_classes
=
num_classes
...
@@ -207,7 +207,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
...
@@ -207,7 +207,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
strides
=
(
1
,
1
),
strides
=
(
1
,
1
),
padding
=
'same'
,
padding
=
'same'
,
dilation_rate
=
(
1
,
1
),
dilation_rate
=
(
1
,
1
),
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
name
=
'conv_{}'
.
format
(
i
)))
name
=
'conv_{}'
.
format
(
i
)))
if
self
.
_use_batch_norm
:
if
self
.
_use_batch_norm
:
self
.
_conv_bn_ops
.
append
(
self
.
_norm_activation
())
self
.
_conv_bn_ops
.
append
(
self
.
_norm_activation
())
...
@@ -218,7 +219,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
...
@@ -218,7 +219,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
self
.
_fc_ops
.
append
(
self
.
_fc_ops
.
append
(
tf
.
keras
.
layers
.
Dense
(
tf
.
keras
.
layers
.
Dense
(
units
=
self
.
_fc_dims
,
units
=
self
.
_fc_dims
,
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
name
=
'fc{}'
.
format
(
i
)))
name
=
'fc{}'
.
format
(
i
)))
if
self
.
_use_batch_norm
:
if
self
.
_use_batch_norm
:
self
.
_fc_bn_ops
.
append
(
self
.
_norm_activation
(
fused
=
False
))
self
.
_fc_bn_ops
.
append
(
self
.
_norm_activation
(
fused
=
False
))
...
@@ -238,8 +240,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
...
@@ -238,8 +240,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
"""Box and class branches for the Mask-RCNN model.
"""Box and class branches for the Mask-RCNN model.
Args:
Args:
roi_features: A ROI feature tensor of shape
roi_features: A ROI feature tensor of shape
[batch_size, num_rois,
[batch_size, num_rois,
height_l, width_l, num_filters].
height_l, width_l, num_filters].
is_training: `boolean`, if True if model is in training mode.
is_training: `boolean`, if True if model is in training mode.
Returns:
Returns:
...
@@ -277,16 +279,16 @@ class FastrcnnHead(tf.keras.layers.Layer):
...
@@ -277,16 +279,16 @@ class FastrcnnHead(tf.keras.layers.Layer):
class
MaskrcnnHead
(
tf
.
keras
.
layers
.
Layer
):
class
MaskrcnnHead
(
tf
.
keras
.
layers
.
Layer
):
"""Mask R-CNN head."""
"""Mask R-CNN head."""
def
__init__
(
self
,
def
__init__
(
num_classes
,
self
,
mask_target_size
,
num_classes
,
num_convs
=
4
,
mask_target_size
,
num_
filters
=
256
,
num_
convs
=
4
,
use_separable_conv
=
False
,
num_filters
=
256
,
activation
=
'relu'
,
use_separable_conv
=
False
,
use_batch_norm
=
True
,
activation
=
'relu'
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
use_batch_norm
=
True
,
activation
=
'relu'
)):
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
"""Initialize params to build Fast R-CNN head.
"""Initialize params to build Fast R-CNN head.
Args:
Args:
...
@@ -300,8 +302,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
...
@@ -300,8 +302,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
is used.
is used.
activation: activation function. Support 'relu' and 'swish'.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
norm_activation: an operation that includes a normalization layer
followed
followed
by an optional activation layer.
by an optional activation layer.
"""
"""
self
.
_num_classes
=
num_classes
self
.
_num_classes
=
num_classes
self
.
_mask_target_size
=
mask_target_size
self
.
_mask_target_size
=
mask_target_size
...
@@ -336,7 +338,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
...
@@ -336,7 +338,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
strides
=
(
1
,
1
),
strides
=
(
1
,
1
),
padding
=
'same'
,
padding
=
'same'
,
dilation_rate
=
(
1
,
1
),
dilation_rate
=
(
1
,
1
),
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
activation
=
(
None
if
self
.
_use_batch_norm
else
self
.
_activation_op
),
name
=
'mask-conv-l%d'
%
i
))
name
=
'mask-conv-l%d'
%
i
))
self
.
_mask_conv_transpose
=
tf
.
keras
.
layers
.
Conv2DTranspose
(
self
.
_mask_conv_transpose
=
tf
.
keras
.
layers
.
Conv2DTranspose
(
self
.
_num_filters
,
self
.
_num_filters
,
...
@@ -353,10 +356,10 @@ class MaskrcnnHead(tf.keras.layers.Layer):
...
@@ -353,10 +356,10 @@ class MaskrcnnHead(tf.keras.layers.Layer):
"""Mask branch for the Mask-RCNN model.
"""Mask branch for the Mask-RCNN model.
Args:
Args:
roi_features: A ROI feature tensor of shape
roi_features: A ROI feature tensor of shape
[batch_size, num_rois,
[batch_size, num_rois,
height_l, width_l, num_filters].
height_l, width_l, num_filters].
class_indices: a Tensor of shape [batch_size, num_rois], indicating
class_indices: a Tensor of shape [batch_size, num_rois], indicating
which
which
class the ROI is.
class the ROI is.
is_training: `boolean`, if True if model is in training mode.
is_training: `boolean`, if True if model is in training mode.
Returns:
Returns:
...
@@ -415,16 +418,16 @@ class MaskrcnnHead(tf.keras.layers.Layer):
...
@@ -415,16 +418,16 @@ class MaskrcnnHead(tf.keras.layers.Layer):
class
RetinanetHead
(
object
):
class
RetinanetHead
(
object
):
"""RetinaNet head."""
"""RetinaNet head."""
def
__init__
(
self
,
def
__init__
(
min_lev
el
,
s
el
f
,
m
ax
_level
,
m
in
_level
,
num_classes
,
max_level
,
anchors_per_location
,
num_classes
,
num_convs
=
4
,
anchors_per_location
,
num_
filters
=
256
,
num_
convs
=
4
,
use_separable_conv
=
False
,
num_filters
=
256
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
use_separable_conv
=
False
,
activation
=
'relu'
)):
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
)):
"""Initialize params to build RetinaNet head.
"""Initialize params to build RetinaNet head.
Args:
Args:
...
@@ -437,8 +440,8 @@ class RetinanetHead(object):
...
@@ -437,8 +440,8 @@ class RetinanetHead(object):
num_filters: `int` number of filters used in the head architecture.
num_filters: `int` number of filters used in the head architecture.
use_separable_conv: `bool` to indicate whether to use separable
use_separable_conv: `bool` to indicate whether to use separable
convoluation.
convoluation.
norm_activation: an operation that includes a normalization layer
norm_activation: an operation that includes a normalization layer
followed
followed
by an optional activation layer.
by an optional activation layer.
"""
"""
self
.
_min_level
=
min_level
self
.
_min_level
=
min_level
self
.
_max_level
=
max_level
self
.
_max_level
=
max_level
...
@@ -600,12 +603,8 @@ class RetinanetHead(object):
...
@@ -600,12 +603,8 @@ class RetinanetHead(object):
class
ShapemaskPriorHead
(
object
):
class
ShapemaskPriorHead
(
object
):
"""ShapeMask Prior head."""
"""ShapeMask Prior head."""
def
__init__
(
self
,
def
__init__
(
self
,
num_classes
,
num_downsample_channels
,
mask_crop_size
,
num_classes
,
use_category_for_mask
,
shape_prior_path
):
num_downsample_channels
,
mask_crop_size
,
use_category_for_mask
,
shape_prior_path
):
"""Initialize params to build RetinaNet head.
"""Initialize params to build RetinaNet head.
Args:
Args:
...
@@ -632,12 +631,12 @@ class ShapemaskPriorHead(object):
...
@@ -632,12 +631,12 @@ class ShapemaskPriorHead(object):
Args:
Args:
fpn_features: a dictionary of FPN features.
fpn_features: a dictionary of FPN features.
boxes: a float tensor of shape [batch_size, num_instances, 4]
boxes: a float tensor of shape [batch_size, num_instances, 4]
representing
representing
the tight gt boxes from dataloader/detection.
the tight gt boxes from dataloader/detection.
outer_boxes: a float tensor of shape [batch_size, num_instances, 4]
outer_boxes: a float tensor of shape [batch_size, num_instances, 4]
representing the loose gt boxes from dataloader/detection.
representing the loose gt boxes from dataloader/detection.
classes: a int Tensor of shape [batch_size, num_instances]
classes: a int Tensor of shape [batch_size, num_instances]
of instance
of instance
classes.
classes.
is_training: training mode or not.
is_training: training mode or not.
Returns:
Returns:
...
@@ -658,8 +657,9 @@ class ShapemaskPriorHead(object):
...
@@ -658,8 +657,9 @@ class ShapemaskPriorHead(object):
shape_priors
=
self
.
_get_priors
()
shape_priors
=
self
.
_get_priors
()
# Get uniform priors for each outer box.
# Get uniform priors for each outer box.
uniform_priors
=
tf
.
ones
([
batch_size
,
num_instances
,
self
.
_mask_crop_size
,
uniform_priors
=
tf
.
ones
([
self
.
_mask_crop_size
])
batch_size
,
num_instances
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
])
uniform_priors
=
spatial_transform_ops
.
crop_mask_in_target_box
(
uniform_priors
=
spatial_transform_ops
.
crop_mask_in_target_box
(
uniform_priors
,
boxes
,
outer_boxes
,
self
.
_mask_crop_size
)
uniform_priors
,
boxes
,
outer_boxes
,
self
.
_mask_crop_size
)
...
@@ -668,8 +668,9 @@ class ShapemaskPriorHead(object):
...
@@ -668,8 +668,9 @@ class ShapemaskPriorHead(object):
tf
.
cast
(
instance_features
,
tf
.
float32
),
uniform_priors
,
classes
)
tf
.
cast
(
instance_features
,
tf
.
float32
),
uniform_priors
,
classes
)
instance_priors
=
tf
.
gather
(
shape_priors
,
classes
)
instance_priors
=
tf
.
gather
(
shape_priors
,
classes
)
instance_priors
*=
tf
.
expand_dims
(
tf
.
expand_dims
(
instance_priors
*=
tf
.
expand_dims
(
tf
.
cast
(
prior_distribution
,
tf
.
float32
),
axis
=-
1
),
axis
=-
1
)
tf
.
expand_dims
(
tf
.
cast
(
prior_distribution
,
tf
.
float32
),
axis
=-
1
),
axis
=-
1
)
instance_priors
=
tf
.
reduce_sum
(
instance_priors
,
axis
=
2
)
instance_priors
=
tf
.
reduce_sum
(
instance_priors
,
axis
=
2
)
detection_priors
=
spatial_transform_ops
.
crop_mask_in_target_box
(
detection_priors
=
spatial_transform_ops
.
crop_mask_in_target_box
(
instance_priors
,
boxes
,
outer_boxes
,
self
.
_mask_crop_size
)
instance_priors
,
boxes
,
outer_boxes
,
self
.
_mask_crop_size
)
...
@@ -688,8 +689,10 @@ class ShapemaskPriorHead(object):
...
@@ -688,8 +689,10 @@ class ShapemaskPriorHead(object):
# If prior path does not exist, do not use priors, i.e., priors equal to
# If prior path does not exist, do not use priors, i.e., priors equal to
# uniform empty 32x32 patch.
# uniform empty 32x32 patch.
self
.
_num_clusters
=
1
self
.
_num_clusters
=
1
priors
=
tf
.
zeros
([
self
.
_mask_num_classes
,
self
.
_num_clusters
,
priors
=
tf
.
zeros
([
self
.
_mask_crop_size
,
self
.
_mask_crop_size
])
self
.
_mask_num_classes
,
self
.
_num_clusters
,
self
.
_mask_crop_size
,
self
.
_mask_crop_size
])
return
priors
return
priors
def
_classify_shape_priors
(
self
,
features
,
uniform_priors
,
classes
):
def
_classify_shape_priors
(
self
,
features
,
uniform_priors
,
classes
):
...
@@ -699,12 +702,12 @@ class ShapemaskPriorHead(object):
...
@@ -699,12 +702,12 @@ class ShapemaskPriorHead(object):
category.
category.
Args:
Args:
features: A float Tensor of shape [batch_size, num_instances,
features: A float Tensor of shape [batch_size, num_instances,
mask_size,
mask_size,
mask_size,
num_channels].
mask_size, num_channels].
uniform_priors: A float Tensor of shape [batch_size, num_instances,
uniform_priors: A float Tensor of shape [batch_size, num_instances,
mask_size, mask_size] representing the uniform detection priors.
mask_size, mask_size] representing the uniform detection priors.
classes: A int Tensor of shape [batch_size, num_instances]
classes: A int Tensor of shape [batch_size, num_instances]
of detection
of detection
class ids.
class ids.
Returns:
Returns:
prior_distribution: A float Tensor of shape
prior_distribution: A float Tensor of shape
...
@@ -719,10 +722,11 @@ class ShapemaskPriorHead(object):
...
@@ -719,10 +722,11 @@ class ShapemaskPriorHead(object):
features
=
tf
.
reduce_mean
(
features
,
axis
=
(
2
,
3
))
features
=
tf
.
reduce_mean
(
features
,
axis
=
(
2
,
3
))
logits
=
tf
.
keras
.
layers
.
Dense
(
logits
=
tf
.
keras
.
layers
.
Dense
(
self
.
_mask_num_classes
*
self
.
_num_clusters
,
self
.
_mask_num_classes
*
self
.
_num_clusters
,
kernel_initializer
=
tf
.
random_normal_initializer
(
stddev
=
0.01
))(
features
)
kernel_initializer
=
tf
.
random_normal_initializer
(
stddev
=
0.01
))(
logits
=
tf
.
reshape
(
logits
,
features
)
[
batch_size
,
num_instances
,
logits
=
tf
.
reshape
(
self
.
_mask_num_classes
,
self
.
_num_clusters
])
logits
,
[
batch_size
,
num_instances
,
self
.
_mask_num_classes
,
self
.
_num_clusters
])
if
self
.
_use_category_for_mask
:
if
self
.
_use_category_for_mask
:
logits
=
tf
.
gather
(
logits
,
tf
.
expand_dims
(
classes
,
axis
=-
1
),
batch_dims
=
2
)
logits
=
tf
.
gather
(
logits
,
tf
.
expand_dims
(
classes
,
axis
=-
1
),
batch_dims
=
2
)
logits
=
tf
.
squeeze
(
logits
,
axis
=
2
)
logits
=
tf
.
squeeze
(
logits
,
axis
=
2
)
...
@@ -752,8 +756,8 @@ class ShapemaskCoarsemaskHead(object):
...
@@ -752,8 +756,8 @@ class ShapemaskCoarsemaskHead(object):
use_category_for_mask: use class information in mask branch.
use_category_for_mask: use class information in mask branch.
num_convs: `int` number of stacked convolution before the last prediction
num_convs: `int` number of stacked convolution before the last prediction
layer.
layer.
norm_activation: an operation that includes a normalization layer
norm_activation: an operation that includes a normalization layer
followed
followed
by an optional activation layer.
by an optional activation layer.
"""
"""
self
.
_mask_num_classes
=
num_classes
if
use_category_for_mask
else
1
self
.
_mask_num_classes
=
num_classes
if
use_category_for_mask
else
1
self
.
_use_category_for_mask
=
use_category_for_mask
self
.
_use_category_for_mask
=
use_category_for_mask
...
@@ -769,13 +773,15 @@ class ShapemaskCoarsemaskHead(object):
...
@@ -769,13 +773,15 @@ class ShapemaskCoarsemaskHead(object):
self
.
_class_norm_activation
=
[]
self
.
_class_norm_activation
=
[]
for
i
in
range
(
self
.
_num_convs
):
for
i
in
range
(
self
.
_num_convs
):
self
.
_class_conv
.
append
(
tf
.
keras
.
layers
.
Conv2D
(
self
.
_class_conv
.
append
(
self
.
_num_downsample_channels
,
tf
.
keras
.
layers
.
Conv2D
(
kernel_size
=
(
3
,
3
),
self
.
_num_downsample_channels
,
bias_initializer
=
tf
.
zeros_initializer
(),
kernel_size
=
(
3
,
3
),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
),
bias_initializer
=
tf
.
zeros_initializer
(),
padding
=
'same'
,
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
name
=
'coarse-mask-class-%d'
%
i
))
stddev
=
0.01
),
padding
=
'same'
,
name
=
'coarse-mask-class-%d'
%
i
))
self
.
_class_norm_activation
.
append
(
self
.
_class_norm_activation
.
append
(
norm_activation
(
name
=
'coarse-mask-class-%d-bn'
%
i
))
norm_activation
(
name
=
'coarse-mask-class-%d-bn'
%
i
))
...
@@ -800,10 +806,10 @@ class ShapemaskCoarsemaskHead(object):
...
@@ -800,10 +806,10 @@ class ShapemaskCoarsemaskHead(object):
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
instance feature crop.
detection_priors: a float Tensor of shape [batch_size, num_instances,
detection_priors: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size, 1]. This is the detection prior for
mask_crop_size, mask_crop_size, 1]. This is the detection prior for
the
the
instance.
instance.
classes: a int Tensor of shape [batch_size, num_instances]
classes: a int Tensor of shape [batch_size, num_instances]
of instance
of instance
classes.
classes.
is_training: a bool indicating whether in training mode.
is_training: a bool indicating whether in training mode.
Returns:
Returns:
...
@@ -820,8 +826,8 @@ class ShapemaskCoarsemaskHead(object):
...
@@ -820,8 +826,8 @@ class ShapemaskCoarsemaskHead(object):
# Gather the logits with right input class.
# Gather the logits with right input class.
if
self
.
_use_category_for_mask
:
if
self
.
_use_category_for_mask
:
mask_logits
=
tf
.
transpose
(
mask_logits
,
[
0
,
1
,
4
,
2
,
3
])
mask_logits
=
tf
.
transpose
(
mask_logits
,
[
0
,
1
,
4
,
2
,
3
])
mask_logits
=
tf
.
gather
(
mask_logits
,
tf
.
expand_dims
(
classes
,
-
1
),
mask_logits
=
tf
.
gather
(
batch_dims
=
2
)
mask_logits
,
tf
.
expand_dims
(
classes
,
-
1
),
batch_dims
=
2
)
mask_logits
=
tf
.
squeeze
(
mask_logits
,
axis
=
2
)
mask_logits
=
tf
.
squeeze
(
mask_logits
,
axis
=
2
)
else
:
else
:
mask_logits
=
mask_logits
[...,
0
]
mask_logits
=
mask_logits
[...,
0
]
...
@@ -841,16 +847,17 @@ class ShapemaskCoarsemaskHead(object):
...
@@ -841,16 +847,17 @@ class ShapemaskCoarsemaskHead(object):
"""
"""
(
batch_size
,
num_instances
,
height
,
width
,
(
batch_size
,
num_instances
,
height
,
width
,
num_channels
)
=
features
.
get_shape
().
as_list
()
num_channels
)
=
features
.
get_shape
().
as_list
()
features
=
tf
.
reshape
(
features
,
[
batch_size
*
num_instances
,
height
,
width
,
features
=
tf
.
reshape
(
num_channels
])
features
,
[
batch_size
*
num_instances
,
height
,
width
,
num_channels
])
for
i
in
range
(
self
.
_num_convs
):
for
i
in
range
(
self
.
_num_convs
):
features
=
self
.
_class_conv
[
i
](
features
)
features
=
self
.
_class_conv
[
i
](
features
)
features
=
self
.
_class_norm_activation
[
i
](
features
,
features
=
self
.
_class_norm_activation
[
i
](
is_training
=
is_training
)
features
,
is_training
=
is_training
)
mask_logits
=
self
.
_class_predict
(
features
)
mask_logits
=
self
.
_class_predict
(
features
)
mask_logits
=
tf
.
reshape
(
mask_logits
,
[
batch_size
,
num_instances
,
height
,
mask_logits
=
tf
.
reshape
(
width
,
self
.
_mask_num_classes
])
mask_logits
,
[
batch_size
,
num_instances
,
height
,
width
,
self
.
_mask_num_classes
])
return
mask_logits
return
mask_logits
...
@@ -907,8 +914,8 @@ class ShapemaskFinemaskHead(object):
...
@@ -907,8 +914,8 @@ class ShapemaskFinemaskHead(object):
activation
=
None
,
activation
=
None
,
padding
=
'same'
,
padding
=
'same'
,
name
=
'fine-mask-class-%d'
%
i
))
name
=
'fine-mask-class-%d'
%
i
))
self
.
_fine_class_bn
.
append
(
norm_activation
(
self
.
_fine_class_bn
.
append
(
name
=
'fine-mask-class-%d-bn'
%
i
))
norm_activation
(
name
=
'fine-mask-class-%d-bn'
%
i
))
self
.
_class_predict_conv
=
tf
.
keras
.
layers
.
Conv2D
(
self
.
_class_predict_conv
=
tf
.
keras
.
layers
.
Conv2D
(
self
.
_mask_num_classes
,
self
.
_mask_num_classes
,
...
@@ -926,14 +933,13 @@ class ShapemaskFinemaskHead(object):
...
@@ -926,14 +933,13 @@ class ShapemaskFinemaskHead(object):
https://arxiv.org/pdf/1904.03239.pdf
https://arxiv.org/pdf/1904.03239.pdf
Args:
Args:
features: a float Tensor of shape
features: a float Tensor of shape [batch_size, num_instances,
[batch_size, num_instances, mask_crop_size, mask_crop_size,
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
num_downsample_channels]. This is the instance feature crop.
instance feature crop.
mask_logits: a float Tensor of shape
mask_logits: a float Tensor of shape [batch_size, num_instances,
[batch_size, num_instances, mask_crop_size, mask_crop_size] indicating
mask_crop_size, mask_crop_size] indicating predicted mask logits.
predicted mask logits.
classes: a int Tensor of shape [batch_size, num_instances] of instance
classes: a int Tensor of shape [batch_size, num_instances]
classes.
of instance classes.
is_training: a bool indicating whether in training mode.
is_training: a bool indicating whether in training mode.
Returns:
Returns:
...
@@ -960,8 +966,8 @@ class ShapemaskFinemaskHead(object):
...
@@ -960,8 +966,8 @@ class ShapemaskFinemaskHead(object):
mask_logits
=
self
.
decoder_net
(
features
,
is_training
)
mask_logits
=
self
.
decoder_net
(
features
,
is_training
)
if
self
.
_use_category_for_mask
:
if
self
.
_use_category_for_mask
:
mask_logits
=
tf
.
transpose
(
mask_logits
,
[
0
,
1
,
4
,
2
,
3
])
mask_logits
=
tf
.
transpose
(
mask_logits
,
[
0
,
1
,
4
,
2
,
3
])
mask_logits
=
tf
.
gather
(
mask_logits
,
mask_logits
=
tf
.
gather
(
tf
.
expand_dims
(
classes
,
-
1
),
batch_dims
=
2
)
mask_logits
,
tf
.
expand_dims
(
classes
,
-
1
),
batch_dims
=
2
)
mask_logits
=
tf
.
squeeze
(
mask_logits
,
axis
=
2
)
mask_logits
=
tf
.
squeeze
(
mask_logits
,
axis
=
2
)
else
:
else
:
mask_logits
=
mask_logits
[...,
0
]
mask_logits
=
mask_logits
[...,
0
]
...
@@ -982,8 +988,8 @@ class ShapemaskFinemaskHead(object):
...
@@ -982,8 +988,8 @@ class ShapemaskFinemaskHead(object):
"""
"""
(
batch_size
,
num_instances
,
height
,
width
,
(
batch_size
,
num_instances
,
height
,
width
,
num_channels
)
=
features
.
get_shape
().
as_list
()
num_channels
)
=
features
.
get_shape
().
as_list
()
features
=
tf
.
reshape
(
features
,
[
batch_size
*
num_instances
,
height
,
width
,
features
=
tf
.
reshape
(
num_channels
])
features
,
[
batch_size
*
num_instances
,
height
,
width
,
num_channels
])
for
i
in
range
(
self
.
_num_convs
):
for
i
in
range
(
self
.
_num_convs
):
features
=
self
.
_fine_class_conv
[
i
](
features
)
features
=
self
.
_fine_class_conv
[
i
](
features
)
features
=
self
.
_fine_class_bn
[
i
](
features
,
is_training
=
is_training
)
features
=
self
.
_fine_class_bn
[
i
](
features
,
is_training
=
is_training
)
...
@@ -994,9 +1000,8 @@ class ShapemaskFinemaskHead(object):
...
@@ -994,9 +1000,8 @@ class ShapemaskFinemaskHead(object):
# Predict per-class instance masks.
# Predict per-class instance masks.
mask_logits
=
self
.
_class_predict_conv
(
features
)
mask_logits
=
self
.
_class_predict_conv
(
features
)
mask_logits
=
tf
.
reshape
(
mask_logits
,
mask_logits
=
tf
.
reshape
(
mask_logits
,
[
[
batch_size
,
num_instances
,
batch_size
,
num_instances
,
height
*
self
.
up_sample_factor
,
height
*
self
.
up_sample_factor
,
width
*
self
.
up_sample_factor
,
self
.
_mask_num_classes
width
*
self
.
up_sample_factor
,
])
self
.
_mask_num_classes
])
return
mask_logits
return
mask_logits
official/vision/detection/modeling/architecture/nn_ops.py
View file @
88253ce5
...
@@ -19,6 +19,7 @@ from __future__ import division
...
@@ -19,6 +19,7 @@ from __future__ import division
from
__future__
import
print_function
from
__future__
import
print_function
import
functools
import
functools
import
tensorflow
as
tf
import
tensorflow
as
tf
...
@@ -43,7 +44,7 @@ class NormActivation(tf.keras.layers.Layer):
...
@@ -43,7 +44,7 @@ class NormActivation(tf.keras.layers.Layer):
GraphKeys.TRAINABLE_VARIABLES. If False, freeze batch normalization
GraphKeys.TRAINABLE_VARIABLES. If False, freeze batch normalization
layer.
layer.
init_zero: `bool` if True, initializes scale parameter of batch
init_zero: `bool` if True, initializes scale parameter of batch
normalization with 0. If False, initialize it with 1.
normalization with 0. If False, initialize it with 1.
fused: `bool` fused option in batch normalization.
fused: `bool` fused option in batch normalization.
use_activation: `bool`, whether to add the optional activation layer after
use_activation: `bool`, whether to add the optional activation layer after
the batch normalization layer.
the batch normalization layer.
...
...
official/vision/detection/modeling/architecture/resnet.py
View file @
88253ce5
...
@@ -28,22 +28,23 @@ import tensorflow as tf
...
@@ -28,22 +28,23 @@ import tensorflow as tf
from
official.vision.detection.modeling.architecture
import
keras_utils
from
official.vision.detection.modeling.architecture
import
keras_utils
from
official.vision.detection.modeling.architecture
import
nn_ops
from
official.vision.detection.modeling.architecture
import
nn_ops
# TODO(b/140112644): Refactor the code with Keras style, i.e. build and call.
# TODO(b/140112644): Refactor the code with Keras style, i.e. build and call.
class
Resnet
(
object
):
class
Resnet
(
object
):
"""Class to build ResNet family model."""
"""Class to build ResNet family model."""
def
__init__
(
self
,
def
__init__
(
resnet_depth
,
self
,
activation
=
'relu'
,
resnet_depth
,
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
,
activation
=
'relu'
),
norm_activation
=
nn_ops
.
norm_activation_builder
(
activation
=
'relu'
),
data_format
=
'channels_last'
):
data_format
=
'channels_last'
):
"""ResNet initialization function.
"""ResNet initialization function.
Args:
Args:
resnet_depth: `int` depth of ResNet backbone model.
resnet_depth: `int` depth of ResNet backbone model.
norm_activation: an operation that includes a normalization layer
norm_activation: an operation that includes a normalization layer
followed
followed
by an optional activation layer.
by an optional activation layer.
data_format: `str` either "channels_first" for `[batch, channels, height,
data_format: `str` either "channels_first" for `[batch, channels, height,
width]` or "channels_last" for `[batch, height, width, channels]`.
width]` or "channels_last" for `[batch, height, width, channels]`.
"""
"""
...
@@ -58,24 +59,45 @@ class Resnet(object):
...
@@ -58,24 +59,45 @@ class Resnet(object):
self
.
_data_format
=
data_format
self
.
_data_format
=
data_format
model_params
=
{
model_params
=
{
10
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
1
,
1
,
1
,
1
]},
10
:
{
18
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
2
,
2
,
2
,
2
]},
'block'
:
self
.
residual_block
,
34
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
3
,
4
,
6
,
3
]},
'layers'
:
[
1
,
1
,
1
,
1
]
50
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
4
,
6
,
3
]},
},
101
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
4
,
23
,
3
]},
18
:
{
152
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
8
,
36
,
3
]},
'block'
:
self
.
residual_block
,
200
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
24
,
36
,
3
]}
'layers'
:
[
2
,
2
,
2
,
2
]
},
34
:
{
'block'
:
self
.
residual_block
,
'layers'
:
[
3
,
4
,
6
,
3
]
},
50
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
4
,
6
,
3
]
},
101
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
4
,
23
,
3
]
},
152
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
8
,
36
,
3
]
},
200
:
{
'block'
:
self
.
bottleneck_block
,
'layers'
:
[
3
,
24
,
36
,
3
]
}
}
}
if
resnet_depth
not
in
model_params
:
if
resnet_depth
not
in
model_params
:
valid_resnet_depths
=
', '
.
join
(
valid_resnet_depths
=
', '
.
join
(
[
str
(
depth
)
for
depth
in
sorted
(
model_params
.
keys
())])
[
str
(
depth
)
for
depth
in
sorted
(
model_params
.
keys
())])
raise
ValueError
(
raise
ValueError
(
'The resnet_depth should be in [%s]. Not a valid resnet_depth:'
%
(
'The resnet_depth should be in [%s]. Not a valid resnet_depth:'
%
valid_resnet_depths
),
self
.
_resnet_depth
)
(
valid_resnet_depths
),
self
.
_resnet_depth
)
params
=
model_params
[
resnet_depth
]
params
=
model_params
[
resnet_depth
]
self
.
_resnet_fn
=
self
.
resnet_v1_generator
(
self
.
_resnet_fn
=
self
.
resnet_v1_generator
(
params
[
'block'
],
params
[
'block'
],
params
[
'layers'
])
params
[
'layers'
])
def
__call__
(
self
,
inputs
,
is_training
=
None
):
def
__call__
(
self
,
inputs
,
is_training
=
None
):
"""Returns the ResNet model for a given size and number of output classes.
"""Returns the ResNet model for a given size and number of output classes.
...
@@ -98,10 +120,10 @@ class Resnet(object):
...
@@ -98,10 +120,10 @@ class Resnet(object):
"""Pads the input along the spatial dimensions independently of input size.
"""Pads the input along the spatial dimensions independently of input size.
Args:
Args:
inputs: `Tensor` of size `[batch, channels, height, width]` or
inputs: `Tensor` of size `[batch, channels, height, width]` or
`[batch,
`[batch,
height, width, channels]` depending on `data_format`.
height, width, channels]` depending on `data_format`.
kernel_size: `int` kernel size to be used for `conv2d` or max_pool2d`
kernel_size: `int` kernel size to be used for `conv2d` or `max_pool2d`
kernel_size: `int` kernel size to be used for `conv2d` or `max_pool2d`
operations. Should be a positive integer.
Returns:
Returns:
A padded `Tensor` of the same `data_format` with size either intact
A padded `Tensor` of the same `data_format` with size either intact
...
@@ -160,14 +182,15 @@ class Resnet(object):
...
@@ -160,14 +182,15 @@ class Resnet(object):
Args:
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first two convolutions. Note that
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
downsample the input.
use_projection: `bool` for whether this block should use a projection
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually
shortcut (versus the default identity shortcut). This is usually
`True`
`True`
for the first block of a block group, which may change the
for the first block of a block group, which may change the
number of
number of
filters and the resolution.
filters and the resolution.
is_training: `bool` if True, the model is in training mode.
is_training: `bool` if True, the model is in training mode.
Returns:
Returns:
The output `Tensor` of the block.
The output `Tensor` of the block.
"""
"""
...
@@ -185,8 +208,9 @@ class Resnet(object):
...
@@ -185,8 +208,9 @@ class Resnet(object):
inputs
=
self
.
conv2d_fixed_padding
(
inputs
=
self
.
conv2d_fixed_padding
(
inputs
=
inputs
,
filters
=
filters
,
kernel_size
=
3
,
strides
=
1
)
inputs
=
inputs
,
filters
=
filters
,
kernel_size
=
3
,
strides
=
1
)
inputs
=
self
.
_norm_activation
(
use_activation
=
False
,
init_zero
=
True
)(
inputs
=
self
.
_norm_activation
(
inputs
,
is_training
=
is_training
)
use_activation
=
False
,
init_zero
=
True
)(
inputs
,
is_training
=
is_training
)
return
self
.
_activation_op
(
inputs
+
shortcut
)
return
self
.
_activation_op
(
inputs
+
shortcut
)
...
@@ -201,13 +225,13 @@ class Resnet(object):
...
@@ -201,13 +225,13 @@ class Resnet(object):
Args:
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first two convolutions. Note that
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
downsample the input.
use_projection: `bool` for whether this block should use a projection
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually
shortcut (versus the default identity shortcut). This is usually
`True`
`True`
for the first block of a block group, which may change the
for the first block of a block group, which may change the
number of
number of
filters and the resolution.
filters and the resolution.
is_training: `bool` if True, the model is in training mode.
is_training: `bool` if True, the model is in training mode.
Returns:
Returns:
...
@@ -233,8 +257,9 @@ class Resnet(object):
...
@@ -233,8 +257,9 @@ class Resnet(object):
inputs
=
self
.
conv2d_fixed_padding
(
inputs
=
self
.
conv2d_fixed_padding
(
inputs
=
inputs
,
filters
=
4
*
filters
,
kernel_size
=
1
,
strides
=
1
)
inputs
=
inputs
,
filters
=
4
*
filters
,
kernel_size
=
1
,
strides
=
1
)
inputs
=
self
.
_norm_activation
(
use_activation
=
False
,
init_zero
=
True
)(
inputs
=
self
.
_norm_activation
(
inputs
,
is_training
=
is_training
)
use_activation
=
False
,
init_zero
=
True
)(
inputs
,
is_training
=
is_training
)
return
self
.
_activation_op
(
inputs
+
shortcut
)
return
self
.
_activation_op
(
inputs
+
shortcut
)
...
@@ -248,7 +273,7 @@ class Resnet(object):
...
@@ -248,7 +273,7 @@ class Resnet(object):
block_fn: `function` for the block to use within the model
block_fn: `function` for the block to use within the model
blocks: `int` number of blocks contained in the layer.
blocks: `int` number of blocks contained in the layer.
strides: `int` stride to use for the first convolution of the layer. If
strides: `int` stride to use for the first convolution of the layer. If
greater than 1, this layer will downsample the input.
greater than 1, this layer will downsample the input.
name: `str` name for the Tensor output of the block layer.
name: `str` name for the Tensor output of the block layer.
is_training: `bool` if True, the model is in training mode.
is_training: `bool` if True, the model is in training mode.
...
@@ -256,8 +281,8 @@ class Resnet(object):
...
@@ -256,8 +281,8 @@ class Resnet(object):
The output `Tensor` of the block layer.
The output `Tensor` of the block layer.
"""
"""
# Only the first block per block_group uses projection shortcut and strides.
# Only the first block per block_group uses projection shortcut and strides.
inputs
=
block_fn
(
inputs
,
filters
,
strides
,
use_projection
=
True
,
inputs
=
block_fn
(
is_training
=
is_training
)
inputs
,
filters
,
strides
,
use_projection
=
True
,
is_training
=
is_training
)
for
_
in
range
(
1
,
blocks
):
for
_
in
range
(
1
,
blocks
):
inputs
=
block_fn
(
inputs
,
filters
,
1
,
is_training
=
is_training
)
inputs
=
block_fn
(
inputs
,
filters
,
1
,
is_training
=
is_training
)
...
@@ -269,7 +294,7 @@ class Resnet(object):
...
@@ -269,7 +294,7 @@ class Resnet(object):
Args:
Args:
block_fn: `function` for the block to use within the model. Either
block_fn: `function` for the block to use within the model. Either
`residual_block` or `bottleneck_block`.
`residual_block` or `bottleneck_block`.
layers: list of 4 `int`s denoting the number of blocks to include in each
layers: list of 4 `int`s denoting the number of blocks to include in each
of the 4 block groups. Each group consists of blocks that take inputs of
of the 4 block groups. Each group consists of blocks that take inputs of
the same resolution.
the same resolution.
...
@@ -293,17 +318,37 @@ class Resnet(object):
...
@@ -293,17 +318,37 @@ class Resnet(object):
inputs
=
tf
.
identity
(
inputs
,
'initial_max_pool'
)
inputs
=
tf
.
identity
(
inputs
,
'initial_max_pool'
)
c2
=
self
.
block_group
(
c2
=
self
.
block_group
(
inputs
=
inputs
,
filters
=
64
,
block_fn
=
block_fn
,
blocks
=
layers
[
0
],
inputs
=
inputs
,
strides
=
1
,
name
=
'block_group1'
,
is_training
=
is_training
)
filters
=
64
,
block_fn
=
block_fn
,
blocks
=
layers
[
0
],
strides
=
1
,
name
=
'block_group1'
,
is_training
=
is_training
)
c3
=
self
.
block_group
(
c3
=
self
.
block_group
(
inputs
=
c2
,
filters
=
128
,
block_fn
=
block_fn
,
blocks
=
layers
[
1
],
inputs
=
c2
,
strides
=
2
,
name
=
'block_group2'
,
is_training
=
is_training
)
filters
=
128
,
block_fn
=
block_fn
,
blocks
=
layers
[
1
],
strides
=
2
,
name
=
'block_group2'
,
is_training
=
is_training
)
c4
=
self
.
block_group
(
c4
=
self
.
block_group
(
inputs
=
c3
,
filters
=
256
,
block_fn
=
block_fn
,
blocks
=
layers
[
2
],
inputs
=
c3
,
strides
=
2
,
name
=
'block_group3'
,
is_training
=
is_training
)
filters
=
256
,
block_fn
=
block_fn
,
blocks
=
layers
[
2
],
strides
=
2
,
name
=
'block_group3'
,
is_training
=
is_training
)
c5
=
self
.
block_group
(
c5
=
self
.
block_group
(
inputs
=
c4
,
filters
=
512
,
block_fn
=
block_fn
,
blocks
=
layers
[
3
],
inputs
=
c4
,
strides
=
2
,
name
=
'block_group4'
,
is_training
=
is_training
)
filters
=
512
,
block_fn
=
block_fn
,
blocks
=
layers
[
3
],
strides
=
2
,
name
=
'block_group4'
,
is_training
=
is_training
)
return
{
2
:
c2
,
3
:
c3
,
4
:
c4
,
5
:
c5
}
return
{
2
:
c2
,
3
:
c3
,
4
:
c4
,
5
:
c5
}
return
model
return
model
official/vision/detection/modeling/base_model.py
View file @
88253ce5
...
@@ -21,6 +21,7 @@ from __future__ import print_function
...
@@ -21,6 +21,7 @@ from __future__ import print_function
import
abc
import
abc
import
functools
import
functools
import
re
import
re
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.vision.detection.modeling
import
checkpoint_utils
from
official.vision.detection.modeling
import
checkpoint_utils
from
official.vision.detection.modeling
import
learning_rates
from
official.vision.detection.modeling
import
learning_rates
...
@@ -42,8 +43,7 @@ def _make_filter_trainable_variables_fn(frozen_variable_prefix):
...
@@ -42,8 +43,7 @@ def _make_filter_trainable_variables_fn(frozen_variable_prefix):
# frozen_variable_prefix: a regex string specifying the prefix pattern of
# frozen_variable_prefix: a regex string specifying the prefix pattern of
# the frozen variables' names.
# the frozen variables' names.
filtered_variables
=
[
filtered_variables
=
[
v
for
v
in
variables
v
for
v
in
variables
if
not
frozen_variable_prefix
or
if
not
frozen_variable_prefix
or
not
re
.
match
(
frozen_variable_prefix
,
v
.
name
)
not
re
.
match
(
frozen_variable_prefix
,
v
.
name
)
]
]
return
filtered_variables
return
filtered_variables
...
@@ -115,8 +115,8 @@ class Model(object):
...
@@ -115,8 +115,8 @@ class Model(object):
def
weight_decay_loss
(
self
,
trainable_variables
):
def
weight_decay_loss
(
self
,
trainable_variables
):
reg_variables
=
[
reg_variables
=
[
v
for
v
in
trainable_variables
v
for
v
in
trainable_variables
if
self
.
_regularization_var_regex
is
None
if
self
.
_regularization_var_regex
is
None
or
or
re
.
match
(
self
.
_regularization_var_regex
,
v
.
name
)
re
.
match
(
self
.
_regularization_var_regex
,
v
.
name
)
]
]
return
self
.
_l2_weight_decay
*
tf
.
add_n
(
return
self
.
_l2_weight_decay
*
tf
.
add_n
(
...
...
official/vision/detection/modeling/checkpoint_utils.py
View file @
88253ce5
...
@@ -12,7 +12,9 @@
...
@@ -12,7 +12,9 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Util functions for loading checkpoints. Especially for loading Tensorflow 1.x
"""Util functions for loading checkpoints.
Especially for loading Tensorflow 1.x
checkpoint to Tensorflow 2.x (keras) model.
checkpoint to Tensorflow 2.x (keras) model.
"""
"""
...
@@ -20,18 +22,19 @@ from __future__ import absolute_import
...
@@ -20,18 +22,19 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
print_function
import
re
import
re
from
absl
import
logging
from
absl
import
logging
import
tensorflow
as
tf
import
tensorflow
as
tf
def
_build_assignment_map
(
keras_model
,
def
_build_assignment_map
(
keras_model
,
prefix
=
''
,
prefix
=
''
,
skip_variables_regex
=
None
,
skip_variables_regex
=
None
,
var_to_shape_map
=
None
):
var_to_shape_map
=
None
):
"""Compute an assignment mapping for loading older checkpoints into a Keras
"""Compute an assignment mapping for loading older checkpoints into a Keras
model. Variable names are remapped from the original TPUEstimator model to
model. Variable names are remapped from the original TPUEstimator model to
the new Keras name.
the new Keras name.
...
@@ -48,12 +51,12 @@ def _build_assignment_map(keras_model,
...
@@ -48,12 +51,12 @@ def _build_assignment_map(keras_model,
"""
"""
assignment_map
=
{}
assignment_map
=
{}
checkpoint_names
=
None
checkpoint_names
=
None
if
var_to_shape_map
:
if
var_to_shape_map
:
checkpoint_names
=
list
(
filter
(
checkpoint_names
=
list
(
lambda
x
:
not
x
.
endswith
(
'Momentum'
)
and
not
x
.
endswith
(
filter
(
'global_step'
),
var_to_shape_map
.
keys
()))
lambda
x
:
not
x
.
endswith
(
'Momentum'
)
and
not
x
.
endswith
(
'global_step'
),
var_to_shape_map
.
keys
()))
for
var
in
keras_model
.
variables
:
for
var
in
keras_model
.
variables
:
var_name
=
var
.
name
var_name
=
var
.
name
...
@@ -95,14 +98,15 @@ def _get_checkpoint_map(checkpoint_path):
...
@@ -95,14 +98,15 @@ def _get_checkpoint_map(checkpoint_path):
def
make_restore_checkpoint_fn
(
checkpoint_path
,
prefix
=
''
,
skip_regex
=
None
):
def
make_restore_checkpoint_fn
(
checkpoint_path
,
prefix
=
''
,
skip_regex
=
None
):
"""Returns scaffold function to restore parameters from v1 checkpoint.
"""Returns scaffold function to restore parameters from v1 checkpoint.
Args:
Args:
checkpoint_path: path of the checkpoint folder or file.
checkpoint_path: path of the checkpoint folder or file.
Example 1: '/path/to/model_dir/'
Example 1: '/path/to/model_dir/'
Example 2: '/path/to/model.ckpt-22500'
Example 2: '/path/to/model.ckpt-22500'
prefix: prefix in the variable name to be removed for alignment with names in
prefix: prefix in the variable name to be removed for alignment with names in
the checkpoint.
the checkpoint.
skip_regex: regular expression to match the names of variables that
skip_regex: regular expression to match the names of variables that
do not
do not
need to be assigned.
need to be assigned.
Returns:
Returns:
Callable[tf.keras.Model] -> void. Fn to load v1 checkpoint to keras model.
Callable[tf.keras.Model] -> void. Fn to load v1 checkpoint to keras model.
...
@@ -125,7 +129,6 @@ def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None):
...
@@ -125,7 +129,6 @@ def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None):
var_to_shape_map
=
var_to_shape_map
)
var_to_shape_map
=
var_to_shape_map
)
if
not
vars_to_load
:
if
not
vars_to_load
:
raise
ValueError
(
'Variables to load is empty.'
)
raise
ValueError
(
'Variables to load is empty.'
)
tf
.
compat
.
v1
.
train
.
init_from_checkpoint
(
checkpoint_path
,
tf
.
compat
.
v1
.
train
.
init_from_checkpoint
(
checkpoint_path
,
vars_to_load
)
vars_to_load
)
return
_restore_checkpoint_fn
return
_restore_checkpoint_fn
official/vision/detection/modeling/learning_rates.py
View file @
88253ce5
...
@@ -25,7 +25,8 @@ import tensorflow as tf
...
@@ -25,7 +25,8 @@ import tensorflow as tf
from
official.modeling.hyperparams
import
params_dict
from
official.modeling.hyperparams
import
params_dict
class
StepLearningRateWithLinearWarmup
(
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
):
class
StepLearningRateWithLinearWarmup
(
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
):
"""Class to generate learning rate tensor."""
"""Class to generate learning rate tensor."""
def
__init__
(
self
,
total_steps
,
params
):
def
__init__
(
self
,
total_steps
,
params
):
...
@@ -57,7 +58,8 @@ class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRat
...
@@ -57,7 +58,8 @@ class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRat
return
{
'_params'
:
self
.
_params
.
as_dict
()}
return
{
'_params'
:
self
.
_params
.
as_dict
()}
class
CosineLearningRateWithLinearWarmup
(
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
):
class
CosineLearningRateWithLinearWarmup
(
tf
.
keras
.
optimizers
.
schedules
.
LearningRateSchedule
):
"""Class to generate learning rate tensor."""
"""Class to generate learning rate tensor."""
def
__init__
(
self
,
total_steps
,
params
):
def
__init__
(
self
,
total_steps
,
params
):
...
...
official/vision/detection/modeling/maskrcnn_model.py
View file @
88253ce5
...
@@ -118,9 +118,7 @@ class MaskrcnnModel(base_model.Model):
...
@@ -118,9 +118,7 @@ class MaskrcnnModel(base_model.Model):
box_targets
=
tf
.
where
(
box_targets
=
tf
.
where
(
tf
.
tile
(
tf
.
tile
(
tf
.
expand_dims
(
tf
.
equal
(
matched_gt_classes
,
0
),
axis
=-
1
),
tf
.
expand_dims
(
tf
.
equal
(
matched_gt_classes
,
0
),
axis
=-
1
),
[
1
,
1
,
4
]),
[
1
,
1
,
4
]),
tf
.
zeros_like
(
box_targets
),
box_targets
)
tf
.
zeros_like
(
box_targets
),
box_targets
)
model_outputs
.
update
({
model_outputs
.
update
({
'class_targets'
:
matched_gt_classes
,
'class_targets'
:
matched_gt_classes
,
'box_targets'
:
box_targets
,
'box_targets'
:
box_targets
,
...
@@ -183,9 +181,7 @@ class MaskrcnnModel(base_model.Model):
...
@@ -183,9 +181,7 @@ class MaskrcnnModel(base_model.Model):
mask_outputs
),
mask_outputs
),
})
})
else
:
else
:
model_outputs
.
update
({
model_outputs
.
update
({
'detection_masks'
:
tf
.
nn
.
sigmoid
(
mask_outputs
)})
'detection_masks'
:
tf
.
nn
.
sigmoid
(
mask_outputs
)
})
return
model_outputs
return
model_outputs
...
@@ -312,8 +308,8 @@ class MaskrcnnModel(base_model.Model):
...
@@ -312,8 +308,8 @@ class MaskrcnnModel(base_model.Model):
required_output_fields
=
[
'class_outputs'
,
'box_outputs'
]
required_output_fields
=
[
'class_outputs'
,
'box_outputs'
]
for
field
in
required_output_fields
:
for
field
in
required_output_fields
:
if
field
not
in
outputs
:
if
field
not
in
outputs
:
raise
ValueError
(
'"%s" is missing in outputs, requried %s found %s'
raise
ValueError
(
'"%s" is missing in outputs, requried %s found %s'
%
%
(
field
,
required_output_fields
,
outputs
.
keys
()))
(
field
,
required_output_fields
,
outputs
.
keys
()))
predictions
=
{
predictions
=
{
'image_info'
:
labels
[
'image_info'
],
'image_info'
:
labels
[
'image_info'
],
'num_detections'
:
outputs
[
'num_detections'
],
'num_detections'
:
outputs
[
'num_detections'
],
...
...
Prev
1
…
4
5
6
7
8
9
10
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment