dcuai / dlexamples — Commits

Commit c320b6ef, authored Apr 15, 2022 by zhenyi
    tf2 detection
Parent: 0fc002df

Showing 20 changed files with 4634 additions and 0 deletions (+4634 / -0)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/distributed_executer.py                +564  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/evaluation.py                          +452  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hooks/__init__.py                      +24   -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hooks/ckpt_hook.py                     +114  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hooks/logging_hook.py                  +518  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hooks/pretrained_restore_hook.py       +216  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/__init__.py            +0    -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/cmdline_utils.py       +179  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/flags_to_params.py     +85   -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/hyperparameters.py     +226  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/mask_rcnn_params.py    +102  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/params_dict.py         +398  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/params_io.py           +89   -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/mask_rcnn_model.py                     +522  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/models/__init__.py                     +0    -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/models/fpn.py                          +138  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/models/heads.py                        +323  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/models/keras_utils.py                  +102  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/models/resnet.py                       +582  -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/__init__.py           +0    -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/distributed_executer.py  (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Interface to run the Mask R-CNN model with different distribution strategies."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import os
import six
import math
import multiprocessing

import tensorflow as tf

from mask_rcnn.utils.logging_formatter import logging

from mask_rcnn.utils.distributed_utils import MPI_is_distributed
from mask_rcnn.utils.distributed_utils import MPI_local_rank
from mask_rcnn.utils.distributed_utils import MPI_rank

from mask_rcnn.hooks.logging_hook import AutoLoggingHook

from mask_rcnn.utils.lazy_imports import LazyImport
hvd = LazyImport("horovod.tensorflow")

from tensorflow.core.protobuf import rewriter_config_pb2

from mask_rcnn import evaluation
from mask_rcnn.hyperparameters import params_io
from mask_rcnn.hooks import CheckpointSaverHook
from mask_rcnn.hooks import PretrainedWeightsLoadingHook


def get_training_hooks(mode, model_dir, checkpoint_path=None, skip_checkpoint_variables=None):

    assert mode in ('train', 'eval')

    training_hooks = [
        AutoLoggingHook(
            # log_every_n_steps=RUNNING_CONFIG.display_step,
            log_every_n_steps=5 if "NGC_JOB_ID" not in os.environ else 100,
            # warmup_steps=RUNNING_CONFIG.warmup_steps,
            warmup_steps=100,
            is_training=True
        )
    ]

    if not MPI_is_distributed() or MPI_rank() == 0:
        training_hooks.append(PretrainedWeightsLoadingHook(
            prefix="resnet50/",
            checkpoint_path=checkpoint_path,
            skip_variables_regex=skip_checkpoint_variables
        ))

    if MPI_is_distributed() and mode == "train":
        training_hooks.append(hvd.BroadcastGlobalVariablesHook(root_rank=0))

    if not MPI_is_distributed() or MPI_rank() == 0:
        training_hooks.append(CheckpointSaverHook(
            checkpoint_dir=model_dir,
            checkpoint_basename="model.ckpt"
        ))

    return training_hooks


@six.add_metaclass(abc.ABCMeta)
class BaseExecuter(object):
    """Interface to run the Mask R-CNN model on TPUs/GPUs.

    Arguments:
        flags: FLAGS object passed from the user.
        model_config: Model configuration needed to run the distribution strategy.
        model_fn: Model function to be passed to the Estimator.
    """

    def __init__(self, runtime_config, model_fn):
        self._runtime_config = runtime_config
        self._model_fn = model_fn

        os.environ['CUDA_CACHE_DISABLE'] = '0'
        os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
        os.environ['TF_ADJUST_HUE_FUSED'] = '1'
        os.environ['TF_ADJUST_SATURATION_FUSED'] = '1'
        os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'

    @staticmethod
    def _get_session_config(mode, use_xla, use_amp, use_tf_distributed=False, allow_xla_at_inference=False):

        assert mode in ('train', 'eval')

        rewrite_options = rewriter_config_pb2.RewriterConfig(
            # arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF,
            # arithmetic_optimization=rewriter_config_pb2.RewriterConfig.ON,
            # constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
            # constant_folding=rewriter_config_pb2.RewriterConfig.ON,  # TO TEST
            # debug_stripper=rewriter_config_pb2.RewriterConfig.OFF,
            # debug_stripper=rewriter_config_pb2.RewriterConfig.ON,  # TO TEST
            # dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF,
            # dependency_optimization=rewriter_config_pb2.RewriterConfig.ON,  # TO TEST
            # disable_model_pruning=False,  # INCOMPATIBLE with AMP
            # function_optimization=True,
            # implementation_selector=True,
            # loop_optimization=rewriter_config_pb2.RewriterConfig.OFF,
            # loop_optimization=rewriter_config_pb2.RewriterConfig.ON,  # TO TEST
            # The default setting (SCHEDULING and SWAPPING HEURISTICS only):
            # memory_optimization=rewriter_config_pb2.RewriterConfig.DEFAULT_MEM_OPT,
            # Disabled in the meta-optimizer:
            # memory_optimization=rewriter_config_pb2.RewriterConfig.NO_MEM_OPT,
            # Driven by manual op-level annotations:
            # memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL,
            # The swapping heuristic moves a tensor from GPU to CPU and moves it
            # back when needed, to reduce peak memory usage:
            # memory_optimization=rewriter_config_pb2.RewriterConfig.SWAPPING_HEURISTICS,
            # The recomputation heuristic recomputes ops (such as Relu activation)
            # during backprop instead of storing them, reducing peak memory usage:
            # memory_optimization=rewriter_config_pb2.RewriterConfig.RECOMPUTATION_HEURISTICS,
            # Scheduling splits big ops such as AddN and tries to enforce a schedule
            # of the new computations that decreases peak memory usage:
            # memory_optimization=rewriter_config_pb2.RewriterConfig.SCHEDULING_HEURISTICS,
            # Use any combination of swapping and recomputation heuristics:
            # memory_optimization=rewriter_config_pb2.RewriterConfig.HEURISTICS,
            meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.TWO,
            # meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE,
            # meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.DEFAULT_NUM_ITERS,
            # pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.OFF,
            # pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.ON,  # TO TEST
            # remapping=rewriter_config_pb2.RewriterConfig.OFF,
            # remapping=rewriter_config_pb2.RewriterConfig.ON,  # TO TEST
            # scoped_allocator_optimization=rewriter_config_pb2.RewriterConfig.OFF,
            # scoped_allocator_optimization=rewriter_config_pb2.RewriterConfig.ON,  # TO TEST
            # shape_optimization=rewriter_config_pb2.RewriterConfig.OFF,
            # shape_optimization=rewriter_config_pb2.RewriterConfig.ON,  # TO TEST
        )

        if use_amp:
            logging.info("[%s] AMP is activated - Experimental Feature" % mode)
            rewrite_options.auto_mixed_precision = True

        config = tf.compat.v1.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False,
            graph_options=tf.compat.v1.GraphOptions(
                rewrite_options=rewrite_options,
                # infer_shapes=True  # Heavily drops throughput by 30%
            )
        )

        if use_tf_distributed:
            config.gpu_options.force_gpu_compatible = False
        else:
            config.gpu_options.force_gpu_compatible = True  # Force pinned memory

            if MPI_is_distributed():
                config.gpu_options.visible_device_list = str(MPI_local_rank())

        if use_xla and (mode == "train" or allow_xla_at_inference):
            logging.info("[%s] XLA is activated - Experimental Feature" % mode)
            config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
            # config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_2

        if mode == 'train':
            config.intra_op_parallelism_threads = 1  # Avoid a pool of Eigen threads

            if MPI_is_distributed():
                config.inter_op_parallelism_threads = max(2, multiprocessing.cpu_count() // hvd.local_size())
            elif not use_tf_distributed:
                config.inter_op_parallelism_threads = 4

        return config

    @abc.abstractmethod
    def build_strategy_configuration(self, mode):
        """Builds the run configuration for distributed train/eval.

        Returns:
            RunConfig with distribution strategy configurations
            to pass to the constructor of TPUEstimator/Estimator.
        """
        raise NotImplementedError('Must be implemented in subclass')

    def build_model_parameters(self, mode):
        """Builds the model parameters."""

        assert mode in ('train', 'eval')

        batch_size = self._runtime_config.train_batch_size if mode == 'train' else self._runtime_config.eval_batch_size

        params = dict(
            self._runtime_config.values(),
            mode=mode,
            batch_size=batch_size,
            model_dir=self._runtime_config.model_dir,
        )

        if mode == 'eval':
            params = dict(
                params,
                augment_input_data=False,
            )

        return params

    def build_mask_rcnn_estimator(self, params, run_config, mode):
        """Creates a TPUEstimator/Estimator instance.

        Arguments:
            params: A dictionary to pass to the Estimator `model_fn`.
            run_config: RunConfig instance specifying distribution strategy
                configurations.
            mode: Mode -- one of 'train' or 'eval'.

        Returns:
            TFEstimator or TPUEstimator instance.
        """
        assert mode in ('train', 'eval')

        return tf.estimator.Estimator(
            model_fn=self._model_fn,
            model_dir=self._runtime_config.model_dir,
            config=run_config,
            params=params
        )

    def _save_config(self):
        """Save parameters to config files if model_dir is defined."""

        model_dir = self._runtime_config.model_dir

        if model_dir is not None:
            if not tf.io.gfile.exists(model_dir):
                tf.io.gfile.makedirs(model_dir)

            params_io.save_hparams_to_yaml(self._runtime_config, model_dir + '/params.yaml')

    def _write_summary(self, summary_dir, eval_results, predictions, current_step):
        if not self._runtime_config.visualize_images_summary:
            predictions = None

        evaluation.write_summary(eval_results, summary_dir, current_step, predictions=predictions)

    def train(self, train_input_fn, run_eval_after_train=False, eval_input_fn=None):
        """Run distributed training on the Mask R-CNN model."""

        self._save_config()

        train_run_config = self.build_strategy_configuration('train')
        train_params = self.build_model_parameters('train')
        train_estimator = self.build_mask_rcnn_estimator(train_params, train_run_config, 'train')

        train_estimator.train(
            input_fn=train_input_fn,
            max_steps=self._runtime_config.total_steps,
            hooks=get_training_hooks(
                mode="train",
                model_dir=self._runtime_config.model_dir,
                checkpoint_path=self._runtime_config.checkpoint,
                skip_checkpoint_variables=self._runtime_config.skip_checkpoint_variables
            )
        )

        if not run_eval_after_train:
            return None

        if eval_input_fn is None:
            raise ValueError('Eval input_fn must be passed to conduct evaluation after training.')

        eval_run_config = self.build_strategy_configuration('eval')
        eval_params = self.build_model_parameters('eval')
        eval_estimator = self.build_mask_rcnn_estimator(eval_params, eval_run_config, 'eval')

        last_ckpt = tf.train.latest_checkpoint(self._runtime_config.model_dir, latest_filename=None)
        logging.info("Restoring parameters from %s\n" % last_ckpt)

        eval_results, predictions = evaluation.evaluate(
            eval_estimator,
            eval_input_fn,
            self._runtime_config.eval_samples,
            self._runtime_config.eval_batch_size,
            self._runtime_config.include_mask,
            self._runtime_config.val_json_file,
            report_frequency=self._runtime_config.report_frequency
        )

        output_dir = os.path.join(self._runtime_config.model_dir, 'eval')
        tf.io.gfile.makedirs(output_dir)

        # Summary writer writes out eval metrics.
        self._write_summary(output_dir, eval_results, predictions, self._runtime_config.total_steps)

        return eval_results

    def train_and_eval(self, train_input_fn, eval_input_fn):
        """Run distributed train and eval on the Mask R-CNN model."""

        self._save_config()

        output_dir = os.path.join(self._runtime_config.model_dir, 'eval')
        tf.io.gfile.makedirs(output_dir)

        train_run_config = self.build_strategy_configuration('train')
        train_params = self.build_model_parameters('train')
        train_estimator = self.build_mask_rcnn_estimator(train_params, train_run_config, 'train')

        eval_estimator = None
        eval_results = None

        num_cycles = math.ceil(self._runtime_config.total_steps / self._runtime_config.num_steps_per_eval)

        training_hooks = get_training_hooks(
            mode="train",
            model_dir=self._runtime_config.model_dir,
            checkpoint_path=self._runtime_config.checkpoint,
            skip_checkpoint_variables=self._runtime_config.skip_checkpoint_variables
        )

        for cycle in range(1, num_cycles + 1):

            if not MPI_is_distributed() or MPI_rank() == 0:

                print()  # Visual Spacing
                logging.info("=================================")
                logging.info('    Start training cycle %02d' % cycle)
                logging.info("=================================\n")

            max_cycle_step = min(
                int(cycle * self._runtime_config.num_steps_per_eval),
                self._runtime_config.total_steps
            )

            PROFILER_ENABLED = False

            if (not MPI_is_distributed() or MPI_rank() == 0) and PROFILER_ENABLED:
                profiler_context_manager = tf.contrib.tfprof.ProfileContext
            else:
                from contextlib import suppress
                profiler_context_manager = lambda *args, **kwargs: suppress()  # No-Op context manager

            with profiler_context_manager(
                    '/workspace/profiling/',
                    trace_steps=range(100, 200, 3),
                    dump_steps=[200]
            ) as pctx:

                if (not MPI_is_distributed() or MPI_rank() == 0) and PROFILER_ENABLED:
                    opts = tf.compat.v1.profiler.ProfileOptionBuilder.time_and_memory()
                    pctx.add_auto_profiling('op', opts, [150, 200])

                train_estimator.train(
                    input_fn=train_input_fn,
                    max_steps=max_cycle_step,
                    hooks=training_hooks,
                )

            if not MPI_is_distributed() or MPI_rank() == 0:

                print()  # Visual Spacing
                logging.info("=================================")
                logging.info('    Start evaluation cycle %02d' % cycle)
                logging.info("=================================\n")

                if eval_estimator is None:
                    eval_run_config = self.build_strategy_configuration('eval')
                    eval_params = self.build_model_parameters('eval')
                    eval_estimator = self.build_mask_rcnn_estimator(eval_params, eval_run_config, 'eval')

                last_ckpt = tf.train.latest_checkpoint(self._runtime_config.model_dir, latest_filename=None)
                logging.info("Restoring parameters from %s\n" % last_ckpt)

                eval_results, predictions = evaluation.evaluate(
                    eval_estimator,
                    eval_input_fn,
                    self._runtime_config.eval_samples,
                    self._runtime_config.eval_batch_size,
                    self._runtime_config.include_mask,
                    self._runtime_config.val_json_file,
                    report_frequency=self._runtime_config.report_frequency
                )

                self._write_summary(output_dir, eval_results, predictions, max_cycle_step)

            if MPI_is_distributed():
                from mpi4py import MPI
                MPI.COMM_WORLD.Barrier()  # Waiting for all MPI processes to sync

        return eval_results

    def eval(self, eval_input_fn):
        """Run distributed eval on the Mask R-CNN model."""

        output_dir = os.path.join(self._runtime_config.model_dir, 'eval')
        tf.io.gfile.makedirs(output_dir)

        # Summary writer writes out eval metrics.
        run_config = self.build_strategy_configuration('eval')
        eval_params = self.build_model_parameters('eval')
        eval_estimator = self.build_mask_rcnn_estimator(eval_params, run_config, 'eval')

        logging.info('Starting to evaluate.')

        last_ckpt = tf.train.latest_checkpoint(self._runtime_config.model_dir, latest_filename=None)

        if last_ckpt is not None:
            logging.info("Restoring parameters from %s\n" % last_ckpt)
            current_step = int(os.path.basename(last_ckpt).split('-')[1])
        else:
            logging.warning(
                "Could not find trained model in model_dir: `%s`, running initialization to predict\n" %
                self._runtime_config.model_dir
            )
            current_step = 0

        eval_results, predictions = evaluation.evaluate(
            eval_estimator,
            eval_input_fn,
            self._runtime_config.eval_samples,
            self._runtime_config.eval_batch_size,
            self._runtime_config.include_mask,
            self._runtime_config.val_json_file
        )

        self._write_summary(output_dir, eval_results, predictions, current_step)

        if current_step >= self._runtime_config.total_steps:
            logging.info('Evaluation finished after training step %d' % current_step)

        return eval_results


class EstimatorExecuter(BaseExecuter):
    """Interface that runs the Mask R-CNN model using a TF Estimator."""

    def __init__(self, runtime_config, model_fn):
        super(EstimatorExecuter, self).__init__(runtime_config, model_fn)

        if MPI_is_distributed():
            os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
            os.environ['HOROVOD_NUM_NCCL_STREAMS'] = '1'
            # os.environ['HOROVOD_AUTOTUNE'] = '2'

            hvd.init()
            logging.info("Horovod successfully initialized ...")

        os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
        os.environ['TF_GPU_THREAD_COUNT'] = '1' if not MPI_is_distributed() else str(hvd.size())

        os.environ['TF_SYNC_ON_FINISH'] = '0'

    def build_strategy_configuration(self, mode):
        """Retrieves model configuration for running with TF Estimator."""

        run_config = tf.estimator.RunConfig(
            tf_random_seed=(
                self._runtime_config.seed
                if not MPI_is_distributed() or self._runtime_config.seed is None else
                self._runtime_config.seed + MPI_rank()
            ),
            model_dir=self._runtime_config.model_dir,
            save_summary_steps=None,  # disabled
            save_checkpoints_steps=None,  # disabled
            save_checkpoints_secs=None,  # disabled
            keep_checkpoint_max=20,
            keep_checkpoint_every_n_hours=None,  # disabled
            log_step_count_steps=None,  # disabled
            session_config=self._get_session_config(
                mode=mode,
                use_xla=self._runtime_config.xla,
                use_amp=self._runtime_config.amp,
                use_tf_distributed=False,
                # TODO: Remove when XLA at inference is fixed
                allow_xla_at_inference=self._runtime_config.allow_xla_at_inference
            ),
            protocol=None,
            device_fn=None,
            train_distribute=None,
            eval_distribute=None,
            experimental_distribute=None
        )

        return run_config


class TFDistributedExecuter(BaseExecuter):
    """Interface that runs the Mask R-CNN model using MultiWorkerMirroredStrategy."""

    @staticmethod
    def is_eval_task():
        return tf.distribute.cluster_resolver.TFConfigClusterResolver().task_type == 'evaluator'

    def build_strategy_configuration(self, mode):
        """Retrieves model configuration for MultiWorkerMirroredStrategy."""

        distributed_strategy = tf.distribute.MirroredStrategy()
        # distributed_strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
        #     tf.distribute.experimental.CollectiveCommunication.NCCL
        # )

        run_config = tf.estimator.RunConfig(
            tf_random_seed=self._runtime_config.seed,
            model_dir=self._runtime_config.model_dir,
            save_summary_steps=None,  # disabled
            save_checkpoints_steps=None,  # disabled
            save_checkpoints_secs=None,  # disabled
            keep_checkpoint_max=20,
            keep_checkpoint_every_n_hours=None,  # disabled
            log_step_count_steps=None,  # disabled
            session_config=self._get_session_config(
                mode=mode,
                use_xla=self._runtime_config.xla,
                use_amp=self._runtime_config.amp,
                use_tf_distributed=True,
                # TODO: Remove when XLA at inference is fixed
                allow_xla_at_inference=self._runtime_config.allow_xla_at_inference
            ),
            protocol=None,
            device_fn=None,
            train_distribute=distributed_strategy if mode == "train" else None,
            eval_distribute=None,
            experimental_distribute=None
        )

        return run_config
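
For orientation, here is a minimal driver sketch showing how these executers are typically wired up. It is an illustration only: `runtime_config`, `train_input_fn`, `eval_input_fn`, and the model function are assumptions standing in for objects built elsewhere in this commit (the hyperparameters modules and mask_rcnn_model.py), not code from this diff.

# Hypothetical driver sketch -- not part of this commit's code.
# runtime_config, train_input_fn, eval_input_fn and model_fn are assumed
# to be constructed elsewhere in the repository.
from mask_rcnn.distributed_executer import EstimatorExecuter

executer = EstimatorExecuter(runtime_config, model_fn)

# Alternates training and evaluation every num_steps_per_eval steps,
# loading pretrained ResNet-50 weights and broadcasting variables from
# rank 0 when running under Horovod/MPI.
eval_results = executer.train_and_eval(train_input_fn, eval_input_fn)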
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/evaluation.py  (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Functions to perform COCO evaluation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import operator
import pprint
import six
import time
import io

from PIL import Image

import numpy as np
import tensorflow as tf

from mask_rcnn.utils.logging_formatter import logging

from mask_rcnn import coco_metric
from mask_rcnn.utils import coco_utils

from mask_rcnn.object_detection import visualization_utils

import dllogger
from dllogger import Verbosity


def process_prediction_for_eval(prediction):
    """Process the model prediction for COCO eval."""

    image_info = prediction['image_info']
    box_coordinates = prediction['detection_boxes']

    processed_box_coordinates = np.zeros_like(box_coordinates)

    for image_id in range(box_coordinates.shape[0]):
        scale = image_info[image_id][2]

        for box_id in range(box_coordinates.shape[1]):
            # Map [y1, x1, y2, x2] -> [x1, y1, w, h] and multiply detections
            # by image scale.
            y1, x1, y2, x2 = box_coordinates[image_id, box_id, :]
            new_box = scale * np.array([x1, y1, x2 - x1, y2 - y1])
            processed_box_coordinates[image_id, box_id, :] = new_box

    prediction['detection_boxes'] = processed_box_coordinates
    return prediction


def compute_coco_eval_metric(predictor,
                             num_batches=-1,
                             include_mask=True,
                             annotation_json_file="",
                             eval_batch_size=-1,
                             report_frequency=None):
    """Compute the COCO eval metric given a prediction generator.

    Args:
        predictor: a generator that iteratively pops a dictionary of predictions
            with the format compatible with the COCO eval tool.
        num_batches: the number of batches to be aggregated in eval. This is how
            many times the predictor gets pulled.
        include_mask: a boolean that indicates whether we include the mask eval.
        annotation_json_file: the annotation json file of the eval dataset.

    Returns:
        eval_results: the aggregated COCO metric eval results.
    """

    if annotation_json_file == "":
        annotation_json_file = None

    use_groundtruth_from_json = (annotation_json_file is not None)

    predictions = dict()
    batch_idx = 0

    if use_groundtruth_from_json:
        eval_metric = coco_metric.EvaluationMetric(annotation_json_file, include_mask=include_mask)
    else:
        eval_metric = coco_metric.EvaluationMetric(filename=None, include_mask=include_mask)

    def evaluation_preds(preds):

        # Essential to avoid modifying the source dict
        _preds = copy.deepcopy(preds)

        for k, v in six.iteritems(_preds):
            _preds[k] = np.concatenate(_preds[k], axis=0)

        if 'orig_images' in _preds and _preds['orig_images'].shape[0] > 10:
            # Only sample a few images for visualization.
            _preds['orig_images'] = _preds['orig_images'][:10]

        if use_groundtruth_from_json:
            eval_results = eval_metric.predict_metric_fn(_preds)
        else:
            images, annotations = coco_utils.extract_coco_groundtruth(_preds, include_mask)
            coco_dataset = coco_utils.create_coco_format_dataset(images, annotations)
            eval_results = eval_metric.predict_metric_fn(_preds, groundtruth_data=coco_dataset)

        return eval_results

    # Take into account cuDNN & TensorFlow warmup:
    # drop the N first steps for the average throughput calculation.
    BURNIN_STEPS = 100

    model_throughput_list = list()
    inference_time_list = list()

    while num_batches < 0 or batch_idx < num_batches:

        try:
            step_t0 = time.time()
            step_predictions = six.next(predictor)
            batch_time = time.time() - step_t0

            throughput = eval_batch_size / batch_time
            model_throughput_list.append(throughput)
            inference_time_list.append(batch_time)

            logging.info(
                'Running inference on batch %03d/%03d... - Step Time: %.4fs - Throughput: %.1f imgs/s' %
                (batch_idx + 1, num_batches, batch_time, throughput)
            )

        except StopIteration:
            logging.info('Got StopIteration at batch %d.' % (batch_idx + 1))
            break

        step_predictions = process_prediction_for_eval(step_predictions)

        for k, v in step_predictions.items():
            if k not in predictions:
                predictions[k] = [v]
            else:
                predictions[k].append(v)

        batch_idx = batch_idx + 1

        # If report_frequency is set, report metrics every report_frequency batches;
        # each report then covers eval_batch_size * report_frequency samples.
        if report_frequency and batch_idx % report_frequency == 0:
            eval_results = evaluation_preds(preds=predictions)
            logging.info('Eval results: %s' % pprint.pformat(eval_results, indent=4))

    inference_time_list.sort()

    eval_results = evaluation_preds(preds=predictions)

    average_time = np.mean(inference_time_list)
    latency_50 = max(inference_time_list[:int(len(inference_time_list) * 0.5)])
    latency_90 = max(inference_time_list[:int(len(inference_time_list) * 0.90)])
    latency_95 = max(inference_time_list[:int(len(inference_time_list) * 0.95)])
    latency_99 = max(inference_time_list[:int(len(inference_time_list) * 0.99)])
    latency_100 = max(inference_time_list[:int(len(inference_time_list) * 1)])

    print()  # Visual Spacing
    logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
    logging.info("         Evaluation Performance Summary          ")
    logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")

    total_processing_hours, rem = divmod(np.sum(model_throughput_list), 3600)
    total_processing_minutes, total_processing_seconds = divmod(rem, 60)

    if len(model_throughput_list) > BURNIN_STEPS:
        # Take into account cuDNN & TensorFlow warmup:
        # drop the N first steps for the average throughput calculation.
        # Also drop the last step, which may have a different batch size.
        avg_throughput = np.mean(model_throughput_list[BURNIN_STEPS:-1])
    else:
        avg_throughput = -1.

    print()  # Visual Spacing
    logging.info("Average throughput: {throughput:.1f} samples/sec".format(throughput=avg_throughput))
    logging.info("Inference Latency Average (s) = {avg:.4f}".format(avg=average_time))
    logging.info("Inference Latency 50% (s) = {cf_50:.4f}".format(cf_50=latency_50))
    logging.info("Inference Latency 90% (s) = {cf_90:.4f}".format(cf_90=latency_90))
    logging.info("Inference Latency 95% (s) = {cf_95:.4f}".format(cf_95=latency_95))
    logging.info("Inference Latency 99% (s) = {cf_99:.4f}".format(cf_99=latency_99))
    logging.info("Inference Latency 100% (s) = {cf_100:.4f}".format(cf_100=latency_100))
    logging.info("Total processed steps: {total_steps}".format(total_steps=len(model_throughput_list)))
    logging.info(
        "Total processing time: {hours}h {minutes:02d}m {seconds:02d}s".format(
            hours=total_processing_hours,
            minutes=int(total_processing_minutes),
            seconds=int(total_processing_seconds)
        )
    )

    dllogger.log(step=(), data={"avg_inference_throughput": avg_throughput}, verbosity=Verbosity.DEFAULT)

    avg_inference_time = float(
        total_processing_hours * 3600 + int(total_processing_minutes) * 60 + int(total_processing_seconds)
    )
    dllogger.log(step=(), data={"avg_inference_time": avg_inference_time}, verbosity=Verbosity.DEFAULT)

    logging.info("==================== Metrics ====================")

    # logging.info('Eval Epoch results: %s' % pprint.pformat(eval_results, indent=4))
    for key, value in sorted(eval_results.items(), key=operator.itemgetter(0)):
        logging.info("%s: %.9f" % (key, value))

    print()  # Visual Spacing

    return eval_results, predictions


def evaluate(eval_estimator,
             input_fn,
             num_eval_samples,
             eval_batch_size,
             include_mask=True,
             validation_json_file="",
             report_frequency=None):
    """Runs COCO evaluation once."""

    predictor = eval_estimator.predict(input_fn=input_fn, yield_single_examples=False)

    # Every predictor.next() gets a batch of predictions (a dictionary).
    num_eval_times = num_eval_samples // eval_batch_size
    assert num_eval_times > 0, 'num_eval_samples must be >= eval_batch_size!'

    eval_results, predictions = compute_coco_eval_metric(
        predictor,
        num_eval_times,
        include_mask,
        validation_json_file,
        eval_batch_size=eval_batch_size,
        report_frequency=report_frequency
    )

    return eval_results, predictions


def write_summary(eval_results, summary_dir, current_step, predictions=None):
    """Write out eval results for the checkpoint."""

    with tf.Graph().as_default():
        summaries = []

        # Summary writer writes out eval metrics.
        try:
            # TensorFlow 1.x
            summary_writer = tf.compat.v1.summary.FileWriter(summary_dir)
        except AttributeError:
            # TensorFlow 2.x
            summary_writer = tf.summary.create_file_writer(summary_dir)
            summary_writer.as_default()

        eval_results_dict = {}

        for metric in eval_results:
            try:
                summaries.append(tf.compat.v1.Summary.Value(tag=metric, simple_value=eval_results[metric]))
                eval_results_dict[metric] = float(eval_results[metric])
            except AttributeError:
                tf.summary.scalar(name=metric, data=eval_results[metric], step=current_step)
                eval_results_dict[metric] = float(eval_results[metric])

        dllogger.log(step=(), data=eval_results_dict, verbosity=Verbosity.DEFAULT)

        if isinstance(predictions, dict) and predictions:
            images_summary = get_image_summary(predictions, current_step)
            try:
                summaries += images_summary
            except TypeError:
                summaries.append(images_summary)

        try:
            # tf_summaries = tf.compat.v1.Summary(value=list(summaries))
            tf_summaries = tf.compat.v1.Summary(value=summaries)
            summary_writer.add_summary(tf_summaries, current_step)
            summary_writer.flush()
        except AttributeError:
            tf.summary.flush(summary_writer)


def generate_image_preview(image, boxes, scores, classes, gt_boxes=None, segmentations=None):
    """Creates an image summary given predictions."""

    max_boxes_to_draw = 100
    min_score_thresh = 0.1

    # Visualizes the predictions.
    image_with_detections = visualization_utils.visualize_boxes_and_labels_on_image_array(
        image,
        boxes,
        classes=classes,
        scores=scores,
        category_index={},
        instance_masks=segmentations,
        use_normalized_coordinates=False,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh,
        agnostic_mode=False
    )

    if gt_boxes is not None:
        # Visualizes the groundtruth boxes. They are in black by default.
        image_with_detections = visualization_utils.visualize_boxes_and_labels_on_image_array(
            image_with_detections,
            gt_boxes,
            classes=None,
            scores=None,
            category_index={},
            use_normalized_coordinates=False,
            max_boxes_to_draw=max_boxes_to_draw,
            agnostic_mode=True
        )

    return image_with_detections


def generate_image_buffer(input_image):
    buf = io.BytesIO()

    w, h = input_image.shape[:2]
    ratio = 1024 / w
    new_size = [int(w * ratio), int(h * ratio)]

    image = Image.fromarray(input_image.astype(np.uint8))
    image.thumbnail(new_size)
    image.save(buf, format='png')

    return buf.getvalue()


def get_image_summary(predictions, current_step, max_images=10):
    """Write out image and prediction for summary."""

    if 'orig_images' not in predictions:
        logging.info('Missing orig_images in predictions: %s', predictions.keys())
        return

    max_images = min(
        len(predictions['orig_images']) * predictions['orig_images'][0].shape[0],
        max_images
    )

    _detection_boxes = np.concatenate(predictions['detection_boxes'], axis=0)
    _detection_scores = np.concatenate(predictions['detection_scores'], axis=0)
    _detection_classes = np.concatenate(predictions['detection_classes'], axis=0)
    _image_info = np.concatenate(predictions['image_info'], axis=0)
    _num_detections = np.concatenate(predictions['num_detections'], axis=0)
    _orig_images = np.concatenate(predictions['orig_images'], axis=0)

    if 'detection_masks' in predictions:
        _detection_masks = np.concatenate(predictions['detection_masks'], axis=0)
    else:
        _detection_masks = None

    if 'groundtruth_boxes' in predictions:
        _groundtruth_boxes = np.concatenate(predictions['groundtruth_boxes'], axis=0)
    else:
        _groundtruth_boxes = None

    _orig_images = _orig_images * 255
    _orig_images = _orig_images.astype(np.uint8)

    image_previews = []

    for i in range(max_images):
        num_detections = min(len(_detection_boxes[i]), int(_num_detections[i]))

        detection_boxes = _detection_boxes[i][:num_detections]
        detection_scores = _detection_scores[i][:num_detections]
        detection_classes = _detection_classes[i][:num_detections]

        image = _orig_images[i]
        image_height = image.shape[0]
        image_width = image.shape[1]

        # Rescale the boxes to fit the visualization image.
        h, w = _image_info[i][3:5]
        detection_boxes = detection_boxes / np.array([w, h, w, h])
        detection_boxes = detection_boxes * np.array([image_width, image_height, image_width, image_height])

        if _groundtruth_boxes is not None:
            gt_boxes = _groundtruth_boxes[i]
            gt_boxes = gt_boxes * np.array([image_height, image_width, image_height, image_width])
        else:
            gt_boxes = None

        if _detection_masks is not None:
            instance_masks = _detection_masks[i][0:num_detections]
            segmentations = coco_metric.generate_segmentation_from_masks(
                instance_masks, detection_boxes, image_height, image_width
            )
        else:
            segmentations = None

        # Convert [x, y, w, h] to [x1, y1, x2, y2]. process_prediction_for_eval()
        # produced the boxes in [x, y] order, so they must be reverted to
        # [y, x] order for visualization.
        xmin, ymin, w, h = np.split(detection_boxes, 4, axis=-1)
        xmax = xmin + w
        ymax = ymin + h

        boxes_to_visualize = np.concatenate([ymin, xmin, ymax, xmax], axis=-1)

        image_preview = generate_image_preview(
            image,
            boxes=boxes_to_visualize,
            scores=detection_scores,
            classes=detection_classes.astype(np.int32),
            gt_boxes=gt_boxes,
            segmentations=segmentations
        )

        image_previews.append(image_preview)

    try:
        summaries = []
        for i, image_preview in enumerate(image_previews):
            image_buffer = generate_image_buffer(image_preview)
            image_summary = tf.compat.v1.Summary.Image(encoded_image_string=image_buffer)
            image_value = tf.compat.v1.Summary.Value(tag='%d_input' % i, image=image_summary)

            summaries.append(image_value)

    except AttributeError:
        image_previews = np.array(image_previews)
        summaries = tf.summary.image(
            name='image_summary',
            data=image_previews,
            step=current_step,
            max_outputs=max_images
        )

    return summaries
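
To make the percentile bookkeeping in compute_coco_eval_metric() concrete: the per-batch times are sorted ascending, and the q-th percentile latency is the maximum of the fastest q-fraction of batches. A small self-contained illustration with made-up timings:

# Self-contained illustration of the latency-percentile scheme above
# (the timing values are made up).
inference_time_list = sorted([0.11, 0.09, 0.10, 0.12, 0.35, 0.10, 0.11, 0.09, 0.10, 0.13])

def latency_at(q):
    # max of the fastest q-fraction of batches == q-th percentile latency
    return max(inference_time_list[:int(len(inference_time_list) * q)])

print("p50  =", latency_at(0.5))   # 0.10
print("p90  =", latency_at(0.9))   # 0.13
print("p100 =", latency_at(1.0))   # 0.35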
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hooks/__init__.py  (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from mask_rcnn.hooks.ckpt_hook import CheckpointSaverHook
from mask_rcnn.hooks.pretrained_restore_hook import PretrainedWeightsLoadingHook

__all__ = [
    "CheckpointSaverHook",
    "PretrainedWeightsLoadingHook",
]
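
The package re-exports both hooks, so callers can import them from the package root, as distributed_executer.py above does:

# Equivalent imports thanks to the re-exports above.
from mask_rcnn.hooks import CheckpointSaverHook, PretrainedWeightsLoadingHook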
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hooks/ckpt_hook.py  (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import tensorflow as tf

from mask_rcnn.utils.logging_formatter import logging

__all__ = ["CheckpointSaverHook"]


class CheckpointSaverHook(tf.estimator.SessionRunHook):
    """Saves checkpoints every N steps or seconds."""

    def __init__(self, checkpoint_dir, checkpoint_basename="model.ckpt"):
        """Initializes a `CheckpointSaverHook`.

        Args:
            checkpoint_dir: `str`, base directory for the checkpoint files.
            checkpoint_basename: `str`, base name for the checkpoint files.

        Raises:
            ValueError: One of `save_steps` or `save_secs` should be set.
            ValueError: At most one of `saver` or `scaffold` should be set.
        """
        logging.info("Create CheckpointSaverHook.")

        self._saver = None
        self._checkpoint_dir = checkpoint_dir
        self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)

        self._steps_per_run = 1

        self._is_initialized = False

        self._global_step_tensor = None
        self._summary_writer = None

    def _set_steps_per_run(self, steps_per_run):
        self._steps_per_run = steps_per_run

    def begin(self):
        self._global_step_tensor = tf.compat.v1.train.get_or_create_global_step()
        self._saver = tf.compat.v1.train.Saver()

        from tensorflow.python.training import summary_io
        self._summary_writer = summary_io.SummaryWriterCache.get(self._checkpoint_dir)

        if self._global_step_tensor is None:
            raise RuntimeError("Global step should be created to use CheckpointSaverHook.")

    def after_create_session(self, session, coord):
        if not self._is_initialized:
            global_step = session.run(self._global_step_tensor)

            from tensorflow.python.keras.backend import get_graph
            default_graph = get_graph()

            # We write the graph and saver_def at the first call of before_run.
            # We cannot do this in begin, since we let other hooks change the
            # graph and add variables in begin. The graph is finalized after
            # all begin calls.
            tf.io.write_graph(default_graph.as_graph_def(add_shapes=True), self._checkpoint_dir, "graph.pbtxt")

            saver_def = self._saver.saver_def

            from tensorflow.python.framework import meta_graph
            meta_graph_def = meta_graph.create_meta_graph_def(
                graph_def=default_graph.as_graph_def(add_shapes=True),
                saver_def=saver_def
            )

            self._summary_writer.add_graph(default_graph)
            self._summary_writer.add_meta_graph(meta_graph_def)

            # The checkpoint saved here is the state at step "global_step".
            self._save(session, global_step)

            self._is_initialized = True

    def end(self, session):
        last_step = session.run(self._global_step_tensor)
        self._save(session, last_step)

    def _save(self, session, step):
        """Saves the latest checkpoint, returns should_stop."""
        logging.info("Saving checkpoints for %d into %s.", step, self._save_path)

        self._saver.save(session, self._save_path, global_step=step)

        self._summary_writer.add_session_log(
            tf.compat.v1.SessionLog(
                status=tf.compat.v1.SessionLog.CHECKPOINT,
                checkpoint_path=self._save_path
            ),
            step
        )
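
A minimal usage sketch, for reference. In this commit the hook is actually wired in through get_training_hooks() in distributed_executer.py; the `estimator` and `train_input_fn` below are assumed to exist and the directory path is hypothetical.

# Hypothetical standalone usage -- in this commit the hook is attached
# via get_training_hooks() in distributed_executer.py instead.
hook = CheckpointSaverHook(checkpoint_dir="/tmp/model_dir", checkpoint_basename="model.ckpt")
estimator.train(input_fn=train_input_fn, max_steps=1000, hooks=[hook])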
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hooks/logging_hook.py  (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
copy
import
operator
import
time
import
numpy
as
np
import
tensorflow
as
tf
from
distutils.version
import
LooseVersion
from
mask_rcnn.utils.logging_formatter
import
logging
from
mask_rcnn.utils
import
meters
from
mask_rcnn.utils.decorators
import
atexit_hook
from
mask_rcnn.utils.distributed_utils
import
MPI_is_distributed
from
mask_rcnn.utils.distributed_utils
import
MPI_rank_and_size
from
mask_rcnn.utils.distributed_utils
import
MPI_size
from
mask_rcnn.utils.logging_backend
import
LoggingBackend
from
mask_rcnn.utils.logging_backend
import
RuntimeMode
from
mask_rcnn.utils.metric_tracking
import
clear_registered_metrics
from
mask_rcnn.utils.metric_tracking
import
TF_METRICS
from
mask_rcnn.utils.metric_tracking
import
KERAS_MODELS
from
mask_rcnn.utils.lazy_imports
import
LazyImport
hvd
=
LazyImport
(
"horovod.tensorflow"
)
__all__
=
[
"AutoLoggingHook"
]
@
atexit_hook
class
_AutoLoggingHook
(
tf
.
estimator
.
SessionRunHook
):
def
__init__
(
self
,
log_every_n_steps
=
200
,
warmup_steps
=
500
,
is_training
=
True
):
"""
AutoLogging Hook for Tensorflow
:param log_every_n_steps: log will be output on the console every N steps
:param warmup_steps: integers, numbers of steps considered as warmup
:param is_training: boolean
"""
self
.
_logging_proxy
=
LoggingBackend
()
self
.
_initialized
=
False
self
.
_metrics
=
copy
.
copy
(
TF_METRICS
)
self
.
_batch_size_tensor
=
None
self
.
_AMP_steps_since_last_loss_scale
=
None
self
.
_AMP_loss_scale_tensor
=
None
self
.
_current_step
=
None
self
.
_amp_steps_non_skipped
=
None
self
.
_warmup_steps
=
warmup_steps
self
.
_log_every_n_steps
=
log_every_n_steps
self
.
_step_t0
=
None
self
.
_session_t0
=
None
self
.
_session_run_times
=
list
()
self
.
_global_step_tensor
=
None
self
.
_is_training
=
is_training
self
.
_runtime_mode
=
RuntimeMode
.
TRAIN
if
is_training
else
RuntimeMode
.
VALIDATION
self
.
_model_throughput
=
meters
.
MovingAverageMeter
(
window_size
=
1000
)
self
.
_model_stats
=
None
self
.
_n_gpus
=
None
def
__atexit__
(
self
):
if
self
.
_initialized
:
total_processing_time
=
int
(
np
.
sum
(
self
.
_session_run_times
))
try
:
avg_throughput
=
self
.
_model_throughput
.
read
()
except
ValueError
:
avg_throughput
=
-
1
self
.
_logging_proxy
.
log_summary
(
is_train
=
self
.
_is_training
,
total_steps
=
self
.
_current_step
,
total_processing_time
=
total_processing_time
,
avg_throughput
=
avg_throughput
)
metric_data
=
dict
()
for
key
,
value
in
self
.
_metrics
.
items
():
try
:
metric_data
[
key
]
=
value
[
"aggregator"
].
read
()
except
ValueError
:
pass
self
.
_logging_proxy
.
log_final_metrics
(
metric_data
=
metric_data
,
runtime_mode
=
self
.
_runtime_mode
)
def
begin
(
self
):
"""Called once before using the session.
When called, the default graph is the one that will be launched in the
session. The hook can modify the graph by adding new operations to it.
After the `begin()` call the graph will be finalized and the other callbacks
can not modify the graph anymore. Second call of `begin()` on the same
graph, should not change the graph.
"""
from
tensorflow.python.keras.backend
import
get_graph
_graph
=
get_graph
()
try
:
self
.
_batch_size_tensor
=
None
for
tensor
in
_graph
.
as_graph_def
().
node
:
if
"IteratorGetNext"
in
tensor
.
name
:
_input_tensor
=
_graph
.
get_tensor_by_name
(
tensor
.
name
+
":0"
)
try
:
self
.
_batch_size_tensor
=
tf
.
shape
(
input
=
_input_tensor
)[
0
]
except
TypeError
:
# Ragged Tensor
self
.
_batch_size_tensor
=
_input_tensor
.
bounding_shape
()[
0
]
break
else
:
raise
RuntimeError
(
"Tensor `{}` could not be found. "
"Make sure you are using tf.data API"
.
format
(
"IteratorGetNext"
)
)
except
RuntimeError
:
raise
except
Exception
as
e
:
raise
RuntimeError
(
"Impossible to fetch the tensor: `IteratorGetNext`. Make sure you are using tf.data API."
)
from
e
self
.
_global_step_tensor
=
tf
.
compat
.
v1
.
train
.
get_or_create_global_step
()
try
:
self
.
_AMP_loss_scale_tensor
=
_graph
.
get_tensor_by_name
(
"current_loss_scale/Read/ReadVariableOp:0"
)
self
.
_AMP_steps_since_last_loss_scale
=
_graph
.
get_tensor_by_name
(
"current_loss_scale/Read/ReadVariableOp:0"
)
except
RuntimeError
:
raise
# TF-AMP is not activated
except
Exception
:
pass
# if self._is_training:
# self.runtime_data["params_count"] = tf.reduce_sum(
# [tf.reduce_prod(v.shape) for v in tf.trainable_variables()]
# )
def
end
(
self
,
session
):
# pylint: disable=unused-argument
"""Called at the end of session.
The `session` argument can be used in case the hook wants to run final ops,
such as saving a last checkpoint.
If `session.run()` raises exception other than OutOfRangeError or
StopIteration then `end()` is not called.
Note the difference between `end()` and `after_run()` behavior when
`session.run()` raises OutOfRangeError or StopIteration. In that case
`end()` is called but `after_run()` is not called.
Args:
session: A TensorFlow Session that will be soon closed.
"""
self
.
_session_run_times
.
append
(
time
.
time
()
-
self
.
_session_t0
)
def
after_create_session
(
self
,
session
,
coord
):
# pylint: disable=unused-argument3
"""Called when new TensorFlow session is created.
This is called to signal the hooks that a new session has been created. This
has two essential differences with the situation in which `begin` is called:
* When this is called, the graph is finalized and ops can no longer be added
to the graph.
* This method will also be called as a result of recovering a wrapped
session, not only at the beginning of the overall session.
Args:
session: A TensorFlow Session that has been created.
coord: A Coordinator object which keeps track of all threads.
"""
# ========= Collect the number of GPUs ======== #
if
self
.
_is_training
:
if
MPI_is_distributed
():
self
.
_n_gpus
=
MPI_size
()
elif
tf
.
distribute
.
has_strategy
():
self
.
_n_gpus
=
tf
.
distribute
.
get_strategy
().
num_replicas_in_sync
else
:
self
.
_n_gpus
=
1
else
:
self
.
_n_gpus
=
1
# =========== TensorFlow Hook Setup =========== #
_global_step
,
_metrics
=
setup_tensorflow_hook
(
sess
=
session
,
logging_proxy
=
self
.
_logging_proxy
,
is_training
=
self
.
_is_training
,
is_initialized
=
self
.
_initialized
)
if
_global_step
>=
0
:
self
.
_current_step
=
self
.
_amp_steps_non_skipped
=
_global_step
self
.
_metrics
.
update
(
_metrics
)
if
not
self
.
_is_training
:
for
metric_name
in
self
.
_metrics
.
keys
():
self
.
_metrics
[
metric_name
][
"aggregator"
].
reset
()
self
.
_initialized
=
True
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
self
.
_session_t0
=
time
.
time
()
def
before_run
(
self
,
run_context
):
# pylint: disable=unused-argument
"""Called before each call to run().
You can return from this call a `SessionRunArgs` object indicating ops or
tensors to add to the upcoming `run()` call. These ops/tensors will be run
together with the ops/tensors originally passed to the original run() call.
The run args you return can also contain feeds to be added to the run()
call.
The `run_context` argument is a `SessionRunContext` that provides
information about the upcoming `run()` call: the originally requested
op/tensors, the TensorFlow Session.
At this point graph is finalized and you can not add ops.
Args:
run_context: A `SessionRunContext` object.
Returns:
None or a `SessionRunArgs` object.
"""
self
.
_current_step
+=
1
request_fetches
=
{
"global_step"
:
self
.
_global_step_tensor
,
"metrics"
:
dict
(),
"batch_size"
:
self
.
_batch_size_tensor
}
if
self
.
_is_training
and
self
.
_AMP_steps_since_last_loss_scale
is
not
None
:
request_fetches
[
"AMP"
]
=
{
"steps_since_last_loss_scale"
:
self
.
_AMP_steps_since_last_loss_scale
,
"current_loss_scale"
:
self
.
_AMP_loss_scale_tensor
,
}
if
self
.
_current_step
%
self
.
_log_every_n_steps
==
0
:
for
key
,
value
in
self
.
_metrics
.
items
():
request_fetches
[
"metrics"
][
key
]
=
value
[
"tensor"
]
self
.
_step_t0
=
time
.
time
()
return
tf
.
estimator
.
SessionRunArgs
(
request_fetches
)
def
after_run
(
self
,
run_context
,
run_values
):
# pylint: disable=unused-argument
"""Called after each call to run().
The `run_values` argument contains results of requested ops/tensors by
`before_run()`.
The `run_context` argument is the same one send to `before_run` call.
`run_context.request_stop()` can be called to stop the iteration.
If `session.run()` raises any exceptions then `after_run()` is not called.
Args:
run_context: A `SessionRunContext` object.
run_values: A SessionRunValues object.
"""
batch_time
=
time
.
time
()
-
self
.
_step_t0
_global_step
=
run_values
.
results
[
"global_step"
]
if
self
.
_is_training
and
self
.
_AMP_steps_since_last_loss_scale
is
not
None
:
try
:
AMP_steps_since_last_loss_scale
=
run_values
.
results
[
"AMP"
][
"steps_since_last_loss_scale"
]
AMP_loss_scale
=
run_values
.
results
[
"AMP"
][
"current_loss_scale"
]
except
KeyError
:
AMP_steps_since_last_loss_scale
=
None
AMP_loss_scale
=
None
if
AMP_steps_since_last_loss_scale
is
not
None
:
# Step has been skipped
if
_global_step
!=
(
self
.
_amp_steps_non_skipped
+
1
):
logging
.
warning
(
"AMP - Training iteration `#{step}` has been skipped and loss rescaled. "
"New Loss Scale: {loss_scale}
\n
"
.
format
(
step
=
self
.
_current_step
,
loss_scale
=
AMP_loss_scale
)
)
else
:
self
.
_amp_steps_non_skipped
+=
1
if
AMP_steps_since_last_loss_scale
==
0
:
logging
.
warning
(
"AMP - Training iteration `#{step}` - Loss scale has been automatically increased. "
"New Loss Scale: {loss_scale}
\n
"
.
format
(
step
=
self
.
_current_step
,
loss_scale
=
AMP_loss_scale
)
)
else
:
AMP_steps_since_last_loss_scale
=
None
AMP_loss_scale
=
None
def
get_model_throughput
():
gpu_batch_size
=
run_values
.
results
[
"batch_size"
]
return
gpu_batch_size
/
batch_time
*
self
.
_n_gpus
# def get_model_stats():
# return get_tf_model_statistics(batch_size=run_values.results["batch_size"], scope_name=None)
#
# if self._model_stats is None:
# self._model_stats = get_model_stats()
is_log_step
=
self
.
_current_step
%
self
.
        is_log_step = self._current_step % self._log_every_n_steps == 0

        if is_log_step:

            if self._current_step > self._warmup_steps:
                try:
                    model_throughput = self._model_throughput.read()
                except ValueError:
                    model_throughput = get_model_throughput()
            else:
                model_throughput = get_model_throughput()

            self._logging_proxy.log_step(
                iteration=self._current_step,
                throughput=model_throughput,
                gpu_stats=[]
            )

            self._logging_proxy.log_amp_runtime(
                current_loss_scale=AMP_loss_scale,
                steps_non_skipped=_global_step,
                steps_since_last_scale=AMP_steps_since_last_loss_scale,
            )

            metric_data = dict()

            for name, value in sorted(run_values.results["metrics"].items(), key=operator.itemgetter(0)):
                self._metrics[name]["aggregator"].record(value)
                metric_data[name] = self._metrics[name]["aggregator"].read()

            self._logging_proxy.log_metrics(
                metric_data=metric_data,
                iteration=self._current_step,
                runtime_mode=self._runtime_mode
            )

            print()  # Visual Spacing

        elif self._current_step > self._warmup_steps:
            # Do not store speed for log step due to additional fetches
            self._model_throughput.record(get_model_throughput())


class _SlaveGPUsHook(tf.estimator.SessionRunHook):

    def after_create_session(self, session, coord):
        with logging.temp_verbosity(logging.INFO):
            # Do not warn user about metric cleaning
            clear_registered_metrics()


def real_autologging_hook(*args, **kwargs):
    replica_id = tf.distribute.get_replica_context().replica_id_in_sync_group

    # Do not set a logging hook for GPUs != 0
    if MPI_rank_and_size()[0] != 0 or (
            isinstance(replica_id, tf.Tensor) and tf.get_static_value(replica_id) != 0
    ):
        return _SlaveGPUsHook()

    else:
        _ = LoggingBackend()  # Making sure the backend is defined before any hook due to __atexit__ hook
        return _AutoLoggingHook(*args, **kwargs)


def collect_registered_metrics():

    if TF_METRICS:  # if not empty
        metrics = copy.copy(TF_METRICS)

        # Do not warn user about metric cleaning
        with logging.temp_verbosity(logging.INFO):
            clear_registered_metrics()

        return metrics

    else:
        return dict()


def get_model_variables():
    """Return model variables: global variables without the optimizer's variables."""
    return [
        # yapf: disable
        var
        for var in tf.compat.v1.global_variables()
        if (
            var.name[-11:] not in "/Momentum:0" and
            var.name[-11:] not in "/Adadelta:0" and
            var.name[-13:] not in "/Adadelta_1:0" and
            var.name[-7:] not in "/Adam:0" and
            var.name[-9:] not in "/Adam_1:0" and
            var.name[-10:] not in "/Adagrad:0" and
            var.name[-10:] not in "/RMSProp:0" and
            var.name[-12:] not in "/RMSProp_1:0" and
            var.name[-16:] not in "/LARSOptimizer:0"
        )
        # yapf: enable
    ]


def get_trainable_variables():
    """Get a list of trainable TensorFlow variables.

    Returns
    -------
    list of Tensor
        A list of trainable TensorFlow variables.
    """
    if KERAS_MODELS or LooseVersion(tf.__version__) >= LooseVersion("2.0.0"):
        logging.warning(
            "In TF2.x, only trainable variables created with Keras Models are captured for logging.\n"
            "In TF1.x, if any Keras model is defined, only variables created inside Keras Models will be logged."
        )

        var_list = list()

        for model in KERAS_MODELS:
            var_list.extend(model.trainable_variables)

        # Keep only a list of unique variables (remove potential duplicates)
        var_list = list(set(var_list))

        # Clearing the list of Keras Models to avoid memory leaks
        KERAS_MODELS.clear()

        return [var for var in sorted(var_list, key=lambda v: v.name)]

    else:
        # return tf.trainable_variables()  # deprecated in TF2.x
        from tensorflow.python.keras.backend import get_graph
        return get_graph().get_collection('trainable_variables')


def setup_tensorflow_hook(sess, logging_proxy, is_training, is_initialized):

    global_step = -1

    if is_training:

        if not is_initialized:
            _global_step_tensor = tf.compat.v1.train.get_or_create_global_step()
            global_step = sess.run(_global_step_tensor)

        trainable_variables = get_trainable_variables()

        def count_weights_in_varlist(var_list):
            return np.sum([np.prod(s.get_shape()) for s in var_list])

        logging_proxy.log_git_status()

        logging_proxy.log_model_statistics(
            model_statistics={
                "# Trainable Weights": "{:,}".format(int(count_weights_in_varlist(trainable_variables))),
                "# Model Weights": "{:,}".format(int(count_weights_in_varlist(get_model_variables()))),
            }
        )

        logging_proxy.log_trainable_variables([(var.name, var.get_shape()) for var in trainable_variables])

    else:
        if not is_initialized:
            global_step = 0

    metrics = collect_registered_metrics()

    logging_proxy.log_runtime(is_train=is_training)

    return global_step, metrics


AutoLoggingHook = lambda *args, **kwargs: real_autologging_hook(*args, **kwargs)
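
def _optimizer_suffix_filter_sketch():
    """A minimal sketch of the suffix test used in get_model_variables() above.

    The test relies on substring containment (`not in`), not equality: the
    last-N-characters slice of an optimizer slot variable reproduces the
    suffix literal exactly, while slices of model weights do not appear
    inside it. The variable names below are made up.
    """
    names = [
        "resnet50/conv2d/kernel:0",           # model weight -> kept
        "resnet50/conv2d/kernel/Momentum:0",  # optimizer slot -> dropped
    ]

    # "...kernel/Momentum:0"[-11:] == "/Momentum:0" -> found in literal -> dropped
    # "...conv2d/kernel:0"[-11:]  == "2d/kernel:0"  -> not a substring  -> kept
    return [n for n in names if n[-11:] not in "/Momentum:0"]
    # -> ['resnet50/conv2d/kernel:0']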
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hooks/pretrained_restore_hook.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import re

import tensorflow as tf

from mask_rcnn.utils.logging_formatter import logging
from mask_rcnn.utils.distributed_utils import MPI_rank

__all__ = ["PretrainedWeightsLoadingHook"]


# pylint: disable=protected-access
# Currently variable_scope doesn't provide very good APIs to access
# all variables under scope and retrieve and check existing scopes.
def get_variable_full_name(var):
    """Returns the full name of a variable.

    For normal Variables, this is the same as the var.op.name. For
    sliced or PartitionedVariables, this name is the same for all the
    slices/partitions. In both cases, this is normally the name used in
    a checkpoint file.

    Args:
        var: A `Variable` object.

    Returns:
        A string that is the full name.
    """
    if var._save_slice_info:
        return var._save_slice_info.full_name

    else:
        return var.op.name


def assign_from_checkpoint(model_path, var_list, ignore_missing_vars=False):
    """Creates an operation to assign specific variables from a checkpoint.

    Args:
        model_path: The full path to the model checkpoint. To get latest checkpoint
            use `model_path = tf.train.latest_checkpoint(checkpoint_dir)`
        var_list: A list of (possibly partitioned) `Variable` objects or a
            dictionary mapping names in the checkpoint to the corresponding variables
            or list of variables to initialize from that checkpoint value. For
            partitioned Variables, the name in the checkpoint must be the full
            variable, not the name of the partitioned variable, eg. "my_var" rather
            than "my_var/part_4". If empty, returns no_op(), {}.
        ignore_missing_vars: Boolean, if True ignore variables missing in the
            checkpoint with a warning instead of failing.

    Returns:
        the restore_op and the feed_dict that need to be run to restore var_list.

    Raises:
        ValueError: If `ignore_missing_vars` is False and the checkpoint specified
            at `model_path` is missing one of the variables in `var_list`.
    """
    # Normalize var_list into a dictionary mapping names in the
    # checkpoint to the list of variables to initialize from that
    # checkpoint variable. Sliced (including partitioned) variables will
    # end up under the same key.
    grouped_vars = {}

    if isinstance(var_list, (tuple, list)):
        for var in var_list:
            ckpt_name = get_variable_full_name(var)

            if ckpt_name not in grouped_vars:
                grouped_vars[ckpt_name] = []

            grouped_vars[ckpt_name].append(var)

    else:
        for ckpt_name, value in var_list.items():
            if isinstance(value, (tuple, list)):
                grouped_vars[ckpt_name] = value

            else:
                grouped_vars[ckpt_name] = [value]

    # Read each checkpoint entry. Create a placeholder variable and
    # add the (possibly sliced) data from the checkpoint to the feed_dict.
    reader = tf.compat.v1.train.NewCheckpointReader(model_path)

    feed_dict = {}
    assign_ops = []

    for ckpt_name in grouped_vars:

        if not reader.has_tensor(ckpt_name):
            log_str = 'Checkpoint is missing variable [%s]' % ckpt_name

            if ignore_missing_vars:
                logging.warning(log_str)
                continue

            else:
                raise ValueError(log_str)

        ckpt_value = reader.get_tensor(ckpt_name)

        for var in grouped_vars[ckpt_name]:
            placeholder_tensor = tf.compat.v1.placeholder(
                dtype=var.dtype.base_dtype,
                shape=var.get_shape(),
                name='placeholder/' + var.op.name
            )

            assign_ops.append(var.assign(placeholder_tensor))

            if not var._save_slice_info:

                if var.get_shape() != ckpt_value.shape:
                    raise ValueError(
                        'Total size of new array must be unchanged for %s '
                        'lh_shape: [%s], rh_shape: [%s]' %
                        (ckpt_name, str(ckpt_value.shape), str(var.get_shape()))
                    )

                feed_dict[placeholder_tensor] = ckpt_value.reshape(ckpt_value.shape)

            else:
                slice_dims = zip(var._save_slice_info.var_offset, var._save_slice_info.var_shape)

                slice_dims = [(start, start + size) for (start, size) in slice_dims]
                slice_dims = [slice(*x) for x in slice_dims]

                slice_value = ckpt_value[slice_dims]
                slice_value = slice_value.reshape(var._save_slice_info.var_shape)

                feed_dict[placeholder_tensor] = slice_value

    print_op = tf.print(
        "[GPU %02d] Restoring pretrained weights (%d Tensors) from: %s" % (
            MPI_rank(), len(assign_ops), model_path
        ),
        output_stream=sys.stdout
    )

    with tf.control_dependencies([print_op]):
        assign_op = tf.group(*assign_ops)

    return assign_op, feed_dict


def build_assigment_map(prefix=None, skip_variables_regex=None):
    """Generate the assignment map for loading checkpoints."""
    all_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=prefix)

    if not prefix:
        prefix = ''

    assignment_map = {}

    for var in all_vars:
        var_name = var.name

        if (
            var_name[-11:] in "/Momentum:0" or
            var_name[-11:] in "/Adadelta:0" or
            var_name[-13:] in "/Adadelta_1:0" or
            var_name[-7:] in "/Adam:0" or
            var_name[-9:] in "/Adam_1:0" or
            var_name[-10:] in "/Adagrad:0" or
            var_name[-10:] in "/RMSProp:0" or
            var_name[-12:] in "/RMSProp_1:0" or
            var_name[-16:] in "/LARSOptimizer:0"
        ):
            continue

        # Trim the index of the variable.
        if ':' in var_name:
            var_name = var_name[:var_name.rindex(':')]

        if skip_variables_regex and re.match(skip_variables_regex, var_name[len(prefix):]):
            continue

        assignment_map[var_name[len(prefix):]] = var
        # assignment_map[var_name] = var

    return assignment_map


class PretrainedWeightsLoadingHook(tf.estimator.SessionRunHook):

    def __init__(self, prefix, checkpoint_path, skip_variables_regex=None):
        self._prefix = prefix
        self._checkpoint_path = checkpoint_path
        self._skip_variables_regex = skip_variables_regex

        self._is_initialized = False

        self._init_op = None
        self._init_feed_dict = None

    def begin(self):
        vars_to_load = build_assigment_map(
            prefix=self._prefix,
            skip_variables_regex=self._skip_variables_regex
        )

        self._init_op, self._init_feed_dict = assign_from_checkpoint(
            model_path=self._checkpoint_path,
            var_list=vars_to_load,
            ignore_missing_vars=False
        )

    def after_create_session(self, session, coord=None):
        if not self._is_initialized:
            session.run(self._init_op, feed_dict=self._init_feed_dict)

            logging.info("Pretrained weights loaded with success...\n")

            self._is_initialized = True
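
def _partition_slice_sketch():
    """A numpy sketch of the slice handling in assign_from_checkpoint() above.

    A partitioned variable stores (offset, shape) metadata per partition;
    the code converts it into Python `slice` objects to cut the partition
    out of the full checkpoint array. The offsets/shapes here are made up.
    """
    import numpy as np

    ckpt_value = np.arange(12).reshape(6, 2)  # full checkpoint tensor

    var_offset = [2, 0]  # where this partition starts in the full variable
    var_shape = [3, 2]   # the shape of this partition

    # Same conversion as in assign_from_checkpoint():
    slice_dims = zip(var_offset, var_shape)
    slice_dims = [(start, start + size) for (start, size) in slice_dims]
    slice_dims = [slice(*x) for x in slice_dims]

    # Indexing with a tuple of slices returns rows 2..4 of the full array.
    return ckpt_value[tuple(slice_dims)].reshape(var_shape)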
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/__init__.py
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/cmdline_utils.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Defining common model params used across all the models."""
from absl import flags


def define_hparams_flags():

    flags.DEFINE_string(
        'log_path',
        default="./mrcnn.json",
        help=(
            'The path where dllogger json file will be saved. Please include the'
            ' name of the json file as well.'
        )
    )

    flags.DEFINE_string(
        'data_dir',
        default=None,
        help=(
            'The directory where the input data is stored. Please see the model'
            ' specific README.md for the expected data format.'
        )
    )

    flags.DEFINE_string('checkpoint', default='', help='Checkpoint filepath')

    flags.DEFINE_integer('eval_batch_size', default=8, help='Batch size for evaluation.')

    flags.DEFINE_bool('eval_after_training', default=True, help='Run one eval after the training finishes.')

    flags.DEFINE_integer('eval_samples', default=5000, help='Number of evaluation samples')

    flags.DEFINE_bool(
        'include_groundtruth_in_features',
        default=False,
        help=(
            'If `val_json_file` is not provided, one can also read groundtruth'
            ' from input by setting `include_groundtruth_in_features`=True'
        )
    )

    # Gradient clipping is a fairly coarse heuristic to stabilize training.
    # This model clips the gradient by its L2 norm globally (i.e., across
    # all variables), using a threshold obtained from multiplying this
    # parameter with sqrt(number_of_weights), to have a meaningful value
    # across both training phases and different sizes of imported modules.
    # Refer value: 0.02, for 25M weights, yields clip norm 10.
    # Zero or negative number means no clipping.
    flags.DEFINE_float("global_gradient_clip_ratio", default=-1.0, help="Global Gradient Clipping Ratio")

    flags.DEFINE_float("init_learning_rate", default=2.5e-3, help="Initial Learning Rate")

    flags.DEFINE_float("warmup_learning_rate", default=0., help="Warmup Learning Rate Decay Factor")

    flags.DEFINE_bool('finetune_bn', False, 'is batchnorm training mode')

    flags.DEFINE_float("l2_weight_decay", default=1e-4, help="l2 regularization weight")

    flags.DEFINE_string('mode', default='train_and_eval', help='Mode to run: train, eval, or train_and_eval')

    flags.DEFINE_string(
        'model_dir',
        default=None,
        help='The directory where the model and training/evaluation summaries are stored.'
    )

    flags.DEFINE_float("momentum", default=0.9, help="Optimizer Momentum")

    flags.DEFINE_integer('num_steps_per_eval', default=2500, help='Number of steps per evaluation epoch.')

    flags.DEFINE_integer('save_checkpoints_steps', default=2500, help='Save a checkpoint every N steps.')

    flags.DEFINE_integer('seed', default=None, help='Set a debug seed for reproducibility.')

    flags.DEFINE_integer('train_batch_size', default=2, help='Batch size for training.')

    flags.DEFINE_integer(
        'total_steps',
        default=938240,
        help=(
            'The number of steps to use for training. This flag'
            ' should be adjusted according to the --train_batch_size flag.'
        )
    )

    flags.DEFINE_list(
        'learning_rate_decay_levels',
        default=['0.1', '0.01'],
        help=(
            'The learning rate decay levels which modify the learning rate using the formula:'
            ' `lr = decay * init_lr`. Decay factor applied at learning_rate_steps.'
        )
    )

    flags.DEFINE_list(
        'learning_rate_steps',
        default=['480000', '640000'],
        help=(
            'The steps at which learning rate changes. This flag'
            ' should be adjusted according to the --train_batch_size flag.'
        )
    )

    flags.DEFINE_integer('warmup_steps', default=1000, help='The number of steps to use warmup learning rate for')

    flags.DEFINE_bool('amp', default=False, help='Enable automatic mixed precision')

    flags.DEFINE_bool('use_batched_nms', default=False, help='Enable Batched NMS at inference.')

    flags.DEFINE_bool('use_custom_box_proposals_op', default=False, help='Use GenerateBoundingBoxProposals op.')

    flags.DEFINE_bool('use_fake_data', False, 'Use fake input.')

    flags.DEFINE_bool('use_tf_distributed', default=False, help='Use tensorflow distributed API')

    flags.DEFINE_bool('xla', default=False, help='Enable XLA JIT Compiler.')

    flags.DEFINE_string('training_file_pattern', default="", help='TFRecords file pattern for the training files')

    flags.DEFINE_string('validation_file_pattern', default="", help='TFRecords file pattern for the validation files')

    flags.DEFINE_string('val_json_file', default="", help='Filepath for the validation json file')

    ############################# TO BE REMOVED ###################################

    flags.DEFINE_integer(
        'report_frequency',
        default=None,
        help='The amount of batches in between accuracy reports at evaluation time'
    )

    ############################# TO BE REMOVED ###################################

    ############################### ISSUES TO FIX - FLAGS #############################

    # TODO: Remove when XLA at inference fixed
    flags.DEFINE_bool('allow_xla_at_inference', default=False, help='Enable XLA JIT Compiler at Inference')

    return flags.FLAGS
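
def _define_hparams_flags_sketch():
    """A minimal usage sketch for the flag definitions above.

    absl flags are parsed either via app.run() or by calling the FLAGS
    object on an argv list (the first entry is the program name and is
    ignored). The argv values below are made up.
    """
    FLAGS = define_hparams_flags()

    FLAGS(['mask_rcnn_main.py', '--train_batch_size=4', '--amp=True'])

    return FLAGS.train_batch_size, FLAGS.amp, FLAGS.mode
    # -> (4, True, 'train_and_eval')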
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/flags_to_params.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to override model parameters from command-line flags."""
from mask_rcnn.hyperparameters import params_dict

ESSENTIAL_FLAGS = ['tpu', 'data_dir', 'model_dir']


def override_params_from_input_flags(params, input_flags):
    """Update params dictionary with input flags.

    Args:
        params: ParamsDict object containing dictionary of model parameters.
        input_flags: All the flags with non-null value of overridden model
            parameters.

    Returns:
        ParamsDict object containing dictionary of model parameters.
    """
    if params is None:
        raise ValueError(
            'Input dictionary is empty. It is expected to be loaded with default '
            'values'
        )

    if not isinstance(params, params_dict.ParamsDict):
        raise ValueError('The base parameter set must be a ParamsDict, was: {}'.format(type(params)))

    essential_flag_dict = {}

    for key in ESSENTIAL_FLAGS:
        flag_value = input_flags.get_flag_value(key, None)

        if flag_value is None:
            raise ValueError('Flag {} could not be None.'.format(key))

        else:
            essential_flag_dict[key] = flag_value

    params_dict.override_params_dict(params, essential_flag_dict, is_strict=False)

    normal_flag_dict = get_dictionary_from_flags(params.as_dict(), input_flags)

    params_dict.override_params_dict(params, normal_flag_dict, is_strict=False)

    return params


def get_dictionary_from_flags(params, input_flags):
    """Generate dictionary from non-null flags.

    Args:
        params: Python dictionary of model parameters.
        input_flags: All the flags with non-null value of overridden model
            parameters.

    Returns:
        Python dict of overriding model parameters.
    """
    flag_dict = {}

    for k, v in params.items():

        if isinstance(v, dict):
            d = get_dictionary_from_flags(v, input_flags)
            flag_dict[k] = d

        else:
            try:
                flag_value = input_flags.get_flag_value(k, None)

                if flag_value is not None:
                    flag_dict[k] = flag_value

            except AttributeError:
                flag_dict[k] = v

    return flag_dict
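
class _FakeFlags(object):
    """A hypothetical stand-in for absl's FLAGS object, used only by the
    sketch below; it provides get_flag_value(), the sole method used above."""

    def __init__(self, values):
        self._values = values

    def get_flag_value(self, name, default):
        return self._values.get(name, default)


def _flags_to_params_sketch():
    """A minimal sketch of how get_dictionary_from_flags() walks a nested
    params dict. The parameter names are made up.

    Note that leaf keys without a matching flag are omitted from the result;
    the caller merges it non-strictly into the full params, so omitted keys
    simply keep their current values.
    """
    params = {'train': {'batch_size': 2, 'lr': 0.0025}, 'use_amp': False}
    input_flags = _FakeFlags({'batch_size': 4, 'use_amp': True})

    return get_dictionary_from_flags(params, input_flags)
    # -> {'train': {'batch_size': 4}, 'use_amp': True}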
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/hyperparameters.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os
import warnings

import six
import yaml

import tensorflow as tf

from mask_rcnn.utils.logging_formatter import logging


class _Hyperparameters(object):
    """_Hyperparameters class to generate final hparams from various inputs."""

    def __init__(self, default_hparams_file, specific_hparams_file, input_flags, hparams_overrides):
        """Initialize and load the parameter dictionary from the different input sources.

        Args:
            default_hparams_file: YAML file storing default values of all hyperparameters.
            specific_hparams_file: YAML file storing accelerator-specific values of
                hyperparameters to override the default values.
            input_flags: Command line flags values for hyperparameters. [This is
                for backward compatibility, so that users passing hyperparameters as
                regular flags should not run into trouble.]
            hparams_overrides: A k=v string representing which hyperparameters need to
                be overridden from the command-line.

        Raises:
            ValueError: Raised when `default_hparams_file` is not readable.
        """
        if not tf.io.gfile.exists(default_hparams_file):
            raise ValueError(
                'Expected a valid path to a YAML file, which represents the default '
                'hyperparameters file. {}'.format(default_hparams_file)
            )

        self._params = {}
        self._params_source = {}

        self._default_hparams_file = default_hparams_file
        self._specific_hparams_file = specific_hparams_file
        self._input_flags = input_flags
        self._hparams_overrides = hparams_overrides

    def get_parameters(self, log_params):
        """Returns the dictionary loaded with final values of all hyperparameters.

        Args:
            log_params: Bool to specify whether the final hyperparameter values need
                to be logged or not.

        Returns:
            Python dictionary with all the final hyperparameters.
        """
        self._params, self._params_source = load_from_file(
            self._params, self._params_source, self._default_hparams_file
        )

        self._params, self._params_source = load_from_file(
            self._params, self._params_source, self._specific_hparams_file
        )

        self._params, self._params_source = load_from_input_flags(
            self._params, self._params_source, self._input_flags
        )

        self._params, self._params_source = load_from_hparams_overrides(
            self._params, self._params_source, self._hparams_overrides
        )

        if log_params:
            self.log_parameters()

        return self._params

    def log_parameters(self):
        """Log the hyperparameter values along with the source of those values."""
        params_log = ''

        for k in self._params:
            params_log += k + ':\t' + str(self._params[k])
            params_log += '\t[' + self._params_source[k] + ']\n'

        logging.info('\nModel hyperparameters [source]:\n%s', params_log)


def load_from_file(params, params_source, file_path):
    """Given a path to a YAML file, read the file and load it to a dictionary.

    Args:
        params: Python dictionary of hyperparameters.
        params_source: Python dictionary to record source of hyperparameters.
        file_path: Python string containing path to file.

    Returns:
        Python dict of hyperparameters.
    """
    if file_path is None:
        return params, params_source

    if not tf.io.gfile.exists(file_path):
        warnings.warn('Could not read Hyperparameter file : ' + file_path, RuntimeWarning)
        return params, params_source

    with tf.io.gfile.GFile(file_path, 'r') as f:
        overrides = yaml.load(f)

    for key, value in six.iteritems(overrides):
        params[key] = value
        params_source[key] = os.path.basename(file_path)

    return params, params_source


# TODO(amangu): Once global hyperparameter flags are removed, we won't need
# this function. Remove this function after implementing this.
def load_from_input_flags(params, params_source, input_flags):
    """Update params dictionary with input flags.

    Args:
        params: Python dictionary of hyperparameters.
        params_source: Python dictionary to record source of hyperparameters.
        input_flags: All the flags with non-null value of overridden
            hyperparameters.

    Returns:
        Python dict of hyperparameters.
    """
    if params is None:
        raise ValueError(
            'Input dictionary is empty. It is expected to be loaded with default '
            'values'
        )

    if not isinstance(params, dict):
        raise ValueError('The base parameter set must be a Python dict, was: {}'.format(type(params)))

    for key in params:
        flag_value = input_flags.get_flag_value(key, None)

        if flag_value is not None:
            params[key] = flag_value
            params_source[key] = 'Command-line flags'

    return params, params_source


# TODO(amangu): Add tests to verify different dtypes of params.
def load_from_hparams_overrides(params, params_source, hparams_overrides):
    """Given a dictionary of hyperparameters and a list of overrides, merge them.

    Args:
        params: Python dict containing a base hyperparameters set.
        params_source: Python dictionary to record source of hyperparameters.
        hparams_overrides: Python list of strings. This is a set of k=v overrides
            for the hyperparameters in `params`; if `k=v1` in `params` but `k=v2` in
            `hparams_overrides`, the second value wins and the value for `k` is `v2`.

    Returns:
        Python dict of hyperparameters.
    """
    if params is None:
        raise ValueError(
            'Input dictionary is empty. It is expected to be loaded with default '
            'values'
        )

    if not isinstance(params, dict):
        raise ValueError('The base hyperparameters set must be a Python dict, was: {}'.format(type(params)))

    if hparams_overrides is None:
        return params, params_source

    if isinstance(hparams_overrides, six.string_types):
        hparams_overrides = [hparams_overrides]

    if not isinstance(hparams_overrides, list):
        raise ValueError(
            'Expected that hparams_overrides would be `None`, a single string, or a'
            ' list of strings, was: {}'.format(type(hparams_overrides))
        )

    for kv_pair in hparams_overrides:

        if not isinstance(kv_pair, six.string_types):
            raise ValueError(
                'Expected that hparams_overrides would contain Python list of strings,'
                ' but encountered an item: {}'.format(type(kv_pair))
            )

        key, value = kv_pair.split('=')

        parser = type(params[key])

        if parser is bool:
            params[key] = value not in ('0', 'False', 'false')

        else:
            params[key] = parser(value)

        params_source[key] = 'Command-line `hparams` flag'

    return params, params_source


def get_hyperparameters(
    default_hparams_file, specific_hparams_file, input_flags, hparams_overrides, log_params=True
):
    """Single function to get hparams for any model using different sources.

    Args:
        default_hparams_file: YAML file storing default values of all hyperparameters.
        specific_hparams_file: YAML file storing accelerator-specific values of
            hyperparameters to override the default values.
        input_flags: Command line flags values for hyperparameters. [This is
            for backward compatibility, so that users passing hyperparameters as
            regular flags should not run into trouble.]
        hparams_overrides: A k=v string representing which hyperparameters need to
            be overridden from the command-line.
        log_params: Bool to specify whether the final hyperparameter values need to
            be logged or not.

    Returns:
        Python dictionary with all the final hyperparameters.
    """
    parameter = _Hyperparameters(default_hparams_file, specific_hparams_file, input_flags, hparams_overrides)

    return parameter.get_parameters(log_params)
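
def _hparams_overrides_sketch():
    """A stand-alone sketch of the k=v parsing in load_from_hparams_overrides().

    Each value is coerced with the type of the current value in `params`;
    bools are special-cased because bool('False') would be True. The keys
    and values below are made up.
    """
    params = {'train_batch_size': 2, 'amp': False, 'init_learning_rate': 2.5e-3}

    params, _ = load_from_hparams_overrides(
        params,
        params_source={},
        hparams_overrides=['train_batch_size=4', 'amp=true', 'init_learning_rate=1e-2']
    )

    return params
    # -> {'train_batch_size': 4, 'amp': True, 'init_learning_rate': 0.01}
    # Note 'amp=true': any value outside ('0', 'False', 'false') parses as True.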
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/mask_rcnn_params.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Parameters used to build Mask-RCNN model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from argparse import Namespace


class _Namespace(Namespace):

    def values(self):
        return self.__dict__


def default_config():
    return _Namespace(
        **dict(
            # input pre-processing parameters
            image_size=(832, 1344),
            augment_input_data=True,
            gt_mask_size=112,

            # dataset specific parameters
            num_classes=91,
            # num_classes=81,
            skip_crowd_during_training=True,
            use_category=True,

            # Region Proposal Network
            rpn_positive_overlap=0.7,
            rpn_negative_overlap=0.3,
            rpn_batch_size_per_im=256,
            rpn_fg_fraction=0.5,
            rpn_min_size=0.,

            # Proposal layer.
            batch_size_per_im=512,
            fg_fraction=0.25,
            fg_thresh=0.5,
            bg_thresh_hi=0.5,
            bg_thresh_lo=0.,

            # Faster-RCNN heads.
            fast_rcnn_mlp_head_dim=1024,
            bbox_reg_weights=(10., 10., 5., 5.),

            # Mask-RCNN heads.
            include_mask=True,  # whether or not to include mask branch.  # ===== Not existing in MLPerf ===== #
            mrcnn_resolution=28,

            # training
            train_rpn_pre_nms_topn=2000,
            train_rpn_post_nms_topn=1000,
            train_rpn_nms_threshold=0.7,

            # evaluation
            test_detections_per_image=100,
            test_nms=0.5,
            test_rpn_pre_nms_topn=1000,
            test_rpn_post_nms_topn=1000,
            test_rpn_nms_thresh=0.7,

            # model architecture
            min_level=2,
            max_level=6,
            num_scales=1,
            aspect_ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
            anchor_scale=8.0,

            # localization loss
            rpn_box_loss_weight=1.0,
            fast_rcnn_box_loss_weight=1.0,
            mrcnn_weight_loss_mask=1.0,

            # ---------- Training configurations ----------
            # Skips loading variables from the resnet checkpoint. It is used for
            # skipping nonexistent variables from the constructed graph. The list
            # of loaded variables is constructed from the scope 'resnetX', where 'X'
            # is depth of the resnet model. Supports regular expression.
            skip_checkpoint_variables='^NO_SKIP$',

            # ---------- Eval configurations ----------
            # Visualizes images and detection boxes on TensorBoard.
            visualize_images_summary=False,
        )
    )
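
def _default_config_sketch():
    """A short usage sketch for default_config() above.

    The returned _Namespace supports both attribute-style access and a dict
    view through values(); attributes can be overridden in place, e.g.
    num_classes=81 for the 80-class COCO split (see the commented-out
    alternative above).
    """
    config = default_config()

    num_classes = config.num_classes        # 91
    test_nms = config.values()['test_nms']  # 0.5

    config.num_classes = 81  # override in place before building the model

    return num_classes, test_nms, config.num_classes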
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/params_dict.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A parameter dictionary class which supports the nest structure."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import copy
import re

import six
import yaml

import tensorflow as tf

# regex pattern that matches on key-value pairs in a comma-separated
# key-value pair string. It splits each k-v pair on the = sign, and
# matches on values that are within single quotes, double quotes, single
# values (e.g. floats, ints, etc.), and a lists within brackets.
_PARAM_RE = re.compile(
    r"""
    (?P<name>[a-zA-Z][\w\.]*)    # variable name: "var" or "x"
    \s*=\s*
    ((?P<val>\'[^\]]*\'          # single quote
    |
    \"[^\]]*\"                   # double quote
    |
    [^,\[]*                      # single value
    |
    \[[^\]]*\]))                 # list of values
    ($|,\s*)""", re.VERBOSE
)


class ParamsDict(object):
    """A hyperparameter container class."""

    RESERVED_ATTR = ['_locked', '_restrictions']

    def __init__(self, default_params=None, restrictions=None):
        """Instantiate a ParamsDict.

        Instantiate a ParamsDict given a set of default parameters and a list of
        restrictions. Upon initialization, it validates itself by checking all the
        defined restrictions, and raises an error if it finds inconsistency.

        Args:
            default_params: a Python dict or another ParamsDict object including the
                default parameters to initialize.
            restrictions: a list of strings, which define a list of restrictions to
                ensure the consistency of different parameters internally. Each
                restriction string is defined as a binary relation with a set of
                operators, including {'==', '!=', '<', '<=', '>', '>='}.
        """
        self._locked = False
        self._restrictions = []

        if restrictions:
            self._restrictions = restrictions

        if default_params is None:
            default_params = {}

        self.override(default_params, is_strict=False)

        self.validate()

    def _set(self, k, v):
        if isinstance(v, dict):
            self.__dict__[k] = ParamsDict(v)
        else:
            self.__dict__[k] = copy.deepcopy(v)

    def __setattr__(self, k, v):
        """Sets the value of the existing key.

        Note that this does not allow directly defining a new key. Use the
        `override` method with `is_strict=False` instead.

        Args:
            k: the key string.
            v: the value to be used to set the key `k`.

        Raises:
            KeyError: if k is not defined in the ParamsDict.
        """
        if k not in ParamsDict.RESERVED_ATTR:

            if k not in self.__dict__.keys():
                raise KeyError(
                    'The key `{}` does not exist. '
                    'To extend the existing keys, use '
                    '`override` with `is_strict` = False.'.format(k)
                )

            if self._locked:
                raise ValueError('The ParamsDict has been locked. No change is allowed.')

        self._set(k, v)

    def __getattr__(self, k):
        """Gets the value of the existing key.

        Args:
            k: the key string.

        Returns:
            the value of the key.

        Raises:
            KeyError: if k is not defined in the ParamsDict.
        """
        if k not in self.__dict__.keys():
            raise KeyError('The key `{}` does not exist. '.format(k))

        return self.__dict__[k]

    def override(self, override_params, is_strict=True):
        """Override the ParamsDict with a set of given params.

        Args:
            override_params: a dict or a ParamsDict specifying the parameters to
                be overridden.
            is_strict: a boolean specifying whether override is strict or not. If
                True, keys in `override_params` must be present in the ParamsDict.
                If False, keys in `override_params` can be different from what is
                currently defined in the ParamsDict. In this case, the ParamsDict will
                be extended to include the new keys.
        """
        if self._locked:
            raise ValueError('The ParamsDict has been locked. No change is allowed.')

        if isinstance(override_params, ParamsDict):
            override_params = override_params.as_dict()

        self._override(override_params, is_strict)  # pylint: disable=protected-access

    def _override(self, override_dict, is_strict=True):
        """The implementation of `override`."""
        for k, v in six.iteritems(override_dict):

            if k in ParamsDict.RESERVED_ATTR:
                raise KeyError('The key `{}` is internally reserved. Can not be overridden.'.format(k))

            if k not in self.__dict__.keys():

                if is_strict:
                    raise KeyError(
                        'The key `{}` does not exist. '
                        'To extend the existing keys, use '
                        '`override` with `is_strict` = False.'.format(k)
                    )

                else:
                    self._set(k, v)

            else:
                if isinstance(v, dict):
                    self.__dict__[k]._override(v, is_strict)  # pylint: disable=protected-access

                elif isinstance(v, ParamsDict):
                    self.__dict__[k]._override(v.as_dict(), is_strict)  # pylint: disable=protected-access

                else:
                    self.__dict__[k] = copy.deepcopy(v)

    def lock(self):
        """Makes the ParamsDict immutable."""
        self._locked = True

    def as_dict(self):
        """Returns a dict representation of ParamsDict.

        For the nested ParamsDict, a nested dict will be returned.
        """
        params_dict = {}

        for k, v in six.iteritems(self.__dict__):

            if k not in ParamsDict.RESERVED_ATTR:

                if isinstance(v, ParamsDict):
                    params_dict[k] = v.as_dict()

                else:
                    params_dict[k] = copy.deepcopy(v)

        return params_dict

    def validate(self):
        """Validate the parameters consistency based on the restrictions.

        This method validates the internal consistency using the pre-defined list of
        restrictions. A restriction is defined as a string which specifies a binary
        operation. The supported binary operations are {'==', '!=', '<', '<=', '>',
        '>='}. Note that the meaning of these operators are consistent with the
        underlying Python implementation. Users should make sure the defined
        restrictions on their type make sense.

        For example, for a ParamsDict like the following
        ```
        a:
            a1: 1
            a2: 2
        b:
            bb:
                bb1: 10
                bb2: 20
            ccc:
                a1: 1
                a3: 3
        ```
        one can define two restrictions like this
        ['a.a1 == b.ccc.a1', 'a.a2 <= b.bb.bb2']

        What it enforces are:
        - a.a1 = 1 == b.ccc.a1 = 1
        - a.a2 = 2 <= b.bb.bb2 = 20

        Raises:
            KeyError: if any of the following happens
                (1) any of parameters in any of restrictions is not defined in
                    ParamsDict,
                (2) any inconsistency violating the restriction is found.
            ValueError: if the restriction defined in the string is not supported.
        """

        def _get_kv(dotted_string, params_dict):
            tokenized_params = dotted_string.split('.')

            v = params_dict

            for t in tokenized_params:
                v = v[t]

            return tokenized_params[-1], v

        def _get_kvs(tokens, params_dict):
            if len(tokens) != 2:
                raise ValueError('Only support binary relation in restriction.')

            stripped_tokens = [t.strip() for t in tokens]

            left_k, left_v = _get_kv(stripped_tokens[0], params_dict)
            right_k, right_v = _get_kv(stripped_tokens[1], params_dict)

            return left_k, left_v, right_k, right_v

        params_dict = self.as_dict()

        # `<=` and `>=` are tested before `<` and `>`: a bare `'<' in restriction`
        # check would also match `<=` restrictions and split them incorrectly.
        for restriction in self._restrictions:

            if '==' in restriction:
                tokens = restriction.split('==')
                _, left_v, _, right_v = _get_kvs(tokens, params_dict)

                if left_v != right_v:
                    raise KeyError('Found inconsistency between key `{}` and key `{}`.'.format(tokens[0], tokens[1]))

            elif '!=' in restriction:
                tokens = restriction.split('!=')
                _, left_v, _, right_v = _get_kvs(tokens, params_dict)

                if left_v == right_v:
                    raise KeyError('Found inconsistency between key `{}` and key `{}`.'.format(tokens[0], tokens[1]))

            elif '<=' in restriction:
                tokens = restriction.split('<=')
                _, left_v, _, right_v = _get_kvs(tokens, params_dict)

                if left_v > right_v:
                    raise KeyError('Found inconsistency between key `{}` and key `{}`.'.format(tokens[0], tokens[1]))

            elif '<' in restriction:
                tokens = restriction.split('<')
                _, left_v, _, right_v = _get_kvs(tokens, params_dict)

                if left_v >= right_v:
                    raise KeyError('Found inconsistency between key `{}` and key `{}`.'.format(tokens[0], tokens[1]))

            elif '>=' in restriction:
                tokens = restriction.split('>=')
                _, left_v, _, right_v = _get_kvs(tokens, params_dict)

                if left_v < right_v:
                    raise KeyError('Found inconsistency between key `{}` and key `{}`.'.format(tokens[0], tokens[1]))

            elif '>' in restriction:
                tokens = restriction.split('>')
                _, left_v, _, right_v = _get_kvs(tokens, params_dict)

                if left_v <= right_v:
                    raise KeyError('Found inconsistency between key `{}` and key `{}`.'.format(tokens[0], tokens[1]))

            else:
                raise ValueError('Unsupported relation in restriction.')
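
def _params_dict_usage_sketch():
    """A short usage sketch for ParamsDict above; the keys and the restriction
    below are made up."""
    params = ParamsDict(
        default_params={'train': {'batch_size': 2}, 'eval': {'batch_size': 8}},
        restrictions=['train.batch_size <= eval.batch_size']
    )

    params.override({'train': {'batch_size': 4}}, is_strict=True)
    params.validate()  # passes: 4 <= 8

    batch_size = params.train.batch_size  # 4, attribute access on nested dicts

    params.lock()  # any further override or assignment now raises ValueError

    return batch_size, params.as_dict()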
def read_yaml_to_params_dict(file_path):
    """Reads a YAML file to a ParamsDict."""
    with tf.io.gfile.GFile(file_path, 'r') as f:
        params_dict = yaml.load(f)

    return ParamsDict(params_dict)


def save_params_dict_to_yaml(params, file_path):
    """Saves the input ParamsDict to a YAML file."""
    with tf.io.gfile.GFile(file_path, 'w') as f:

        def _my_list_rep(dumper, data):
            # u'tag:yaml.org,2002:seq' is the YAML internal tag for sequence.
            return dumper.represent_sequence(u'tag:yaml.org,2002:seq', data, flow_style=True)

        yaml.add_representer(list, _my_list_rep)

        yaml.dump(params.as_dict(), f, default_flow_style=False)


def nested_csv_str_to_json_str(csv_str):
    """Converts a nested (using '.') comma-separated k=v string to a JSON string.

    Converts a comma-separated string of key/value pairs that supports
    nesting of keys to a JSON string. Nesting is implemented using
    '.' between levels for a given key.

    Spacing between commas and = is supported (e.g. there is no difference between
    "a=1,b=2", "a = 1, b = 2", or "a=1, b=2") but there should be no spaces before
    keys or after values (e.g. " a=1,b=2" and "a=1,b=2 " are not supported).

    Note that this will only support values supported by CSV, meaning
    values such as nested lists (e.g. "a=[[1,2,3],[4,5,6]]") are not
    supported. Strings are supported as well, e.g. "a='hello'".

    An example conversion would be:

    "a=1, b=2, c.a=2, c.b=3, d.a.a=5"

    to

    "{ a: 1, b : 2, c: {a : 2, b : 3}, d: {a: {a : 5}}}"

    Args:
        csv_str: the comma separated string.

    Returns:
        the converted JSON string.

    Raises:
        ValueError: If csv_str is not in a comma separated string or
            if the string is formatted incorrectly.
    """
    if not csv_str:
        return ''

    formatted_entries = []
    nested_map = collections.defaultdict(list)

    pos = 0
    while pos < len(csv_str):
        m = _PARAM_RE.match(csv_str, pos)

        if not m:
            raise ValueError('Malformed hyperparameter value while parsing CSV string: %s' % csv_str[pos:])

        pos = m.end()

        # Parse the values.
        m_dict = m.groupdict()
        name = m_dict['name']
        v = m_dict['val']

        name_nested = name.split('.')

        if len(name_nested) > 1:
            grouping = name_nested[0]
            value = '.'.join(name_nested[1:]) + '=' + v
            nested_map[grouping].append(value)

        else:
            formatted_entries.append('%s : %s' % (name, v))

    for grouping, value in nested_map.items():
        value = ','.join(value)
        value = nested_csv_str_to_json_str(value)
        formatted_entries.append('%s : %s' % (grouping, value))

    return '{' + ', '.join(formatted_entries) + '}'
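
def _nested_csv_sketch():
    """Reproduces the docstring example of nested_csv_str_to_json_str() above.

    The returned string is YAML-parseable, which is how override_params_dict()
    below consumes it.
    """
    json_str = nested_csv_str_to_json_str("a=1, b=2, c.a=2, c.b=3, d.a.a=5")
    # -> '{a : 1, b : 2, c : {a : 2, b : 3}, d : {a : {a : 5}}}'

    return yaml.load(json_str)
    # -> {'a': 1, 'b': 2, 'c': {'a': 2, 'b': 3}, 'd': {'a': {'a': 5}}}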
def override_params_dict(params, dict_or_string_or_yaml_file, is_strict):
    """Override a given ParamsDict using a dict, JSON/YAML/CSV string or YAML file.

    The logic of the function is outlined below:
    1. Test that the input is a dict. If not, proceed to 2.
    2. Test that the input is a string. If not, raise unknown ValueError.
        2.1. Test if the string is in a CSV format. If so, parse.
             If not, proceed to 2.2.
        2.2. Try loading the string as a YAML/JSON. If successful, parse to
             dict and use it to override. If not, proceed to 2.3.
        2.3. Try using the string as a file path and load the YAML file.

    Args:
        params: a ParamsDict object to be overridden.
        dict_or_string_or_yaml_file: a Python dict, JSON/YAML/CSV string or
            path to a YAML file specifying the parameters to be overridden.
        is_strict: a boolean specifying whether override is strict or not.

    Returns:
        params: the overridden ParamsDict object.

    Raises:
        ValueError: if failed to override the parameters.
    """
    if not dict_or_string_or_yaml_file:
        return params

    if isinstance(dict_or_string_or_yaml_file, dict):
        params.override(dict_or_string_or_yaml_file, is_strict)

    elif isinstance(dict_or_string_or_yaml_file, six.string_types):

        try:
            dict_or_string_or_yaml_file = (nested_csv_str_to_json_str(dict_or_string_or_yaml_file))

        except ValueError:
            pass

        params_dict = yaml.load(dict_or_string_or_yaml_file)

        if isinstance(params_dict, dict):
            params.override(params_dict, is_strict)

        else:
            with tf.io.gfile.GFile(dict_or_string_or_yaml_file) as f:
                params.override(yaml.load(f), is_strict)

    else:
        raise ValueError('Unknown input type to parse.')

    return params
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/hyperparameters/params_io.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#============================================================================
"""Utils to handle parameters IO."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import six
import yaml

import tensorflow as tf


def save_hparams_to_yaml(hparams, file_path):
    with tf.io.gfile.GFile(file_path, 'w') as f:

        try:
            hparams_val = hparams.values()
        except AttributeError:
            hparams_val = hparams.__dict__

        yaml.dump(hparams_val, f)


def override_hparams(hparams, dict_or_string_or_yaml_file):
    """Override a given hparams using a dict or a string or a YAML file.

    Args:
        hparams: a HParams object to be overridden.
        dict_or_string_or_yaml_file: a Python dict, or a comma-separated string,
            or a path to a YAML file specifying the parameters to be overridden.

    Returns:
        hparams: the overridden HParams object.

    Raises:
        ValueError: if failed to override the parameters.
    """
    if not dict_or_string_or_yaml_file:
        return hparams

    if isinstance(dict_or_string_or_yaml_file, dict):

        for key, val in dict_or_string_or_yaml_file.items():

            if key not in hparams:
                try:
                    # TF 1.x
                    hparams.add_hparam(key, val)

                except AttributeError:
                    # TF 2.x
                    try:
                        # Dict
                        hparams[key] = val

                    except TypeError:
                        # Namespace
                        setattr(hparams, key, val)

            else:
                raise ValueError("Parameter `%s` is already defined" % key)

        # hparams.override_from_dict(dict_or_string_or_yaml_file)

    elif isinstance(dict_or_string_or_yaml_file, six.string_types):

        try:
            hparams.parse(dict_or_string_or_yaml_file)

        except ValueError as parse_error:
            try:
                with tf.io.gfile.GFile(dict_or_string_or_yaml_file) as f:
                    hparams.override_from_dict(yaml.load(f))

            except Exception as read_error:
                parse_message = ('Failed to parse config string: %s\n' % str(parse_error))
                read_message = ('Failed to parse yaml file provided. %s' % str(read_error))

                raise ValueError(parse_message + read_message)

    else:
        raise ValueError('Unknown input type to parse.')

    return hparams
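
def _override_hparams_sketch():
    """A usage sketch for override_hparams() with a Namespace-style config.

    argparse.Namespace supports `in`, has no add_hparam() and no item
    assignment, so the dict branch above falls through AttributeError and
    TypeError to the setattr() case. The keys below are made up.
    """
    from argparse import Namespace

    hparams = Namespace(train_batch_size=2)

    # 'momentum' is new, so it is added through the setattr fallback.
    hparams = override_hparams(hparams, {'momentum': 0.9})

    # Re-adding an existing key raises:
    # override_hparams(hparams, {'momentum': 0.95})  -> ValueError

    return hparams.momentum  # 0.9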
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/mask_rcnn_model.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Model definition for the Mask-RCNN Model.
Defines model_fn of Mask-RCNN for TF Estimator. The model_fn includes Mask-RCNN
model architecture, loss function, learning rate schedule, and evaluation
procedure.
"""
import itertools

import tensorflow as tf

from mask_rcnn import anchors

from mask_rcnn.models import fpn
from mask_rcnn.models import heads
from mask_rcnn.models import resnet

from mask_rcnn.training import losses, learning_rates

from mask_rcnn.ops import postprocess_ops
from mask_rcnn.ops import roi_ops
from mask_rcnn.ops import spatial_transform_ops
from mask_rcnn.ops import training_ops

from mask_rcnn.utils.logging_formatter import logging

from mask_rcnn.utils.distributed_utils import MPI_is_distributed
from mask_rcnn.utils.distributed_utils import MPI_local_rank

from mask_rcnn.utils.meters import StandardMeter
from mask_rcnn.utils.metric_tracking import register_metric

from mask_rcnn.utils.lazy_imports import LazyImport

hvd = LazyImport("horovod.tensorflow")

MODELS = dict()


def create_optimizer(learning_rate, params):
    """Creates the optimizer based on the specified flags."""

    optimizer = tf.compat.v1.train.MomentumOptimizer(learning_rate, momentum=params['momentum'])

    if MPI_is_distributed():
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            name=None,
            device_dense='/gpu:0',
            device_sparse='',
            # compression=hvd.Compression.fp16,
            compression=hvd.Compression.none,
            sparse_as_dense=False
        )

    if params["amp"]:
        loss_scale = tf.train.experimental.DynamicLossScale(
            initial_loss_scale=(2 ** 12),
            increment_period=2000,
            multiplier=2.0
        )
        optimizer = tf.compat.v1.train.experimental.MixedPrecisionLossScaleOptimizer(optimizer, loss_scale=loss_scale)

    return optimizer
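
def _loss_scale_policy_sketch():
    """A pure-Python sketch of the DynamicLossScale policy configured above.

    This mimics only the documented policy (start at 2**12, double after every
    increment_period consecutive finite steps, back off when a non-finite
    gradient forces a skipped step); it is a sketch, not TensorFlow's
    implementation.
    """
    loss_scale, good_steps = float(2 ** 12), 0
    increment_period, multiplier = 2000, 2.0

    def update(grads_are_finite, loss_scale, good_steps):
        if grads_are_finite:
            good_steps += 1
            if good_steps >= increment_period:
                loss_scale, good_steps = loss_scale * multiplier, 0  # scale up
        else:
            # A non-finite gradient skips the step and lowers the scale.
            loss_scale, good_steps = max(1.0, loss_scale / multiplier), 0
        return loss_scale, good_steps

    for _ in range(2000):
        loss_scale, good_steps = update(True, loss_scale, good_steps)
    # loss_scale is now 8192.0 (doubled once after 2000 finite steps)

    loss_scale, good_steps = update(False, loss_scale, good_steps)
    # loss_scale is back to 4096.0 after one overflow step

    return loss_scale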
def compute_model_statistics(batch_size, is_training=True):
    """Compute number of parameters and FLOPS."""
    options = tf.compat.v1.profiler.ProfileOptionBuilder.float_operation()
    options['output'] = 'none'

    from tensorflow.python.keras.backend import get_graph
    flops = tf.compat.v1.profiler.profile(get_graph(), options=options).total_float_ops
    flops_per_image = flops / batch_size

    logging.info('[%s Compute Statistics] %.1f GFLOPS/image' % (
        "Training" if is_training else "Inference",
        flops_per_image / 1e9
    ))


def build_model_graph(features, labels, is_training, params):
    """Builds the forward model graph."""
    model_outputs = {}

    is_gpu_inference = not is_training and params['use_batched_nms']

    batch_size, image_height, image_width, _ = features['images'].get_shape().as_list()

    if 'source_ids' not in features:
        features['source_ids'] = -1 * tf.ones([batch_size], dtype=tf.float32)

    all_anchors = anchors.Anchors(
        params['min_level'], params['max_level'],
        params['num_scales'], params['aspect_ratios'],
        params['anchor_scale'],
        (image_height, image_width)
    )

    MODELS["backbone"] = resnet.Resnet_Model(
        "resnet50",
        data_format='channels_last',
        trainable=is_training,
        finetune_bn=params['finetune_bn']
    )

    backbone_feats = MODELS["backbone"](
        features['images'],
        training=is_training,
    )

    MODELS["FPN"] = fpn.FPNNetwork(params['min_level'], params['max_level'], trainable=is_training)

    fpn_feats = MODELS["FPN"](backbone_feats, training=is_training)

    model_outputs.update({'fpn_features': fpn_feats})

    def rpn_head_fn(features, min_level=2, max_level=6, num_anchors=3):
        """Region Proposal Network (RPN) for Mask-RCNN."""
        scores_outputs = dict()
        box_outputs = dict()

        MODELS["RPN_Heads"] = heads.RPN_Head_Model(name="rpn_head", num_anchors=num_anchors, trainable=is_training)

        for level in range(min_level, max_level + 1):
            scores_outputs[level], box_outputs[level] = MODELS["RPN_Heads"](features[level], training=is_training)

        return scores_outputs, box_outputs

    rpn_score_outputs, rpn_box_outputs = rpn_head_fn(
        features=fpn_feats,
        min_level=params['min_level'],
        max_level=params['max_level'],
        num_anchors=len(params['aspect_ratios'] * params['num_scales'])
    )

    if is_training:
        rpn_pre_nms_topn = params['train_rpn_pre_nms_topn']
        rpn_post_nms_topn = params['train_rpn_post_nms_topn']
        rpn_nms_threshold = params['train_rpn_nms_threshold']

    else:
        rpn_pre_nms_topn = params['test_rpn_pre_nms_topn']
        rpn_post_nms_topn = params['test_rpn_post_nms_topn']
        rpn_nms_threshold = params['test_rpn_nms_thresh']

    if params['use_custom_box_proposals_op']:
        rpn_box_scores, rpn_box_rois = roi_ops.custom_multilevel_propose_rois(
            scores_outputs=rpn_score_outputs,
            box_outputs=rpn_box_outputs,
            all_anchors=all_anchors,
            image_info=features['image_info'],
            rpn_pre_nms_topn=rpn_pre_nms_topn,
            rpn_post_nms_topn=rpn_post_nms_topn,
            rpn_nms_threshold=rpn_nms_threshold,
            rpn_min_size=params['rpn_min_size']
        )

    else:
        rpn_box_scores, rpn_box_rois = roi_ops.multilevel_propose_rois(
            scores_outputs=rpn_score_outputs,
            box_outputs=rpn_box_outputs,
            all_anchors=all_anchors,
            image_info=features['image_info'],
            rpn_pre_nms_topn=rpn_pre_nms_topn,
            rpn_post_nms_topn=rpn_post_nms_topn,
            rpn_nms_threshold=rpn_nms_threshold,
            rpn_min_size=params['rpn_min_size'],
            bbox_reg_weights=None,
            use_batched_nms=params['use_batched_nms']
        )

    rpn_box_rois = tf.cast(rpn_box_rois, dtype=tf.float32)

    if is_training:
        rpn_box_rois = tf.stop_gradient(rpn_box_rois)
        rpn_box_scores = tf.stop_gradient(rpn_box_scores)  # TODO Jonathan: Unused => Shall keep ?

        # Sampling
        box_targets, class_targets, rpn_box_rois, proposal_to_label_map = training_ops.proposal_label_op(
            rpn_box_rois,
            labels['gt_boxes'],
            labels['gt_classes'],
            batch_size_per_im=params['batch_size_per_im'],
            fg_fraction=params['fg_fraction'],
            fg_thresh=params['fg_thresh'],
            bg_thresh_hi=params['bg_thresh_hi'],
            bg_thresh_lo=params['bg_thresh_lo']
        )

    # Performs multi-level RoIAlign.
    box_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        features=fpn_feats,
        boxes=rpn_box_rois,
        output_size=7,
        is_gpu_inference=is_gpu_inference
    )

    MODELS["Box_Head"] = heads.Box_Head_Model(
        num_classes=params['num_classes'],
        mlp_head_dim=params['fast_rcnn_mlp_head_dim'],
        trainable=is_training
    )

    class_outputs, box_outputs, _ = MODELS["Box_Head"](inputs=box_roi_features)

    if not is_training:

        if params['use_batched_nms']:
            generate_detections_fn = postprocess_ops.generate_detections_gpu

        else:
            generate_detections_fn = postprocess_ops.generate_detections_tpu

        detections = generate_detections_fn(
            class_outputs=class_outputs,
            box_outputs=box_outputs,
            anchor_boxes=rpn_box_rois,
            image_info=features['image_info'],
            pre_nms_num_detections=params['test_rpn_post_nms_topn'],
            post_nms_num_detections=params['test_detections_per_image'],
            nms_threshold=params['test_nms'],
            bbox_reg_weights=params['bbox_reg_weights']
        )

        model_outputs.update({
            'num_detections': detections[0],
            'detection_boxes': detections[1],
            'detection_classes': detections[2],
            'detection_scores': detections[3],
        })

    else:  # is training
        encoded_box_targets = training_ops.encode_box_targets(
            boxes=rpn_box_rois,
            gt_boxes=box_targets,
            gt_labels=class_targets,
            bbox_reg_weights=params['bbox_reg_weights']
        )

        model_outputs.update({
            'rpn_score_outputs': rpn_score_outputs,
            'rpn_box_outputs': rpn_box_outputs,
            'class_outputs': class_outputs,
            'box_outputs': box_outputs,
            'class_targets': class_targets,
            'box_targets': encoded_box_targets,
            'box_rois': rpn_box_rois,
        })

    # Faster-RCNN mode.
    if not params['include_mask']:
        return model_outputs

    # Mask sampling
    if not is_training:
        selected_box_rois = model_outputs['detection_boxes']
        class_indices = model_outputs['detection_classes']

        # If using GPU for inference, delay the cast until when Gather ops show up
        # since GPU inference supports float point better.
        # TODO(laigd): revisit this when newer versions of GPU libraries is
        # released.
        if not params['use_batched_nms']:
            class_indices = tf.cast(class_indices, dtype=tf.int32)

    else:
        selected_class_targets, selected_box_targets, \
            selected_box_rois, proposal_to_label_map = training_ops.select_fg_for_masks(
                class_targets=class_targets,
                box_targets=box_targets,
                boxes=rpn_box_rois,
                proposal_to_label_map=proposal_to_label_map,
                max_num_fg=int(params['batch_size_per_im'] * params['fg_fraction'])
            )

        class_indices = tf.cast(selected_class_targets, dtype=tf.int32)

    mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        features=fpn_feats,
        boxes=selected_box_rois,
        output_size=14,
        is_gpu_inference=is_gpu_inference
    )

    MODELS["Mask_Head"] = heads.Mask_Head_Model(
        class_indices,
        num_classes=params['num_classes'],
        mrcnn_resolution=params['mrcnn_resolution'],
        is_gpu_inference=is_gpu_inference,
        trainable=is_training,
        name="mask_head"
    )

    mask_outputs = MODELS["Mask_Head"](inputs=mask_roi_features)

    if MPI_local_rank() == 0:
        # Print #FLOPs in model.
        compute_model_statistics(batch_size, is_training=is_training)

    if is_training:
        mask_targets = training_ops.get_mask_targets(
            fg_boxes=selected_box_rois,
            fg_proposal_to_label_map=proposal_to_label_map,
            fg_box_targets=selected_box_targets,
            mask_gt_labels=labels['cropped_gt_masks'],
            output_size=params['mrcnn_resolution']
        )

        model_outputs.update({
            'mask_outputs': mask_outputs,
            'mask_targets': mask_targets,
            'selected_class_targets': selected_class_targets,
        })

    else:
        model_outputs.update({
            'detection_masks': tf.nn.sigmoid(mask_outputs),
        })

    return model_outputs
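
def _name_pattern_sketch():
    """A sketch of the name-based conventions applied in _model_fn() below.

    Following the Detectron reference, variables whose names contain
    "batch_normalization", "bias" or "beta" are excluded from L2 weight
    decay, and gradients of "bias"/"beta" variables are doubled. The
    variable names below are made up.
    """
    var_names = [
        "resnet50/conv2d/kernel",
        "resnet50/conv2d/bias",
        "resnet50/batch_normalization/beta",
    ]

    weight_decay_vars = [
        name for name in var_names
        if not any([p in name for p in ["batch_normalization", "bias", "beta"]])
    ]  # -> ['resnet50/conv2d/kernel']

    doubled_grad_vars = [
        name for name in var_names
        if any([p in name for p in ["bias", "beta"]])
    ]  # -> ['resnet50/conv2d/bias', 'resnet50/batch_normalization/beta']

    return weight_decay_vars, doubled_grad_vars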
def
_model_fn
(
features
,
labels
,
mode
,
params
):
"""Model defination for the Mask-RCNN model based on ResNet.
Args:
features: the input image tensor and auxiliary information, such as
`image_info` and `source_ids`. The image tensor has a shape of
[batch_size, height, width, 3]. The height and width are fixed and equal.
labels: the input labels in a dictionary. The labels include score targets
and box targets which are dense label maps. The labels are generated from
get_input_fn function in data/dataloader.py
mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
params: the dictionary defines hyperparameters of model. The default
settings are in default_hparams function in this file.
Returns:
tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction.
"""
    # Set up training loss and learning rate.
    global_step = tf.compat.v1.train.get_or_create_global_step()

    if mode == tf.estimator.ModeKeys.PREDICT:

        if params['include_groundtruth_in_features'] and 'labels' in features:
            # Include the groundtruth for eval.
            labels = features['labels']

        else:
            labels = None

        if 'features' in features:
            features = features['features']
            # Otherwise, it is in export mode and the features are passed in directly.

    model_outputs = build_model_graph(
        features,
        labels,
        mode == tf.estimator.ModeKeys.TRAIN,
        params
    )

    model_outputs.update({
        'source_id': features['source_ids'],
        'image_info': features['image_info'],
    })

    if mode == tf.estimator.ModeKeys.PREDICT and 'orig_images' in features:
        model_outputs['orig_images'] = features['orig_images']

    # First check if it is in PREDICT or EVAL mode to fill out predictions.
    # Predictions are used during the eval step to generate metrics.
    if mode in [tf.estimator.ModeKeys.PREDICT, tf.estimator.ModeKeys.EVAL]:
        predictions = {}

        try:
            model_outputs['orig_images'] = features['orig_images']
        except KeyError:
            pass

        if labels and params['include_groundtruth_in_features']:
            # Labels can only be embedded in predictions. A prediction value
            # cannot itself be a dictionary.
            predictions.update(labels)

        model_outputs.pop('fpn_features', None)
        predictions.update(model_outputs)

        if mode == tf.estimator.ModeKeys.PREDICT:
            # If we are doing PREDICT, we can return here.
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    # score_loss and box_loss are for logging. Only total_loss is optimized.
    total_rpn_loss, rpn_score_loss, rpn_box_loss = losses.rpn_loss(
        score_outputs=model_outputs['rpn_score_outputs'],
        box_outputs=model_outputs['rpn_box_outputs'],
        labels=labels,
        params=params
    )

    total_fast_rcnn_loss, fast_rcnn_class_loss, fast_rcnn_box_loss = losses.fast_rcnn_loss(
        class_outputs=model_outputs['class_outputs'],
        box_outputs=model_outputs['box_outputs'],
        class_targets=model_outputs['class_targets'],
        box_targets=model_outputs['box_targets'],
        params=params
    )

    # Only training has the mask loss.
    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/model_builder.py
    if mode == tf.estimator.ModeKeys.TRAIN and params['include_mask']:
        mask_loss = losses.mask_rcnn_loss(
            mask_outputs=model_outputs['mask_outputs'],
            mask_targets=model_outputs['mask_targets'],
            select_class_targets=model_outputs['selected_class_targets'],
            params=params
        )

    else:
        mask_loss = 0.
    trainable_variables = list(itertools.chain.from_iterable(
        [model.trainable_variables for model in MODELS.values()]))

    l2_regularization_loss = params['l2_weight_decay'] * tf.add_n([
        tf.nn.l2_loss(v)
        for v in trainable_variables
        if not any([pattern in v.name for pattern in ["batch_normalization", "bias", "beta"]])
    ])

    total_loss = total_rpn_loss + total_fast_rcnn_loss + mask_loss + l2_regularization_loss
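
    # A hedged illustration (added; not part of the original graph) of what the
    # filter above keeps: batch norm parameters and biases are excluded from
    # weight decay. For variable names such as
    #   names = ["conv2d/kernel:0", "box_head/fc6/bias:0", "batch_normalization/beta:0"]
    # only "conv2d/kernel:0" contributes to l2_regularization_loss:
    #   decayed = [n for n in names
    #              if not any(p in n for p in ("batch_normalization", "bias", "beta"))]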
    if mode == tf.estimator.ModeKeys.EVAL:
        # Predictions can only contain a dict of tensors, not a dict of dicts
        # of tensors. These outputs are not used for eval purposes.
        del predictions['rpn_score_outputs']
        del predictions['rpn_box_outputs']

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=total_loss
        )
    if mode == tf.estimator.ModeKeys.TRAIN:

        learning_rate = learning_rates.step_learning_rate_with_linear_warmup(
            global_step=global_step,
            init_learning_rate=params['init_learning_rate'],
            warmup_learning_rate=params['warmup_learning_rate'],
            warmup_steps=params['warmup_steps'],
            learning_rate_levels=params['learning_rate_levels'],
            learning_rate_steps=params['learning_rate_steps']
        )
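
        # A hedged sketch (added) of the schedule above, inferred from the
        # function name and its parameters rather than from the library code:
        # linear warmup from warmup_learning_rate to init_learning_rate over
        # warmup_steps, then a piecewise-constant drop to learning_rate_levels[i]
        # once global_step reaches learning_rate_steps[i]:
        #   def lr_at(step):
        #       if step < warmup_steps:
        #           frac = step / warmup_steps
        #           return warmup_learning_rate + frac * (init_learning_rate - warmup_learning_rate)
        #       lr = init_learning_rate
        #       for level, boundary in zip(learning_rate_levels, learning_rate_steps):
        #           if step >= boundary:
        #               lr = level
        #       return lr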
        optimizer = create_optimizer(learning_rate, params)

        grads_and_vars = optimizer.compute_gradients(
            total_loss, trainable_variables, colocate_gradients_with_ops=True)

        gradients, variables = zip(*grads_and_vars)
        grads_and_vars = []

        # Special treatment for biases (beta is named as bias in reference model)
        # Reference: https://github.com/ddkang/Detectron/blob/80f3295308/lib/modeling/optimizer.py#L109
        for grad, var in zip(gradients, variables):
            if grad is not None and any([pattern in var.name for pattern in ["bias", "beta"]]):
                grad = 2.0 * grad
            grads_and_vars.append((grad, var))

        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
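
        # Note (added): for plain (momentum) SGD, doubling a bias/beta gradient
        # is equivalent to giving that variable twice the base learning rate,
        # since the update v -= lr * (2 * g) equals v -= (2 * lr) * g.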
    else:
        train_op = None
        learning_rate = None

    replica_id = tf.distribute.get_replica_context().replica_id_in_sync_group

    if not isinstance(replica_id, tf.Tensor) or tf.get_static_value(replica_id) == 0:
        register_metric(name="L2 loss", tensor=l2_regularization_loss, aggregator=StandardMeter())
        register_metric(name="Mask loss", tensor=mask_loss, aggregator=StandardMeter())
        register_metric(name="Total loss", tensor=total_loss, aggregator=StandardMeter())

        register_metric(name="RPN box loss", tensor=rpn_box_loss, aggregator=StandardMeter())
        register_metric(name="RPN score loss", tensor=rpn_score_loss, aggregator=StandardMeter())
        register_metric(name="RPN total loss", tensor=total_rpn_loss, aggregator=StandardMeter())

        register_metric(name="FastRCNN class loss", tensor=fast_rcnn_class_loss, aggregator=StandardMeter())
        register_metric(name="FastRCNN box loss", tensor=fast_rcnn_box_loss, aggregator=StandardMeter())
        register_metric(name="FastRCNN total loss", tensor=total_fast_rcnn_loss, aggregator=StandardMeter())

        register_metric(name="Learning rate", tensor=learning_rate, aggregator=StandardMeter())

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=total_loss,
        train_op=train_op,
    )
def mask_rcnn_model_fn(features, labels, mode, params):
    """Mask-RCNN model."""

    return _model_fn(
        features,
        labels,
        mode,
        params
    )
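
# A minimal usage sketch (added; not part of the original file). The full
# `params` dict and the input function come from hyperparameters/ and the
# dataloader; `model_dir`, `train_input_fn`, and `total_steps` below are
# illustrative assumptions only:
#
#   estimator = tf.estimator.Estimator(
#       model_fn=mask_rcnn_model_fn,
#       model_dir='/tmp/mask_rcnn',
#       params=params)
#   estimator.train(input_fn=train_input_fn, max_steps=total_steps)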
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/models/__init__.py
0 → 100644
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/models/fpn.py
0 → 100644
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Feature Pyramid Network.
Feature Pyramid Networks were proposed in:
[1] Tsung-Yi Lin, Piotr Dollar, Ross Girshick, Kaiming He, Bharath Hariharan,
    and Serge Belongie
    Feature Pyramid Networks for Object Detection. CVPR 2017.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from mask_rcnn.ops import spatial_transform_ops
class FPNNetwork(tf.keras.models.Model):

    def __init__(self, min_level=3, max_level=7, filters=256, trainable=True):
        """Generates the multi-scale feature pyramid (FPN).

        Args:
          min_level: the minimum level number to generate FPN features.
          max_level: the maximum level number to generate FPN features.
          filters: the FPN filter size.
          trainable: whether the FPN layers are trainable.

        The `call` method takes `feats_bottom_up`, a dictionary of tensors with
        level as keys and bottom-up feature tensors as values, and returns a
        dictionary of tensors with level as keys and the generated FPN features
        as values.
        """
        super(FPNNetwork, self).__init__(name="fpn", trainable=trainable)

        self._local_layers = dict()

        self._min_level = min_level
        self._max_level = max_level
        self._filters = filters

        self._backbone_max_level = 5  # max(feats_bottom_up.keys())
        self._upsample_max_level = (
            self._backbone_max_level
            if self._max_level > self._backbone_max_level
            else self._max_level
        )

        self._local_layers["stage1"] = dict()
        for level in range(self._min_level, self._upsample_max_level + 1):
            self._local_layers["stage1"][level] = tf.keras.layers.Conv2D(
                filters=self._filters,
                kernel_size=(1, 1),
                padding='same',
                name='l%d' % level,
                trainable=trainable
            )

        self._local_layers["stage2"] = dict()
        # add post-hoc 3x3 convolution kernel
        for level in range(self._min_level, self._upsample_max_level + 1):
            self._local_layers["stage2"][level] = tf.keras.layers.Conv2D(
                filters=self._filters,
                strides=(1, 1),
                kernel_size=(3, 3),
                padding='same',
                name='post_hoc_d%d' % level,
                trainable=trainable
            )

        self._local_layers["stage3_1"] = dict()
        self._local_layers["stage3_2"] = dict()

        if self._max_level == self._upsample_max_level + 1:
            self._local_layers["stage3_1"] = tf.keras.layers.MaxPool2D(
                pool_size=1,
                strides=2,
                padding='valid',
                name='p%d' % self._max_level,
                trainable=trainable
            )

        else:
            for level in range(self._upsample_max_level + 1, self._max_level + 1):
                self._local_layers["stage3_2"][level] = tf.keras.layers.Conv2D(
                    filters=self._filters,
                    strides=(2, 2),
                    kernel_size=(3, 3),
                    padding='same',
                    name='p%d' % level,
                    trainable=trainable
                )
    def call(self, inputs, *args, **kwargs):

        feats_bottom_up = inputs

        # lateral connections
        feats_lateral = {}

        for level in range(self._min_level, self._upsample_max_level + 1):
            feats_lateral[level] = self._local_layers["stage1"][level](feats_bottom_up[level])

        # add top-down path
        feats = {self._upsample_max_level: feats_lateral[self._upsample_max_level]}

        for level in range(self._upsample_max_level - 1, self._min_level - 1, -1):
            feats[level] = spatial_transform_ops.nearest_upsampling(
                feats[level + 1], 2
            ) + feats_lateral[level]

        # add post-hoc 3x3 convolution kernel
        for level in range(self._min_level, self._upsample_max_level + 1):
            feats[level] = self._local_layers["stage2"][level](feats[level])

        if self._max_level == self._upsample_max_level + 1:
            feats[self._max_level] = self._local_layers["stage3_1"](feats[self._max_level - 1])

        else:
            for level in range(self._upsample_max_level + 1, self._max_level + 1):
                feats[level] = self._local_layers["stage3_2"][level](feats[level - 1])

        return feats
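

# A minimal smoke test (added; not part of the original file). The bottom-up
# feature shapes for levels 3-5 are illustrative assumptions; the FPN returns
# levels 3 through 7, each with `filters` channels.
if __name__ == '__main__':
    fpn = FPNNetwork(min_level=3, max_level=7, filters=256)
    feats_bottom_up = {
        3: tf.random.normal([1, 80, 80, 512]),
        4: tf.random.normal([1, 40, 40, 1024]),
        5: tf.random.normal([1, 20, 20, 2048]),
    }
    feats = fpn(feats_bottom_up)
    for level in sorted(feats):
        print(level, feats[level].shape)  # (1, 80, 80, 256) down to (1, 5, 5, 256)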
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/models/heads.py
0 → 100644
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to build various prediction heads in Mask-RCNN."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

__all__ = ["RPN_Head_Model", "Box_Head_Model", "Mask_Head_Model"]
class RPN_Head_Model(tf.keras.models.Model):

    def __init__(self, name, num_anchors, trainable, *args, **kwargs):
        """Shared RPN heads."""
        super(RPN_Head_Model, self).__init__(name=name, trainable=trainable, *args, **kwargs)

        self._local_layers = dict()

        # TODO(chiachenc): check the channel depth of the first convolution.
        self._local_layers["conv1"] = tf.keras.layers.Conv2D(
            256,
            kernel_size=(3, 3),
            strides=(1, 1),
            activation=tf.nn.relu,
            bias_initializer=tf.keras.initializers.Zeros(),
            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
            padding='same',
            trainable=trainable,
            name='rpn'
        )

        # Proposal classification scores
        self._local_layers["conv2"] = tf.keras.layers.Conv2D(
            num_anchors,
            kernel_size=(1, 1),
            strides=(1, 1),
            bias_initializer=tf.keras.initializers.Zeros(),
            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
            padding='valid',
            trainable=trainable,
            name='rpn-class'
        )

        # Proposal bbox regression deltas
        self._local_layers["conv3"] = tf.keras.layers.Conv2D(
            4 * num_anchors,
            kernel_size=(1, 1),
            strides=(1, 1),
            bias_initializer=tf.keras.initializers.Zeros(),
            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
            padding='valid',
            trainable=trainable,
            name='rpn-box'
        )

    def call(self, inputs, *args, **kwargs):
        net = self._local_layers["conv1"](inputs)

        scores = self._local_layers["conv2"](net)
        bboxes = self._local_layers["conv3"](net)

        return scores, bboxes
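

# Usage sketch (added): on one FPN level of shape [batch, H, W, 256] with
# `num_anchors` anchors per cell, the head emits per-cell objectness scores
# and box deltas. The shapes below are illustrative assumptions:
#
#   rpn_head = RPN_Head_Model(name="rpn_head", num_anchors=3, trainable=True)
#   scores, bboxes = rpn_head(tf.random.normal([1, 32, 32, 256]))
#   # scores: [1, 32, 32, 3], bboxes: [1, 32, 32, 12]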
class Box_Head_Model(tf.keras.Model):

    def __init__(self,
                 num_classes=91,
                 mlp_head_dim=1024,
                 name="box_head",
                 trainable=True,
                 *args, **kwargs):
        """Box and class branches for the Mask-RCNN model.

        Args:
          num_classes: an integer for the number of classes.
          mlp_head_dim: an integer that is the hidden dimension in the
            fully-connected layers.

        The `call` method takes `roi_features`, a ROI feature tensor of shape
        [batch_size, num_rois, height_l, width_l, num_filters].
        """
        super(Box_Head_Model, self).__init__(name=name, trainable=trainable, *args, **kwargs)

        self._num_classes = num_classes
        self._mlp_head_dim = mlp_head_dim

        self._dense_fc6 = tf.keras.layers.Dense(
            units=mlp_head_dim,
            activation=tf.nn.relu,
            trainable=trainable,
            name='fc6'
        )

        self._dense_fc7 = tf.keras.layers.Dense(
            units=mlp_head_dim,
            activation=tf.nn.relu,
            trainable=trainable,
            name='fc7'
        )

        self._dense_class = tf.keras.layers.Dense(
            num_classes,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
            bias_initializer=tf.keras.initializers.Zeros(),
            trainable=trainable,
            name='class-predict'
        )

        self._dense_box = tf.keras.layers.Dense(
            num_classes * 4,
            kernel_initializer=tf.random_normal_initializer(stddev=0.001),
            bias_initializer=tf.keras.initializers.Zeros(),
            trainable=trainable,
            name='box-predict'
        )

    def call(self, inputs, **kwargs):
        """
        Returns:
          class_outputs: a tensor with a shape of
            [batch_size, num_rois, num_classes], representing the class predictions.
          box_outputs: a tensor with a shape of
            [batch_size, num_rois, num_classes * 4], representing the box predictions.
          box_features: a tensor with a shape of
            [batch_size, num_rois, mlp_head_dim], representing the box features.
        """
        # reshape inputs before FC.
        batch_size, num_rois, height, width, filters = inputs.get_shape().as_list()

        net = tf.reshape(inputs, [batch_size, num_rois, height * width * filters])

        net = self._dense_fc6(net)

        box_features = self._dense_fc7(net)

        class_outputs = self._dense_class(box_features)
        box_outputs = self._dense_box(box_features)

        return class_outputs, box_outputs, box_features
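

# Usage sketch (added): ROI features of shape [batch, num_rois, 7, 7, 256]
# (an illustrative assumption) yield class logits, per-class box deltas, and
# the mlp_head_dim-wide fc7 features:
#
#   box_head = Box_Head_Model(num_classes=91, mlp_head_dim=1024)
#   cls, box, feat = box_head(tf.random.normal([2, 512, 7, 7, 256]))
#   # cls: [2, 512, 91], box: [2, 512, 364], feat: [2, 512, 1024]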
class Mask_Head_Model(tf.keras.Model):

    @staticmethod
    def _get_stddev_equivalent_to_msra_fill(kernel_size, fan_out):
        """Returns the stddev of random normal initialization as MSRAFill."""
        # Reference: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/filler_op.h#L445-L463
        # For example, kernel size is (3, 3) and fan out is 256, stddev is 0.029.
        # stddev = (2/(3*3*256))^0.5 = 0.029
        return (2 / (kernel_size[0] * kernel_size[1] * fan_out)) ** 0.5

    def __init__(self,
                 class_indices,
                 num_classes=91,
                 mrcnn_resolution=28,
                 is_gpu_inference=False,
                 name="mask_head",
                 trainable=True,
                 *args, **kwargs):
        """Mask branch for the Mask-RCNN model.

        Args:
          class_indices: a Tensor of shape [batch_size, num_rois], indicating
            which class the ROI is.
          num_classes: an integer for the number of classes.
          mrcnn_resolution: an integer that is the resolution of masks.
          is_gpu_inference: whether to build the model for GPU inference.

        The `call` method takes `roi_features`, a ROI feature tensor of shape
        [batch_size, num_rois, height_l, width_l, num_filters].
        """
        super(Mask_Head_Model, self).__init__(name=name, trainable=trainable, *args, **kwargs)

        self._class_indices = class_indices
        self._num_classes = num_classes
        self._mrcnn_resolution = mrcnn_resolution
        self._is_gpu_inference = is_gpu_inference

        self._conv_stage1 = list()
        kernel_size = (3, 3)
        fan_out = 256

        init_stddev = Mask_Head_Model._get_stddev_equivalent_to_msra_fill(kernel_size, fan_out)

        for conv_id in range(4):
            self._conv_stage1.append(tf.keras.layers.Conv2D(
                fan_out,
                kernel_size=kernel_size,
                strides=(1, 1),
                padding='same',
                dilation_rate=(1, 1),
                activation=tf.nn.relu,
                kernel_initializer=tf.random_normal_initializer(stddev=init_stddev),
                bias_initializer=tf.keras.initializers.Zeros(),
                trainable=trainable,
                name='mask-conv-l%d' % conv_id
            ))

        kernel_size = (2, 2)
        fan_out = 256

        init_stddev = Mask_Head_Model._get_stddev_equivalent_to_msra_fill(kernel_size, fan_out)

        self._conv_stage2 = tf.keras.layers.Conv2DTranspose(
            fan_out,
            kernel_size=kernel_size,
            strides=(2, 2),
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=tf.random_normal_initializer(stddev=init_stddev),
            bias_initializer=tf.keras.initializers.Zeros(),
            trainable=trainable,
            name='conv5-mask'
        )

        kernel_size = (1, 1)
        fan_out = self._num_classes

        init_stddev = Mask_Head_Model._get_stddev_equivalent_to_msra_fill(kernel_size, fan_out)

        self._conv_stage3 = tf.keras.layers.Conv2D(
            fan_out,
            kernel_size=kernel_size,
            strides=(1, 1),
            padding='valid',
            kernel_initializer=tf.random_normal_initializer(stddev=init_stddev),
            bias_initializer=tf.keras.initializers.Zeros(),
            trainable=trainable,
            name='mask_fcn_logits'
        )
    def call(self, inputs, **kwargs):
        """
        Returns:
          mask_outputs: a tensor with a shape of
            [batch_size, num_masks, mask_height, mask_width],
            representing the mask predictions.
          fg_gather_indices: a tensor with a shape of [batch_size, num_masks, 2],
            representing the fg mask targets.

        Raises:
          ValueError: If boxes is not a rank-3 tensor or the last dimension of
            boxes is not 4.
        """
        batch_size, num_rois, height, width, filters = inputs.get_shape().as_list()

        net = tf.reshape(inputs, [-1, height, width, filters])

        for conv_id in range(4):
            net = self._conv_stage1[conv_id](net)

        net = self._conv_stage2(net)

        mask_outputs = self._conv_stage3(net)

        mask_outputs = tf.reshape(
            mask_outputs,
            [-1, num_rois, self._mrcnn_resolution, self._mrcnn_resolution, self._num_classes]
        )

        with tf.name_scope('masks_post_processing'):

            mask_outputs = tf.transpose(a=mask_outputs, perm=[0, 1, 4, 2, 3])

            indices_dtype = tf.float32 if self._is_gpu_inference else tf.int32

            if batch_size == 1:
                indices = tf.reshape(
                    tf.reshape(
                        tf.range(num_rois, dtype=indices_dtype),
                        [batch_size, num_rois, 1]
                    ) * self._num_classes + tf.expand_dims(self._class_indices, axis=-1),
                    [batch_size, -1]
                )
                indices = tf.cast(indices, tf.int32)

                mask_outputs = tf.gather(
                    tf.reshape(
                        mask_outputs,
                        [batch_size, -1, self._mrcnn_resolution, self._mrcnn_resolution]),
                    indices,
                    axis=1
                )

                mask_outputs = tf.squeeze(mask_outputs, axis=1)
                mask_outputs = tf.reshape(
                    mask_outputs,
                    [batch_size, num_rois, self._mrcnn_resolution, self._mrcnn_resolution])

            else:
                batch_indices = (
                    tf.expand_dims(tf.range(batch_size, dtype=indices_dtype), axis=1) *
                    tf.ones([1, num_rois], dtype=indices_dtype)
                )

                mask_indices = (
                    tf.expand_dims(tf.range(num_rois, dtype=indices_dtype), axis=0) *
                    tf.ones([batch_size, 1], dtype=indices_dtype)
                )

                gather_indices = tf.stack([batch_indices, mask_indices, self._class_indices], axis=2)

                if self._is_gpu_inference:
                    gather_indices = tf.cast(gather_indices, dtype=tf.int32)

                mask_outputs = tf.gather_nd(mask_outputs, gather_indices)

        return mask_outputs
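

# A minimal smoke test (added; not part of the original file). With
# batch_size > 1 the head gathers, per ROI, the mask of the class given in
# `class_indices`; all shapes are illustrative assumptions.
if __name__ == '__main__':
    batch_size, num_rois = 2, 8
    class_indices = tf.zeros([batch_size, num_rois], dtype=tf.int32)
    mask_head = Mask_Head_Model(class_indices, num_classes=91, mrcnn_resolution=28)
    masks = mask_head(tf.random.normal([batch_size, num_rois, 14, 14, 256]))
    print(masks.shape)  # (2, 8, 28, 28)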
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/models/keras_utils.py
0 → 100644
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools

import tensorflow as tf

__all__ = ["KerasMockLayer"]
class KerasMockLayer(tf.Module):
    """
    This class reproduces the Keras Layer important APIs without enforcing a variable scope.
    """

    def __init__(self, trainable=True, *args, **kwargs):
        super(KerasMockLayer, self).__init__(*args, **kwargs)
        self._local_layers = dict()
        self._trainable = trainable

    @property
    def trainable(self):
        return self._trainable

    @trainable.setter
    def trainable(self, value):
        self._trainable = value
        for layer in getattr(self, '_layers', []):
            layer.trainable = value

    @property
    def variables(self):
        """Returns the list of all layer variables/weights.

        Alias of `self.weights`.

        Returns:
          A list of variables.
        """
        return self.weights

    @property
    def trainable_variables(self):
        return self.trainable_weights

    @property
    def non_trainable_variables(self):
        return self.non_trainable_weights

    @property
    def weights(self):
        """Returns the list of all layer variables/weights.

        Returns:
          A list of variables.
        """
        return self.trainable_weights + self.non_trainable_weights

    @property
    def name(self):
        return self._name

    @property
    def trainable_weights(self):
        layers = list()

        for layer in self._local_layers.values():
            if not isinstance(layer, dict):
                layers.append(layer)
            else:
                for sublayer in layer.values():
                    layers.append(sublayer)

        return list(itertools.chain.from_iterable(
            [layer.trainable_variables for layer in layers]))

    @property
    def non_trainable_weights(self):
        layers = list()

        for layer in self._local_layers.values():
            if not isinstance(layer, dict):
                layers.append(layer)
            else:
                for sublayer in layer.values():
                    layers.append(sublayer)

        return list(itertools.chain.from_iterable(
            [layer.non_trainable_weights for layer in layers]))
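

# A small demonstration (added; not part of the original file): a mock layer
# aggregates the weights of the Keras layers stored in `_local_layers`.
if __name__ == '__main__':
    class TwoDense(KerasMockLayer):
        def __init__(self):
            super(TwoDense, self).__init__(trainable=True)
            self._local_layers["fc1"] = tf.keras.layers.Dense(4)
            self._local_layers["fc2"] = tf.keras.layers.Dense(2)

        def __call__(self, inputs):
            return self._local_layers["fc2"](self._local_layers["fc1"](inputs))

    layer = TwoDense()
    layer(tf.zeros([1, 3]))  # build the sublayers
    print(len(layer.trainable_weights))  # 4: two kernels and two biases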
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/models/resnet.py
0 → 100644
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Resnet."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow.python.ops import variable_scope
from tensorflow.python.keras import backend

from mask_rcnn.models.keras_utils import KerasMockLayer

_BATCH_NORM_DECAY = 0.997
_BATCH_NORM_EPSILON = 1e-4
class BNReLULayer(KerasMockLayer):

    def __init__(self, trainable, relu=True, init_zero=False, data_format='channels_last'):
        """Performs a batch normalization followed by a ReLU.

        Args:
          trainable: `bool` for whether to finetune the batchnorm layer.
          relu: `bool` if False, omits the ReLU operation.
          init_zero: `bool` if True, initializes the scale parameter of batch
            normalization with 0 instead of 1 (default).
          data_format: `str` either "channels_first" for `[batch, channels, height,
            width]` or "channels_last" for `[batch, height, width, channels]`.

        Calling the layer returns a normalized `Tensor` with the same `data_format`.
        """
        super(BNReLULayer, self).__init__(trainable=trainable)

        if init_zero:
            gamma_initializer = tf.keras.initializers.Zeros()
        else:
            gamma_initializer = tf.keras.initializers.Ones()

        if data_format == 'channels_first':
            axis = 1
        else:
            axis = 3

        self._local_layers = dict()

        self._local_layers["batchnorm"] = tf.keras.layers.BatchNormalization(
            axis=axis,
            momentum=_BATCH_NORM_DECAY,
            epsilon=_BATCH_NORM_EPSILON,
            center=True,
            scale=True,
            trainable=self._trainable,
            gamma_initializer=gamma_initializer,
            fused=True,
            name="batch_normalization"
        )

        if relu:
            self._local_layers["relu"] = tf.keras.layers.ReLU()

    def __call__(self, inputs, training=False, *args, **kwargs):
        net = self._local_layers["batchnorm"](inputs, training=training and self._trainable)

        try:
            return self._local_layers["relu"](net)
        except KeyError:
            return net
class FixedPaddingLayer(KerasMockLayer):

    def __init__(self, kernel_size, data_format='channels_last', trainable=True):
        """Pads the input along the spatial dimensions independently of input size.

        Args:
          kernel_size: `int` kernel size to be used for `conv2d` or `max_pool2d`
            operations. Should be a positive integer.
          data_format: `str` either "channels_first" for `[batch, channels, height,
            width]` or "channels_last" for `[batch, height, width, channels]`.
        """
        super(FixedPaddingLayer, self).__init__(trainable=trainable)

        pad_total = kernel_size - 1
        pad_beg = pad_total // 2
        pad_end = pad_total - pad_beg

        if data_format == 'channels_first':
            self._paddings = [[0, 0], [0, 0], [pad_beg, pad_end], [pad_beg, pad_end]]
        else:
            self._paddings = [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]

    def __call__(self, inputs, *args, **kwargs):
        """
        Args:
          inputs: `Tensor` of size `[batch, channels, height, width]` or
            `[batch, height, width, channels]` depending on `data_format`.

        Returns:
          A padded `Tensor` of the same `data_format` with size either intact
          (if `kernel_size == 1`) or padded (if `kernel_size > 1`).
        """
        return tf.pad(tensor=inputs, paddings=self._paddings)
class Conv2dFixedPadding(KerasMockLayer):

    def __init__(self, filters, kernel_size, strides, data_format='channels_last', trainable=False):
        """Strided 2-D convolution with explicit padding.

        The padding is consistent and is based only on `kernel_size`, not on the
        dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).

        Args:
          filters: `int` number of filters in the convolution.
          kernel_size: `int` size of the kernel to be used in the convolution.
          strides: `int` strides of the convolution.
          data_format: `str` either "channels_first" for `[batch, channels, height,
            width]` or "channels_last" for `[batch, height, width, channels]`.

        Calling the layer on a `Tensor` of size `[batch, channels, height_in,
        width_in]` returns a `Tensor` of shape `[batch, filters, height_out, width_out]`.
        """
        super(Conv2dFixedPadding, self).__init__(trainable=trainable)

        if strides > 1:
            self._local_layers["fixed_padding"] = FixedPaddingLayer(
                kernel_size=kernel_size,
                data_format=data_format
            )

        self._local_layers["conv2d"] = tf.keras.layers.Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=('SAME' if strides == 1 else 'VALID'),
            use_bias=False,
            kernel_initializer=tf.keras.initializers.VarianceScaling(),
            data_format=data_format,
            trainable=self._trainable,
            name="conv2d"
        )

    def __call__(self, inputs, *args, **kwargs):
        try:
            net = self._local_layers["fixed_padding"](inputs)
        except KeyError:
            net = inputs

        return self._local_layers["conv2d"](net)
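

# Illustration (added): with kernel_size=3 and strides=2 the layer pads one
# pixel on each side and then convolves with VALID padding, so the output
# spatial size is exactly ceil(input / 2) regardless of input parity:
#
#   conv = Conv2dFixedPadding(filters=64, kernel_size=3, strides=2)
#   y = conv(tf.random.normal([1, 224, 224, 3]))
#   # y.shape == (1, 112, 112, 64)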
class ResidualBlock(KerasMockLayer):

    def __init__(self, filters, trainable, finetune_bn, strides,
                 use_projection=False, data_format='channels_last'):
        """Standard building block for residual networks with BN after convolutions.

        Args:
          filters: `int` number of filters for both convolutions.
          trainable: `bool` for whether the block's variables are trainable.
          finetune_bn: `bool` for whether to finetune the batch norm layers while training.
          strides: `int` block stride. If greater than 1, this block will
            ultimately downsample the input.
          use_projection: `bool` for whether this block should use a projection
            shortcut (versus the default identity shortcut). This is usually `True`
            for the first block of a block group, which may change the number of
            filters and the resolution.
          data_format: `str` either "channels_first" for `[batch, channels, height, width]`
            or "channels_last" for `[batch, height, width, channels]`.
        """
        super(ResidualBlock, self).__init__(trainable=trainable)
        self._finetune_bn = finetune_bn

        if use_projection:
            self._local_layers["projection"] = dict()

            self._local_layers["projection"]["conv2d"] = Conv2dFixedPadding(
                filters=filters,
                kernel_size=1,
                strides=strides,
                data_format=data_format,
                trainable=trainable
            )

            self._local_layers["projection"]["batchnorm"] = BNReLULayer(
                trainable=finetune_bn and trainable,
                relu=False,
                init_zero=False,
                data_format=data_format,
            )

        self._local_layers["conv2d_1"] = Conv2dFixedPadding(
            trainable=trainable,
            filters=filters,
            kernel_size=3,
            strides=strides,
            data_format=data_format,
        )

        self._local_layers["conv2d_2"] = Conv2dFixedPadding(
            trainable=trainable,
            filters=filters,
            kernel_size=3,
            strides=1,
            data_format=data_format,
        )

        self._local_layers["batchnorm_1"] = BNReLULayer(
            trainable=finetune_bn and trainable,
            relu=True,
            init_zero=False,
            data_format=data_format,
        )

        self._local_layers["batchnorm_2"] = BNReLULayer(
            trainable=finetune_bn and trainable,
            relu=False,
            init_zero=True,
            data_format=data_format,
        )

        self._local_layers["activation"] = tf.keras.layers.ReLU()

    def __call__(self, inputs, training=False):
        """
        Args:
          inputs: `Tensor` of size `[batch, channels, height, width]`.

        Returns:
          The output `Tensor` of the block.
        """
        try:
            # Projection shortcut in first layer to match filters and strides
            shortcut = self._local_layers["projection"]["conv2d"](inputs=inputs)
            shortcut = self._local_layers["projection"]["batchnorm"](
                inputs=shortcut,
                training=training and self._trainable and self._finetune_bn
            )
        except KeyError:
            shortcut = inputs

        net = inputs
        for i in range(1, 3):
            net = self._local_layers["conv2d_%d" % i](inputs=net)
            net = self._local_layers["batchnorm_%d" % i](
                inputs=net,
                training=training and self._trainable and self._finetune_bn
            )

        return self._local_layers["activation"](net + shortcut)
class BottleneckBlock(KerasMockLayer):

    def __init__(self, filters, trainable, finetune_bn, strides,
                 use_projection=False, data_format='channels_last'):
        """Bottleneck block variant for residual networks with BN after convolutions.

        Args:
          filters: `int` number of filters for the first two convolutions. Note that
            the third and final convolution will use 4 times as many filters.
          trainable: `bool` for whether the block's variables are trainable.
          finetune_bn: `bool` for whether to finetune the batch norm layers while training.
          strides: `int` block stride. If greater than 1, this block will
            ultimately downsample the input.
          use_projection: `bool` for whether this block should use a projection
            shortcut (versus the default identity shortcut). This is usually `True`
            for the first block of a block group, which may change the number of
            filters and the resolution.
          data_format: `str` either "channels_first" for `[batch, channels, height, width]`
            or "channels_last" for `[batch, height, width, channels]`.
        """
        super(BottleneckBlock, self).__init__(trainable=trainable)

        self._finetune_bn = finetune_bn

        if use_projection:
            # Projection shortcut only in first block within a group. Bottleneck blocks
            # end with 4 times the number of filters.
            filters_out = 4 * filters

            self._local_layers["projection"] = dict()

            self._local_layers["projection"]["conv2d"] = Conv2dFixedPadding(
                filters=filters_out,
                kernel_size=1,
                strides=strides,
                data_format=data_format,
                trainable=trainable
            )

            self._local_layers["projection"]["batchnorm"] = BNReLULayer(
                trainable=finetune_bn and trainable,
                relu=False,
                init_zero=False,
                data_format=data_format,
            )

        self._local_layers["conv2d_1"] = Conv2dFixedPadding(
            filters=filters,
            kernel_size=1,
            strides=1,
            data_format=data_format,
            trainable=trainable
        )

        self._local_layers["conv2d_2"] = Conv2dFixedPadding(
            filters=filters,
            kernel_size=3,
            strides=strides,
            data_format=data_format,
            trainable=trainable
        )

        self._local_layers["conv2d_3"] = Conv2dFixedPadding(
            filters=4 * filters,
            kernel_size=1,
            strides=1,
            data_format=data_format,
            trainable=trainable
        )

        self._local_layers["batchnorm_1"] = BNReLULayer(
            trainable=finetune_bn and trainable,
            relu=True,
            init_zero=False,
            data_format=data_format,
        )

        self._local_layers["batchnorm_2"] = BNReLULayer(
            trainable=finetune_bn and trainable,
            relu=True,
            init_zero=False,
            data_format=data_format,
        )

        self._local_layers["batchnorm_3"] = BNReLULayer(
            trainable=finetune_bn and trainable,
            relu=False,
            init_zero=True,
            data_format=data_format,
        )

        self._local_layers["activation"] = tf.keras.layers.ReLU()

    def __call__(self, inputs, training=False):
        """
        Args:
          inputs: `Tensor` of size `[batch, channels, height, width]`.

        Returns:
          The output `Tensor` of the block.
        """
        try:
            # Projection shortcut in first layer to match filters and strides
            shortcut = self._local_layers["projection"]["conv2d"](inputs=inputs)
            shortcut = self._local_layers["projection"]["batchnorm"](
                inputs=shortcut,
                training=training and self._trainable and self._finetune_bn
            )
        except KeyError:
            shortcut = inputs

        net = inputs
        for i in range(1, 4):
            net = self._local_layers["conv2d_%d" % i](inputs=net)
            net = self._local_layers["batchnorm_%d" % i](
                inputs=net,
                training=training and self._trainable and self._finetune_bn
            )

        return self._local_layers["activation"](net + shortcut)
class BlockGroup(KerasMockLayer):

    def __init__(self, filters, block_layer, n_blocks, strides, trainable,
                 finetune_bn, data_format='channels_last'):
        """Creates one group of blocks for the ResNet model.

        Args:
          filters: `int` number of filters for the first convolution of the layer.
          block_layer: the block layer class to use within the group.
          n_blocks: `int` number of blocks contained in the layer.
          strides: `int` stride to use for the first convolution of the layer. If
            greater than 1, this layer will downsample the input.
          trainable: `bool` for whether the group's variables are trainable.
          finetune_bn: `bool` for whether to finetune the batch norm layers while training.
          data_format: `str` either "channels_first" for `[batch, channels, height,
            width]` or "channels_last" for `[batch, height, width, channels]`.

        Calling the group returns the output `Tensor` of the block layer.
        """
        super(BlockGroup, self).__init__(trainable=trainable)

        self._finetune_bn = finetune_bn
        self._n_blocks = n_blocks

        for block_id in range(self._n_blocks):
            # Only the first block per block_group uses projection shortcut and strides.
            self._local_layers["block_%d" % (block_id + 1)] = block_layer(
                filters=filters,
                finetune_bn=finetune_bn,
                trainable=trainable,
                strides=strides if block_id == 0 else 1,
                use_projection=block_id == 0,
                data_format=data_format
            )

    def __call__(self, inputs, training=False):
        net = inputs

        for block_id in range(self._n_blocks):
            net = self._local_layers["block_%d" % (block_id + 1)](
                net,
                training=training and self._trainable
            )

        return net
class Resnet_Model(KerasMockLayer, tf.keras.models.Model):

    def __init__(self,
                 resnet_model,
                 data_format='channels_last',
                 trainable=True,
                 finetune_bn=False,
                 *args, **kwargs):
        """
        Our actual ResNet network. We return the outputs of c2, c3, c4, and c5.

        N.B. batch norm is always run with trained parameters, as we use very small
        batches when training the object layers.

        Args:
          resnet_model: model type. Authorized values: (resnet18, resnet34,
            resnet50, resnet101, resnet152, resnet200)
          data_format: `str` either "channels_first" for
            `[batch, channels, height, width]` or "channels_last" for
            `[batch, height, width, channels]`.
          finetune_bn: `bool` for whether to finetune the batch norm layers while training.

        Returns the ResNet model for a given size and number of output classes.
        """
        model_params = {
            'resnet18': {'block': ResidualBlock, 'layers': [2, 2, 2, 2]},
            'resnet34': {'block': ResidualBlock, 'layers': [3, 4, 6, 3]},
            'resnet50': {'block': BottleneckBlock, 'layers': [3, 4, 6, 3]},
            'resnet101': {'block': BottleneckBlock, 'layers': [3, 4, 23, 3]},
            'resnet152': {'block': BottleneckBlock, 'layers': [3, 8, 36, 3]},
            'resnet200': {'block': BottleneckBlock, 'layers': [3, 24, 36, 3]}
        }

        if resnet_model not in model_params:
            raise ValueError('Not a valid resnet_model: %s' % resnet_model)

        super(Resnet_Model, self).__init__(trainable=trainable, name=resnet_model, *args, **kwargs)

        self._finetune_bn = finetune_bn
        self._data_format = data_format

        self._block_layer = model_params[resnet_model]['block']
        self._n_layers = model_params[resnet_model]['layers']

        # Freeze the stem (conv2d, batchnorm) and the first block group, i.e. the
        # first 11 layers, based on the reference model.
        # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/config.py#L194
        self._local_layers["conv2d"] = Conv2dFixedPadding(
            filters=64,
            kernel_size=7,
            strides=2,
            data_format=self._data_format,
            trainable=False
        )

        self._local_layers["batchnorm"] = BNReLULayer(
            relu=True,
            init_zero=False,
            data_format=self._data_format,
            trainable=False
        )

        self._local_layers["maxpool2d"] = tf.keras.layers.MaxPool2D(
            pool_size=3,
            strides=2,
            padding='SAME',
            data_format=self._data_format
        )

        self._local_layers["block_1"] = BlockGroup(
            filters=64,
            strides=1,
            n_blocks=self._n_layers[0],
            block_layer=self._block_layer,
            data_format=self._data_format,
            trainable=False,
            finetune_bn=False
        )

        self._local_layers["block_2"] = BlockGroup(
            filters=128,
            strides=2,
            n_blocks=self._n_layers[1],
            block_layer=self._block_layer,
            data_format=self._data_format,
            trainable=self._trainable,
            finetune_bn=self._finetune_bn
        )

        self._local_layers["block_3"] = BlockGroup(
            filters=256,
            strides=2,
            n_blocks=self._n_layers[2],
            block_layer=self._block_layer,
            data_format=self._data_format,
            trainable=self._trainable,
            finetune_bn=self._finetune_bn
        )

        self._local_layers["block_4"] = BlockGroup(
            filters=512,
            strides=2,
            n_blocks=self._n_layers[3],
            block_layer=self._block_layer,
            data_format=self._data_format,
            trainable=self._trainable,
            finetune_bn=self._finetune_bn
        )

    def call(self, inputs, training=True, *args, **kwargs):
        """Creation of the model graph."""
        net = self._local_layers["conv2d"](inputs=inputs)

        net = self._local_layers["batchnorm"](inputs=net, training=False)

        net = self._local_layers["maxpool2d"](net)

        c2 = self._local_layers["block_1"](inputs=net, training=False)

        c3 = self._local_layers["block_2"](inputs=c2, training=training)

        c4 = self._local_layers["block_3"](inputs=c3, training=training)

        c5 = self._local_layers["block_4"](inputs=c4, training=training)

        return {2: c2, 3: c3, 4: c4, 5: c5}
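

# A minimal smoke test (added; not part of the original file): the backbone
# returns the C2-C5 feature maps keyed by level. The 512x512 input size is an
# illustrative assumption.
if __name__ == '__main__':
    backbone = Resnet_Model('resnet50', trainable=True, finetune_bn=False)
    feats = backbone(tf.random.normal([1, 512, 512, 3]), training=False)
    for level in sorted(feats):
        print(level, feats[level].shape)
    # 2: (1, 128, 128, 256), 3: (1, 64, 64, 512),
    # 4: (1, 32, 32, 1024), 5: (1, 16, 16, 2048)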
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/__init__.py
0 → 100644