ModelZoo / ResNet50_tensorflow · Commit 3e3b0c64

Authored Jun 02, 2022 by A. Unique TensorFlower

Merge pull request #10537 from srihari-humbarwadi:panoptic-deeplab

PiperOrigin-RevId: 452568716

Parents: 523c40b7, 1f765c55

Showing 18 changed files with 3308 additions and 4 deletions (+3308 / -4).
Changed files (18):

| File | Changes |
| :--- | ---: |
| official/vision/beta/projects/panoptic_maskrcnn/README.md | +14 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py | +346 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/dataloaders/panoptic_deeplab_input.py | +359 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/losses/panoptic_deeplab_losses.py | +148 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py | +106 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py | +48 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py | +434 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads_test.py | +96 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/fusion_layers.py | +180 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge.py | +568 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge_test.py | +142 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py | +122 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py | +185 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/ops/mask_ops.py | +55 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/tasks/panoptic_deeplab.py | +408 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/tasks/panoptic_deeplab_test.py | +79 / -0 |
| official/vision/beta/projects/panoptic_maskrcnn/train.py | +6 / -3 |
| official/vision/ops/augment.py | +12 / -1 |
official/vision/beta/projects/panoptic_maskrcnn/README.md

@@ -83,6 +83,12 @@ ResNet-50 | 3x | `panoptic_fpn_coco` | 40.64 | 36.29

**Note**: Here 1x schedule refers to ~12 epochs.

### Panoptic Deeplab

Backbone           | Experiment name                | Overall PQ | Things PQ | Stuff PQ | Checkpoints
:------------------| :------------------------------| ---------- | --------- | -------- | ------------:
Dilated ResNet-50  | `panoptic_deeplab_resnet_coco` | 36.80      | 37.51     | 35.73    | [ckpt](gs://tf_model_garden/vision/panoptic/panoptic_deeplab/coco/resnet50)
Dilated ResNet-101 | `panoptic_deeplab_resnet_coco` | 38.39      | 39.47     | 36.75    | [ckpt](gs://tf_model_garden/vision/panoptic/panoptic_deeplab/coco/resnet101)

___

## Citation

@@ -94,4 +100,12 @@ ___
```
  archivePrefix={arXiv},
  primaryClass={cs.CV}
}
@article{Cheng2020PanopticDeepLabAS,
  title={Panoptic-DeepLab: A Simple, Strong, and Fast Baseline for Bottom-Up Panoptic Segmentation},
  author={Bowen Cheng and Maxwell D. Collins and Yukun Zhu and Ting Liu and Thomas S. Huang and Hartwig Adam and Liang-Chieh Chen},
  journal={2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2020},
  pages={12472-12482}
}
```
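For reference, the panoptic quality (PQ) numbers in the table above follow the standard definition from Kirillov et al., "Panoptic Segmentation" (CVPR 2019): a predicted segment $p$ and a groundtruth segment $g$ of the same class are matched as a true positive when $\mathrm{IoU}(p, g) > 0.5$, and

$$\mathrm{PQ} = \frac{\sum_{(p,\,g) \in \mathit{TP}} \mathrm{IoU}(p, g)}{|\mathit{TP}| + \tfrac{1}{2}|\mathit{FP}| + \tfrac{1}{2}|\mathit{FN}|}$$

Things PQ and Stuff PQ average the same quantity over thing and stuff classes only.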
official/vision/beta/projects/panoptic_maskrcnn/configs/panoptic_deeplab.py (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Panoptic Deeplab configuration definition."""
import dataclasses
import os
from typing import List, Optional, Union

import numpy as np

from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.configs import common
from official.vision.configs import decoders
from official.vision.configs.google import backbones

_COCO_INPUT_PATH_BASE = 'coco/tfrecords'
_COCO_TRAIN_EXAMPLES = 118287
_COCO_VAL_EXAMPLES = 5000


@dataclasses.dataclass
class Parser(hyperparams.Config):
  """Panoptic deeplab parser."""
  ignore_label: int = 0
  # If resize_eval_groundtruth is set to False, original image sizes are used
  # for eval. In that case, groundtruth_padded_size has to be specified too to
  # allow for batching the variable input sizes of images.
  resize_eval_groundtruth: bool = True
  groundtruth_padded_size: List[int] = dataclasses.field(default_factory=list)
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  aug_rand_hflip: bool = True
  aug_type: common.Augmentation = common.Augmentation()
  sigma: float = 8.0
  small_instance_area_threshold: int = 4096
  small_instance_weight: float = 3.0
  dtype = 'float32'


@dataclasses.dataclass
class TfExampleDecoder(common.TfExampleDecoder):
  """A simple TF Example decoder config."""
  panoptic_category_mask_key: str = 'image/panoptic/category_mask'
  panoptic_instance_mask_key: str = 'image/panoptic/instance_mask'


@dataclasses.dataclass
class DataDecoder(common.DataDecoder):
  """Data decoder config."""
  simple_decoder: TfExampleDecoder = TfExampleDecoder()


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  decoder: DataDecoder = DataDecoder()
  parser: Parser = Parser()
  input_path: str = ''
  drop_remainder: bool = True
  file_type: str = 'tfrecord'
  is_training: bool = True
  global_batch_size: int = 1


@dataclasses.dataclass
class PanopticDeeplabHead(hyperparams.Config):
  """Panoptic Deeplab head config."""
  level: int = 3
  num_convs: int = 2
  num_filters: int = 256
  kernel_size: int = 5
  use_depthwise_convolution: bool = False
  upsample_factor: int = 1
  low_level: List[int] = dataclasses.field(default_factory=lambda: [3, 2])
  low_level_num_filters: List[int] = dataclasses.field(
      default_factory=lambda: [64, 32])
  fusion_num_output_filters: int = 256


@dataclasses.dataclass
class SemanticHead(PanopticDeeplabHead):
  """Semantic head config."""
  prediction_kernel_size: int = 1


@dataclasses.dataclass
class InstanceHead(PanopticDeeplabHead):
  """Instance head config."""
  prediction_kernel_size: int = 1


@dataclasses.dataclass
class PanopticDeeplabPostProcessor(hyperparams.Config):
  """Panoptic Deeplab PostProcessing config."""
  output_size: List[int] = dataclasses.field(default_factory=list)
  center_score_threshold: float = 0.1
  thing_class_ids: List[int] = dataclasses.field(default_factory=list)
  label_divisor: int = 256 * 256 * 256
  stuff_area_limit: int = 4096
  ignore_label: int = 0
  nms_kernel: int = 7
  keep_k_centers: int = 200
  rescale_predictions: bool = True


@dataclasses.dataclass
class PanopticDeeplab(hyperparams.Config):
  """Panoptic Deeplab model config."""
  num_classes: int = 2
  input_size: List[int] = dataclasses.field(default_factory=list)
  min_level: int = 3
  max_level: int = 6
  norm_activation: common.NormActivation = common.NormActivation()
  backbone: backbones.Backbone = backbones.Backbone(
      type='resnet', resnet=backbones.ResNet())
  decoder: decoders.Decoder = decoders.Decoder(type='aspp')
  semantic_head: SemanticHead = SemanticHead()
  instance_head: InstanceHead = InstanceHead()
  shared_decoder: bool = False
  generate_panoptic_masks: bool = True
  post_processor: PanopticDeeplabPostProcessor = PanopticDeeplabPostProcessor()


@dataclasses.dataclass
class Losses(hyperparams.Config):
  label_smoothing: float = 0.0
  ignore_label: int = 0
  class_weights: List[float] = dataclasses.field(default_factory=list)
  l2_weight_decay: float = 1e-4
  top_k_percent_pixels: float = 0.15
  segmentation_loss_weight: float = 1.0
  center_heatmap_loss_weight: float = 200
  center_offset_loss_weight: float = 0.01


@dataclasses.dataclass
class Evaluation(hyperparams.Config):
  """Evaluation config."""
  ignored_label: int = 0
  max_instances_per_category: int = 256
  offset: int = 256 * 256 * 256
  is_thing: List[float] = dataclasses.field(default_factory=list)
  rescale_predictions: bool = True
  report_per_class_pq: bool = False
  report_per_class_iou: bool = False
  report_train_mean_iou: bool = True  # Turning this off can speed up training.


@dataclasses.dataclass
class PanopticDeeplabTask(cfg.TaskConfig):
  """Panoptic deeplab task config."""
  model: PanopticDeeplab = PanopticDeeplab()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(
      is_training=False, drop_remainder=False)
  losses: Losses = Losses()
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[
      str, List[str]] = 'all'  # all, backbone, and/or decoder
  evaluation: Evaluation = Evaluation()


@exp_factory.register_config_factory('panoptic_deeplab_resnet_coco')
def panoptic_deeplab_coco() -> cfg.ExperimentConfig:
  """COCO panoptic segmentation with Panoptic Deeplab."""
  train_steps = 200000
  train_batch_size = 64
  eval_batch_size = 1
  steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size
  validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size

  num_panoptic_categories = 201
  num_thing_categories = 91
  ignore_label = 0

  is_thing = [False]
  for idx in range(1, num_panoptic_categories):
    is_thing.append(True if idx <= num_thing_categories else False)

  input_size = [640, 640, 3]
  output_stride = 16
  aspp_dilation_rates = [6, 12, 18]
  multigrid = [1, 2, 4]
  stem_type = 'v1'
  level = int(np.log2(output_stride))

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(
          mixed_precision_dtype='bfloat16', enable_xla=True),
      task=PanopticDeeplabTask(
          init_checkpoint='gs://tf_model_garden/vision/panoptic/panoptic_deeplab/imagenet/resnet50_v1/ckpt-436800',  # pylint: disable=line-too-long
          init_checkpoint_modules=['backbone'],
          model=PanopticDeeplab(
              num_classes=num_panoptic_categories,
              input_size=input_size,
              backbone=backbones.Backbone(
                  type='dilated_resnet',
                  dilated_resnet=backbones.DilatedResNet(
                      model_id=50,
                      stem_type=stem_type,
                      output_stride=output_stride,
                      multigrid=multigrid,
                      se_ratio=0.25,
                      last_stage_repeats=1,
                      stochastic_depth_drop_rate=0.2)),
              decoder=decoders.Decoder(
                  type='aspp',
                  aspp=decoders.ASPP(
                      level=level,
                      num_filters=256,
                      pool_kernel_size=input_size[:2],
                      dilation_rates=aspp_dilation_rates,
                      use_depthwise_convolution=True,
                      dropout_rate=0.1)),
              semantic_head=SemanticHead(
                  level=level,
                  num_convs=1,
                  num_filters=256,
                  kernel_size=5,
                  use_depthwise_convolution=True,
                  upsample_factor=1,
                  low_level=[3, 2],
                  low_level_num_filters=[64, 32],
                  fusion_num_output_filters=256,
                  prediction_kernel_size=1),
              instance_head=InstanceHead(
                  level=level,
                  num_convs=1,
                  num_filters=32,
                  kernel_size=5,
                  use_depthwise_convolution=True,
                  upsample_factor=1,
                  low_level=[3, 2],
                  low_level_num_filters=[32, 16],
                  fusion_num_output_filters=128,
                  prediction_kernel_size=1),
              shared_decoder=False,
              generate_panoptic_masks=True,
              post_processor=PanopticDeeplabPostProcessor(
                  output_size=input_size[:2],
                  center_score_threshold=0.1,
                  thing_class_ids=list(range(1, num_thing_categories)),
                  label_divisor=256,
                  stuff_area_limit=4096,
                  ignore_label=ignore_label,
                  nms_kernel=41,
                  keep_k_centers=200,
                  rescale_predictions=True)),
          losses=Losses(
              label_smoothing=0.0,
              ignore_label=ignore_label,
              l2_weight_decay=0.0,
              top_k_percent_pixels=0.2,
              segmentation_loss_weight=1.0,
              center_heatmap_loss_weight=200,
              center_offset_loss_weight=0.01),
          train_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              parser=Parser(
                  aug_scale_min=0.5,
                  aug_scale_max=1.5,
                  aug_rand_hflip=True,
                  aug_type=common.Augmentation(
                      type='autoaug',
                      autoaug=common.AutoAugment(
                          augmentation_name='panoptic_deeplab_policy')),
                  sigma=8.0,
                  small_instance_area_threshold=4096,
                  small_instance_weight=3.0)),
          validation_data=DataConfig(
              input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              parser=Parser(
                  resize_eval_groundtruth=False,
                  groundtruth_padded_size=[640, 640],
                  aug_scale_min=1.0,
                  aug_scale_max=1.0,
                  aug_rand_hflip=False,
                  aug_type=None,
                  sigma=8.0,
                  small_instance_area_threshold=4096,
                  small_instance_weight=3.0),
              drop_remainder=False),
          evaluation=Evaluation(
              ignored_label=ignore_label,
              max_instances_per_category=256,
              offset=256 * 256 * 256,
              is_thing=is_thing,
              rescale_predictions=True,
              report_per_class_pq=False,
              report_per_class_iou=False,
              report_train_mean_iou=False)),
      trainer=cfg.TrainerConfig(
          train_steps=train_steps,
          validation_steps=validation_steps,
          validation_interval=steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'optimizer': {
                  'type': 'adam',
              },
              'learning_rate': {
                  'type': 'polynomial',
                  'polynomial': {
                      'initial_learning_rate': 0.0005,
                      'decay_steps': train_steps,
                      'end_learning_rate': 0.0,
                      'power': 0.9
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 2000,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
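Because the factory function above is registered under `panoptic_deeplab_resnet_coco`, the experiment config can be retrieved and overridden through `exp_factory`. A minimal sketch, assuming the Model Garden `official` package is importable and that this config module has been imported so the registration decorator has run; the override values are illustrative only:

```python
from official.core import exp_factory
# Importing the module runs @exp_factory.register_config_factory above.
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab  # pylint: disable=unused-import

config = exp_factory.get_exp_config('panoptic_deeplab_resnet_coco')
# Nested dot-path overrides, e.g. to shrink the batch for a smoke test.
config.override({
    'task': {
        'train_data': {'global_batch_size': 8},
    },
}, is_strict=True)
config.validate()
```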
official/vision/beta/projects/panoptic_maskrcnn/dataloaders/panoptic_deeplab_input.py (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data parser and processing for Panoptic Deeplab."""
from typing import List, Optional

import numpy as np
import tensorflow as tf

from official.vision.configs import common
from official.vision.dataloaders import parser
from official.vision.dataloaders import tf_example_decoder
from official.vision.ops import augment
from official.vision.ops import preprocess_ops


def _compute_gaussian_from_std(sigma):
  """Computes the Gaussian and its size from a given standard deviation."""
  size = int(6 * sigma + 3)
  x = np.arange(size, dtype=float)
  y = x[:, np.newaxis]
  x0, y0 = 3 * sigma + 1, 3 * sigma + 1
  gaussian = tf.constant(
      np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2)),
      dtype=tf.float32)
  return gaussian, size


class TfExampleDecoder(tf_example_decoder.TfExampleDecoder):
  """Tensorflow Example proto decoder."""

  def __init__(
      self,
      regenerate_source_id: bool,
      panoptic_category_mask_key: str = 'image/panoptic/category_mask',
      panoptic_instance_mask_key: str = 'image/panoptic/instance_mask'):
    super(TfExampleDecoder, self).__init__(
        include_mask=True, regenerate_source_id=regenerate_source_id)
    self._panoptic_category_mask_key = panoptic_category_mask_key
    self._panoptic_instance_mask_key = panoptic_instance_mask_key
    self._panoptic_keys_to_features = {
        panoptic_category_mask_key:
            tf.io.FixedLenFeature((), tf.string, default_value=''),
        panoptic_instance_mask_key:
            tf.io.FixedLenFeature((), tf.string, default_value='')
    }

  def decode(self, serialized_example):
    decoded_tensors = super(TfExampleDecoder, self).decode(serialized_example)
    parsed_tensors = tf.io.parse_single_example(
        serialized_example, self._panoptic_keys_to_features)
    category_mask = tf.io.decode_image(
        parsed_tensors[self._panoptic_category_mask_key], channels=1)
    instance_mask = tf.io.decode_image(
        parsed_tensors[self._panoptic_instance_mask_key], channels=1)
    category_mask.set_shape([None, None, 1])
    instance_mask.set_shape([None, None, 1])
    decoded_tensors.update({
        'groundtruth_panoptic_category_mask': category_mask,
        'groundtruth_panoptic_instance_mask': instance_mask
    })
    return decoded_tensors


class Parser(parser.Parser):
  """Parser to parse an image and its annotations into a dictionary of tensors."""

  def __init__(self,
               output_size: List[int],
               resize_eval_groundtruth: bool = True,
               groundtruth_padded_size: Optional[List[int]] = None,
               ignore_label: int = 0,
               aug_rand_hflip: bool = False,
               aug_scale_min: float = 1.0,
               aug_scale_max: float = 1.0,
               aug_type: Optional[common.Augmentation] = None,
               sigma: float = 8.0,
               small_instance_area_threshold: int = 4096,
               small_instance_weight: float = 3.0,
               dtype: str = 'float32'):
    """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divisible by the largest feature stride
        2^max_level.
      resize_eval_groundtruth: `bool`, if True, eval groundtruth masks are
        resized to output_size.
      groundtruth_padded_size: `Tensor` or `list` for [height, width]. When
        resize_eval_groundtruth is set to False, the groundtruth masks are
        padded to this size.
      ignore_label: `int`, the pixel with ignore label will not be used for
        training and evaluation.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
      aug_scale_min: `float`, the minimum scale applied to `output_size` for
        data augmentation during training.
      aug_scale_max: `float`, the maximum scale applied to `output_size` for
        data augmentation during training.
      aug_type: An optional Augmentation object with params for AutoAugment.
      sigma: `float`, standard deviation for generating 2D Gaussian to encode
        centers.
      small_instance_area_threshold: `int`, small instance area threshold.
      small_instance_weight: `float`, small instance weight.
      dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
    """
    self._output_size = output_size
    self._resize_eval_groundtruth = resize_eval_groundtruth
    if (not resize_eval_groundtruth) and (groundtruth_padded_size is None):
      raise ValueError(
          'groundtruth_padded_size ([height, width]) needs to be '
          'specified when resize_eval_groundtruth is False.')
    self._groundtruth_padded_size = groundtruth_padded_size
    self._ignore_label = ignore_label

    # Data augmentation.
    self._aug_rand_hflip = aug_rand_hflip
    self._aug_scale_min = aug_scale_min
    self._aug_scale_max = aug_scale_max

    if aug_type and aug_type.type:
      if aug_type.type == 'autoaug':
        self._augmenter = augment.AutoAugment(
            augmentation_name=aug_type.autoaug.augmentation_name,
            cutout_const=aug_type.autoaug.cutout_const,
            translate_const=aug_type.autoaug.translate_const)
      else:
        raise ValueError(
            'Augmentation policy {} not supported.'.format(aug_type.type))
    else:
      self._augmenter = None

    self._dtype = dtype
    self._sigma = sigma
    self._gaussian, self._gaussian_size = _compute_gaussian_from_std(
        self._sigma)
    self._gaussian = tf.reshape(self._gaussian, shape=[-1])
    self._small_instance_area_threshold = small_instance_area_threshold
    self._small_instance_weight = small_instance_weight

  def _resize_and_crop_mask(self, mask, image_info, is_training):
    """Resizes and crops mask using `image_info` dict."""
    height = image_info[0][0]
    width = image_info[0][1]
    mask = tf.reshape(mask, shape=[1, height, width, 1])
    mask += 1

    if is_training or self._resize_eval_groundtruth:
      image_scale = image_info[2, :]
      offset = image_info[3, :]
      mask = preprocess_ops.resize_and_crop_masks(
          mask, image_scale, self._output_size, offset)
    else:
      mask = tf.image.pad_to_bounding_box(
          mask, 0, 0,
          self._groundtruth_padded_size[0],
          self._groundtruth_padded_size[1])
    mask -= 1

    # Assign ignore label to the padded region.
    mask = tf.where(
        tf.equal(mask, -1),
        self._ignore_label * tf.ones_like(mask),
        mask)
    mask = tf.squeeze(mask, axis=0)
    return mask

  def _parse_data(self, data, is_training):
    image = data['image']

    if self._augmenter is not None and is_training:
      image = self._augmenter.distort(image)

    image = preprocess_ops.normalize_image(image)

    category_mask = tf.cast(
        data['groundtruth_panoptic_category_mask'][:, :, 0],
        dtype=tf.float32)
    instance_mask = tf.cast(
        data['groundtruth_panoptic_instance_mask'][:, :, 0],
        dtype=tf.float32)

    # Flips image randomly during training.
    if self._aug_rand_hflip and is_training:
      masks = tf.stack([category_mask, instance_mask], axis=0)
      image, _, masks = preprocess_ops.random_horizontal_flip(
          image=image, masks=masks)
      category_mask = masks[0]
      instance_mask = masks[1]

    # Resizes and crops image.
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        self._output_size,
        aug_scale_min=self._aug_scale_min if is_training else 1.0,
        aug_scale_max=self._aug_scale_max if is_training else 1.0)

    category_mask = self._resize_and_crop_mask(
        category_mask, image_info, is_training=is_training)
    instance_mask = self._resize_and_crop_mask(
        instance_mask, image_info, is_training=is_training)

    (instance_centers_heatmap,
     instance_centers_offset,
     semantic_weights) = self._encode_centers_and_offsets(
         instance_mask=instance_mask[:, :, 0])

    # Cast image and labels as self._dtype
    image = tf.cast(image, dtype=self._dtype)
    category_mask = tf.cast(category_mask, dtype=self._dtype)
    instance_mask = tf.cast(instance_mask, dtype=self._dtype)
    instance_centers_heatmap = tf.cast(
        instance_centers_heatmap, dtype=self._dtype)
    instance_centers_offset = tf.cast(
        instance_centers_offset, dtype=self._dtype)

    valid_mask = tf.not_equal(category_mask, self._ignore_label)
    things_mask = tf.not_equal(instance_mask, self._ignore_label)

    labels = {
        'category_mask': category_mask,
        'instance_mask': instance_mask,
        'instance_centers_heatmap': instance_centers_heatmap,
        'instance_centers_offset': instance_centers_offset,
        'semantic_weights': semantic_weights,
        'valid_mask': valid_mask,
        'things_mask': things_mask,
        'image_info': image_info
    }
    return image, labels

  def _parse_train_data(self, data):
    """Parses data for training."""
    return self._parse_data(data=data, is_training=True)

  def _parse_eval_data(self, data):
    """Parses data for evaluation."""
    return self._parse_data(data=data, is_training=False)

  def _encode_centers_and_offsets(self, instance_mask):
    """Generates center heatmaps and offsets from instance id mask.

    Args:
      instance_mask: `tf.Tensor` of shape [height, width] representing
        groundtruth instance id mask.

    Returns:
      instance_centers_heatmap: `tf.Tensor` of shape [height, width, 1]
      instance_centers_offset: `tf.Tensor` of shape [height, width, 2]
    """
    shape = tf.shape(instance_mask)
    height, width = shape[0], shape[1]

    padding_start = int(3 * self._sigma + 1)
    padding_end = int(3 * self._sigma + 2)
    # padding should be equal to self._gaussian_size which is calculated
    # as size = int(6 * sigma + 3)
    padding = padding_start + padding_end

    instance_centers_heatmap = tf.zeros(
        shape=[height + padding, width + padding], dtype=tf.float32)
    centers_offset_y = tf.zeros(shape=[height, width], dtype=tf.float32)
    centers_offset_x = tf.zeros(shape=[height, width], dtype=tf.float32)
    semantic_weights = tf.ones(shape=[height, width], dtype=tf.float32)

    unique_instance_ids, _ = tf.unique(tf.reshape(instance_mask, [-1]))

    # The following method for encoding center heatmaps and offsets is
    # inspired by the reference implementation available at
    # https://github.com/google-research/deeplab2/blob/main/data/sample_generator.py  # pylint: disable=line-too-long
    for instance_id in unique_instance_ids:
      if instance_id == self._ignore_label:
        continue

      mask = tf.equal(instance_mask, instance_id)
      mask_area = tf.reduce_sum(tf.cast(mask, dtype=tf.float32))
      mask_indices = tf.cast(tf.where(mask), dtype=tf.float32)
      mask_center = tf.reduce_mean(mask_indices, axis=0)
      mask_center_y = tf.cast(tf.round(mask_center[0]), dtype=tf.int32)
      mask_center_x = tf.cast(tf.round(mask_center[1]), dtype=tf.int32)

      if mask_area < self._small_instance_area_threshold:
        semantic_weights = tf.where(
            mask, self._small_instance_weight, semantic_weights)

      gaussian_size = self._gaussian_size
      indices_y = tf.range(mask_center_y, mask_center_y + gaussian_size)
      indices_x = tf.range(mask_center_x, mask_center_x + gaussian_size)

      indices = tf.stack(tf.meshgrid(indices_y, indices_x))
      indices = tf.reshape(
          indices, shape=[2, gaussian_size * gaussian_size])
      indices = tf.transpose(indices)

      instance_centers_heatmap = tf.tensor_scatter_nd_max(
          tensor=instance_centers_heatmap,
          indices=indices,
          updates=self._gaussian)
      centers_offset_y = tf.tensor_scatter_nd_update(
          tensor=centers_offset_y,
          indices=tf.cast(mask_indices, dtype=tf.int32),
          updates=tf.cast(mask_center_y, dtype=tf.float32) -
          mask_indices[:, 0])
      centers_offset_x = tf.tensor_scatter_nd_update(
          tensor=centers_offset_x,
          indices=tf.cast(mask_indices, dtype=tf.int32),
          updates=tf.cast(mask_center_x, dtype=tf.float32) -
          mask_indices[:, 1])

    instance_centers_heatmap = instance_centers_heatmap[
        padding_start:padding_start + height,
        padding_start:padding_start + width]
    instance_centers_heatmap = tf.expand_dims(
        instance_centers_heatmap, axis=-1)
    instance_centers_offset = tf.stack(
        [centers_offset_y, centers_offset_x], axis=-1)

    return (instance_centers_heatmap,
            instance_centers_offset,
            semantic_weights)
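The parser stamps each instance center onto the heatmap as a 2D Gaussian via `tf.tensor_scatter_nd_max`. A standalone NumPy check of the window computed by `_compute_gaussian_from_std` above (no assumptions beyond NumPy): for `sigma = 8` the window is `6*8 + 3 = 51` pixels on a side and peaks at `(3*sigma + 1, 3*sigma + 1) = (25, 25)`:

```python
import numpy as np

sigma = 8.0
size = int(6 * sigma + 3)  # 51
x = np.arange(size, dtype=float)
y = x[:, np.newaxis]
x0 = y0 = 3 * sigma + 1
gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

assert gaussian.shape == (size, size)
peak = np.unravel_index(np.argmax(gaussian), gaussian.shape)
assert peak == (int(y0), int(x0))       # maximum sits at the window center
assert np.isclose(gaussian.max(), 1.0)  # exp(0) at the center
```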
official/vision/beta/projects/panoptic_maskrcnn/losses/panoptic_deeplab_losses.py (new file, mode 100644)

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Losses used for panoptic deeplab model."""
import tensorflow as tf

from official.modeling import tf_utils
from official.vision.beta.projects.panoptic_maskrcnn.ops import mask_ops

EPSILON = 1e-5


class WeightedBootstrappedCrossEntropyLoss:
  """Weighted semantic segmentation loss."""

  def __init__(self, label_smoothing, class_weights, ignore_label,
               top_k_percent_pixels=1.0):
    self._top_k_percent_pixels = top_k_percent_pixels
    self._class_weights = class_weights
    self._ignore_label = ignore_label
    self._label_smoothing = label_smoothing

  def __call__(self, logits, labels, sample_weight=None):
    _, _, _, num_classes = logits.get_shape().as_list()

    logits = tf.image.resize(
        logits, tf.shape(labels)[1:3],
        method=tf.image.ResizeMethod.BILINEAR)

    valid_mask = tf.not_equal(labels, self._ignore_label)
    normalizer = tf.reduce_sum(tf.cast(valid_mask, tf.float32)) + EPSILON

    # Assign pixel with ignore label to class 0 (background). The loss on the
    # pixel will later be masked out.
    labels = tf.where(valid_mask, labels, tf.zeros_like(labels))

    labels = tf.squeeze(tf.cast(labels, tf.int32), axis=3)
    valid_mask = tf.squeeze(tf.cast(valid_mask, tf.float32), axis=3)

    onehot_labels = tf.one_hot(labels, num_classes)
    onehot_labels = onehot_labels * (
        1 - self._label_smoothing) + self._label_smoothing / num_classes
    cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=onehot_labels, logits=logits)

    if not self._class_weights:
      class_weights = [1] * num_classes
    else:
      class_weights = self._class_weights

    if num_classes != len(class_weights):
      raise ValueError(
          'Length of class_weights should be {}'.format(num_classes))

    weight_mask = tf.einsum(
        '...y,y->...',
        tf.one_hot(labels, num_classes, dtype=tf.float32),
        tf.constant(class_weights, tf.float32))
    valid_mask *= weight_mask

    if sample_weight is not None:
      valid_mask *= sample_weight

    cross_entropy_loss *= tf.cast(valid_mask, tf.float32)

    if self._top_k_percent_pixels >= 1.0:
      loss = tf.reduce_sum(cross_entropy_loss) / normalizer
    else:
      loss = self._compute_top_k_loss(cross_entropy_loss)

    return loss

  def _compute_top_k_loss(self, loss):
    """Computes top k loss."""
    batch_size = tf.shape(loss)[0]
    loss = tf.reshape(loss, shape=[batch_size, -1])

    top_k_pixels = tf.cast(
        self._top_k_percent_pixels *
        tf.cast(tf.shape(loss)[-1], dtype=tf.float32),
        dtype=tf.int32)

    # shape: [batch_size, top_k_pixels]
    per_sample_top_k_loss = tf.map_fn(
        fn=lambda x: tf.nn.top_k(x, k=top_k_pixels, sorted=False)[0],
        elems=loss,
        parallel_iterations=32,
        fn_output_signature=tf.float32)

    # shape: [batch_size]
    per_sample_normalizer = tf.reduce_sum(
        tf.cast(tf.not_equal(per_sample_top_k_loss, 0.0), dtype=tf.float32),
        axis=-1) + EPSILON
    per_sample_normalized_loss = tf.reduce_sum(
        per_sample_top_k_loss, axis=-1) / per_sample_normalizer

    normalized_loss = tf_utils.safe_mean(per_sample_normalized_loss)
    return normalized_loss


class CenterHeatmapLoss:
  """Center heatmap loss."""

  def __init__(self):
    self._loss_fn = tf.losses.mean_squared_error

  def __call__(self, logits, labels, sample_weight=None):
    _, height, width, _ = labels.get_shape().as_list()
    logits = tf.image.resize(
        logits, size=[height, width],
        method=tf.image.ResizeMethod.BILINEAR)
    loss = self._loss_fn(y_true=labels, y_pred=logits)

    if sample_weight is not None:
      loss *= sample_weight

    return tf_utils.safe_mean(loss)


class CenterOffsetLoss:
  """Center offset loss."""

  def __init__(self):
    self._loss_fn = tf.losses.mean_absolute_error

  def __call__(self, logits, labels, sample_weight=None):
    _, height, width, _ = labels.get_shape().as_list()
    logits = mask_ops.resize_and_rescale_offsets(
        logits, target_size=[height, width])
    loss = self._loss_fn(y_true=labels, y_pred=logits)

    if sample_weight is not None:
      loss *= sample_weight

    return tf_utils.safe_mean(loss)
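A minimal usage sketch of the segmentation loss above, with illustrative (assumed) tensor shapes: the logits may be coarser than the labels, since `__call__` bilinearly resizes them to the label resolution, and `top_k_percent_pixels < 1.0` switches on the bootstrapped (hard-pixel mining) branch:

```python
import tensorflow as tf

from official.vision.beta.projects.panoptic_maskrcnn.losses import panoptic_deeplab_losses

loss_fn = panoptic_deeplab_losses.WeightedBootstrappedCrossEntropyLoss(
    label_smoothing=0.0,
    class_weights=[],          # empty list => uniform weights per class
    ignore_label=0,
    top_k_percent_pixels=0.2)  # keep only the hardest 20% of pixels

# Coarse logits [batch, h', w', num_classes]; integer labels [batch, H, W, 1].
logits = tf.random.normal([2, 160, 160, 10])
labels = tf.random.uniform([2, 640, 640, 1], minval=0, maxval=10,
                           dtype=tf.int32)
loss = loss_fn(logits, labels)  # scalar tf.Tensor
```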
official/vision/beta/projects/panoptic_maskrcnn/modeling/factory.py

@@ -13,12 +13,17 @@
# limitations under the License.
"""Factory method to build panoptic segmentation model."""
from typing import Optional

import tensorflow as tf

from official.projects.deepmac_maskrcnn.tasks import deep_mask_head_rcnn
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg
from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model
from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_maskrcnn_model
from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_segmentation_generator
from official.vision.modeling import backbones
from official.vision.modeling.decoders import factory as decoder_factory

@@ -142,3 +147,104 @@ def build_panoptic_maskrcnn(
      aspect_ratios=model_config.anchor.aspect_ratios,
      anchor_size=model_config.anchor.anchor_size)
  return model


def build_panoptic_deeplab(
    input_specs: tf.keras.layers.InputSpec,
    model_config: panoptic_deeplab_cfg.PanopticDeeplab,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None
) -> tf.keras.Model:
  """Builds Panoptic Deeplab model.

  Args:
    input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
    model_config: Config instance for the panoptic deeplab model.
    l2_regularizer: Optional `tf.keras.regularizers.Regularizer`, if specified,
      the model is built with the provided regularization layer.

  Returns:
    tf.keras.Model for the panoptic segmentation model.
  """
  norm_activation_config = model_config.norm_activation
  backbone = backbones.factory.build_backbone(
      input_specs=input_specs,
      backbone_config=model_config.backbone,
      norm_activation_config=norm_activation_config,
      l2_regularizer=l2_regularizer)

  semantic_decoder = decoder_factory.build_decoder(
      input_specs=backbone.output_specs,
      model_config=model_config,
      l2_regularizer=l2_regularizer)

  if model_config.shared_decoder:
    instance_decoder = None
  else:
    # semantic and instance share the same decoder type
    instance_decoder = decoder_factory.build_decoder(
        input_specs=backbone.output_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)

  semantic_head_config = model_config.semantic_head
  instance_head_config = model_config.instance_head

  semantic_head = panoptic_deeplab_heads.SemanticHead(
      num_classes=model_config.num_classes,
      level=semantic_head_config.level,
      num_convs=semantic_head_config.num_convs,
      kernel_size=semantic_head_config.kernel_size,
      prediction_kernel_size=semantic_head_config.prediction_kernel_size,
      num_filters=semantic_head_config.num_filters,
      use_depthwise_convolution=semantic_head_config.use_depthwise_convolution,
      upsample_factor=semantic_head_config.upsample_factor,
      low_level=semantic_head_config.low_level,
      low_level_num_filters=semantic_head_config.low_level_num_filters,
      fusion_num_output_filters=semantic_head_config.fusion_num_output_filters,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
  instance_head = panoptic_deeplab_heads.InstanceHead(
      level=instance_head_config.level,
      num_convs=instance_head_config.num_convs,
      kernel_size=instance_head_config.kernel_size,
      prediction_kernel_size=instance_head_config.prediction_kernel_size,
      num_filters=instance_head_config.num_filters,
      use_depthwise_convolution=instance_head_config.use_depthwise_convolution,
      upsample_factor=instance_head_config.upsample_factor,
      low_level=instance_head_config.low_level,
      low_level_num_filters=instance_head_config.low_level_num_filters,
      fusion_num_output_filters=instance_head_config.fusion_num_output_filters,
      activation=norm_activation_config.activation,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)

  if model_config.generate_panoptic_masks:
    post_processing_config = model_config.post_processor
    post_processor = panoptic_deeplab_merge.PostProcessor(
        output_size=post_processing_config.output_size,
        center_score_threshold=post_processing_config.center_score_threshold,
        thing_class_ids=post_processing_config.thing_class_ids,
        label_divisor=post_processing_config.label_divisor,
        stuff_area_limit=post_processing_config.stuff_area_limit,
        ignore_label=post_processing_config.ignore_label,
        nms_kernel=post_processing_config.nms_kernel,
        keep_k_centers=post_processing_config.keep_k_centers,
        rescale_predictions=post_processing_config.rescale_predictions)
  else:
    post_processor = None

  model = panoptic_deeplab_model.PanopticDeeplabModel(
      backbone=backbone,
      semantic_decoder=semantic_decoder,
      instance_decoder=instance_decoder,
      semantic_head=semantic_head,
      instance_head=instance_head,
      post_processor=post_processor)

  return model
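A minimal build sketch, mirroring `factory_test.py` below; it assumes the `official` package with its registered backbones and decoders is importable, and the config values are illustrative:

```python
import tensorflow as tf

from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg
from official.vision.beta.projects.panoptic_maskrcnn.modeling import factory

input_specs = tf.keras.layers.InputSpec(shape=[None, 640, 640, 3])
model_config = panoptic_deeplab_cfg.PanopticDeeplab(
    num_classes=10, input_size=[640, 640])  # other fields use config defaults
model = factory.build_panoptic_deeplab(
    input_specs=input_specs,
    model_config=model_config,
    l2_regularizer=tf.keras.regularizers.l2(5e-5))
```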
official/vision/beta/projects/panoptic_maskrcnn/modeling/factory_test.py

@@ -18,6 +18,8 @@ from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow.python.distribute import combinations
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as panoptic_deeplab_cfg
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as panoptic_maskrcnn_cfg
from official.vision.beta.projects.panoptic_maskrcnn.modeling import factory
from official.vision.configs import backbones

@@ -62,5 +64,51 @@ class PanopticMaskRCNNBuilderTest(parameterized.TestCase, tf.test.TestCase):
        model_config=model_config,
        l2_regularizer=l2_regularizer)


class PanopticDeeplabBuilderTest(parameterized.TestCase, tf.test.TestCase):

  @combinations.generate(
      combinations.combine(
          input_size=[(640, 640), (512, 512)],
          backbone_type=['resnet', 'dilated_resnet'],
          decoder_type=['aspp', 'fpn'],
          level=[2, 3, 4],
          low_level=[(4, 3), (3, 2)],
          shared_decoder=[True, False],
          generate_panoptic_masks=[True, False]))
  def test_builder(self, input_size, backbone_type, level, low_level,
                   decoder_type, shared_decoder, generate_panoptic_masks):
    num_classes = 10
    input_specs = tf.keras.layers.InputSpec(
        shape=[None, input_size[0], input_size[1], 3])

    model_config = panoptic_deeplab_cfg.PanopticDeeplab(
        num_classes=num_classes,
        input_size=input_size,
        backbone=backbones.Backbone(type=backbone_type),
        decoder=decoders.Decoder(type=decoder_type),
        semantic_head=panoptic_deeplab_cfg.SemanticHead(
            level=level,
            num_convs=1,
            kernel_size=5,
            prediction_kernel_size=1,
            low_level=low_level),
        instance_head=panoptic_deeplab_cfg.InstanceHead(
            level=level,
            num_convs=1,
            kernel_size=5,
            prediction_kernel_size=1,
            low_level=low_level),
        shared_decoder=shared_decoder,
        generate_panoptic_masks=generate_panoptic_masks)

    l2_regularizer = tf.keras.regularizers.l2(5e-5)

    _ = factory.build_panoptic_deeplab(
        input_specs=input_specs,
        model_config=model_config,
        l2_regularizer=l2_regularizer)


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads.py
0 → 100644
View file @
3e3b0c64
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains definitions for Panoptic Deeplab heads."""
from
typing
import
List
,
Union
,
Optional
,
Mapping
,
Tuple
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
from
official.vision.beta.projects.panoptic_maskrcnn.modeling.layers
import
fusion_layers
from
official.vision.ops
import
spatial_transform_ops
class
PanopticDeeplabHead
(
tf
.
keras
.
layers
.
Layer
):
"""Creates a panoptic deeplab head."""
def
__init__
(
self
,
level
:
Union
[
int
,
str
],
num_convs
:
int
=
2
,
num_filters
:
int
=
256
,
kernel_size
:
int
=
3
,
use_depthwise_convolution
:
bool
=
False
,
upsample_factor
:
int
=
1
,
low_level
:
Optional
[
List
[
int
]]
=
None
,
low_level_num_filters
:
Optional
[
List
[
int
]]
=
None
,
fusion_num_output_filters
:
int
=
256
,
activation
:
str
=
'relu'
,
use_sync_bn
:
bool
=
False
,
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
bias_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
**
kwargs
):
"""Initializes a panoptic deeplab head.
Args:
level: An `int` or `str`, level to use to build head.
num_convs: An `int` number of stacked convolution before the last
prediction layer.
num_filters: An `int` number to specify the number of filters used.
Default is 256.
kernel_size: An `int` number to specify the kernel size of the
stacked convolutions before the last prediction layer.
use_depthwise_convolution: A bool to specify if use depthwise separable
convolutions.
upsample_factor: An `int` number to specify the upsampling factor to
generate finer mask. Default 1 means no upsampling is applied.
low_level: An `int` of backbone level to be used for feature fusion. It is
used when feature_fusion is set to `deeplabv3plus`.
low_level_num_filters: An `int` of reduced number of filters for the low
level features before fusing it with higher level features. It is only
used when feature_fusion is set to `deeplabv3plus`.
fusion_num_output_filters: An `int` number to specify the number of
filters used by output layer of fusion module. Default is 256.
activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
normalization across different replicas.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
super
(
PanopticDeeplabHead
,
self
).
__init__
(
**
kwargs
)
self
.
_config_dict
=
{
'level'
:
level
,
'num_convs'
:
num_convs
,
'num_filters'
:
num_filters
,
'kernel_size'
:
kernel_size
,
'use_depthwise_convolution'
:
use_depthwise_convolution
,
'upsample_factor'
:
upsample_factor
,
'low_level'
:
low_level
,
'low_level_num_filters'
:
low_level_num_filters
,
'fusion_num_output_filters'
:
fusion_num_output_filters
,
'activation'
:
activation
,
'use_sync_bn'
:
use_sync_bn
,
'norm_momentum'
:
norm_momentum
,
'norm_epsilon'
:
norm_epsilon
,
'kernel_regularizer'
:
kernel_regularizer
,
'bias_regularizer'
:
bias_regularizer
}
if
tf
.
keras
.
backend
.
image_data_format
()
==
'channels_last'
:
self
.
_bn_axis
=
-
1
else
:
self
.
_bn_axis
=
1
self
.
_activation
=
tf_utils
.
get_activation
(
activation
)
def
build
(
self
,
input_shape
:
Union
[
tf
.
TensorShape
,
List
[
tf
.
TensorShape
]]):
"""Creates the variables of the head."""
kernel_size
=
self
.
_config_dict
[
'kernel_size'
]
use_depthwise_convolution
=
self
.
_config_dict
[
'use_depthwise_convolution'
]
random_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
)
conv_op
=
tf
.
keras
.
layers
.
Conv2D
conv_kwargs
=
{
'kernel_size'
:
kernel_size
if
not
use_depthwise_convolution
else
1
,
'padding'
:
'same'
,
'use_bias'
:
True
,
'kernel_initializer'
:
random_initializer
,
'kernel_regularizer'
:
self
.
_config_dict
[
'kernel_regularizer'
],
}
bn_op
=
(
tf
.
keras
.
layers
.
experimental
.
SyncBatchNormalization
if
self
.
_config_dict
[
'use_sync_bn'
]
else
tf
.
keras
.
layers
.
BatchNormalization
)
bn_kwargs
=
{
'axis'
:
self
.
_bn_axis
,
'momentum'
:
self
.
_config_dict
[
'norm_momentum'
],
'epsilon'
:
self
.
_config_dict
[
'norm_epsilon'
],
}
self
.
_panoptic_deeplab_fusion
=
fusion_layers
.
PanopticDeepLabFusion
(
level
=
self
.
_config_dict
[
'level'
],
low_level
=
self
.
_config_dict
[
'low_level'
],
num_projection_filters
=
self
.
_config_dict
[
'low_level_num_filters'
],
num_output_filters
=
self
.
_config_dict
[
'fusion_num_output_filters'
],
use_depthwise_convolution
=
self
.
_config_dict
[
'use_depthwise_convolution'
],
activation
=
self
.
_config_dict
[
'activation'
],
use_sync_bn
=
self
.
_config_dict
[
'use_sync_bn'
],
norm_momentum
=
self
.
_config_dict
[
'norm_momentum'
],
norm_epsilon
=
self
.
_config_dict
[
'norm_epsilon'
],
kernel_regularizer
=
self
.
_config_dict
[
'kernel_regularizer'
],
bias_regularizer
=
self
.
_config_dict
[
'bias_regularizer'
])
# Stacked convolutions layers.
self
.
_convs
=
[]
self
.
_norms
=
[]
for
i
in
range
(
self
.
_config_dict
[
'num_convs'
]):
if
use_depthwise_convolution
:
self
.
_convs
.
append
(
tf
.
keras
.
layers
.
DepthwiseConv2D
(
name
=
'panoptic_deeplab_head_depthwise_conv_{}'
.
format
(
i
),
kernel_size
=
kernel_size
,
padding
=
'same'
,
use_bias
=
True
,
depthwise_initializer
=
random_initializer
,
depthwise_regularizer
=
self
.
_config_dict
[
'kernel_regularizer'
],
depth_multiplier
=
1
))
norm_name
=
'panoptic_deeplab_head_depthwise_norm_{}'
.
format
(
i
)
self
.
_norms
.
append
(
bn_op
(
name
=
norm_name
,
**
bn_kwargs
))
conv_name
=
'panoptic_deeplab_head_conv_{}'
.
format
(
i
)
self
.
_convs
.
append
(
conv_op
(
name
=
conv_name
,
filters
=
self
.
_config_dict
[
'num_filters'
],
**
conv_kwargs
))
norm_name
=
'panoptic_deeplab_head_norm_{}'
.
format
(
i
)
self
.
_norms
.
append
(
bn_op
(
name
=
norm_name
,
**
bn_kwargs
))
super
().
build
(
input_shape
)
def
call
(
self
,
inputs
:
Tuple
[
Union
[
tf
.
Tensor
,
Mapping
[
str
,
tf
.
Tensor
]],
Union
[
tf
.
Tensor
,
Mapping
[
str
,
tf
.
Tensor
]]],
training
=
None
):
"""Forward pass of the head.
It supports both a tuple of 2 tensors or 2 dictionaries. The first is
backbone endpoints, and the second is decoder endpoints. When inputs are
tensors, they are from a single level of feature maps. When inputs are
dictionaries, they contain multiple levels of feature maps, where the key
is the index of feature map.
Args:
inputs: A tuple of 2 feature map tensors of shape
[batch, height_l, width_l, channels] or 2 dictionaries of tensors:
- key: A `str` of the level of the multilevel features.
- values: A `tf.Tensor` of the feature map tensors, whose shape is
[batch, height_l, width_l, channels].
training: A bool, runs the model in training/eval mode.
Returns:
A `tf.Tensor` of the fused backbone and decoder features.
"""
if
training
is
None
:
training
=
tf
.
keras
.
backend
.
learning_phase
()
x
=
self
.
_panoptic_deeplab_fusion
(
inputs
,
training
=
training
)
for
conv
,
norm
in
zip
(
self
.
_convs
,
self
.
_norms
):
x
=
conv
(
x
)
x
=
norm
(
x
,
training
=
training
)
x
=
self
.
_activation
(
x
)
if
self
.
_config_dict
[
'upsample_factor'
]
>
1
:
x
=
spatial_transform_ops
.
nearest_upsampling
(
x
,
scale
=
self
.
_config_dict
[
'upsample_factor'
])
return
x
def
get_config
(
self
):
base_config
=
super
().
get_config
()
return
dict
(
list
(
base_config
.
items
())
+
list
(
self
.
_config_dict
.
items
()))
@
classmethod
def
from_config
(
cls
,
config
):
return
cls
(
**
config
)
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
SemanticHead
(
PanopticDeeplabHead
):
"""Creates a semantic head."""
def
__init__
(
self
,
num_classes
:
int
,
level
:
Union
[
int
,
str
],
num_convs
:
int
=
2
,
num_filters
:
int
=
256
,
kernel_size
:
int
=
3
,
prediction_kernel_size
:
int
=
3
,
use_depthwise_convolution
:
bool
=
False
,
upsample_factor
:
int
=
1
,
low_level
:
Optional
[
List
[
int
]]
=
None
,
low_level_num_filters
:
Optional
[
List
[
int
]]
=
None
,
fusion_num_output_filters
:
int
=
256
,
activation
:
str
=
'relu'
,
use_sync_bn
:
bool
=
False
,
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
bias_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
**
kwargs
):
"""Initializes a instance center head.
Args:
num_classes: An `int` number of mask classification categories. The number
of classes does not include background class.
level: An `int` or `str`, level to use to build head.
num_convs: An `int` number of stacked convolution before the last
prediction layer.
num_filters: An `int` number to specify the number of filters used.
Default is 256.
kernel_size: An `int` number to specify the kernel size of the
stacked convolutions before the last prediction layer.
prediction_kernel_size: An `int` number to specify the kernel size of the
prediction layer.
use_depthwise_convolution: A bool to specify if use depthwise separable
convolutions.
upsample_factor: An `int` number to specify the upsampling factor to
generate finer mask. Default 1 means no upsampling is applied.
low_level: An `int` of backbone level to be used for feature fusion. It is
used when feature_fusion is set to `deeplabv3plus`.
low_level_num_filters: An `int` of reduced number of filters for the low
level features before fusing it with higher level features. It is only
used when feature_fusion is set to `deeplabv3plus`.
fusion_num_output_filters: An `int` number to specify the number of
filters used by output layer of fusion module. Default is 256.
activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
normalization across different replicas.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
super
(
SemanticHead
,
self
).
__init__
(
level
=
level
,
num_convs
=
num_convs
,
num_filters
=
num_filters
,
use_depthwise_convolution
=
use_depthwise_convolution
,
kernel_size
=
kernel_size
,
upsample_factor
=
upsample_factor
,
low_level
=
low_level
,
low_level_num_filters
=
low_level_num_filters
,
fusion_num_output_filters
=
fusion_num_output_filters
,
activation
=
activation
,
use_sync_bn
=
use_sync_bn
,
norm_momentum
=
norm_momentum
,
norm_epsilon
=
norm_epsilon
,
kernel_regularizer
=
kernel_regularizer
,
bias_regularizer
=
bias_regularizer
,
**
kwargs
)
self
.
_config_dict
.
update
({
'num_classes'
:
num_classes
,
'prediction_kernel_size'
:
prediction_kernel_size
})
def
build
(
self
,
input_shape
:
Union
[
tf
.
TensorShape
,
List
[
tf
.
TensorShape
]]):
"""Creates the variables of the semantic head."""
super
(
SemanticHead
,
self
).
build
(
input_shape
)
self
.
_classifier
=
tf
.
keras
.
layers
.
Conv2D
(
name
=
'semantic_output'
,
filters
=
self
.
_config_dict
[
'num_classes'
],
kernel_size
=
self
.
_config_dict
[
'prediction_kernel_size'
],
padding
=
'same'
,
bias_initializer
=
tf
.
zeros_initializer
(),
kernel_initializer
=
tf
.
keras
.
initializers
.
RandomNormal
(
stddev
=
0.01
),
kernel_regularizer
=
self
.
_config_dict
[
'kernel_regularizer'
],
bias_regularizer
=
self
.
_config_dict
[
'bias_regularizer'
])
def
call
(
self
,
inputs
:
Tuple
[
Union
[
tf
.
Tensor
,
Mapping
[
str
,
tf
.
Tensor
]],
Union
[
tf
.
Tensor
,
Mapping
[
str
,
tf
.
Tensor
]]],
training
=
None
):
"""Forward pass of the head."""
if
training
is
None
:
training
=
tf
.
keras
.
backend
.
learning_phase
()
x
=
super
(
SemanticHead
,
self
).
call
(
inputs
,
training
=
training
)
outputs
=
self
.
_classifier
(
x
)
return
outputs
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
class
InstanceHead
(
PanopticDeeplabHead
):
"""Creates a instance head."""
def
__init__
(
self
,
level
:
Union
[
int
,
str
],
num_convs
:
int
=
2
,
num_filters
:
int
=
256
,
kernel_size
:
int
=
3
,
prediction_kernel_size
:
int
=
3
,
use_depthwise_convolution
:
bool
=
False
,
upsample_factor
:
int
=
1
,
low_level
:
Optional
[
List
[
int
]]
=
None
,
low_level_num_filters
:
Optional
[
List
[
int
]]
=
None
,
fusion_num_output_filters
:
int
=
256
,
activation
:
str
=
'relu'
,
use_sync_bn
:
bool
=
False
,
norm_momentum
:
float
=
0.99
,
norm_epsilon
:
float
=
0.001
,
kernel_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
bias_regularizer
:
Optional
[
tf
.
keras
.
regularizers
.
Regularizer
]
=
None
,
**
kwargs
):
"""Initializes a instance center head.
Args:
level: An `int` or `str`, level to use to build head.
num_convs: An `int` number of stacked convolution before the last
prediction layer.
num_filters: An `int` number to specify the number of filters used.
Default is 256.
kernel_size: An `int` number to specify the kernel size of the
stacked convolutions before the last prediction layer.
prediction_kernel_size: An `int` number to specify the kernel size of the
prediction layer.
use_depthwise_convolution: A bool to specify if use depthwise separable
convolutions.
upsample_factor: An `int` number to specify the upsampling factor to
generate finer mask. Default 1 means no upsampling is applied.
low_level: An `int` of backbone level to be used for feature fusion. It is
used when feature_fusion is set to `deeplabv3plus`.
low_level_num_filters: An `int` of reduced number of filters for the low
level features before fusing it with higher level features. It is only
used when feature_fusion is set to `deeplabv3plus`.
fusion_num_output_filters: An `int` number to specify the number of
filters used by output layer of fusion module. Default is 256.
activation: A `str` that indicates which activation is used, e.g. 'relu',
'swish', etc.
use_sync_bn: A `bool` that indicates whether to use synchronized batch
normalization across different replicas.
norm_momentum: A `float` of normalization momentum for the moving average.
norm_epsilon: A `float` added to variance to avoid dividing by zero.
kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
Conv2D. Default is None.
bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
**kwargs: Additional keyword arguments to be passed.
"""
super
(
InstanceHead
,
self
).
__init__
(
level
=
level
,
num_convs
=
num_convs
,
num_filters
=
num_filters
,
use_depthwise_convolution
=
use_depthwise_convolution
,
kernel_size
=
kernel_size
,
upsample_factor
=
upsample_factor
,
low_level
=
low_level
,
low_level_num_filters
=
low_level_num_filters
,
fusion_num_output_filters
=
fusion_num_output_filters
,
activation
=
activation
,
use_sync_bn
=
use_sync_bn
,
norm_momentum
=
norm_momentum
,
norm_epsilon
=
norm_epsilon
,
kernel_regularizer
=
kernel_regularizer
,
bias_regularizer
=
bias_regularizer
,
**
kwargs
)
self
.
_config_dict
.
update
({
'prediction_kernel_size'
:
prediction_kernel_size
})
  def build(self, input_shape: Union[tf.TensorShape, List[tf.TensorShape]]):
    """Creates the variables of the instance head."""
    super(InstanceHead, self).build(input_shape)

    self._instance_center_prediction_conv = tf.keras.layers.Conv2D(
        name='instance_centers_heatmap',
        filters=1,
        kernel_size=self._config_dict['prediction_kernel_size'],
        padding='same',
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'])

    self._instance_center_regression_conv = tf.keras.layers.Conv2D(
        name='instance_centers_offset',
        filters=2,
        kernel_size=self._config_dict['prediction_kernel_size'],
        padding='same',
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        kernel_regularizer=self._config_dict['kernel_regularizer'],
        bias_regularizer=self._config_dict['bias_regularizer'])
  def call(
      self,
      inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
                    Union[tf.Tensor, Mapping[str, tf.Tensor]]],
      training=None):
    """Forward pass of the head."""
    if training is None:
      training = tf.keras.backend.learning_phase()
    x = super(InstanceHead, self).call(inputs, training=training)
    instance_centers_heatmap = self._instance_center_prediction_conv(x)
    instance_centers_offset = self._instance_center_regression_conv(x)
    outputs = {
        'instance_centers_heatmap': instance_centers_heatmap,
        'instance_centers_offset': instance_centers_offset
    }
    return outputs
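A minimal usage sketch for the head (illustrative shapes, mirroring the test file below; not part of the diff): the head consumes a `(backbone_features, decoder_features)` pair of level-keyed dicts and emits the two center maps at the resolution of the last `low_level` feature.

import numpy as np

head = InstanceHead(level=3, low_level=[2], low_level_num_filters=[48])
backbone_features = {'2': np.random.rand(2, 128, 128, 16),
                     '3': np.random.rand(2, 64, 64, 16)}
decoder_features = {'2': np.random.rand(2, 128, 128, 64),
                    '3': np.random.rand(2, 64, 64, 64)}
outputs = head((backbone_features, decoder_features), training=False)
# outputs['instance_centers_heatmap'] -> [2, 128, 128, 1]
# outputs['instance_centers_offset']  -> [2, 128, 128, 2]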
official/vision/beta/projects/panoptic_maskrcnn/modeling/heads/panoptic_deeplab_heads_test.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for panoptic_deeplab_heads.py."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads
class PanopticDeeplabHeadsTest(parameterized.TestCase, tf.test.TestCase):

  @parameterized.parameters(
      (2, (2,), (48,)),
      (3, (2,), (48,)),
      (2, (2,), (48,)),
      (2, (2,), (48,)),
      (3, (2,), (48,)),
      (3, (2,), (48,)),
      (4, (4, 3), (64, 32)),
      (4, (3, 2), (64, 32)))
  def test_forward(self, level, low_level, low_level_num_filters):
    backbone_features = {
        '3': np.random.rand(2, 128, 128, 16),
        '4': np.random.rand(2, 64, 64, 16),
        '5': np.random.rand(2, 32, 32, 16),
    }
    decoder_features = {
        '3': np.random.rand(2, 128, 128, 64),
        '4': np.random.rand(2, 64, 64, 64),
        '5': np.random.rand(2, 32, 32, 64),
        '6': np.random.rand(2, 16, 16, 64),
    }
    backbone_features['2'] = np.random.rand(2, 256, 256, 16)
    decoder_features['2'] = np.random.rand(2, 256, 256, 64)

    num_classes = 10
    semantic_head = panoptic_deeplab_heads.SemanticHead(
        num_classes=num_classes,
        level=level,
        low_level=low_level,
        low_level_num_filters=low_level_num_filters)
    instance_head = panoptic_deeplab_heads.InstanceHead(
        level=level,
        low_level=low_level,
        low_level_num_filters=low_level_num_filters)

    semantic_outputs = semantic_head((backbone_features, decoder_features))
    instance_outputs = instance_head((backbone_features, decoder_features))

    if str(level) in decoder_features:
      h, w = decoder_features[str(low_level[-1])].shape[1:3]
      self.assertAllEqual(
          semantic_outputs.numpy().shape,
          [2, h, w, num_classes])
      self.assertAllEqual(
          instance_outputs['instance_centers_heatmap'].numpy().shape,
          [2, h, w, 1])
      self.assertAllEqual(
          instance_outputs['instance_centers_offset'].numpy().shape,
          [2, h, w, 2])
  def test_serialize_deserialize(self):
    semantic_head = panoptic_deeplab_heads.SemanticHead(num_classes=2, level=3)
    instance_head = panoptic_deeplab_heads.InstanceHead(level=3)

    semantic_head_config = semantic_head.get_config()
    instance_head_config = instance_head.get_config()

    new_semantic_head = panoptic_deeplab_heads.SemanticHead.from_config(
        semantic_head_config)
    new_instance_head = panoptic_deeplab_heads.InstanceHead.from_config(
        instance_head_config)

    self.assertAllEqual(
        semantic_head.get_config(), new_semantic_head.get_config())
    self.assertAllEqual(
        instance_head.get_config(), new_instance_head.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/fusion_layers.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains feature fusion blocks for panoptic segmentation models."""
from typing import Any, Callable, Dict, List, Mapping, Optional, Union

import tensorflow as tf

from official.modeling import tf_utils

# Type annotations.
States = Dict[str, tf.Tensor]
Activation = Union[str, Callable]
class PanopticDeepLabFusion(tf.keras.layers.Layer):
  """Creates a Panoptic DeepLab feature fusion layer.

  This implements the feature fusion introduced in the paper:
  Cheng et al. Panoptic-DeepLab
  (https://arxiv.org/pdf/1911.10194.pdf)
  """
  def __init__(
      self,
      level: int,
      low_level: List[int],
      num_projection_filters: List[int],
      num_output_filters: int = 256,
      use_depthwise_convolution: bool = False,
      activation: str = 'relu',
      use_sync_bn: bool = False,
      norm_momentum: float = 0.99,
      norm_epsilon: float = 0.001,
      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      bias_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
      interpolation: str = 'bilinear',
      **kwargs):
    """Initializes a Panoptic DeepLab feature fusion layer.

    Args:
      level: An `int` level at which the decoder was applied.
      low_level: A list of `int` backbone levels to use in feature fusion.
      num_projection_filters: A list of `int` with the number of filters for
        the projection conv2d layers.
      num_output_filters: An `int` number of filters in the output conv2d
        layers.
      use_depthwise_convolution: A `bool` to specify whether to use depthwise
        separable convolutions.
      activation: A `str` name of the activation function.
      use_sync_bn: A `bool` that indicates whether to use synchronized batch
        normalization across different replicas.
      norm_momentum: A `float` of normalization momentum for the moving
        average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default is None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D.
      interpolation: A `str` interpolation method for upsampling. Defaults to
        `bilinear`.
      **kwargs: Additional keyword arguments to be passed.

    Returns:
      A `float` `tf.Tensor` of shape [batch_size, feature_height,
      feature_width, feature_channel] when called.
    """
    super(PanopticDeepLabFusion, self).__init__(**kwargs)

    self._config_dict = {
        'level': level,
        'low_level': low_level,
        'num_projection_filters': num_projection_filters,
        'num_output_filters': num_output_filters,
        'use_depthwise_convolution': use_depthwise_convolution,
        'activation': activation,
        'use_sync_bn': use_sync_bn,
        'norm_momentum': norm_momentum,
        'norm_epsilon': norm_epsilon,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'interpolation': interpolation
    }
    if tf.keras.backend.image_data_format() == 'channels_last':
      self._channel_axis = -1
    else:
      self._channel_axis = 1
    self._activation = tf_utils.get_activation(activation)
  def build(self, input_shape: List[tf.TensorShape]):
    conv_op = tf.keras.layers.Conv2D
    conv_kwargs = {
        'padding': 'same',
        'use_bias': True,
        'kernel_initializer': tf.initializers.VarianceScaling(),
        'kernel_regularizer': self._config_dict['kernel_regularizer'],
    }
    bn_op = (tf.keras.layers.experimental.SyncBatchNormalization
             if self._config_dict['use_sync_bn']
             else tf.keras.layers.BatchNormalization)
    bn_kwargs = {
        'axis': self._channel_axis,
        'momentum': self._config_dict['norm_momentum'],
        'epsilon': self._config_dict['norm_epsilon'],
    }

    self._projection_convs = []
    self._projection_norms = []
    self._fusion_convs = []
    self._fusion_norms = []
    for i in range(len(self._config_dict['low_level'])):
      self._projection_convs.append(
          conv_op(
              filters=self._config_dict['num_projection_filters'][i],
              kernel_size=1,
              **conv_kwargs))
      if self._config_dict['use_depthwise_convolution']:
        depthwise_initializer = tf.keras.initializers.RandomNormal(stddev=0.01)
        fusion_conv = tf.keras.Sequential([
            tf.keras.layers.DepthwiseConv2D(
                kernel_size=5,
                padding='same',
                use_bias=True,
                depthwise_initializer=depthwise_initializer,
                depthwise_regularizer=self._config_dict['kernel_regularizer'],
                depth_multiplier=1),
            bn_op(**bn_kwargs),
            conv_op(
                filters=self._config_dict['num_output_filters'],
                kernel_size=1,
                **conv_kwargs)])
      else:
        fusion_conv = conv_op(
            filters=self._config_dict['num_output_filters'],
            kernel_size=5,
            **conv_kwargs)
      self._fusion_convs.append(fusion_conv)
      self._projection_norms.append(bn_op(**bn_kwargs))
      self._fusion_norms.append(bn_op(**bn_kwargs))
  def call(self, inputs, training=None):
    if training is None:
      training = tf.keras.backend.learning_phase()

    backbone_output = inputs[0]
    decoder_output = inputs[1][str(self._config_dict['level'])]

    x = decoder_output
    for i in range(len(self._config_dict['low_level'])):
      feature = backbone_output[str(self._config_dict['low_level'][i])]
      feature = self._projection_convs[i](feature)
      feature = self._projection_norms[i](feature, training=training)
      feature = self._activation(feature)

      shape = tf.shape(feature)
      x = tf.image.resize(
          x, size=[shape[1], shape[2]],
          method=self._config_dict['interpolation'])
      x = tf.cast(x, dtype=feature.dtype)
      x = tf.concat([x, feature], axis=self._channel_axis)

      x = self._fusion_convs[i](x)
      x = self._fusion_norms[i](x, training=training)
      x = self._activation(x)
    return x

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge.py 0 → 100644
This diff is collapsed.
official/vision/beta/projects/panoptic_maskrcnn/modeling/layers/panoptic_deeplab_merge_test.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test for panoptic_deeplab_merge.py.
Note that the tests are branched from
https://raw.githubusercontent.com/google-research/deeplab2/main/model/post_processor/panoptic_deeplab_test.py
"""
import numpy as np
import tensorflow as tf

from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
class PostProcessingTest(tf.test.TestCase):

  def test_py_func_merge_semantic_and_instance_maps_can_run(self):
    batch = 1
    height = 5
    width = 5
    semantic_prediction = tf.random.uniform((batch, height, width),
                                            minval=0,
                                            maxval=20,
                                            dtype=tf.int32)
    instance_maps = tf.random.uniform((batch, height, width),
                                      minval=0,
                                      maxval=3,
                                      dtype=tf.int32)
    thing_class_ids = tf.convert_to_tensor([1, 2, 3])
    label_divisor = 256
    stuff_area_limit = 3
    void_label = 255
    panoptic_prediction = (
        panoptic_deeplab_merge._merge_semantic_and_instance_maps(
            semantic_prediction, instance_maps, thing_class_ids,
            label_divisor, stuff_area_limit, void_label))
    self.assertListEqual(semantic_prediction.get_shape().as_list(),
                         panoptic_prediction.get_shape().as_list())
  def test_merge_semantic_and_instance_maps_with_a_simple_example(self):
    semantic_prediction = tf.convert_to_tensor(
        [[[0, 0, 0, 0],
          [0, 1, 1, 0],
          [0, 2, 2, 0],
          [2, 2, 3, 3]]], dtype=tf.int32)
    instance_maps = tf.convert_to_tensor(
        [[[0, 0, 0, 0],
          [0, 0, 0, 0],
          [0, 1, 1, 0],
          [2, 2, 3, 3]]], dtype=tf.int32)
    thing_class_ids = tf.convert_to_tensor([2, 3])
    label_divisor = 256
    stuff_area_limit = 3
    void_label = 255
    # The expected_panoptic_prediction is computed as follows.
    # For `thing` segmentation, instance 1, 2, and 3 are kept, but instance 3
    # will have a new instance ID 1, since it is the first instance in its
    # own semantic label.
    # For `stuff` segmentation, class-0 region is kept, while class-1 region
    # is re-labeled as `void_label * label_divisor` since its area is smaller
    # than stuff_area_limit.
    expected_panoptic_prediction = tf.convert_to_tensor(
        [[[0, 0, 0, 0],
          [0, void_label * label_divisor, void_label * label_divisor, 0],
          [0, 2 * label_divisor + 1, 2 * label_divisor + 1, 0],
          [2 * label_divisor + 2, 2 * label_divisor + 2,
           3 * label_divisor + 1, 3 * label_divisor + 1]]],
        dtype=tf.int32)
    panoptic_prediction = (
        panoptic_deeplab_merge._merge_semantic_and_instance_maps(
            semantic_prediction, instance_maps, thing_class_ids,
            label_divisor, stuff_area_limit, void_label))
    self.assertAllClose(expected_panoptic_prediction, panoptic_prediction)
  def test_gets_panoptic_predictions_with_score(self):
    batch = 1
    height = 5
    width = 5
    classes = 3

    semantic_logits = tf.random.uniform((batch, 1, 1, classes))
    semantic_logits = tf.tile(semantic_logits, (1, height, width, 1))

    center_heatmap = tf.convert_to_tensor([
        [1.0, 0.0, 0.0, 0.0, 0.0],
        [0.8, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.1, 0.7],
        [0.0, 0.0, 0.0, 0.0, 0.2],
    ], dtype=tf.float32)
    center_heatmap = tf.expand_dims(center_heatmap, 0)
    center_heatmap = tf.expand_dims(center_heatmap, 3)

    center_offsets = tf.zeros((batch, height, width, 2))
    center_threshold = 0.0
    thing_class_ids = tf.range(classes)  # No "stuff" classes.
    label_divisor = 256
    stuff_area_limit = 16
    void_label = classes
    nms_kernel_size = 3
    keep_k_centers = 2

    result = panoptic_deeplab_merge._get_panoptic_predictions(
        semantic_logits, center_heatmap, center_offsets, center_threshold,
        thing_class_ids, label_divisor, stuff_area_limit, void_label,
        nms_kernel_size, keep_k_centers)
    instance_maps = result[3].numpy()
    instance_scores = result[2].numpy()

    self.assertSequenceEqual(instance_maps.shape, (batch, height, width))
    expected_instances = [[
        [1, 1, 1, 1, 2],
        [1, 1, 1, 2, 2],
        [1, 1, 2, 2, 2],
        [1, 2, 2, 2, 2],
        [1, 2, 2, 2, 2],
    ]]
    np.testing.assert_array_equal(instance_maps, expected_instances)

    self.assertSequenceEqual(instance_scores.shape, (batch, height, width))
    expected_instance_scores = [[
        [1.0, 1.0, 1.0, 1.0, 0.7],
        [1.0, 1.0, 1.0, 0.7, 0.7],
        [1.0, 1.0, 0.7, 0.7, 0.7],
        [1.0, 0.7, 0.7, 0.7, 0.7],
        [1.0, 0.7, 0.7, 0.7, 0.7],
    ]]
    self.assertAllClose(result[2], tf.constant(expected_instance_scores))


if __name__ == '__main__':
  tf.test.main()
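The merge tests above hinge on one packing convention: each pixel's panoptic ID is `semantic_id * label_divisor + instance_id`, with instance ID 0 reserved for `stuff` regions. A tiny decoding helper (illustrative only, not part of the diff):

def decode_panoptic_id(panoptic_id, label_divisor=256):
  """Recovers (semantic_id, instance_id) from a packed panoptic ID."""
  return panoptic_id // label_divisor, panoptic_id % label_divisor

# From the simple example above: pixel value 2 * 256 + 1 is thing class 2,
# instance 1; pixel value 0 is stuff class 0.
assert decode_panoptic_id(2 * 256 + 1) == (2, 1)
assert decode_panoptic_id(0) == (0, 0)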
official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build Panoptic Deeplab model."""
from typing import Any, Mapping, Optional, Union

import tensorflow as tf

from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
@tf.keras.utils.register_keras_serializable(package='Vision')
class PanopticDeeplabModel(tf.keras.Model):
  """Panoptic Deeplab model."""

  def __init__(
      self,
      backbone: tf.keras.Model,
      semantic_decoder: tf.keras.Model,
      semantic_head: tf.keras.layers.Layer,
      instance_head: tf.keras.layers.Layer,
      instance_decoder: Optional[tf.keras.Model] = None,
      post_processor: Optional[panoptic_deeplab_merge.PostProcessor] = None,
      **kwargs):
    """Panoptic deeplab model initializer.

    Args:
      backbone: a backbone network.
      semantic_decoder: a decoder network, e.g. FPN.
      semantic_head: a segmentation head.
      instance_head: an instance center head.
      instance_decoder: an optional decoder network for instance predictions.
      post_processor: an optional post-processor layer.
      **kwargs: keyword arguments to be passed.
    """
    super(PanopticDeeplabModel, self).__init__(**kwargs)

    self._config_dict = {
        'backbone': backbone,
        'semantic_decoder': semantic_decoder,
        'instance_decoder': instance_decoder,
        'semantic_head': semantic_head,
        'instance_head': instance_head,
        'post_processor': post_processor
    }
    self.backbone = backbone
    self.semantic_decoder = semantic_decoder
    self.instance_decoder = instance_decoder
    self.semantic_head = semantic_head
    self.instance_head = instance_head
    self.post_processor = post_processor
  def call(
      self,
      inputs: tf.Tensor,
      image_info: tf.Tensor,
      training: bool = None):
    if training is None:
      training = tf.keras.backend.learning_phase()

    backbone_features = self.backbone(inputs, training=training)

    semantic_features = self.semantic_decoder(
        backbone_features, training=training)
    if self.instance_decoder is None:
      instance_features = semantic_features
    else:
      instance_features = self.instance_decoder(
          backbone_features, training=training)

    segmentation_outputs = self.semantic_head(
        (backbone_features, semantic_features), training=training)
    instance_outputs = self.instance_head(
        (backbone_features, instance_features), training=training)

    outputs = {
        'segmentation_outputs': segmentation_outputs,
        'instance_centers_heatmap':
            instance_outputs['instance_centers_heatmap'],
        'instance_centers_offset':
            instance_outputs['instance_centers_offset'],
    }
    if training:
      return outputs

    if self.post_processor is not None:
      panoptic_masks = self.post_processor(outputs, image_info)
      outputs.update(panoptic_masks)
    return outputs
  @property
  def checkpoint_items(
      self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]:
    """Returns a dictionary of items to be additionally checkpointed."""
    items = dict(
        backbone=self.backbone,
        semantic_decoder=self.semantic_decoder,
        semantic_head=self.semantic_head,
        instance_head=self.instance_head)
    if self.instance_decoder is not None:
      items.update(instance_decoder=self.instance_decoder)
    return items

  def get_config(self) -> Mapping[str, Any]:
    return self._config_dict

  @classmethod
  def from_config(cls, config, custom_objects=None):
    return cls(**config)
official/vision/beta/projects/panoptic_maskrcnn/modeling/panoptic_deeplab_model_test.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Panoptic Deeplab network."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from tensorflow.python.distribute import combinations
from official.vision.beta.projects.panoptic_maskrcnn.modeling import panoptic_deeplab_model
from official.vision.beta.projects.panoptic_maskrcnn.modeling.heads import panoptic_deeplab_heads
from official.vision.beta.projects.panoptic_maskrcnn.modeling.layers import panoptic_deeplab_merge
from official.vision.modeling import backbones
from official.vision.modeling.decoders import aspp
class PanopticDeeplabNetworkTest(parameterized.TestCase, tf.test.TestCase):

  @combinations.generate(
      combinations.combine(
          level=[2, 3, 4],
          input_size=[256, 512],
          low_level=[[4, 3], [3, 2]],
          shared_decoder=[True, False],
          training=[True, False]))
  def test_panoptic_deeplab_network_creation(
      self, input_size, level, low_level, shared_decoder, training):
    """Test for creation of a panoptic deeplab network."""
    batch_size = 2 if training else 1
    num_classes = 10
    inputs = np.random.rand(batch_size, input_size, input_size, 3)
    image_info = tf.convert_to_tensor(
        [[[input_size, input_size],
          [input_size, input_size],
          [1, 1],
          [0, 0]]])
    image_info = tf.tile(image_info, [batch_size, 1, 1])
    tf.keras.backend.set_image_data_format('channels_last')
    backbone = backbones.ResNet(model_id=50)

    semantic_decoder = aspp.ASPP(level=level, dilation_rates=[6, 12, 18])
    if shared_decoder:
      instance_decoder = semantic_decoder
    else:
      instance_decoder = aspp.ASPP(level=level, dilation_rates=[6, 12, 18])

    semantic_head = panoptic_deeplab_heads.SemanticHead(
        num_classes,
        level=level,
        low_level=low_level,
        low_level_num_filters=(64, 32))
    instance_head = panoptic_deeplab_heads.InstanceHead(
        level=level,
        low_level=low_level,
        low_level_num_filters=(64, 32))

    post_processor = panoptic_deeplab_merge.PostProcessor(
        output_size=[input_size, input_size],
        center_score_threshold=0.1,
        thing_class_ids=[1, 2, 3, 4],
        label_divisor=[256],
        stuff_area_limit=4096,
        ignore_label=0,
        nms_kernel=41,
        keep_k_centers=41,
        rescale_predictions=True)

    model = panoptic_deeplab_model.PanopticDeeplabModel(
        backbone=backbone,
        semantic_decoder=semantic_decoder,
        instance_decoder=instance_decoder,
        semantic_head=semantic_head,
        instance_head=instance_head,
        post_processor=post_processor)

    outputs = model(inputs=inputs, image_info=image_info, training=training)

    if training:
      self.assertIn('segmentation_outputs', outputs)
      self.assertIn('instance_centers_heatmap', outputs)
      self.assertIn('instance_centers_offset', outputs)

      self.assertAllEqual(
          [2,
           input_size // (2**low_level[-1]),
           input_size // (2**low_level[-1]),
           num_classes],
          outputs['segmentation_outputs'].numpy().shape)
      self.assertAllEqual(
          [2,
           input_size // (2**low_level[-1]),
           input_size // (2**low_level[-1]),
           1],
          outputs['instance_centers_heatmap'].numpy().shape)
      self.assertAllEqual(
          [2,
           input_size // (2**low_level[-1]),
           input_size // (2**low_level[-1]),
           2],
          outputs['instance_centers_offset'].numpy().shape)
    else:
      self.assertIn('panoptic_outputs', outputs)
      self.assertIn('category_mask', outputs)
      self.assertIn('instance_mask', outputs)
      self.assertIn('instance_centers', outputs)
      self.assertIn('instance_scores', outputs)
      self.assertIn('segmentation_outputs', outputs)
  @combinations.generate(
      combinations.combine(
          level=[2, 3, 4],
          low_level=[(4, 3), (3, 2)],
          shared_decoder=[True, False]))
  def test_serialize_deserialize(self, level, low_level, shared_decoder):
    """Validate the network can be serialized and deserialized."""
    num_classes = 10
    backbone = backbones.ResNet(model_id=50)

    semantic_decoder = aspp.ASPP(level=level, dilation_rates=[6, 12, 18])
    if shared_decoder:
      instance_decoder = semantic_decoder
    else:
      instance_decoder = aspp.ASPP(level=level, dilation_rates=[6, 12, 18])

    semantic_head = panoptic_deeplab_heads.SemanticHead(
        num_classes,
        level=level,
        low_level=low_level,
        low_level_num_filters=(64, 32))
    instance_head = panoptic_deeplab_heads.InstanceHead(
        level=level,
        low_level=low_level,
        low_level_num_filters=(64, 32))

    post_processor = panoptic_deeplab_merge.PostProcessor(
        output_size=[640, 640],
        center_score_threshold=0.1,
        thing_class_ids=[1, 2, 3, 4],
        label_divisor=[256],
        stuff_area_limit=4096,
        ignore_label=0,
        nms_kernel=41,
        keep_k_centers=41,
        rescale_predictions=True)

    model = panoptic_deeplab_model.PanopticDeeplabModel(
        backbone=backbone,
        semantic_decoder=semantic_decoder,
        instance_decoder=instance_decoder,
        semantic_head=semantic_head,
        instance_head=instance_head,
        post_processor=post_processor)

    config = model.get_config()
    new_model = panoptic_deeplab_model.PanopticDeeplabModel.from_config(config)

    # Validate that the config can be forced to JSON.
    _ = new_model.to_json()

    # If the serialization was successful, the new config should match the old.
    self.assertAllEqual(model.get_config(), new_model.get_config())


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/projects/panoptic_maskrcnn/ops/mask_ops.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for masks."""
import tensorflow as tf


def resize_and_rescale_offsets(input_tensor: tf.Tensor, target_size):
  """Bilinearly resizes and rescales the offsets.

  Reference:
  https://github.com/google-research/deeplab2/blob/main/model/utils.py#L157

  Args:
    input_tensor: A tf.Tensor of shape [batch, height, width, 2].
    target_size: A list or tuple or 1D tf.Tensor that specifies the height and
      width after resizing.

  Returns:
    The input_tensor resized to shape `[batch, target_height, target_width,
    2]`. Moreover, the offsets along the y-axis are rescaled by a factor equal
    to (target_height - 1) / (reference_height - 1) and the offsets along the
    x-axis are rescaled by a factor equal to
    (target_width - 1) / (reference_width - 1).
  """
  input_size_y = tf.shape(input_tensor)[1]
  input_size_x = tf.shape(input_tensor)[2]

  dtype = input_tensor.dtype
  scale_y = tf.cast(target_size[0] - 1, dtype=dtype) / tf.cast(
      input_size_y - 1, dtype=dtype)
  scale_x = tf.cast(target_size[1] - 1, dtype=dtype) / tf.cast(
      input_size_x - 1, dtype=dtype)

  target_y, target_x = tf.split(
      value=input_tensor, num_or_size_splits=2, axis=3)
  target_y *= scale_y
  target_x *= scale_x
  # Concatenate the rescaled offsets back together before resizing; resizing
  # the original, unscaled tensor would silently drop the rescaling that the
  # docstring promises.
  input_tensor = tf.concat([target_y, target_x], 3)
  return tf.image.resize(
      input_tensor, size=target_size, method=tf.image.ResizeMethod.BILINEAR)
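A quick sanity check of the rescaling (a sketch under the concat fix above; not part of the diff): constant unit offsets on a 5x5 grid resized to 9x9 should come back uniformly scaled by (9 - 1) / (5 - 1) = 2.

offsets = tf.ones((1, 5, 5, 2))
resized = resize_and_rescale_offsets(offsets, target_size=[9, 9])
# resized has shape [1, 9, 9, 2] and value 2.0 everywhere, since bilinear
# resizing of a constant map preserves the (already rescaled) constant.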
official/vision/beta/projects/panoptic_maskrcnn/tasks/panoptic_deeplab.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Panoptic Deeplab task definition."""
from typing import Any, Dict, List, Mapping, Optional, Tuple

from absl import logging
import tensorflow as tf

from official.common import dataset_fn
from official.core import base_task
from official.core import task_factory
from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as exp_cfg
from official.vision.beta.projects.panoptic_maskrcnn.dataloaders import panoptic_deeplab_input
from official.vision.beta.projects.panoptic_maskrcnn.losses import panoptic_deeplab_losses
from official.vision.beta.projects.panoptic_maskrcnn.modeling import factory
from official.vision.dataloaders import input_reader_factory
from official.vision.evaluation import panoptic_quality_evaluator
from official.vision.evaluation import segmentation_metrics
@task_factory.register_task_cls(exp_cfg.PanopticDeeplabTask)
class PanopticDeeplabTask(base_task.Task):
  """A task for Panoptic Deeplab."""

  def build_model(self):
    """Builds panoptic deeplab model."""
    input_specs = tf.keras.layers.InputSpec(
        shape=[None] + self.task_config.model.input_size)

    l2_weight_decay = self.task_config.losses.l2_weight_decay
    # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.
    # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2)
    # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss)
    l2_regularizer = (
        tf.keras.regularizers.l2(l2_weight_decay / 2.0)
        if l2_weight_decay else None)

    model = factory.build_panoptic_deeplab(
        input_specs=input_specs,
        model_config=self.task_config.model,
        l2_regularizer=l2_regularizer)
    return model
  def initialize(self, model: tf.keras.Model):
    """Loads pretrained checkpoint."""
    if not self.task_config.init_checkpoint:
      return

    ckpt_dir_or_file = self.task_config.init_checkpoint
    if tf.io.gfile.isdir(ckpt_dir_or_file):
      ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)

    # Restoring checkpoint.
    if 'all' in self.task_config.init_checkpoint_modules:
      ckpt = tf.train.Checkpoint(**model.checkpoint_items)
      status = ckpt.read(ckpt_dir_or_file)
      status.expect_partial().assert_existing_objects_matched()
    else:
      ckpt_items = {}
      if 'backbone' in self.task_config.init_checkpoint_modules:
        ckpt_items.update(backbone=model.backbone)
      if 'decoder' in self.task_config.init_checkpoint_modules:
        ckpt_items.update(semantic_decoder=model.semantic_decoder)
        if not self.task_config.model.shared_decoder:
          ckpt_items.update(instance_decoder=model.instance_decoder)

      ckpt = tf.train.Checkpoint(**ckpt_items)
      status = ckpt.read(ckpt_dir_or_file)
      status.expect_partial().assert_existing_objects_matched()

    logging.info('Finished loading pretrained checkpoint from %s',
                 ckpt_dir_or_file)
  def build_inputs(
      self,
      params: exp_cfg.DataConfig,
      input_context: Optional[tf.distribute.InputContext] = None):
    """Builds panoptic deeplab input."""
    decoder_cfg = params.decoder.get()
    if params.decoder.type == 'simple_decoder':
      decoder = panoptic_deeplab_input.TfExampleDecoder(
          regenerate_source_id=decoder_cfg.regenerate_source_id,
          panoptic_category_mask_key=decoder_cfg.panoptic_category_mask_key,
          panoptic_instance_mask_key=decoder_cfg.panoptic_instance_mask_key)
    else:
      raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type))

    parser = panoptic_deeplab_input.Parser(
        output_size=self.task_config.model.input_size[:2],
        ignore_label=params.parser.ignore_label,
        resize_eval_groundtruth=params.parser.resize_eval_groundtruth,
        groundtruth_padded_size=params.parser.groundtruth_padded_size,
        aug_scale_min=params.parser.aug_scale_min,
        aug_scale_max=params.parser.aug_scale_max,
        aug_rand_hflip=params.parser.aug_rand_hflip,
        aug_type=params.parser.aug_type,
        sigma=params.parser.sigma,
        dtype=params.parser.dtype)

    reader = input_reader_factory.input_reader_generator(
        params,
        dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
        decoder_fn=decoder.decode,
        parser_fn=parser.parse_fn(params.is_training))

    dataset = reader.read(input_context=input_context)
    return dataset
  def build_losses(
      self,
      labels: Mapping[str, tf.Tensor],
      model_outputs: Mapping[str, tf.Tensor],
      aux_losses: Optional[Any] = None):
    """Panoptic deeplab losses.

    Args:
      labels: labels.
      model_outputs: Output logits from panoptic deeplab.
      aux_losses: auxiliary loss tensors, i.e. `losses` in keras.Model.

    Returns:
      A dictionary of loss tensors, including the total loss.
    """
    loss_config = self._task_config.losses
    segmentation_loss_fn = (
        panoptic_deeplab_losses.WeightedBootstrappedCrossEntropyLoss(
            loss_config.label_smoothing,
            loss_config.class_weights,
            loss_config.ignore_label,
            top_k_percent_pixels=loss_config.top_k_percent_pixels))
    instance_center_heatmap_loss_fn = (
        panoptic_deeplab_losses.CenterHeatmapLoss())
    instance_center_offset_loss_fn = panoptic_deeplab_losses.CenterOffsetLoss()

    semantic_weights = tf.cast(
        labels['semantic_weights'],
        dtype=model_outputs['instance_centers_heatmap'].dtype)
    things_mask = tf.cast(
        tf.squeeze(labels['things_mask'], axis=3),
        dtype=model_outputs['instance_centers_heatmap'].dtype)
    valid_mask = tf.cast(
        tf.squeeze(labels['valid_mask'], axis=3),
        dtype=model_outputs['instance_centers_heatmap'].dtype)

    segmentation_loss = segmentation_loss_fn(
        model_outputs['segmentation_outputs'],
        labels['category_mask'],
        sample_weight=semantic_weights)
    instance_center_heatmap_loss = instance_center_heatmap_loss_fn(
        model_outputs['instance_centers_heatmap'],
        labels['instance_centers_heatmap'],
        sample_weight=valid_mask)
    instance_center_offset_loss = instance_center_offset_loss_fn(
        model_outputs['instance_centers_offset'],
        labels['instance_centers_offset'],
        sample_weight=things_mask)

    model_loss = (
        loss_config.segmentation_loss_weight * segmentation_loss +
        loss_config.center_heatmap_loss_weight * instance_center_heatmap_loss +
        loss_config.center_offset_loss_weight * instance_center_offset_loss)

    total_loss = model_loss
    if aux_losses:
      total_loss += tf.add_n(aux_losses)

    losses = {
        'total_loss': total_loss,
        'model_loss': model_loss,
        'segmentation_loss': segmentation_loss,
        'instance_center_heatmap_loss': instance_center_heatmap_loss,
        'instance_center_offset_loss': instance_center_offset_loss
    }
    return losses
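  # Summary of the combination computed above, with weights taken from the
  # task's loss config:
  #   model_loss = segmentation_loss_weight   * segmentation_loss
  #              + center_heatmap_loss_weight * instance_center_heatmap_loss
  #              + center_offset_loss_weight  * instance_center_offset_loss
  #   total_loss = model_loss + tf.add_n(aux_losses)  # if aux_losses exist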
  def build_metrics(
      self, training: bool = True) -> List[tf.keras.metrics.Metric]:
    """Build metrics."""
    eval_config = self.task_config.evaluation
    metrics = []
    if training:
      metric_names = [
          'total_loss',
          'segmentation_loss',
          'instance_center_heatmap_loss',
          'instance_center_offset_loss',
          'model_loss']
      for name in metric_names:
        metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))

      if eval_config.report_train_mean_iou:
        self.train_mean_iou = segmentation_metrics.MeanIoU(
            name='train_mean_iou',
            num_classes=self.task_config.model.num_classes,
            rescale_predictions=False,
            dtype=tf.float32)
    else:
      rescale_predictions = (
          not self.task_config.validation_data.parser.resize_eval_groundtruth)
      self.perclass_iou_metric = segmentation_metrics.PerClassIoU(
          name='per_class_iou',
          num_classes=self.task_config.model.num_classes,
          rescale_predictions=rescale_predictions,
          dtype=tf.float32)

      if isinstance(tf.distribute.get_strategy(), tf.distribute.TPUStrategy):
        self._process_iou_metric_on_cpu = True
      else:
        self._process_iou_metric_on_cpu = False

      if self.task_config.model.generate_panoptic_masks:
        self.panoptic_quality_metric = (
            panoptic_quality_evaluator.PanopticQualityEvaluator(
                num_categories=self.task_config.model.num_classes,
                ignored_label=eval_config.ignored_label,
                max_instances_per_category=(
                    eval_config.max_instances_per_category),
                offset=eval_config.offset,
                is_thing=eval_config.is_thing,
                rescale_predictions=eval_config.rescale_predictions))

      # Update state on CPU if TPUStrategy due to dynamic resizing.
      self._process_iou_metric_on_cpu = isinstance(
          tf.distribute.get_strategy(), tf.distribute.TPUStrategy)

    return metrics
  def train_step(
      self,
      inputs: Tuple[Any, Any],
      model: tf.keras.Model,
      optimizer: tf.keras.optimizers.Optimizer,
      metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
    """Does forward and backward.

    Args:
      inputs: a pair of (images, labels) input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    images, labels = inputs
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
    with tf.GradientTape() as tape:
      outputs = model(
          inputs=images,
          image_info=labels['image_info'],
          training=True)
      outputs = tf.nest.map_structure(
          lambda x: tf.cast(x, tf.float32), outputs)

      # Computes per-replica loss.
      losses = self.build_losses(
          labels=labels,
          model_outputs=outputs,
          aux_losses=model.losses)
      scaled_loss = losses['total_loss'] / num_replicas

      # For mixed_precision policy, when LossScaleOptimizer is used, loss is
      # scaled for numerical stability.
      if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)
    # Scales back gradient when LossScaleOptimizer is used.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))

    logs = {self.loss: losses['total_loss']}

    if metrics:
      for m in metrics:
        m.update_state(losses[m.name])

    if self.task_config.evaluation.report_train_mean_iou:
      segmentation_labels = {
          'masks': labels['category_mask'],
          'valid_masks': labels['valid_mask'],
          'image_info': labels['image_info']
      }
      self.process_metrics(
          metrics=[self.train_mean_iou],
          labels=segmentation_labels,
          model_outputs=outputs['segmentation_outputs'])
      logs.update({
          self.train_mean_iou.name: self.train_mean_iou.result()
      })

    return logs
  def validation_step(
      self,
      inputs: Tuple[Any, Any],
      model: tf.keras.Model,
      metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
    """Validation step.

    Args:
      inputs: a pair of (images, labels) input tensors.
      model: the keras.Model.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    images, labels = inputs

    outputs = model(
        inputs=images,
        image_info=labels['image_info'],
        training=False)

    logs = {self.loss: 0}

    segmentation_labels = {
        'masks': labels['category_mask'],
        'valid_masks': labels['valid_mask'],
        'image_info': labels['image_info']
    }

    if self._process_iou_metric_on_cpu:
      logs.update({
          self.perclass_iou_metric.name:
              (segmentation_labels, outputs['segmentation_outputs'])
      })
    else:
      self.perclass_iou_metric.update_state(
          segmentation_labels, outputs['segmentation_outputs'])

    if self.task_config.model.generate_panoptic_masks:
      pq_metric_labels = {
          'category_mask': tf.squeeze(labels['category_mask'], axis=3),
          'instance_mask': tf.squeeze(labels['instance_mask'], axis=3),
          'image_info': labels['image_info']
      }
      panoptic_outputs = {
          'category_mask': outputs['category_mask'],
          'instance_mask': outputs['instance_mask'],
      }
      logs.update({
          self.panoptic_quality_metric.name:
              (pq_metric_labels, panoptic_outputs)
      })
    return logs
  def aggregate_logs(self, state=None, step_outputs=None):
    if state is None:
      self.perclass_iou_metric.reset_states()
      state = [self.perclass_iou_metric]
      if self.task_config.model.generate_panoptic_masks:
        state += [self.panoptic_quality_metric]

    if self._process_iou_metric_on_cpu:
      self.perclass_iou_metric.update_state(
          step_outputs[self.perclass_iou_metric.name][0],
          step_outputs[self.perclass_iou_metric.name][1])

    if self.task_config.model.generate_panoptic_masks:
      self.panoptic_quality_metric.update_state(
          step_outputs[self.panoptic_quality_metric.name][0],
          step_outputs[self.panoptic_quality_metric.name][1])

    return state
  def reduce_aggregated_logs(self, aggregated_logs, global_step=None):
    result = {}
    ious = self.perclass_iou_metric.result()
    if self.task_config.evaluation.report_per_class_iou:
      for i, value in enumerate(ious.numpy()):
        result.update({'segmentation_iou/class_{}'.format(i): value})

    # Computes mean IoU.
    result.update({'segmentation_mean_iou': tf.reduce_mean(ious).numpy()})

    if self.task_config.model.generate_panoptic_masks:
      panoptic_quality_results = self.panoptic_quality_metric.result()
      for k, value in panoptic_quality_results.items():
        if k.endswith('per_class'):
          if self.task_config.evaluation.report_per_class_pq:
            for i, per_class_value in enumerate(value):
              metric_key = 'panoptic_quality/{}/class_{}'.format(k, i)
              result[metric_key] = per_class_value
          else:
            continue
        else:
          result['panoptic_quality/{}'.format(k)] = value
    return result
official/vision/beta/projects/panoptic_maskrcnn/tasks/panoptic_deeplab_test.py 0 → 100644
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for panoptic_deeplab.py."""
import os

from absl.testing import parameterized
import tensorflow as tf

from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab as cfg
from official.vision.beta.projects.panoptic_maskrcnn.tasks import panoptic_deeplab
# TODO(b/234636381): add unit test for train and validation step
class PanopticDeeplabTaskTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.parameters(
      (['all'], False),
      (['backbone'], False),
      (['decoder'], False),
      (['decoder'], True))
  def test_model_initializing(self, init_checkpoint_modules, shared_decoder):
    task_config = cfg.PanopticDeeplabTask(
        model=cfg.PanopticDeeplab(
            num_classes=10,
            input_size=[640, 640, 3],
            shared_decoder=shared_decoder))
    task = panoptic_deeplab.PanopticDeeplabTask(task_config)
    model = task.build_model()

    ckpt = tf.train.Checkpoint(**model.checkpoint_items)
    ckpt_save_dir = self.create_tempdir().full_path
    ckpt.save(os.path.join(ckpt_save_dir, 'ckpt'))

    task._task_config.init_checkpoint = ckpt_save_dir
    task._task_config.init_checkpoint_modules = init_checkpoint_modules
    task.initialize(model)
  @parameterized.parameters((True,), (False,))
  def test_build_metrics(self, training):
    task_config = cfg.PanopticDeeplabTask(
        model=cfg.PanopticDeeplab(
            num_classes=10,
            input_size=[640, 640, 3],
            shared_decoder=False))
    task = panoptic_deeplab.PanopticDeeplabTask(task_config)
    metrics = task.build_metrics(training=training)

    if training:
      expected_metric_names = {
          'total_loss',
          'segmentation_loss',
          'instance_center_heatmap_loss',
          'instance_center_offset_loss',
          'model_loss'
      }
      self.assertEqual(expected_metric_names,
                       set([metric.name for metric in metrics]))
    else:
      assert hasattr(task, 'perclass_iou_metric')
      assert hasattr(task, 'panoptic_quality_metric')


if __name__ == '__main__':
  tf.test.main()
official/vision/beta/projects/panoptic_maskrcnn/train.py
...
@@ -18,9 +18,12 @@ from absl import app
 from official.common import flags as tfm_flags
 from official.vision import train
-from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn as cfg  # pylint: disable=unused-import
-from official.vision.beta.projects.panoptic_maskrcnn.tasks import panoptic_maskrcnn as task  # pylint: disable=unused-import
+# pylint: disable=unused-import
+from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_deeplab
+from official.vision.beta.projects.panoptic_maskrcnn.configs import panoptic_maskrcnn
+from official.vision.beta.projects.panoptic_maskrcnn.tasks import panoptic_deeplab as panoptic_deeplab_task
+from official.vision.beta.projects.panoptic_maskrcnn.tasks import panoptic_maskrcnn as panoptic_maskrcnn_task
+# pylint: enable=unused-import
 if __name__ == '__main__':
   tfm_flags.define_flags()
...
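Note that these imports are deliberately kept despite being unused: importing the task module executes the `@task_factory.register_task_cls(exp_cfg.PanopticDeeplabTask)` decorator shown in tasks/panoptic_deeplab.py above, which registers the new task with the trainer. Without the import, the Panoptic DeepLab experiment would not be discoverable from train.py.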
official/vision/ops/augment.py
...
@@ -1583,6 +1583,7 @@ class AutoAugment(ImageAugment):
       'reduced_cifar10': self.policy_reduced_cifar10(),
       'svhn': self.policy_svhn(),
       'reduced_imagenet': self.policy_reduced_imagenet(),
+      'panoptic_deeplab_policy': self.panoptic_deeplab_policy(),
   }
   if not policies:
...
@@ -1888,6 +1889,16 @@ class AutoAugment(ImageAugment):
     ]
     return policy
+  @staticmethod
+  def panoptic_deeplab_policy():
+    policy = [
+        [('Sharpness', 0.4, 1.4), ('Brightness', 0.2, 2.0)],
+        [('Equalize', 0.0, 1.8), ('Contrast', 0.2, 2.0)],
+        [('Sharpness', 0.2, 1.8), ('Color', 0.2, 1.8)],
+        [('Solarize', 0.2, 1.4), ('Equalize', 0.6, 1.8)],
+        [('Sharpness', 0.2, 0.2), ('Equalize', 0.2, 1.4)]]
+    return policy
   @staticmethod
   def policy_test():
     """Autoaugment test policy for debugging."""
...
@@ -2025,7 +2036,7 @@ class RandAugment(ImageAugment):
       aug_image, aug_bboxes = tf.switch_case(
           branch_index=op_to_select,
           branch_fns=branch_fns,
-          default=lambda: (tf.identity(image), _maybe_identity(bboxes)))
+          default=lambda: (tf.identity(image), _maybe_identity(bboxes)))  # pylint: disable=cell-var-from-loop
       if self.prob_to_apply is not None:
         aug_image, aug_bboxes = tf.cond(
...