# Copyright 2022 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Panoptic Mask R-CNN configuration definition.""" import dataclasses import os from typing import List, Optional from official.core import config_definitions as cfg from official.core import exp_factory from official.modeling import hyperparams from official.modeling import optimization from official.projects.deepmac_maskrcnn.configs import deep_mask_head_rcnn as deepmac_maskrcnn from official.vision.configs import common from official.vision.configs import maskrcnn from official.vision.configs import semantic_segmentation SEGMENTATION_MODEL = semantic_segmentation.SemanticSegmentationModel SEGMENTATION_HEAD = semantic_segmentation.SegmentationHead _COCO_INPUT_PATH_BASE = 'coco/tfrecords' _COCO_TRAIN_EXAMPLES = 118287 _COCO_VAL_EXAMPLES = 5000 # pytype: disable=wrong-keyword-args @dataclasses.dataclass class Parser(maskrcnn.Parser): """Panoptic Mask R-CNN parser config.""" # If segmentation_resize_eval_groundtruth is set to False, original image # sizes are used for eval. In that case, # segmentation_groundtruth_padded_size has to be specified too to allow for # batching the variable input sizes of images. segmentation_resize_eval_groundtruth: bool = True segmentation_groundtruth_padded_size: List[int] = dataclasses.field( default_factory=list) segmentation_ignore_label: int = 255 panoptic_ignore_label: int = 0 # Setting this to true will enable parsing category_mask and instance_mask. include_panoptic_masks: bool = True @dataclasses.dataclass class TfExampleDecoder(common.TfExampleDecoder): """A simple TF Example decoder config.""" # Setting this to true will enable decoding category_mask and instance_mask. include_panoptic_masks: bool = True panoptic_category_mask_key: str = 'image/panoptic/category_mask' panoptic_instance_mask_key: str = 'image/panoptic/instance_mask' @dataclasses.dataclass class DataDecoder(common.DataDecoder): """Data decoder config.""" simple_decoder: TfExampleDecoder = TfExampleDecoder() @dataclasses.dataclass class DataConfig(maskrcnn.DataConfig): """Input config for training.""" decoder: DataDecoder = DataDecoder() parser: Parser = Parser() @dataclasses.dataclass class PanopticSegmentationGenerator(hyperparams.Config): """Panoptic segmentation generator config.""" output_size: List[int] = dataclasses.field( default_factory=list) mask_binarize_threshold: float = 0.5 score_threshold: float = 0.5 things_overlap_threshold: float = 0.5 stuff_area_threshold: float = 4096.0 things_class_label: int = 1 void_class_label: int = 0 void_instance_id: int = 0 rescale_predictions: bool = False @dataclasses.dataclass class PanopticMaskRCNN(deepmac_maskrcnn.DeepMaskHeadRCNN): """Panoptic Mask R-CNN model config.""" segmentation_model: semantic_segmentation.SemanticSegmentationModel = ( SEGMENTATION_MODEL(num_classes=2)) include_mask = True shared_backbone: bool = True shared_decoder: bool = True stuff_classes_offset: int = 0 generate_panoptic_masks: bool = True panoptic_segmentation_generator: PanopticSegmentationGenerator = PanopticSegmentationGenerator() # pylint:disable=line-too-long @dataclasses.dataclass class Losses(maskrcnn.Losses): """Panoptic Mask R-CNN loss config.""" semantic_segmentation_label_smoothing: float = 0.0 semantic_segmentation_ignore_label: int = 255 semantic_segmentation_gt_is_matting_map: bool = False semantic_segmentation_class_weights: List[float] = dataclasses.field( default_factory=list) semantic_segmentation_use_groundtruth_dimension: bool = True semantic_segmentation_top_k_percent_pixels: float = 1.0 instance_segmentation_weight: float = 1.0 semantic_segmentation_weight: float = 0.5 @dataclasses.dataclass class PanopticQualityEvaluator(hyperparams.Config): """Panoptic Quality Evaluator config.""" num_categories: int = 2 ignored_label: int = 0 max_instances_per_category: int = 256 offset: int = 256 * 256 * 256 is_thing: List[float] = dataclasses.field( default_factory=list) rescale_predictions: bool = False report_per_class_metrics: bool = False @dataclasses.dataclass class PanopticMaskRCNNTask(maskrcnn.MaskRCNNTask): """Panoptic Mask R-CNN task config.""" model: PanopticMaskRCNN = PanopticMaskRCNN() train_data: DataConfig = DataConfig(is_training=True) validation_data: DataConfig = DataConfig(is_training=False, drop_remainder=False) segmentation_evaluation: semantic_segmentation.Evaluation = semantic_segmentation.Evaluation() # pylint: disable=line-too-long losses: Losses = Losses() init_checkpoint: Optional[str] = None segmentation_init_checkpoint: Optional[str] = None # 'init_checkpoint_modules' controls the modules that need to be initialized # from checkpoint paths given by 'init_checkpoint' and/or # 'segmentation_init_checkpoint. Supports modules: # 'backbone': Initialize MaskRCNN backbone # 'segmentation_backbone': Initialize segmentation backbone # 'segmentation_decoder': Initialize segmentation decoder # 'all': Initialize all modules init_checkpoint_modules: Optional[List[str]] = dataclasses.field( default_factory=list) panoptic_quality_evaluator: PanopticQualityEvaluator = PanopticQualityEvaluator() # pylint: disable=line-too-long @exp_factory.register_config_factory('panoptic_fpn_coco') def panoptic_fpn_coco() -> cfg.ExperimentConfig: """COCO panoptic segmentation with Panoptic Mask R-CNN.""" train_batch_size = 64 eval_batch_size = 8 steps_per_epoch = _COCO_TRAIN_EXAMPLES // train_batch_size validation_steps = _COCO_VAL_EXAMPLES // eval_batch_size # coco panoptic dataset has category ids ranging from [0-200] inclusive. # 0 is not used and represents the background class # ids 1-91 represent thing categories (91) # ids 92-200 represent stuff categories (109) # for the segmentation task, we continue using id=0 for the background # and map all thing categories to id=1, the remaining 109 stuff categories # are shifted by an offset=90 given by num_thing classes - 1. This shifting # will make all the stuff categories begin from id=2 and end at id=110 num_panoptic_categories = 201 num_thing_categories = 91 num_semantic_segmentation_classes = 111 is_thing = [False] for idx in range(1, num_panoptic_categories): is_thing.append(True if idx <= num_thing_categories else False) config = cfg.ExperimentConfig( runtime=cfg.RuntimeConfig( mixed_precision_dtype='float32', enable_xla=True), task=PanopticMaskRCNNTask( init_checkpoint='gs://cloud-tpu-checkpoints/vision-2.0/resnet50_imagenet/ckpt-28080', # pylint: disable=line-too-long init_checkpoint_modules=['backbone'], model=PanopticMaskRCNN( num_classes=91, input_size=[1024, 1024, 3], panoptic_segmentation_generator=PanopticSegmentationGenerator( output_size=[640, 640], rescale_predictions=True), stuff_classes_offset=90, segmentation_model=SEGMENTATION_MODEL( num_classes=num_semantic_segmentation_classes, head=SEGMENTATION_HEAD( level=2, num_convs=0, num_filters=128, decoder_min_level=2, decoder_max_level=6, feature_fusion='panoptic_fpn_fusion'))), losses=Losses(l2_weight_decay=0.00004), train_data=DataConfig( input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'train*'), is_training=True, global_batch_size=train_batch_size, parser=Parser( aug_rand_hflip=True, aug_scale_min=0.8, aug_scale_max=1.25)), validation_data=DataConfig( input_path=os.path.join(_COCO_INPUT_PATH_BASE, 'val*'), is_training=False, global_batch_size=eval_batch_size, parser=Parser( segmentation_resize_eval_groundtruth=False, segmentation_groundtruth_padded_size=[640, 640]), drop_remainder=False), annotation_file=os.path.join(_COCO_INPUT_PATH_BASE, 'instances_val2017.json'), segmentation_evaluation=semantic_segmentation.Evaluation( report_per_class_iou=False, report_train_mean_iou=False), panoptic_quality_evaluator=PanopticQualityEvaluator( num_categories=num_panoptic_categories, ignored_label=0, is_thing=is_thing, rescale_predictions=True)), trainer=cfg.TrainerConfig( train_steps=22500, validation_steps=validation_steps, validation_interval=steps_per_epoch, steps_per_loop=steps_per_epoch, summary_interval=steps_per_epoch, checkpoint_interval=steps_per_epoch, optimizer_config=optimization.OptimizationConfig({ 'optimizer': { 'type': 'sgd', 'sgd': { 'momentum': 0.9 } }, 'learning_rate': { 'type': 'stepwise', 'stepwise': { 'boundaries': [15000, 20000], 'values': [0.12, 0.012, 0.0012], } }, 'warmup': { 'type': 'linear', 'linear': { 'warmup_steps': 500, 'warmup_learning_rate': 0.0067 } } })), restrictions=[ 'task.train_data.is_training != None', 'task.validation_data.is_training != None' ]) return config