Commit 1d077fe6 authored by Vighnesh Birodkar's avatar Vighnesh Birodkar Committed by TF Object Detection Team

Open source DeepMAC architecture.

PiperOrigin-RevId: 366382220
parent ec543191
@@ -73,6 +73,30 @@ documentation of the Object Detection API:
## What's New
### DeepMAC architecture
We have released our new architecture, **DeepMAC**, designed for partially
supervised instance segmentation. DeepMAC stands for Deep Mask-heads
Above CenterNet, and is based on our CenterNet implementation. In our
[paper](https://arxiv.org/abs/2104.00613) we show that DeepMAC achieves
state-of-the-art results on the partially supervised instance segmentation
task without using any specialty modules or losses; just better mask-head
architectures. The findings from our paper are not specific to CenterNet and
can also be applied to Mask R-CNN, or used without any detector at all.
Please see the links below for more details.
* [DeepMAC documentation](g3doc/deepmac.md).
* [Mask R-CNN code](https://github.com/tensorflow/models/tree/master/official/vision/beta/projects/deepmac_maskrcnn)
in the TF Model Garden code base.
* [DeepMAC Colab](../colab_tutorials/deepmac_colab.ipynb) that lets you run a
pre-trained DeepMAC model on user-specified boxes. Note that you are not
restricted to COCO classes!
* Project website - [git.io/deepmac](https://git.io/deepmac)
<b>Thanks to contributors</b>: Vighnesh Birodkar, Zhichao Lu, Siyang Li,
Vivek Rathod, Jonathan Huang
### Mobile Inference for TF2 models
TF2 OD API models can now be converted to TensorFlow Lite! Only SSD models
...
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "deepmac_demo.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "P-esW81yhfCN"
},
"source": [
"# Novel class segmentation demo with Deep-MAC\n",
"\n",
"Welcome to the Novel class segmentation (with Deep-MAC) demo --- this colab loads a Deep-MAC model and tests it interactively with user-specified boxes. Deep-MAC was only trained to detect and segment COCO classes, but generalizes well when segmenting within user-specified boxes of unseen classes.\n",
"\n",
"Estimated time to run through this colab (with GPU): 10-15 minutes.\n",
"Note that the bulk of this time is in installing Tensorflow and downloading\n",
"the checkpoint then running inference for the first time. Once you've done\n",
"all that, running on new images is very fast."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Kq1eGNssiW31"
},
"source": [
"# Prerequisites\n",
"\n",
"Please change runtime to GPU."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UT7N0HJhiRKr"
},
"source": [
"# Installation and Imports\n",
"\n",
"This takes 3-4 minutes."
]
},
{
"cell_type": "code",
"metadata": {
"id": "nNdls0Pe0UPK"
},
"source": [
"!pip install -U --pre tensorflow==\"2.2.0\"\n",
"\n",
"import os\n",
"import pathlib\n",
"\n",
"# Clone the tensorflow models repository if it doesn't already exist\n",
"if \"models\" in pathlib.Path.cwd().parts:\n",
" while \"models\" in pathlib.Path.cwd().parts:\n",
" os.chdir('..')\n",
"elif not pathlib.Path('models').exists():\n",
" !git clone --depth 1 https://github.com/tensorflow/models\n"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "WwjV9clX0n7S"
},
"source": [
"# Install the Object Detection API\n",
"%%bash\n",
"cd models/research/\n",
"protoc object_detection/protos/*.proto --python_out=.\n",
"cp object_detection/packages/tf2/setup.py .\n",
"python -m pip install ."
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "sfrrno2L0sRR"
},
"source": [
"import glob\n",
"import io\n",
"import logging\n",
"import os\n",
"import random\n",
"import warnings\n",
"\n",
"import imageio\n",
"from IPython.display import display, Javascript\n",
"from IPython.display import Image as IPyImage\n",
"import matplotlib\n",
"from matplotlib import patches\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from object_detection.utils import colab_utils\n",
"from object_detection.utils import ops\n",
"from object_detection.utils import visualization_utils as viz_utils\n",
"from PIL import Image, ImageDraw, ImageFont\n",
"import scipy.misc\n",
"from six import BytesIO\n",
"from skimage import color\n",
"from skimage import transform\n",
"from skimage import util\n",
"from skimage.color import rgb_colors\n",
"import tensorflow as tf\n",
"\n",
"%matplotlib inline\n",
"\n",
"COLORS = ([rgb_colors.cyan, rgb_colors.orange, rgb_colors.pink,\n",
" rgb_colors.purple, rgb_colors.limegreen , rgb_colors.crimson] +\n",
" [(color) for (name, color) in color.color_dict.items()])\n",
"random.shuffle(COLORS)\n",
"\n",
"logging.disable(logging.WARNING)\n",
"\n",
"\n",
"def read_image(path):\n",
" \"\"\"Read an image and optionally resize it for better plotting.\"\"\"\n",
" with tf.io.gfile.GFile(path, 'rb') as f:\n",
" img = Image.open(f)\n",
" return np.array(img, dtype=np.uint8)\n",
"\n",
"\n",
"def resize_for_display(image, max_height=600):\n",
" height, width, _ = image.shape\n",
" width = int(width * max_height / height)\n",
" with warnings.catch_warnings():\n",
" warnings.simplefilter(\"ignore\", UserWarning)\n",
" return util.img_as_ubyte(transform.resize(image, (height, width)))\n",
"\n",
"\n",
"def get_mask_prediction_function(model):\n",
" \"\"\"Get single image mask prediction function using a model.\"\"\"\n",
"\n",
" @tf.function\n",
" def predict_masks(image, boxes):\n",
" height, width, _ = image.shape.as_list()\n",
" batch = image[tf.newaxis]\n",
" boxes = boxes[tf.newaxis]\n",
"\n",
" detections = model(batch, boxes)\n",
" masks = detections['detection_masks']\n",
"\n",
" return ops.reframe_box_masks_to_image_masks(masks[0], boxes[0],\n",
" height, width)\n",
"\n",
" return predict_masks\n",
"\n",
"\n",
"def plot_image_annotations(image, boxes, masks, darken_image=0.5):\n",
" fig, ax = plt.subplots(figsize=(16, 12))\n",
" ax.set_axis_off()\n",
" image = (image * darken_image).astype(np.uint8)\n",
" ax.imshow(image)\n",
"\n",
" height, width, _ = image.shape\n",
"\n",
" num_colors = len(COLORS)\n",
" color_index = 0\n",
"\n",
" for box, mask in zip(boxes, masks):\n",
" ymin, xmin, ymax, xmax = box\n",
" ymin *= height\n",
" ymax *= height\n",
" xmin *= width\n",
" xmax *= width\n",
"\n",
" color = COLORS[color_index]\n",
" color = np.array(color)\n",
" rect = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,\n",
" linewidth=2.5, edgecolor=color, facecolor='none')\n",
" ax.add_patch(rect)\n",
" mask = (mask > 0.5).astype(np.float32)\n",
" color_image = np.ones_like(image) * color[np.newaxis, np.newaxis, :]\n",
" color_and_mask = np.concatenate(\n",
" [color_image, mask[:, :, np.newaxis]], axis=2)\n",
"\n",
" ax.imshow(color_and_mask, alpha=0.5)\n",
"\n",
" color_index = (color_index + 1) % num_colors\n",
"\n",
" return ax"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "ry9yq8zsi0Gg"
},
"source": [
"# Load Deep-MAC Model\n",
"\n",
"This can take up to 5 minutes."
]
},
{
"cell_type": "code",
"metadata": {
"id": "PZ-wnbYu05K8"
},
"source": [
"print('Downloading and untarring model')\n",
"!wget http://download.tensorflow.org/models/object_detection/tf2/20210329/deepmac_1024x1024_coco17.tar.gz\n",
"!cp deepmac_1024x1024_coco17.tar.gz models/research/object_detection/test_data/\n",
"!tar -xzf models/research/object_detection/test_data/deepmac_1024x1024_coco17.tar.gz\n",
"!mv deepmac_1024x1024_coco17 models/research/object_detection/test_data/\n",
"model_path = 'models/research/object_detection/test_data/deepmac_1024x1024_coco17/saved_model'\n",
"\n",
"print('Loading SavedModel')\n",
"model = tf.keras.models.load_model(model_path)\n",
"prediction_function = get_mask_prediction_function(model)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "ilXkYOB_NUSc"
},
"source": [
"# Load image"
]
},
{
"cell_type": "code",
"metadata": {
"id": "txj4UkoDNaOq"
},
"source": [
"image_path = 'models/research/object_detection/test_images/image3.jpg'\n",
"image = read_image(image_path)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "zyhudgYUjcvE"
},
"source": [
"# Annotate an image with one or more boxes\n",
"\n",
"This model is trained on COCO categories, but we encourage you to try segmenting\n",
"anything you want!\n",
"\n",
"Don't forget to hit **submit** when done."
]
},
{
"cell_type": "code",
"metadata": {
"id": "aZvY4At0074j"
},
"source": [
"display_image = resize_for_display(image)\n",
"\n",
"boxes_list = []\n",
"colab_utils.annotate([display_image], boxes_list)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "gUUG7NPBJMoa"
},
"source": [
"# In case you didn't want to label...\n",
"\n",
"Run this cell only if you didn't annotate anything above and would prefer to just use our preannotated boxes. Don't forget to uncomment.\n",
"\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "lupqTv1HJK5K"
},
"source": [
"# boxes_list = [np.array([[0.000, 0.160, 0.362, 0.812],\n",
"# [0.340, 0.286, 0.472, 0.619],\n",
"# [0.437, 0.008, 0.650, 0.263],\n",
"# [0.382, 0.003, 0.538, 0.594],\n",
"# [0.518, 0.444, 0.625,0.554]], dtype=np.float32)]"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ak1WO93NjvN-"
},
"source": [
"# Visualize mask predictions"
]
},
{
"cell_type": "code",
"metadata": {
"id": "vdzuKnpj1A3L"
},
"source": [
"%matplotlib inline\n",
"\n",
"boxes = boxes_list[0]\n",
"masks = prediction_function(tf.convert_to_tensor(image),\n",
" tf.convert_to_tensor(boxes, dtype=tf.float32))\n",
"plot_image_annotations(image, boxes, masks.numpy())\n",
"plt.show()"
],
"execution_count": null,
"outputs": []
}
]
}
# DeepMAC meta architecture from the "The surprising impact of mask-head
# architecture on novel class segmentation" [1] paper with an Hourglass-100[2]
# mask head. This config is trained on all COCO classes and achieves a
# mask mAP of 39.4% on the COCO testdev-2017 set.
# [1]: https://arxiv.org/abs/2104.00613
# [2]: https://arxiv.org/abs/1904.07850
# Train on TPU-128
model {
center_net {
num_classes: 90
feature_extractor {
type: "hourglass_104"
bgr_ordering: true
channel_means: [104.01362025, 114.03422265, 119.9165958 ]
channel_stds: [73.6027665 , 69.89082075, 70.9150767 ]
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 1024
max_dimension: 1024
pad_to_max_dimension: true
}
}
object_detection_task {
task_loss_weight: 1.0
offset_loss_weight: 1.0
scale_loss_weight: 0.1
localization_loss {
l1_localization_loss {
}
}
}
object_center_params {
object_center_loss_weight: 1.0
min_box_overlap_iou: 0.7
max_box_predictions: 100
classification_loss {
penalty_reduced_logistic_focal_loss {
alpha: 2.0
beta: 4.0
}
}
}
deepmac_mask_estimation {
dim: 32
task_loss_weight: 5.0
pixel_embedding_dim: 16
mask_size: 32
use_xy: true
use_instance_embedding: true
network_type: "hourglass100"
classification_loss {
weighted_sigmoid {}
}
}
}
}
train_config: {
batch_size: 128
num_steps: 50000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_adjust_hue {
}
}
data_augmentation_options {
random_adjust_contrast {
}
}
data_augmentation_options {
random_adjust_saturation {
}
}
data_augmentation_options {
random_adjust_brightness {
}
}
data_augmentation_options {
random_square_crop_by_scale {
scale_min: 0.6
scale_max: 1.3
}
}
optimizer {
adam_optimizer: {
epsilon: 1e-7 # Match tf.keras.optimizers.Adam's default.
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 1e-3
total_steps: 50000
warmup_learning_rate: 2.5e-4
warmup_steps: 5000
}
}
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-51"
fine_tune_checkpoint_type: "fine_tune"
}
train_input_reader: {
load_instance_masks: true
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
mask_type: PNG_MASKS
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
metrics_set: "coco_mask_metrics"
include_metrics_per_category: true
use_moving_averages: false
batch_size: 1;
super_categories {
key: "VOC"
value: "person,bicycle,car,motorcycle,airplane,bus,train,boat,bird,cat,"
"dog,horse,sheep,cow,bottle,chair,couch,potted plant,dining table,tv"
}
super_categories {
key: "NonVOC"
value: "truck,traffic light,fire hydrant,stop sign,parking meter,bench,"
"elephant,bear,zebra,giraffe,backpack,umbrella,handbag,tie,suitcase,"
"frisbee,skis,snowboard,sports ball,kite,baseball bat,baseball glove,"
"skateboard,surfboard,tennis racket,wine glass,cup,fork,knife,spoon,bowl,"
"banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,cake,bed,"
"toilet,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster,"
"sink,refrigerator,book,clock,vase,scissors,teddy bear,hair drier,"
"toothbrush"
}
super_categories {
key: "person"
value: "person"
}
super_categories {
key: "vehicle"
value: "bicycle,car,motorcycle,airplane,bus,train,truck,boat"
}
super_categories {
key: "outdoor"
value: "traffic light,fire hydrant,stop sign,parking meter,bench"
}
super_categories {
key: "animal"
value: "bird,cat,dog,horse,sheep,cow,elephant,bear,zebra,giraffe"
}
super_categories {
key: "accessory"
value: "backpack,umbrella,handbag,tie,suitcase"
}
super_categories {
key: "sports"
value: "frisbee,skis,snowboard,sports ball,kite,baseball bat,"
"baseball glove,skateboard,surfboard,tennis racket"
}
super_categories {
key: "kitchen"
value: "bottle,wine glass,cup,fork,knife,spoon,bowl"
}
super_categories {
key: "food"
value: "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,"
"cake"
}
super_categories {
key: "furniture"
value: "chair,couch,potted plant,bed,dining table,toilet"
}
super_categories {
key: "electronic"
value: "tv,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster,"
"sink,refrigerator"
}
super_categories {
key: "indoor"
value: "book,clock,vase,scissors,teddy bear,hair drier,toothbrush"
}
}
eval_input_reader: {
load_instance_masks: true
mask_type: PNG_MASKS
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# DeepMAC meta architecture from the "The surprising impact of mask-head
# architecture on novel class segmentation" [1] paper with an Hourglass-100[2]
# mask head. This config is trained on masks from the non-VOC classes and
# achieves a mask mAP of 39.1% on the VOC classes.
# [1]: https://arxiv.org/abs/2104.00613
# [2]: https://arxiv.org/abs/1904.07850
# Train on TPU-128
model {
center_net {
num_classes: 90
feature_extractor {
type: "hourglass_104"
bgr_ordering: true
channel_means: [104.01362025, 114.03422265, 119.9165958 ]
channel_stds: [73.6027665 , 69.89082075, 70.9150767 ]
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 1024
max_dimension: 1024
pad_to_max_dimension: true
}
}
object_detection_task {
task_loss_weight: 1.0
offset_loss_weight: 1.0
scale_loss_weight: 0.1
localization_loss {
l1_localization_loss {
}
}
}
object_center_params {
object_center_loss_weight: 1.0
min_box_overlap_iou: 0.7
max_box_predictions: 100
classification_loss {
penalty_reduced_logistic_focal_loss {
alpha: 2.0
beta: 4.0
}
}
}
deepmac_mask_estimation {
dim: 32
task_loss_weight: 5.0
pixel_embedding_dim: 16
mask_size: 32
use_xy: true
use_instance_embedding: true
network_type: "hourglass100"
classification_loss {
weighted_sigmoid {}
}
allowed_masked_classes_ids: [
8,
10,
11,
13,
14,
15,
22,
23,
24,
25,
27,
28,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
65,
70,
73,
74,
75,
76,
77,
78,
79,
80,
81,
82,
84,
85,
86,
87,
88,
89,
90
]
}
}
}
train_config: {
batch_size: 128
num_steps: 50000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_adjust_hue {
}
}
data_augmentation_options {
random_adjust_contrast {
}
}
data_augmentation_options {
random_adjust_saturation {
}
}
data_augmentation_options {
random_adjust_brightness {
}
}
data_augmentation_options {
random_square_crop_by_scale {
scale_min: 0.6
scale_max: 1.3
}
}
optimizer {
adam_optimizer: {
epsilon: 1e-7 # Match tf.keras.optimizers.Adam's default.
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 1e-3
total_steps: 50000
warmup_learning_rate: 2.5e-4
warmup_steps: 5000
}
}
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-51"
fine_tune_checkpoint_type: "fine_tune"
}
train_input_reader: {
load_instance_masks: true
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
mask_type: PNG_MASKS
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
metrics_set: "coco_mask_metrics"
include_metrics_per_category: true
use_moving_averages: false
batch_size: 1;
super_categories {
key: "VOC"
value: "person,bicycle,car,motorcycle,airplane,bus,train,boat,bird,cat,"
"dog,horse,sheep,cow,bottle,chair,couch,potted plant,dining table,tv"
}
super_categories {
key: "NonVOC"
value: "truck,traffic light,fire hydrant,stop sign,parking meter,bench,"
"elephant,bear,zebra,giraffe,backpack,umbrella,handbag,tie,suitcase,"
"frisbee,skis,snowboard,sports ball,kite,baseball bat,baseball glove,"
"skateboard,surfboard,tennis racket,wine glass,cup,fork,knife,spoon,bowl,"
"banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,cake,bed,"
"toilet,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster,"
"sink,refrigerator,book,clock,vase,scissors,teddy bear,hair drier,"
"toothbrush"
}
super_categories {
key: "person"
value: "person"
}
super_categories {
key: "vehicle"
value: "bicycle,car,motorcycle,airplane,bus,train,truck,boat"
}
super_categories {
key: "outdoor"
value: "traffic light,fire hydrant,stop sign,parking meter,bench"
}
super_categories {
key: "animal"
value: "bird,cat,dog,horse,sheep,cow,elephant,bear,zebra,giraffe"
}
super_categories {
key: "accessory"
value: "backpack,umbrella,handbag,tie,suitcase"
}
super_categories {
key: "sports"
value: "frisbee,skis,snowboard,sports ball,kite,baseball bat,"
"baseball glove,skateboard,surfboard,tennis racket"
}
super_categories {
key: "kitchen"
value: "bottle,wine glass,cup,fork,knife,spoon,bowl"
}
super_categories {
key: "food"
value: "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,"
"cake"
}
super_categories {
key: "furniture"
value: "chair,couch,potted plant,bed,dining table,toilet"
}
super_categories {
key: "electronic"
value: "tv,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster,"
"sink,refrigerator"
}
super_categories {
key: "indoor"
value: "book,clock,vase,scissors,teddy bear,hair drier,toothbrush"
}
}
eval_input_reader: {
load_instance_masks: true
mask_type: PNG_MASKS
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# DeepMAC meta architecture from the "The surprising impact of mask-head
# architecture on novel class segmentation" [1] paper with an Hourglass-100[2]
# mask head. This config is only trained on masks from the VOC classes in COCO
# and achieves a mask mAP of 35.5% on non-VOC classes.
# [1]: https://arxiv.org/abs/2104.00613
# [2]: https://arxiv.org/abs/1904.07850
# Train on TPU-128
model {
center_net {
num_classes: 90
feature_extractor {
type: "hourglass_104"
bgr_ordering: true
channel_means: [104.01362025, 114.03422265, 119.9165958 ]
channel_stds: [73.6027665 , 69.89082075, 70.9150767 ]
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 1024
max_dimension: 1024
pad_to_max_dimension: true
}
}
object_detection_task {
task_loss_weight: 1.0
offset_loss_weight: 1.0
scale_loss_weight: 0.1
localization_loss {
l1_localization_loss {
}
}
}
object_center_params {
object_center_loss_weight: 1.0
min_box_overlap_iou: 0.7
max_box_predictions: 100
classification_loss {
penalty_reduced_logistic_focal_loss {
alpha: 2.0
beta: 4.0
}
}
}
deepmac_mask_estimation {
dim: 32
task_loss_weight: 5.0
pixel_embedding_dim: 16
mask_size: 32
use_xy: true
use_instance_embedding: true
network_type: "hourglass100"
classification_loss {
weighted_sigmoid {}
}
allowed_masked_classes_ids: [
1, # person
2, # bicycle
3, # car
4, # motorcycle/motorbike
5, # airplane/aeroplane
6, # bus
7, # train
9, # boat
16, # bird
17, # cat
18, # dog
19, # horse
20, # sheep
21, # cow
44, # bottle
62, # chair
63, # couch/sofa
64, # potted plant
67, # dining table
72 # tvmonitor
]
}
}
}
train_config: {
batch_size: 128
num_steps: 50000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_adjust_hue {
}
}
data_augmentation_options {
random_adjust_contrast {
}
}
data_augmentation_options {
random_adjust_saturation {
}
}
data_augmentation_options {
random_adjust_brightness {
}
}
data_augmentation_options {
random_square_crop_by_scale {
scale_min: 0.6
scale_max: 1.3
}
}
optimizer {
adam_optimizer: {
epsilon: 1e-7 # Match tf.keras.optimizers.Adam's default.
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 1e-3
total_steps: 50000
warmup_learning_rate: 2.5e-4
warmup_steps: 5000
}
}
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-51"
fine_tune_checkpoint_type: "fine_tune"
}
train_input_reader: {
load_instance_masks: true
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
mask_type: PNG_MASKS
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
metrics_set: "coco_mask_metrics"
include_metrics_per_category: true
use_moving_averages: false
batch_size: 1;
super_categories {
key: "VOC"
value: "person,bicycle,car,motorcycle,airplane,bus,train,boat,bird,cat,"
"dog,horse,sheep,cow,bottle,chair,couch,potted plant,dining table,tv"
}
super_categories {
key: "NonVOC"
value: "truck,traffic light,fire hydrant,stop sign,parking meter,bench,"
"elephant,bear,zebra,giraffe,backpack,umbrella,handbag,tie,suitcase,"
"frisbee,skis,snowboard,sports ball,kite,baseball bat,baseball glove,"
"skateboard,surfboard,tennis racket,wine glass,cup,fork,knife,spoon,bowl,"
"banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,cake,bed,"
"toilet,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster,"
"sink,refrigerator,book,clock,vase,scissors,teddy bear,hair drier,"
"toothbrush"
}
super_categories {
key: "person"
value: "person"
}
super_categories {
key: "vehicle"
value: "bicycle,car,motorcycle,airplane,bus,train,truck,boat"
}
super_categories {
key: "outdoor"
value: "traffic light,fire hydrant,stop sign,parking meter,bench"
}
super_categories {
key: "animal"
value: "bird,cat,dog,horse,sheep,cow,elephant,bear,zebra,giraffe"
}
super_categories {
key: "accessory"
value: "backpack,umbrella,handbag,tie,suitcase"
}
super_categories {
key: "sports"
value: "frisbee,skis,snowboard,sports ball,kite,baseball bat,"
"baseball glove,skateboard,surfboard,tennis racket"
}
super_categories {
key: "kitchen"
value: "bottle,wine glass,cup,fork,knife,spoon,bowl"
}
super_categories {
key: "food"
value: "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,"
"cake"
}
super_categories {
key: "furniture"
value: "chair,couch,potted plant,bed,dining table,toilet"
}
super_categories {
key: "electronic"
value: "tv,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster,"
"sink,refrigerator"
}
super_categories {
key: "indoor"
value: "book,clock,vase,scissors,teddy bear,hair drier,toothbrush"
}
}
eval_input_reader: {
load_instance_masks: true
mask_type: PNG_MASKS
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# DeepMAC meta architecture from the "The surprising impact of mask-head
# architecture on novel class segmentation" [1] paper with an Hourglass-52[2]
# mask head. This config is only trained on masks from the VOC classes in COCO
# and achieves a mask mAP of 32.5% on non-VOC classes.
# [1]: https://arxiv.org/abs/2104.00613
# [2]: https://arxiv.org/abs/1904.07850
# Train on TPU-32
model {
center_net {
num_classes: 90
feature_extractor {
type: "hourglass_104"
bgr_ordering: true
channel_means: [104.01362025, 114.03422265, 119.9165958 ]
channel_stds: [73.6027665 , 69.89082075, 70.9150767 ]
}
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 512
max_dimension: 512
pad_to_max_dimension: true
}
}
object_detection_task {
task_loss_weight: 1.0
offset_loss_weight: 1.0
scale_loss_weight: 0.1
localization_loss {
l1_localization_loss {
}
}
}
object_center_params {
object_center_loss_weight: 1.0
min_box_overlap_iou: 0.7
max_box_predictions: 100
classification_loss {
penalty_reduced_logistic_focal_loss {
alpha: 2.0
beta: 4.0
}
}
}
deepmac_mask_estimation {
dim: 32
task_loss_weight: 5.0
pixel_embedding_dim: 16
mask_size: 32
use_xy: true
use_instance_embedding: true
network_type: "hourglass52"
classification_loss {
weighted_sigmoid {}
}
allowed_masked_classes_ids: [
1, # person
2, # bicycle
3, # car
4, # motorcycle/motorbike
5, # airplane/aeroplane
6, # bus
7, # train
9, # boat
16, # bird
17, # cat
18, # dog
19, # horse
20, # sheep
21, # cow
44, # bottle
62, # chair
63, # couch/sofa
64, # potted plant
67, # dining table
72 # tvmonitor
]
}
}
}
train_config: {
batch_size: 128
num_steps: 50000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_adjust_hue {
}
}
data_augmentation_options {
random_adjust_contrast {
}
}
data_augmentation_options {
random_adjust_saturation {
}
}
data_augmentation_options {
random_adjust_brightness {
}
}
data_augmentation_options {
random_square_crop_by_scale {
scale_min: 0.6
scale_max: 1.3
}
}
optimizer {
adam_optimizer: {
epsilon: 1e-7 # Match tf.keras.optimizers.Adam's default.
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: 1e-3
total_steps: 50000
warmup_learning_rate: 2.5e-4
warmup_steps: 5000
}
}
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/ckpt-1"
fine_tune_checkpoint_type: "detection"
}
train_input_reader: {
load_instance_masks: true
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
mask_type: PNG_MASKS
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
metrics_set: "coco_mask_metrics"
include_metrics_per_category: true
use_moving_averages: false
batch_size: 1;
super_categories {
key: "VOC"
value: "person,bicycle,car,motorcycle,airplane,bus,train,boat,bird,cat,"
"dog,horse,sheep,cow,bottle,chair,couch,potted plant,dining table,tv"
}
super_categories {
key: "NonVOC"
value: "truck,traffic light,fire hydrant,stop sign,parking meter,bench,"
"elephant,bear,zebra,giraffe,backpack,umbrella,handbag,tie,suitcase,"
"frisbee,skis,snowboard,sports ball,kite,baseball bat,baseball glove,"
"skateboard,surfboard,tennis racket,wine glass,cup,fork,knife,spoon,bowl,"
"banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,cake,bed,"
"toilet,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster,"
"sink,refrigerator,book,clock,vase,scissors,teddy bear,hair drier,"
"toothbrush"
}
super_categories {
key: "person"
value: "person"
}
super_categories {
key: "vehicle"
value: "bicycle,car,motorcycle,airplane,bus,train,truck,boat"
}
super_categories {
key: "outdoor"
value: "traffic light,fire hydrant,stop sign,parking meter,bench"
}
super_categories {
key: "animal"
value: "bird,cat,dog,horse,sheep,cow,elephant,bear,zebra,giraffe"
}
super_categories {
key: "accessory"
value: "backpack,umbrella,handbag,tie,suitcase"
}
super_categories {
key: "sports"
value: "frisbee,skis,snowboard,sports ball,kite,baseball bat,"
"baseball glove,skateboard,surfboard,tennis racket"
}
super_categories {
key: "kitchen"
value: "bottle,wine glass,cup,fork,knife,spoon,bowl"
}
super_categories {
key: "food"
value: "banana,apple,sandwich,orange,broccoli,carrot,hot dog,pizza,donut,"
"cake"
}
super_categories {
key: "furniture"
value: "chair,couch,potted plant,bed,dining table,toilet"
}
super_categories {
key: "electronic"
value: "tv,laptop,mouse,remote,keyboard,cell phone,microwave,oven,toaster,"
"sink,refrigerator"
}
super_categories {
key: "indoor"
value: "book,clock,vase,scissors,teddy bear,hair drier,toothbrush"
}
}
eval_input_reader: {
load_instance_masks: true
mask_type: PNG_MASKS
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
# DeepMAC model
<!-- TODO(vighneshb) add correct arxiv links and test this page.-->
**DeepMAC** (Deep Mask heads Above CenterNet) is a neural network architecture
that is designed for the partially supervised instance segmentation task. For
details see the
[The surprising impact of mask-head architecture on novel class segmentation](https://arxiv.org/abs/2104.00613)
paper. The figure below shows improved mask predictions for unseen classes as we
use better mask-head architectures.
<p align="center">
<img src="./img/mask_improvement.png" style="width:50%;"/>
</p>
Just by using better mask-head architectures (no extra losses or modules) we
achieve state-of-the-art performance in the partially supervised instance
segmentation task.
## Code structure
* `deepmac_meta_arch.py` implements our main architecture, DeepMAC, on top of
the CenterNet detection architecture.
* The proto message `DeepMACMaskEstimation` in `center_net.proto` controls the
configuration of the mask head used.
* The field `allowed_masked_classes_ids` controls which classes receive mask
supervision during training.
* Mask R-CNN based ablations in the paper are implemented in the
[TF Model Garden](https://github.com/tensorflow/models/tree/master/official/vision/beta/projects/deepmac_maskrcnn) code base.
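
To make these pieces concrete, here is a minimal sketch (mirroring the unit
tests in `deepmac_meta_arch_test.py`) of instantiating a standalone mask-head
network and running it on dummy embeddings. The tensor shapes and the
`'hourglass10'` variant are illustrative choices, not recommended settings.

```python
import tensorflow as tf

from object_detection.meta_architectures import deepmac_meta_arch

# A mask head takes a per-instance embedding plus a cropped per-pixel
# embedding and predicts a logit map for the instance mask. The second
# argument (8) is the number of initial channels, as in the unit tests.
mask_net = deepmac_meta_arch.MaskHeadNetwork('hourglass10', 8)

instance_embedding = tf.zeros((2, 4))        # 2 instances, 4-d embedding each.
pixel_embedding = tf.zeros((2, 32, 32, 16))  # Per-instance pixel features.

mask_logits = mask_net(instance_embedding, pixel_embedding, training=True)
print(mask_logits.shape)  # (2, 32, 32): one mask logit map per instance.
```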
## Prerequisites
1. Follow [TF2 install instructions](tf2.md) to install Object Detection API.
2. Generate COCO dataset by using
[create_coco_tf_record.py](../../official/vision/beta/data/create_coco_tf_record.py)
## Configurations
We provide pre-defined configs which can be run as a
[TF2 training pipeline](tf2_training_and_evaluation.md). Each of these
configurations needs to be passed as the `pipeline_config_path` argument to the
`object_detection/model_main_tf2.py` binary. Note that the `512x512` resolution
models require a TPU `v3-32` and the `1024x1024` resolution models require a TPU
`v3-128` to train. The configs can be found in the [configs/tf2](../configs/tf2)
directory. In the table below `X->Y` indicates that we train with masks from `X`
and evaluate with masks from `Y`. Performance is measured on the `coco-val2017`
set.
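
As a concrete illustration, below is a hedged sketch of filling in the
`PATH_TO_BE_CONFIGURED` placeholders programmatically with
`object_detection.utils.config_util` before launching training. All file paths
here are hypothetical placeholders.

```python
from object_detection.utils import config_util

# Hypothetical locations; substitute the outputs of create_coco_tf_record.py.
LABEL_MAP = '/data/coco/label_map.txt'

configs = config_util.get_configs_from_pipeline_file(
    'configs/tf2/center_net_deepmac_1024x1024_coco.config')

configs['train_input_config'].label_map_path = LABEL_MAP
configs['train_input_config'].tf_record_input_reader.input_path[:] = [
    '/data/coco/train2017-?????-of-00256.tfrecord']
configs['eval_input_configs'][0].label_map_path = LABEL_MAP
configs['eval_input_configs'][0].tf_record_input_reader.input_path[:] = [
    '/data/coco/val2017-?????-of-00032.tfrecord']
configs['train_config'].fine_tune_checkpoint = '/data/checkpoints/ckpt-51'

pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
config_util.save_pipeline_config(pipeline_proto, '/tmp/deepmac')
# Pass /tmp/deepmac/pipeline.config as --pipeline_config_path (together with
# --model_dir) to object_detection/model_main_tf2.py.
```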
### Partially supervised models
Resolution | Mask head | Train->Eval | Config name | Mask mAP
:--------- | :------------ | :------------- | :------------------------------------------------- | -------:
512x512 | Hourglass-52 | VOC -> Non-VOC | `center_net_deepmac_512x512_voc_only.config` | 32.5
1024x1024 | Hourglass-100 | VOC -> Non-VOC | `center_net_deepmac_1024x1024_voc_only.config` | 35.5
1024x1024 | Hourglass-100 | Non-VOC -> VOC | `center_net_deepmac_1024x1024_non_voc_only.config` | 39.1
### Fully supervised models
Here we report the mask mAP averaged over all COCO classes on the
`test-dev2017` set.
Resolution | Mask head | Config name | Mask mAP
:--------- | :------------ | :----------------------------------------- | -------:
1024x1024 | Hourglass-100 | `center_net_deepmac_1024x1024_coco.config` | 39.4
## Demos
* [DeepMAC Colab](../colab_tutorials/deepmac_colab.ipynb) lets you run a
pre-trained DeepMAC model on user-specified boxes. Note that you are not
restricted to COCO classes!
## Pre-trained models
* [COCO Checkpoint](http://download.tensorflow.org/models/object_detection/tf2/20210329/deepmac_1024x1024_coco17.tar.gz) -
Takes as input Image + Boxes and produces per-box instance masks as output.
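
For reference, here is a condensed sketch of running this checkpoint, adapted
from the DeepMAC Colab above; the image path and box coordinates are made-up
examples.

```python
import tensorflow as tf

from object_detection.utils import ops

# Path after downloading and untarring deepmac_1024x1024_coco17.tar.gz.
model = tf.keras.models.load_model('deepmac_1024x1024_coco17/saved_model')

# Assumes a standard RGB image; 'example.jpg' is a placeholder path.
image = tf.io.decode_image(tf.io.read_file('example.jpg'), channels=3)
height, width = image.shape[0], image.shape[1]

# User-specified boxes in normalized [ymin, xmin, ymax, xmax] coordinates.
boxes = tf.constant([[0.1, 0.1, 0.9, 0.9]], dtype=tf.float32)

# The SavedModel takes a batch of images and a batch of boxes.
detections = model(image[tf.newaxis], boxes[tf.newaxis])
masks = detections['detection_masks']

# Reframe the per-box masks into full-image coordinates for visualization.
image_masks = ops.reframe_box_masks_to_image_masks(
    masks[0], boxes, height, width)
```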
## See also
* [Mask R-CNN code](https://github.com/tensorflow/models/tree/master/official/vision/beta/projects/deepmac_maskrcnn)
in the TF Model Garden code base.
* Project website - [git.io/deepmac](https://git.io/deepmac)
## Citation
```
@misc{birodkar2021surprising,
title={The surprising impact of mask-head architecture on novel class segmentation},
author={Vighnesh Birodkar and Zhichao Lu and Siyang Li and Vivek Rathod and Jonathan Huang},
year={2021},
eprint={2104.00613},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
"""Tests for google3.third_party.tensorflow_models.object_detection.meta_architectures.deepmac_meta_arch."""
import functools
import unittest
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from object_detection.core import losses
from object_detection.core import preprocessor
from object_detection.meta_architectures import center_net_meta_arch
from object_detection.meta_architectures import deepmac_meta_arch
from object_detection.utils import tf_version
class DummyFeatureExtractor(center_net_meta_arch.CenterNetFeatureExtractor):
def __init__(self,
channel_means,
channel_stds,
bgr_ordering,
num_feature_outputs,
stride):
self._num_feature_outputs = num_feature_outputs
self._stride = stride
super(DummyFeatureExtractor, self).__init__(
channel_means=channel_means, channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
def predict(self):
pass
def loss(self):
pass
def postprocess(self):
pass
def call(self, inputs):
batch_size, input_height, input_width, _ = inputs.shape
fake_output = tf.ones([
batch_size, input_height // self._stride, input_width // self._stride,
64
], dtype=tf.float32)
return [fake_output] * self._num_feature_outputs
@property
def out_stride(self):
return self._stride
@property
def num_feature_outputs(self):
return self._num_feature_outputs
class MockMaskNet(tf.keras.layers.Layer):
def __call__(self, instance_embedding, pixel_embedding, training):
return tf.zeros_like(pixel_embedding[:, :, :, 0]) + 0.9
def build_meta_arch(predict_full_resolution_masks=False, use_dice_loss=False):
"""Builds the DeepMAC meta architecture."""
feature_extractor = DummyFeatureExtractor(
channel_means=(1.0, 2.0, 3.0),
channel_stds=(10., 20., 30.),
bgr_ordering=False,
num_feature_outputs=2,
stride=4)
image_resizer_fn = functools.partial(
preprocessor.resize_to_range,
min_dimension=128,
max_dimension=128,
pad_to_max_dimension=True)
object_center_params = center_net_meta_arch.ObjectCenterParams(
classification_loss=losses.WeightedSigmoidClassificationLoss(),
object_center_loss_weight=1.0,
min_box_overlap_iou=1.0,
max_box_predictions=5,
use_labeled_classes=False)
if use_dice_loss:
classification_loss = losses.WeightedDiceClassificationLoss(False)
else:
classification_loss = losses.WeightedSigmoidClassificationLoss()
deepmac_params = deepmac_meta_arch.DeepMACParams(
classification_loss=classification_loss,
dim=8,
task_loss_weight=1.0,
pixel_embedding_dim=2,
allowed_masked_classes_ids=[],
mask_size=16,
mask_num_subsamples=-1,
use_xy=True,
network_type='hourglass10',
use_instance_embedding=True,
num_init_channels=8,
predict_full_resolution_masks=predict_full_resolution_masks,
postprocess_crop_size=128
)
object_detection_params = center_net_meta_arch.ObjectDetectionParams(
localization_loss=losses.L1LocalizationLoss(),
offset_loss_weight=1.0,
scale_loss_weight=0.1
)
return deepmac_meta_arch.DeepMACMetaArch(
is_training=True,
add_summaries=False,
num_classes=6,
feature_extractor=feature_extractor,
object_center_params=object_center_params,
deepmac_params=deepmac_params,
object_detection_params=object_detection_params,
image_resizer_fn=image_resizer_fn)
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class DeepMACUtilsTest(tf.test.TestCase):
def test_subsample_trivial(self):
"""Test subsampling masks."""
boxes = np.arange(4).reshape(4, 1) * np.ones((4, 4))
masks = np.arange(4).reshape(4, 1, 1) * np.ones((4, 32, 32))
weights = np.ones(4)
classes = tf.one_hot(tf.range(4), depth=4)
result = deepmac_meta_arch.subsample_instances(
classes, weights, boxes, masks, 4)
self.assertAllClose(result[0], classes)
self.assertAllClose(result[1], weights)
self.assertAllClose(result[2], boxes)
self.assertAllClose(result[3], masks)
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class DeepMACMetaArchTest(tf.test.TestCase):
def setUp(self): # pylint:disable=g-missing-super-call
self.model = build_meta_arch()
def test_mask_network(self):
net = deepmac_meta_arch.MaskHeadNetwork('hourglass10', 8)
out = net(tf.zeros((2, 4)), tf.zeros((2, 32, 32, 16)), training=True)
self.assertEqual(out.shape, (2, 32, 32))
def test_mask_network_hourglass20(self):
net = deepmac_meta_arch.MaskHeadNetwork('hourglass20', 8)
out = net(tf.zeros((2, 4)), tf.zeros((2, 32, 32, 16)), training=True)
self.assertEqual(out.shape, (2, 32, 32))
def test_mask_network_resnet(self):
net = deepmac_meta_arch.MaskHeadNetwork('resnet4')
out = net(tf.zeros((2, 4)), tf.zeros((2, 32, 32, 16)), training=True)
self.assertEqual(out.shape, (2, 32, 32))
def test_mask_network_resnet_tf_function(self):
net = deepmac_meta_arch.MaskHeadNetwork('resnet8')
call_func = tf.function(net.__call__)
out = call_func(tf.zeros((2, 4)), tf.zeros((2, 32, 32, 16)), training=True)
self.assertEqual(out.shape, (2, 32, 32))
def test_get_mask_head_input(self):
boxes = tf.constant([[0., 0., 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]],
dtype=tf.float32)
pixel_embedding = np.zeros((32, 32, 4), dtype=np.float32)
pixel_embedding[:16, :16] = 1.0
pixel_embedding[16:, 16:] = 2.0
pixel_embedding = tf.constant(pixel_embedding)
mask_inputs = self.model._get_mask_head_input(boxes, pixel_embedding)
self.assertEqual(mask_inputs.shape, (2, 16, 16, 6))
y_grid, x_grid = tf.meshgrid(np.linspace(-1.0, 1.0, 16),
np.linspace(-1.0, 1.0, 16), indexing='ij')
for i in range(2):
mask_input = mask_inputs[i]
self.assertAllClose(y_grid, mask_input[:, :, 0])
self.assertAllClose(x_grid, mask_input[:, :, 1])
pixel_embedding = mask_input[:, :, 2:]
self.assertAllClose(np.zeros((16, 16, 4)) + i + 1, pixel_embedding)
def test_get_mask_head_input_no_crop_resize(self):
model = build_meta_arch(predict_full_resolution_masks=True)
boxes = tf.constant([[0., 0., 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
dtype=tf.float32)
pixel_embedding_np = np.random.randn(32, 32, 4).astype(np.float32)
pixel_embedding = tf.constant(pixel_embedding_np)
mask_inputs = model._get_mask_head_input(boxes, pixel_embedding)
self.assertEqual(mask_inputs.shape, (2, 32, 32, 6))
y_grid, x_grid = tf.meshgrid(np.linspace(-1.0, 1.0, 32),
np.linspace(-1.0, 1.0, 32), indexing='ij')
for i in range(2):
mask_input = mask_inputs[i]
self.assertAllClose(y_grid, mask_input[:, :, 0])
self.assertAllClose(x_grid, mask_input[:, :, 1])
pixel_embedding = mask_input[:, :, 2:]
self.assertAllClose(pixel_embedding_np, pixel_embedding)
def test_get_instance_embeddings(self):
embeddings = np.zeros((32, 32, 2))
embeddings[8, 8] = 1.0
embeddings[24, 16] = 2.0
embeddings = tf.constant(embeddings)
boxes = tf.constant([[0., 0., 0.5, 0.5], [0.5, 0.0, 1.0, 1.0]])
center_embeddings = self.model._get_instance_embeddings(boxes, embeddings)
self.assertAllClose(center_embeddings, [[1.0, 1.0], [2.0, 2.0]])
def test_get_groundtruth_mask_output(self):
boxes = tf.constant([[0., 0., 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]],
dtype=tf.float32)
masks = np.zeros((2, 32, 32), dtype=np.float32)
masks[0, :16, :16] = 0.5
masks[1, 16:, 16:] = 0.1
masks = self.model._get_groundtruth_mask_output(boxes, masks)
self.assertEqual(masks.shape, (2, 16, 16))
self.assertAllClose(masks[0], np.zeros((16, 16)) + 0.5)
self.assertAllClose(masks[1], np.zeros((16, 16)) + 0.1)
def test_get_groundtruth_mask_output_crop_resize(self):
model = build_meta_arch(predict_full_resolution_masks=True)
boxes = tf.constant([[0., 0., 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
dtype=tf.float32)
masks = tf.ones((2, 32, 32))
masks = model._get_groundtruth_mask_output(boxes, masks)
self.assertAllClose(masks, np.ones((2, 32, 32)))
def test_per_instance_loss(self):
model = build_meta_arch()
model._mask_net = MockMaskNet()
boxes = tf.constant([[0.0, 0.0, 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]])
masks = np.zeros((2, 32, 32), dtype=np.float32)
masks[0, :16, :16] = 1.0
masks[1, 16:, 16:] = 1.0
masks = tf.constant(masks)
loss = model._compute_per_instance_mask_loss(
boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)))
self.assertAllClose(
loss, np.zeros(2) - tf.math.log(tf.nn.sigmoid(0.9)))
def test_per_instance_loss_no_crop_resize(self):
model = build_meta_arch(predict_full_resolution_masks=True)
model._mask_net = MockMaskNet()
boxes = tf.constant([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]])
masks = np.ones((2, 128, 128), dtype=np.float32)
masks = tf.constant(masks)
loss = model._compute_per_instance_mask_loss(
boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)))
self.assertAllClose(
loss, np.zeros(2) - tf.math.log(tf.nn.sigmoid(0.9)))
def test_per_instance_loss_no_crop_resize_dice(self):
model = build_meta_arch(predict_full_resolution_masks=True,
use_dice_loss=True)
model._mask_net = MockMaskNet()
boxes = tf.constant([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]])
masks = np.ones((2, 128, 128), dtype=np.float32)
masks = tf.constant(masks)
loss = model._compute_per_instance_mask_loss(
boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)))
pred = tf.nn.sigmoid(0.9)
expected = (1.0 - ((2.0 * pred) / (1.0 + pred)))
self.assertAllClose(loss, [expected, expected], rtol=1e-3)
def test_empty_masks(self):
boxes = tf.zeros([0, 4])
masks = tf.zeros([0, 128, 128])
loss = self.model._compute_per_instance_mask_loss(
boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)))
self.assertEqual(loss.shape, (0,))
def test_postprocess(self):
model = build_meta_arch()
model._mask_net = MockMaskNet()
boxes = np.zeros((2, 3, 4), dtype=np.float32)
boxes[:, :, [0, 2]] = 0.0
boxes[:, :, [1, 3]] = 8.0
boxes = tf.constant(boxes)
masks = model._postprocess_masks(
boxes, tf.zeros((2, 32, 32, 2)), tf.zeros((2, 32, 32, 2)))
prob = tf.nn.sigmoid(0.9).numpy()
self.assertAllClose(masks, prob * np.ones((2, 3, 16, 16)))
def test_postprocess_no_crop_resize_shape(self):
model = build_meta_arch(predict_full_resolution_masks=True)
model._mask_net = MockMaskNet()
boxes = np.zeros((2, 3, 4), dtype=np.float32)
boxes[:, :, [0, 2]] = 0.0
boxes[:, :, [1, 3]] = 8.0
boxes = tf.constant(boxes)
masks = model._postprocess_masks(
boxes, tf.zeros((2, 32, 32, 2)), tf.zeros((2, 32, 32, 2)))
prob = tf.nn.sigmoid(0.9).numpy()
self.assertAllClose(masks, prob * np.ones((2, 3, 128, 128)))
def test_crop_masks_within_boxes(self):
masks = np.zeros((2, 32, 32))
masks[0, :16, :16] = 1.0
masks[1, 16:, 16:] = 1.0
boxes = tf.constant([[0.0, 0.0, 15.0 / 32, 15.0 / 32],
[0.5, 0.5, 1.0, 1]])
masks = deepmac_meta_arch.crop_masks_within_boxes(
masks, boxes, 128)
masks = (masks.numpy() > 0.0).astype(np.float32)
self.assertAlmostEqual(masks.sum(), 2 * 128 * 128)
def test_transform_boxes_to_feature_coordinates(self):
batch_size = 2
model = build_meta_arch()
model._mask_net = MockMaskNet()
boxes = np.zeros((batch_size, 3, 4), dtype=np.float32)
boxes[:, :, [0, 2]] = 0.1
boxes[:, :, [1, 3]] = 0.5
boxes = tf.constant(boxes)
true_image_shapes = tf.constant([
[64, 32, 3], # Image 1 is padded during resizing.
[64, 64, 3], # Image 2 is not padded.
])
resized_image_height = 64
resized_image_width = 64
resized_image_shape = [
batch_size, resized_image_height, resized_image_width, 3
]
feature_map_height = 32
feature_map_width = 32
instance_embedding = tf.zeros(
(batch_size, feature_map_height, feature_map_width, 2))
expected_boxes = np.array([
[ # Image 1
# 0.1 * (64 / resized_image_height) * feature_map_height -> 3.2
# 0.5 * (32 / resized_image_width) * feature_map_width -> 8.0
[3.2, 8., 3.2, 8.],
[3.2, 8., 3.2, 8.],
[3.2, 8., 3.2, 8.],
],
[ # Image 2
# 0.1 * (64 / resized_image_height) * feature_map_height -> 3.2
# 0.5 * (64 / resized_image_width) * feature_map_width -> 16
[3.2, 16., 3.2, 16.],
[3.2, 16., 3.2, 16.],
[3.2, 16., 3.2, 16.],
],
])
box_strided = model._transform_boxes_to_feature_coordinates(
boxes, true_image_shapes, resized_image_shape, instance_embedding)
self.assertAllClose(box_strided, expected_boxes)
def test_fc_tf_function(self):
net = deepmac_meta_arch.MaskHeadNetwork('fully_connected', 8, mask_size=32)
call_func = tf.function(net.__call__)
out = call_func(tf.zeros((2, 4)), tf.zeros((2, 32, 32, 8)), training=True)
self.assertEqual(out.shape, (2, 32, 32))
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class FullyConnectedMaskHeadTest(tf.test.TestCase):
def test_fc_mask_head(self):
head = deepmac_meta_arch.FullyConnectedMaskHead(512, 16)
inputs = tf.random.uniform([100, 16, 16, 512])
output = head(inputs)
self.assertAllEqual([100, 16, 16, 1], output.numpy().shape)
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class ResNetMaskHeadTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(['resnet4', 'resnet8', 'resnet20'])
def test_pass(self, name):
net = deepmac_meta_arch.ResNetMaskNetwork(name, 8)
out = net(tf.zeros((3, 32, 32, 16)))
self.assertEqual(out.shape[:3], (3, 32, 32))
if __name__ == '__main__':
tf.test.main()