Commit 61d6173d authored by Pengchong Jin, committed by A. Unique TensorFlower

Clean up the mask utils.

PiperOrigin-RevId: 282825837
parent bd73fdfe
@@ -25,103 +25,13 @@ from absl import logging
 import numpy as np
 from PIL import Image
 from pycocotools import coco
-from pycocotools import mask as mask_utils
+from pycocotools import mask as mask_api
 import six
 import tensorflow.compat.v2 as tf
 from official.vision.detection.dataloader import tf_example_decoder
 from official.vision.detection.utils import box_utils
+from official.vision.detection.utils import mask_utils
 
-import cv2
-
-
-def generate_segmentation_from_masks(masks,
-                                     detected_boxes,
-                                     image_height,
-                                     image_width,
-                                     is_image_mask=False):
-  """Generates segmentation result from instance masks.
-
-  Args:
-    masks: a numpy array of shape [N, mask_height, mask_width] representing the
-      instance masks w.r.t. the `detected_boxes`.
-    detected_boxes: a numpy array of shape [N, 4] representing the reference
-      bounding boxes.
-    image_height: an integer representing the height of the image.
-    image_width: an integer representing the width of the image.
-    is_image_mask: bool. True: input masks are whole-image masks. False: input
-      masks are bounding-box level masks.
-
-  Returns:
-    segms: a numpy array of shape [N, image_height, image_width] representing
-      the instance masks *pasted* on the image canvas.
-  """
-
-  def expand_boxes(boxes, scale):
-    """Expands an array of boxes by a given scale."""
-    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227  # pylint: disable=line-too-long
-    # The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
-    # whereas `boxes` here is in [x1, y1, w, h] form.
-    w_half = boxes[:, 2] * .5
-    h_half = boxes[:, 3] * .5
-    x_c = boxes[:, 0] + w_half
-    y_c = boxes[:, 1] + h_half
-
-    w_half *= scale
-    h_half *= scale
-
-    boxes_exp = np.zeros(boxes.shape)
-    boxes_exp[:, 0] = x_c - w_half
-    boxes_exp[:, 2] = x_c + w_half
-    boxes_exp[:, 1] = y_c - h_half
-    boxes_exp[:, 3] = y_c + h_half
-
-    return boxes_exp
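Note: the removed `expand_boxes` helper takes boxes in `[x1, y1, w, h]` form but returns them in `[x1, y1, x2, y2]` form, scaled about the unchanged box center. A minimal worked sketch of that computation (the numbers are made up for illustration):

```python
import numpy as np

# One hypothetical box in [x1, y1, w, h] form; its center is (12, 24).
boxes = np.array([[10.0, 20.0, 4.0, 8.0]])
scale = 1.5

w_half = boxes[:, 2] * 0.5 * scale     # 3.0
h_half = boxes[:, 3] * 0.5 * scale     # 6.0
x_c = boxes[:, 0] + boxes[:, 2] * 0.5  # 12.0
y_c = boxes[:, 1] + boxes[:, 3] * 0.5  # 24.0

# The result is in [x1, y1, x2, y2] form, expanded about the same center.
expanded = np.stack(
    [x_c - w_half, y_c - h_half, x_c + w_half, y_c + h_half], axis=1)
print(expanded)  # [[ 9. 18. 15. 30.]]
```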
-
-  # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812  # pylint: disable=line-too-long
-  # To work around an issue with cv2.resize (it seems to automatically pad
-  # with repeated border values), we manually zero-pad the masks by 1 pixel
-  # prior to resizing back to the original image resolution. This prevents
-  # "top hat" artifacts. We therefore need to expand the reference boxes by an
-  # appropriate factor.
-  _, mask_height, mask_width = masks.shape
-  scale = max((mask_width + 2.0) / mask_width,
-              (mask_height + 2.0) / mask_height)
-
-  ref_boxes = expand_boxes(detected_boxes, scale)
-  ref_boxes = ref_boxes.astype(np.int32)
-  padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)
-  segms = []
-  for mask_ind, mask in enumerate(masks):
-    im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
-    if is_image_mask:
-      # Process whole-image masks.
-      im_mask[:, :] = mask[:, :]
-    else:
-      # Process mask inside bounding boxes.
-      padded_mask[1:-1, 1:-1] = mask[:, :]
-      ref_box = ref_boxes[mask_ind, :]
-      w = ref_box[2] - ref_box[0] + 1
-      h = ref_box[3] - ref_box[1] + 1
-      w = np.maximum(w, 1)
-      h = np.maximum(h, 1)
-      mask = cv2.resize(padded_mask, (w, h))
-      mask = np.array(mask > 0.5, dtype=np.uint8)
-
-      x_0 = max(ref_box[0], 0)
-      x_1 = min(ref_box[2] + 1, image_width)
-      y_0 = max(ref_box[1], 0)
-      y_1 = min(ref_box[3] + 1, image_height)
-      im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
-                                       (x_0 - ref_box[0]):(x_1 - ref_box[0])]
-    segms.append(im_mask)
-
-  segms = np.array(segms)
-  assert masks.shape[0] == segms.shape[0]
-  return segms
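Two details of this (now relocated) loop are easy to miss: the 1-pixel zero padding before `cv2.resize`, which prevents the border bleeding that causes the "top hat" artifacts, and the symmetric index offsets that clip the resized mask to the image canvas. A self-contained sketch of the clipping arithmetic, using hypothetical sizes and an all-ones stand-in for the resized mask:

```python
import numpy as np

image_height, image_width = 100, 100
ref_box = np.array([-5, 90, 20, 120])  # [x1, y1, x2, y2], partly off-canvas

w = max(ref_box[2] - ref_box[0] + 1, 1)  # 26
h = max(ref_box[3] - ref_box[1] + 1, 1)  # 31
mask = np.ones((h, w), dtype=np.uint8)   # stand-in for the resized mask

im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
x_0, x_1 = max(ref_box[0], 0), min(ref_box[2] + 1, image_width)   # 0, 21
y_0, y_1 = max(ref_box[1], 0), min(ref_box[3] + 1, image_height)  # 90, 100

# The same offsets index both sides, so the pasted region stays aligned
# even when the box extends past the canvas.
im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
                                 (x_0 - ref_box[0]):(x_1 - ref_box[0])]
assert im_mask.sum() == (y_1 - y_0) * (x_1 - x_0)  # 10 * 21 visible pixels
```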
@@ -189,7 +99,7 @@ class COCOWrapper(coco.COCO):
          ann['segmentation'] = [
              [x1, y1, x1, y2, x2, y2, x2, y1]]
        elif self._eval_type == 'mask':
-         ann['area'] = mask_utils.area(ann['segmentation'])
+         ann['area'] = mask_api.area(ann['segmentation'])
     res.dataset['annotations'] = copy.deepcopy(predictions)
     res.createIndex()
@@ -237,17 +147,15 @@ def convert_predictions_to_coco_annotations(predictions):
     for j in range(batch_size):
       if 'detection_masks' in predictions:
-        image_masks = generate_segmentation_from_masks(
+        image_masks = mask_utils.paste_instance_masks(
             predictions['detection_masks'][i][j],
             mask_boxes[i][j],
             int(predictions['image_info'][i][j, 0, 0]),
-            int(predictions['image_info'][i][j, 0, 1]),
-            is_image_mask=False)
+            int(predictions['image_info'][i][j, 0, 1]))
         binary_masks = (image_masks > 0.0).astype(np.uint8)
         encoded_masks = [
-            mask_utils.encode(np.asfortranarray(binary_mask))
-            for binary_mask in list(binary_masks)
-        ]
+            mask_api.encode(np.asfortranarray(binary_mask))
+            for binary_mask in list(binary_masks)]
       for k in range(max_num_detections):
         ann = {}
         ann['image_id'] = predictions['source_id'][i][j]
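Here `mask_api` is `pycocotools.mask`: `encode` requires a Fortran-ordered (column-major) uint8 array and returns a COCO run-length encoding, and `area` counts the foreground pixels of that RLE. A quick sketch on a toy mask:

```python
import numpy as np
from pycocotools import mask as mask_api

binary_mask = np.zeros((4, 6), dtype=np.uint8)
binary_mask[1:3, 2:5] = 1  # a 2x3 foreground patch

# encode() insists on column-major memory layout, hence np.asfortranarray.
rle = mask_api.encode(np.asfortranarray(binary_mask))
print(rle['size'])         # [4, 6]
print(mask_api.area(rle))  # 6, the number of foreground pixels

# The encoding round-trips back to the original mask.
assert (mask_api.decode(rle) == binary_mask).all()
```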
@@ -334,10 +242,10 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
           np_mask = (
               np.array(mask.getdata()).reshape(height, width).astype(np.uint8))
           np_mask[np_mask > 0] = 255
-          encoded_mask = mask_utils.encode(np.asfortranarray(np_mask))
+          encoded_mask = mask_api.encode(np.asfortranarray(np_mask))
           ann['segmentation'] = encoded_mask
           if 'areas' not in groundtruths:
-            ann['area'] = mask_utils.area(encoded_mask)
+            ann['area'] = mask_api.area(encoded_mask)
         gt_annotations.append(ann)
   for i, ann in enumerate(gt_annotations):
...
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,8 +21,24 @@ import numpy as np
 import cv2
 
 
-def segm_results(masks, detections, image_height, image_width):
-  """Generates segmentation results."""
+def paste_instance_masks(masks,
+                         detected_boxes,
+                         image_height,
+                         image_width):
+  """Paste instance masks to generate the image segmentation results.
+
+  Args:
+    masks: a numpy array of shape [N, mask_height, mask_width] representing the
+      instance masks w.r.t. the `detected_boxes`.
+    detected_boxes: a numpy array of shape [N, 4] representing the reference
+      bounding boxes.
+    image_height: an integer representing the height of the image.
+    image_width: an integer representing the width of the image.
+
+  Returns:
+    segms: a numpy array of shape [N, image_height, image_width] representing
+      the instance masks *pasted* on the image canvas.
+  """
 
   def expand_boxes(boxes, scale):
     """Expands an array of boxes by a given scale."""
@@ -51,14 +67,17 @@ def segm_results(masks, detections, image_height, image_width):
   # prior to resizing back to the original image resolution. This prevents
   # "top hat" artifacts. We therefore need to expand the reference boxes by an
   # appropriate factor.
-  mask_size = masks.shape[2]
-  scale = (mask_size + 2.0) / mask_size
+  _, mask_height, mask_width = masks.shape
+  scale = max((mask_width + 2.0) / mask_width,
+              (mask_height + 2.0) / mask_height)
 
-  ref_boxes = expand_boxes(detections[:, 1:5], scale)
+  ref_boxes = expand_boxes(detected_boxes, scale)
   ref_boxes = ref_boxes.astype(np.int32)
-  padded_mask = np.zeros((mask_size + 2, mask_size + 2), dtype=np.float32)
+  padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)
 
   segms = []
   for mask_ind, mask in enumerate(masks):
+    im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
+
+    # Process mask inside bounding boxes.
     padded_mask[1:-1, 1:-1] = mask[:, :]
 
     ref_box = ref_boxes[mask_ind, :]
@@ -69,7 +88,6 @@ def segm_results(masks, detections, image_height, image_width):
     mask = cv2.resize(padded_mask, (w, h))
     mask = np.array(mask > 0.5, dtype=np.uint8)
 
-    im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
     x_0 = max(ref_box[0], 0)
     x_1 = min(ref_box[2] + 1, image_width)
...
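With the helper moved into `mask_utils`, pasting reduces to a single call at each call site. A hedged usage sketch; the shapes and box values below are made up, assuming soft `[0, 1]` box-level masks and boxes in `[x1, y1, w, h]` form on the image canvas:

```python
import numpy as np
from official.vision.detection.utils import mask_utils

num_detections, mask_height, mask_width = 3, 28, 28
image_height, image_width = 640, 480

# Hypothetical network outputs.
masks = np.random.uniform(
    size=(num_detections, mask_height, mask_width)).astype(np.float32)
detected_boxes = np.array([[50., 60., 100., 80.],
                           [200., 300., 64., 64.],
                           [400., 500., 120., 90.]])  # third box gets clipped

segms = mask_utils.paste_instance_masks(
    masks, detected_boxes, image_height, image_width)
assert segms.shape == (num_detections, image_height, image_width)
assert segms.dtype == np.uint8  # binary instance masks on the full canvas
```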