"examples/vscode:/vscode.git/clone" did not exist on "b53f8a4e6e05580984e65545224f34a327bf23e3"
Commit 61d6173d authored by Pengchong Jin's avatar Pengchong Jin Committed by A. Unique TensorFlower
Browse files

Clean up the mask utils.

PiperOrigin-RevId: 282825837
parent bd73fdfe
......@@ -25,103 +25,13 @@ from absl import logging
import numpy as np
from PIL import Image
from pycocotools import coco
from pycocotools import mask as mask_utils
from pycocotools import mask as mask_api
import six
import tensorflow.compat.v2 as tf
from official.vision.detection.dataloader import tf_example_decoder
from official.vision.detection.utils import box_utils
import cv2
def generate_segmentation_from_masks(masks,
detected_boxes,
image_height,
image_width,
is_image_mask=False):
"""Generates segmentation result from instance masks.
Args:
masks: a numpy array of shape [N, mask_height, mask_width] representing the
instance masks w.r.t. the `detected_boxes`.
detected_boxes: a numpy array of shape [N, 4] representing the reference
bounding boxes.
image_height: an integer representing the height of the image.
image_width: an integer representing the width of the image.
is_image_mask: bool. True: input masks are whole-image masks. False: input
masks are bounding-box level masks.
Returns:
segms: a numpy array of shape [N, image_height, image_width] representing
the instance masks *pasted* on the image canvas.
"""
def expand_boxes(boxes, scale):
"""Expands an array of boxes by a given scale."""
# Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227 # pylint: disable=line-too-long
# The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
# whereas `boxes` here is in [x1, y1, w, h] form
w_half = boxes[:, 2] * .5
h_half = boxes[:, 3] * .5
x_c = boxes[:, 0] + w_half
y_c = boxes[:, 1] + h_half
w_half *= scale
h_half *= scale
boxes_exp = np.zeros(boxes.shape)
boxes_exp[:, 0] = x_c - w_half
boxes_exp[:, 2] = x_c + w_half
boxes_exp[:, 1] = y_c - h_half
boxes_exp[:, 3] = y_c + h_half
return boxes_exp
# Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812 # pylint: disable=line-too-long
# To work around an issue with cv2.resize (it seems to automatically pad
# with repeated border values), we manually zero-pad the masks by 1 pixel
# prior to resizing back to the original image resolution. This prevents
# "top hat" artifacts. We therefore need to expand the reference boxes by an
# appropriate factor.
_, mask_height, mask_width = masks.shape
scale = max((mask_width + 2.0) / mask_width,
(mask_height + 2.0) / mask_height)
ref_boxes = expand_boxes(detected_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)
segms = []
for mask_ind, mask in enumerate(masks):
im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
if is_image_mask:
# Process whole-image masks.
im_mask[:, :] = mask[:, :]
else:
# Process mask inside bounding boxes.
padded_mask[1:-1, 1:-1] = mask[:, :]
ref_box = ref_boxes[mask_ind, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > 0.5, dtype=np.uint8)
x_0 = max(ref_box[0], 0)
x_1 = min(ref_box[2] + 1, image_width)
y_0 = max(ref_box[1], 0)
y_1 = min(ref_box[3] + 1, image_height)
im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
(x_0 - ref_box[0]):(x_1 - ref_box[0])]
segms.append(im_mask)
segms = np.array(segms)
assert masks.shape[0] == segms.shape[0]
return segms
from official.vision.detection.utils import mask_utils
class COCOWrapper(coco.COCO):
......@@ -189,7 +99,7 @@ class COCOWrapper(coco.COCO):
ann['segmentation'] = [
[x1, y1, x1, y2, x2, y2, x2, y1]]
elif self._eval_type == 'mask':
ann['area'] = mask_utils.area(ann['segmentation'])
ann['area'] = mask_api.area(ann['segmentation'])
res.dataset['annotations'] = copy.deepcopy(predictions)
res.createIndex()
......@@ -237,17 +147,15 @@ def convert_predictions_to_coco_annotations(predictions):
for j in range(batch_size):
if 'detection_masks' in predictions:
image_masks = generate_segmentation_from_masks(
image_masks = mask_utils.paste_instance_masks(
predictions['detection_masks'][i][j],
mask_boxes[i][j],
int(predictions['image_info'][i][j, 0, 0]),
int(predictions['image_info'][i][j, 0, 1]),
is_image_mask=False)
int(predictions['image_info'][i][j, 0, 1]))
binary_masks = (image_masks > 0.0).astype(np.uint8)
encoded_masks = [
mask_utils.encode(np.asfortranarray(binary_mask))
for binary_mask in list(binary_masks)
]
mask_api.encode(np.asfortranarray(binary_mask))
for binary_mask in list(binary_masks)]
for k in range(max_num_detections):
ann = {}
ann['image_id'] = predictions['source_id'][i][j]
......@@ -334,10 +242,10 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
np_mask = (
np.array(mask.getdata()).reshape(height, width).astype(np.uint8))
np_mask[np_mask > 0] = 255
encoded_mask = mask_utils.encode(np.asfortranarray(np_mask))
encoded_mask = mask_api.encode(np.asfortranarray(np_mask))
ann['segmentation'] = encoded_mask
if 'areas' not in groundtruths:
ann['area'] = mask_utils.area(encoded_mask)
ann['area'] = mask_api.area(encoded_mask)
gt_annotations.append(ann)
for i, ann in enumerate(gt_annotations):
......
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -21,8 +21,24 @@ import numpy as np
import cv2
def segm_results(masks, detections, image_height, image_width):
"""Generates segmentation results."""
def paste_instance_masks(masks,
detected_boxes,
image_height,
image_width):
"""Paste instance masks to generate the image segmentation results.
Args:
masks: a numpy array of shape [N, mask_height, mask_width] representing the
instance masks w.r.t. the `detected_boxes`.
detected_boxes: a numpy array of shape [N, 4] representing the reference
bounding boxes.
image_height: an integer representing the height of the image.
image_width: an integer representing the width of the image.
Returns:
segms: a numpy array of shape [N, image_height, image_width] representing
the instance masks *pasted* on the image canvas.
"""
def expand_boxes(boxes, scale):
"""Expands an array of boxes by a given scale."""
......@@ -51,14 +67,17 @@ def segm_results(masks, detections, image_height, image_width):
# prior to resizing back to the original image resolution. This prevents
# "top hat" artifacts. We therefore need to expand the reference boxes by an
# appropriate factor.
mask_size = masks.shape[2]
scale = (mask_size + 2.0) / mask_size
_, mask_height, mask_width = masks.shape
scale = max((mask_width + 2.0) / mask_width,
(mask_height + 2.0) / mask_height)
ref_boxes = expand_boxes(detections[:, 1:5], scale)
ref_boxes = expand_boxes(detected_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((mask_size + 2, mask_size + 2), dtype=np.float32)
padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)
segms = []
for mask_ind, mask in enumerate(masks):
im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
# Process mask inside bounding boxes.
padded_mask[1:-1, 1:-1] = mask[:, :]
ref_box = ref_boxes[mask_ind, :]
......@@ -69,7 +88,6 @@ def segm_results(masks, detections, image_height, image_width):
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > 0.5, dtype=np.uint8)
im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
x_0 = max(ref_box[0], 0)
x_1 = min(ref_box[2] + 1, image_width)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment