"tests/python/common/vscode:/vscode.git/clone" did not exist on "77c84834c8b0941052e1e61b2406bec16a8972eb"
Commit 032d7970 authored by Sam Tsai, committed by Facebook GitHub Bot
Browse files

add additional options for keeping aspect ratio and resizing to fit for affine augmentation

Summary:
Pull Request resolved: https://github.com/facebookresearch/d2go/pull/392

1. Move the scale adjustment into a separate function and expose an option (`fit_in_frame`) to disable it
2. Add an option (`keep_aspect_ratio`) to keep the original image width and height instead of creating a square image

Reviewed By: wat3rBro

Differential Revision: D40403705

fbshipit-source-id: 6c35a9a1fe3ef868e5f0b2204874fd028776e26a
parent 4f3cc35f
......@@ -118,41 +118,39 @@ class RandomAffine(TransformGen):
translation_range: Tuple[float, float] = (0, 0),
scale_range: Tuple[float, float] = (1.0, 1.0),
shear_range: Tuple[float, float] = (0, 0),
fit_in_frame: bool = True,
keep_aspect_ratio: bool = False,
):
"""
Args:
prob (float): probability of applying transform.
angle_range (tuple of integers): min/max rotation angle in degrees
between -180 and 180.
between -180 and 180.
translation_range (tuple of integers): min/max translation
(post re-centered rotation).
(post re-centered rotation).
scale_range (tuple of floats): min/max scale (post re-centered rotation).
shear_range (tuple of integers): min/max shear angle value in degrees
between -180 to 180.
between -180 to 180.
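fit_in_frame (bool): if True, the warped image is additionally scaled so
that it fits inside the output frame.
keep_aspect_ratio (bool): if True, the output keeps the input width and
height; otherwise the output is a square whose side is the larger of
the two input dimensions.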
"""
super().__init__()
# Turn all locals into member variables.
self._init(locals())
def get_transform(self, img: np.ndarray) -> Transform:
im_h, im_w = img.shape[:2]
max_size = max(im_w, im_h)
center = [im_w / 2, im_h / 2]
angle = random.uniform(self.angle_range[0], self.angle_range[1])
translation = [
random.uniform(self.translation_range[0], self.translation_range[1]),
random.uniform(self.translation_range[0], self.translation_range[1]),
]
scale = random.uniform(self.scale_range[0], self.scale_range[1])
shear = [
random.uniform(self.shear_range[0], self.shear_range[1]),
random.uniform(self.shear_range[0], self.shear_range[1]),
]
dummy_translation = [0.0, 0.0]
dummy_scale = 1.0
def _compute_scale_adjustment(
self,
im_w: float,
im_h: float,
out_w: float,
out_h: float,
center: Tuple[float, float],
angle: float,
shear: Tuple[float, float],
) -> float:
M_inv = T.functional._get_inverse_affine_matrix(
center, angle, dummy_translation, dummy_scale, shear
center, angle, [0.0, 0.0], 1.0, shear
)
M_inv.extend([0.0, 0.0, 1.0])
M_inv = np.array(M_inv).reshape((3, 3))
......@@ -166,41 +164,62 @@ class RandomAffine(TransformGen):
[1, 1, 1, 1],
]
)
transformed_corners = M @ img_corners
x_min = np.amin(transformed_corners[0])
x_max = np.amax(transformed_corners[0])
x_range = np.ceil(x_max - x_min)
y_min = np.amin(transformed_corners[1])
y_max = np.amax(transformed_corners[1])
y_range = np.ceil(y_max - y_min)
new_corners = M @ img_corners
x_range = np.ceil(np.amax(new_corners[0]) - np.amin(new_corners[0]))
y_range = np.ceil(np.amax(new_corners[1]) - np.amin(new_corners[1]))
# Apply translation and scale after centering in output patch
translation_adjustment = [(max_size - im_w) / 2, (max_size - im_h) / 2]
scale_adjustment = min(out_w / x_range, out_h / y_range)
return scale_adjustment
def get_transform(self, img: np.ndarray) -> Transform:
do = self._rand_range() < self.prob
if not do:
return NoOpTransform()
im_h, im_w = img.shape[:2]
center = [im_w / 2, im_h / 2]
angle = random.uniform(self.angle_range[0], self.angle_range[1])
translation = [
random.uniform(self.translation_range[0], self.translation_range[1]),
random.uniform(self.translation_range[0], self.translation_range[1]),
]
scale = random.uniform(self.scale_range[0], self.scale_range[1])
shear = [
random.uniform(self.shear_range[0], self.shear_range[1]),
random.uniform(self.shear_range[0], self.shear_range[1]),
]
# Determine output image size
max_size = max(im_w, im_h)
out_w, out_h = (im_w, im_h) if self.keep_aspect_ratio else (max_size, max_size)
# Apply translation adjustment
translation_adjustment = [(out_w - im_w) / 2, (out_h - im_h) / 2]
translation[0] += translation_adjustment[0]
translation[1] += translation_adjustment[1]
scale_adjustment = min(max_size / x_range, max_size / y_range)
scale *= scale_adjustment
# Apply scale adjustment
if self.fit_in_frame:
scale_adjustment = self._compute_scale_adjustment(
im_w, im_h, out_w, out_h, center, angle, shear
)
scale *= scale_adjustment
# Compute the affine transform
M_inv = T.functional._get_inverse_affine_matrix(
center, angle, translation, scale, shear
)
# Convert to Numpy matrix so it can be inverted
M_inv.extend([0.0, 0.0, 1.0])
M_inv = np.array(M_inv).reshape((3, 3))
M = np.linalg.inv(M_inv)
do = self._rand_range() < self.prob
if do:
return AffineTransform(
M_inv,
max_size,
max_size,
flags=cv2.WARP_INVERSE_MAP + cv2.INTER_LINEAR,
border_mode=cv2.BORDER_REPLICATE,
is_inversed_M=True,
)
else:
return NoOpTransform()
M_inv = np.array(M_inv).reshape((2, 3))
return AffineTransform(
M_inv,
out_w,
out_h,
flags=cv2.WARP_INVERSE_MAP + cv2.INTER_LINEAR,
border_mode=cv2.BORDER_REPLICATE,
is_inversed_M=True,
)
# example repr: "RandomPivotScalingOp::[1.0, 0.75, 0.5]"
......
......@@ -19,6 +19,8 @@ def generate_test_data(
translation: float = 0,
scale: float = 1,
shear: float = 0,
fit_in_frame: bool = True,
keep_aspect_ratio: bool = False,
) -> Tuple[str, np.ndarray]:
# Augmentation dictionary
aug_dict = {
......@@ -27,51 +29,60 @@ def generate_test_data(
"translation_range": [translation, translation],
"scale_range": [scale, scale],
"shear_range": [shear, shear],
"keep_aspect_ratio": keep_aspect_ratio,
"fit_in_frame": fit_in_frame,
}
aug_str = "RandomAffineOp::" + json.dumps(aug_dict)
# Get image info
img_sz = source_img.shape[0]
center = [img_sz / 2, img_sz / 2]
img_h, img_w = source_img.shape[0:2]
center = [img_w / 2, img_h / 2]
# Warp once to figure scale adjustment
M_inv = T.functional._get_inverse_affine_matrix(
center, angle, [0, 0], 1, [shear, shear]
)
M_inv.extend([0.0, 0.0, 1.0])
M_inv = np.array(M_inv).reshape((3, 3))
M = np.linalg.inv(M_inv)
# Center in output patch
img_corners = np.array(
[
[0, 0, img_sz - 1, img_sz - 1],
[0, img_sz - 1, 0, img_sz - 1],
[1, 1, 1, 1],
]
)
transformed_corners = M @ img_corners
x_min = np.amin(transformed_corners[0])
x_max = np.amax(transformed_corners[0])
x_range = np.ceil(x_max - x_min)
y_min = np.amin(transformed_corners[1])
y_max = np.amax(transformed_corners[1])
y_range = np.ceil(y_max - y_min)
# Apply translation and scale after centering in output patch
scale_adjustment = min(img_sz / x_range, img_sz / y_range)
scale *= scale_adjustment
# Compute output_size
max_size = max(img_w, img_h)
out_w, out_h = (img_w, img_h) if keep_aspect_ratio else (max_size, max_size)
if fit_in_frame:
# Warp once to figure scale adjustment
M_inv = T.functional._get_inverse_affine_matrix(
center, angle, [0, 0], 1, [shear, shear]
)
M_inv.extend([0.0, 0.0, 1.0])
M_inv = np.array(M_inv).reshape((3, 3))
M = np.linalg.inv(M_inv)
# Center in output patch
img_corners = np.array(
[
[0, 0, img_w - 1, img_w - 1],
[0, img_h - 1, 0, img_h - 1],
[1, 1, 1, 1],
]
)
new_corners = M @ img_corners
x_range = np.ceil(np.amax(new_corners[0]) - np.amin(new_corners[0]))
y_range = np.ceil(np.amax(new_corners[1]) - np.amin(new_corners[1]))
# Apply translation and scale after centering in output patch
scale_adjustment = min(out_w / x_range, out_h / y_range)
scale *= scale_adjustment
# Adjust output center location
translation_t = [translation, translation]
translation_adjustment = [(out_w - img_w) / 2, (out_h - img_h) / 2]
translation_t[0] += translation_adjustment[0]
translation_t[1] += translation_adjustment[1]
# Test data output generation
M_inv = T.functional._get_inverse_affine_matrix(
center, angle, [translation, translation], scale, [shear, shear]
center, angle, translation_t, scale, [shear, shear]
)
M_inv = np.array(M_inv).reshape((2, 3))
exp_out_img = cv2.warpAffine(
source_img,
M_inv,
(img_sz, img_sz),
(out_w, out_h),
flags=cv2.WARP_INVERSE_MAP + cv2.INTER_LINEAR,
borderMode=cv2.BORDER_REPLICATE,
)
......@@ -87,7 +98,7 @@ class TestDataTransformsAffine(unittest.TestCase):
)
def test_affine_transforms_angle(self):
default_cfg = Detectron2GoRunner().get_default_cfg()
default_cfg = Detectron2GoRunner.get_default_cfg()
img_sz = 11
img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8)
......@@ -103,7 +114,7 @@ class TestDataTransformsAffine(unittest.TestCase):
self._check_array_close(trans_img, exp_out_img)
def test_affine_transforms_translation(self):
default_cfg = Detectron2GoRunner().get_default_cfg()
default_cfg = Detectron2GoRunner.get_default_cfg()
img_sz = 11
img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8)
......@@ -119,7 +130,7 @@ class TestDataTransformsAffine(unittest.TestCase):
self._check_array_close(trans_img, exp_out_img)
def test_affine_transforms_shear(self):
default_cfg = Detectron2GoRunner().get_default_cfg()
default_cfg = Detectron2GoRunner.get_default_cfg()
img_sz = 11
img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8)
......@@ -135,7 +146,7 @@ class TestDataTransformsAffine(unittest.TestCase):
self._check_array_close(trans_img, exp_out_img)
def test_affine_transforms_scale(self):
default_cfg = Detectron2GoRunner().get_default_cfg()
default_cfg = Detectron2GoRunner.get_default_cfg()
img_sz = 11
img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8)
......@@ -149,3 +160,36 @@ class TestDataTransformsAffine(unittest.TestCase):
trans_img, _ = apply_augmentations(tfm, img)
self._check_array_close(trans_img, exp_out_img)
def test_affine_transforms_angle_non_square(self):
    """Rotate a non-square image by 45 degrees, with and without keep_aspect_ratio.

    For each setting, the image produced by the configured transform
    pipeline is compared against the reference image returned by
    ``generate_test_data``.
    """
    side = 11
    # 11 rows by 9 columns, so height and width differ.
    image = np.zeros((side, side - 2, 3)).astype(np.uint8)
    # Draw a white horizontal line across the middle row.
    middle_row = ((side + 1) // 2) - 1
    image[middle_row, :, :] = 255

    cfg = Detectron2GoRunner.get_default_cfg()
    for keep_aspect_ratio in (False, True):
        # Augmentation spec string plus the expected warped image.
        aug_spec, expected = generate_test_data(
            image, angle=45, keep_aspect_ratio=keep_aspect_ratio
        )
        cfg.D2GO_DATA.AUG_OPS.TRAIN = [aug_spec]
        augmentations = build_transform_gen(cfg, is_train=True)

        actual, _ = apply_augmentations(augmentations, image)
        self._check_array_close(actual, expected)
def test_affine_transforms_angle_no_fit_to_frame(self):
    """Rotate a square image by 45 degrees with fit_in_frame disabled.

    The image produced by the configured transform pipeline is compared
    against the reference image returned by ``generate_test_data``.
    """
    side = 11
    image = np.zeros((side, side, 3)).astype(np.uint8)
    # Draw a white horizontal line across the middle row.
    middle_row = ((side + 1) // 2) - 1
    image[middle_row, :, :] = 255

    # Augmentation spec string plus the expected warped image.
    aug_spec, expected = generate_test_data(image, angle=45, fit_in_frame=False)

    cfg = Detectron2GoRunner.get_default_cfg()
    cfg.D2GO_DATA.AUG_OPS.TRAIN = [aug_spec]
    augmentations = build_transform_gen(cfg, is_train=True)

    actual, _ = apply_augmentations(augmentations, image)
    self._check_array_close(actual, expected)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment