"docs/vscode:/vscode.git/clone" did not exist on "bbab8553224d12f7fd58b0e65b0daf899769ef0b"
Commit 032d7970 authored by Sam Tsai, committed by Facebook GitHub Bot
Browse files

Add options for keeping the aspect ratio and for resizing to fit in the affine augmentation

Summary:
Pull Request resolved: https://github.com/facebookresearch/d2go/pull/392

1. Move the scale adjustment into a separate function and expose an option (`fit_in_frame`) to disable it
2. Add an option (`keep_aspect_ratio`) to keep the original image width and height instead of creating a square image

Reviewed By: wat3rBro

Differential Revision: D40403705

fbshipit-source-id: 6c35a9a1fe3ef868e5f0b2204874fd028776e26a
parent 4f3cc35f
...@@ -118,6 +118,8 @@ class RandomAffine(TransformGen): ...@@ -118,6 +118,8 @@ class RandomAffine(TransformGen):
translation_range: Tuple[float, float] = (0, 0), translation_range: Tuple[float, float] = (0, 0),
scale_range: Tuple[float, float] = (1.0, 1.0), scale_range: Tuple[float, float] = (1.0, 1.0),
shear_range: Tuple[float, float] = (0, 0), shear_range: Tuple[float, float] = (0, 0),
fit_in_frame: bool = True,
keep_aspect_ratio: bool = False,
): ):
""" """
Args: Args:
...@@ -129,30 +131,26 @@ class RandomAffine(TransformGen): ...@@ -129,30 +131,26 @@ class RandomAffine(TransformGen):
scale_range (tuple of floats): min/max scale (post re-centered rotation). scale_range (tuple of floats): min/max scale (post re-centered rotation).
shear_range (tuple of integers): min/max shear angle value in degrees shear_range (tuple of integers): min/max shear angle value in degrees
between -180 to 180. between -180 to 180.
fit_in_frame (bool): if True, the warped image is rescaled so that it fits inside the output frame
keep_aspect_ratio (bool): if True, the output keeps the input width and height; otherwise the
output is a square whose side is the larger of the two
""" """
super().__init__() super().__init__()
# Turn all locals into member variables. # Turn all locals into member variables.
self._init(locals()) self._init(locals())
def get_transform(self, img: np.ndarray) -> Transform: def _compute_scale_adjustment(
im_h, im_w = img.shape[:2] self,
max_size = max(im_w, im_h) im_w: float,
center = [im_w / 2, im_h / 2] im_h: float,
angle = random.uniform(self.angle_range[0], self.angle_range[1]) out_w: float,
translation = [ out_h: float,
random.uniform(self.translation_range[0], self.translation_range[1]), center: Tuple[float, float],
random.uniform(self.translation_range[0], self.translation_range[1]), angle: float,
] shear: Tuple[float, float],
scale = random.uniform(self.scale_range[0], self.scale_range[1]) ) -> float:
shear = [
random.uniform(self.shear_range[0], self.shear_range[1]),
random.uniform(self.shear_range[0], self.shear_range[1]),
]
dummy_translation = [0.0, 0.0]
dummy_scale = 1.0
M_inv = T.functional._get_inverse_affine_matrix( M_inv = T.functional._get_inverse_affine_matrix(
center, angle, dummy_translation, dummy_scale, shear center, angle, [0.0, 0.0], 1.0, shear
) )
M_inv.extend([0.0, 0.0, 1.0]) M_inv.extend([0.0, 0.0, 1.0])
M_inv = np.array(M_inv).reshape((3, 3)) M_inv = np.array(M_inv).reshape((3, 3))
...@@ -166,41 +164,62 @@ class RandomAffine(TransformGen): ...@@ -166,41 +164,62 @@ class RandomAffine(TransformGen):
[1, 1, 1, 1], [1, 1, 1, 1],
] ]
) )
transformed_corners = M @ img_corners new_corners = M @ img_corners
x_min = np.amin(transformed_corners[0]) x_range = np.ceil(np.amax(new_corners[0]) - np.amin(new_corners[0]))
x_max = np.amax(transformed_corners[0]) y_range = np.ceil(np.amax(new_corners[1]) - np.amin(new_corners[1]))
x_range = np.ceil(x_max - x_min)
y_min = np.amin(transformed_corners[1])
y_max = np.amax(transformed_corners[1])
y_range = np.ceil(y_max - y_min)
# Apply translation and scale after centering in output patch # Apply translation and scale after centering in output patch
translation_adjustment = [(max_size - im_w) / 2, (max_size - im_h) / 2] scale_adjustment = min(out_w / x_range, out_h / y_range)
return scale_adjustment
def get_transform(self, img: np.ndarray) -> Transform:
do = self._rand_range() < self.prob
if not do:
return NoOpTransform()
im_h, im_w = img.shape[:2]
center = [im_w / 2, im_h / 2]
angle = random.uniform(self.angle_range[0], self.angle_range[1])
translation = [
random.uniform(self.translation_range[0], self.translation_range[1]),
random.uniform(self.translation_range[0], self.translation_range[1]),
]
scale = random.uniform(self.scale_range[0], self.scale_range[1])
shear = [
random.uniform(self.shear_range[0], self.shear_range[1]),
random.uniform(self.shear_range[0], self.shear_range[1]),
]
# Determine output image size
max_size = max(im_w, im_h)
out_w, out_h = (im_w, im_h) if self.keep_aspect_ratio else (max_size, max_size)
# Apply translation adjustment
translation_adjustment = [(out_w - im_w) / 2, (out_h - im_h) / 2]
translation[0] += translation_adjustment[0] translation[0] += translation_adjustment[0]
translation[1] += translation_adjustment[1] translation[1] += translation_adjustment[1]
scale_adjustment = min(max_size / x_range, max_size / y_range)
# Apply scale adjustment
if self.fit_in_frame:
scale_adjustment = self._compute_scale_adjustment(
im_w, im_h, out_w, out_h, center, angle, shear
)
scale *= scale_adjustment scale *= scale_adjustment
# Compute the affine transform
M_inv = T.functional._get_inverse_affine_matrix( M_inv = T.functional._get_inverse_affine_matrix(
center, angle, translation, scale, shear center, angle, translation, scale, shear
) )
# Convert to Numpy matrix so it can be inverted M_inv = np.array(M_inv).reshape((2, 3))
M_inv.extend([0.0, 0.0, 1.0])
M_inv = np.array(M_inv).reshape((3, 3))
M = np.linalg.inv(M_inv)
do = self._rand_range() < self.prob
if do:
return AffineTransform( return AffineTransform(
M_inv, M_inv,
max_size, out_w,
max_size, out_h,
flags=cv2.WARP_INVERSE_MAP + cv2.INTER_LINEAR, flags=cv2.WARP_INVERSE_MAP + cv2.INTER_LINEAR,
border_mode=cv2.BORDER_REPLICATE, border_mode=cv2.BORDER_REPLICATE,
is_inversed_M=True, is_inversed_M=True,
) )
else:
return NoOpTransform()
# example repr: "RandomPivotScalingOp::[1.0, 0.75, 0.5]" # example repr: "RandomPivotScalingOp::[1.0, 0.75, 0.5]"
......
...@@ -19,6 +19,8 @@ def generate_test_data( ...@@ -19,6 +19,8 @@ def generate_test_data(
translation: float = 0, translation: float = 0,
scale: float = 1, scale: float = 1,
shear: float = 0, shear: float = 0,
fit_in_frame: bool = True,
keep_aspect_ratio: bool = False,
) -> Tuple[str, np.ndarray]: ) -> Tuple[str, np.ndarray]:
# Augmentation dictionary # Augmentation dictionary
aug_dict = { aug_dict = {
...@@ -27,13 +29,20 @@ def generate_test_data( ...@@ -27,13 +29,20 @@ def generate_test_data(
"translation_range": [translation, translation], "translation_range": [translation, translation],
"scale_range": [scale, scale], "scale_range": [scale, scale],
"shear_range": [shear, shear], "shear_range": [shear, shear],
"keep_aspect_ratio": keep_aspect_ratio,
"fit_in_frame": fit_in_frame,
} }
aug_str = "RandomAffineOp::" + json.dumps(aug_dict) aug_str = "RandomAffineOp::" + json.dumps(aug_dict)
# Get image info # Get image info
img_sz = source_img.shape[0] img_h, img_w = source_img.shape[0:2]
center = [img_sz / 2, img_sz / 2] center = [img_w / 2, img_h / 2]
# Compute output_size
max_size = max(img_w, img_h)
out_w, out_h = (img_w, img_h) if keep_aspect_ratio else (max_size, max_size)
if fit_in_frame:
# Warp once to figure scale adjustment # Warp once to figure scale adjustment
M_inv = T.functional._get_inverse_affine_matrix( M_inv = T.functional._get_inverse_affine_matrix(
center, angle, [0, 0], 1, [shear, shear] center, angle, [0, 0], 1, [shear, shear]
...@@ -45,33 +54,35 @@ def generate_test_data( ...@@ -45,33 +54,35 @@ def generate_test_data(
# Center in output patch # Center in output patch
img_corners = np.array( img_corners = np.array(
[ [
[0, 0, img_sz - 1, img_sz - 1], [0, 0, img_w - 1, img_w - 1],
[0, img_sz - 1, 0, img_sz - 1], [0, img_h - 1, 0, img_h - 1],
[1, 1, 1, 1], [1, 1, 1, 1],
] ]
) )
transformed_corners = M @ img_corners new_corners = M @ img_corners
x_min = np.amin(transformed_corners[0]) x_range = np.ceil(np.amax(new_corners[0]) - np.amin(new_corners[0]))
x_max = np.amax(transformed_corners[0]) y_range = np.ceil(np.amax(new_corners[1]) - np.amin(new_corners[1]))
x_range = np.ceil(x_max - x_min)
y_min = np.amin(transformed_corners[1])
y_max = np.amax(transformed_corners[1])
y_range = np.ceil(y_max - y_min)
# Apply translation and scale after centering in output patch # Apply translation and scale after centering in output patch
scale_adjustment = min(img_sz / x_range, img_sz / y_range) scale_adjustment = min(out_w / x_range, out_h / y_range)
scale *= scale_adjustment scale *= scale_adjustment
# Adjust output center location
translation_t = [translation, translation]
translation_adjustment = [(out_w - img_w) / 2, (out_h - img_h) / 2]
translation_t[0] += translation_adjustment[0]
translation_t[1] += translation_adjustment[1]
# Test data output generation # Test data output generation
M_inv = T.functional._get_inverse_affine_matrix( M_inv = T.functional._get_inverse_affine_matrix(
center, angle, [translation, translation], scale, [shear, shear] center, angle, translation_t, scale, [shear, shear]
) )
M_inv = np.array(M_inv).reshape((2, 3)) M_inv = np.array(M_inv).reshape((2, 3))
exp_out_img = cv2.warpAffine( exp_out_img = cv2.warpAffine(
source_img, source_img,
M_inv, M_inv,
(img_sz, img_sz), (out_w, out_h),
flags=cv2.WARP_INVERSE_MAP + cv2.INTER_LINEAR, flags=cv2.WARP_INVERSE_MAP + cv2.INTER_LINEAR,
borderMode=cv2.BORDER_REPLICATE, borderMode=cv2.BORDER_REPLICATE,
) )
...@@ -87,7 +98,7 @@ class TestDataTransformsAffine(unittest.TestCase): ...@@ -87,7 +98,7 @@ class TestDataTransformsAffine(unittest.TestCase):
) )
def test_affine_transforms_angle(self): def test_affine_transforms_angle(self):
default_cfg = Detectron2GoRunner().get_default_cfg() default_cfg = Detectron2GoRunner.get_default_cfg()
img_sz = 11 img_sz = 11
img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8) img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8)
...@@ -103,7 +114,7 @@ class TestDataTransformsAffine(unittest.TestCase): ...@@ -103,7 +114,7 @@ class TestDataTransformsAffine(unittest.TestCase):
self._check_array_close(trans_img, exp_out_img) self._check_array_close(trans_img, exp_out_img)
def test_affine_transforms_translation(self): def test_affine_transforms_translation(self):
default_cfg = Detectron2GoRunner().get_default_cfg() default_cfg = Detectron2GoRunner.get_default_cfg()
img_sz = 11 img_sz = 11
img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8) img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8)
...@@ -119,7 +130,7 @@ class TestDataTransformsAffine(unittest.TestCase): ...@@ -119,7 +130,7 @@ class TestDataTransformsAffine(unittest.TestCase):
self._check_array_close(trans_img, exp_out_img) self._check_array_close(trans_img, exp_out_img)
def test_affine_transforms_shear(self): def test_affine_transforms_shear(self):
default_cfg = Detectron2GoRunner().get_default_cfg() default_cfg = Detectron2GoRunner.get_default_cfg()
img_sz = 11 img_sz = 11
img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8) img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8)
...@@ -135,7 +146,7 @@ class TestDataTransformsAffine(unittest.TestCase): ...@@ -135,7 +146,7 @@ class TestDataTransformsAffine(unittest.TestCase):
self._check_array_close(trans_img, exp_out_img) self._check_array_close(trans_img, exp_out_img)
def test_affine_transforms_scale(self): def test_affine_transforms_scale(self):
default_cfg = Detectron2GoRunner().get_default_cfg() default_cfg = Detectron2GoRunner.get_default_cfg()
img_sz = 11 img_sz = 11
img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8) img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8)
...@@ -149,3 +160,36 @@ class TestDataTransformsAffine(unittest.TestCase): ...@@ -149,3 +160,36 @@ class TestDataTransformsAffine(unittest.TestCase):
trans_img, _ = apply_augmentations(tfm, img) trans_img, _ = apply_augmentations(tfm, img)
self._check_array_close(trans_img, exp_out_img) self._check_array_close(trans_img, exp_out_img)
def test_affine_transforms_angle_non_square(self):
default_cfg = Detectron2GoRunner.get_default_cfg()
img_sz = 11
img = np.zeros((img_sz, img_sz - 2, 3)).astype(np.uint8)
img[((img_sz + 1) // 2) - 1, :, :] = 255
for keep_aspect_ratio in [False, True]:
aug_str, exp_out_img = generate_test_data(
img, angle=45, keep_aspect_ratio=keep_aspect_ratio
)
default_cfg.D2GO_DATA.AUG_OPS.TRAIN = [aug_str]
tfm = build_transform_gen(default_cfg, is_train=True)
trans_img, _ = apply_augmentations(tfm, img)
self._check_array_close(trans_img, exp_out_img)
def test_affine_transforms_angle_no_fit_to_frame(self):
default_cfg = Detectron2GoRunner.get_default_cfg()
img_sz = 11
img = np.zeros((img_sz, img_sz, 3)).astype(np.uint8)
img[((img_sz + 1) // 2) - 1, :, :] = 255
aug_str, exp_out_img = generate_test_data(img, angle=45, fit_in_frame=False)
default_cfg.D2GO_DATA.AUG_OPS.TRAIN = [aug_str]
tfm = build_transform_gen(default_cfg, is_train=True)
trans_img, _ = apply_augmentations(tfm, img)
self._check_array_close(trans_img, exp_out_img)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment