Unverified Commit cde7ff01 authored by Vasilis Vryniotis's avatar Vasilis Vryniotis Committed by GitHub
Browse files

Pass custom scales on DefaultBoxGenerator and change default estimation. (#3766)

parent 3975ec56
...@@ -76,10 +76,10 @@ class Tester(TestCase): ...@@ -76,10 +76,10 @@ class Tester(TestCase):
dboxes = model(images, features) dboxes = model(images, features)
dboxes_output = torch.tensor([ dboxes_output = torch.tensor([
[6.9750, 6.9750, 8.0250, 8.0250], [6.3750, 6.3750, 8.6250, 8.6250],
[6.7315, 6.7315, 8.2685, 8.2685], [4.7443, 4.7443, 10.2557, 10.2557],
[6.7575, 7.1288, 8.2425, 7.8712], [5.9090, 6.7045, 9.0910, 8.2955],
[7.1288, 6.7575, 7.8712, 8.2425] [6.7045, 5.9090, 8.2955, 9.0910]
]) ])
self.assertEqual(len(dboxes), 2) self.assertEqual(len(dboxes), 2)
......
...@@ -138,9 +138,11 @@ class DefaultBoxGenerator(nn.Module): ...@@ -138,9 +138,11 @@ class DefaultBoxGenerator(nn.Module):
Args: Args:
aspect_ratios (List[List[int]]): A list with all the aspect ratios used in each feature map. aspect_ratios (List[List[int]]): A list with all the aspect ratios used in each feature map.
min_ratio (float): The minimum scale :math:`\text{s}_{\text{min}}` of the default boxes used in the estimation min_ratio (float): The minimum scale :math:`\text{s}_{\text{min}}` of the default boxes used in the estimation
of the scales of each feature map. of the scales of each feature map. It is used only if the ``scales`` parameter is not provided.
max_ratio (float): The maximum scale :math:`\text{s}_{\text{max}}` of the default boxes used in the estimation max_ratio (float): The maximum scale :math:`\text{s}_{\text{max}}` of the default boxes used in the estimation
of the scales of each feature map. of the scales of each feature map. It is used only if the ``scales`` parameter is not provided.
scales (List[float]], optional): The scales of the default boxes. If not provided it will be estimated using
the ``min_ratio`` and ``max_ratio`` parameters.
steps (List[int]], optional): It's a hyper-parameter that affects the tiling of defalt boxes. If not provided steps (List[int]], optional): It's a hyper-parameter that affects the tiling of defalt boxes. If not provided
it will be estimated from the data. it will be estimated from the data.
clip (bool): Whether the standardized values of default boxes should be clipped between 0 and 1. The clipping clip (bool): Whether the standardized values of default boxes should be clipped between 0 and 1. The clipping
...@@ -148,7 +150,7 @@ class DefaultBoxGenerator(nn.Module): ...@@ -148,7 +150,7 @@ class DefaultBoxGenerator(nn.Module):
""" """
def __init__(self, aspect_ratios: List[List[int]], min_ratio: float = 0.15, max_ratio: float = 0.9, def __init__(self, aspect_ratios: List[List[int]], min_ratio: float = 0.15, max_ratio: float = 0.9,
steps: Optional[List[int]] = None, clip: bool = True): scales: Optional[List[float]] = None, steps: Optional[List[int]] = None, clip: bool = True):
super().__init__() super().__init__()
if steps is not None: if steps is not None:
assert len(aspect_ratios) == len(steps) assert len(aspect_ratios) == len(steps)
...@@ -158,15 +160,15 @@ class DefaultBoxGenerator(nn.Module): ...@@ -158,15 +160,15 @@ class DefaultBoxGenerator(nn.Module):
num_outputs = len(aspect_ratios) num_outputs = len(aspect_ratios)
# Estimation of default boxes scales # Estimation of default boxes scales
# Inspired from https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_pascal.py#L311-L317 if scales is None:
min_centile = int(100 * min_ratio) if num_outputs > 1:
max_centile = int(100 * max_ratio) range_ratio = max_ratio - min_ratio
conv4_centile = min_centile // 2 # assume half of min_ratio as in paper self.scales = [min_ratio + range_ratio * k / (num_outputs - 1.0) for k in range(num_outputs)]
step = (max_centile - min_centile) // (num_outputs - 2) self.scales.append(1.0)
centiles = [conv4_centile, min_centile] else:
for c in range(min_centile, max_centile + 1, step): self.scales = [min_ratio, max_ratio]
centiles.append(c + step) else:
self.scales = [c / 100 for c in centiles] self.scales = scales
self._wh_pairs = [] self._wh_pairs = []
for k in range(num_outputs): for k in range(num_outputs):
...@@ -207,9 +209,17 @@ class DefaultBoxGenerator(nn.Module): ...@@ -207,9 +209,17 @@ class DefaultBoxGenerator(nn.Module):
for k, f_k in enumerate(grid_sizes): for k, f_k in enumerate(grid_sizes):
# Now add the default boxes for each width-height pair # Now add the default boxes for each width-height pair
for j in range(f_k[0]): for j in range(f_k[0]):
cy = (j + 0.5) / (float(f_k[0]) if self.steps is None else image_size[1] / self.steps[k]) if self.steps is not None:
y_f_k = image_size[1] / self.steps[k]
else:
y_f_k = float(f_k[0])
cy = (j + 0.5) / y_f_k
for i in range(f_k[1]): for i in range(f_k[1]):
cx = (i + 0.5) / (float(f_k[1]) if self.steps is None else image_size[0] / self.steps[k]) if self.steps is not None:
x_f_k = image_size[0] / self.steps[k]
else:
x_f_k = float(f_k[1])
cx = (i + 0.5) / x_f_k
default_boxes.extend([[cx, cy, w, h] for w, h in self._wh_pairs[k]]) default_boxes.extend([[cx, cy, w, h] for w, h in self._wh_pairs[k]])
dboxes = [] dboxes = []
......
...@@ -552,7 +552,9 @@ def ssd300_vgg16(pretrained: bool = False, progress: bool = True, num_classes: i ...@@ -552,7 +552,9 @@ def ssd300_vgg16(pretrained: bool = False, progress: bool = True, num_classes: i
pretrained_backbone = False pretrained_backbone = False
backbone = _vgg_extractor("vgg16_features", False, progress, pretrained_backbone, trainable_backbone_layers, True) backbone = _vgg_extractor("vgg16_features", False, progress, pretrained_backbone, trainable_backbone_layers, True)
anchor_generator = DefaultBoxGenerator([[2], [2, 3], [2, 3], [2, 3], [2], [2]], steps=[8, 16, 32, 64, 100, 300]) anchor_generator = DefaultBoxGenerator([[2], [2, 3], [2, 3], [2, 3], [2], [2]],
scales=[0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05],
steps=[8, 16, 32, 64, 100, 300])
model = SSD(backbone, anchor_generator, (300, 300), num_classes, model = SSD(backbone, anchor_generator, (300, 300), num_classes,
image_mean=[0.48235, 0.45882, 0.40784], image_std=[1., 1., 1.], **kwargs) image_mean=[0.48235, 0.45882, 0.40784], image_std=[1., 1., 1.], **kwargs)
if pretrained: if pretrained:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment