Commit dff8897d authored by Vishnu Banna's avatar Vishnu Banna
Browse files

kmeans

parent 9cd84cc1
......@@ -186,6 +186,11 @@ class AnchorBoxes(hyperparams.Config):
level_limits: Optional[List[int]] = None
anchors_per_scale: int = 3
generate_anchors: bool = False
scaling_mode: str = "sqrt_log"
box_generation_mode: str = "per_level"
use_validation_data: bool = True
def get(self, min_level, max_level):
"""Distribute them in order to each level.
......@@ -210,6 +215,9 @@ class AnchorBoxes(hyperparams.Config):
start += self.anchors_per_scale
return anchors_per_level, self.level_limits
def set_boxes(self, boxes):
  """Wrap raw (width, height) pairs in `Box` configs and store them."""
  wrapped = []
  for entry in boxes:
    wrapped.append(Box(box=entry))
  self.boxes = wrapped
@dataclasses.dataclass
class Yolo(hyperparams.Config):
......
import math
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.yolo.ops import box_ops
from official.vision.beta.projects.yolo.ops import math_ops
from official.core import input_reader
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import logging
def IOU(X, centroids_X, iou_type="iou"):
  """Compute the width-height IOU between ground truths and centroids.

  Both inputs are (w, h) pairs. Their centers are pinned to the origin
  (zeros prepended) so only the overlap of widths and heights matters.

  Args:
    X: tensor of ground-truth (w, h) pairs.
    centroids_X: tensor of centroid (w, h) pairs, broadcastable against X.
    iou_type: 'iou' to return the plain IOU, anything else for GIOU.

  Returns:
    The elementwise IOU (or GIOU) between X and centroids_X.
  """
  # Build full xywh boxes centered at the origin.
  origin_x = tf.zeros_like(X)
  origin_c = tf.zeros_like(centroids_X)
  gt_boxes = tf.concat([origin_x, X], axis=-1)
  centroid_boxes = tf.concat([origin_c, centroids_X], axis=-1)
  # compute_giou yields (iou, giou); pick the requested metric.
  iou, giou = box_ops.compute_giou(gt_boxes, centroid_boxes)
  return iou if iou_type == 'iou' else giou
class AnchorKMeans:
  """Box anchor K-means.

  Clusters the (width, height) pairs of every ground-truth box in a decoded
  detection dataset into k centroids, using (1 - IOU) as the distance
  metric. The sorted centroids become the model's anchor boxes.
  """

  @property
  def boxes(self):
    # Boxes are held internally as a tf.Tensor; expose them as numpy.
    return self._boxes.numpy()

  def get_box_from_dataset(self, dataset, image_w=512):
    """Load all the boxes in the dataset into memory.

    Args:
      dataset: iterable of decoded samples; each sample must provide the
        keys 'width', 'height' and 'groundtruth_boxes' (normalized yxyx).
      image_w: unused; kept for interface compatibility.

    Side effect: stores the normalized (w, h) pairs, sorted by area, in
    `self._boxes`.
    """
    box_list = []
    for i, sample in enumerate(dataset):
      width = sample["width"]
      height = sample["height"]
      boxes = sample['groundtruth_boxes']
      # Convert the box format from yxyx to xcycwh and keep only (w, h) to
      # allow k-means by width-height IOU.
      scale = tf.cast([width, height], boxes.dtype)
      boxes = box_ops.yxyx_to_xcycwh(boxes)[..., 2:] * scale
      # Scale the boxes to pixels, remove excessively small boxes that are
      # less than 1 pixel in width or height, then re-normalize.
      boxes = boxes[tf.reduce_max(boxes, axis=-1) >= 1] / scale
      box_list.append(boxes)
      # Loading is slow, so log the current iteration as a progress bar.
      tf.print('loading sample: ', i, end='\r')
    tf.print('loaded sample: ', i, end='\n')

    box_list = tf.concat(box_list, axis=0)
    # Sort by area so centroid initialization can split the boxes evenly.
    inds = tf.argsort(tf.reduce_prod(box_list, axis=-1), axis=0)
    box_list = tf.gather(box_list, inds, axis=0)
    self._boxes = box_list

  def get_init_centroids(self, boxes, k):
    """Initialize centroids by splitting the sorted boxes into k groups."""
    box_num = tf.shape(boxes)[0]
    # Trim the tail so the boxes split evenly into k groups.
    split = box_num // k
    bn2 = split * k
    boxes = boxes[:bn2, :]
    cluster_groups = tf.split(boxes, k, axis=0)
    clusters = []
    for c in cluster_groups:
      clusters.append(tf.reduce_mean(c, axis=0))
    clusters = tf.convert_to_tensor(clusters).numpy()
    return clusters

  def iou(self, boxes, clusters):
    """Pairwise IOU between every box and every cluster, shape [n, k]."""
    n = tf.shape(boxes)[0]
    k = tf.shape(clusters)[0]
    # Broadcast both operands to a common [n, k, 2] shape.
    boxes = tf.repeat(boxes, k, axis=0)
    boxes = tf.reshape(boxes, (n, k, -1))
    boxes = tf.cast(boxes, tf.float32)
    clusters = tf.tile(clusters, [n, 1])
    clusters = tf.reshape(clusters, (n, k, -1))
    clusters = tf.cast(clusters, tf.float32)
    # compute the IOU
    return IOU(boxes, clusters)

  def maximization(self, boxes, clusters, assignments):
    """K-means maximization step: move each centroid to its members' mean."""
    for i in range(clusters.shape[0]):
      members = boxes[assignments == i]
      if tf.shape(members)[0] == 0:
        # ROBUSTNESS FIX: the mean of an empty cluster is NaN and would
        # poison every subsequent iteration; keep the previous centroid.
        continue
      hold = tf.math.reduce_mean(members, axis=0)
      clusters = tf.tensor_scatter_nd_update(clusters, [[i]], [hold])
    return clusters

  def _kmeans(self, boxes, clusters, k, max_iters=1000):
    """Run k-means on arbitrary boxes and clusters with k centers.

    Alternates expectation (assign each box to the nearest centroid by
    1 - IOU) and maximization (recenter the centroids) until the
    assignments no longer change or `max_iters` is reached.
    """
    assignments = tf.zeros((boxes.shape[0]), dtype=tf.int64) - 1
    num_iters = 1

    # Do one iteration outside of the optimization loop.
    dists = 1 - self.iou(boxes, clusters)
    curr = tf.math.argmin(dists, axis=-1)
    clusters = self.maximization(boxes, clusters, curr)

    # Iterate until the assignments no longer change.
    while not tf.math.reduce_all(curr == assignments) and num_iters < max_iters:
      assignments = curr
      dists = 1 - self.iou(boxes, clusters)
      curr = tf.math.argmin(dists, axis=-1)
      clusters = self.maximization(boxes, clusters, curr)
      tf.print('k-Means box generation iteration: ', num_iters, end='\r')
      num_iters += 1
    tf.print('k-Means box generation iteration: ', num_iters, end='\n')
    assignments = curr

    # Sort the clusters by area, then recompute the final assignments so
    # they correspond to the sorted order.
    clusters = tf.convert_to_tensor(
        np.array(sorted(clusters.numpy(), key=lambda x: x[0] * x[1])))
    dists = 1 - self.iou(boxes, clusters)
    assignments = tf.math.argmin(dists, axis=-1)
    return clusters, assignments

  def run_kmeans(self, k, boxes, clusters=None):
    """K-means wrapping function; initializes centroids when not given."""
    if clusters is None:
      clusters = self.get_init_centroids(boxes, k)
    clusters, assignments = self._kmeans(boxes, clusters, k)
    return clusters.numpy(), assignments.numpy()

  def _avg_iou(self, boxes, clusters, assignments):
    """Compute the IOU between each centroid and the boxes assigned to it."""
    ious = []
    num_boxes = []
    clusters1 = tf.split(clusters, clusters.shape[0], axis=0)
    for i, c in enumerate(clusters1):
      hold = boxes[assignments == i]
      iou = tf.reduce_mean(self.iou(hold, c)).numpy()
      ious.append(iou)
      num_boxes.append(hold.shape[0])
    # NOTE(review): the printed clusters are sorted by area while `ious`
    # stays in the original cluster order — confirm callers pass sorted
    # clusters if the two lists must align.
    clusters = np.floor(np.array(sorted(clusters, key=lambda x: x[0] * x[1])))
    print("boxes: ", clusters.tolist())
    print("iou over cluster : ", ious)
    print("boxes per cluster: ", num_boxes)
    # BUG FIX: previously averaged only the final loop variable `iou`
    # (the last cluster's IOU); the dataset average must cover all of them.
    print("dataset avgiou: ", np.mean(ious))
    return ious

  def avg_iou_total(self, boxes, clusters):
    """Assign boxes to the given clusters and report per-cluster IOU."""
    clusters = tf.convert_to_tensor(clusters)
    dists = 1 - self.iou(boxes, clusters)
    assignments = tf.math.argmin(dists, axis=-1)
    ious = self._avg_iou(boxes, clusters, assignments)
    return clusters, assignments, ious

  def get_boxes(self, boxes_, clusters, assignments=None):
    """Given the clusters, return the list of boxes in each cluster."""
    if assignments is None:
      dists = 1 - self.iou(boxes_, np.array(clusters))
      assignments = tf.math.argmin(dists, axis=-1)
    boxes = []
    clusters = tf.split(clusters, clusters.shape[0], axis=0)
    for i, c in enumerate(clusters):
      hold = boxes_[assignments == i]
      if hasattr(hold, "numpy"):
        hold = hold.numpy()
      boxes.append(hold)
    return boxes

  def __call__(self, dataset, k,
               anchors_per_scale=None,
               scaling_mode="sqrt_log",
               box_generation_mode="across_level",
               image_resolution=[512, 512, 3]):
    """Run k-means on the boxes for a given input resolution.

    Args:
      dataset: `tf.data.Dataset` for the decoded object detection dataset.
        The boxes must have the key 'groundtruth_boxes'.
      k: `int` for the number of centroids to generate.
      anchors_per_scale: `int` for how many anchor boxes to use per level.
      scaling_mode: `str` for the type of box scaling to use when
        generating anchor boxes. Must be in the set {sqrt_log, default}.
      box_generation_mode: `str` for the type of k-means to use when
        generating anchor boxes. Must be in the set
        {even_split, across_level, per_level}.
      image_resolution: `List[int]` for the resolution of the boxes to run
        k-means for.

    Returns:
      boxes: `List[List[int]]` of shape [k, 2] for the anchor boxes to use
        for box predictions.
    """
    self.get_box_from_dataset(dataset)
    # sqrt scaling compresses large boxes so clusters spread more evenly.
    if scaling_mode == "sqrt_log":
      boxes_ls = tf.math.sqrt(self._boxes.numpy())
    else:
      boxes_ls = self._boxes.numpy()

    if isinstance(image_resolution, int):
      image_resolution = [image_resolution, image_resolution]
    else:
      image_resolution = image_resolution[:2]
    # Flip to (w, h) order to match the box layout; a no-op for square
    # int resolutions.
    image_resolution = image_resolution[::-1]

    if box_generation_mode == "even_split":
      # No optimization: just use the even-split centroids directly.
      clusters = self.get_init_centroids(boxes_ls, k)
      dists = 1 - self.iou(boxes_ls, np.array(clusters))
      assignments = tf.math.argmin(dists, axis=-1)
    elif box_generation_mode == "across_level":
      # One k-means over all boxes jointly.
      clusters = self.get_init_centroids(boxes_ls, k)
      clusters, assignments = self.run_kmeans(k, boxes_ls, clusters)
    else:
      # Generate a box region for each FPN level, then run k-means within
      # each region independently.
      clusters = self.get_init_centroids(boxes_ls, k // anchors_per_scale)
      # Square off the clusters by averaging each (w, h) with its swap.
      clusters += np.roll(clusters, 1, axis=-1)
      clusters /= 2
      # For each contained box set, compute k-means.
      boxes_sets = self.get_boxes(boxes_ls, clusters)
      clusters = []
      for boxes in boxes_sets:
        cluster_set, assignments = self.run_kmeans(anchors_per_scale, boxes)
        clusters.extend(cluster_set)
      clusters = np.array(clusters)
      dists = 1 - self.iou(boxes_ls, np.array(clusters))
      assignments = tf.math.argmin(dists, axis=-1)

    # Undo the sqrt scaling before converting to pixel units.
    if scaling_mode == "sqrt_log":
      clusters = tf.square(clusters)
    self._boxes *= tf.convert_to_tensor(image_resolution, self._boxes.dtype)
    # One final maximization in pixel space to refine the centroids.
    clusters = self.maximization(self._boxes, clusters, assignments)

    if hasattr(clusters, "numpy"):
      clusters = clusters.numpy()
    _, _, _ = self.avg_iou_total(self._boxes, clusters)
    clusters = np.floor(np.array(sorted(clusters, key=lambda x: x[0] * x[1])))
    return clusters.tolist()
class BoxGenInputReader(input_reader.InputReader):
  """Input reader that clusters dataset boxes into anchor boxes."""

  def read(self,
           k,
           anchors_per_scale,
           scaling_mode="sqrt_log",
           box_generation_mode="across_level",
           image_resolution=[512, 512, 3],
           input_context=None):
    """Run k-means on the boxes for a given input resolution.

    Args:
      k: `int` for the number of centroids to generate.
      anchors_per_scale: `int` for how many anchor boxes to use per level.
      scaling_mode: `str` for the type of box scaling to use when
        generating anchor boxes. Must be in the set {sqrt_log, default}.
      box_generation_mode: `str` for the type of k-means to use when
        generating anchor boxes. Must be in the set
        {across_level, per_level}.
      image_resolution: `List[int]` for the resolution of the boxes to run
        k-means for.
      input_context: optional `tf.distribute.InputContext` forwarded to the
        parent reader.

    Returns:
      boxes: `List[List[int]]` of shape [k, 2] for the anchor boxes to use
        for box predictions.
    """
    # Force evaluation mode so no training augmentation distorts the boxes.
    self._is_training = False
    dataset = super().read(input_context=input_context)
    dataset = dataset.unbatch()

    kmeans_gen = AnchorKMeans()
    boxes = kmeans_gen(
        dataset, k,
        anchors_per_scale=anchors_per_scale,
        image_resolution=image_resolution,
        scaling_mode=scaling_mode,
        box_generation_mode=box_generation_mode)
    # Release references promptly; loading every box holds a lot of memory.
    del kmeans_gen
    del dataset
    # FIX: corrected "clusting" typo in the log message.
    logging.info('clustering complete -> default boxes used ::')
    logging.info(boxes)
    return boxes
\ No newline at end of file
......@@ -176,40 +176,43 @@ class GridGenerator:
self._anchors = tf.convert_to_tensor(anchors)
return
def _build_grid_points(self, lheight, lwidth, anchors, dtype):
  """Generate a grid of fixed grid edges for box center decoding.

  RECONSTRUCTION: the scraped diff kept both the pre- and post-commit
  lines (two signatures, duplicated tile statements); this is the
  post-commit (height, width) version only.

  Returns a tensor of shape [1, lheight, lwidth, num_anchors, 2] holding
  the (x, y) cell offsets, repeated per anchor.
  """
  with tf.name_scope('center_grid'):
    y = tf.range(0, lheight)
    x = tf.range(0, lwidth)
    x_left = tf.tile(
        tf.transpose(tf.expand_dims(x, axis=-1), perm=[1, 0]), [lheight, 1])
    y_left = tf.tile(tf.expand_dims(y, axis=-1), [1, lwidth])
    x_y = tf.stack([x_left, y_left], axis=-1)
    x_y = tf.cast(x_y, dtype=dtype)
    num = tf.shape(anchors)[0]
    # Repeat the grid once per anchor and add a leading batch dimension.
    x_y = tf.expand_dims(
        tf.tile(tf.expand_dims(x_y, axis=-2), [1, 1, num, 1]), axis=0)
    return x_y
def _build_anchor_grid(self, height, width, anchors, dtype):
  """Get the transformed anchor boxes for each dimension.

  RECONSTRUCTION: the scraped diff kept both the old and new signatures;
  this is the post-commit version taking (height, width).

  Returns a tensor of shape [1, height, width, num_anchors, 2] with the
  anchor (w, h) pairs tiled over every grid cell.
  """
  with tf.name_scope('anchor_grid'):
    num = tf.shape(anchors)[0]
    anchors = tf.cast(anchors, dtype=dtype)
    anchors = tf.reshape(anchors, [1, 1, 1, num, 2])
    anchors = tf.tile(anchors, [1, tf.cast(height, tf.int32),
                                tf.cast(width, tf.int32), 1, 1])
    return anchors
def _extend_batch(self, grid, batch_size):
  """Tile a [1, ...] grid along the leading (batch) dimension."""
  tiling = [batch_size, 1, 1, 1, 1]
  return tf.tile(grid, tiling)
def __call__(self, width, height, batch_size, dtype=None):
def __call__(self, height, width, batch_size, dtype=None):
if dtype is None:
self.dtype = tf.keras.backend.floatx()
else:
self.dtype = dtype
grid_points = self._build_grid_points(width, height, self._anchors,
grid_points = self._build_grid_points(height, width, self._anchors,
self.dtype)
anchor_grid = self._build_anchor_grid(
height, width,
tf.cast(self._anchors, self.dtype) /
tf.cast(self._scale_anchors, self.dtype), self.dtype)
......
......@@ -36,6 +36,7 @@ from official.vision.beta.projects.yolo.dataloaders import yolo_input
from official.vision.beta.projects.yolo.modeling import factory
from official.vision.beta.projects.yolo.ops import mosaic
from official.vision.beta.projects.yolo.ops import preprocessing_ops
from official.vision.beta.projects.yolo.ops import kmeans_anchors
from official.vision.beta.projects.yolo.tasks import task_utils
OptimizationConfig = optimization.OptimizationConfig
......@@ -61,8 +62,53 @@ class YoloTask(base_task.Task):
# globally set the random seed
preprocessing_ops.set_random_seeds(seed=params.seed)
if self.task_config.model.anchor_boxes.generate_anchors:
self.generate_anchors()
return
def generate_anchors(self,
                     input_context=None):
  """Generate anchor boxes for an arbitrary object detection dataset.

  Runs k-means over the ground-truth boxes of the configured dataset,
  writes the resulting anchors to `anchors.txt`, and stores them on the
  anchor-box config.

  Args:
    input_context: optional `tf.distribute.InputContext` forwarded to the
      box reader.

  Returns:
    The generated anchor boxes as `List[List[int]]`.
  """
  input_size = self.task_config.model.input_size
  anchor_cfg = self.task_config.model.anchor_boxes
  backbone = self.task_config.model.backbone.get()

  # Choose which split supplies the boxes for clustering.
  if anchor_cfg.use_validation_data:
    dataset = self.task_config.validation_data
  else:
    dataset = self.task_config.train_data
  decoder = self._get_data_decoder(dataset)

  # One anchor set per backbone level, `anchors_per_scale` anchors each.
  num_anchors = backbone.max_level - backbone.min_level + 1
  num_anchors *= anchor_cfg.anchors_per_scale

  # Read one sample at a time; restore the configured batch size after.
  gbs = dataset.global_batch_size
  dataset.global_batch_size = 1
  box_reader = kmeans_anchors.BoxGenInputReader(
      dataset,
      dataset_fn=tf.data.TFRecordDataset,
      decoder_fn=decoder.decode)
  boxes = box_reader.read(
      k=num_anchors,
      anchors_per_scale=anchor_cfg.anchors_per_scale,
      image_resolution=input_size,
      input_context=input_context,
      scaling_mode=anchor_cfg.scaling_mode,
      box_generation_mode=anchor_cfg.box_generation_mode,
  )
  dataset.global_batch_size = gbs

  with open("anchors.txt", 'w') as f:
    f.write(f"input resolution: {input_size} \n boxes: \n {boxes}")
  # BUG FIX: the original implicit string concatenation was missing a
  # space ("savethem"); also corrected "mack"/"feild"/"you" typos.
  logging.info("INFO: boxes will be saved to anchors.txt, make sure to save "
               "them and update the boxes field in your yaml config file.")
  anchor_cfg.set_boxes(boxes)
  return boxes
def build_model(self):
"""Build an instance of Yolo."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment