# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Code is copy-pasted exactly as in torch.utils.data.distributed.
# FIXME remove this once c10d fixes the bug it has
import math
import torch
import torch.distributed as dist
from torch.utils.data.sampler import Sampler
class DistributedSampler(Sampler):
"""Sampler that restricts data loading to a subset of the dataset.
It is especially useful in conjunction with
:class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
process can pass a DistributedSampler instance as a DataLoader sampler,
and load a subset of the original dataset that is exclusive to it.
.. note::
Dataset is assumed to be of constant size.
Arguments:
dataset: Dataset used for sampling.
num_replicas (optional): Number of processes participating in
distributed training.
rank (optional): Rank of the current process within num_replicas.
"""
def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
if num_replicas is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
num_replicas = dist.get_world_size()
if rank is None:
if not dist.is_available():
raise RuntimeError("Requires distributed package to be available")
rank = dist.get_rank()
self.dataset = dataset
self.num_replicas = num_replicas
self.rank = rank
self.epoch = 0
self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
self.total_size = self.num_samples * self.num_replicas
self.shuffle = shuffle
def __iter__(self):
if self.shuffle:
# deterministically shuffle based on epoch
g = torch.Generator()
g.manual_seed(self.epoch)
indices = torch.randperm(len(self.dataset), generator=g).tolist()
else:
indices = torch.arange(len(self.dataset)).tolist()
# add extra samples to make it evenly divisible
indices += indices[: (self.total_size - len(indices))]
assert len(indices) == self.total_size
# subsample
offset = self.num_samples * self.rank
indices = indices[offset: offset + self.num_samples]
assert len(indices) == self.num_samples
return iter(indices)
def __len__(self):
return self.num_samples
def set_epoch(self, epoch):
self.epoch = epoch
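# Usage sketch (illustrative, not part of the original file): wiring the sampler
# into a DataLoader under torch.distributed. `my_dataset` and `num_epochs` are
# hypothetical; rank and world size are read from the initialized process group.
#
#   sampler = DistributedSampler(my_dataset, shuffle=True)
#   loader = torch.utils.data.DataLoader(my_dataset, batch_size=32, sampler=sampler)
#   for epoch in range(num_epochs):
#       sampler.set_epoch(epoch)  # re-seeds the shuffle so each epoch differs
#       for images, targets in loader:
#           ...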
from torch.utils.data.sampler import BatchSampler
class IterationBasedBatchSampler(BatchSampler):
"""
Wraps a BatchSampler, re-sampling from it until
a specified number of iterations have been sampled
"""
def __init__(self, batch_sampler, num_iterations, start_iter=0):
self.batch_sampler = batch_sampler
self.num_iterations = num_iterations
self.start_iter = start_iter
def __iter__(self):
iteration = self.start_iter
while iteration <= self.num_iterations:
# if the underlying sampler has a set_epoch method, like
# DistributedSampler, used for making each process see
# a different split of the dataset, then set it
if hasattr(self.batch_sampler.sampler, "set_epoch"):
self.batch_sampler.sampler.set_epoch(iteration)
for batch in self.batch_sampler:
iteration += 1
if iteration > self.num_iterations:
break
yield batch
def __len__(self):
return self.num_iterations
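# Usage sketch (illustrative, not part of the original file): wrapping a plain
# BatchSampler so an iteration-based training loop can run past a single pass
# over the data. `my_dataset` and the iteration budget are hypothetical.
#
#   from torch.utils.data.sampler import RandomSampler
#   batch_sampler = BatchSampler(RandomSampler(my_dataset), batch_size=32, drop_last=True)
#   batch_sampler = IterationBasedBatchSampler(batch_sampler, num_iterations=120000)
#   loader = torch.utils.data.DataLoader(my_dataset, batch_sampler=batch_sampler)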
from ssd.modeling.anchors.prior_box import PriorBox
from .target_transform import SSDTargetTransform
from .transforms import *
def build_transforms(cfg, is_train=True):
if is_train:
transform = [
ConvertFromInts(),
PhotometricDistort(),
Expand(cfg.INPUT.PIXEL_MEAN),
RandomSampleCrop(),
RandomMirror(),
ToPercentCoords(),
Resize(cfg.INPUT.IMAGE_SIZE),
SubtractMeans(cfg.INPUT.PIXEL_MEAN),
ToTensor(),
]
else:
transform = [
Resize(cfg.INPUT.IMAGE_SIZE),
SubtractMeans(cfg.INPUT.PIXEL_MEAN),
ToTensor()
]
transform = Compose(transform)
return transform
def build_target_transform(cfg):
transform = SSDTargetTransform(PriorBox(cfg)(),
cfg.MODEL.CENTER_VARIANCE,
cfg.MODEL.SIZE_VARIANCE,
cfg.MODEL.THRESHOLD)
return transform
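# Usage sketch (illustrative, not part of the original file): how the two
# factories are typically handed to a dataset. `cfg` is the project config and
# `VOCDataset`/`data_dir` stand in for whichever dataset class this repo builds.
#
#   train_transform = build_transforms(cfg, is_train=True)
#   target_transform = build_target_transform(cfg)
#   dataset = VOCDataset(data_dir, split='train',
#                        transform=train_transform,
#                        target_transform=target_transform)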
import numpy as np
import torch
from ssd.utils import box_utils
class SSDTargetTransform:
def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold):
self.center_form_priors = center_form_priors
self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors)
self.center_variance = center_variance
self.size_variance = size_variance
self.iou_threshold = iou_threshold
def __call__(self, gt_boxes, gt_labels):
if type(gt_boxes) is np.ndarray:
gt_boxes = torch.from_numpy(gt_boxes)
if type(gt_labels) is np.ndarray:
gt_labels = torch.from_numpy(gt_labels)
boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels,
self.corner_form_priors, self.iou_threshold)
boxes = box_utils.corner_form_to_center_form(boxes)
locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance)
return locations, labels
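# Shape sketch (illustrative, not part of the original file): with N ground-truth
# boxes and P priors, assign_priors matches every prior to a gt box (background
# where the best IoU falls below the threshold), so the per-image targets have
# fixed shapes regardless of N:
#
#   gt_boxes (N, 4) corner-form, gt_labels (N,)  ->  locations (P, 4), labels (P,)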
# from https://github.com/amdegroot/ssd.pytorch
import torch
from torchvision import transforms
import cv2
import numpy as np
import types
from numpy import random
def intersect(box_a, box_b):
max_xy = np.minimum(box_a[:, 2:], box_b[2:])
min_xy = np.maximum(box_a[:, :2], box_b[:2])
inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
return inter[:, 0] * inter[:, 1]
def jaccard_numpy(box_a, box_b):
"""Compute the jaccard overlap of two sets of boxes. The jaccard overlap
is simply the intersection over union of two boxes.
E.g.:
A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
Args:
box_a: Multiple bounding boxes, Shape: [num_boxes,4]
box_b: Single bounding box, Shape: [4]
    Return:
        jaccard overlap: Shape: [box_a.shape[0]]
    """
    inter = intersect(box_a, box_b)
    area_a = ((box_a[:, 2] - box_a[:, 0]) *
              (box_a[:, 3] - box_a[:, 1]))  # [A]
    area_b = ((box_b[2] - box_b[0]) *
              (box_b[3] - box_b[1]))  # scalar
    union = area_a + area_b - inter
    return inter / union  # [A]
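def _demo_jaccard():
    # Illustrative check (hypothetical helper, not in the original file): a
    # 10x10 box half-covered by the query box gives IoU = 50 / 150 = 1/3.
    box_a = np.array([[0., 0., 10., 10.]])
    box_b = np.array([0., 5., 10., 15.])
    assert np.isclose(jaccard_numpy(box_a, box_b)[0], 1.0 / 3.0)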
def remove_empty_boxes(boxes, labels):
"""Removes bounding boxes of W or H equal to 0 and its labels
Args:
boxes (ndarray): NP Array with bounding boxes as lines
* BBOX[x1, y1, x2, y2]
labels (labels): Corresponding labels with boxes
Returns:
ndarray: Valid bounding boxes
ndarray: Corresponding labels
"""
del_boxes = []
for idx, box in enumerate(boxes):
if box[0] == box[2] or box[1] == box[3]:
del_boxes.append(idx)
return np.delete(boxes, del_boxes, 0), np.delete(labels, del_boxes)
class Compose(object):
"""Composes several augmentations together.
Args:
transforms (List[Transform]): list of transforms to compose.
Example:
>>> augmentations.Compose([
>>> transforms.CenterCrop(10),
>>> transforms.ToTensor(),
>>> ])
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, img, boxes=None, labels=None):
for t in self.transforms:
img, boxes, labels = t(img, boxes, labels)
if boxes is not None:
boxes, labels = remove_empty_boxes(boxes, labels)
return img, boxes, labels
class Lambda(object):
"""Applies a lambda as a transform."""
def __init__(self, lambd):
assert isinstance(lambd, types.LambdaType)
self.lambd = lambd
def __call__(self, img, boxes=None, labels=None):
return self.lambd(img, boxes, labels)
class ConvertFromInts(object):
def __call__(self, image, boxes=None, labels=None):
return image.astype(np.float32), boxes, labels
class SubtractMeans(object):
def __init__(self, mean):
self.mean = np.array(mean, dtype=np.float32)
def __call__(self, image, boxes=None, labels=None):
image = image.astype(np.float32)
image -= self.mean
return image.astype(np.float32), boxes, labels
class ToAbsoluteCoords(object):
def __call__(self, image, boxes=None, labels=None):
height, width, channels = image.shape
boxes[:, 0] *= width
boxes[:, 2] *= width
boxes[:, 1] *= height
boxes[:, 3] *= height
return image, boxes, labels
class ToPercentCoords(object):
def __call__(self, image, boxes=None, labels=None):
height, width, channels = image.shape
boxes[:, 0] /= width
boxes[:, 2] /= width
boxes[:, 1] /= height
boxes[:, 3] /= height
return image, boxes, labels
class Resize(object):
def __init__(self, size=300):
self.size = size
def __call__(self, image, boxes=None, labels=None):
image = cv2.resize(image, (self.size,
self.size))
return image, boxes, labels
class RandomSaturation(object):
def __init__(self, lower=0.5, upper=1.5):
self.lower = lower
self.upper = upper
        assert self.upper >= self.lower, "saturation upper must be >= lower."
        assert self.lower >= 0, "saturation lower must be non-negative."
def __call__(self, image, boxes=None, labels=None):
if random.randint(2):
image[:, :, 1] *= random.uniform(self.lower, self.upper)
return image, boxes, labels
class RandomHue(object):
def __init__(self, delta=18.0):
assert delta >= 0.0 and delta <= 360.0
self.delta = delta
def __call__(self, image, boxes=None, labels=None):
if random.randint(2):
image[:, :, 0] += random.uniform(-self.delta, self.delta)
image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
return image, boxes, labels
class RandomLightingNoise(object):
def __init__(self):
self.perms = ((0, 1, 2), (0, 2, 1),
(1, 0, 2), (1, 2, 0),
(2, 0, 1), (2, 1, 0))
def __call__(self, image, boxes=None, labels=None):
if random.randint(2):
swap = self.perms[random.randint(len(self.perms))]
shuffle = SwapChannels(swap) # shuffle channels
image = shuffle(image)
return image, boxes, labels
class ConvertColor(object):
def __init__(self, current, transform):
self.transform = transform
self.current = current
def __call__(self, image, boxes=None, labels=None):
if self.current == 'BGR' and self.transform == 'HSV':
image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
elif self.current == 'RGB' and self.transform == 'HSV':
image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
elif self.current == 'BGR' and self.transform == 'RGB':
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
elif self.current == 'HSV' and self.transform == 'BGR':
image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
elif self.current == 'HSV' and self.transform == "RGB":
image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
else:
raise NotImplementedError
return image, boxes, labels
class RandomContrast(object):
def __init__(self, lower=0.5, upper=1.5):
self.lower = lower
self.upper = upper
assert self.upper >= self.lower, "contrast upper must be >= lower."
assert self.lower >= 0, "contrast lower must be non-negative."
# expects float image
def __call__(self, image, boxes=None, labels=None):
if random.randint(2):
alpha = random.uniform(self.lower, self.upper)
image *= alpha
return image, boxes, labels
class RandomBrightness(object):
def __init__(self, delta=32):
assert delta >= 0.0
assert delta <= 255.0
self.delta = delta
def __call__(self, image, boxes=None, labels=None):
if random.randint(2):
delta = random.uniform(-self.delta, self.delta)
image += delta
return image, boxes, labels
class ToCV2Image(object):
def __call__(self, tensor, boxes=None, labels=None):
return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels
class ToTensor(object):
def __call__(self, cvimage, boxes=None, labels=None):
return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels
class RandomSampleCrop(object):
"""Crop
Arguments:
img (Image): the image being input during training
boxes (Tensor): the original bounding boxes in pt form
labels (Tensor): the class labels for each bbox
mode (float tuple): the min and max jaccard overlaps
Return:
(img, boxes, classes)
img (Image): the cropped image
boxes (Tensor): the adjusted bounding boxes in pt form
labels (Tensor): the class labels for each bbox
"""
def __init__(self):
self.sample_options = (
# using entire original input image
None,
            # sample a patch s.t. MIN jaccard w/ obj in .1, .3, .7, .9
(0.1, None),
(0.3, None),
(0.7, None),
(0.9, None),
# randomly sample a patch
(None, None),
)
def __call__(self, image, boxes=None, labels=None):
# guard against no boxes
if boxes is not None and boxes.shape[0] == 0:
return image, boxes, labels
height, width, _ = image.shape
while True:
# randomly choose a mode
mode = self.sample_options[random.randint(0, len(self.sample_options))]
if mode is None:
return image, boxes, labels
min_iou, max_iou = mode
if min_iou is None:
min_iou = float('-inf')
if max_iou is None:
max_iou = float('inf')
            # max trials (50)
for _ in range(50):
current_image = image
w = random.uniform(0.3 * width, width)
h = random.uniform(0.3 * height, height)
# aspect ratio constraint b/t .5 & 2
if h / w < 0.5 or h / w > 2:
continue
                left = random.uniform(0, width - w)
                top = random.uniform(0, height - h)
# convert to integer rect x1,y1,x2,y2
rect = np.array([int(left), int(top), int(left + w), int(top + h)])
# calculate IoU (jaccard overlap) b/t the cropped and gt boxes
overlap = jaccard_numpy(boxes, rect)
# is min and max overlap constraint satisfied? if not try again
if overlap.max() < min_iou or overlap.min() > max_iou:
continue
# cut the crop from the image
current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
:]
# keep overlap with gt box IF center in sampled patch
centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
                # mask in all gt boxes whose centers are below and to the right of the crop's top-left corner
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
                # mask in all gt boxes whose centers are above and to the left of the crop's bottom-right corner
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
                # keep boxes where both hold, i.e. the center lies inside the crop
                mask = m1 * m2
# have any valid boxes? try again if not
if not mask.any():
continue
# take only matching gt boxes
current_boxes = boxes[mask, :].copy()
# take only matching gt labels
current_labels = labels[mask]
                # clamp the box's left/top corner to the crop's
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # adjust to crop (by subtracting crop's left,top)
                current_boxes[:, :2] -= rect[:2]
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # adjust to crop (by subtracting crop's left,top)
                current_boxes[:, 2:] -= rect[:2]
return current_image, current_boxes, current_labels
class Expand(object):
def __init__(self, mean):
self.mean = mean
def __call__(self, image, boxes, labels):
if random.randint(2):
return image, boxes, labels
height, width, depth = image.shape
ratio = random.uniform(1, 4)
left = random.uniform(0, width * ratio - width)
top = random.uniform(0, height * ratio - height)
expand_image = np.zeros(
(int(height * ratio), int(width * ratio), depth),
dtype=image.dtype)
expand_image[:, :, :] = self.mean
expand_image[int(top):int(top + height),
int(left):int(left + width)] = image
image = expand_image
boxes = boxes.copy()
boxes[:, :2] += (int(left), int(top))
boxes[:, 2:] += (int(left), int(top))
return image, boxes, labels
class RandomMirror(object):
def __call__(self, image, boxes, classes):
_, width, _ = image.shape
if random.randint(2):
image = image[:, ::-1]
boxes = boxes.copy()
boxes[:, 0::2] = width - boxes[:, 2::-2]
return image, boxes, classes
class SwapChannels(object):
"""Transforms a tensorized image by swapping the channels in the order
specified in the swap tuple.
Args:
swaps (int triple): final order of channels
eg: (2, 1, 0)
"""
def __init__(self, swaps):
self.swaps = swaps
def __call__(self, image):
"""
Args:
image (Tensor): image tensor to be transformed
Return:
a tensor with channels swapped according to swap
"""
# if torch.is_tensor(image):
# image = image.data.cpu().numpy()
# else:
# image = np.array(image)
image = image[:, :, self.swaps]
return image
class PhotometricDistort(object):
def __init__(self):
self.pd = [
RandomContrast(), # RGB
ConvertColor(current="RGB", transform='HSV'), # HSV
RandomSaturation(), # HSV
RandomHue(), # HSV
ConvertColor(current='HSV', transform='RGB'), # RGB
RandomContrast() # RGB
]
self.rand_brightness = RandomBrightness()
self.rand_light_noise = RandomLightingNoise()
def __call__(self, image, boxes, labels):
im = image.copy()
im, boxes, labels = self.rand_brightness(im, boxes, labels)
if random.randint(2):
distort = Compose(self.pd[:-1])
else:
distort = Compose(self.pd[1:])
im, boxes, labels = distort(im, boxes, labels)
return self.rand_light_noise(im, boxes, labels)
import logging
import os
import torch
import torch.utils.data
from tqdm import tqdm
from ssd.data.build import make_data_loader
from ssd.data.datasets.evaluation import evaluate
from ssd.utils import dist_util, mkdir
from ssd.utils.dist_util import synchronize, is_main_process
def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu):
all_predictions = dist_util.all_gather(predictions_per_gpu)
if not dist_util.is_main_process():
return
# merge the list of dicts
predictions = {}
for p in all_predictions:
predictions.update(p)
    # the keys are image indices; sort them to flatten the dict into a list
image_ids = list(sorted(predictions.keys()))
if len(image_ids) != image_ids[-1] + 1:
logger = logging.getLogger("SSD.inference")
logger.warning(
"Number of images that were gathered from multiple processes is not "
"a contiguous set. Some images might be missing from the evaluation"
)
# convert to a list
predictions = [predictions[i] for i in image_ids]
return predictions
def compute_on_dataset(model, data_loader, device):
results_dict = {}
for batch in tqdm(data_loader):
images, targets, image_ids = batch
cpu_device = torch.device("cpu")
with torch.no_grad():
outputs = model(images.to(device))
outputs = [o.to(cpu_device) for o in outputs]
results_dict.update(
{int(img_id): result for img_id, result in zip(image_ids, outputs)}
)
return results_dict
def inference(model, data_loader, dataset_name, device, output_folder=None, use_cached=False, **kwargs):
dataset = data_loader.dataset
logger = logging.getLogger("SSD.inference")
logger.info("Evaluating {} dataset({} images):".format(dataset_name, len(dataset)))
predictions_path = os.path.join(output_folder, 'predictions.pth')
if use_cached and os.path.exists(predictions_path):
predictions = torch.load(predictions_path, map_location='cpu')
else:
predictions = compute_on_dataset(model, data_loader, device)
synchronize()
predictions = _accumulate_predictions_from_multiple_gpus(predictions)
if not is_main_process():
return
if output_folder:
torch.save(predictions, predictions_path)
return evaluate(dataset=dataset, predictions=predictions, output_dir=output_folder, **kwargs)
@torch.no_grad()
def do_evaluation(cfg, model, distributed, **kwargs):
if isinstance(model, torch.nn.parallel.DistributedDataParallel):
model = model.module
model.eval()
device = torch.device(cfg.MODEL.DEVICE)
data_loaders_val = make_data_loader(cfg, is_train=False, distributed=distributed)
eval_results = []
for dataset_name, data_loader in zip(cfg.DATASETS.TEST, data_loaders_val):
output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
if not os.path.exists(output_folder):
mkdir(output_folder)
eval_result = inference(model, data_loader, dataset_name, device, output_folder, **kwargs)
eval_results.append(eval_result)
return eval_results
import collections
import datetime
import logging
import os
import time
import torch
import torch.distributed as dist
from ssd.engine.inference import do_evaluation
from ssd.utils import dist_util
from ssd.utils.metric_logger import MetricLogger
def write_metric(eval_result, prefix, summary_writer, global_step):
for key in eval_result:
value = eval_result[key]
tag = '{}/{}'.format(prefix, key)
if isinstance(value, collections.abc.Mapping):
write_metric(value, tag, summary_writer, global_step)
else:
summary_writer.add_scalar(tag, value, global_step=global_step)
def reduce_loss_dict(loss_dict):
"""
Reduce the loss dictionary from all processes so that process with rank
0 has the averaged results. Returns a dict with the same fields as
loss_dict, after reduction.
"""
world_size = dist_util.get_world_size()
if world_size < 2:
return loss_dict
with torch.no_grad():
loss_names = []
all_losses = []
for k in sorted(loss_dict.keys()):
loss_names.append(k)
all_losses.append(loss_dict[k])
all_losses = torch.stack(all_losses, dim=0)
dist.reduce(all_losses, dst=0)
if dist.get_rank() == 0:
# only main process gets accumulated, so only divide by
# world_size in this case
all_losses /= world_size
reduced_losses = {k: v for k, v in zip(loss_names, all_losses)}
return reduced_losses
def do_train(cfg, model,
data_loader,
optimizer,
scheduler,
checkpointer,
device,
arguments,
args):
logger = logging.getLogger("SSD.trainer")
logger.info("Start training ...")
meters = MetricLogger()
model.train()
save_to_disk = dist_util.get_rank() == 0
if args.use_tensorboard and save_to_disk:
try:
from torch.utils.tensorboard import SummaryWriter
except ImportError:
from tensorboardX import SummaryWriter
summary_writer = SummaryWriter(log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs'))
else:
summary_writer = None
max_iter = len(data_loader)
start_iter = arguments["iteration"]
start_training_time = time.time()
end = time.time()
for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
iteration = iteration + 1
arguments["iteration"] = iteration
images = images.to(device)
targets = targets.to(device)
loss_dict = model(images, targets=targets)
loss = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = reduce_loss_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
meters.update(total_loss=losses_reduced, **loss_dict_reduced)
optimizer.zero_grad()
loss.backward()
optimizer.step()
scheduler.step()
batch_time = time.time() - end
end = time.time()
meters.update(time=batch_time)
if iteration % args.log_step == 0:
eta_seconds = meters.time.global_avg * (max_iter - iteration)
eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            if device.type == "cuda":
logger.info(
meters.delimiter.join([
"iter: {iter:06d}",
"lr: {lr:.5f}",
'{meters}',
"eta: {eta}",
'mem: {mem}M',
]).format(
iter=iteration,
lr=optimizer.param_groups[0]['lr'],
meters=str(meters),
eta=eta_string,
mem=round(torch.cuda.max_memory_allocated() / 1024.0 / 1024.0),
)
)
else:
logger.info(
meters.delimiter.join([
"iter: {iter:06d}",
"lr: {lr:.5f}",
'{meters}',
"eta: {eta}",
]).format(
iter=iteration,
lr=optimizer.param_groups[0]['lr'],
meters=str(meters),
eta=eta_string,
)
)
if summary_writer:
global_step = iteration
summary_writer.add_scalar('losses/total_loss', losses_reduced, global_step=global_step)
for loss_name, loss_item in loss_dict_reduced.items():
summary_writer.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=global_step)
summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step)
if iteration % args.save_step == 0:
checkpointer.save("model_{:06d}".format(iteration), **arguments)
if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
eval_results = do_evaluation(cfg, model, distributed=args.distributed, iteration=iteration)
if dist_util.get_rank() == 0 and summary_writer:
for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST):
write_metric(eval_result['metrics'], 'metrics/' + dataset, summary_writer, iteration)
model.train() # *IMPORTANT*: change to train mode after eval.
checkpointer.save("model_final", **arguments)
# compute training time
total_training_time = int(time.time() - start_training_time)
total_time_str = str(datetime.timedelta(seconds=total_training_time))
logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter))
return model
import torch
import torch.nn as nn
import torch.nn.init as init
from .separable_conv import SeparableConv2d
__all__ = ['L2Norm', 'SeparableConv2d']
class L2Norm(nn.Module):
def __init__(self, n_channels, scale):
super(L2Norm, self).__init__()
self.n_channels = n_channels
self.gamma = scale or None
self.eps = 1e-10
self.weight = nn.Parameter(torch.Tensor(self.n_channels))
self.reset_parameters()
def reset_parameters(self):
init.constant_(self.weight, self.gamma)
def forward(self, x):
norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
x = torch.div(x, norm)
out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
return out
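def _demo_l2norm():
    # Illustrative check (hypothetical helper, not in the original file): after
    # L2Norm with scale=20, each spatial position's channel vector has norm ~20,
    # which is how SSD rescales the conv4_3 feature map.
    layer = L2Norm(n_channels=512, scale=20)
    x = torch.randn(1, 512, 38, 38)
    norms = layer(x).pow(2).sum(dim=1).sqrt()
    assert torch.allclose(norms, torch.full_like(norms, 20.0), atol=1e-3)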
from torch import nn
class SeparableConv2d(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, onnx_compatible=False):
super().__init__()
ReLU = nn.ReLU if onnx_compatible else nn.ReLU6
self.conv = nn.Sequential(
nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size,
groups=in_channels, stride=stride, padding=padding),
nn.BatchNorm2d(in_channels),
ReLU(),
nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
)
def forward(self, x):
return self.conv(x)
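def _demo_separable_conv():
    # Illustrative comparison (hypothetical helper, not in the original file):
    # a depthwise 3x3 conv plus a 1x1 pointwise conv uses far fewer weights
    # than a dense 3x3 conv mapping the same shapes, at the same output size.
    import torch
    sep = SeparableConv2d(256, 512, kernel_size=3, stride=1, padding=1)
    dense = nn.Conv2d(256, 512, kernel_size=3, padding=1)
    assert sum(p.numel() for p in sep.parameters()) < sum(p.numel() for p in dense.parameters())
    assert sep(torch.randn(1, 256, 19, 19)).shape == (1, 512, 19, 19)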
from itertools import product
import torch
from math import sqrt
class PriorBox:
def __init__(self, cfg):
self.image_size = cfg.INPUT.IMAGE_SIZE
prior_config = cfg.MODEL.PRIORS
self.feature_maps = prior_config.FEATURE_MAPS
self.min_sizes = prior_config.MIN_SIZES
self.max_sizes = prior_config.MAX_SIZES
self.strides = prior_config.STRIDES
self.aspect_ratios = prior_config.ASPECT_RATIOS
self.clip = prior_config.CLIP
def __call__(self):
"""Generate SSD Prior Boxes.
        It returns the center, height and width of the priors. The values are relative to the image size.
Returns:
priors (num_priors, 4): The prior boxes represented as [[center_x, center_y, w, h]]. All the values
are relative to the image size.
"""
priors = []
for k, f in enumerate(self.feature_maps):
scale = self.image_size / self.strides[k]
for i, j in product(range(f), repeat=2):
# unit center x,y
cx = (j + 0.5) / scale
cy = (i + 0.5) / scale
# small sized square box
size = self.min_sizes[k]
h = w = size / self.image_size
priors.append([cx, cy, w, h])
# big sized square box
size = sqrt(self.min_sizes[k] * self.max_sizes[k])
h = w = size / self.image_size
priors.append([cx, cy, w, h])
# change h/w ratio of the small sized box
size = self.min_sizes[k]
h = w = size / self.image_size
for ratio in self.aspect_ratios[k]:
ratio = sqrt(ratio)
priors.append([cx, cy, w * ratio, h / ratio])
priors.append([cx, cy, w / ratio, h * ratio])
priors = torch.tensor(priors)
if self.clip:
priors.clamp_(max=1, min=0)
return priors
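# Sanity sketch (illustrative, not part of the original file): with the usual
# SSD300 settings (assumed here), feature maps [38, 19, 10, 5, 3, 1] and
# aspect ratios [[2], [2, 3], [2, 3], [2, 3], [2], [2]] give 4, 6, 6, 6, 4, 4
# boxes per cell (two squares plus two per extra ratio), so the generator
# returns the familiar total:
#
#   38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4 = 8732 priors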
from ssd.modeling import registry
from .vgg import VGG
from .mobilenet import MobileNetV2
from .efficient_net import EfficientNet
from .mobilenetv3 import MobileNetV3
__all__ = ['build_backbone', 'VGG', 'MobileNetV2', 'EfficientNet', 'MobileNetV3']
def build_backbone(cfg):
return registry.BACKBONES[cfg.MODEL.BACKBONE.NAME](cfg, cfg.MODEL.BACKBONE.PRETRAINED)
from ssd.modeling import registry
from .efficient_net import EfficientNet
__all__ = ['efficient_net_b3', 'EfficientNet']
@registry.BACKBONES.register('efficient_net-b3')
def efficient_net_b3(cfg, pretrained=True):
if pretrained:
model = EfficientNet.from_pretrained('efficientnet-b3')
else:
model = EfficientNet.from_name('efficientnet-b3')
return model
import torch
from torch import nn
from torch.nn import functional as F
from .utils import (
relu_fn,
round_filters,
round_repeats,
drop_connect,
Conv2dSamePadding,
get_model_params,
efficientnet_params,
load_pretrained_weights,
)
INDICES = {
'efficientnet-b3': [7, 17, 25]
}
EXTRAS = {
'efficientnet-b3': [
# in, out, k, s, p
[(384, 128, 1, 1, 0), (128, 256, 3, 2, 1)], # 5 x 5
[(256, 128, 1, 1, 0), (128, 256, 3, 1, 0)], # 3 x 3
[(256, 128, 1, 1, 0), (128, 256, 3, 1, 0)], # 1 x 1
]
}
def add_extras(cfgs):
extras = nn.ModuleList()
for cfg in cfgs:
extra = []
for params in cfg:
in_channels, out_channels, kernel_size, stride, padding = params
extra.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding))
extra.append(nn.ReLU())
extras.append(nn.Sequential(*extra))
return extras
class MBConvBlock(nn.Module):
"""
Mobile Inverted Residual Bottleneck Block
Args:
block_args (namedtuple): BlockArgs, see above
global_params (namedtuple): GlobalParam, see above
Attributes:
has_se (bool): Whether the block contains a Squeeze and Excitation layer.
"""
def __init__(self, block_args, global_params):
super().__init__()
self._block_args = block_args
self._bn_mom = 1 - global_params.batch_norm_momentum
self._bn_eps = global_params.batch_norm_epsilon
self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1)
self.id_skip = block_args.id_skip # skip connection and drop connect
# Expansion phase
        inp = self._block_args.input_filters  # number of input channels
        oup = self._block_args.input_filters * self._block_args.expand_ratio  # number of channels after expansion
if self._block_args.expand_ratio != 1:
self._expand_conv = Conv2dSamePadding(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
# Depthwise convolution phase
k = self._block_args.kernel_size
s = self._block_args.stride
self._depthwise_conv = Conv2dSamePadding(
in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise
kernel_size=k, stride=s, bias=False)
self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
# Squeeze and Excitation layer, if desired
if self.has_se:
num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio))
self._se_reduce = Conv2dSamePadding(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
self._se_expand = Conv2dSamePadding(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)
# Output phase
final_oup = self._block_args.output_filters
self._project_conv = Conv2dSamePadding(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps)
def forward(self, inputs, drop_connect_rate=None):
"""
:param inputs: input tensor
:param drop_connect_rate: drop connect rate (float, between 0 and 1)
:return: output of block
"""
# Expansion and Depthwise Convolution
x = inputs
if self._block_args.expand_ratio != 1:
x = relu_fn(self._bn0(self._expand_conv(inputs)))
x = relu_fn(self._bn1(self._depthwise_conv(x)))
# Squeeze and Excitation
if self.has_se:
x_squeezed = F.adaptive_avg_pool2d(x, 1)
x_squeezed = self._se_expand(relu_fn(self._se_reduce(x_squeezed)))
x = torch.sigmoid(x_squeezed) * x
x = self._bn2(self._project_conv(x))
# Skip connection and drop connect
input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters
if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters:
if drop_connect_rate:
x = drop_connect(x, p=drop_connect_rate, training=self.training)
x = x + inputs # skip connection
return x
class EfficientNet(nn.Module):
"""
An EfficientNet model. Most easily loaded with the .from_name or .from_pretrained methods
Args:
blocks_args (list): A list of BlockArgs to construct blocks
global_params (namedtuple): A set of GlobalParams shared between blocks
Example:
model = EfficientNet.from_pretrained('efficientnet-b0')
"""
def __init__(self, model_name, blocks_args=None, global_params=None):
super().__init__()
self.indices = INDICES[model_name]
self.extras = add_extras(EXTRAS[model_name])
assert isinstance(blocks_args, list), 'blocks_args should be a list'
        assert len(blocks_args) > 0, 'blocks_args must not be empty'
self._global_params = global_params
self._blocks_args = blocks_args
# Batch norm parameters
bn_mom = 1 - self._global_params.batch_norm_momentum
bn_eps = self._global_params.batch_norm_epsilon
# Stem
in_channels = 3 # rgb
out_channels = round_filters(32, self._global_params) # number of output channels
self._conv_stem = Conv2dSamePadding(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
# Build blocks
self._blocks = nn.ModuleList([])
for block_args in self._blocks_args:
# Update block input and output filters based on depth multiplier.
block_args = block_args._replace(
input_filters=round_filters(block_args.input_filters, self._global_params),
output_filters=round_filters(block_args.output_filters, self._global_params),
num_repeat=round_repeats(block_args.num_repeat, self._global_params)
)
# The first block needs to take care of stride and filter size increase.
self._blocks.append(MBConvBlock(block_args, self._global_params))
if block_args.num_repeat > 1:
block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
for _ in range(block_args.num_repeat - 1):
self._blocks.append(MBConvBlock(block_args, self._global_params))
self.reset_parameters()
def reset_parameters(self):
for m in self.extras.modules():
if isinstance(m, nn.Conv2d):
nn.init.xavier_uniform_(m.weight)
nn.init.zeros_(m.bias)
def extract_features(self, inputs):
""" Returns output of the final convolution layer """
# Stem
x = relu_fn(self._bn0(self._conv_stem(inputs)))
features = []
# Blocks
for idx, block in enumerate(self._blocks):
drop_connect_rate = self._global_params.drop_connect_rate
if drop_connect_rate:
drop_connect_rate *= float(idx) / len(self._blocks)
x = block(x, drop_connect_rate)
if idx in self.indices:
features.append(x)
return x, features
def forward(self, inputs):
""" Calls extract_features to extract features, applies final linear layer, and returns logits. """
# Convolution layers
x, features = self.extract_features(inputs)
for layer in self.extras:
x = layer(x)
features.append(x)
return tuple(features)
@classmethod
def from_name(cls, model_name, override_params=None):
cls._check_model_name_is_valid(model_name)
blocks_args, global_params = get_model_params(model_name, override_params)
return EfficientNet(model_name, blocks_args, global_params)
@classmethod
def from_pretrained(cls, model_name):
model = EfficientNet.from_name(model_name)
load_pretrained_weights(model, model_name)
return model
@classmethod
def get_image_size(cls, model_name):
cls._check_model_name_is_valid(model_name)
_, _, res, _ = efficientnet_params(model_name)
return res
@classmethod
def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False):
""" Validates model name. None that pretrained weights are only available for
the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """
num_models = 4 if also_need_pretrained_weights else 8
valid_models = ['efficientnet_b' + str(i) for i in range(num_models)]
if model_name.replace('-', '_') not in valid_models:
raise ValueError('model_name should be one of: ' + ', '.join(valid_models))
"""
This file contains helper functions for building the model and for loading model parameters.
These helper functions are built to mirror those in the official TensorFlow implementation.
"""
import re
import math
import collections
import torch
from torch import nn
from torch.nn import functional as F
from ssd.utils.model_zoo import load_state_dict_from_url
########################################################################
############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ###############
########################################################################
# Parameters for the entire model (stem, all blocks, and head)
GlobalParams = collections.namedtuple('GlobalParams', [
'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate',
'num_classes', 'width_coefficient', 'depth_coefficient',
'depth_divisor', 'min_depth', 'drop_connect_rate', ])
# Parameters for an individual model block
BlockArgs = collections.namedtuple('BlockArgs', [
'kernel_size', 'num_repeat', 'input_filters', 'output_filters',
'expand_ratio', 'id_skip', 'stride', 'se_ratio'])
# Change namedtuple defaults
GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)
def relu_fn(x):
""" Swish activation function """
return x * torch.sigmoid(x)
def round_filters(filters, global_params):
""" Calculate and round number of filters based on depth multiplier. """
multiplier = global_params.width_coefficient
if not multiplier:
return filters
divisor = global_params.depth_divisor
min_depth = global_params.min_depth
filters *= multiplier
min_depth = min_depth or divisor
new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
if new_filters < 0.9 * filters: # prevent rounding by more than 10%
new_filters += divisor
return int(new_filters)
def round_repeats(repeats, global_params):
""" Round number of filters based on depth multiplier. """
multiplier = global_params.depth_coefficient
if not multiplier:
return repeats
return int(math.ceil(multiplier * repeats))
def drop_connect(inputs, p, training):
""" Drop connect. """
if not training: return inputs
batch_size = inputs.shape[0]
keep_prob = 1 - p
random_tensor = keep_prob
random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
binary_tensor = torch.floor(random_tensor)
output = inputs / keep_prob * binary_tensor
return output
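def _demo_drop_connect():
    # Illustrative check (hypothetical helper, not in the original file): in
    # training mode, whole examples are zeroed with probability p and the
    # survivors are rescaled by 1/(1-p), keeping the expected value unchanged.
    x = torch.ones(1000, 1, 1, 1)
    y = drop_connect(x, p=0.2, training=True)
    assert torch.all((y == 0) | torch.isclose(y, torch.tensor(1.25)))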
class Conv2dSamePadding(nn.Conv2d):
""" 2D Convolutions like TensorFlow """
def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
def forward(self, x):
ih, iw = x.size()[-2:]
kh, kw = self.weight.size()[-2:]
sh, sw = self.stride
oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
if pad_h > 0 or pad_w > 0:
x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
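def _demo_same_padding():
    # Illustrative check (hypothetical helper, not in the original file): like
    # TensorFlow 'SAME' padding, the output spatial size is ceil(input/stride)
    # regardless of kernel size.
    conv = Conv2dSamePadding(3, 8, kernel_size=5, stride=2)
    out = conv(torch.randn(1, 3, 301, 301))
    assert out.shape[-2:] == (math.ceil(301 / 2), math.ceil(301 / 2))  # (151, 151)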
########################################################################
############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ##############
########################################################################
def efficientnet_params(model_name):
""" Map EfficientNet model name to parameter coefficients. """
params_dict = {
# Coefficients: width,depth,res,dropout
'efficientnet-b0': (1.0, 1.0, 224, 0.2),
'efficientnet-b1': (1.0, 1.1, 240, 0.2),
'efficientnet-b2': (1.1, 1.2, 260, 0.3),
'efficientnet-b3': (1.2, 1.4, 300, 0.3),
'efficientnet-b4': (1.4, 1.8, 380, 0.4),
'efficientnet-b5': (1.6, 2.2, 456, 0.4),
'efficientnet-b6': (1.8, 2.6, 528, 0.5),
'efficientnet-b7': (2.0, 3.1, 600, 0.5),
}
return params_dict[model_name]
class BlockDecoder(object):
""" Block Decoder for readability, straight from the official TensorFlow repository """
@staticmethod
def _decode_block_string(block_string):
""" Gets a block through a string notation of arguments. """
assert isinstance(block_string, str)
ops = block_string.split('_')
options = {}
for op in ops:
splits = re.split(r'(\d.*)', op)
if len(splits) >= 2:
key, value = splits[:2]
options[key] = value
# Check stride
assert (('s' in options and len(options['s']) == 1) or
(len(options['s']) == 2 and options['s'][0] == options['s'][1]))
return BlockArgs(
kernel_size=int(options['k']),
num_repeat=int(options['r']),
input_filters=int(options['i']),
output_filters=int(options['o']),
expand_ratio=int(options['e']),
id_skip=('noskip' not in block_string),
se_ratio=float(options['se']) if 'se' in options else None,
stride=[int(options['s'][0])])
@staticmethod
def _encode_block_string(block):
"""Encodes a block to a string."""
args = [
'r%d' % block.num_repeat,
'k%d' % block.kernel_size,
            's%d%d' % (block.stride[0], block.stride[0]),  # BlockArgs stores stride as a one-element list
'e%s' % block.expand_ratio,
'i%d' % block.input_filters,
'o%d' % block.output_filters
]
if 0 < block.se_ratio <= 1:
args.append('se%s' % block.se_ratio)
if block.id_skip is False:
args.append('noskip')
return '_'.join(args)
@staticmethod
def decode(string_list):
"""
Decodes a list of string notations to specify blocks inside the network.
:param string_list: a list of strings, each string is a notation of block
:return: a list of BlockArgs namedtuples of block args
"""
assert isinstance(string_list, list)
blocks_args = []
for block_string in string_list:
blocks_args.append(BlockDecoder._decode_block_string(block_string))
return blocks_args
@staticmethod
def encode(blocks_args):
"""
Encodes a list of BlockArgs to a list of strings.
:param blocks_args: a list of BlockArgs namedtuples of block args
:return: a list of strings, each string is a notation of block
"""
block_strings = []
for block in blocks_args:
block_strings.append(BlockDecoder._encode_block_string(block))
return block_strings
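def _demo_block_decoder():
    # Illustrative decode (hypothetical helper, not in the original file): the
    # compact string notation maps one field per token onto BlockArgs.
    args = BlockDecoder._decode_block_string('r1_k3_s11_e1_i32_o16_se0.25')
    assert args.num_repeat == 1 and args.kernel_size == 3 and args.stride == [1]
    assert args.input_filters == 32 and args.output_filters == 16
    assert args.expand_ratio == 1 and args.se_ratio == 0.25 and args.id_skip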
def efficientnet(width_coefficient=None, depth_coefficient=None,
dropout_rate=0.2, drop_connect_rate=0.2):
""" Creates a efficientnet model. """
blocks_args = [
'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25',
'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25',
'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25',
'r1_k3_s11_e6_i192_o320_se0.25',
]
blocks_args = BlockDecoder.decode(blocks_args)
global_params = GlobalParams(
batch_norm_momentum=0.99,
batch_norm_epsilon=1e-3,
dropout_rate=dropout_rate,
drop_connect_rate=drop_connect_rate,
# data_format='channels_last', # removed, this is always true in PyTorch
num_classes=1000,
width_coefficient=width_coefficient,
depth_coefficient=depth_coefficient,
depth_divisor=8,
min_depth=None
)
return blocks_args, global_params
def get_model_params(model_name, override_params):
""" Get the block args and global params for a given model """
if model_name.startswith('efficientnet'):
w, d, _, p = efficientnet_params(model_name)
# note: all models have drop connect rate = 0.2
blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p)
else:
raise NotImplementedError('model name is not pre-defined: %s' % model_name)
if override_params:
# ValueError will be raised here if override_params has fields not included in global_params.
global_params = global_params._replace(**override_params)
return blocks_args, global_params
url_map = {
'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet-b0-08094119.pth',
'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet-b1-dbc7070a.pth',
'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet-b2-27687264.pth',
'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet-b3-c8376fa2.pth',
'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet-b4-e116e8b3.pth',
'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet-b5-586e6cc6.pth',
}
def load_pretrained_weights(model, model_name):
""" Loads pretrained weights, and downloads if loading for the first time. """
state_dict = load_state_dict_from_url(url_map[model_name])
model.load_state_dict(state_dict, strict=False)
print('Loaded pretrained weights for {}'.format(model_name))
from torch import nn
from ssd.modeling import registry
from ssd.utils.model_zoo import load_state_dict_from_url
model_urls = {
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}
class ConvBNReLU(nn.Sequential):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
padding = (kernel_size - 1) // 2
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
nn.BatchNorm2d(out_planes),
nn.ReLU6(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self, width_mult=1.0, inverted_residual_setting=None):
super(MobileNetV2, self).__init__()
block = InvertedResidual
input_channel = 32
last_channel = 1280
if inverted_residual_setting is None:
inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1],
[6, 24, 2, 2],
[6, 32, 3, 2],
[6, 64, 4, 2],
[6, 96, 3, 1],
[6, 160, 3, 2],
[6, 320, 1, 1],
]
# only check the first element, assuming user knows t,c,n,s are required
        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
            raise ValueError("inverted_residual_setting should be non-empty "
                             "and each element should be a 4-element list, got {}".format(inverted_residual_setting))
# building first layer
input_channel = int(input_channel * width_mult)
self.last_channel = int(last_channel * max(1.0, width_mult))
features = [ConvBNReLU(3, input_channel, stride=2)]
# building inverted residual blocks
for t, c, n, s in inverted_residual_setting:
output_channel = int(c * width_mult)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
input_channel = output_channel
# building last several layers
features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
# make it nn.Sequential
self.features = nn.Sequential(*features)
self.extras = nn.ModuleList([
InvertedResidual(1280, 512, 2, 0.2),
InvertedResidual(512, 256, 2, 0.25),
InvertedResidual(256, 256, 2, 0.5),
InvertedResidual(256, 64, 2, 0.25)
])
self.reset_parameters()
def reset_parameters(self):
# weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.zeros_(m.bias)
def forward(self, x):
features = []
for i in range(14):
x = self.features[i](x)
features.append(x)
for i in range(14, len(self.features)):
x = self.features[i](x)
features.append(x)
for i in range(len(self.extras)):
x = self.extras[i](x)
features.append(x)
return tuple(features)
@registry.BACKBONES.register('mobilenet_v2')
def mobilenet_v2(cfg, pretrained=True):
model = MobileNetV2()
if pretrained:
model.load_state_dict(load_state_dict_from_url(model_urls['mobilenet_v2']), strict=False)
return model
"""
Creates a MobileNetV3 Model as defined in:
Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam. (2019).
Searching for MobileNetV3
arXiv preprint arXiv:1905.02244.
@ Credit from https://github.com/d-li14/mobilenetv3.pytorch
@ Modified by Chakkrit Termritthikun (https://github.com/chakkritte)
"""
import torch.nn as nn
import math
from ssd.modeling import registry
from ssd.utils.model_zoo import load_state_dict_from_url
model_urls = {
'mobilenet_v3': 'https://github.com/d-li14/mobilenetv3.pytorch/raw/master/pretrained/mobilenetv3-large-1cd25616.pth',
}
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
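# Illustrative values (not part of the original file): rounding keeps channel
# counts divisible by 8 without shrinking any of them by more than 10%.
#
#   _make_divisible(37, 8) -> 40   (round to the nearest multiple of 8)
#   _make_divisible(10, 8) -> 16   (8 would be a >10% drop, so bump up one step)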
class h_sigmoid(nn.Module):
def __init__(self, inplace=True):
super(h_sigmoid, self).__init__()
self.relu = nn.ReLU6(inplace=inplace)
def forward(self, x):
return self.relu(x + 3) / 6
class h_swish(nn.Module):
def __init__(self, inplace=True):
super(h_swish, self).__init__()
self.sigmoid = h_sigmoid(inplace=inplace)
def forward(self, x):
return x * self.sigmoid(x)
class SELayer(nn.Module):
def __init__(self, channel, reduction=4):
super(SELayer, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Sequential(
nn.Linear(channel, _make_divisible(channel // reduction, 8)),
nn.ReLU(inplace=True),
nn.Linear(_make_divisible(channel // reduction, 8), channel),
h_sigmoid()
)
def forward(self, x):
b, c, _, _ = x.size()
y = self.avg_pool(x).view(b, c)
y = self.fc(y).view(b, c, 1, 1)
return x * y
def conv_3x3_bn(inp, oup, stride):
return nn.Sequential(
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup),
h_swish()
)
def conv_1x1_bn(inp, oup):
return nn.Sequential(
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
h_swish()
)
class InvertedResidual(nn.Module):
def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs):
super(InvertedResidual, self).__init__()
assert stride in [1, 2]
self.identity = stride == 1 and inp == oup
if inp == hidden_dim:
self.conv = nn.Sequential(
# dw
nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
h_swish() if use_hs else nn.ReLU(inplace=True),
# Squeeze-and-Excite
SELayer(hidden_dim) if use_se else nn.Identity(),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
else:
self.conv = nn.Sequential(
# pw
nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
nn.BatchNorm2d(hidden_dim),
h_swish() if use_hs else nn.ReLU(inplace=True),
# dw
nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
# Squeeze-and-Excite
SELayer(hidden_dim) if use_se else nn.Identity(),
h_swish() if use_hs else nn.ReLU(inplace=True),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
)
def forward(self, x):
if self.identity:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV3(nn.Module):
def __init__(self, mode='large', num_classes=1000, width_mult=1.):
super(MobileNetV3, self).__init__()
# setting of inverted residual blocks
self.cfgs = [
# k, t, c, SE, HS, s
[3, 1, 16, 0, 0, 1],
[3, 4, 24, 0, 0, 2],
[3, 3, 24, 0, 0, 1],
[5, 3, 40, 1, 0, 2],
[5, 3, 40, 1, 0, 1],
[5, 3, 40, 1, 0, 1],
[3, 6, 80, 0, 1, 2],
[3, 2.5, 80, 0, 1, 1],
[3, 2.3, 80, 0, 1, 1],
[3, 2.3, 80, 0, 1, 1],
[3, 6, 112, 1, 1, 1],
[3, 6, 112, 1, 1, 1],
[5, 6, 160, 1, 1, 2],
[5, 6, 160, 1, 1, 1],
[5, 6, 160, 1, 1, 1]]
assert mode in ['large', 'small']
# building first layer
input_channel = _make_divisible(16 * width_mult, 8)
layers = [conv_3x3_bn(3, input_channel, 2)]
# building inverted residual blocks
block = InvertedResidual
for k, t, c, use_se, use_hs, s in self.cfgs:
output_channel = _make_divisible(c * width_mult, 8)
exp_size = _make_divisible(input_channel * t, 8)
layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs))
input_channel = output_channel
# building last several layers
layers.append(conv_1x1_bn(input_channel, exp_size))
self.features = nn.Sequential(*layers)
self.extras = nn.ModuleList([
InvertedResidual(960, _make_divisible(960 * 0.2, 8), 512, 3, 2, True, True),
InvertedResidual(512, _make_divisible(512 * 0.25, 8), 256, 3, 2, True, True),
InvertedResidual(256, _make_divisible(256 * 0.5, 8), 256, 3, 2, True, True),
InvertedResidual(256, _make_divisible(256 * 0.25, 8), 64, 3, 2, True, True),
])
self.reset_parameters()
def forward(self, x):
features = []
for i in range(13):
x = self.features[i](x)
features.append(x)
for i in range(13, len(self.features)):
x = self.features[i](x)
features.append(x)
for i in range(len(self.extras)):
x = self.extras[i](x)
features.append(x)
return tuple(features)
def reset_parameters(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
if m.bias is not None:
m.bias.data.zero_()
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
n = m.weight.size(1)
m.weight.data.normal_(0, 0.01)
m.bias.data.zero_()
@registry.BACKBONES.register('mobilenet_v3')
def mobilenet_v3(cfg, pretrained=True):
model = MobileNetV3()
if pretrained:
model.load_state_dict(load_state_dict_from_url(model_urls['mobilenet_v3']), strict=False)
return model
import torch.nn as nn
import torch.nn.functional as F
from ssd.layers import L2Norm
from ssd.modeling import registry
from ssd.utils.model_zoo import load_state_dict_from_url
model_urls = {
'vgg': 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth',
}
# borrowed from https://github.com/amdegroot/ssd.pytorch/blob/master/ssd.py
def add_vgg(cfg, batch_norm=False):
layers = []
in_channels = 3
for v in cfg:
if v == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
elif v == 'C':
layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
else:
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
layers += [pool5, conv6,
nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
return layers
def add_extras(cfg, i, size=300):
# Extra layers added to VGG for feature scaling
layers = []
in_channels = i
flag = False
for k, v in enumerate(cfg):
if in_channels != 'S':
if v == 'S':
layers += [nn.Conv2d(in_channels, cfg[k + 1], kernel_size=(1, 3)[flag], stride=2, padding=1)]
else:
layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
flag = not flag
in_channels = v
if size == 512:
layers.append(nn.Conv2d(in_channels, 128, kernel_size=1, stride=1))
layers.append(nn.Conv2d(128, 256, kernel_size=4, stride=1, padding=1))
return layers
vgg_base = {
'300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
512, 512, 512],
'512': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
512, 512, 512],
}
extras_base = {
'300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256],
'512': [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256],
}
class VGG(nn.Module):
def __init__(self, cfg):
super().__init__()
size = cfg.INPUT.IMAGE_SIZE
vgg_config = vgg_base[str(size)]
extras_config = extras_base[str(size)]
self.vgg = nn.ModuleList(add_vgg(vgg_config))
self.extras = nn.ModuleList(add_extras(extras_config, i=1024, size=size))
self.l2_norm = L2Norm(512, scale=20)
self.reset_parameters()
def reset_parameters(self):
for m in self.extras.modules():
if isinstance(m, nn.Conv2d):
nn.init.xavier_uniform_(m.weight)
nn.init.zeros_(m.bias)
def init_from_pretrain(self, state_dict):
self.vgg.load_state_dict(state_dict)
def forward(self, x):
features = []
for i in range(23):
x = self.vgg[i](x)
s = self.l2_norm(x) # Conv4_3 L2 normalization
features.append(s)
# apply vgg up to fc7
for i in range(23, len(self.vgg)):
x = self.vgg[i](x)
features.append(x)
for k, v in enumerate(self.extras):
x = F.relu(v(x), inplace=True)
if k % 2 == 1:
features.append(x)
return tuple(features)
@registry.BACKBONES.register('vgg')
def vgg(cfg, pretrained=True):
model = VGG(cfg)
if pretrained:
model.init_from_pretrain(load_state_dict_from_url(model_urls['vgg']))
return model