Commit 82295dbf authored by Yanghan Wang, committed by Facebook GitHub Bot
Browse files

enable black for mobile-vision

Summary:
https://fb.workplace.com/groups/pythonfoundation/posts/2990917737888352

Remove `mobile-vision` from opt-out list; leaving `mobile-vision/SNPE` opted out because of 3rd-party code.

arc lint --take BLACK --apply-patches --paths-cmd 'hg files mobile-vision'

allow-large-files

Reviewed By: sstsai-adl

Differential Revision: D30721093

fbshipit-source-id: 9e5c16d988b315b93a28038443ecfb92efd18ef8
parent a56c7e15
...@@ -5,11 +5,10 @@ ...@@ -5,11 +5,10 @@
Modules to compute the matching cost and solve the corresponding LSAP. Modules to compute the matching cost and solve the corresponding LSAP.
""" """
import torch import torch
from detr.util.box_ops import box_cxcywh_to_xyxy, generalized_box_iou
from scipy.optimize import linear_sum_assignment from scipy.optimize import linear_sum_assignment
from torch import nn from torch import nn
from detr.util.box_ops import box_cxcywh_to_xyxy, generalized_box_iou
class HungarianMatcher(nn.Module): class HungarianMatcher(nn.Module):
"""This class computes an assignment between the targets and the predictions of the network """This class computes an assignment between the targets and the predictions of the network
...@@ -19,7 +18,13 @@ class HungarianMatcher(nn.Module): ...@@ -19,7 +18,13 @@ class HungarianMatcher(nn.Module):
while the others are un-matched (and thus treated as non-objects). while the others are un-matched (and thus treated as non-objects).
""" """
def __init__(self, cost_class: float = 1, cost_bbox: float = 1, cost_giou: float = 1, use_focal_loss=False): def __init__(
self,
cost_class: float = 1,
cost_bbox: float = 1,
cost_giou: float = 1,
use_focal_loss=False,
):
"""Creates the matcher """Creates the matcher
Params: Params:
...@@ -31,12 +36,14 @@ class HungarianMatcher(nn.Module): ...@@ -31,12 +36,14 @@ class HungarianMatcher(nn.Module):
self.cost_class = cost_class self.cost_class = cost_class
self.cost_bbox = cost_bbox self.cost_bbox = cost_bbox
self.cost_giou = cost_giou self.cost_giou = cost_giou
assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, "all costs cant be 0" assert (
cost_class != 0 or cost_bbox != 0 or cost_giou != 0
), "all costs cant be 0"
self.use_focal_loss = use_focal_loss self.use_focal_loss = use_focal_loss
@torch.no_grad() @torch.no_grad()
def forward(self, outputs, targets): def forward(self, outputs, targets):
""" Performs the matching """Performs the matching
Params: Params:
outputs: This is a dict that contains at least these entries: outputs: This is a dict that contains at least these entries:
...@@ -61,7 +68,9 @@ class HungarianMatcher(nn.Module): ...@@ -61,7 +68,9 @@ class HungarianMatcher(nn.Module):
if self.use_focal_loss: if self.use_focal_loss:
out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid() out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid()
else: else:
out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1) # [batch_size * num_queries, num_classes] out_prob = (
outputs["pred_logits"].flatten(0, 1).softmax(-1)
) # [batch_size * num_queries, num_classes]
out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4] out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4]
# Also concat the target labels and boxes # Also concat the target labels and boxes
...@@ -74,29 +83,57 @@ class HungarianMatcher(nn.Module): ...@@ -74,29 +83,57 @@ class HungarianMatcher(nn.Module):
if self.use_focal_loss: if self.use_focal_loss:
alpha = 0.25 alpha = 0.25
gamma = 2.0 gamma = 2.0
neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log()) neg_cost_class = (
pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log()) (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log())
)
pos_cost_class = (
alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log())
)
cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids] cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids]
else: else:
cost_class = -out_prob[:, tgt_ids] # shape [batch_size * num_queries, \sum_b NUM-BOX_b] cost_class = -out_prob[
:, tgt_ids
] # shape [batch_size * num_queries, \sum_b NUM-BOX_b]
# Compute the L1 cost between boxes # Compute the L1 cost between boxes
cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) # shape [batch_size * num_queries,\sum_b NUM-BOX_b] cost_bbox = torch.cdist(
out_bbox, tgt_bbox, p=1
) # shape [batch_size * num_queries,\sum_b NUM-BOX_b]
# Compute the giou cost betwen boxes # Compute the giou cost betwen boxes
# shape [batch_size * num_queries, \sum_b NUM-BOX_b] # shape [batch_size * num_queries, \sum_b NUM-BOX_b]
cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)) cost_giou = -generalized_box_iou(
box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)
)
# Final cost matrix # Final cost matrix
C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou C = (
C = C.view(bs, num_queries, -1).cpu() # shape [batch_size, num_queries, \sum_b NUM-BOX_b] self.cost_bbox * cost_bbox
+ self.cost_class * cost_class
+ self.cost_giou * cost_giou
)
C = C.view(
bs, num_queries, -1
).cpu() # shape [batch_size, num_queries, \sum_b NUM-BOX_b]
sizes = [len(v["boxes"]) for v in targets] # shape [batch_size,] sizes = [len(v["boxes"]) for v in targets] # shape [batch_size,]
# each split c shape [batch_size, num_queries, NUM-BOX_b] # each split c shape [batch_size, num_queries, NUM-BOX_b]
indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))] indices = [
linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))
]
# A list where each item is [row_indices, col_indices] # A list where each item is [row_indices, col_indices]
return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices] return [
(
torch.as_tensor(i, dtype=torch.int64),
torch.as_tensor(j, dtype=torch.int64),
)
for i, j in indices
]
def build_matcher(args): def build_matcher(args):
return HungarianMatcher(cost_class=args.set_cost_class, cost_bbox=args.set_cost_bbox, cost_giou=args.set_cost_giou) return HungarianMatcher(
cost_class=args.set_cost_class,
cost_bbox=args.set_cost_bbox,
cost_giou=args.set_cost_giou,
)
...@@ -5,10 +5,10 @@ ...@@ -5,10 +5,10 @@
Various positional encodings for the transformer. Various positional encodings for the transformer.
""" """
import math import math
import torch
from torch import nn
import torch
from detr.util.misc import NestedTensor from detr.util.misc import NestedTensor
from torch import nn
class PositionEmbeddingSine(nn.Module): class PositionEmbeddingSine(nn.Module):
...@@ -16,7 +16,15 @@ class PositionEmbeddingSine(nn.Module): ...@@ -16,7 +16,15 @@ class PositionEmbeddingSine(nn.Module):
This is a more standard version of the position embedding, very similar to the one This is a more standard version of the position embedding, very similar to the one
used by the Attention is all you need paper, generalized to work on images. used by the Attention is all you need paper, generalized to work on images.
""" """
def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None, centered=False):
def __init__(
self,
num_pos_feats=64,
temperature=10000,
normalize=False,
scale=None,
centered=False,
):
super().__init__() super().__init__()
self.num_pos_feats = num_pos_feats self.num_pos_feats = num_pos_feats
self.temperature = temperature self.temperature = temperature
...@@ -47,13 +55,25 @@ class PositionEmbeddingSine(nn.Module): ...@@ -47,13 +55,25 @@ class PositionEmbeddingSine(nn.Module):
x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) # shape (N, ) dim_t = self.temperature ** (
2 * (dim_t // 2) / self.num_pos_feats
) # shape (N, )
pos_x = x_embed[:, :, :, None] / dim_t # shape (B, H, W, N) pos_x = x_embed[:, :, :, None] / dim_t # shape (B, H, W, N)
pos_y = y_embed[:, :, :, None] / dim_t pos_y = y_embed[:, :, :, None] / dim_t
pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3) # shape (B, H, W, N) pos_x = torch.stack(
pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3) # shape (B, H, W, N) (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) # shape (B, 2*N, H, W) ).flatten(
3
) # shape (B, H, W, N)
pos_y = torch.stack(
(pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
).flatten(
3
) # shape (B, H, W, N)
pos = torch.cat((pos_y, pos_x), dim=3).permute(
0, 3, 1, 2
) # shape (B, 2*N, H, W)
return pos return pos
...@@ -61,6 +81,7 @@ class PositionEmbeddingLearned(nn.Module): ...@@ -61,6 +81,7 @@ class PositionEmbeddingLearned(nn.Module):
""" """
Absolute pos embedding, learned. Absolute pos embedding, learned.
""" """
def __init__(self, num_pos_feats=256): def __init__(self, num_pos_feats=256):
super().__init__() super().__init__()
self.row_embed = nn.Embedding(50, num_pos_feats) self.row_embed = nn.Embedding(50, num_pos_feats)
...@@ -78,19 +99,27 @@ class PositionEmbeddingLearned(nn.Module): ...@@ -78,19 +99,27 @@ class PositionEmbeddingLearned(nn.Module):
j = torch.arange(h, device=x.device) j = torch.arange(h, device=x.device)
x_emb = self.col_embed(i) x_emb = self.col_embed(i)
y_emb = self.row_embed(j) y_emb = self.row_embed(j)
pos = torch.cat([ pos = (
x_emb.unsqueeze(0).repeat(h, 1, 1), torch.cat(
y_emb.unsqueeze(1).repeat(1, w, 1), [
], dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(x.shape[0], 1, 1, 1) x_emb.unsqueeze(0).repeat(h, 1, 1),
y_emb.unsqueeze(1).repeat(1, w, 1),
],
dim=-1,
)
.permute(2, 0, 1)
.unsqueeze(0)
.repeat(x.shape[0], 1, 1, 1)
)
return pos return pos
def build_position_encoding(args): def build_position_encoding(args):
N_steps = args.hidden_dim // 2 N_steps = args.hidden_dim // 2
if args.position_embedding in ('v2', 'sine'): if args.position_embedding in ("v2", "sine"):
# TODO find a better way of exposing other arguments # TODO find a better way of exposing other arguments
position_embedding = PositionEmbeddingSine(N_steps, normalize=True) position_embedding = PositionEmbeddingSine(N_steps, normalize=True)
elif args.position_embedding in ('v3', 'learned'): elif args.position_embedding in ("v3", "learned"):
position_embedding = PositionEmbeddingLearned(N_steps) position_embedding = PositionEmbeddingLearned(N_steps)
else: else:
raise ValueError(f"not supported {args.position_embedding}") raise ValueError(f"not supported {args.position_embedding}")
......
...@@ -8,14 +8,13 @@ import io ...@@ -8,14 +8,13 @@ import io
from collections import defaultdict from collections import defaultdict
from typing import List, Optional from typing import List, Optional
import detr.util.box_ops as box_ops
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch import Tensor
from PIL import Image
import detr.util.box_ops as box_ops
from detr.util.misc import NestedTensor, interpolate, nested_tensor_from_tensor_list from detr.util.misc import NestedTensor, interpolate, nested_tensor_from_tensor_list
from PIL import Image
from torch import Tensor
try: try:
from panopticapi.utils import id2rgb, rgb2id from panopticapi.utils import id2rgb, rgb2id
...@@ -33,8 +32,12 @@ class DETRsegm(nn.Module): ...@@ -33,8 +32,12 @@ class DETRsegm(nn.Module):
p.requires_grad_(False) p.requires_grad_(False)
hidden_dim, nheads = detr.transformer.d_model, detr.transformer.nhead hidden_dim, nheads = detr.transformer.d_model, detr.transformer.nhead
self.bbox_attention = MHAttentionMap(hidden_dim, hidden_dim, nheads, dropout=0.0) self.bbox_attention = MHAttentionMap(
self.mask_head = MaskHeadSmallConv(hidden_dim + nheads, [1024, 512, 256], hidden_dim) hidden_dim, hidden_dim, nheads, dropout=0.0
)
self.mask_head = MaskHeadSmallConv(
hidden_dim + nheads, [1024, 512, 256], hidden_dim
)
def forward(self, samples: NestedTensor): def forward(self, samples: NestedTensor):
if isinstance(samples, (list, torch.Tensor)): if isinstance(samples, (list, torch.Tensor)):
...@@ -46,19 +49,27 @@ class DETRsegm(nn.Module): ...@@ -46,19 +49,27 @@ class DETRsegm(nn.Module):
src, mask = features[-1].decompose() src, mask = features[-1].decompose()
assert mask is not None assert mask is not None
src_proj = self.detr.input_proj(src) src_proj = self.detr.input_proj(src)
hs, memory = self.detr.transformer(src_proj, mask, self.detr.query_embed.weight, pos[-1]) hs, memory = self.detr.transformer(
src_proj, mask, self.detr.query_embed.weight, pos[-1]
)
outputs_class = self.detr.class_embed(hs) outputs_class = self.detr.class_embed(hs)
outputs_coord = self.detr.bbox_embed(hs).sigmoid() outputs_coord = self.detr.bbox_embed(hs).sigmoid()
out = {"pred_logits": outputs_class[-1], "pred_boxes": outputs_coord[-1]} out = {"pred_logits": outputs_class[-1], "pred_boxes": outputs_coord[-1]}
if self.detr.aux_loss: if self.detr.aux_loss:
out['aux_outputs'] = self.detr._set_aux_loss(outputs_class, outputs_coord) out["aux_outputs"] = self.detr._set_aux_loss(outputs_class, outputs_coord)
# FIXME h_boxes takes the last one computed, keep this in mind # FIXME h_boxes takes the last one computed, keep this in mind
bbox_mask = self.bbox_attention(hs[-1], memory, mask=mask) bbox_mask = self.bbox_attention(hs[-1], memory, mask=mask)
seg_masks = self.mask_head(src_proj, bbox_mask, [features[2].tensors, features[1].tensors, features[0].tensors]) seg_masks = self.mask_head(
outputs_seg_masks = seg_masks.view(bs, self.detr.num_queries, seg_masks.shape[-2], seg_masks.shape[-1]) src_proj,
bbox_mask,
[features[2].tensors, features[1].tensors, features[0].tensors],
)
outputs_seg_masks = seg_masks.view(
bs, self.detr.num_queries, seg_masks.shape[-2], seg_masks.shape[-1]
)
out["pred_masks"] = outputs_seg_masks out["pred_masks"] = outputs_seg_masks
return out return out
...@@ -77,7 +88,14 @@ class MaskHeadSmallConv(nn.Module): ...@@ -77,7 +88,14 @@ class MaskHeadSmallConv(nn.Module):
def __init__(self, dim, fpn_dims, context_dim): def __init__(self, dim, fpn_dims, context_dim):
super().__init__() super().__init__()
inter_dims = [dim, context_dim // 2, context_dim // 4, context_dim // 8, context_dim // 16, context_dim // 64] inter_dims = [
dim,
context_dim // 2,
context_dim // 4,
context_dim // 8,
context_dim // 16,
context_dim // 64,
]
self.lay1 = torch.nn.Conv2d(dim, dim, 3, padding=1) self.lay1 = torch.nn.Conv2d(dim, dim, 3, padding=1)
self.gn1 = torch.nn.GroupNorm(8, dim) self.gn1 = torch.nn.GroupNorm(8, dim)
self.lay2 = torch.nn.Conv2d(dim, inter_dims[1], 3, padding=1) self.lay2 = torch.nn.Conv2d(dim, inter_dims[1], 3, padding=1)
...@@ -159,9 +177,19 @@ class MHAttentionMap(nn.Module): ...@@ -159,9 +177,19 @@ class MHAttentionMap(nn.Module):
def forward(self, q, k, mask: Optional[Tensor] = None): def forward(self, q, k, mask: Optional[Tensor] = None):
q = self.q_linear(q) q = self.q_linear(q)
k = F.conv2d(k, self.k_linear.weight.unsqueeze(-1).unsqueeze(-1), self.k_linear.bias) k = F.conv2d(
qh = q.view(q.shape[0], q.shape[1], self.num_heads, self.hidden_dim // self.num_heads) k, self.k_linear.weight.unsqueeze(-1).unsqueeze(-1), self.k_linear.bias
kh = k.view(k.shape[0], self.num_heads, self.hidden_dim // self.num_heads, k.shape[-2], k.shape[-1]) )
qh = q.view(
q.shape[0], q.shape[1], self.num_heads, self.hidden_dim // self.num_heads
)
kh = k.view(
k.shape[0],
self.num_heads,
self.hidden_dim // self.num_heads,
k.shape[-2],
k.shape[-1],
)
weights = torch.einsum("bqnc,bnchw->bqnhw", qh * self.normalize_fact, kh) weights = torch.einsum("bqnc,bnchw->bqnhw", qh * self.normalize_fact, kh)
if mask is not None: if mask is not None:
...@@ -189,7 +217,9 @@ def dice_loss(inputs, targets, num_boxes): ...@@ -189,7 +217,9 @@ def dice_loss(inputs, targets, num_boxes):
return loss.sum() / num_boxes return loss.sum() / num_boxes
def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2): def sigmoid_focal_loss(
inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2
):
""" """
Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.
Args: Args:
...@@ -227,10 +257,14 @@ class PostProcessSegm(nn.Module): ...@@ -227,10 +257,14 @@ class PostProcessSegm(nn.Module):
assert len(orig_target_sizes) == len(max_target_sizes) assert len(orig_target_sizes) == len(max_target_sizes)
max_h, max_w = max_target_sizes.max(0)[0].tolist() max_h, max_w = max_target_sizes.max(0)[0].tolist()
outputs_masks = outputs["pred_masks"].squeeze(2) outputs_masks = outputs["pred_masks"].squeeze(2)
outputs_masks = F.interpolate(outputs_masks, size=(max_h, max_w), mode="bilinear", align_corners=False) outputs_masks = F.interpolate(
outputs_masks, size=(max_h, max_w), mode="bilinear", align_corners=False
)
outputs_masks = (outputs_masks.sigmoid() > self.threshold).cpu() outputs_masks = (outputs_masks.sigmoid() > self.threshold).cpu()
for i, (cur_mask, t, tt) in enumerate(zip(outputs_masks, max_target_sizes, orig_target_sizes)): for i, (cur_mask, t, tt) in enumerate(
zip(outputs_masks, max_target_sizes, orig_target_sizes)
):
img_h, img_w = t[0], t[1] img_h, img_w = t[0], t[1]
results[i]["masks"] = cur_mask[:, :img_h, :img_w].unsqueeze(1) results[i]["masks"] = cur_mask[:, :img_h, :img_w].unsqueeze(1)
results[i]["masks"] = F.interpolate( results[i]["masks"] = F.interpolate(
...@@ -242,7 +276,7 @@ class PostProcessSegm(nn.Module): ...@@ -242,7 +276,7 @@ class PostProcessSegm(nn.Module):
class PostProcessPanoptic(nn.Module): class PostProcessPanoptic(nn.Module):
"""This class converts the output of the model to the final panoptic result, in the format expected by the """This class converts the output of the model to the final panoptic result, in the format expected by the
coco panoptic API """ coco panoptic API"""
def __init__(self, is_thing_map, threshold=0.85): def __init__(self, is_thing_map, threshold=0.85):
""" """
...@@ -255,19 +289,23 @@ class PostProcessPanoptic(nn.Module): ...@@ -255,19 +289,23 @@ class PostProcessPanoptic(nn.Module):
self.threshold = threshold self.threshold = threshold
self.is_thing_map = is_thing_map self.is_thing_map = is_thing_map
def forward(self, outputs, processed_sizes, target_sizes=None): #noqa: C901 def forward(self, outputs, processed_sizes, target_sizes=None): # noqa: C901
""" This function computes the panoptic prediction from the model's predictions. """This function computes the panoptic prediction from the model's predictions.
Parameters: Parameters:
outputs: This is a dict coming directly from the model. See the model doc for the content. outputs: This is a dict coming directly from the model. See the model doc for the content.
processed_sizes: This is a list of tuples (or torch tensors) of sizes of the images that were passed to the processed_sizes: This is a list of tuples (or torch tensors) of sizes of the images that were passed to the
model, ie the size after data augmentation but before batching. model, ie the size after data augmentation but before batching.
target_sizes: This is a list of tuples (or torch tensors) corresponding to the requested final size target_sizes: This is a list of tuples (or torch tensors) corresponding to the requested final size
of each prediction. If left to None, it will default to the processed_sizes of each prediction. If left to None, it will default to the processed_sizes
""" """
if target_sizes is None: if target_sizes is None:
target_sizes = processed_sizes target_sizes = processed_sizes
assert len(processed_sizes) == len(target_sizes) assert len(processed_sizes) == len(target_sizes)
out_logits, raw_masks, raw_boxes = outputs["pred_logits"], outputs["pred_masks"], outputs["pred_boxes"] out_logits, raw_masks, raw_boxes = (
outputs["pred_logits"],
outputs["pred_masks"],
outputs["pred_boxes"],
)
assert len(out_logits) == len(raw_masks) == len(target_sizes) assert len(out_logits) == len(raw_masks) == len(target_sizes)
preds = [] preds = []
...@@ -281,12 +319,16 @@ class PostProcessPanoptic(nn.Module): ...@@ -281,12 +319,16 @@ class PostProcessPanoptic(nn.Module):
): ):
# we filter empty queries and detection below threshold # we filter empty queries and detection below threshold
scores, labels = cur_logits.softmax(-1).max(-1) scores, labels = cur_logits.softmax(-1).max(-1)
keep = labels.ne(outputs["pred_logits"].shape[-1] - 1) & (scores > self.threshold) keep = labels.ne(outputs["pred_logits"].shape[-1] - 1) & (
scores > self.threshold
)
cur_scores, cur_classes = cur_logits.softmax(-1).max(-1) cur_scores, cur_classes = cur_logits.softmax(-1).max(-1)
cur_scores = cur_scores[keep] cur_scores = cur_scores[keep]
cur_classes = cur_classes[keep] cur_classes = cur_classes[keep]
cur_masks = cur_masks[keep] cur_masks = cur_masks[keep]
cur_masks = interpolate(cur_masks[:, None], to_tuple(size), mode="bilinear").squeeze(1) cur_masks = interpolate(
cur_masks[:, None], to_tuple(size), mode="bilinear"
).squeeze(1)
cur_boxes = box_ops.box_cxcywh_to_xyxy(cur_boxes[keep]) cur_boxes = box_ops.box_cxcywh_to_xyxy(cur_boxes[keep])
h, w = cur_masks.shape[-2:] h, w = cur_masks.shape[-2:]
...@@ -322,10 +364,14 @@ class PostProcessPanoptic(nn.Module): ...@@ -322,10 +364,14 @@ class PostProcessPanoptic(nn.Module):
final_h, final_w = to_tuple(target_size) final_h, final_w = to_tuple(target_size)
seg_img = Image.fromarray(id2rgb(m_id.view(h, w).cpu().numpy())) seg_img = Image.fromarray(id2rgb(m_id.view(h, w).cpu().numpy()))
seg_img = seg_img.resize(size=(final_w, final_h), resample=Image.NEAREST) seg_img = seg_img.resize(
size=(final_w, final_h), resample=Image.NEAREST
)
np_seg_img = ( np_seg_img = (
torch.ByteTensor(torch.ByteStorage.from_buffer(seg_img.tobytes())).view(final_h, final_w, 3).numpy() torch.ByteTensor(torch.ByteStorage.from_buffer(seg_img.tobytes()))
.view(final_h, final_w, 3)
.numpy()
) )
m_id = torch.from_numpy(rgb2id(np_seg_img)) m_id = torch.from_numpy(rgb2id(np_seg_img))
...@@ -339,7 +385,9 @@ class PostProcessPanoptic(nn.Module): ...@@ -339,7 +385,9 @@ class PostProcessPanoptic(nn.Module):
# We know filter empty masks as long as we find some # We know filter empty masks as long as we find some
while True: while True:
filtered_small = torch.as_tensor( filtered_small = torch.as_tensor(
[area[i] <= 4 for i, c in enumerate(cur_classes)], dtype=torch.bool, device=keep.device [area[i] <= 4 for i, c in enumerate(cur_classes)],
dtype=torch.bool,
device=keep.device,
) )
if filtered_small.any().item(): if filtered_small.any().item():
cur_scores = cur_scores[~filtered_small] cur_scores = cur_scores[~filtered_small]
...@@ -355,11 +403,21 @@ class PostProcessPanoptic(nn.Module): ...@@ -355,11 +403,21 @@ class PostProcessPanoptic(nn.Module):
segments_info = [] segments_info = []
for i, a in enumerate(area): for i, a in enumerate(area):
cat = cur_classes[i].item() cat = cur_classes[i].item()
segments_info.append({"id": i, "isthing": self.is_thing_map[cat], "category_id": cat, "area": a}) segments_info.append(
{
"id": i,
"isthing": self.is_thing_map[cat],
"category_id": cat,
"area": a,
}
)
del cur_classes del cur_classes
with io.BytesIO() as out: with io.BytesIO() as out:
seg_img.save(out, format="PNG") seg_img.save(out, format="PNG")
predictions = {"png_string": out.getvalue(), "segments_info": segments_info} predictions = {
"png_string": out.getvalue(),
"segments_info": segments_info,
}
preds.append(predictions) preds.append(predictions)
return preds return preds
...@@ -18,23 +18,38 @@ from torch import nn, Tensor ...@@ -18,23 +18,38 @@ from torch import nn, Tensor
class Transformer(nn.Module): class Transformer(nn.Module):
def __init__(
def __init__(self, d_model=512, nhead=8, num_encoder_layers=6, self,
num_decoder_layers=6, dim_feedforward=2048, dropout=0.1, d_model=512,
activation="relu", normalize_before=False, nhead=8,
return_intermediate_dec=False): num_encoder_layers=6,
num_decoder_layers=6,
dim_feedforward=2048,
dropout=0.1,
activation="relu",
normalize_before=False,
return_intermediate_dec=False,
):
super().__init__() super().__init__()
encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, encoder_layer = TransformerEncoderLayer(
dropout, activation, normalize_before) d_model, nhead, dim_feedforward, dropout, activation, normalize_before
)
encoder_norm = nn.LayerNorm(d_model) if normalize_before else None encoder_norm = nn.LayerNorm(d_model) if normalize_before else None
self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm) self.encoder = TransformerEncoder(
encoder_layer, num_encoder_layers, encoder_norm
)
decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward, decoder_layer = TransformerDecoderLayer(
dropout, activation, normalize_before) d_model, nhead, dim_feedforward, dropout, activation, normalize_before
)
decoder_norm = nn.LayerNorm(d_model) decoder_norm = nn.LayerNorm(d_model)
self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm, self.decoder = TransformerDecoder(
return_intermediate=return_intermediate_dec) decoder_layer,
num_decoder_layers,
decoder_norm,
return_intermediate=return_intermediate_dec,
)
self._reset_parameters() self._reset_parameters()
...@@ -63,30 +78,41 @@ class Transformer(nn.Module): ...@@ -63,30 +78,41 @@ class Transformer(nn.Module):
# memory shape (L, B, C) # memory shape (L, B, C)
memory = self.encoder(src, src_key_padding_mask=mask, pos=pos_embed) memory = self.encoder(src, src_key_padding_mask=mask, pos=pos_embed)
# hs shape (NUM_LEVEL, S, B, C) # hs shape (NUM_LEVEL, S, B, C)
hs = self.decoder(tgt, memory, memory_key_padding_mask=mask, hs = self.decoder(
pos=pos_embed, query_pos=query_embed) tgt,
memory,
memory_key_padding_mask=mask,
pos=pos_embed,
query_pos=query_embed,
)
# return shape (NUM_LEVEL, B, S, C) and (B, C, H, W) # return shape (NUM_LEVEL, B, S, C) and (B, C, H, W)
return hs.transpose(1, 2), memory.permute(1, 2, 0).view(bs, c, h, w) return hs.transpose(1, 2), memory.permute(1, 2, 0).view(bs, c, h, w)
class TransformerEncoder(nn.Module): class TransformerEncoder(nn.Module):
def __init__(self, encoder_layer, num_layers, norm=None): def __init__(self, encoder_layer, num_layers, norm=None):
super().__init__() super().__init__()
self.layers = _get_clones(encoder_layer, num_layers) self.layers = _get_clones(encoder_layer, num_layers)
self.num_layers = num_layers self.num_layers = num_layers
self.norm = norm self.norm = norm
def forward(self, src, def forward(
mask: Optional[Tensor] = None, self,
src_key_padding_mask: Optional[Tensor] = None, src,
pos: Optional[Tensor] = None): mask: Optional[Tensor] = None,
src_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
):
output = src output = src
# mask, shape (L, L) # mask, shape (L, L)
# src_key_padding_mask, shape (B, L) # src_key_padding_mask, shape (B, L)
for layer in self.layers: for layer in self.layers:
output = layer(output, src_mask=mask, output = layer(
src_key_padding_mask=src_key_padding_mask, pos=pos) output,
src_mask=mask,
src_key_padding_mask=src_key_padding_mask,
pos=pos,
)
if self.norm is not None: if self.norm is not None:
output = self.norm(output) output = self.norm(output)
...@@ -95,7 +121,6 @@ class TransformerEncoder(nn.Module): ...@@ -95,7 +121,6 @@ class TransformerEncoder(nn.Module):
class TransformerDecoder(nn.Module): class TransformerDecoder(nn.Module):
def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False): def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False):
super().__init__() super().__init__()
self.layers = _get_clones(decoder_layer, num_layers) self.layers = _get_clones(decoder_layer, num_layers)
...@@ -103,13 +128,17 @@ class TransformerDecoder(nn.Module): ...@@ -103,13 +128,17 @@ class TransformerDecoder(nn.Module):
self.norm = norm self.norm = norm
self.return_intermediate = return_intermediate self.return_intermediate = return_intermediate
def forward(self, tgt, memory, def forward(
tgt_mask: Optional[Tensor] = None, self,
memory_mask: Optional[Tensor] = None, tgt,
tgt_key_padding_mask: Optional[Tensor] = None, memory,
memory_key_padding_mask: Optional[Tensor] = None, tgt_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None, memory_mask: Optional[Tensor] = None,
query_pos: Optional[Tensor] = None): tgt_key_padding_mask: Optional[Tensor] = None,
memory_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
query_pos: Optional[Tensor] = None,
):
output = tgt output = tgt
intermediate = [] intermediate = []
...@@ -119,11 +148,16 @@ class TransformerDecoder(nn.Module): ...@@ -119,11 +148,16 @@ class TransformerDecoder(nn.Module):
# memory_mask shape (L, S) # memory_mask shape (L, S)
# memory_key_padding_mask shape (B, S) # memory_key_padding_mask shape (B, S)
for layer in self.layers: for layer in self.layers:
output = layer(output, memory, tgt_mask=tgt_mask, output = layer(
memory_mask=memory_mask, output,
tgt_key_padding_mask=tgt_key_padding_mask, memory,
memory_key_padding_mask=memory_key_padding_mask, tgt_mask=tgt_mask,
pos=pos, query_pos=query_pos) memory_mask=memory_mask,
tgt_key_padding_mask=tgt_key_padding_mask,
memory_key_padding_mask=memory_key_padding_mask,
pos=pos,
query_pos=query_pos,
)
if self.return_intermediate: if self.return_intermediate:
intermediate.append(self.norm(output)) intermediate.append(self.norm(output))
...@@ -140,9 +174,15 @@ class TransformerDecoder(nn.Module): ...@@ -140,9 +174,15 @@ class TransformerDecoder(nn.Module):
class TransformerEncoderLayer(nn.Module): class TransformerEncoderLayer(nn.Module):
def __init__(
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, self,
activation="relu", normalize_before=False): d_model,
nhead,
dim_feedforward=2048,
dropout=0.1,
activation="relu",
normalize_before=False,
):
super().__init__() super().__init__()
self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
# Implementation of Feedforward model # Implementation of Feedforward model
...@@ -161,16 +201,19 @@ class TransformerEncoderLayer(nn.Module): ...@@ -161,16 +201,19 @@ class TransformerEncoderLayer(nn.Module):
def with_pos_embed(self, tensor, pos: Optional[Tensor]): def with_pos_embed(self, tensor, pos: Optional[Tensor]):
return tensor if pos is None else tensor + pos return tensor if pos is None else tensor + pos
def forward_post(self, def forward_post(
src, self,
src_mask: Optional[Tensor] = None, src,
src_key_padding_mask: Optional[Tensor] = None, src_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None): src_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
):
q = k = self.with_pos_embed(src, pos) # shape (L, B, D) q = k = self.with_pos_embed(src, pos) # shape (L, B, D)
# src mask, shape (L, L) # src mask, shape (L, L)
# src_key_padding_mask: shape (B, L) # src_key_padding_mask: shape (B, L)
src2 = self.self_attn(q, k, src, attn_mask=src_mask, src2 = self.self_attn(
key_padding_mask=src_key_padding_mask)[0] q, k, src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
)[0]
src = src + self.dropout1(src2) src = src + self.dropout1(src2)
src = self.norm1(src) src = self.norm1(src)
src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
...@@ -178,33 +221,46 @@ class TransformerEncoderLayer(nn.Module): ...@@ -178,33 +221,46 @@ class TransformerEncoderLayer(nn.Module):
src = self.norm2(src) src = self.norm2(src)
return src return src
def forward_pre(self, src, def forward_pre(
src_mask: Optional[Tensor] = None, self,
src_key_padding_mask: Optional[Tensor] = None, src,
pos: Optional[Tensor] = None): src_mask: Optional[Tensor] = None,
src_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
):
src2 = self.norm1(src) src2 = self.norm1(src)
q = k = self.with_pos_embed(src2, pos) q = k = self.with_pos_embed(src2, pos)
src2 = self.self_attn(q, k, src2, attn_mask=src_mask, src2 = self.self_attn(
key_padding_mask=src_key_padding_mask)[0] q, k, src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
)[0]
src = src + self.dropout1(src2) src = src + self.dropout1(src2)
src2 = self.norm2(src) src2 = self.norm2(src)
src2 = self.linear2(self.dropout(self.activation(self.linear1(src2)))) src2 = self.linear2(self.dropout(self.activation(self.linear1(src2))))
src = src + self.dropout2(src2) src = src + self.dropout2(src2)
return src return src
def forward(
    self,
    src,
    src_mask: Optional[Tensor] = None,
    src_key_padding_mask: Optional[Tensor] = None,
    pos: Optional[Tensor] = None,
):
    """Run the encoder layer, choosing the pre-norm or post-norm variant.

    ``self.normalize_before`` selects ``forward_pre``; otherwise
    ``forward_post`` is used. All arguments are passed through positionally
    and the chosen variant's result is returned unchanged.
    """
    variant = self.forward_pre if self.normalize_before else self.forward_post
    return variant(src, src_mask, src_key_padding_mask, pos)
class TransformerDecoderLayer(nn.Module): class TransformerDecoderLayer(nn.Module):
def __init__(
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, self,
activation="relu", normalize_before=False): d_model,
nhead,
dim_feedforward=2048,
dropout=0.1,
activation="relu",
normalize_before=False,
):
super().__init__() super().__init__()
self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
...@@ -226,28 +282,36 @@ class TransformerDecoderLayer(nn.Module): ...@@ -226,28 +282,36 @@ class TransformerDecoderLayer(nn.Module):
def with_pos_embed(self, tensor, pos: Optional[Tensor]):
    """Return ``tensor + pos``, or ``tensor`` unchanged when ``pos`` is None.

    Args:
        tensor: value to which the positional embedding is added.
        pos: optional positional embedding; ``None`` disables the addition.
    """
    if pos is None:
        return tensor
    return tensor + pos
def forward_post(self, tgt, memory, def forward_post(
tgt_mask: Optional[Tensor] = None, self,
memory_mask: Optional[Tensor] = None, tgt,
tgt_key_padding_mask: Optional[Tensor] = None, memory,
memory_key_padding_mask: Optional[Tensor] = None, tgt_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None, memory_mask: Optional[Tensor] = None,
query_pos: Optional[Tensor] = None): tgt_key_padding_mask: Optional[Tensor] = None,
memory_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
query_pos: Optional[Tensor] = None,
):
# tgt shape (L, B, C) # tgt shape (L, B, C)
# tgt_mask shape (L, L) # tgt_mask shape (L, L)
# tgt_key_padding_mask shape (B, L) # tgt_key_padding_mask shape (B, L)
q = k = self.with_pos_embed(tgt, query_pos) q = k = self.with_pos_embed(tgt, query_pos)
tgt2 = self.self_attn(q, k, tgt, attn_mask=tgt_mask, tgt2 = self.self_attn(
key_padding_mask=tgt_key_padding_mask)[0] q, k, tgt, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
)[0]
tgt = tgt + self.dropout1(tgt2) tgt = tgt + self.dropout1(tgt2)
tgt = self.norm1(tgt) tgt = self.norm1(tgt)
# memory_mask shape (L, S) # memory_mask shape (L, S)
# memory_key_padding_mask shape (B, S) # memory_key_padding_mask shape (B, S)
# query_pos shape (L, B, C) # query_pos shape (L, B, C)
tgt2 = self.multihead_attn(self.with_pos_embed(tgt, query_pos), tgt2 = self.multihead_attn(
self.with_pos_embed(memory, pos), self.with_pos_embed(tgt, query_pos),
memory, attn_mask=memory_mask, self.with_pos_embed(memory, pos),
key_padding_mask=memory_key_padding_mask)[0] memory,
attn_mask=memory_mask,
key_padding_mask=memory_key_padding_mask,
)[0]
tgt = tgt + self.dropout2(tgt2) tgt = tgt + self.dropout2(tgt2)
tgt = self.norm2(tgt) tgt = self.norm2(tgt)
tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
...@@ -256,41 +320,69 @@ class TransformerDecoderLayer(nn.Module): ...@@ -256,41 +320,69 @@ class TransformerDecoderLayer(nn.Module):
# return tgt shape (L, B, C) # return tgt shape (L, B, C)
return tgt return tgt
def forward_pre(
    self,
    tgt,
    memory,
    tgt_mask: Optional[Tensor] = None,
    memory_mask: Optional[Tensor] = None,
    tgt_key_padding_mask: Optional[Tensor] = None,
    memory_key_padding_mask: Optional[Tensor] = None,
    pos: Optional[Tensor] = None,
    query_pos: Optional[Tensor] = None,
):
    """Decoder pass that normalises *before* each of its three sub-layers.

    Args:
        tgt: decoder input sequence.
        memory: sequence attended to by the second attention module.
        tgt_mask: ``attn_mask`` for the self-attention over ``tgt``.
        memory_mask: ``attn_mask`` for the attention over ``memory``.
        tgt_key_padding_mask: ``key_padding_mask`` for the self-attention.
        memory_key_padding_mask: ``key_padding_mask`` for the attention over
            ``memory``.
        pos: optional positional embedding added to ``memory`` when it is
            used as the key (the value is ``memory`` without it).
        query_pos: optional positional embedding added to the decoder
            queries (and to the self-attention keys).

    Returns:
        ``tgt`` after the three residual updates; no normalisation is applied
        to the returned value here.
    """
    # 1) Self-attention over the target sequence.
    normed = self.norm1(tgt)
    q = k = self.with_pos_embed(normed, query_pos)
    self_attn_out = self.self_attn(
        q, k, normed, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
    )[0]
    tgt = tgt + self.dropout1(self_attn_out)

    # 2) Attention from the target queries onto ``memory``.
    normed = self.norm2(tgt)
    cross_attn_out = self.multihead_attn(
        self.with_pos_embed(normed, query_pos),
        self.with_pos_embed(memory, pos),
        memory,
        attn_mask=memory_mask,
        key_padding_mask=memory_key_padding_mask,
    )[0]
    tgt = tgt + self.dropout2(cross_attn_out)

    # 3) Feed-forward network.
    normed = self.norm3(tgt)
    ff_out = self.linear2(self.dropout(self.activation(self.linear1(normed))))
    tgt = tgt + self.dropout3(ff_out)
    return tgt
def forward(
    self,
    tgt,
    memory,
    tgt_mask: Optional[Tensor] = None,
    memory_mask: Optional[Tensor] = None,
    tgt_key_padding_mask: Optional[Tensor] = None,
    memory_key_padding_mask: Optional[Tensor] = None,
    pos: Optional[Tensor] = None,
    query_pos: Optional[Tensor] = None,
):
    """Run the decoder layer, choosing the pre-norm or post-norm variant.

    ``self.normalize_before`` selects ``forward_pre``; otherwise
    ``forward_post`` is used. All arguments are passed through positionally,
    in signature order, and the chosen variant's result is returned unchanged.
    """
    variant = self.forward_pre if self.normalize_before else self.forward_post
    return variant(
        tgt,
        memory,
        tgt_mask,
        memory_mask,
        tgt_key_padding_mask,
        memory_key_padding_mask,
        pos,
        query_pos,
    )
def _get_clones(module, N): def _get_clones(module, N):
...@@ -318,4 +410,4 @@ def _get_activation_fn(activation): ...@@ -318,4 +410,4 @@ def _get_activation_fn(activation):
return F.gelu return F.gelu
if activation == "glu": if activation == "glu":
return F.glu return F.glu
raise RuntimeError(F"activation should be relu/gelu, not {activation}.") raise RuntimeError(f"activation should be relu/gelu, not {activation}.")
...@@ -9,15 +9,15 @@ ...@@ -9,15 +9,15 @@
# ------------------------------------------------------------------------------------------------ # ------------------------------------------------------------------------------------------------
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import print_function
from __future__ import division from __future__ import division
from __future__ import print_function
import warnings
import math import math
import warnings
import torch import torch
from torch import nn
import torch.nn.functional as F import torch.nn.functional as F
from torch import nn
from torch.nn.init import xavier_uniform_, constant_ from torch.nn.init import xavier_uniform_, constant_
from ..functions import MSDeformAttnFunction from ..functions import MSDeformAttnFunction
...@@ -25,8 +25,10 @@ from ..functions import MSDeformAttnFunction ...@@ -25,8 +25,10 @@ from ..functions import MSDeformAttnFunction
def _is_power_of_2(n):
    """Return True when ``n`` is a positive power of two (1, 2, 4, ...).

    Args:
        n: non-negative ``int`` to test.

    Returns:
        ``True`` for powers of two, ``False`` otherwise (including 0).

    Raises:
        ValueError: if ``n`` is not an ``int`` or is negative.
    """
    if not isinstance(n, int) or n < 0:
        raise ValueError(
            "invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))
        )
    # A power of two has exactly one set bit, so clearing the lowest set bit
    # (n & (n - 1)) leaves zero. Zero passes that check too, hence the guard.
    return n != 0 and (n & (n - 1)) == 0
class MSDeformAttn(nn.Module): class MSDeformAttn(nn.Module):
...@@ -40,12 +42,18 @@ class MSDeformAttn(nn.Module): ...@@ -40,12 +42,18 @@ class MSDeformAttn(nn.Module):
""" """
super().__init__() super().__init__()
if d_model % n_heads != 0: if d_model % n_heads != 0:
raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) raise ValueError(
"d_model must be divisible by n_heads, but got {} and {}".format(
d_model, n_heads
)
)
_d_per_head = d_model // n_heads _d_per_head = d_model // n_heads
# you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation
if not _is_power_of_2(_d_per_head): if not _is_power_of_2(_d_per_head):
warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " warnings.warn(
"which is more efficient in our CUDA implementation.") "You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 "
"which is more efficient in our CUDA implementation."
)
self.im2col_step = 64 self.im2col_step = 64
...@@ -62,25 +70,39 @@ class MSDeformAttn(nn.Module): ...@@ -62,25 +70,39 @@ class MSDeformAttn(nn.Module):
self._reset_parameters() self._reset_parameters()
def _reset_parameters(self):
    """Initialise the sampling-offset, attention-weight and projection layers.

    * ``sampling_offsets``: weight zeroed; bias replaced so that every head
      starts with its own direction (heads evenly spaced in angle), repeated
      for every level, with the i-th sampling point scaled by ``i + 1``.
    * ``attention_weights``: weight and bias zeroed.
    * ``value_proj`` / ``output_proj``: Xavier-uniform weight, zero bias.
    """
    constant_(self.sampling_offsets.weight.data, 0.0)

    # shape (num_heads,): one angle per head, evenly spaced over a full turn
    thetas = torch.arange(self.n_heads, dtype=torch.float32) * (
        2.0 * math.pi / self.n_heads
    )
    # shape (num_heads, 2)
    directions = torch.stack([thetas.cos(), thetas.sin()], -1)
    # Rescale each direction so its largest absolute component equals 1.
    directions = directions / directions.abs().max(-1, keepdim=True)[0]
    # shape (num_heads, num_levels, num_points, 2)
    grid_init = directions.view(self.n_heads, 1, 1, 2).repeat(
        1, self.n_levels, self.n_points, 1
    )
    # The i-th sampling point starts (i + 1) steps out along the head's direction.
    for i in range(self.n_points):
        grid_init[:, :, i, :] *= i + 1
    with torch.no_grad():
        self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1))

    constant_(self.attention_weights.weight.data, 0.0)
    constant_(self.attention_weights.bias.data, 0.0)
    xavier_uniform_(self.value_proj.weight.data)
    constant_(self.value_proj.bias.data, 0.0)
    xavier_uniform_(self.output_proj.weight.data)
    constant_(self.output_proj.bias.data, 0.0)
def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None): def forward(
self,
query,
reference_points,
input_flatten,
input_spatial_shapes,
input_level_start_index,
input_padding_mask=None,
):
""" """
:param query (N, Length_{query}, C) :param query (N, Length_{query}, C)
:param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area :param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area
...@@ -100,21 +122,45 @@ class MSDeformAttn(nn.Module): ...@@ -100,21 +122,45 @@ class MSDeformAttn(nn.Module):
if input_padding_mask is not None: if input_padding_mask is not None:
value = value.masked_fill(input_padding_mask[..., None], float(0)) value = value.masked_fill(input_padding_mask[..., None], float(0))
value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads) value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2) sampling_offsets = self.sampling_offsets(query).view(
attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points) N, Len_q, self.n_heads, self.n_levels, self.n_points, 2
attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points) )
attention_weights = self.attention_weights(query).view(
N, Len_q, self.n_heads, self.n_levels * self.n_points
)
attention_weights = F.softmax(attention_weights, -1).view(
N, Len_q, self.n_heads, self.n_levels, self.n_points
)
# N, Len_q, n_heads, n_levels, n_points, 2 # N, Len_q, n_heads, n_levels, n_points, 2
if reference_points.shape[-1] == 2: if reference_points.shape[-1] == 2:
offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1) offset_normalizer = torch.stack(
sampling_locations = reference_points[:, :, None, :, None, :] \ [input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1
+ sampling_offsets / offset_normalizer[None, None, None, :, None, :] )
sampling_locations = (
reference_points[:, :, None, :, None, :]
+ sampling_offsets / offset_normalizer[None, None, None, :, None, :]
)
elif reference_points.shape[-1] == 4: elif reference_points.shape[-1] == 4:
sampling_locations = reference_points[:, :, None, :, None, :2] \ sampling_locations = (
+ sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5 reference_points[:, :, None, :, None, :2]
+ sampling_offsets
/ self.n_points
* reference_points[:, :, None, :, None, 2:]
* 0.5
)
else: else:
raise ValueError( raise ValueError(
'Last dim of reference_points must be 2 or 4, but get {} instead.'.format(reference_points.shape[-1])) "Last dim of reference_points must be 2 or 4, but get {} instead.".format(
reference_points.shape[-1]
)
)
output = MSDeformAttnFunction.apply( output = MSDeformAttnFunction.apply(
value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights, self.im2col_step) value,
input_spatial_shapes,
input_level_start_index,
sampling_locations,
attention_weights,
self.im2col_step,
)
output = self.output_proj(output) output = self.output_proj(output)
return output return output
...@@ -4,9 +4,9 @@ from d2go.config import CfgNode as CN ...@@ -4,9 +4,9 @@ from d2go.config import CfgNode as CN
from d2go.data.dataset_mappers.build import D2GO_DATA_MAPPER_REGISTRY from d2go.data.dataset_mappers.build import D2GO_DATA_MAPPER_REGISTRY
from d2go.data.dataset_mappers.d2go_dataset_mapper import D2GoDatasetMapper from d2go.data.dataset_mappers.d2go_dataset_mapper import D2GoDatasetMapper
from d2go.runner import GeneralizedRCNNRunner from d2go.runner import GeneralizedRCNNRunner
from detr.d2 import DetrDatasetMapper, add_detr_config
from detr.backbone.deit import add_deit_backbone_config from detr.backbone.deit import add_deit_backbone_config
from detr.backbone.pit import add_pit_backbone_config from detr.backbone.pit import add_pit_backbone_config
from detr.d2 import DetrDatasetMapper, add_detr_config
@D2GO_DATA_MAPPER_REGISTRY.register() @D2GO_DATA_MAPPER_REGISTRY.register()
......
...@@ -10,15 +10,13 @@ from torchvision.ops.boxes import box_area ...@@ -10,15 +10,13 @@ from torchvision.ops.boxes import box_area
def box_cxcywh_to_xyxy(x):
    """Convert boxes from (center_x, center_y, width, height) to corner form.

    Args:
        x: tensor whose last dimension holds the four values
            ``(x_c, y_c, w, h)``.

    Returns:
        Tensor of the same shape with ``(x0, y0, x1, y1)`` in the last
        dimension, where the corners are the centre minus/plus half the extent.
    """
    x_c, y_c, w, h = x.unbind(-1)
    half_w = 0.5 * w
    half_h = 0.5 * h
    corners = [x_c - half_w, y_c - half_h, x_c + half_w, y_c + half_h]
    return torch.stack(corners, dim=-1)
def box_xyxy_to_cxcywh(x):
    """Convert boxes from corner form to (center_x, center_y, width, height).

    Args:
        x: tensor whose last dimension holds the four values
            ``(x0, y0, x1, y1)``.

    Returns:
        Tensor of the same shape with ``(x_c, y_c, w, h)`` in the last
        dimension: the corner midpoint followed by the corner differences.
    """
    x0, y0, x1, y1 = x.unbind(-1)
    center_x = (x0 + x1) / 2
    center_y = (y0 + y1) / 2
    width = x1 - x0
    height = y1 - y0
    return torch.stack([center_x, center_y, width, height], dim=-1)
...@@ -79,11 +77,11 @@ def masks_to_boxes(masks): ...@@ -79,11 +77,11 @@ def masks_to_boxes(masks):
x = torch.arange(0, w, dtype=torch.float) x = torch.arange(0, w, dtype=torch.float)
y, x = torch.meshgrid(y, x) y, x = torch.meshgrid(y, x)
x_mask = (masks * x.unsqueeze(0)) x_mask = masks * x.unsqueeze(0)
x_max = x_mask.flatten(1).max(-1)[0] x_max = x_mask.flatten(1).max(-1)[0]
x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
y_mask = (masks * y.unsqueeze(0)) y_mask = masks * y.unsqueeze(0)
y_max = y_mask.flatten(1).max(-1)[0] y_max = y_mask.flatten(1).max(-1)[0]
y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
......
...@@ -6,21 +6,22 @@ Misc functions, including distributed helpers. ...@@ -6,21 +6,22 @@ Misc functions, including distributed helpers.
Mostly copy-paste from torchvision references. Mostly copy-paste from torchvision references.
""" """
import datetime
import os import os
import pickle
import subprocess import subprocess
import time import time
from collections import defaultdict, deque from collections import defaultdict, deque
import datetime from distutils.version import LooseVersion
import pickle
from typing import Optional, List from typing import Optional, List
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from torch import Tensor
# needed due to empty tensor bug in pytorch and torchvision 0.5 # needed due to empty tensor bug in pytorch and torchvision 0.5
import torchvision import torchvision
from distutils.version import LooseVersion from torch import Tensor
if LooseVersion(torchvision.__version__) < LooseVersion("0.7.0"): if LooseVersion(torchvision.__version__) < LooseVersion("0.7.0"):
from torchvision.ops import _new_empty_tensor from torchvision.ops import _new_empty_tensor
from torchvision.ops.misc import _output_size from torchvision.ops.misc import _output_size
...@@ -50,7 +51,7 @@ class SmoothedValue(object): ...@@ -50,7 +51,7 @@ class SmoothedValue(object):
""" """
if not is_dist_avail_and_initialized(): if not is_dist_avail_and_initialized():
return return
t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
dist.barrier() dist.barrier()
dist.all_reduce(t) dist.all_reduce(t)
t = t.tolist() t = t.tolist()
...@@ -85,7 +86,8 @@ class SmoothedValue(object): ...@@ -85,7 +86,8 @@ class SmoothedValue(object):
avg=self.avg, avg=self.avg,
global_avg=self.global_avg, global_avg=self.global_avg,
max=self.max, max=self.max,
value=self.value) value=self.value,
)
def all_gather(data): def all_gather(data):
...@@ -119,14 +121,16 @@ def all_gather(data): ...@@ -119,14 +121,16 @@ def all_gather(data):
for _ in size_list: for _ in size_list:
tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
if local_size != max_size: if local_size != max_size:
padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") padding = torch.empty(
size=(max_size - local_size,), dtype=torch.uint8, device="cuda"
)
tensor = torch.cat((tensor, padding), dim=0) tensor = torch.cat((tensor, padding), dim=0)
dist.all_gather(tensor_list, tensor) dist.all_gather(tensor_list, tensor)
data_list = [] data_list = []
for size, tensor in zip(size_list, tensor_list): for size, tensor in zip(size_list, tensor_list):
buffer = tensor.cpu().numpy().tobytes()[:size] buffer = tensor.cpu().numpy().tobytes()[:size]
data_list.append(pickle.loads(buffer)) #noqa data_list.append(pickle.loads(buffer)) # noqa
return data_list return data_list
...@@ -175,15 +179,14 @@ class MetricLogger(object): ...@@ -175,15 +179,14 @@ class MetricLogger(object):
return self.meters[attr] return self.meters[attr]
if attr in self.__dict__: if attr in self.__dict__:
return self.__dict__[attr] return self.__dict__[attr]
raise AttributeError("'{}' object has no attribute '{}'".format( raise AttributeError(
type(self).__name__, attr)) "'{}' object has no attribute '{}'".format(type(self).__name__, attr)
)
def __str__(self):
    """Render every meter as ``"<name>: <meter>"`` joined by ``self.delimiter``.

    Meters appear in the iteration order of ``self.meters``; each meter is
    converted with ``str()``.
    """
    return self.delimiter.join(
        "{}: {}".format(name, str(meter)) for name, meter in self.meters.items()
    )
def synchronize_between_processes(self): def synchronize_between_processes(self):
...@@ -196,31 +199,35 @@ class MetricLogger(object): ...@@ -196,31 +199,35 @@ class MetricLogger(object):
def log_every(self, iterable, print_freq, header=None): def log_every(self, iterable, print_freq, header=None):
i = 0 i = 0
if not header: if not header:
header = '' header = ""
start_time = time.time() start_time = time.time()
end = time.time() end = time.time()
iter_time = SmoothedValue(fmt='{avg:.4f}') iter_time = SmoothedValue(fmt="{avg:.4f}")
data_time = SmoothedValue(fmt='{avg:.4f}') data_time = SmoothedValue(fmt="{avg:.4f}")
space_fmt = ':' + str(len(str(len(iterable)))) + 'd' space_fmt = ":" + str(len(str(len(iterable)))) + "d"
if torch.cuda.is_available(): if torch.cuda.is_available():
log_msg = self.delimiter.join([ log_msg = self.delimiter.join(
header, [
'[{0' + space_fmt + '}/{1}]', header,
'eta: {eta}', "[{0" + space_fmt + "}/{1}]",
'{meters}', "eta: {eta}",
'time: {time}', "{meters}",
'data: {data}', "time: {time}",
'max mem: {memory:.0f}' "data: {data}",
]) "max mem: {memory:.0f}",
]
)
else: else:
log_msg = self.delimiter.join([ log_msg = self.delimiter.join(
header, [
'[{0' + space_fmt + '}/{1}]', header,
'eta: {eta}', "[{0" + space_fmt + "}/{1}]",
'{meters}', "eta: {eta}",
'time: {time}', "{meters}",
'data: {data}' "time: {time}",
]) "data: {data}",
]
)
MB = 1024.0 * 1024.0 MB = 1024.0 * 1024.0
for obj in iterable: for obj in iterable:
data_time.update(time.time() - end) data_time.update(time.time() - end)
...@@ -230,38 +237,54 @@ class MetricLogger(object): ...@@ -230,38 +237,54 @@ class MetricLogger(object):
eta_seconds = iter_time.global_avg * (len(iterable) - i) eta_seconds = iter_time.global_avg * (len(iterable) - i)
eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
if torch.cuda.is_available(): if torch.cuda.is_available():
print(log_msg.format( print(
i, len(iterable), eta=eta_string, log_msg.format(
meters=str(self), i,
time=str(iter_time), data=str(data_time), len(iterable),
memory=torch.cuda.max_memory_allocated() / MB)) eta=eta_string,
meters=str(self),
time=str(iter_time),
data=str(data_time),
memory=torch.cuda.max_memory_allocated() / MB,
)
)
else: else:
print(log_msg.format( print(
i, len(iterable), eta=eta_string, log_msg.format(
meters=str(self), i,
time=str(iter_time), data=str(data_time))) len(iterable),
eta=eta_string,
meters=str(self),
time=str(iter_time),
data=str(data_time),
)
)
i += 1 i += 1
end = time.time() end = time.time()
total_time = time.time() - start_time total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time))) total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('{} Total time: {} ({:.4f} s / it)'.format( print(
header, total_time_str, total_time / len(iterable))) "{} Total time: {} ({:.4f} s / it)".format(
header, total_time_str, total_time / len(iterable)
)
)
def get_sha(): def get_sha():
cwd = os.path.dirname(os.path.abspath(__file__)) cwd = os.path.dirname(os.path.abspath(__file__))
def _run(command): def _run(command):
return subprocess.check_output(command, cwd=cwd).decode('ascii').strip() return subprocess.check_output(command, cwd=cwd).decode("ascii").strip()
sha = 'N/A'
sha = "N/A"
diff = "clean" diff = "clean"
branch = 'N/A' branch = "N/A"
try: try:
sha = _run(['git', 'rev-parse', 'HEAD']) sha = _run(["git", "rev-parse", "HEAD"])
subprocess.check_output(['git', 'diff'], cwd=cwd) subprocess.check_output(["git", "diff"], cwd=cwd)
diff = _run(['git', 'diff-index', 'HEAD']) diff = _run(["git", "diff-index", "HEAD"])
diff = "has uncommited changes" if diff else "clean" diff = "has uncommited changes" if diff else "clean"
branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD']) branch = _run(["git", "rev-parse", "--abbrev-ref", "HEAD"])
except Exception: except Exception:
pass pass
message = f"sha: {sha}, status: {diff}, branch: {branch}" message = f"sha: {sha}, status: {diff}, branch: {branch}"
...@@ -325,9 +348,9 @@ def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): ...@@ -325,9 +348,9 @@ def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
mask = torch.ones((b, h, w), dtype=torch.bool, device=device) mask = torch.ones((b, h, w), dtype=torch.bool, device=device)
for img, pad_img, m in zip(tensor_list, tensor, mask): for img, pad_img, m in zip(tensor_list, tensor, mask):
pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
m[: img.shape[1], :img.shape[2]] = False m[: img.shape[1], : img.shape[2]] = False
else: else:
raise ValueError('not supported') raise ValueError("not supported")
return NestedTensor(tensor, mask) return NestedTensor(tensor, mask)
...@@ -337,7 +360,9 @@ def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): ...@@ -337,7 +360,9 @@ def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor:
max_size = [] max_size = []
for i in range(tensor_list[0].dim()): for i in range(tensor_list[0].dim()):
max_size_i = torch.max(torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)).to(torch.int64) max_size_i = torch.max(
torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)
).to(torch.int64)
max_size.append(max_size_i) max_size.append(max_size_i)
max_size = tuple(max_size) max_size = tuple(max_size)
...@@ -349,11 +374,15 @@ def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTen ...@@ -349,11 +374,15 @@ def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTen
padded_masks = [] padded_masks = []
for img in tensor_list: for img in tensor_list:
padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) padded_img = torch.nn.functional.pad(
img, (0, padding[2], 0, padding[1], 0, padding[0])
)
padded_imgs.append(padded_img) padded_imgs.append(padded_img)
m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) m = torch.zeros_like(img[0], dtype=torch.int, device=img.device)
padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) padded_mask = torch.nn.functional.pad(
m, (0, padding[2], 0, padding[1]), "constant", 1
)
padded_masks.append(padded_mask.to(torch.bool)) padded_masks.append(padded_mask.to(torch.bool))
tensor = torch.stack(padded_imgs) tensor = torch.stack(padded_imgs)
...@@ -367,10 +396,11 @@ def setup_for_distributed(is_master): ...@@ -367,10 +396,11 @@ def setup_for_distributed(is_master):
This function disables printing when not in master process This function disables printing when not in master process
""" """
import builtins as __builtin__ import builtins as __builtin__
builtin_print = __builtin__.print builtin_print = __builtin__.print
def print(*args, **kwargs): def print(*args, **kwargs):
force = kwargs.pop('force', False) force = kwargs.pop("force", False)
if is_master or force: if is_master or force:
builtin_print(*args, **kwargs) builtin_print(*args, **kwargs)
...@@ -407,26 +437,31 @@ def save_on_master(*args, **kwargs): ...@@ -407,26 +437,31 @@ def save_on_master(*args, **kwargs):
def init_distributed_mode(args):
    """Fill ``args`` with distributed settings and start the process group.

    Three cases, checked in order:

    * ``RANK`` and ``WORLD_SIZE`` are in the environment: ``args.rank``,
      ``args.world_size`` and ``args.gpu`` (from ``LOCAL_RANK``) are read
      from the environment.
    * ``SLURM_PROCID`` is in the environment: ``args.rank`` comes from it and
      ``args.gpu`` is the rank modulo the visible CUDA device count.
      ``args.world_size`` is not set on this path, so it must already be
      present on ``args``.
    * Otherwise: prints a notice, sets ``args.distributed = False`` and
      returns without touching CUDA or ``torch.distributed``.

    On the two distributed paths the function then selects the CUDA device,
    initialises the process group with the ``nccl`` backend using
    ``args.dist_url``, waits on a barrier, and calls
    ``setup_for_distributed`` with whether this process has rank 0.

    Args:
        args: mutable namespace; must provide ``dist_url`` on the
            distributed paths.
    """
    env = os.environ
    if "RANK" in env and "WORLD_SIZE" in env:
        args.rank = int(env["RANK"])
        args.world_size = int(env["WORLD_SIZE"])
        args.gpu = int(env["LOCAL_RANK"])
    elif "SLURM_PROCID" in env:
        args.rank = int(env["SLURM_PROCID"])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print("Not using distributed mode")
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = "nccl"
    print(
        "| distributed init (rank {}): {}".format(args.rank, args.dist_url), flush=True
    )
    torch.distributed.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
    )
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)
...@@ -450,14 +485,16 @@ def accuracy(output, target, topk=(1,)): ...@@ -450,14 +485,16 @@ def accuracy(output, target, topk=(1,)):
return res return res
def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): def interpolate(
input, size=None, scale_factor=None, mode="nearest", align_corners=None
):
# type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
""" """
Equivalent to nn.functional.interpolate, but with support for empty batch sizes. Equivalent to nn.functional.interpolate, but with support for empty batch sizes.
This will eventually be supported natively by PyTorch, and this This will eventually be supported natively by PyTorch, and this
class can go away. class can go away.
""" """
#if float(torchvision.__version__[:3]) < 0.7: # if float(torchvision.__version__[:3]) < 0.7:
if LooseVersion(torchvision.__version__) < LooseVersion("0.7.0"): if LooseVersion(torchvision.__version__) < LooseVersion("0.7.0"):
if input.numel() > 0: if input.numel() > 0:
return torch.nn.functional.interpolate( return torch.nn.functional.interpolate(
...@@ -468,10 +505,13 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne ...@@ -468,10 +505,13 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corne
output_shape = list(input.shape[:-2]) + list(output_shape) output_shape = list(input.shape[:-2]) + list(output_shape)
return _new_empty_tensor(input, output_shape) return _new_empty_tensor(input, output_shape)
else: else:
return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners) return torchvision.ops.misc.interpolate(
input, size, scale_factor, mode, align_corners
)
def inverse_sigmoid(x, eps=1e-5): def inverse_sigmoid(x, eps=1e-5):
x = x.clamp(min=0, max=1) x = x.clamp(min=0, max=1)
x1 = x.clamp(min=eps) x1 = x.clamp(min=eps)
x2 = (1 - x).clamp(min=eps) x2 = (1 - x).clamp(min=eps)
return torch.log(x1/x2) return torch.log(x1 / x2)
...@@ -3,17 +3,22 @@ ...@@ -3,17 +3,22 @@
""" """
Plotting utilities to visualize training logs. Plotting utilities to visualize training logs.
""" """
import torch from pathlib import Path, PurePath
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np import numpy as np
import pandas as pd
import seaborn as sns import seaborn as sns
import matplotlib.pyplot as plt import torch
from pathlib import Path, PurePath
def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col=0, log_name='log.txt'): def plot_logs(
''' logs,
fields=("class_error", "loss_bbox_unscaled", "mAP"),
ewm_col=0,
log_name="log.txt",
):
"""
Function to plot specific fields from training log(s). Plots both training and test results. Function to plot specific fields from training log(s). Plots both training and test results.
:: Inputs - logs = list containing Path objects, each pointing to individual dir with a log file :: Inputs - logs = list containing Path objects, each pointing to individual dir with a log file
...@@ -24,7 +29,7 @@ def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col ...@@ -24,7 +29,7 @@ def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col
:: Outputs - matplotlib plots of results in fields, color coded for each log file. :: Outputs - matplotlib plots of results in fields, color coded for each log file.
- solid lines are training results, dashed lines are test results. - solid lines are training results, dashed lines are test results.
''' """
func_name = "plot_utils.py::plot_logs" func_name = "plot_utils.py::plot_logs"
# verify logs is a list of Paths (list[Paths]) or single Pathlib object Path, # verify logs is a list of Paths (list[Paths]) or single Pathlib object Path,
...@@ -33,17 +38,25 @@ def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col ...@@ -33,17 +38,25 @@ def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col
if not isinstance(logs, list): if not isinstance(logs, list):
if isinstance(logs, PurePath): if isinstance(logs, PurePath):
logs = [logs] logs = [logs]
print(f"{func_name} info: logs param expects a list argument, converted to list[Path].") print(
f"{func_name} info: logs param expects a list argument, converted to list[Path]."
)
else: else:
raise ValueError(f"{func_name} - invalid argument for logs parameter.\n \ raise ValueError(
Expect list[Path] or single Path obj, received {type(logs)}") f"{func_name} - invalid argument for logs parameter.\n \
Expect list[Path] or single Path obj, received {type(logs)}"
)
# Quality checks - verify valid dir(s), that every item in list is Path object, and that log_name exists in each dir # Quality checks - verify valid dir(s), that every item in list is Path object, and that log_name exists in each dir
for _, dir in enumerate(logs): for _, dir in enumerate(logs):
if not isinstance(dir, PurePath): if not isinstance(dir, PurePath):
raise ValueError(f"{func_name} - non-Path object in logs argument of {type(dir)}: \n{dir}") raise ValueError(
f"{func_name} - non-Path object in logs argument of {type(dir)}: \n{dir}"
)
if not dir.exists(): if not dir.exists():
raise ValueError(f"{func_name} - invalid directory in logs argument:\n{dir}") raise ValueError(
f"{func_name} - invalid directory in logs argument:\n{dir}"
)
# verify log_name exists # verify log_name exists
fn = Path(dir / log_name) fn = Path(dir / log_name)
if not fn.exists(): if not fn.exists():
...@@ -58,52 +71,57 @@ def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col ...@@ -58,52 +71,57 @@ def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col
for df, color in zip(dfs, sns.color_palette(n_colors=len(logs))): for df, color in zip(dfs, sns.color_palette(n_colors=len(logs))):
for j, field in enumerate(fields): for j, field in enumerate(fields):
if field == 'mAP': if field == "mAP":
coco_eval = pd.DataFrame( coco_eval = (
np.stack(df.test_coco_eval_bbox.dropna().values)[:, 1] pd.DataFrame(np.stack(df.test_coco_eval_bbox.dropna().values)[:, 1])
).ewm(com=ewm_col).mean() .ewm(com=ewm_col)
.mean()
)
axs[j].plot(coco_eval, c=color) axs[j].plot(coco_eval, c=color)
else: else:
df.interpolate().ewm(com=ewm_col).mean().plot( df.interpolate().ewm(com=ewm_col).mean().plot(
y=[f'train_{field}', f'test_{field}'], y=[f"train_{field}", f"test_{field}"],
ax=axs[j], ax=axs[j],
color=[color] * 2, color=[color] * 2,
style=['-', '--'] style=["-", "--"],
) )
for ax, field in zip(axs, fields): for ax, field in zip(axs, fields):
ax.legend([Path(p).name for p in logs]) ax.legend([Path(p).name for p in logs])
ax.set_title(field) ax.set_title(field)
def plot_precision_recall(files, naming_scheme='iter'): def plot_precision_recall(files, naming_scheme="iter"):
if naming_scheme == 'exp_id': if naming_scheme == "exp_id":
# name becomes exp_id # name becomes exp_id
names = [f.parts[-3] for f in files] names = [f.parts[-3] for f in files]
elif naming_scheme == 'iter': elif naming_scheme == "iter":
names = [f.stem for f in files] names = [f.stem for f in files]
else: else:
raise ValueError(f'not supported {naming_scheme}') raise ValueError(f"not supported {naming_scheme}")
fig, axs = plt.subplots(ncols=2, figsize=(16, 5)) fig, axs = plt.subplots(ncols=2, figsize=(16, 5))
for f, color, name in zip(files, sns.color_palette("Blues", n_colors=len(files)), names): for f, color, name in zip(
files, sns.color_palette("Blues", n_colors=len(files)), names
):
data = torch.load(f) data = torch.load(f)
# precision is n_iou, n_points, n_cat, n_area, max_det # precision is n_iou, n_points, n_cat, n_area, max_det
precision = data['precision'] precision = data["precision"]
recall = data['params'].recThrs recall = data["params"].recThrs
scores = data['scores'] scores = data["scores"]
# take precision for all classes, all areas and 100 detections # take precision for all classes, all areas and 100 detections
precision = precision[0, :, :, 0, -1].mean(1) precision = precision[0, :, :, 0, -1].mean(1)
scores = scores[0, :, :, 0, -1].mean(1) scores = scores[0, :, :, 0, -1].mean(1)
prec = precision.mean() prec = precision.mean()
rec = data['recall'][0, :, 0, -1].mean() rec = data["recall"][0, :, 0, -1].mean()
print(f'{naming_scheme} {name}: mAP@50={prec * 100: 05.1f}, ' + print(
f'score={scores.mean():0.3f}, ' + f"{naming_scheme} {name}: mAP@50={prec * 100: 05.1f}, "
f'f1={2 * prec * rec / (prec + rec + 1e-8):0.3f}' + f"score={scores.mean():0.3f}, "
) + f"f1={2 * prec * rec / (prec + rec + 1e-8):0.3f}"
)
axs[0].plot(recall, precision, c=color) axs[0].plot(recall, precision, c=color)
axs[1].plot(recall, scores, c=color) axs[1].plot(recall, scores, c=color)
axs[0].set_title('Precision / Recall') axs[0].set_title("Precision / Recall")
axs[0].legend(names) axs[0].legend(names)
axs[1].set_title('Scores / Recall') axs[1].set_title("Scores / Recall")
axs[1].legend(names) axs[1].legend(names)
return fig, axs return fig, axs
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import os
import logging
import argparse import argparse
import datetime import datetime
import json import json
import logging
import os
import random import random
import time import time
from datetime import timedelta from datetime import timedelta
from pathlib import Path from pathlib import Path
import detr.util.misc as utils
import numpy as np import numpy as np
import torch import torch
from torch.utils.data import DataLoader, DistributedSampler
import torch.distributed as dist import torch.distributed as dist
import torch.multiprocessing as mp import torch.multiprocessing as mp
from detectron2.engine.launch import _find_free_port
from detectron2.utils.file_io import PathManager
from detr import datasets from detr import datasets
import detr.util.misc as utils
from detr.datasets import build_dataset, get_coco_api_from_dataset from detr.datasets import build_dataset, get_coco_api_from_dataset
from detr.engine import evaluate, train_one_epoch from detr.engine import evaluate, train_one_epoch
from detr.models import build_model from detr.models import build_model
from detectron2.utils.file_io import PathManager from torch.utils.data import DataLoader, DistributedSampler
from detectron2.engine.launch import _find_free_port
DEFAULT_TIMEOUT = timedelta(minutes=30) DEFAULT_TIMEOUT = timedelta(minutes=30)
def get_args_parser(): def get_args_parser():
parser = argparse.ArgumentParser('Set transformer detector', add_help=False) parser = argparse.ArgumentParser("Set transformer detector", add_help=False)
parser.add_argument('--lr', default=1e-4, type=float) parser.add_argument("--lr", default=1e-4, type=float)
parser.add_argument('--lr_backbone', default=1e-5, type=float) parser.add_argument("--lr_backbone", default=1e-5, type=float)
parser.add_argument('--batch_size', default=2, type=int) parser.add_argument("--batch_size", default=2, type=int)
parser.add_argument('--weight_decay', default=1e-4, type=float) parser.add_argument("--weight_decay", default=1e-4, type=float)
parser.add_argument('--epochs', default=300, type=int) parser.add_argument("--epochs", default=300, type=int)
parser.add_argument('--lr_drop', default=200, type=int) parser.add_argument("--lr_drop", default=200, type=int)
parser.add_argument('--clip_max_norm', default=0.1, type=float, parser.add_argument(
help='gradient clipping max norm') "--clip_max_norm", default=0.1, type=float, help="gradient clipping max norm"
)
# Model parameters # Model parameters
parser.add_argument('--frozen_weights', type=str, default=None, parser.add_argument(
help="Path to the pretrained model. If set, only the mask head will be trained") "--frozen_weights",
type=str,
default=None,
help="Path to the pretrained model. If set, only the mask head will be trained",
)
# * Backbone # * Backbone
parser.add_argument('--backbone', default='resnet50', type=str, parser.add_argument(
help="Name of the convolutional backbone to use") "--backbone",
parser.add_argument('--dilation', action='store_true', default="resnet50",
help="If true, we replace stride with dilation in the last convolutional block (DC5)") type=str,
parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'), help="Name of the convolutional backbone to use",
help="Type of positional embedding to use on top of the image features") )
parser.add_argument(
"--dilation",
action="store_true",
help="If true, we replace stride with dilation in the last convolutional block (DC5)",
)
parser.add_argument(
"--position_embedding",
default="sine",
type=str,
choices=("sine", "learned"),
help="Type of positional embedding to use on top of the image features",
)
# * Transformer # * Transformer
parser.add_argument('--enc_layers', default=6, type=int, parser.add_argument(
help="Number of encoding layers in the transformer") "--enc_layers",
parser.add_argument('--dec_layers', default=6, type=int, default=6,
help="Number of decoding layers in the transformer") type=int,
parser.add_argument('--dim_feedforward', default=2048, type=int, help="Number of encoding layers in the transformer",
help="Intermediate size of the feedforward layers in the transformer blocks") )
parser.add_argument('--hidden_dim', default=256, type=int, parser.add_argument(
help="Size of the embeddings (dimension of the transformer)") "--dec_layers",
parser.add_argument('--dropout', default=0.1, type=float, default=6,
help="Dropout applied in the transformer") type=int,
parser.add_argument('--nheads', default=8, type=int, help="Number of decoding layers in the transformer",
help="Number of attention heads inside the transformer's attentions") )
parser.add_argument('--num_queries', default=100, type=int, parser.add_argument(
help="Number of query slots") "--dim_feedforward",
parser.add_argument('--pre_norm', action='store_true') default=2048,
type=int,
help="Intermediate size of the feedforward layers in the transformer blocks",
)
parser.add_argument(
"--hidden_dim",
default=256,
type=int,
help="Size of the embeddings (dimension of the transformer)",
)
parser.add_argument(
"--dropout", default=0.1, type=float, help="Dropout applied in the transformer"
)
parser.add_argument(
"--nheads",
default=8,
type=int,
help="Number of attention heads inside the transformer's attentions",
)
parser.add_argument(
"--num_queries", default=100, type=int, help="Number of query slots"
)
parser.add_argument("--pre_norm", action="store_true")
# * Segmentation # * Segmentation
parser.add_argument('--masks', action='store_true', parser.add_argument(
help="Train segmentation head if the flag is provided") "--masks",
action="store_true",
help="Train segmentation head if the flag is provided",
)
# Loss # Loss
parser.add_argument('--no_aux_loss', dest='aux_loss', action='store_false', parser.add_argument(
help="Disables auxiliary decoding losses (loss at each layer)") "--no_aux_loss",
dest="aux_loss",
action="store_false",
help="Disables auxiliary decoding losses (loss at each layer)",
)
# * Matcher # * Matcher
parser.add_argument('--set_cost_class', default=1, type=float, parser.add_argument(
help="Class coefficient in the matching cost") "--set_cost_class",
parser.add_argument('--set_cost_bbox', default=5, type=float, default=1,
help="L1 box coefficient in the matching cost") type=float,
parser.add_argument('--set_cost_giou', default=2, type=float, help="Class coefficient in the matching cost",
help="giou box coefficient in the matching cost") )
parser.add_argument(
"--set_cost_bbox",
default=5,
type=float,
help="L1 box coefficient in the matching cost",
)
parser.add_argument(
"--set_cost_giou",
default=2,
type=float,
help="giou box coefficient in the matching cost",
)
# * Loss coefficients # * Loss coefficients
parser.add_argument('--mask_loss_coef', default=1, type=float) parser.add_argument("--mask_loss_coef", default=1, type=float)
parser.add_argument('--dice_loss_coef', default=1, type=float) parser.add_argument("--dice_loss_coef", default=1, type=float)
parser.add_argument('--bbox_loss_coef', default=5, type=float) parser.add_argument("--bbox_loss_coef", default=5, type=float)
parser.add_argument('--giou_loss_coef', default=2, type=float) parser.add_argument("--giou_loss_coef", default=2, type=float)
parser.add_argument('--eos_coef', default=0.1, type=float, parser.add_argument(
help="Relative classification weight of the no-object class") "--eos_coef",
default=0.1,
type=float,
help="Relative classification weight of the no-object class",
)
# dataset parameters # dataset parameters
parser.add_argument('--dataset_file', default='coco') parser.add_argument("--dataset_file", default="coco")
parser.add_argument('--ade_path', type=str, default='manifold://winvision/tree/detectron2/ADEChallengeData2016/') parser.add_argument(
parser.add_argument('--coco_path', type=str, default='manifold://fair_vision_data/tree/') "--ade_path",
parser.add_argument('--coco_panoptic_path', type=str, default='manifold://fair_vision_data/tree/') type=str,
parser.add_argument('--remove_difficult', action='store_true') default="manifold://winvision/tree/detectron2/ADEChallengeData2016/",
)
parser.add_argument('--output-dir', default='', parser.add_argument(
help='path where to save, empty for no saving') "--coco_path", type=str, default="manifold://fair_vision_data/tree/"
parser.add_argument('--device', default='cuda', )
help='device to use for training / testing') parser.add_argument(
parser.add_argument('--seed', default=42, type=int) "--coco_panoptic_path", type=str, default="manifold://fair_vision_data/tree/"
parser.add_argument('--resume', default='', help='resume from checkpoint') )
parser.add_argument('--start_epoch', default=0, type=int, metavar='N', parser.add_argument("--remove_difficult", action="store_true")
help='start epoch')
parser.add_argument('--eval', action='store_true') parser.add_argument(
parser.add_argument('--num_workers', default=2, type=int) "--output-dir", default="", help="path where to save, empty for no saving"
)
parser.add_argument(
"--device", default="cuda", help="device to use for training / testing"
)
parser.add_argument("--seed", default=42, type=int)
parser.add_argument("--resume", default="", help="resume from checkpoint")
parser.add_argument(
"--start_epoch", default=0, type=int, metavar="N", help="start epoch"
)
parser.add_argument("--eval", action="store_true")
parser.add_argument("--num_workers", default=2, type=int)
# distributed training parameters # distributed training parameters
parser.add_argument("--num-gpus", type=int, default=8, help="number of gpus *per machine*")
parser.add_argument("--num-machines", type=int, default=1, help="total number of machines")
parser.add_argument( parser.add_argument(
"--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)") "--num-gpus", type=int, default=8, help="number of gpus *per machine*"
parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training') )
parser.add_argument(
"--num-machines", type=int, default=1, help="total number of machines"
)
parser.add_argument(
"--machine-rank",
type=int,
default=0,
help="the rank of this machine (unique per machine)",
)
parser.add_argument(
"--dist-url", default="env://", help="url used to set up distributed training"
)
return parser return parser
def main(args): def main(args):
#utils.init_distributed_mode(args) # utils.init_distributed_mode(args)
if args.frozen_weights is not None: if args.frozen_weights is not None:
assert args.masks, "Frozen training is meant for segmentation only" assert args.masks, "Frozen training is meant for segmentation only"
...@@ -137,21 +219,32 @@ def main(args): ...@@ -137,21 +219,32 @@ def main(args):
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module model_without_ddp = model.module
n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('number of params:', n_parameters) print("number of params:", n_parameters)
param_dicts = [ param_dicts = [
{"params": [p for n, p in model_without_ddp.named_parameters() if "backbone" not in n and p.requires_grad]},
{ {
"params": [p for n, p in model_without_ddp.named_parameters() if "backbone" in n and p.requires_grad], "params": [
p
for n, p in model_without_ddp.named_parameters()
if "backbone" not in n and p.requires_grad
]
},
{
"params": [
p
for n, p in model_without_ddp.named_parameters()
if "backbone" in n and p.requires_grad
],
"lr": args.lr_backbone, "lr": args.lr_backbone,
}, },
] ]
optimizer = torch.optim.AdamW(param_dicts, lr=args.lr, optimizer = torch.optim.AdamW(
weight_decay=args.weight_decay) param_dicts, lr=args.lr, weight_decay=args.weight_decay
)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop) lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)
dataset_train = build_dataset(image_set='train', args=args) dataset_train = build_dataset(image_set="train", args=args)
dataset_val = build_dataset(image_set='val', args=args) dataset_val = build_dataset(image_set="val", args=args)
if args.distributed: if args.distributed:
sampler_train = DistributedSampler(dataset_train) sampler_train = DistributedSampler(dataset_train)
...@@ -161,12 +254,23 @@ def main(args): ...@@ -161,12 +254,23 @@ def main(args):
sampler_val = torch.utils.data.SequentialSampler(dataset_val) sampler_val = torch.utils.data.SequentialSampler(dataset_val)
batch_sampler_train = torch.utils.data.BatchSampler( batch_sampler_train = torch.utils.data.BatchSampler(
sampler_train, args.batch_size, drop_last=True) sampler_train, args.batch_size, drop_last=True
)
data_loader_train = DataLoader(dataset_train, batch_sampler=batch_sampler_train, data_loader_train = DataLoader(
collate_fn=utils.collate_fn, num_workers=args.num_workers) dataset_train,
data_loader_val = DataLoader(dataset_val, args.batch_size, sampler=sampler_val, batch_sampler=batch_sampler_train,
drop_last=False, collate_fn=utils.collate_fn, num_workers=args.num_workers) collate_fn=utils.collate_fn,
num_workers=args.num_workers,
)
data_loader_val = DataLoader(
dataset_val,
args.batch_size,
sampler=sampler_val,
drop_last=False,
collate_fn=utils.collate_fn,
num_workers=args.num_workers,
)
if args.dataset_file == "coco_panoptic": if args.dataset_file == "coco_panoptic":
# We also evaluate AP during panoptic training, on original coco DS # We also evaluate AP during panoptic training, on original coco DS
...@@ -176,24 +280,37 @@ def main(args): ...@@ -176,24 +280,37 @@ def main(args):
base_ds = get_coco_api_from_dataset(dataset_val) base_ds = get_coco_api_from_dataset(dataset_val)
if args.frozen_weights is not None: if args.frozen_weights is not None:
checkpoint = torch.load(args.frozen_weights, map_location='cpu') checkpoint = torch.load(args.frozen_weights, map_location="cpu")
model_without_ddp.detr.load_state_dict(checkpoint['model']) model_without_ddp.detr.load_state_dict(checkpoint["model"])
if args.resume: if args.resume:
if args.resume.startswith('https'): if args.resume.startswith("https"):
checkpoint = torch.hub.load_state_dict_from_url( checkpoint = torch.hub.load_state_dict_from_url(
args.resume, map_location='cpu', check_hash=True) args.resume, map_location="cpu", check_hash=True
)
else: else:
checkpoint = torch.load(args.resume, map_location='cpu') checkpoint = torch.load(args.resume, map_location="cpu")
model_without_ddp.load_state_dict(checkpoint['model']) model_without_ddp.load_state_dict(checkpoint["model"])
if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint: if (
optimizer.load_state_dict(checkpoint['optimizer']) not args.eval
lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) and "optimizer" in checkpoint
args.start_epoch = checkpoint['epoch'] + 1 and "lr_scheduler" in checkpoint
and "epoch" in checkpoint
):
optimizer.load_state_dict(checkpoint["optimizer"])
lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
args.start_epoch = checkpoint["epoch"] + 1
if args.eval: if args.eval:
test_stats, coco_evaluator = evaluate(model, criterion, postprocessors, test_stats, coco_evaluator = evaluate(
data_loader_val, base_ds, device, args.output_dir) model,
criterion,
postprocessors,
data_loader_val,
base_ds,
device,
args.output_dir,
)
if args.output_dir: if args.output_dir:
with PathManager.open(os.path.join(args.output_dir, "eval.pth"), "wb") as f: with PathManager.open(os.path.join(args.output_dir, "eval.pth"), "wb") as f:
utils.save_on_master(coco_evaluator.coco_eval["bbox"].eval, f) utils.save_on_master(coco_evaluator.coco_eval["bbox"].eval, f)
...@@ -205,33 +322,52 @@ def main(args): ...@@ -205,33 +322,52 @@ def main(args):
if args.distributed: if args.distributed:
sampler_train.set_epoch(epoch) sampler_train.set_epoch(epoch)
train_stats = train_one_epoch( train_stats = train_one_epoch(
model, criterion, data_loader_train, optimizer, device, epoch, model,
args.clip_max_norm) criterion,
data_loader_train,
optimizer,
device,
epoch,
args.clip_max_norm,
)
lr_scheduler.step() lr_scheduler.step()
if args.output_dir: if args.output_dir:
checkpoint_paths = [] #os.path.join(args.output_dir, 'checkpoint.pth')] checkpoint_paths = [] # os.path.join(args.output_dir, 'checkpoint.pth')]
# extra checkpoint before LR drop and every 10 epochs # extra checkpoint before LR drop and every 10 epochs
if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 10 == 0: if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 10 == 0:
checkpoint_paths.append(os.path.join(args.output_dir, f'checkpoint{epoch:04}.pth')) checkpoint_paths.append(
os.path.join(args.output_dir, f"checkpoint{epoch:04}.pth")
)
for checkpoint_path in checkpoint_paths: for checkpoint_path in checkpoint_paths:
with PathManager.open(checkpoint_path, "wb") as f: with PathManager.open(checkpoint_path, "wb") as f:
if args.gpu == 0 and args.machine_rank == 0: if args.gpu == 0 and args.machine_rank == 0:
utils.save_on_master({ utils.save_on_master(
'model': model_without_ddp.state_dict(), {
'optimizer': optimizer.state_dict(), "model": model_without_ddp.state_dict(),
'lr_scheduler': lr_scheduler.state_dict(), "optimizer": optimizer.state_dict(),
'epoch': epoch, "lr_scheduler": lr_scheduler.state_dict(),
'args': args, "epoch": epoch,
}, f) "args": args,
},
f,
)
test_stats, coco_evaluator = evaluate( test_stats, coco_evaluator = evaluate(
model, criterion, postprocessors, data_loader_val, base_ds, device, args.output_dir model,
criterion,
postprocessors,
data_loader_val,
base_ds,
device,
args.output_dir,
) )
log_stats = {**{f'train_{k}': v for k, v in train_stats.items()}, log_stats = {
**{f'test_{k}': v for k, v in test_stats.items()}, **{f"train_{k}": v for k, v in train_stats.items()},
'epoch': epoch, **{f"test_{k}": v for k, v in test_stats.items()},
'n_parameters': n_parameters} "epoch": epoch,
"n_parameters": n_parameters,
}
if args.output_dir and utils.is_main_process(): if args.output_dir and utils.is_main_process():
with PathManager.open(os.path.join(args.output_dir, "log.txt"), "w") as f: with PathManager.open(os.path.join(args.output_dir, "log.txt"), "w") as f:
...@@ -239,19 +375,21 @@ def main(args): ...@@ -239,19 +375,21 @@ def main(args):
# for evaluation logs # for evaluation logs
if coco_evaluator is not None: if coco_evaluator is not None:
PathManager.mkdirs(os.path.join(args.output_dir, 'eval')) PathManager.mkdirs(os.path.join(args.output_dir, "eval"))
if "bbox" in coco_evaluator.coco_eval: if "bbox" in coco_evaluator.coco_eval:
filenames = ['latest.pth'] filenames = ["latest.pth"]
if epoch % 50 == 0: if epoch % 50 == 0:
filenames.append(f'{epoch:03}.pth') filenames.append(f"{epoch:03}.pth")
for name in filenames: for name in filenames:
with PathManager.open(os.path.join(args.output_dir, "eval", name), "wb") as f: with PathManager.open(
torch.save(coco_evaluator.coco_eval["bbox"].eval, os.path.join(args.output_dir, "eval", name), "wb"
f) ) as f:
torch.save(coco_evaluator.coco_eval["bbox"].eval, f)
total_time = time.time() - start_time total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time))) total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str)) print("Training time {}".format(total_time_str))
def launch( def launch(
main_func, main_func,
...@@ -285,7 +423,9 @@ def launch( ...@@ -285,7 +423,9 @@ def launch(
# TODO prctl in spawned processes # TODO prctl in spawned processes
if dist_url == "auto": if dist_url == "auto":
assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs." assert (
num_machines == 1
), "dist_url=auto not supported in multi-machine jobs."
port = _find_free_port() port = _find_free_port()
dist_url = f"tcp://127.0.0.1:{port}" dist_url = f"tcp://127.0.0.1:{port}"
if num_machines > 1 and dist_url.startswith("file://"): if num_machines > 1 and dist_url.startswith("file://"):
...@@ -311,7 +451,7 @@ def launch( ...@@ -311,7 +451,7 @@ def launch(
else: else:
main_func(*args) main_func(*args)
def synchronize(): def synchronize():
""" """
Helper function to synchronize (barrier) among all processes when Helper function to synchronize (barrier) among all processes when
...@@ -326,6 +466,7 @@ def synchronize(): ...@@ -326,6 +466,7 @@ def synchronize():
return return
dist.barrier() dist.barrier()
def _distributed_worker( def _distributed_worker(
local_rank, local_rank,
main_func, main_func,
...@@ -336,7 +477,9 @@ def _distributed_worker( ...@@ -336,7 +477,9 @@ def _distributed_worker(
args, args,
timeout=DEFAULT_TIMEOUT, timeout=DEFAULT_TIMEOUT,
): ):
assert torch.cuda.is_available(), "cuda is not available. Please check your installation." assert (
torch.cuda.is_available()
), "cuda is not available. Please check your installation."
global_rank = machine_rank * num_gpus_per_machine + local_rank global_rank = machine_rank * num_gpus_per_machine + local_rank
try: try:
dist.init_process_group( dist.init_process_group(
...@@ -359,9 +502,9 @@ def _distributed_worker( ...@@ -359,9 +502,9 @@ def _distributed_worker(
args[0].gpu = local_rank args[0].gpu = local_rank
# Setup the local process group (which contains ranks within the same machine) # Setup the local process group (which contains ranks within the same machine)
#assert comm._LOCAL_PROCESS_GROUP is None # assert comm._LOCAL_PROCESS_GROUP is None
#num_machines = world_size // num_gpus_per_machine # num_machines = world_size // num_gpus_per_machine
#for i in range(num_machines): # for i in range(num_machines):
# ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) # ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine))
# pg = dist.new_group(ranks_on_i) # pg = dist.new_group(ranks_on_i)
# if i == machine_rank: # if i == machine_rank:
...@@ -370,8 +513,10 @@ def _distributed_worker( ...@@ -370,8 +513,10 @@ def _distributed_worker(
main_func(*args) main_func(*args)
if __name__ == '__main__': if __name__ == "__main__":
parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()]) parser = argparse.ArgumentParser(
"DETR training and evaluation script", parents=[get_args_parser()]
)
args = parser.parse_args() args = parser.parse_args()
if args.output_dir: if args.output_dir:
PathManager.mkdirs(args.output_dir) PathManager.mkdirs(args.output_dir)
......
...@@ -6,20 +6,19 @@ ...@@ -6,20 +6,19 @@
# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 # Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
# ------------------------------------------------------------------------------------------------ # ------------------------------------------------------------------------------------------------
import os
import glob import glob
import os
import torch import torch
from torch.utils.cpp_extension import CUDA_HOME
from torch.utils.cpp_extension import CppExtension
from torch.utils.cpp_extension import CUDAExtension
from setuptools import find_packages from setuptools import find_packages
from setuptools import setup from setuptools import setup
from torch.utils.cpp_extension import CUDAExtension
from torch.utils.cpp_extension import CUDA_HOME
from torch.utils.cpp_extension import CppExtension
requirements = ["torch", "torchvision"] requirements = ["torch", "torchvision"]
def get_extensions(): def get_extensions():
this_dir = os.path.dirname(os.path.abspath(__file__)) this_dir = os.path.dirname(os.path.abspath(__file__))
extensions_dir = os.path.join(this_dir, "detr/src") extensions_dir = os.path.join(this_dir, "detr/src")
...@@ -49,7 +48,7 @@ def get_extensions(): ...@@ -49,7 +48,7 @@ def get_extensions():
"-D__CUDA_NO_HALF2_OPERATORS__", "-D__CUDA_NO_HALF2_OPERATORS__",
] ]
else: else:
raise NotImplementedError('Cuda is not availabel') raise NotImplementedError("Cuda is not availabel")
sources = [os.path.join(extensions_dir, s) for s in sources] sources = [os.path.join(extensions_dir, s) for s in sources]
include_dirs = [extensions_dir] include_dirs = [extensions_dir]
...@@ -64,13 +63,14 @@ def get_extensions(): ...@@ -64,13 +63,14 @@ def get_extensions():
] ]
return ext_modules return ext_modules
if __name__ == '__main__':
if __name__ == "__main__":
setup( setup(
name="detr", name="detr",
url="https://github.com/facebookresearch/d2go/detr", url="https://github.com/facebookresearch/d2go/detr",
license='Apache-2.0', license="Apache-2.0",
packages=find_packages(exclude=["test_all.py"]), packages=find_packages(exclude=["test_all.py"]),
package_data={ 'detr': ['LICENSE']}, package_data={"detr": ["LICENSE"]},
ext_modules=get_extensions(), ext_modules=get_extensions(),
cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
) )
...@@ -3,17 +3,19 @@ ...@@ -3,17 +3,19 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import io import io
import unittest import unittest
import torch
from torch import nn, Tensor
from typing import List from typing import List
from detr.models.matcher import HungarianMatcher import torch
from detr.models.position_encoding import PositionEmbeddingSine, PositionEmbeddingLearned from detr.hub import detr_resnet50, detr_resnet50_panoptic
from detr.models.backbone import Backbone from detr.models.backbone import Backbone
from detr.models.matcher import HungarianMatcher
from detr.models.position_encoding import (
PositionEmbeddingSine,
PositionEmbeddingLearned,
)
from detr.util import box_ops from detr.util import box_ops
from detr.util.misc import nested_tensor_from_tensor_list from detr.util.misc import nested_tensor_from_tensor_list
from detr.hub import detr_resnet50, detr_resnet50_panoptic from torch import nn, Tensor
# onnxruntime requires python 3.5 or above # onnxruntime requires python 3.5 or above
try: try:
...@@ -23,7 +25,6 @@ except ImportError: ...@@ -23,7 +25,6 @@ except ImportError:
class Tester(unittest.TestCase): class Tester(unittest.TestCase):
def test_box_cxcywh_to_xyxy(self): def test_box_cxcywh_to_xyxy(self):
t = torch.rand(10, 4) t = torch.rand(10, 4)
r = box_ops.box_xyxy_to_cxcywh(box_ops.box_cxcywh_to_xyxy(t)) r = box_ops.box_xyxy_to_cxcywh(box_ops.box_cxcywh_to_xyxy(t))
...@@ -40,26 +41,45 @@ class Tester(unittest.TestCase): ...@@ -40,26 +41,45 @@ class Tester(unittest.TestCase):
tgt_labels = torch.randint(high=n_classes, size=(n_targets,)) tgt_labels = torch.randint(high=n_classes, size=(n_targets,))
tgt_boxes = torch.rand(n_targets, 4) tgt_boxes = torch.rand(n_targets, 4)
matcher = HungarianMatcher() matcher = HungarianMatcher()
targets = [{'labels': tgt_labels, 'boxes': tgt_boxes}] targets = [{"labels": tgt_labels, "boxes": tgt_boxes}]
indices_single = matcher({'pred_logits': logits, 'pred_boxes': boxes}, targets) indices_single = matcher({"pred_logits": logits, "pred_boxes": boxes}, targets)
indices_batched = matcher({'pred_logits': logits.repeat(2, 1, 1), indices_batched = matcher(
'pred_boxes': boxes.repeat(2, 1, 1)}, targets * 2) {
"pred_logits": logits.repeat(2, 1, 1),
"pred_boxes": boxes.repeat(2, 1, 1),
},
targets * 2,
)
self.assertEqual(len(indices_single[0][0]), n_targets) self.assertEqual(len(indices_single[0][0]), n_targets)
self.assertEqual(len(indices_single[0][1]), n_targets) self.assertEqual(len(indices_single[0][1]), n_targets)
self.assertEqual(self.indices_torch2python(indices_single), self.assertEqual(
self.indices_torch2python([indices_batched[0]])) self.indices_torch2python(indices_single),
self.assertEqual(self.indices_torch2python(indices_single), self.indices_torch2python([indices_batched[0]]),
self.indices_torch2python([indices_batched[1]])) )
self.assertEqual(
self.indices_torch2python(indices_single),
self.indices_torch2python([indices_batched[1]]),
)
# test with empty targets # test with empty targets
tgt_labels_empty = torch.randint(high=n_classes, size=(0,)) tgt_labels_empty = torch.randint(high=n_classes, size=(0,))
tgt_boxes_empty = torch.rand(0, 4) tgt_boxes_empty = torch.rand(0, 4)
targets_empty = [{'labels': tgt_labels_empty, 'boxes': tgt_boxes_empty}] targets_empty = [{"labels": tgt_labels_empty, "boxes": tgt_boxes_empty}]
indices = matcher({'pred_logits': logits.repeat(2, 1, 1), indices = matcher(
'pred_boxes': boxes.repeat(2, 1, 1)}, targets + targets_empty) {
"pred_logits": logits.repeat(2, 1, 1),
"pred_boxes": boxes.repeat(2, 1, 1),
},
targets + targets_empty,
)
self.assertEqual(len(indices[1][0]), 0) self.assertEqual(len(indices[1][0]), 0)
indices = matcher({'pred_logits': logits.repeat(2, 1, 1), indices = matcher(
'pred_boxes': boxes.repeat(2, 1, 1)}, targets_empty * 2) {
"pred_logits": logits.repeat(2, 1, 1),
"pred_boxes": boxes.repeat(2, 1, 1),
},
targets_empty * 2,
)
self.assertEqual(len(indices[0][0]), 0) self.assertEqual(len(indices[0][0]), 0)
def test_position_encoding_script(self): def test_position_encoding_script(self):
...@@ -67,13 +87,15 @@ class Tester(unittest.TestCase): ...@@ -67,13 +87,15 @@ class Tester(unittest.TestCase):
mm1, mm2 = torch.jit.script(m1), torch.jit.script(m2) # noqa mm1, mm2 = torch.jit.script(m1), torch.jit.script(m2) # noqa
def test_backbone_script(self): def test_backbone_script(self):
backbone = Backbone('resnet50', True, False, False) backbone = Backbone("resnet50", True, False, False)
torch.jit.script(backbone) # noqa torch.jit.script(backbone) # noqa
def test_model_script_detection(self): def test_model_script_detection(self):
model = detr_resnet50(pretrained=False).eval() model = detr_resnet50(pretrained=False).eval()
scripted_model = torch.jit.script(model) scripted_model = torch.jit.script(model)
x = nested_tensor_from_tensor_list([torch.rand(3, 200, 200), torch.rand(3, 200, 250)]) x = nested_tensor_from_tensor_list(
[torch.rand(3, 200, 200), torch.rand(3, 200, 250)]
)
out = model(x) out = model(x)
out_script = scripted_model(x) out_script = scripted_model(x)
self.assertTrue(out["pred_logits"].equal(out_script["pred_logits"])) self.assertTrue(out["pred_logits"].equal(out_script["pred_logits"]))
...@@ -82,7 +104,9 @@ class Tester(unittest.TestCase): ...@@ -82,7 +104,9 @@ class Tester(unittest.TestCase):
def test_model_script_panoptic(self): def test_model_script_panoptic(self):
model = detr_resnet50_panoptic(pretrained=False).eval() model = detr_resnet50_panoptic(pretrained=False).eval()
scripted_model = torch.jit.script(model) scripted_model = torch.jit.script(model)
x = nested_tensor_from_tensor_list([torch.rand(3, 200, 200), torch.rand(3, 200, 250)]) x = nested_tensor_from_tensor_list(
[torch.rand(3, 200, 200), torch.rand(3, 200, 250)]
)
out = model(x) out = model(x)
out_script = scripted_model(x) out_script = scripted_model(x)
self.assertTrue(out["pred_logits"].equal(out_script["pred_logits"])) self.assertTrue(out["pred_logits"].equal(out_script["pred_logits"]))
...@@ -92,17 +116,19 @@ class Tester(unittest.TestCase): ...@@ -92,17 +116,19 @@ class Tester(unittest.TestCase):
def test_model_detection_different_inputs(self): def test_model_detection_different_inputs(self):
model = detr_resnet50(pretrained=False).eval() model = detr_resnet50(pretrained=False).eval()
# support NestedTensor # support NestedTensor
x = nested_tensor_from_tensor_list([torch.rand(3, 200, 200), torch.rand(3, 200, 250)]) x = nested_tensor_from_tensor_list(
[torch.rand(3, 200, 200), torch.rand(3, 200, 250)]
)
out = model(x) out = model(x)
self.assertIn('pred_logits', out) self.assertIn("pred_logits", out)
# and 4d Tensor # and 4d Tensor
x = torch.rand(1, 3, 200, 200) x = torch.rand(1, 3, 200, 200)
out = model(x) out = model(x)
self.assertIn('pred_logits', out) self.assertIn("pred_logits", out)
# and List[Tensor[C, H, W]] # and List[Tensor[C, H, W]]
x = torch.rand(3, 200, 200) x = torch.rand(3, 200, 200)
out = model([x]) out = model([x])
self.assertIn('pred_logits', out) self.assertIn("pred_logits", out)
def test_warpped_model_script_detection(self): def test_warpped_model_script_detection(self):
class WrappedDETR(nn.Module): class WrappedDETR(nn.Module):
...@@ -125,30 +151,49 @@ class Tester(unittest.TestCase): ...@@ -125,30 +151,49 @@ class Tester(unittest.TestCase):
self.assertTrue(out["pred_boxes"].equal(out_script["pred_boxes"])) self.assertTrue(out["pred_boxes"].equal(out_script["pred_boxes"]))
@unittest.skipIf(onnxruntime is None, 'ONNX Runtime unavailable') @unittest.skipIf(onnxruntime is None, "ONNX Runtime unavailable")
class ONNXExporterTester(unittest.TestCase): class ONNXExporterTester(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
torch.manual_seed(123) torch.manual_seed(123)
def run_model(self, model, inputs_list, tolerate_small_mismatch=False, do_constant_folding=True, dynamic_axes=None, def run_model(
output_names=None, input_names=None): self,
model,
inputs_list,
tolerate_small_mismatch=False,
do_constant_folding=True,
dynamic_axes=None,
output_names=None,
input_names=None,
):
model.eval() model.eval()
onnx_io = io.BytesIO() onnx_io = io.BytesIO()
# export to onnx with the first input # export to onnx with the first input
torch.onnx.export(model, inputs_list[0], onnx_io, torch.onnx.export(
do_constant_folding=do_constant_folding, opset_version=12, model,
dynamic_axes=dynamic_axes, input_names=input_names, output_names=output_names) inputs_list[0],
onnx_io,
do_constant_folding=do_constant_folding,
opset_version=12,
dynamic_axes=dynamic_axes,
input_names=input_names,
output_names=output_names,
)
# validate the exported model with onnx runtime # validate the exported model with onnx runtime
for test_inputs in inputs_list: for test_inputs in inputs_list:
with torch.no_grad(): with torch.no_grad():
if isinstance(test_inputs, torch.Tensor) or isinstance(test_inputs, list): if isinstance(test_inputs, torch.Tensor) or isinstance(
test_inputs, list
):
test_inputs = (nested_tensor_from_tensor_list(test_inputs),) test_inputs = (nested_tensor_from_tensor_list(test_inputs),)
test_ouputs = model(*test_inputs) test_ouputs = model(*test_inputs)
if isinstance(test_ouputs, torch.Tensor): if isinstance(test_ouputs, torch.Tensor):
test_ouputs = (test_ouputs,) test_ouputs = (test_ouputs,)
self.ort_validate(onnx_io, test_inputs, test_ouputs, tolerate_small_mismatch) self.ort_validate(
onnx_io, test_inputs, test_ouputs, tolerate_small_mismatch
)
def ort_validate(self, onnx_io, inputs, outputs, tolerate_small_mismatch=False): def ort_validate(self, onnx_io, inputs, outputs, tolerate_small_mismatch=False):
...@@ -166,11 +211,15 @@ class ONNXExporterTester(unittest.TestCase): ...@@ -166,11 +211,15 @@ class ONNXExporterTester(unittest.TestCase):
ort_session = onnxruntime.InferenceSession(onnx_io.getvalue()) ort_session = onnxruntime.InferenceSession(onnx_io.getvalue())
# compute onnxruntime output prediction # compute onnxruntime output prediction
ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs)) #noqa: C402 ort_inputs = dict(
(ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs)
) # noqa: C402
ort_outs = ort_session.run(None, ort_inputs) ort_outs = ort_session.run(None, ort_inputs)
for i in range(0, len(outputs)): for i in range(0, len(outputs)):
try: try:
torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05) torch.testing.assert_allclose(
outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05
)
except AssertionError as error: except AssertionError as error:
if tolerate_small_mismatch: if tolerate_small_mismatch:
self.assertIn("(0.00%)", str(error), str(error)) self.assertIn("(0.00%)", str(error), str(error))
...@@ -207,5 +256,5 @@ class ONNXExporterTester(unittest.TestCase): ...@@ -207,5 +256,5 @@ class ONNXExporterTester(unittest.TestCase):
) )
if __name__ == '__main__': if __name__ == "__main__":
unittest.main() unittest.main()
import logging
import unittest import unittest
from detr.backbone.deit import add_deit_backbone_config
from detr.backbone.pit import add_pit_backbone_config
import torch import torch
from detectron2.utils.file_io import PathManager
from detectron2.checkpoint import DetectionCheckpointer
from d2go.config import CfgNode as CN from d2go.config import CfgNode as CN
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import BACKBONE_REGISTRY from detectron2.modeling import BACKBONE_REGISTRY
from detectron2.utils.file_io import PathManager
from detr.backbone.deit import add_deit_backbone_config
from detr.backbone.pit import add_pit_backbone_config
import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# avoid testing on sandcastle due to access to manifold # avoid testing on sandcastle due to access to manifold
USE_CUDA = torch.cuda.device_count() > 0 USE_CUDA = torch.cuda.device_count() > 0
class TestTransformerBackbone(unittest.TestCase): class TestTransformerBackbone(unittest.TestCase):
@unittest.skipIf(not USE_CUDA,"avoid testing on sandcastle due to access to manifold") @unittest.skipIf(
not USE_CUDA, "avoid testing on sandcastle due to access to manifold"
)
def test_deit_model(self): def test_deit_model(self):
cfg = CN() cfg = CN()
cfg.MODEL = CN() cfg.MODEL = CN()
...@@ -49,9 +52,10 @@ class TestTransformerBackbone(unittest.TestCase): ...@@ -49,9 +52,10 @@ class TestTransformerBackbone(unittest.TestCase):
x = torch.rand(1, 3, input_size_h, input_size_w) x = torch.rand(1, 3, input_size_h, input_size_w)
y = model(x) y = model(x)
print(f"x.shape: {x.shape}, y.shape: {y.shape}") print(f"x.shape: {x.shape}, y.shape: {y.shape}")
@unittest.skipIf(not USE_CUDA,"avoid testing on sandcastle due to access to manifold") @unittest.skipIf(
not USE_CUDA, "avoid testing on sandcastle due to access to manifold"
)
def test_pit_model(self): def test_pit_model(self):
cfg = CN() cfg = CN()
cfg.MODEL = CN() cfg.MODEL = CN()
......
...@@ -13,6 +13,7 @@ from d2go.utils.testing.data_loader_helper import create_local_dataset ...@@ -13,6 +13,7 @@ from d2go.utils.testing.data_loader_helper import create_local_dataset
# RUN: # RUN:
# buck test mobile-vision/d2go/projects_oss/detr:test_detr_runner # buck test mobile-vision/d2go/projects_oss/detr:test_detr_runner
def _get_cfg(runner, output_dir, dataset_name): def _get_cfg(runner, output_dir, dataset_name):
cfg = runner.get_default_cfg() cfg = runner.get_default_cfg()
cfg.MODEL.DEVICE = "cpu" cfg.MODEL.DEVICE = "cpu"
......
...@@ -10,13 +10,15 @@ ...@@ -10,13 +10,15 @@
import io import io
import unittest import unittest
import torch
from functools import wraps from functools import wraps
import torch
from detr.functions.ms_deform_attn_func import (
MSDeformAttnFunction,
ms_deform_attn_core_pytorch,
)
from torch.autograd import gradcheck from torch.autograd import gradcheck
from detr.functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch
USE_CUDA = torch.cuda.device_count() > 0 USE_CUDA = torch.cuda.device_count() > 0
...@@ -24,53 +26,107 @@ N, M, D = 1, 2, 2 ...@@ -24,53 +26,107 @@ N, M, D = 1, 2, 2
Lq, L, P = 2, 2, 2 Lq, L, P = 2, 2, 2
if USE_CUDA: if USE_CUDA:
shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda() shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda()
level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1])) level_start_index = torch.cat(
S = sum([(H*W).item() for H, W in shapes]) (shapes.new_zeros((1,)), shapes.prod(1).cumsum(0)[:-1])
)
S = sum([(H * W).item() for H, W in shapes])
torch.manual_seed(3) torch.manual_seed(3)
class Tester(unittest.TestCase): class Tester(unittest.TestCase):
@unittest.skipIf(not USE_CUDA, 'CI does not have gpu') @unittest.skipIf(not USE_CUDA, "CI does not have gpu")
@torch.no_grad() @torch.no_grad()
def test_forward_equal_with_pytorch_double(self): def test_forward_equal_with_pytorch_double(self):
value = torch.rand(N, S, M, D).cuda() * 0.01 value = torch.rand(N, S, M, D).cuda() * 0.01
sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
-2, keepdim=True
)
im2col_step = 2 im2col_step = 2
output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu() output_pytorch = (
output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu() ms_deform_attn_core_pytorch(
value.double(),
shapes,
sampling_locations.double(),
attention_weights.double(),
)
.detach()
.cpu()
)
output_cuda = (
MSDeformAttnFunction.apply(
value.double(),
shapes,
level_start_index,
sampling_locations.double(),
attention_weights.double(),
im2col_step,
)
.detach()
.cpu()
)
fwdok = torch.allclose(output_cuda, output_pytorch) fwdok = torch.allclose(output_cuda, output_pytorch)
max_abs_err = (output_cuda - output_pytorch).abs().max() max_abs_err = (output_cuda - output_pytorch).abs().max()
max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() max_rel_err = (
(output_cuda - output_pytorch).abs() / output_pytorch.abs()
print(f'* {fwdok} test_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') ).max()
print(
f"* {fwdok} test_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}"
)
@unittest.skipIf(not USE_CUDA, 'CI does not have gpu') @unittest.skipIf(not USE_CUDA, "CI does not have gpu")
@torch.no_grad() @torch.no_grad()
def test_forward_equal_with_pytorch_float(self): def test_forward_equal_with_pytorch_float(self):
value = torch.rand(N, S, M, D).cuda() * 0.01 value = torch.rand(N, S, M, D).cuda() * 0.01
sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
-2, keepdim=True
)
im2col_step = 2 im2col_step = 2
output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu() output_pytorch = (
output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step).detach().cpu() ms_deform_attn_core_pytorch(
value, shapes, sampling_locations, attention_weights
)
.detach()
.cpu()
)
output_cuda = (
MSDeformAttnFunction.apply(
value,
shapes,
level_start_index,
sampling_locations,
attention_weights,
im2col_step,
)
.detach()
.cpu()
)
fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3) fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3)
max_abs_err = (output_cuda - output_pytorch).abs().max() max_abs_err = (output_cuda - output_pytorch).abs().max()
max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max() max_rel_err = (
(output_cuda - output_pytorch).abs() / output_pytorch.abs()
print(f'* {fwdok} test_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}') ).max()
print(
f"* {fwdok} test_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}"
)
@unittest.skipIf(not USE_CUDA, 'CI does not have gpu') @unittest.skipIf(not USE_CUDA, "CI does not have gpu")
def test_gradient_numerical(self, channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True): def test_gradient_numerical(
self, channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True
):
value = torch.rand(N, S, M, channels).cuda() * 0.01 value = torch.rand(N, S, M, channels).cuda() * 0.01
sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda() sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5 attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True) attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
-2, keepdim=True
)
im2col_step = 2 im2col_step = 2
func = MSDeformAttnFunction.apply func = MSDeformAttnFunction.apply
...@@ -78,10 +134,20 @@ class Tester(unittest.TestCase): ...@@ -78,10 +134,20 @@ class Tester(unittest.TestCase):
sampling_locations.requires_grad = grad_sampling_loc sampling_locations.requires_grad = grad_sampling_loc
attention_weights.requires_grad = grad_attn_weight attention_weights.requires_grad = grad_attn_weight
gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step)) gradok = gradcheck(
func,
(
value.double(),
shapes,
level_start_index,
sampling_locations.double(),
attention_weights.double(),
im2col_step,
),
)
print(f'* {gradok} test_gradient_numerical(D={channels})') print(f"* {gradok} test_gradient_numerical(D={channels})")
if __name__ == '__main__': if __name__ == "__main__":
unittest.main() unittest.main()
#!/usr/bin/env python3 #!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import glob
import io import io
import os import os
import subprocess
import glob
import shutil import shutil
import subprocess
from os import path from os import path
from typing import List from typing import List
...@@ -12,10 +12,11 @@ from setuptools import setup, find_packages ...@@ -12,10 +12,11 @@ from setuptools import setup, find_packages
cwd = os.path.dirname(os.path.abspath(__file__)) cwd = os.path.dirname(os.path.abspath(__file__))
version = '0.0.1' version = "0.0.1"
try: try:
if not os.getenv('RELEASE'): if not os.getenv("RELEASE"):
from datetime import date from datetime import date
today = date.today() today = date.today()
day = today.strftime("b%Y%m%d") day = today.strftime("b%Y%m%d")
version += day version += day
...@@ -23,25 +24,24 @@ except Exception: ...@@ -23,25 +24,24 @@ except Exception:
pass pass
requirements = [ requirements = [
'importlib', "importlib",
'numpy', "numpy",
'Pillow', "Pillow",
'mock', "mock",
'torch', "torch",
'pytorch_lightning', "pytorch_lightning",
'opencv-python', "opencv-python",
'parameterized', "parameterized",
] ]
def d2go_gather_files(dst_module, file_path, extension="*") -> List[str]: def d2go_gather_files(dst_module, file_path, extension="*") -> List[str]:
""" """
Return a list of files to include in d2go submodule. Copy over the corresponding files. Return a list of files to include in d2go submodule. Copy over the corresponding files.
""" """
# Use absolute paths while symlinking. # Use absolute paths while symlinking.
source_configs_dir = path.join(path.dirname(path.realpath(__file__)), file_path) source_configs_dir = path.join(path.dirname(path.realpath(__file__)), file_path)
destination = path.join( destination = path.join(path.dirname(path.realpath(__file__)), "d2go", dst_module)
path.dirname(path.realpath(__file__)), "d2go", dst_module
)
# Symlink the config directory inside package to have a cleaner pip install. # Symlink the config directory inside package to have a cleaner pip install.
# Remove stale symlink/directory from a previous build. # Remove stale symlink/directory from a previous build.
...@@ -61,36 +61,41 @@ def d2go_gather_files(dst_module, file_path, extension="*") -> List[str]: ...@@ -61,36 +61,41 @@ def d2go_gather_files(dst_module, file_path, extension="*") -> List[str]:
config_paths = glob.glob(os.path.join(file_path + extension), recursive=True) config_paths = glob.glob(os.path.join(file_path + extension), recursive=True)
return config_paths return config_paths
def get_model_zoo_configs() -> List[str]: def get_model_zoo_configs() -> List[str]:
""" """
Return a list of configs to include in package for model zoo. Copy over these configs inside Return a list of configs to include in package for model zoo. Copy over these configs inside
d2go/model_zoo. d2go/model_zoo.
""" """
return d2go_gather_files(os.path.join("model_zoo", "configs"), "configs", "**/*.yaml") return d2go_gather_files(
os.path.join("model_zoo", "configs"), "configs", "**/*.yaml"
)
if __name__ == '__main__': if __name__ == "__main__":
setup( setup(
name="d2go", name="d2go",
version=version, version=version,
author="Mobile Vision", author="Mobile Vision",
url="https://github.com/facebookresearch/d2go", url="https://github.com/facebookresearch/d2go",
description="D2Go", description="D2Go",
long_description=open('README.md').read(), long_description=open("README.md").read(),
long_description_content_type='text/markdown', long_description_content_type="text/markdown",
license='Apache-2.0', license="Apache-2.0",
install_requires=requirements, install_requires=requirements,
packages=find_packages(exclude=["tools", "tests"]), packages=find_packages(exclude=["tools", "tests"]),
package_data={'d2go': [ package_data={
'LICENSE', "d2go": [
"LICENSE",
], ],
"d2go.model_zoo": get_model_zoo_configs(), "d2go.model_zoo": get_model_zoo_configs(),
"d2go.tools": d2go_gather_files("tools", "tools", "**/*.py"), "d2go.tools": d2go_gather_files("tools", "tools", "**/*.py"),
"d2go.tests": d2go_gather_files("tests", "tests", "**/*helper.py"), "d2go.tests": d2go_gather_files("tests", "tests", "**/*helper.py"),
}, },
entry_points={ entry_points={
'console_scripts': [ "console_scripts": [
'd2go.exporter = d2go.tools.exporter:cli', "d2go.exporter = d2go.tools.exporter:cli",
'd2go.train_net = d2go.tools.train_net:cli', "d2go.train_net = d2go.tools.train_net:cli",
] ]
}, },
) )
...@@ -293,6 +293,8 @@ class TestD2GoDatasets(unittest.TestCase): ...@@ -293,6 +293,8 @@ class TestD2GoDatasets(unittest.TestCase):
self.assertEqual(len(ds_list), 5) self.assertEqual(len(ds_list), 5)
# Test adhoc classes to use with suffix removal # Test adhoc classes to use with suffix removal
AdhocDatasetManager.add(COCOWithClassesToUse("test_adhoc_ds2@1classes", ["class_0"])) AdhocDatasetManager.add(
COCOWithClassesToUse("test_adhoc_ds2@1classes", ["class_0"])
)
ds_list = DatasetCatalog.get("test_adhoc_ds2@1classes") ds_list = DatasetCatalog.get("test_adhoc_ds2@1classes")
self.assertEqual(len(ds_list), 5) self.assertEqual(len(ds_list), 5)
...@@ -126,7 +126,5 @@ class TestDataTransformsBoxUtils(unittest.TestCase): ...@@ -126,7 +126,5 @@ class TestDataTransformsBoxUtils(unittest.TestCase):
boxes = np.array([[91, 46, 144, 111]]) boxes = np.array([[91, 46, 144, 111]])
transformed_bboxs = enlarge_box_tfm[0].apply_coords(boxes) transformed_bboxs = enlarge_box_tfm[0].apply_coords(boxes)
err_msg = "transformed_bbox = {}, expected {}".format( err_msg = "transformed_bbox = {}, expected {}".format(transformed_bboxs, boxes)
transformed_bboxs, boxes
)
self.assertTrue(np.allclose(transformed_bboxs, boxes), err_msg) self.assertTrue(np.allclose(transformed_bboxs, boxes), err_msg)
#!/usr/bin/env python3 #!/usr/bin/env python3
import unittest import unittest
import torch
import torch
from d2go.evaluation.prediction_count_evaluation import PredictionCountEvaluator from d2go.evaluation.prediction_count_evaluation import PredictionCountEvaluator
from detectron2.structures.instances import Instances from detectron2.structures.instances import Instances
class TestPredictionCountEvaluation(unittest.TestCase): class TestPredictionCountEvaluation(unittest.TestCase):
def setUp(self): def setUp(self):
self.evaluator = PredictionCountEvaluator() self.evaluator = PredictionCountEvaluator()
image_size = (224, 224) image_size = (224, 224)
...@@ -20,7 +19,7 @@ class TestPredictionCountEvaluation(unittest.TestCase): ...@@ -20,7 +19,7 @@ class TestPredictionCountEvaluation(unittest.TestCase):
{"instances": Instances(image_size, scores=torch.Tensor([0.9]))}, {"instances": Instances(image_size, scores=torch.Tensor([0.9]))},
] ]
# PredictionCountEvaluator does not depend on inputs # PredictionCountEvaluator does not depend on inputs
self.mock_inputs = [None] * len(self.mock_outputs) self.mock_inputs = [None] * len(self.mock_outputs)
def test_process_evaluate_reset(self): def test_process_evaluate_reset(self):
self.assertEqual(len(self.evaluator.prediction_counts), 0) self.assertEqual(len(self.evaluator.prediction_counts), 0)
...@@ -40,7 +39,7 @@ class TestPredictionCountEvaluation(unittest.TestCase): ...@@ -40,7 +39,7 @@ class TestPredictionCountEvaluation(unittest.TestCase):
"predictions_per_image": 11 / 5, "predictions_per_image": 11 / 5,
"confidence_per_prediction": (0.9 * 5 + 0.8 * 4 + 0.7 * 2) / 11, "confidence_per_prediction": (0.9 * 5 + 0.8 * 4 + 0.7 * 2) / 11,
} }
} },
) )
# Test that `reset` clears the evaluator state. # Test that `reset` clears the evaluator state.
...@@ -48,7 +47,6 @@ class TestPredictionCountEvaluation(unittest.TestCase): ...@@ -48,7 +47,6 @@ class TestPredictionCountEvaluation(unittest.TestCase):
self.assertEqual(len(self.evaluator.prediction_counts), 0) self.assertEqual(len(self.evaluator.prediction_counts), 0)
self.assertEqual(len(self.evaluator.confidence_scores), 0) self.assertEqual(len(self.evaluator.confidence_scores), 0)
def assertDictAlmostEqual(self, dict1, dict2): def assertDictAlmostEqual(self, dict1, dict2):
keys1 = list(dict1.keys()) keys1 = list(dict1.keys())
keys2 = list(dict2.keys()) keys2 = list(dict2.keys())
......
...@@ -28,9 +28,7 @@ class TestConfig(unittest.TestCase): ...@@ -28,9 +28,7 @@ class TestConfig(unittest.TestCase):
for location in ["detectron2", "detectron2go"]: for location in ["detectron2", "detectron2go"]:
root_dir = os.path.abspath(reroute_config_path(f"{location}://.")) root_dir = os.path.abspath(reroute_config_path(f"{location}://."))
files = glob.glob( files = glob.glob(os.path.join(root_dir, "**/*.yaml"), recursive=True)
os.path.join(root_dir, "**/*.yaml"),
recursive=True)
files = [f for f in files if "fbnas" not in f] files = [f for f in files if "fbnas" not in f]
self.assertGreater(len(files), 0) self.assertGreater(len(files), 0)
for fn in sorted(files): for fn in sorted(files):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment