Commit 82295dbf authored by Yanghan Wang, committed by Facebook GitHub Bot

enable black for mobile-vision

Summary:
https://fb.workplace.com/groups/pythonfoundation/posts/2990917737888352

Remove `mobile-vision` from the opt-out list; leave `mobile-vision/SNPE` opted out because it contains third-party code.

arc lint --take BLACK --apply-patches --paths-cmd 'hg files mobile-vision'

allow-large-files
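(Aside, not part of the commit: the same formatting can be reproduced without arc. A minimal sketch using Black's Python API, assuming default settings rather than whatever configuration the lint rule pins:)

import black

# Format a source string with Black's default style.
src = "def f( a ) ->int : return a+1"
print(black.format_str(src, mode=black.Mode()))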

Reviewed By: sstsai-adl

Differential Revision: D30721093

fbshipit-source-id: 9e5c16d988b315b93a28038443ecfb92efd18ef8
parent a56c7e15
@@ -5,11 +5,10 @@
Modules to compute the matching cost and solve the corresponding LSAP.
"""
import torch
from detr.util.box_ops import box_cxcywh_to_xyxy, generalized_box_iou
from scipy.optimize import linear_sum_assignment
from torch import nn
from detr.util.box_ops import box_cxcywh_to_xyxy, generalized_box_iou
class HungarianMatcher(nn.Module):
"""This class computes an assignment between the targets and the predictions of the network
@@ -19,7 +18,13 @@ class HungarianMatcher(nn.Module):
while the others are un-matched (and thus treated as non-objects).
"""
def __init__(self, cost_class: float = 1, cost_bbox: float = 1, cost_giou: float = 1, use_focal_loss=False):
def __init__(
self,
cost_class: float = 1,
cost_bbox: float = 1,
cost_giou: float = 1,
use_focal_loss=False,
):
"""Creates the matcher
Params:
@@ -31,12 +36,14 @@ class HungarianMatcher(nn.Module):
self.cost_class = cost_class
self.cost_bbox = cost_bbox
self.cost_giou = cost_giou
assert cost_class != 0 or cost_bbox != 0 or cost_giou != 0, "all costs can't be 0"
assert (
cost_class != 0 or cost_bbox != 0 or cost_giou != 0
), "all costs can't be 0"
self.use_focal_loss = use_focal_loss
@torch.no_grad()
def forward(self, outputs, targets):
""" Performs the matching
"""Performs the matching
Params:
outputs: This is a dict that contains at least these entries:
@@ -61,7 +68,9 @@ class HungarianMatcher(nn.Module):
if self.use_focal_loss:
out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid()
else:
out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1) # [batch_size * num_queries, num_classes]
out_prob = (
outputs["pred_logits"].flatten(0, 1).softmax(-1)
) # [batch_size * num_queries, num_classes]
out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4]
# Also concat the target labels and boxes
@@ -74,29 +83,57 @@ class HungarianMatcher(nn.Module):
if self.use_focal_loss:
alpha = 0.25
gamma = 2.0
neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log())
pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log())
neg_cost_class = (
(1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log())
)
pos_cost_class = (
alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log())
)
cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids]
else:
cost_class = -out_prob[:, tgt_ids] # shape [batch_size * num_queries, \sum_b NUM-BOX_b]
cost_class = -out_prob[
:, tgt_ids
] # shape [batch_size * num_queries, \sum_b NUM-BOX_b]
# Compute the L1 cost between boxes
cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1) # shape [batch_size * num_queries,\sum_b NUM-BOX_b]
cost_bbox = torch.cdist(
out_bbox, tgt_bbox, p=1
) # shape [batch_size * num_queries,\sum_b NUM-BOX_b]
# Compute the giou cost between boxes
# shape [batch_size * num_queries, \sum_b NUM-BOX_b]
cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox))
cost_giou = -generalized_box_iou(
box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)
)
# Final cost matrix
C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou
C = C.view(bs, num_queries, -1).cpu() # shape [batch_size, num_queries, \sum_b NUM-BOX_b]
C = (
self.cost_bbox * cost_bbox
+ self.cost_class * cost_class
+ self.cost_giou * cost_giou
)
C = C.view(
bs, num_queries, -1
).cpu() # shape [batch_size, num_queries, \sum_b NUM-BOX_b]
sizes = [len(v["boxes"]) for v in targets] # shape [batch_size,]
# each split c shape [batch_size, num_queries, NUM-BOX_b]
indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
indices = [
linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))
]
# A list where each item is [row_indices, col_indices]
return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
return [
(
torch.as_tensor(i, dtype=torch.int64),
torch.as_tensor(j, dtype=torch.int64),
)
for i, j in indices
]
def build_matcher(args):
return HungarianMatcher(cost_class=args.set_cost_class, cost_bbox=args.set_cost_bbox, cost_giou=args.set_cost_giou)
return HungarianMatcher(
cost_class=args.set_cost_class,
cost_bbox=args.set_cost_bbox,
cost_giou=args.set_cost_giou,
)
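(Aside, not part of the diff: a minimal usage sketch of the matcher above, with hypothetical shapes: 2 images, 100 queries, 92 classes:)

import torch

matcher = HungarianMatcher(cost_class=1.0, cost_bbox=5.0, cost_giou=2.0)
outputs = {
    "pred_logits": torch.randn(2, 100, 92),  # (batch, num_queries, num_classes)
    "pred_boxes": torch.rand(2, 100, 4),  # (cx, cy, w, h), normalized to [0, 1]
}
targets = [
    {"labels": torch.tensor([3, 17]), "boxes": torch.rand(2, 4)},
    {"labels": torch.tensor([5]), "boxes": torch.rand(1, 4)},
]
indices = matcher(outputs, targets)  # one (pred_idx, tgt_idx) pair per image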
@@ -5,10 +5,10 @@
Various positional encodings for the transformer.
"""
import math
import torch
from torch import nn
import torch
from detr.util.misc import NestedTensor
from torch import nn
class PositionEmbeddingSine(nn.Module):
@@ -16,7 +16,15 @@ class PositionEmbeddingSine(nn.Module):
This is a more standard version of the position embedding, very similar to the one
used by the Attention is all you need paper, generalized to work on images.
"""
def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None, centered=False):
def __init__(
self,
num_pos_feats=64,
temperature=10000,
normalize=False,
scale=None,
centered=False,
):
super().__init__()
self.num_pos_feats = num_pos_feats
self.temperature = temperature
@@ -47,13 +55,25 @@ class PositionEmbeddingSine(nn.Module):
x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) # shape (N, )
dim_t = self.temperature ** (
2 * (dim_t // 2) / self.num_pos_feats
) # shape (N, )
pos_x = x_embed[:, :, :, None] / dim_t # shape (B, H, W, N)
pos_y = y_embed[:, :, :, None] / dim_t
pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3) # shape (B, H, W, N)
pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3) # shape (B, H, W, N)
pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) # shape (B, 2*N, H, W)
pos_x = torch.stack(
(pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
).flatten(
3
) # shape (B, H, W, N)
pos_y = torch.stack(
(pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
).flatten(
3
) # shape (B, H, W, N)
pos = torch.cat((pos_y, pos_x), dim=3).permute(
0, 3, 1, 2
) # shape (B, 2*N, H, W)
return pos
@@ -61,6 +81,7 @@ class PositionEmbeddingLearned(nn.Module):
"""
Absolute pos embedding, learned.
"""
def __init__(self, num_pos_feats=256):
super().__init__()
self.row_embed = nn.Embedding(50, num_pos_feats)
@@ -78,19 +99,27 @@ class PositionEmbeddingLearned(nn.Module):
j = torch.arange(h, device=x.device)
x_emb = self.col_embed(i)
y_emb = self.row_embed(j)
pos = torch.cat([
pos = (
torch.cat(
[
x_emb.unsqueeze(0).repeat(h, 1, 1),
y_emb.unsqueeze(1).repeat(1, w, 1),
], dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(x.shape[0], 1, 1, 1)
],
dim=-1,
)
.permute(2, 0, 1)
.unsqueeze(0)
.repeat(x.shape[0], 1, 1, 1)
)
return pos
def build_position_encoding(args):
N_steps = args.hidden_dim // 2
if args.position_embedding in ('v2', 'sine'):
if args.position_embedding in ("v2", "sine"):
# TODO find a better way of exposing other arguments
position_embedding = PositionEmbeddingSine(N_steps, normalize=True)
elif args.position_embedding in ('v3', 'learned'):
elif args.position_embedding in ("v3", "learned"):
position_embedding = PositionEmbeddingLearned(N_steps)
else:
raise ValueError(f"not supported {args.position_embedding}")
@@ -8,14 +8,13 @@ import io
from collections import defaultdict
from typing import List, Optional
import detr.util.box_ops as box_ops
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from PIL import Image
import detr.util.box_ops as box_ops
from detr.util.misc import NestedTensor, interpolate, nested_tensor_from_tensor_list
from PIL import Image
from torch import Tensor
try:
from panopticapi.utils import id2rgb, rgb2id
@@ -33,8 +32,12 @@ class DETRsegm(nn.Module):
p.requires_grad_(False)
hidden_dim, nheads = detr.transformer.d_model, detr.transformer.nhead
self.bbox_attention = MHAttentionMap(hidden_dim, hidden_dim, nheads, dropout=0.0)
self.mask_head = MaskHeadSmallConv(hidden_dim + nheads, [1024, 512, 256], hidden_dim)
self.bbox_attention = MHAttentionMap(
hidden_dim, hidden_dim, nheads, dropout=0.0
)
self.mask_head = MaskHeadSmallConv(
hidden_dim + nheads, [1024, 512, 256], hidden_dim
)
def forward(self, samples: NestedTensor):
if isinstance(samples, (list, torch.Tensor)):
@@ -46,19 +49,27 @@ class DETRsegm(nn.Module):
src, mask = features[-1].decompose()
assert mask is not None
src_proj = self.detr.input_proj(src)
hs, memory = self.detr.transformer(src_proj, mask, self.detr.query_embed.weight, pos[-1])
hs, memory = self.detr.transformer(
src_proj, mask, self.detr.query_embed.weight, pos[-1]
)
outputs_class = self.detr.class_embed(hs)
outputs_coord = self.detr.bbox_embed(hs).sigmoid()
out = {"pred_logits": outputs_class[-1], "pred_boxes": outputs_coord[-1]}
if self.detr.aux_loss:
out['aux_outputs'] = self.detr._set_aux_loss(outputs_class, outputs_coord)
out["aux_outputs"] = self.detr._set_aux_loss(outputs_class, outputs_coord)
# FIXME h_boxes takes the last one computed, keep this in mind
bbox_mask = self.bbox_attention(hs[-1], memory, mask=mask)
seg_masks = self.mask_head(src_proj, bbox_mask, [features[2].tensors, features[1].tensors, features[0].tensors])
outputs_seg_masks = seg_masks.view(bs, self.detr.num_queries, seg_masks.shape[-2], seg_masks.shape[-1])
seg_masks = self.mask_head(
src_proj,
bbox_mask,
[features[2].tensors, features[1].tensors, features[0].tensors],
)
outputs_seg_masks = seg_masks.view(
bs, self.detr.num_queries, seg_masks.shape[-2], seg_masks.shape[-1]
)
out["pred_masks"] = outputs_seg_masks
return out
@@ -77,7 +88,14 @@ class MaskHeadSmallConv(nn.Module):
def __init__(self, dim, fpn_dims, context_dim):
super().__init__()
inter_dims = [dim, context_dim // 2, context_dim // 4, context_dim // 8, context_dim // 16, context_dim // 64]
inter_dims = [
dim,
context_dim // 2,
context_dim // 4,
context_dim // 8,
context_dim // 16,
context_dim // 64,
]
self.lay1 = torch.nn.Conv2d(dim, dim, 3, padding=1)
self.gn1 = torch.nn.GroupNorm(8, dim)
self.lay2 = torch.nn.Conv2d(dim, inter_dims[1], 3, padding=1)
@@ -159,9 +177,19 @@ class MHAttentionMap(nn.Module):
def forward(self, q, k, mask: Optional[Tensor] = None):
q = self.q_linear(q)
k = F.conv2d(k, self.k_linear.weight.unsqueeze(-1).unsqueeze(-1), self.k_linear.bias)
qh = q.view(q.shape[0], q.shape[1], self.num_heads, self.hidden_dim // self.num_heads)
kh = k.view(k.shape[0], self.num_heads, self.hidden_dim // self.num_heads, k.shape[-2], k.shape[-1])
k = F.conv2d(
k, self.k_linear.weight.unsqueeze(-1).unsqueeze(-1), self.k_linear.bias
)
qh = q.view(
q.shape[0], q.shape[1], self.num_heads, self.hidden_dim // self.num_heads
)
kh = k.view(
k.shape[0],
self.num_heads,
self.hidden_dim // self.num_heads,
k.shape[-2],
k.shape[-1],
)
weights = torch.einsum("bqnc,bnchw->bqnhw", qh * self.normalize_fact, kh)
if mask is not None:
@@ -189,7 +217,9 @@ def dice_loss(inputs, targets, num_boxes):
return loss.sum() / num_boxes
def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2):
def sigmoid_focal_loss(
inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2
):
"""
Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.
Args:
@@ -227,10 +257,14 @@ class PostProcessSegm(nn.Module):
assert len(orig_target_sizes) == len(max_target_sizes)
max_h, max_w = max_target_sizes.max(0)[0].tolist()
outputs_masks = outputs["pred_masks"].squeeze(2)
outputs_masks = F.interpolate(outputs_masks, size=(max_h, max_w), mode="bilinear", align_corners=False)
outputs_masks = F.interpolate(
outputs_masks, size=(max_h, max_w), mode="bilinear", align_corners=False
)
outputs_masks = (outputs_masks.sigmoid() > self.threshold).cpu()
for i, (cur_mask, t, tt) in enumerate(zip(outputs_masks, max_target_sizes, orig_target_sizes)):
for i, (cur_mask, t, tt) in enumerate(
zip(outputs_masks, max_target_sizes, orig_target_sizes)
):
img_h, img_w = t[0], t[1]
results[i]["masks"] = cur_mask[:, :img_h, :img_w].unsqueeze(1)
results[i]["masks"] = F.interpolate(
@@ -242,7 +276,7 @@ class PostProcessSegm(nn.Module):
class PostProcessPanoptic(nn.Module):
"""This class converts the output of the model to the final panoptic result, in the format expected by the
coco panoptic API """
coco panoptic API"""
def __init__(self, is_thing_map, threshold=0.85):
"""
@@ -255,8 +289,8 @@ class PostProcessPanoptic(nn.Module):
self.threshold = threshold
self.is_thing_map = is_thing_map
def forward(self, outputs, processed_sizes, target_sizes=None): #noqa: C901
""" This function computes the panoptic prediction from the model's predictions.
def forward(self, outputs, processed_sizes, target_sizes=None): # noqa: C901
"""This function computes the panoptic prediction from the model's predictions.
Parameters:
outputs: This is a dict coming directly from the model. See the model doc for the content.
processed_sizes: This is a list of tuples (or torch tensors) of sizes of the images that were passed to the
@@ -267,7 +301,11 @@ class PostProcessPanoptic(nn.Module):
if target_sizes is None:
target_sizes = processed_sizes
assert len(processed_sizes) == len(target_sizes)
out_logits, raw_masks, raw_boxes = outputs["pred_logits"], outputs["pred_masks"], outputs["pred_boxes"]
out_logits, raw_masks, raw_boxes = (
outputs["pred_logits"],
outputs["pred_masks"],
outputs["pred_boxes"],
)
assert len(out_logits) == len(raw_masks) == len(target_sizes)
preds = []
@@ -281,12 +319,16 @@
):
# we filter empty queries and detection below threshold
scores, labels = cur_logits.softmax(-1).max(-1)
keep = labels.ne(outputs["pred_logits"].shape[-1] - 1) & (scores > self.threshold)
keep = labels.ne(outputs["pred_logits"].shape[-1] - 1) & (
scores > self.threshold
)
cur_scores, cur_classes = cur_logits.softmax(-1).max(-1)
cur_scores = cur_scores[keep]
cur_classes = cur_classes[keep]
cur_masks = cur_masks[keep]
cur_masks = interpolate(cur_masks[:, None], to_tuple(size), mode="bilinear").squeeze(1)
cur_masks = interpolate(
cur_masks[:, None], to_tuple(size), mode="bilinear"
).squeeze(1)
cur_boxes = box_ops.box_cxcywh_to_xyxy(cur_boxes[keep])
h, w = cur_masks.shape[-2:]
@@ -322,10 +364,14 @@ class PostProcessPanoptic(nn.Module):
final_h, final_w = to_tuple(target_size)
seg_img = Image.fromarray(id2rgb(m_id.view(h, w).cpu().numpy()))
seg_img = seg_img.resize(size=(final_w, final_h), resample=Image.NEAREST)
seg_img = seg_img.resize(
size=(final_w, final_h), resample=Image.NEAREST
)
np_seg_img = (
torch.ByteTensor(torch.ByteStorage.from_buffer(seg_img.tobytes())).view(final_h, final_w, 3).numpy()
torch.ByteTensor(torch.ByteStorage.from_buffer(seg_img.tobytes()))
.view(final_h, final_w, 3)
.numpy()
)
m_id = torch.from_numpy(rgb2id(np_seg_img))
@@ -339,7 +385,9 @@ class PostProcessPanoptic(nn.Module):
# We now filter empty masks as long as we find some
while True:
filtered_small = torch.as_tensor(
[area[i] <= 4 for i, c in enumerate(cur_classes)], dtype=torch.bool, device=keep.device
[area[i] <= 4 for i, c in enumerate(cur_classes)],
dtype=torch.bool,
device=keep.device,
)
if filtered_small.any().item():
cur_scores = cur_scores[~filtered_small]
@@ -355,11 +403,21 @@ class PostProcessPanoptic(nn.Module):
segments_info = []
for i, a in enumerate(area):
cat = cur_classes[i].item()
segments_info.append({"id": i, "isthing": self.is_thing_map[cat], "category_id": cat, "area": a})
segments_info.append(
{
"id": i,
"isthing": self.is_thing_map[cat],
"category_id": cat,
"area": a,
}
)
del cur_classes
with io.BytesIO() as out:
seg_img.save(out, format="PNG")
predictions = {"png_string": out.getvalue(), "segments_info": segments_info}
predictions = {
"png_string": out.getvalue(),
"segments_info": segments_info,
}
preds.append(predictions)
return preds
@@ -18,23 +18,38 @@ from torch import nn, Tensor
class Transformer(nn.Module):
def __init__(self, d_model=512, nhead=8, num_encoder_layers=6,
num_decoder_layers=6, dim_feedforward=2048, dropout=0.1,
activation="relu", normalize_before=False,
return_intermediate_dec=False):
def __init__(
self,
d_model=512,
nhead=8,
num_encoder_layers=6,
num_decoder_layers=6,
dim_feedforward=2048,
dropout=0.1,
activation="relu",
normalize_before=False,
return_intermediate_dec=False,
):
super().__init__()
encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward,
dropout, activation, normalize_before)
encoder_layer = TransformerEncoderLayer(
d_model, nhead, dim_feedforward, dropout, activation, normalize_before
)
encoder_norm = nn.LayerNorm(d_model) if normalize_before else None
self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)
self.encoder = TransformerEncoder(
encoder_layer, num_encoder_layers, encoder_norm
)
decoder_layer = TransformerDecoderLayer(d_model, nhead, dim_feedforward,
dropout, activation, normalize_before)
decoder_layer = TransformerDecoderLayer(
d_model, nhead, dim_feedforward, dropout, activation, normalize_before
)
decoder_norm = nn.LayerNorm(d_model)
self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm,
return_intermediate=return_intermediate_dec)
self.decoder = TransformerDecoder(
decoder_layer,
num_decoder_layers,
decoder_norm,
return_intermediate=return_intermediate_dec,
)
self._reset_parameters()
@@ -63,30 +78,41 @@ class Transformer(nn.Module):
# memory shape (L, B, C)
memory = self.encoder(src, src_key_padding_mask=mask, pos=pos_embed)
# hs shape (NUM_LEVEL, S, B, C)
hs = self.decoder(tgt, memory, memory_key_padding_mask=mask,
pos=pos_embed, query_pos=query_embed)
hs = self.decoder(
tgt,
memory,
memory_key_padding_mask=mask,
pos=pos_embed,
query_pos=query_embed,
)
# return shape (NUM_LEVEL, B, S, C) and (B, C, H, W)
return hs.transpose(1, 2), memory.permute(1, 2, 0).view(bs, c, h, w)
class TransformerEncoder(nn.Module):
def __init__(self, encoder_layer, num_layers, norm=None):
super().__init__()
self.layers = _get_clones(encoder_layer, num_layers)
self.num_layers = num_layers
self.norm = norm
def forward(self, src,
def forward(
self,
src,
mask: Optional[Tensor] = None,
src_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None):
pos: Optional[Tensor] = None,
):
output = src
# mask, shape (L, L)
# src_key_padding_mask, shape (B, L)
for layer in self.layers:
output = layer(output, src_mask=mask,
src_key_padding_mask=src_key_padding_mask, pos=pos)
output = layer(
output,
src_mask=mask,
src_key_padding_mask=src_key_padding_mask,
pos=pos,
)
if self.norm is not None:
output = self.norm(output)
@@ -95,7 +121,6 @@ class TransformerEncoder(nn.Module):
class TransformerDecoder(nn.Module):
def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False):
super().__init__()
self.layers = _get_clones(decoder_layer, num_layers)
@@ -103,13 +128,17 @@ class TransformerDecoder(nn.Module):
self.norm = norm
self.return_intermediate = return_intermediate
def forward(self, tgt, memory,
def forward(
self,
tgt,
memory,
tgt_mask: Optional[Tensor] = None,
memory_mask: Optional[Tensor] = None,
tgt_key_padding_mask: Optional[Tensor] = None,
memory_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
query_pos: Optional[Tensor] = None):
query_pos: Optional[Tensor] = None,
):
output = tgt
intermediate = []
@@ -119,11 +148,16 @@ class TransformerDecoder(nn.Module):
# memory_mask shape (L, S)
# memory_key_padding_mask shape (B, S)
for layer in self.layers:
output = layer(output, memory, tgt_mask=tgt_mask,
output = layer(
output,
memory,
tgt_mask=tgt_mask,
memory_mask=memory_mask,
tgt_key_padding_mask=tgt_key_padding_mask,
memory_key_padding_mask=memory_key_padding_mask,
pos=pos, query_pos=query_pos)
pos=pos,
query_pos=query_pos,
)
if self.return_intermediate:
intermediate.append(self.norm(output))
@@ -140,9 +174,15 @@ class TransformerDecoder(nn.Module):
class TransformerEncoderLayer(nn.Module):
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
activation="relu", normalize_before=False):
def __init__(
self,
d_model,
nhead,
dim_feedforward=2048,
dropout=0.1,
activation="relu",
normalize_before=False,
):
super().__init__()
self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
# Implementation of Feedforward model
@@ -161,16 +201,19 @@ class TransformerEncoderLayer(nn.Module):
def with_pos_embed(self, tensor, pos: Optional[Tensor]):
return tensor if pos is None else tensor + pos
def forward_post(self,
def forward_post(
self,
src,
src_mask: Optional[Tensor] = None,
src_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None):
pos: Optional[Tensor] = None,
):
q = k = self.with_pos_embed(src, pos) # shape (L, B, D)
# src mask, shape (L, L)
# src_key_padding_mask: shape (B, L)
src2 = self.self_attn(q, k, src, attn_mask=src_mask,
key_padding_mask=src_key_padding_mask)[0]
src2 = self.self_attn(
q, k, src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
)[0]
src = src + self.dropout1(src2)
src = self.norm1(src)
src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
@@ -178,33 +221,46 @@ class TransformerEncoderLayer(nn.Module):
src = self.norm2(src)
return src
def forward_pre(self, src,
def forward_pre(
self,
src,
src_mask: Optional[Tensor] = None,
src_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None):
pos: Optional[Tensor] = None,
):
src2 = self.norm1(src)
q = k = self.with_pos_embed(src2, pos)
src2 = self.self_attn(q, k, src2, attn_mask=src_mask,
key_padding_mask=src_key_padding_mask)[0]
src2 = self.self_attn(
q, k, src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
)[0]
src = src + self.dropout1(src2)
src2 = self.norm2(src)
src2 = self.linear2(self.dropout(self.activation(self.linear1(src2))))
src = src + self.dropout2(src2)
return src
def forward(self, src,
def forward(
self,
src,
src_mask: Optional[Tensor] = None,
src_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None):
pos: Optional[Tensor] = None,
):
if self.normalize_before:
return self.forward_pre(src, src_mask, src_key_padding_mask, pos)
return self.forward_post(src, src_mask, src_key_padding_mask, pos)
class TransformerDecoderLayer(nn.Module):
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
activation="relu", normalize_before=False):
def __init__(
self,
d_model,
nhead,
dim_feedforward=2048,
dropout=0.1,
activation="relu",
normalize_before=False,
):
super().__init__()
self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
@@ -226,28 +282,36 @@ class TransformerDecoderLayer(nn.Module):
def with_pos_embed(self, tensor, pos: Optional[Tensor]):
return tensor if pos is None else tensor + pos
def forward_post(self, tgt, memory,
def forward_post(
self,
tgt,
memory,
tgt_mask: Optional[Tensor] = None,
memory_mask: Optional[Tensor] = None,
tgt_key_padding_mask: Optional[Tensor] = None,
memory_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
query_pos: Optional[Tensor] = None):
query_pos: Optional[Tensor] = None,
):
# tgt shape (L, B, C)
# tgt_mask shape (L, L)
# tgt_key_padding_mask shape (B, L)
q = k = self.with_pos_embed(tgt, query_pos)
tgt2 = self.self_attn(q, k, tgt, attn_mask=tgt_mask,
key_padding_mask=tgt_key_padding_mask)[0]
tgt2 = self.self_attn(
q, k, tgt, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
)[0]
tgt = tgt + self.dropout1(tgt2)
tgt = self.norm1(tgt)
# memory_mask shape (L, S)
# memory_key_padding_mask shape (B, S)
# query_pos shape (L, B, C)
tgt2 = self.multihead_attn(self.with_pos_embed(tgt, query_pos),
tgt2 = self.multihead_attn(
self.with_pos_embed(tgt, query_pos),
self.with_pos_embed(memory, pos),
memory, attn_mask=memory_mask,
key_padding_mask=memory_key_padding_mask)[0]
memory,
attn_mask=memory_mask,
key_padding_mask=memory_key_padding_mask,
)[0]
tgt = tgt + self.dropout2(tgt2)
tgt = self.norm2(tgt)
tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
@@ -256,41 +320,69 @@ class TransformerDecoderLayer(nn.Module):
# return tgt shape (L, B, C)
return tgt
def forward_pre(self, tgt, memory,
def forward_pre(
self,
tgt,
memory,
tgt_mask: Optional[Tensor] = None,
memory_mask: Optional[Tensor] = None,
tgt_key_padding_mask: Optional[Tensor] = None,
memory_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
query_pos: Optional[Tensor] = None):
query_pos: Optional[Tensor] = None,
):
tgt2 = self.norm1(tgt)
q = k = self.with_pos_embed(tgt2, query_pos)
tgt2 = self.self_attn(q, k, tgt2, attn_mask=tgt_mask,
key_padding_mask=tgt_key_padding_mask)[0]
tgt2 = self.self_attn(
q, k, tgt2, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
)[0]
tgt = tgt + self.dropout1(tgt2)
tgt2 = self.norm2(tgt)
tgt2 = self.multihead_attn(self.with_pos_embed(tgt2, query_pos),
tgt2 = self.multihead_attn(
self.with_pos_embed(tgt2, query_pos),
self.with_pos_embed(memory, pos),
memory, attn_mask=memory_mask,
key_padding_mask=memory_key_padding_mask)[0]
memory,
attn_mask=memory_mask,
key_padding_mask=memory_key_padding_mask,
)[0]
tgt = tgt + self.dropout2(tgt2)
tgt2 = self.norm3(tgt)
tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
tgt = tgt + self.dropout3(tgt2)
return tgt
def forward(self, tgt, memory,
def forward(
self,
tgt,
memory,
tgt_mask: Optional[Tensor] = None,
memory_mask: Optional[Tensor] = None,
tgt_key_padding_mask: Optional[Tensor] = None,
memory_key_padding_mask: Optional[Tensor] = None,
pos: Optional[Tensor] = None,
query_pos: Optional[Tensor] = None):
query_pos: Optional[Tensor] = None,
):
if self.normalize_before:
return self.forward_pre(tgt, memory, tgt_mask, memory_mask,
tgt_key_padding_mask, memory_key_padding_mask, pos, query_pos)
return self.forward_post(tgt, memory, tgt_mask, memory_mask,
tgt_key_padding_mask, memory_key_padding_mask, pos, query_pos)
return self.forward_pre(
tgt,
memory,
tgt_mask,
memory_mask,
tgt_key_padding_mask,
memory_key_padding_mask,
pos,
query_pos,
)
return self.forward_post(
tgt,
memory,
tgt_mask,
memory_mask,
tgt_key_padding_mask,
memory_key_padding_mask,
pos,
query_pos,
)
def _get_clones(module, N):
@@ -318,4 +410,4 @@ def _get_activation_fn(activation):
return F.gelu
if activation == "glu":
return F.glu
raise RuntimeError(F"activation should be relu/gelu, not {activation}.")
raise RuntimeError(f"activation should be relu/gelu, not {activation}.")
@@ -9,15 +9,15 @@
# ------------------------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import print_function
import warnings
import math
import warnings
import torch
from torch import nn
import torch.nn.functional as F
from torch import nn
from torch.nn.init import xavier_uniform_, constant_
from ..functions import MSDeformAttnFunction
@@ -25,8 +25,10 @@ from ..functions import MSDeformAttnFunction
def _is_power_of_2(n):
if (not isinstance(n, int)) or (n < 0):
raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n)))
return (n & (n-1) == 0) and n != 0
raise ValueError(
"invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))
)
return (n & (n - 1) == 0) and n != 0
class MSDeformAttn(nn.Module):
@@ -40,12 +42,18 @@ class MSDeformAttn(nn.Module):
"""
super().__init__()
if d_model % n_heads != 0:
raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads))
raise ValueError(
"d_model must be divisible by n_heads, but got {} and {}".format(
d_model, n_heads
)
)
_d_per_head = d_model // n_heads
# you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation
if not _is_power_of_2(_d_per_head):
warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 "
"which is more efficient in our CUDA implementation.")
warnings.warn(
"You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 "
"which is more efficient in our CUDA implementation."
)
self.im2col_step = 64
@@ -62,25 +70,39 @@ class MSDeformAttn(nn.Module):
self._reset_parameters()
def _reset_parameters(self):
constant_(self.sampling_offsets.weight.data, 0.)
constant_(self.sampling_offsets.weight.data, 0.0)
# shape (num_heads,)
thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads)
thetas = torch.arange(self.n_heads, dtype=torch.float32) * (
2.0 * math.pi / self.n_heads
)
# shape (num_heads, 2)
grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
# shape (num_heads, num_levels, num_points, 2)
grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2).repeat(1, self.n_levels, self.n_points, 1)
grid_init = (
(grid_init / grid_init.abs().max(-1, keepdim=True)[0])
.view(self.n_heads, 1, 1, 2)
.repeat(1, self.n_levels, self.n_points, 1)
)
for i in range(self.n_points):
grid_init[:, :, i, :] *= i + 1
with torch.no_grad():
self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1))
constant_(self.attention_weights.weight.data, 0.)
constant_(self.attention_weights.bias.data, 0.)
constant_(self.attention_weights.weight.data, 0.0)
constant_(self.attention_weights.bias.data, 0.0)
xavier_uniform_(self.value_proj.weight.data)
constant_(self.value_proj.bias.data, 0.)
constant_(self.value_proj.bias.data, 0.0)
xavier_uniform_(self.output_proj.weight.data)
constant_(self.output_proj.bias.data, 0.)
def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None):
constant_(self.output_proj.bias.data, 0.0)
def forward(
self,
query,
reference_points,
input_flatten,
input_spatial_shapes,
input_level_start_index,
input_padding_mask=None,
):
"""
:param query (N, Length_{query}, C)
:param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area
@@ -100,21 +122,45 @@ class MSDeformAttn(nn.Module):
if input_padding_mask is not None:
value = value.masked_fill(input_padding_mask[..., None], float(0))
value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads)
sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2)
attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points)
attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points)
sampling_offsets = self.sampling_offsets(query).view(
N, Len_q, self.n_heads, self.n_levels, self.n_points, 2
)
attention_weights = self.attention_weights(query).view(
N, Len_q, self.n_heads, self.n_levels * self.n_points
)
attention_weights = F.softmax(attention_weights, -1).view(
N, Len_q, self.n_heads, self.n_levels, self.n_points
)
# N, Len_q, n_heads, n_levels, n_points, 2
if reference_points.shape[-1] == 2:
offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1)
sampling_locations = reference_points[:, :, None, :, None, :] \
offset_normalizer = torch.stack(
[input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1
)
sampling_locations = (
reference_points[:, :, None, :, None, :]
+ sampling_offsets / offset_normalizer[None, None, None, :, None, :]
)
elif reference_points.shape[-1] == 4:
sampling_locations = reference_points[:, :, None, :, None, :2] \
+ sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5
sampling_locations = (
reference_points[:, :, None, :, None, :2]
+ sampling_offsets
/ self.n_points
* reference_points[:, :, None, :, None, 2:]
* 0.5
)
else:
raise ValueError(
'Last dim of reference_points must be 2 or 4, but get {} instead.'.format(reference_points.shape[-1]))
"Last dim of reference_points must be 2 or 4, but get {} instead.".format(
reference_points.shape[-1]
)
)
output = MSDeformAttnFunction.apply(
value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights, self.im2col_step)
value,
input_spatial_shapes,
input_level_start_index,
sampling_locations,
attention_weights,
self.im2col_step,
)
output = self.output_proj(output)
return output
@@ -4,9 +4,9 @@ from d2go.config import CfgNode as CN
from d2go.data.dataset_mappers.build import D2GO_DATA_MAPPER_REGISTRY
from d2go.data.dataset_mappers.d2go_dataset_mapper import D2GoDatasetMapper
from d2go.runner import GeneralizedRCNNRunner
from detr.d2 import DetrDatasetMapper, add_detr_config
from detr.backbone.deit import add_deit_backbone_config
from detr.backbone.pit import add_pit_backbone_config
from detr.d2 import DetrDatasetMapper, add_detr_config
@D2GO_DATA_MAPPER_REGISTRY.register()
@@ -10,15 +10,13 @@ from torchvision.ops.boxes import box_area
def box_cxcywh_to_xyxy(x):
x_c, y_c, w, h = x.unbind(-1)
b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
(x_c + 0.5 * w), (y_c + 0.5 * h)]
b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)]
return torch.stack(b, dim=-1)
def box_xyxy_to_cxcywh(x):
x0, y0, x1, y1 = x.unbind(-1)
b = [(x0 + x1) / 2, (y0 + y1) / 2,
(x1 - x0), (y1 - y0)]
b = [(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)]
return torch.stack(b, dim=-1)
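(Aside, not part of the diff: the two conversions above are inverses; a quick round-trip check on hypothetical normalized boxes:)

import torch

boxes = torch.rand(8, 4)  # (cx, cy, w, h)
assert torch.allclose(box_xyxy_to_cxcywh(box_cxcywh_to_xyxy(boxes)), boxes, atol=1e-6)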
@@ -79,11 +77,11 @@ def masks_to_boxes(masks):
x = torch.arange(0, w, dtype=torch.float)
y, x = torch.meshgrid(y, x)
x_mask = (masks * x.unsqueeze(0))
x_mask = masks * x.unsqueeze(0)
x_max = x_mask.flatten(1).max(-1)[0]
x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
y_mask = (masks * y.unsqueeze(0))
y_mask = masks * y.unsqueeze(0)
y_max = y_mask.flatten(1).max(-1)[0]
y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
@@ -6,21 +6,22 @@ Misc functions, including distributed helpers.
Mostly copy-paste from torchvision references.
"""
import datetime
import os
import pickle
import subprocess
import time
from collections import defaultdict, deque
import datetime
import pickle
from distutils.version import LooseVersion
from typing import Optional, List
import torch
import torch.distributed as dist
from torch import Tensor
# needed due to empty tensor bug in pytorch and torchvision 0.5
import torchvision
from distutils.version import LooseVersion
from torch import Tensor
if LooseVersion(torchvision.__version__) < LooseVersion("0.7.0"):
from torchvision.ops import _new_empty_tensor
from torchvision.ops.misc import _output_size
@@ -50,7 +51,7 @@ class SmoothedValue(object):
"""
if not is_dist_avail_and_initialized():
return
t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
dist.barrier()
dist.all_reduce(t)
t = t.tolist()
@@ -85,7 +86,8 @@ class SmoothedValue(object):
avg=self.avg,
global_avg=self.global_avg,
max=self.max,
value=self.value)
value=self.value,
)
def all_gather(data):
@@ -119,14 +121,16 @@ def all_gather(data):
for _ in size_list:
tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
if local_size != max_size:
padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
padding = torch.empty(
size=(max_size - local_size,), dtype=torch.uint8, device="cuda"
)
tensor = torch.cat((tensor, padding), dim=0)
dist.all_gather(tensor_list, tensor)
data_list = []
for size, tensor in zip(size_list, tensor_list):
buffer = tensor.cpu().numpy().tobytes()[:size]
data_list.append(pickle.loads(buffer)) #noqa
data_list.append(pickle.loads(buffer)) # noqa
return data_list
@@ -175,15 +179,14 @@ class MetricLogger(object):
return self.meters[attr]
if attr in self.__dict__:
return self.__dict__[attr]
raise AttributeError("'{}' object has no attribute '{}'".format(
type(self).__name__, attr))
raise AttributeError(
"'{}' object has no attribute '{}'".format(type(self).__name__, attr)
)
def __str__(self):
loss_str = []
for name, meter in self.meters.items():
loss_str.append(
"{}: {}".format(name, str(meter))
)
loss_str.append("{}: {}".format(name, str(meter)))
return self.delimiter.join(loss_str)
def synchronize_between_processes(self):
@@ -196,31 +199,35 @@ class MetricLogger(object):
def log_every(self, iterable, print_freq, header=None):
i = 0
if not header:
header = ''
header = ""
start_time = time.time()
end = time.time()
iter_time = SmoothedValue(fmt='{avg:.4f}')
data_time = SmoothedValue(fmt='{avg:.4f}')
space_fmt = ':' + str(len(str(len(iterable)))) + 'd'
iter_time = SmoothedValue(fmt="{avg:.4f}")
data_time = SmoothedValue(fmt="{avg:.4f}")
space_fmt = ":" + str(len(str(len(iterable)))) + "d"
if torch.cuda.is_available():
log_msg = self.delimiter.join([
log_msg = self.delimiter.join(
[
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}',
'max mem: {memory:.0f}'
])
"[{0" + space_fmt + "}/{1}]",
"eta: {eta}",
"{meters}",
"time: {time}",
"data: {data}",
"max mem: {memory:.0f}",
]
)
else:
log_msg = self.delimiter.join([
log_msg = self.delimiter.join(
[
header,
'[{0' + space_fmt + '}/{1}]',
'eta: {eta}',
'{meters}',
'time: {time}',
'data: {data}'
])
"[{0" + space_fmt + "}/{1}]",
"eta: {eta}",
"{meters}",
"time: {time}",
"data: {data}",
]
)
MB = 1024.0 * 1024.0
for obj in iterable:
data_time.update(time.time() - end)
@@ -230,38 +237,54 @@ class MetricLogger(object):
eta_seconds = iter_time.global_avg * (len(iterable) - i)
eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
if torch.cuda.is_available():
print(log_msg.format(
i, len(iterable), eta=eta_string,
print(
log_msg.format(
i,
len(iterable),
eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time),
memory=torch.cuda.max_memory_allocated() / MB))
time=str(iter_time),
data=str(data_time),
memory=torch.cuda.max_memory_allocated() / MB,
)
)
else:
print(log_msg.format(
i, len(iterable), eta=eta_string,
print(
log_msg.format(
i,
len(iterable),
eta=eta_string,
meters=str(self),
time=str(iter_time), data=str(data_time)))
time=str(iter_time),
data=str(data_time),
)
)
i += 1
end = time.time()
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('{} Total time: {} ({:.4f} s / it)'.format(
header, total_time_str, total_time / len(iterable)))
print(
"{} Total time: {} ({:.4f} s / it)".format(
header, total_time_str, total_time / len(iterable)
)
)
def get_sha():
cwd = os.path.dirname(os.path.abspath(__file__))
def _run(command):
return subprocess.check_output(command, cwd=cwd).decode('ascii').strip()
sha = 'N/A'
return subprocess.check_output(command, cwd=cwd).decode("ascii").strip()
sha = "N/A"
diff = "clean"
branch = 'N/A'
branch = "N/A"
try:
sha = _run(['git', 'rev-parse', 'HEAD'])
subprocess.check_output(['git', 'diff'], cwd=cwd)
diff = _run(['git', 'diff-index', 'HEAD'])
sha = _run(["git", "rev-parse", "HEAD"])
subprocess.check_output(["git", "diff"], cwd=cwd)
diff = _run(["git", "diff-index", "HEAD"])
diff = "has uncommited changes" if diff else "clean"
branch = _run(['git', 'rev-parse', '--abbrev-ref', 'HEAD'])
branch = _run(["git", "rev-parse", "--abbrev-ref", "HEAD"])
except Exception:
pass
message = f"sha: {sha}, status: {diff}, branch: {branch}"
@@ -325,9 +348,9 @@ def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
mask = torch.ones((b, h, w), dtype=torch.bool, device=device)
for img, pad_img, m in zip(tensor_list, tensor, mask):
pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
m[: img.shape[1], :img.shape[2]] = False
m[: img.shape[1], : img.shape[2]] = False
else:
raise ValueError('not supported')
raise ValueError("not supported")
return NestedTensor(tensor, mask)
@@ -337,7 +360,9 @@ def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor:
max_size = []
for i in range(tensor_list[0].dim()):
max_size_i = torch.max(torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)).to(torch.int64)
max_size_i = torch.max(
torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)
).to(torch.int64)
max_size.append(max_size_i)
max_size = tuple(max_size)
@@ -349,11 +374,15 @@ def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor:
padded_masks = []
for img in tensor_list:
padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0]))
padded_img = torch.nn.functional.pad(
img, (0, padding[2], 0, padding[1], 0, padding[0])
)
padded_imgs.append(padded_img)
m = torch.zeros_like(img[0], dtype=torch.int, device=img.device)
padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1)
padded_mask = torch.nn.functional.pad(
m, (0, padding[2], 0, padding[1]), "constant", 1
)
padded_masks.append(padded_mask.to(torch.bool))
tensor = torch.stack(padded_imgs)
@@ -367,10 +396,11 @@ def setup_for_distributed(is_master):
This function disables printing when not in master process
"""
import builtins as __builtin__
builtin_print = __builtin__.print
def print(*args, **kwargs):
force = kwargs.pop('force', False)
force = kwargs.pop("force", False)
if is_master or force:
builtin_print(*args, **kwargs)
@@ -407,26 +437,31 @@ def save_on_master(*args, **kwargs):
def init_distributed_mode(args):
if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
args.rank = int(os.environ["RANK"])
args.world_size = int(os.environ['WORLD_SIZE'])
args.gpu = int(os.environ['LOCAL_RANK'])
elif 'SLURM_PROCID' in os.environ:
args.rank = int(os.environ['SLURM_PROCID'])
args.world_size = int(os.environ["WORLD_SIZE"])
args.gpu = int(os.environ["LOCAL_RANK"])
elif "SLURM_PROCID" in os.environ:
args.rank = int(os.environ["SLURM_PROCID"])
args.gpu = args.rank % torch.cuda.device_count()
else:
print('Not using distributed mode')
print("Not using distributed mode")
args.distributed = False
return
args.distributed = True
torch.cuda.set_device(args.gpu)
args.dist_backend = 'nccl'
print('| distributed init (rank {}): {}'.format(
args.rank, args.dist_url), flush=True)
torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
world_size=args.world_size, rank=args.rank)
args.dist_backend = "nccl"
print(
"| distributed init (rank {}): {}".format(args.rank, args.dist_url), flush=True
)
torch.distributed.init_process_group(
backend=args.dist_backend,
init_method=args.dist_url,
world_size=args.world_size,
rank=args.rank,
)
torch.distributed.barrier()
setup_for_distributed(args.rank == 0)
@@ -450,14 +485,16 @@ def accuracy(output, target, topk=(1,)):
return res
def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None):
def interpolate(
input, size=None, scale_factor=None, mode="nearest", align_corners=None
):
# type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
"""
Equivalent to nn.functional.interpolate, but with support for empty batch sizes.
This will eventually be supported natively by PyTorch, and this
function can go away.
"""
#if float(torchvision.__version__[:3]) < 0.7:
# if float(torchvision.__version__[:3]) < 0.7:
if LooseVersion(torchvision.__version__) < LooseVersion("0.7.0"):
if input.numel() > 0:
return torch.nn.functional.interpolate(
@@ -468,10 +505,13 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None):
output_shape = list(input.shape[:-2]) + list(output_shape)
return _new_empty_tensor(input, output_shape)
else:
return torchvision.ops.misc.interpolate(input, size, scale_factor, mode, align_corners)
return torchvision.ops.misc.interpolate(
input, size, scale_factor, mode, align_corners
)
def inverse_sigmoid(x, eps=1e-5):
x = x.clamp(min=0, max=1)
x1 = x.clamp(min=eps)
x2 = (1 - x).clamp(min=eps)
return torch.log(x1/x2)
return torch.log(x1 / x2)
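(Aside, not part of the diff: `inverse_sigmoid` is a clamped logit, so it inverts `torch.sigmoid` away from the clamp boundaries:)

import torch

p = torch.tensor([0.1, 0.5, 0.9])
assert torch.allclose(torch.sigmoid(inverse_sigmoid(p)), p, atol=1e-6)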
@@ -3,17 +3,22 @@
"""
Plotting utilities to visualize training logs.
"""
import torch
import pandas as pd
from pathlib import Path, PurePath
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path, PurePath
import torch
def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col=0, log_name='log.txt'):
'''
def plot_logs(
logs,
fields=("class_error", "loss_bbox_unscaled", "mAP"),
ewm_col=0,
log_name="log.txt",
):
"""
Function to plot specific fields from training log(s). Plots both training and test results.
:: Inputs - logs = list containing Path objects, each pointing to individual dir with a log file
@@ -24,7 +29,7 @@ def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col=0, log_name='log.txt'):
:: Outputs - matplotlib plots of results in fields, color coded for each log file.
- solid lines are training results, dashed lines are test results.
'''
"""
func_name = "plot_utils.py::plot_logs"
# verify logs is a list of Paths (list[Paths]) or single Pathlib object Path,
......@@ -33,17 +38,25 @@ def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col
if not isinstance(logs, list):
if isinstance(logs, PurePath):
logs = [logs]
print(f"{func_name} info: logs param expects a list argument, converted to list[Path].")
print(
f"{func_name} info: logs param expects a list argument, converted to list[Path]."
)
else:
raise ValueError(f"{func_name} - invalid argument for logs parameter.\n \
Expect list[Path] or single Path obj, received {type(logs)}")
raise ValueError(
f"{func_name} - invalid argument for logs parameter.\n \
Expect list[Path] or single Path obj, received {type(logs)}"
)
# Quality checks - verify valid dir(s), that every item in list is Path object, and that log_name exists in each dir
for _, dir in enumerate(logs):
if not isinstance(dir, PurePath):
raise ValueError(f"{func_name} - non-Path object in logs argument of {type(dir)}: \n{dir}")
raise ValueError(
f"{func_name} - non-Path object in logs argument of {type(dir)}: \n{dir}"
)
if not dir.exists():
raise ValueError(f"{func_name} - invalid directory in logs argument:\n{dir}")
raise ValueError(
f"{func_name} - invalid directory in logs argument:\n{dir}"
)
# verify log_name exists
fn = Path(dir / log_name)
if not fn.exists():
@@ -58,52 +71,57 @@ def plot_logs(logs, fields=('class_error', 'loss_bbox_unscaled', 'mAP'), ewm_col=0, log_name='log.txt'):
for df, color in zip(dfs, sns.color_palette(n_colors=len(logs))):
for j, field in enumerate(fields):
if field == 'mAP':
coco_eval = pd.DataFrame(
np.stack(df.test_coco_eval_bbox.dropna().values)[:, 1]
).ewm(com=ewm_col).mean()
if field == "mAP":
coco_eval = (
pd.DataFrame(np.stack(df.test_coco_eval_bbox.dropna().values)[:, 1])
.ewm(com=ewm_col)
.mean()
)
axs[j].plot(coco_eval, c=color)
else:
df.interpolate().ewm(com=ewm_col).mean().plot(
y=[f'train_{field}', f'test_{field}'],
y=[f"train_{field}", f"test_{field}"],
ax=axs[j],
color=[color] * 2,
style=['-', '--']
style=["-", "--"],
)
for ax, field in zip(axs, fields):
ax.legend([Path(p).name for p in logs])
ax.set_title(field)
def plot_precision_recall(files, naming_scheme='iter'):
if naming_scheme == 'exp_id':
def plot_precision_recall(files, naming_scheme="iter"):
if naming_scheme == "exp_id":
# name becomes exp_id
names = [f.parts[-3] for f in files]
elif naming_scheme == 'iter':
elif naming_scheme == "iter":
names = [f.stem for f in files]
else:
raise ValueError(f'not supported {naming_scheme}')
raise ValueError(f"not supported {naming_scheme}")
fig, axs = plt.subplots(ncols=2, figsize=(16, 5))
for f, color, name in zip(files, sns.color_palette("Blues", n_colors=len(files)), names):
for f, color, name in zip(
files, sns.color_palette("Blues", n_colors=len(files)), names
):
data = torch.load(f)
# precision is n_iou, n_points, n_cat, n_area, max_det
precision = data['precision']
recall = data['params'].recThrs
scores = data['scores']
precision = data["precision"]
recall = data["params"].recThrs
scores = data["scores"]
# take precision for all classes, all areas and 100 detections
precision = precision[0, :, :, 0, -1].mean(1)
scores = scores[0, :, :, 0, -1].mean(1)
prec = precision.mean()
rec = data['recall'][0, :, 0, -1].mean()
print(f'{naming_scheme} {name}: mAP@50={prec * 100: 05.1f}, ' +
f'score={scores.mean():0.3f}, ' +
f'f1={2 * prec * rec / (prec + rec + 1e-8):0.3f}'
rec = data["recall"][0, :, 0, -1].mean()
print(
f"{naming_scheme} {name}: mAP@50={prec * 100: 05.1f}, "
+ f"score={scores.mean():0.3f}, "
+ f"f1={2 * prec * rec / (prec + rec + 1e-8):0.3f}"
)
axs[0].plot(recall, precision, c=color)
axs[1].plot(recall, scores, c=color)
axs[0].set_title('Precision / Recall')
axs[0].set_title("Precision / Recall")
axs[0].legend(names)
axs[1].set_title('Scores / Recall')
axs[1].set_title("Scores / Recall")
axs[1].legend(names)
return fig, axs
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import os
import logging
import argparse
import datetime
import json
import logging
import os
import random
import time
from datetime import timedelta
from pathlib import Path
import detr.util.misc as utils
import numpy as np
import torch
from torch.utils.data import DataLoader, DistributedSampler
import torch.distributed as dist
import torch.multiprocessing as mp
from detectron2.engine.launch import _find_free_port
from detectron2.utils.file_io import PathManager
from detr import datasets
import detr.util.misc as utils
from detr.datasets import build_dataset, get_coco_api_from_dataset
from detr.engine import evaluate, train_one_epoch
from detr.models import build_model
from detectron2.utils.file_io import PathManager
from detectron2.engine.launch import _find_free_port
from torch.utils.data import DataLoader, DistributedSampler
DEFAULT_TIMEOUT = timedelta(minutes=30)
def get_args_parser():
parser = argparse.ArgumentParser('Set transformer detector', add_help=False)
parser.add_argument('--lr', default=1e-4, type=float)
parser.add_argument('--lr_backbone', default=1e-5, type=float)
parser.add_argument('--batch_size', default=2, type=int)
parser.add_argument('--weight_decay', default=1e-4, type=float)
parser.add_argument('--epochs', default=300, type=int)
parser.add_argument('--lr_drop', default=200, type=int)
parser.add_argument('--clip_max_norm', default=0.1, type=float,
help='gradient clipping max norm')
parser = argparse.ArgumentParser("Set transformer detector", add_help=False)
parser.add_argument("--lr", default=1e-4, type=float)
parser.add_argument("--lr_backbone", default=1e-5, type=float)
parser.add_argument("--batch_size", default=2, type=int)
parser.add_argument("--weight_decay", default=1e-4, type=float)
parser.add_argument("--epochs", default=300, type=int)
parser.add_argument("--lr_drop", default=200, type=int)
parser.add_argument(
"--clip_max_norm", default=0.1, type=float, help="gradient clipping max norm"
)
# Model parameters
parser.add_argument('--frozen_weights', type=str, default=None,
help="Path to the pretrained model. If set, only the mask head will be trained")
parser.add_argument(
"--frozen_weights",
type=str,
default=None,
help="Path to the pretrained model. If set, only the mask head will be trained",
)
# * Backbone
parser.add_argument('--backbone', default='resnet50', type=str,
help="Name of the convolutional backbone to use")
parser.add_argument('--dilation', action='store_true',
help="If true, we replace stride with dilation in the last convolutional block (DC5)")
parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'),
help="Type of positional embedding to use on top of the image features")
parser.add_argument(
"--backbone",
default="resnet50",
type=str,
help="Name of the convolutional backbone to use",
)
parser.add_argument(
"--dilation",
action="store_true",
help="If true, we replace stride with dilation in the last convolutional block (DC5)",
)
parser.add_argument(
"--position_embedding",
default="sine",
type=str,
choices=("sine", "learned"),
help="Type of positional embedding to use on top of the image features",
)
# * Transformer
parser.add_argument('--enc_layers', default=6, type=int,
help="Number of encoding layers in the transformer")
parser.add_argument('--dec_layers', default=6, type=int,
help="Number of decoding layers in the transformer")
parser.add_argument('--dim_feedforward', default=2048, type=int,
help="Intermediate size of the feedforward layers in the transformer blocks")
parser.add_argument('--hidden_dim', default=256, type=int,
help="Size of the embeddings (dimension of the transformer)")
parser.add_argument('--dropout', default=0.1, type=float,
help="Dropout applied in the transformer")
parser.add_argument('--nheads', default=8, type=int,
help="Number of attention heads inside the transformer's attentions")
parser.add_argument('--num_queries', default=100, type=int,
help="Number of query slots")
parser.add_argument('--pre_norm', action='store_true')
parser.add_argument(
"--enc_layers",
default=6,
type=int,
help="Number of encoding layers in the transformer",
)
parser.add_argument(
"--dec_layers",
default=6,
type=int,
help="Number of decoding layers in the transformer",
)
parser.add_argument(
"--dim_feedforward",
default=2048,
type=int,
help="Intermediate size of the feedforward layers in the transformer blocks",
)
parser.add_argument(
"--hidden_dim",
default=256,
type=int,
help="Size of the embeddings (dimension of the transformer)",
)
parser.add_argument(
"--dropout", default=0.1, type=float, help="Dropout applied in the transformer"
)
parser.add_argument(
"--nheads",
default=8,
type=int,
help="Number of attention heads inside the transformer's attentions",
)
parser.add_argument(
"--num_queries", default=100, type=int, help="Number of query slots"
)
parser.add_argument("--pre_norm", action="store_true")
# * Segmentation
parser.add_argument('--masks', action='store_true',
help="Train segmentation head if the flag is provided")
parser.add_argument(
"--masks",
action="store_true",
help="Train segmentation head if the flag is provided",
)
# Loss
parser.add_argument('--no_aux_loss', dest='aux_loss', action='store_false',
help="Disables auxiliary decoding losses (loss at each layer)")
parser.add_argument(
"--no_aux_loss",
dest="aux_loss",
action="store_false",
help="Disables auxiliary decoding losses (loss at each layer)",
)
# * Matcher
parser.add_argument('--set_cost_class', default=1, type=float,
help="Class coefficient in the matching cost")
parser.add_argument('--set_cost_bbox', default=5, type=float,
help="L1 box coefficient in the matching cost")
parser.add_argument('--set_cost_giou', default=2, type=float,
help="giou box coefficient in the matching cost")
parser.add_argument(
"--set_cost_class",
default=1,
type=float,
help="Class coefficient in the matching cost",
)
parser.add_argument(
"--set_cost_bbox",
default=5,
type=float,
help="L1 box coefficient in the matching cost",
)
parser.add_argument(
"--set_cost_giou",
default=2,
type=float,
help="giou box coefficient in the matching cost",
)
# * Loss coefficients
parser.add_argument('--mask_loss_coef', default=1, type=float)
parser.add_argument('--dice_loss_coef', default=1, type=float)
parser.add_argument('--bbox_loss_coef', default=5, type=float)
parser.add_argument('--giou_loss_coef', default=2, type=float)
parser.add_argument('--eos_coef', default=0.1, type=float,
help="Relative classification weight of the no-object class")
parser.add_argument("--mask_loss_coef", default=1, type=float)
parser.add_argument("--dice_loss_coef", default=1, type=float)
parser.add_argument("--bbox_loss_coef", default=5, type=float)
parser.add_argument("--giou_loss_coef", default=2, type=float)
parser.add_argument(
"--eos_coef",
default=0.1,
type=float,
help="Relative classification weight of the no-object class",
)
# dataset parameters
parser.add_argument('--dataset_file', default='coco')
parser.add_argument('--ade_path', type=str, default='manifold://winvision/tree/detectron2/ADEChallengeData2016/')
parser.add_argument('--coco_path', type=str, default='manifold://fair_vision_data/tree/')
parser.add_argument('--coco_panoptic_path', type=str, default='manifold://fair_vision_data/tree/')
parser.add_argument('--remove_difficult', action='store_true')
parser.add_argument('--output-dir', default='',
help='path where to save, empty for no saving')
parser.add_argument('--device', default='cuda',
help='device to use for training / testing')
parser.add_argument('--seed', default=42, type=int)
parser.add_argument('--resume', default='', help='resume from checkpoint')
parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
help='start epoch')
parser.add_argument('--eval', action='store_true')
parser.add_argument('--num_workers', default=2, type=int)
parser.add_argument("--dataset_file", default="coco")
parser.add_argument(
"--ade_path",
type=str,
default="manifold://winvision/tree/detectron2/ADEChallengeData2016/",
)
parser.add_argument(
"--coco_path", type=str, default="manifold://fair_vision_data/tree/"
)
parser.add_argument(
"--coco_panoptic_path", type=str, default="manifold://fair_vision_data/tree/"
)
parser.add_argument("--remove_difficult", action="store_true")
parser.add_argument(
"--output-dir", default="", help="path where to save, empty for no saving"
)
parser.add_argument(
"--device", default="cuda", help="device to use for training / testing"
)
parser.add_argument("--seed", default=42, type=int)
parser.add_argument("--resume", default="", help="resume from checkpoint")
parser.add_argument(
"--start_epoch", default=0, type=int, metavar="N", help="start epoch"
)
parser.add_argument("--eval", action="store_true")
parser.add_argument("--num_workers", default=2, type=int)
# distributed training parameters
parser.add_argument("--num-gpus", type=int, default=8, help="number of gpus *per machine*")
parser.add_argument("--num-machines", type=int, default=1, help="total number of machines")
parser.add_argument(
"--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)")
parser.add_argument('--dist-url', default='env://', help='url used to set up distributed training')
"--num-gpus", type=int, default=8, help="number of gpus *per machine*"
)
parser.add_argument(
"--num-machines", type=int, default=1, help="total number of machines"
)
parser.add_argument(
"--machine-rank",
type=int,
default=0,
help="the rank of this machine (unique per machine)",
)
parser.add_argument(
"--dist-url", default="env://", help="url used to set up distributed training"
)
return parser
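# Usage sketch (hypothetical): build the parser standalone to inspect defaults.
#   parser = argparse.ArgumentParser(parents=[get_args_parser()])
#   args = parser.parse_args([])  # all defaults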
def main(args):
#utils.init_distributed_mode(args)
# utils.init_distributed_mode(args)
if args.frozen_weights is not None:
assert args.masks, "Frozen training is meant for segmentation only"
......@@ -137,21 +219,32 @@ def main(args):
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
model_without_ddp = model.module
n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('number of params:', n_parameters)
print("number of params:", n_parameters)
param_dicts = [
{"params": [p for n, p in model_without_ddp.named_parameters() if "backbone" not in n and p.requires_grad]},
{
"params": [p for n, p in model_without_ddp.named_parameters() if "backbone" in n and p.requires_grad],
"params": [
p
for n, p in model_without_ddp.named_parameters()
if "backbone" not in n and p.requires_grad
]
},
{
"params": [
p
for n, p in model_without_ddp.named_parameters()
if "backbone" in n and p.requires_grad
],
"lr": args.lr_backbone,
},
]
optimizer = torch.optim.AdamW(param_dicts, lr=args.lr,
weight_decay=args.weight_decay)
optimizer = torch.optim.AdamW(
param_dicts, lr=args.lr, weight_decay=args.weight_decay
)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)
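# StepLR multiplies the learning rate by gamma (0.1 by default) every args.lr_drop epochs.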
dataset_train = build_dataset(image_set='train', args=args)
dataset_val = build_dataset(image_set='val', args=args)
dataset_train = build_dataset(image_set="train", args=args)
dataset_val = build_dataset(image_set="val", args=args)
if args.distributed:
sampler_train = DistributedSampler(dataset_train)
......@@ -161,12 +254,23 @@ def main(args):
sampler_val = torch.utils.data.SequentialSampler(dataset_val)
batch_sampler_train = torch.utils.data.BatchSampler(
sampler_train, args.batch_size, drop_last=True)
sampler_train, args.batch_size, drop_last=True
)
data_loader_train = DataLoader(dataset_train, batch_sampler=batch_sampler_train,
collate_fn=utils.collate_fn, num_workers=args.num_workers)
data_loader_val = DataLoader(dataset_val, args.batch_size, sampler=sampler_val,
drop_last=False, collate_fn=utils.collate_fn, num_workers=args.num_workers)
data_loader_train = DataLoader(
dataset_train,
batch_sampler=batch_sampler_train,
collate_fn=utils.collate_fn,
num_workers=args.num_workers,
)
data_loader_val = DataLoader(
dataset_val,
args.batch_size,
sampler=sampler_val,
drop_last=False,
collate_fn=utils.collate_fn,
num_workers=args.num_workers,
)
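# utils.collate_fn batches variable-sized images into a padded NestedTensor
# with the corresponding padding masks.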
if args.dataset_file == "coco_panoptic":
# We also evaluate AP during panoptic training, on original coco DS
......@@ -176,24 +280,37 @@ def main(args):
base_ds = get_coco_api_from_dataset(dataset_val)
if args.frozen_weights is not None:
checkpoint = torch.load(args.frozen_weights, map_location='cpu')
model_without_ddp.detr.load_state_dict(checkpoint['model'])
checkpoint = torch.load(args.frozen_weights, map_location="cpu")
model_without_ddp.detr.load_state_dict(checkpoint["model"])
if args.resume:
if args.resume.startswith('https'):
if args.resume.startswith("https"):
checkpoint = torch.hub.load_state_dict_from_url(
args.resume, map_location='cpu', check_hash=True)
args.resume, map_location="cpu", check_hash=True
)
else:
checkpoint = torch.load(args.resume, map_location='cpu')
model_without_ddp.load_state_dict(checkpoint['model'])
if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
optimizer.load_state_dict(checkpoint['optimizer'])
lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
args.start_epoch = checkpoint['epoch'] + 1
checkpoint = torch.load(args.resume, map_location="cpu")
model_without_ddp.load_state_dict(checkpoint["model"])
if (
not args.eval
and "optimizer" in checkpoint
and "lr_scheduler" in checkpoint
and "epoch" in checkpoint
):
optimizer.load_state_dict(checkpoint["optimizer"])
lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
args.start_epoch = checkpoint["epoch"] + 1
if args.eval:
test_stats, coco_evaluator = evaluate(model, criterion, postprocessors,
data_loader_val, base_ds, device, args.output_dir)
test_stats, coco_evaluator = evaluate(
model,
criterion,
postprocessors,
data_loader_val,
base_ds,
device,
args.output_dir,
)
if args.output_dir:
with PathManager.open(os.path.join(args.output_dir, "eval.pth"), "wb") as f:
utils.save_on_master(coco_evaluator.coco_eval["bbox"].eval, f)
......@@ -205,33 +322,52 @@ def main(args):
if args.distributed:
sampler_train.set_epoch(epoch)
train_stats = train_one_epoch(
model, criterion, data_loader_train, optimizer, device, epoch,
args.clip_max_norm)
model,
criterion,
data_loader_train,
optimizer,
device,
epoch,
args.clip_max_norm,
)
lr_scheduler.step()
if args.output_dir:
checkpoint_paths = [] #os.path.join(args.output_dir, 'checkpoint.pth')]
checkpoint_paths = [] # os.path.join(args.output_dir, 'checkpoint.pth')]
# extra checkpoint before LR drop and every 10 epochs
if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 10 == 0:
checkpoint_paths.append(os.path.join(args.output_dir, f'checkpoint{epoch:04}.pth'))
checkpoint_paths.append(
os.path.join(args.output_dir, f"checkpoint{epoch:04}.pth")
)
for checkpoint_path in checkpoint_paths:
with PathManager.open(checkpoint_path, "wb") as f:
if args.gpu == 0 and args.machine_rank == 0:
utils.save_on_master({
'model': model_without_ddp.state_dict(),
'optimizer': optimizer.state_dict(),
'lr_scheduler': lr_scheduler.state_dict(),
'epoch': epoch,
'args': args,
}, f)
utils.save_on_master(
{
"model": model_without_ddp.state_dict(),
"optimizer": optimizer.state_dict(),
"lr_scheduler": lr_scheduler.state_dict(),
"epoch": epoch,
"args": args,
},
f,
)
test_stats, coco_evaluator = evaluate(
model, criterion, postprocessors, data_loader_val, base_ds, device, args.output_dir
model,
criterion,
postprocessors,
data_loader_val,
base_ds,
device,
args.output_dir,
)
log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
**{f'test_{k}': v for k, v in test_stats.items()},
'epoch': epoch,
'n_parameters': n_parameters}
log_stats = {
**{f"train_{k}": v for k, v in train_stats.items()},
**{f"test_{k}": v for k, v in test_stats.items()},
"epoch": epoch,
"n_parameters": n_parameters,
}
if args.output_dir and utils.is_main_process():
with PathManager.open(os.path.join(args.output_dir, "log.txt"), "w") as f:
......@@ -239,19 +375,21 @@ def main(args):
# for evaluation logs
if coco_evaluator is not None:
PathManager.mkdirs(os.path.join(args.output_dir, 'eval'))
PathManager.mkdirs(os.path.join(args.output_dir, "eval"))
if "bbox" in coco_evaluator.coco_eval:
filenames = ['latest.pth']
filenames = ["latest.pth"]
if epoch % 50 == 0:
filenames.append(f'{epoch:03}.pth')
filenames.append(f"{epoch:03}.pth")
for name in filenames:
with PathManager.open(os.path.join(args.output_dir, "eval", name), "wb") as f:
torch.save(coco_evaluator.coco_eval["bbox"].eval,
f)
with PathManager.open(
os.path.join(args.output_dir, "eval", name), "wb"
) as f:
torch.save(coco_evaluator.coco_eval["bbox"].eval, f)
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))
print("Training time {}".format(total_time_str))
def launch(
main_func,
......@@ -285,7 +423,9 @@ def launch(
# TODO prctl in spawned processes
if dist_url == "auto":
assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs."
assert (
num_machines == 1
), "dist_url=auto not supported in multi-machine jobs."
port = _find_free_port()
dist_url = f"tcp://127.0.0.1:{port}"
if num_machines > 1 and dist_url.startswith("file://"):
......@@ -326,6 +466,7 @@ def synchronize():
return
dist.barrier()
def _distributed_worker(
local_rank,
main_func,
......@@ -336,7 +477,9 @@ def _distributed_worker(
args,
timeout=DEFAULT_TIMEOUT,
):
assert torch.cuda.is_available(), "cuda is not available. Please check your installation."
assert (
torch.cuda.is_available()
), "cuda is not available. Please check your installation."
global_rank = machine_rank * num_gpus_per_machine + local_rank
try:
dist.init_process_group(
......@@ -359,9 +502,9 @@ def _distributed_worker(
args[0].gpu = local_rank
# Setup the local process group (which contains ranks within the same machine)
#assert comm._LOCAL_PROCESS_GROUP is None
#num_machines = world_size // num_gpus_per_machine
#for i in range(num_machines):
# assert comm._LOCAL_PROCESS_GROUP is None
# num_machines = world_size // num_gpus_per_machine
# for i in range(num_machines):
# ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine))
# pg = dist.new_group(ranks_on_i)
# if i == machine_rank:
......@@ -370,8 +513,10 @@ def _distributed_worker(
main_func(*args)
if __name__ == '__main__':
parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()])
if __name__ == "__main__":
parser = argparse.ArgumentParser(
"DETR training and evaluation script", parents=[get_args_parser()]
)
args = parser.parse_args()
if args.output_dir:
PathManager.mkdirs(args.output_dir)
......
......@@ -6,20 +6,19 @@
# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
# ------------------------------------------------------------------------------------------------
import os
import glob
import os
import torch
from torch.utils.cpp_extension import CUDA_HOME
from torch.utils.cpp_extension import CppExtension
from torch.utils.cpp_extension import CUDAExtension
from setuptools import find_packages
from setuptools import setup
from torch.utils.cpp_extension import CUDAExtension
from torch.utils.cpp_extension import CUDA_HOME
from torch.utils.cpp_extension import CppExtension
requirements = ["torch", "torchvision"]
def get_extensions():
this_dir = os.path.dirname(os.path.abspath(__file__))
extensions_dir = os.path.join(this_dir, "detr/src")
......@@ -49,7 +48,7 @@ def get_extensions():
"-D__CUDA_NO_HALF2_OPERATORS__",
]
else:
raise NotImplementedError('Cuda is not available')
raise NotImplementedError("Cuda is not available")
sources = [os.path.join(extensions_dir, s) for s in sources]
include_dirs = [extensions_dir]
......@@ -64,13 +63,14 @@ def get_extensions():
]
return ext_modules
if __name__ == '__main__':
if __name__ == "__main__":
setup(
name="detr",
url="https://github.com/facebookresearch/d2go/detr",
license='Apache-2.0',
license="Apache-2.0",
packages=find_packages(exclude=["test_all.py"]),
package_data={ 'detr': ['LICENSE']},
package_data={"detr": ["LICENSE"]},
ext_modules=get_extensions(),
cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)
......@@ -3,17 +3,19 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import io
import unittest
import torch
from torch import nn, Tensor
from typing import List
from detr.models.matcher import HungarianMatcher
from detr.models.position_encoding import PositionEmbeddingSine, PositionEmbeddingLearned
import torch
from detr.hub import detr_resnet50, detr_resnet50_panoptic
from detr.models.backbone import Backbone
from detr.models.matcher import HungarianMatcher
from detr.models.position_encoding import (
PositionEmbeddingSine,
PositionEmbeddingLearned,
)
from detr.util import box_ops
from detr.util.misc import nested_tensor_from_tensor_list
from detr.hub import detr_resnet50, detr_resnet50_panoptic
from torch import nn, Tensor
# onnxruntime requires python 3.5 or above
try:
......@@ -23,7 +25,6 @@ except ImportError:
class Tester(unittest.TestCase):
def test_box_cxcywh_to_xyxy(self):
t = torch.rand(10, 4)
r = box_ops.box_xyxy_to_cxcywh(box_ops.box_cxcywh_to_xyxy(t))
......@@ -40,26 +41,45 @@ class Tester(unittest.TestCase):
tgt_labels = torch.randint(high=n_classes, size=(n_targets,))
tgt_boxes = torch.rand(n_targets, 4)
matcher = HungarianMatcher()
targets = [{'labels': tgt_labels, 'boxes': tgt_boxes}]
indices_single = matcher({'pred_logits': logits, 'pred_boxes': boxes}, targets)
indices_batched = matcher({'pred_logits': logits.repeat(2, 1, 1),
'pred_boxes': boxes.repeat(2, 1, 1)}, targets * 2)
targets = [{"labels": tgt_labels, "boxes": tgt_boxes}]
indices_single = matcher({"pred_logits": logits, "pred_boxes": boxes}, targets)
indices_batched = matcher(
{
"pred_logits": logits.repeat(2, 1, 1),
"pred_boxes": boxes.repeat(2, 1, 1),
},
targets * 2,
)
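# Every target should be matched to exactly one query slot.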
self.assertEqual(len(indices_single[0][0]), n_targets)
self.assertEqual(len(indices_single[0][1]), n_targets)
self.assertEqual(self.indices_torch2python(indices_single),
self.indices_torch2python([indices_batched[0]]))
self.assertEqual(self.indices_torch2python(indices_single),
self.indices_torch2python([indices_batched[1]]))
self.assertEqual(
self.indices_torch2python(indices_single),
self.indices_torch2python([indices_batched[0]]),
)
self.assertEqual(
self.indices_torch2python(indices_single),
self.indices_torch2python([indices_batched[1]]),
)
# test with empty targets
tgt_labels_empty = torch.randint(high=n_classes, size=(0,))
tgt_boxes_empty = torch.rand(0, 4)
targets_empty = [{'labels': tgt_labels_empty, 'boxes': tgt_boxes_empty}]
indices = matcher({'pred_logits': logits.repeat(2, 1, 1),
'pred_boxes': boxes.repeat(2, 1, 1)}, targets + targets_empty)
targets_empty = [{"labels": tgt_labels_empty, "boxes": tgt_boxes_empty}]
indices = matcher(
{
"pred_logits": logits.repeat(2, 1, 1),
"pred_boxes": boxes.repeat(2, 1, 1),
},
targets + targets_empty,
)
self.assertEqual(len(indices[1][0]), 0)
indices = matcher({'pred_logits': logits.repeat(2, 1, 1),
'pred_boxes': boxes.repeat(2, 1, 1)}, targets_empty * 2)
indices = matcher(
{
"pred_logits": logits.repeat(2, 1, 1),
"pred_boxes": boxes.repeat(2, 1, 1),
},
targets_empty * 2,
)
self.assertEqual(len(indices[0][0]), 0)
def test_position_encoding_script(self):
......@@ -67,13 +87,15 @@ class Tester(unittest.TestCase):
mm1, mm2 = torch.jit.script(m1), torch.jit.script(m2) # noqa
def test_backbone_script(self):
backbone = Backbone('resnet50', True, False, False)
backbone = Backbone("resnet50", True, False, False)
torch.jit.script(backbone) # noqa
def test_model_script_detection(self):
model = detr_resnet50(pretrained=False).eval()
scripted_model = torch.jit.script(model)
x = nested_tensor_from_tensor_list([torch.rand(3, 200, 200), torch.rand(3, 200, 250)])
x = nested_tensor_from_tensor_list(
[torch.rand(3, 200, 200), torch.rand(3, 200, 250)]
)
out = model(x)
out_script = scripted_model(x)
self.assertTrue(out["pred_logits"].equal(out_script["pred_logits"]))
......@@ -82,7 +104,9 @@ class Tester(unittest.TestCase):
def test_model_script_panoptic(self):
model = detr_resnet50_panoptic(pretrained=False).eval()
scripted_model = torch.jit.script(model)
x = nested_tensor_from_tensor_list([torch.rand(3, 200, 200), torch.rand(3, 200, 250)])
x = nested_tensor_from_tensor_list(
[torch.rand(3, 200, 200), torch.rand(3, 200, 250)]
)
out = model(x)
out_script = scripted_model(x)
self.assertTrue(out["pred_logits"].equal(out_script["pred_logits"]))
......@@ -92,17 +116,19 @@ class Tester(unittest.TestCase):
def test_model_detection_different_inputs(self):
model = detr_resnet50(pretrained=False).eval()
# support NestedTensor
x = nested_tensor_from_tensor_list([torch.rand(3, 200, 200), torch.rand(3, 200, 250)])
x = nested_tensor_from_tensor_list(
[torch.rand(3, 200, 200), torch.rand(3, 200, 250)]
)
out = model(x)
self.assertIn('pred_logits', out)
self.assertIn("pred_logits", out)
# and 4d Tensor
x = torch.rand(1, 3, 200, 200)
out = model(x)
self.assertIn('pred_logits', out)
self.assertIn("pred_logits", out)
# and List[Tensor[C, H, W]]
x = torch.rand(3, 200, 200)
out = model([x])
self.assertIn('pred_logits', out)
self.assertIn("pred_logits", out)
def test_wrapped_model_script_detection(self):
class WrappedDETR(nn.Module):
......@@ -125,30 +151,49 @@ class Tester(unittest.TestCase):
self.assertTrue(out["pred_boxes"].equal(out_script["pred_boxes"]))
@unittest.skipIf(onnxruntime is None, 'ONNX Runtime unavailable')
@unittest.skipIf(onnxruntime is None, "ONNX Runtime unavailable")
class ONNXExporterTester(unittest.TestCase):
@classmethod
def setUpClass(cls):
torch.manual_seed(123)
def run_model(self, model, inputs_list, tolerate_small_mismatch=False, do_constant_folding=True, dynamic_axes=None,
output_names=None, input_names=None):
def run_model(
self,
model,
inputs_list,
tolerate_small_mismatch=False,
do_constant_folding=True,
dynamic_axes=None,
output_names=None,
input_names=None,
):
model.eval()
onnx_io = io.BytesIO()
# export to onnx with the first input
torch.onnx.export(model, inputs_list[0], onnx_io,
do_constant_folding=do_constant_folding, opset_version=12,
dynamic_axes=dynamic_axes, input_names=input_names, output_names=output_names)
torch.onnx.export(
model,
inputs_list[0],
onnx_io,
do_constant_folding=do_constant_folding,
opset_version=12,
dynamic_axes=dynamic_axes,
input_names=input_names,
output_names=output_names,
)
# validate the exported model with onnx runtime
for test_inputs in inputs_list:
with torch.no_grad():
if isinstance(test_inputs, torch.Tensor) or isinstance(test_inputs, list):
if isinstance(test_inputs, torch.Tensor) or isinstance(
test_inputs, list
):
test_inputs = (nested_tensor_from_tensor_list(test_inputs),)
test_outputs = model(*test_inputs)
if isinstance(test_outputs, torch.Tensor):
test_outputs = (test_outputs,)
self.ort_validate(onnx_io, test_inputs, test_outputs, tolerate_small_mismatch)
self.ort_validate(
onnx_io, test_inputs, test_outputs, tolerate_small_mismatch
)
def ort_validate(self, onnx_io, inputs, outputs, tolerate_small_mismatch=False):
......@@ -166,11 +211,15 @@ class ONNXExporterTester(unittest.TestCase):
ort_session = onnxruntime.InferenceSession(onnx_io.getvalue())
# compute onnxruntime output prediction
ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs)) #noqa: C402
ort_inputs = dict(
(ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs)
) # noqa: C402
ort_outs = ort_session.run(None, ort_inputs)
for i in range(0, len(outputs)):
try:
torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05)
torch.testing.assert_allclose(
outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05
)
except AssertionError as error:
if tolerate_small_mismatch:
self.assertIn("(0.00%)", str(error), str(error))
......@@ -207,5 +256,5 @@ class ONNXExporterTester(unittest.TestCase):
)
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
import logging
import unittest
from detr.backbone.deit import add_deit_backbone_config
from detr.backbone.pit import add_pit_backbone_config
import torch
from detectron2.utils.file_io import PathManager
from detectron2.checkpoint import DetectionCheckpointer
from d2go.config import CfgNode as CN
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import BACKBONE_REGISTRY
from detectron2.utils.file_io import PathManager
from detr.backbone.deit import add_deit_backbone_config
from detr.backbone.pit import add_pit_backbone_config
import logging
logger = logging.getLogger(__name__)
# avoid testing on sandcastle due to access to manifold
USE_CUDA = torch.cuda.device_count() > 0
class TestTransformerBackbone(unittest.TestCase):
@unittest.skipIf(not USE_CUDA,"avoid testing on sandcastle due to access to manifold")
@unittest.skipIf(
not USE_CUDA, "avoid testing on sandcastle due to access to manifold"
)
def test_deit_model(self):
cfg = CN()
cfg.MODEL = CN()
......@@ -50,8 +53,9 @@ class TestTransformerBackbone(unittest.TestCase):
y = model(x)
print(f"x.shape: {x.shape}, y.shape: {y.shape}")
@unittest.skipIf(not USE_CUDA,"avoid testing on sandcastle due to access to manifold")
@unittest.skipIf(
not USE_CUDA, "avoid testing on sandcastle due to access to manifold"
)
def test_pit_model(self):
cfg = CN()
cfg.MODEL = CN()
......
......@@ -13,6 +13,7 @@ from d2go.utils.testing.data_loader_helper import create_local_dataset
# RUN:
# buck test mobile-vision/d2go/projects_oss/detr:test_detr_runner
def _get_cfg(runner, output_dir, dataset_name):
cfg = runner.get_default_cfg()
cfg.MODEL.DEVICE = "cpu"
......
......@@ -10,13 +10,15 @@
import io
import unittest
import torch
from functools import wraps
import torch
from detr.functions.ms_deform_attn_func import (
MSDeformAttnFunction,
ms_deform_attn_core_pytorch,
)
from torch.autograd import gradcheck
from detr.functions.ms_deform_attn_func import MSDeformAttnFunction, ms_deform_attn_core_pytorch
USE_CUDA = torch.cuda.device_count() > 0
......@@ -24,53 +26,107 @@ N, M, D = 1, 2, 2
Lq, L, P = 2, 2, 2
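# Test dimensions: N = batch size, M = attention heads, D = channels per head,
# Lq = number of queries, L = feature levels, P = sampling points per level.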
if USE_CUDA:
shapes = torch.as_tensor([(6, 4), (3, 2)], dtype=torch.long).cuda()
level_start_index = torch.cat((shapes.new_zeros((1, )), shapes.prod(1).cumsum(0)[:-1]))
S = sum([(H*W).item() for H, W in shapes])
level_start_index = torch.cat(
(shapes.new_zeros((1,)), shapes.prod(1).cumsum(0)[:-1])
)
S = sum([(H * W).item() for H, W in shapes])
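# S is the total number of flattened spatial positions across all levels;
# level_start_index gives the offset of each level within that flattened axis.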
torch.manual_seed(3)
class Tester(unittest.TestCase):
@unittest.skipIf(not USE_CUDA, 'CI does not have gpu')
@unittest.skipIf(not USE_CUDA, "CI does not have gpu")
@torch.no_grad()
def test_forward_equal_with_pytorch_double(self):
value = torch.rand(N, S, M, D).cuda() * 0.01
sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)
attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
-2, keepdim=True
)
im2col_step = 2
output_pytorch = ms_deform_attn_core_pytorch(value.double(), shapes, sampling_locations.double(), attention_weights.double()).detach().cpu()
output_cuda = MSDeformAttnFunction.apply(value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step).detach().cpu()
output_pytorch = (
ms_deform_attn_core_pytorch(
value.double(),
shapes,
sampling_locations.double(),
attention_weights.double(),
)
.detach()
.cpu()
)
output_cuda = (
MSDeformAttnFunction.apply(
value.double(),
shapes,
level_start_index,
sampling_locations.double(),
attention_weights.double(),
im2col_step,
)
.detach()
.cpu()
)
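# The custom CUDA kernel should match the pure-PyTorch reference within
# torch.allclose's default tolerances in double precision.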
fwdok = torch.allclose(output_cuda, output_pytorch)
max_abs_err = (output_cuda - output_pytorch).abs().max()
max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max()
print(f'* {fwdok} test_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')
max_rel_err = (
(output_cuda - output_pytorch).abs() / output_pytorch.abs()
).max()
print(
f"* {fwdok} test_forward_equal_with_pytorch_double: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}"
)
@unittest.skipIf(not USE_CUDA, 'CI does not have gpu')
@unittest.skipIf(not USE_CUDA, "CI does not have gpu")
@torch.no_grad()
def test_forward_equal_with_pytorch_float(self):
value = torch.rand(N, S, M, D).cuda() * 0.01
sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)
attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
-2, keepdim=True
)
im2col_step = 2
output_pytorch = ms_deform_attn_core_pytorch(value, shapes, sampling_locations, attention_weights).detach().cpu()
output_cuda = MSDeformAttnFunction.apply(value, shapes, level_start_index, sampling_locations, attention_weights, im2col_step).detach().cpu()
output_pytorch = (
ms_deform_attn_core_pytorch(
value, shapes, sampling_locations, attention_weights
)
.detach()
.cpu()
)
output_cuda = (
MSDeformAttnFunction.apply(
value,
shapes,
level_start_index,
sampling_locations,
attention_weights,
im2col_step,
)
.detach()
.cpu()
)
fwdok = torch.allclose(output_cuda, output_pytorch, rtol=1e-2, atol=1e-3)
max_abs_err = (output_cuda - output_pytorch).abs().max()
max_rel_err = ((output_cuda - output_pytorch).abs() / output_pytorch.abs()).max()
print(f'* {fwdok} test_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}')
max_rel_err = (
(output_cuda - output_pytorch).abs() / output_pytorch.abs()
).max()
print(
f"* {fwdok} test_forward_equal_with_pytorch_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}"
)
@unittest.skipIf(not USE_CUDA, 'CI does not have gpu')
def test_gradient_numerical(self, channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True):
@unittest.skipIf(not USE_CUDA, "CI does not have gpu")
def test_gradient_numerical(
self, channels=4, grad_value=True, grad_sampling_loc=True, grad_attn_weight=True
):
value = torch.rand(N, S, M, channels).cuda() * 0.01
sampling_locations = torch.rand(N, Lq, M, L, P, 2).cuda()
attention_weights = torch.rand(N, Lq, M, L, P).cuda() + 1e-5
attention_weights /= attention_weights.sum(-1, keepdim=True).sum(-2, keepdim=True)
attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
-2, keepdim=True
)
im2col_step = 2
func = MSDeformAttnFunction.apply
......@@ -78,10 +134,20 @@ class Tester(unittest.TestCase):
sampling_locations.requires_grad = grad_sampling_loc
attention_weights.requires_grad = grad_attn_weight
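# gradcheck compares the kernel's analytical gradients against numerical
# finite differences; double precision keeps the comparison stable.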
gradok = gradcheck(func, (value.double(), shapes, level_start_index, sampling_locations.double(), attention_weights.double(), im2col_step))
gradok = gradcheck(
func,
(
value.double(),
shapes,
level_start_index,
sampling_locations.double(),
attention_weights.double(),
im2col_step,
),
)
print(f'* {gradok} test_gradient_numerical(D={channels})')
print(f"* {gradok} test_gradient_numerical(D={channels})")
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import glob
import io
import os
import subprocess
import glob
import shutil
import subprocess
from os import path
from typing import List
......@@ -12,10 +12,11 @@ from setuptools import setup, find_packages
cwd = os.path.dirname(os.path.abspath(__file__))
version = '0.0.1'
version = "0.0.1"
try:
if not os.getenv('RELEASE'):
if not os.getenv("RELEASE"):
from datetime import date
today = date.today()
day = today.strftime("b%Y%m%d")
version += day
......@@ -23,25 +24,24 @@ except Exception:
pass
requirements = [
'importlib',
'numpy',
'Pillow',
'mock',
'torch',
'pytorch_lightning',
'opencv-python',
'parameterized',
"importlib",
"numpy",
"Pillow",
"mock",
"torch",
"pytorch_lightning",
"opencv-python",
"parameterized",
]
def d2go_gather_files(dst_module, file_path, extension="*") -> List[str]:
"""
Return a list of files to include in d2go submodule. Copy over the corresponding files.
"""
# Use absolute paths while symlinking.
source_configs_dir = path.join(path.dirname(path.realpath(__file__)), file_path)
destination = path.join(
path.dirname(path.realpath(__file__)), "d2go", dst_module
)
destination = path.join(path.dirname(path.realpath(__file__)), "d2go", dst_module)
# Symlink the config directory inside package to have a cleaner pip install.
# Remove stale symlink/directory from a previous build.
......@@ -61,36 +61,41 @@ def d2go_gather_files(dst_module, file_path, extension="*") -> List[str]:
config_paths = glob.glob(os.path.join(file_path + extension), recursive=True)
return config_paths
def get_model_zoo_configs() -> List[str]:
"""
Return a list of configs to include in package for model zoo. Copy over these configs inside
d2go/model_zoo.
"""
return d2go_gather_files(os.path.join("model_zoo", "configs"), "configs", "**/*.yaml")
return d2go_gather_files(
os.path.join("model_zoo", "configs"), "configs", "**/*.yaml"
)
if __name__ == '__main__':
if __name__ == "__main__":
setup(
name="d2go",
version=version,
author="Mobile Vision",
url="https://github.com/facebookresearch/d2go",
description="D2Go",
long_description=open('README.md').read(),
long_description_content_type='text/markdown',
license='Apache-2.0',
long_description=open("README.md").read(),
long_description_content_type="text/markdown",
license="Apache-2.0",
install_requires=requirements,
packages=find_packages(exclude=["tools", "tests"]),
package_data={'d2go': [
'LICENSE',
package_data={
"d2go": [
"LICENSE",
],
"d2go.model_zoo": get_model_zoo_configs(),
"d2go.tools": d2go_gather_files("tools", "tools", "**/*.py"),
"d2go.tests": d2go_gather_files("tests", "tests", "**/*helper.py"),
},
entry_points={
'console_scripts': [
'd2go.exporter = d2go.tools.exporter:cli',
'd2go.train_net = d2go.tools.train_net:cli',
"console_scripts": [
"d2go.exporter = d2go.tools.exporter:cli",
"d2go.train_net = d2go.tools.train_net:cli",
]
},
)
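# Typical local install from this directory (sketch): pip install -e .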
......@@ -293,6 +293,8 @@ class TestD2GoDatasets(unittest.TestCase):
self.assertEqual(len(ds_list), 5)
# Test adhoc classes to use with suffix removal
AdhocDatasetManager.add(COCOWithClassesToUse("test_adhoc_ds2@1classes", ["class_0"]))
AdhocDatasetManager.add(
COCOWithClassesToUse("test_adhoc_ds2@1classes", ["class_0"])
)
ds_list = DatasetCatalog.get("test_adhoc_ds2@1classes")
self.assertEqual(len(ds_list), 5)
......@@ -126,7 +126,5 @@ class TestDataTransformsBoxUtils(unittest.TestCase):
boxes = np.array([[91, 46, 144, 111]])
transformed_bboxs = enlarge_box_tfm[0].apply_coords(boxes)
err_msg = "transformed_bbox = {}, expected {}".format(
transformed_bboxs, boxes
)
err_msg = "transformed_bbox = {}, expected {}".format(transformed_bboxs, boxes)
self.assertTrue(np.allclose(transformed_bboxs, boxes), err_msg)
#!/usr/bin/env python3
import unittest
import torch
import torch
from d2go.evaluation.prediction_count_evaluation import PredictionCountEvaluator
from detectron2.structures.instances import Instances
class TestPredictionCountEvaluation(unittest.TestCase):
def setUp(self):
self.evaluator = PredictionCountEvaluator()
image_size = (224, 224)
......@@ -40,7 +39,7 @@ class TestPredictionCountEvaluation(unittest.TestCase):
"predictions_per_image": 11 / 5,
"confidence_per_prediction": (0.9 * 5 + 0.8 * 4 + 0.7 * 2) / 11,
}
}
},
)
# Test that `reset` clears the evaluator state.
......@@ -48,7 +47,6 @@ class TestPredictionCountEvaluation(unittest.TestCase):
self.assertEqual(len(self.evaluator.prediction_counts), 0)
self.assertEqual(len(self.evaluator.confidence_scores), 0)
def assertDictAlmostEqual(self, dict1, dict2):
keys1 = list(dict1.keys())
keys2 = list(dict2.keys())
......
......@@ -28,9 +28,7 @@ class TestConfig(unittest.TestCase):
for location in ["detectron2", "detectron2go"]:
root_dir = os.path.abspath(reroute_config_path(f"{location}://."))
files = glob.glob(
os.path.join(root_dir, "**/*.yaml"),
recursive=True)
files = glob.glob(os.path.join(root_dir, "**/*.yaml"), recursive=True)
files = [f for f in files if "fbnas" not in f]
self.assertGreater(len(files), 0)
for fn in sorted(files):
......