support v0.6

b634945d · limm · 5b3792fc · b634945d · b634945d · b634945d
Commit b634945d authored Apr 09, 2025 by limm
20 changed files
--- a/detectron2/data/transforms/augmentation_impl.py
+++ b/detectron2/data/transforms/augmentation_impl.py
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+"""
+Implement many useful :class:`Augmentation`.
+"""
+import numpy as np
+import sys
+from typing import Tuple
+import torch
+from fvcore.transforms.transform import (
+    BlendTransform,
+    CropTransform,
+    HFlipTransform,
+    NoOpTransform,
+    PadTransform,
+    Transform,
+    TransformList,
+    VFlipTransform,
+)
+from PIL import Image
+from .augmentation import Augmentation, _transform_to_aug
+from .transform import ExtentTransform, ResizeTransform, RotationTransform
+__all__ = [
+    "FixedSizeCrop",
+    "RandomApply",
+    "RandomBrightness",
+    "RandomContrast",
+    "RandomCrop",
+    "RandomExtent",
+    "RandomFlip",
+    "RandomSaturation",
+    "RandomLighting",
+    "RandomRotation",
+    "Resize",
+    "ResizeScale",
+    "ResizeShortestEdge",
+    "RandomCrop_CategoryAreaConstraint",
+]
+class RandomApply(Augmentation):
+    """
+    Randomly apply an augmentation with a given probability.
+    When the wrapped augmentation is not selected, a :class:`NoOpTransform`
+    is returned instead.
+    """
+    def __init__(self, tfm_or_aug, prob=0.5):
+        """
+        Args:
+            tfm_or_aug (Transform, Augmentation): the transform or augmentation
+                to be applied. It can either be a `Transform` or `Augmentation`
+                instance.
+            prob (float): probability between 0.0 and 1.0 that
+                the wrapper transformation is applied
+        """
+        super().__init__()
+        self.aug = _transform_to_aug(tfm_or_aug)
+        assert 0.0 <= prob <= 1.0, f"Probability must be between 0.0 and 1.0 (given: {prob})"
+        self.prob = prob
+    def get_transform(self, *args):
+        """
+        Forward ``args`` to the wrapped augmentation's ``get_transform`` when the
+        random draw falls below ``prob``; otherwise return a :class:`NoOpTransform`.
+        """
+        # NOTE(review): _rand_range() is inherited; presumably uniform in [0, 1) - confirm.
+        if self._rand_range() < self.prob:
+            return self.aug.get_transform(*args)
+        return NoOpTransform()
+    def __call__(self, aug_input):
+        """
+        Invoke the wrapped augmentation on ``aug_input`` when the random draw falls
+        below ``prob``; otherwise do nothing to ``aug_input`` and return a
+        :class:`NoOpTransform`.
+        """
+        if self._rand_range() < self.prob:
+            return self.aug(aug_input)
+        return NoOpTransform()
+class RandomFlip(Augmentation):
+    """
+    Mirror the image along exactly one axis (horizontal or vertical) with a
+    configurable probability.
+    """
+    def __init__(self, prob=0.5, *, horizontal=True, vertical=False):
+        """
+        Args:
+            prob (float): probability that the flip is applied.
+            horizontal (boolean): flip left-right when True.
+            vertical (boolean): flip top-bottom when True.
+        Raises:
+            ValueError: if both or neither of `horizontal` / `vertical` are set.
+        """
+        super().__init__()
+        # Exactly one direction must be selected; the two checks are mutually exclusive.
+        if not (horizontal or vertical):
+            raise ValueError("At least one of horiz or vert has to be True!")
+        if horizontal and vertical:
+            raise ValueError("Cannot do both horiz and vert. Please use two Flip instead.")
+        self._init(locals())
+    def get_transform(self, image):
+        height, width = image.shape[:2]
+        if not (self._rand_range() < self.prob):
+            return NoOpTransform()
+        if self.horizontal:
+            return HFlipTransform(width)
+        if self.vertical:
+            return VFlipTransform(height)
+class Resize(Augmentation):
+    """Resize image to a fixed target size"""
+    def __init__(self, shape, interp=Image.BILINEAR):
+        """
+        Args:
+            shape: (h, w) tuple or an int; an int ``s`` is expanded to ``(s, s)``.
+            interp: PIL interpolation method
+        """
+        # Run the base initializer, as the sibling augmentations in this module do.
+        super().__init__()
+        if isinstance(shape, int):
+            shape = (shape, shape)
+        shape = tuple(shape)
+        self._init(locals())
+    def get_transform(self, image):
+        """
+        Build a :class:`ResizeTransform` from the image's current (h, w) to the
+        configured target shape.
+        """
+        return ResizeTransform(
+            image.shape[0], image.shape[1], self.shape[0], self.shape[1], self.interp
+        )
+class ResizeShortestEdge(Augmentation):
+    """
+    Aspect-ratio-preserving resize driven by the length of the shorter edge.
+    The shorter edge is scaled to a sampled target length. If that would push the
+    longer edge beyond `max_size`, the image is scaled down further so that the
+    longer edge does not exceed `max_size`.
+    """
+    @torch.jit.unused
+    def __init__(
+        self, short_edge_length, max_size=sys.maxsize, sample_style="range", interp=Image.BILINEAR
+    ):
+        """
+        Args:
+            short_edge_length (list[int]): with ``sample_style=="range"``, a
+                [min, max] interval the shortest edge length is drawn from; with
+                ``sample_style=="choice"``, the list of candidate lengths.
+                A single int is treated as the pair (value, value).
+            max_size (int): upper bound for the longest edge.
+            sample_style (str): "range" or "choice".
+            interp: interpolation method handed to :class:`ResizeTransform`.
+        """
+        super().__init__()
+        assert sample_style in ["range", "choice"], sample_style
+        self.is_range = sample_style == "range"
+        if isinstance(short_edge_length, int):
+            short_edge_length = (short_edge_length, short_edge_length)
+        if self.is_range:
+            assert len(short_edge_length) == 2, (
+                "short_edge_length must be two values using 'range' sample style."
+                f" Got {short_edge_length}!"
+            )
+        self._init(locals())
+    @torch.jit.unused
+    def get_transform(self, image):
+        old_h, old_w = image.shape[:2]
+        if self.is_range:
+            # randint's upper bound is exclusive, hence the +1 to include the max.
+            target = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1)
+        else:
+            target = np.random.choice(self.short_edge_length)
+        # A sampled length of 0 means "leave the image as is".
+        if target == 0:
+            return NoOpTransform()
+        new_h, new_w = ResizeShortestEdge.get_output_shape(old_h, old_w, target, self.max_size)
+        return ResizeTransform(old_h, old_w, new_h, new_w, self.interp)
+    @staticmethod
+    def get_output_shape(
+        oldh: int, oldw: int, short_edge_length: int, max_size: int
+    ) -> Tuple[int, int]:
+        """
+        Return the (height, width) obtained by scaling the shorter input edge to
+        `short_edge_length`, then shrinking if the longer edge exceeds `max_size`.
+        Both values are rounded half-up to ints.
+        """
+        target = short_edge_length * 1.0
+        ratio = target / min(oldh, oldw)
+        if oldh < oldw:
+            newh, neww = target, ratio * oldw
+        else:
+            newh, neww = ratio * oldh, target
+        longest = max(newh, neww)
+        if longest > max_size:
+            shrink = max_size * 1.0 / longest
+            newh = newh * shrink
+            neww = neww * shrink
+        return (int(newh + 0.5), int(neww + 0.5))
+class ResizeScale(Augmentation):
+    """
+    Scale a target box by a random factor drawn from [`min_scale`, `max_scale`] and
+    resize the input image so that it fits inside that scaled box while keeping its
+    aspect ratio.
+    This implements the resize part of the Google's 'resize_and_crop' data augmentation:
+    https://github.com/tensorflow/tpu/blob/master/models/official/detection/utils/input_utils.py#L127
+    """
+    def __init__(
+        self,
+        min_scale: float,
+        max_scale: float,
+        target_height: int,
+        target_width: int,
+        interp: int = Image.BILINEAR,
+    ):
+        """
+        Args:
+            min_scale: lower bound of the random scale factor.
+            max_scale: upper bound of the random scale factor.
+            target_height: height of the unscaled target box.
+            target_width: width of the unscaled target box.
+            interp: image interpolation method.
+        """
+        super().__init__()
+        self._init(locals())
+    def _get_resize(self, image: np.ndarray, scale: float) -> Transform:
+        """Build the resize that fits `image` into the target box scaled by `scale`."""
+        src_hw = image.shape[:2]
+        scaled_target = np.multiply((self.target_height, self.target_width), scale)
+        # The smaller of the two per-axis ratios keeps the whole image inside the box.
+        ratio_h = scaled_target[0] / src_hw[0]
+        ratio_w = scaled_target[1] / src_hw[1]
+        fit_ratio = np.minimum(ratio_h, ratio_w)
+        out_hw = np.round(np.multiply(src_hw, fit_ratio)).astype(int)
+        return ResizeTransform(src_hw[0], src_hw[1], out_hw[0], out_hw[1], self.interp)
+    def get_transform(self, image: np.ndarray) -> Transform:
+        return self._get_resize(image, np.random.uniform(self.min_scale, self.max_scale))
+class RandomRotation(Augmentation):
+    """
+    This method returns a copy of this image, rotated the given
+    number of degrees counter clockwise around the given center.
+    """
+    def __init__(self, angle, expand=True, center=None, sample_style="range", interp=None):
+        """
+        Args:
+            angle (list[float]): If ``sample_style=="range"``,
+                a [min, max] interval from which to sample the angle (in degrees).
+                If ``sample_style=="choice"``, a list of angles to sample from.
+                A single number is treated as the pair (angle, angle).
+            expand (bool): choose if the image should be resized to fit the whole
+                rotated image (default), or simply cropped
+            center (list[[float, float]]):  If ``sample_style=="range"``,
+                a [[minx, miny], [maxx, maxy]] relative interval from which to sample the center,
+                [0, 0] being the top left of the image and [1, 1] the bottom right.
+                If ``sample_style=="choice"``, a list of centers to sample from
+                Default: None, which means that the center of rotation is the center of the image
+                center has no effect if expand=True because it only affects shifting
+            sample_style (str): either "range" or "choice".
+            interp: interpolation method forwarded to :class:`RotationTransform`;
+                None lets that transform pick its own default.
+        """
+        super().__init__()
+        assert sample_style in ["range", "choice"], sample_style
+        self.is_range = sample_style == "range"
+        if isinstance(angle, (float, int)):
+            angle = (angle, angle)
+        # A single (x, y) center is expanded to a degenerate pair of centers.
+        if center is not None and isinstance(center[0], (float, int)):
+            center = (center, center)
+        self._init(locals())
+    def get_transform(self, image):
+        """
+        Sample an angle (and optionally a center) and return the matching
+        :class:`RotationTransform`, or a :class:`NoOpTransform` when the sampled
+        angle is a multiple of 360 degrees.
+        """
+        h, w = image.shape[:2]
+        center = None
+        if self.is_range:
+            angle = np.random.uniform(self.angle[0], self.angle[1])
+            if self.center is not None:
+                center = (
+                    np.random.uniform(self.center[0][0], self.center[1][0]),
+                    np.random.uniform(self.center[0][1], self.center[1][1]),
+                )
+        else:
+            angle = np.random.choice(self.angle)
+            if self.center is not None:
+                # np.random.choice only accepts 1-D input, so it cannot draw from a
+                # sequence of (x, y) pairs; sample an index and look the pair up instead.
+                center = self.center[np.random.randint(len(self.center))]
+        if center is not None:
+            center = (w * center[0], h * center[1])  # Convert to absolute coordinates
+        if angle % 360 == 0:
+            return NoOpTransform()
+        return RotationTransform(h, w, angle, expand=self.expand, center=center, interp=self.interp)
+class FixedSizeCrop(Augmentation):
+    """
+    Bring an image to a fixed size by random cropping and optional padding.
+    Along each axis where the image is larger than `crop_size`, a random window of
+    the crop size is taken. Where it is smaller, the right and the bottom of the
+    image are padded up to the crop size if `pad` is True; otherwise the smaller
+    image is returned.
+    """
+    def __init__(self, crop_size: Tuple[int], pad: bool = True, pad_value: float = 128.0):
+        """
+        Args:
+            crop_size: target image (height, width).
+            pad: if True, will pad images smaller than `crop_size` up to `crop_size`
+            pad_value: the padding value.
+        """
+        super().__init__()
+        self._init(locals())
+    def _get_crop(self, image: np.ndarray) -> Transform:
+        """Return a crop of `crop_size` at a random offset inside the image."""
+        src_hw = image.shape[:2]
+        crop_hw = self.crop_size
+        # Largest valid top-left offset per axis; zero where the image is not larger.
+        slack = np.maximum(np.subtract(src_hw, crop_hw), 0)
+        # One uniform draw is shared by both axes.
+        start = np.round(np.multiply(slack, np.random.uniform(0.0, 1.0))).astype(int)
+        return CropTransform(start[1], start[0], crop_hw[1], crop_hw[0], src_hw[1], src_hw[0])
+    def _get_pad(self, image: np.ndarray) -> Transform:
+        """Return the padding that grows the (possibly cropped) image to `crop_size`."""
+        src_hw = image.shape[:2]
+        crop_hw = self.crop_size
+        # Missing pixels per axis; zero where the image already covers the crop size.
+        deficit = np.maximum(np.subtract(crop_hw, src_hw), 0)
+        kept_hw = np.minimum(src_hw, crop_hw)
+        return PadTransform(0, 0, deficit[1], deficit[0], kept_hw[1], kept_hw[0], self.pad_value)
+    def get_transform(self, image: np.ndarray) -> TransformList:
+        steps = [self._get_crop(image)]
+        if self.pad:
+            steps.append(self._get_pad(image))
+        return TransformList(steps)
+class RandomCrop(Augmentation):
+    """
+    Cut a randomly positioned rectangle out of an image.
+    """
+    def __init__(self, crop_type: str, crop_size):
+        """
+        Args:
+            crop_type (str): one of "relative_range", "relative", "absolute", "absolute_range".
+            crop_size (tuple[float, float]): two floats, explained below.
+        - "relative": crop a (H * crop_size[0], W * crop_size[1]) region from an input image of
+          size (H, W). crop size should be in (0, 1]
+        - "relative_range": uniformly sample two values from [crop_size[0], 1]
+          and [crop_size[1], 1], and use them as in "relative" crop type.
+        - "absolute" crop a (crop_size[0], crop_size[1]) region from input image.
+          crop_size must be smaller than the input image size.
+        - "absolute_range", for an input of size (H, W), uniformly sample H_crop in
+          [crop_size[0], min(H, crop_size[1])] and W_crop in [crop_size[0], min(W, crop_size[1])].
+          Then crop a region (H_crop, W_crop).
+        """
+        # TODO style of relative_range and absolute_range are not consistent:
+        # one takes (h, w) but another takes (min, max)
+        super().__init__()
+        assert crop_type in ["relative_range", "relative", "absolute", "absolute_range"]
+        self._init(locals())
+    def get_transform(self, image):
+        h, w = image.shape[:2]
+        croph, cropw = self.get_crop_size((h, w))
+        assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(self)
+        # Pick the top-left corner so that the window stays inside the image.
+        top = np.random.randint(h - croph + 1)
+        left = np.random.randint(w - cropw + 1)
+        return CropTransform(left, top, cropw, croph)
+    def get_crop_size(self, image_size):
+        """
+        Args:
+            image_size (tuple): height, width
+        Returns:
+            crop_size (tuple): height, width in absolute pixels
+        """
+        h, w = image_size
+        kind = self.crop_type
+        if kind == "relative":
+            frac_h, frac_w = self.crop_size
+            return int(h * frac_h + 0.5), int(w * frac_w + 0.5)
+        if kind == "relative_range":
+            lower = np.asarray(self.crop_size, dtype=np.float32)
+            # Draw each fraction uniformly between its lower bound and 1.
+            frac_h, frac_w = lower + np.random.rand(2) * (1 - lower)
+            return int(h * frac_h + 0.5), int(w * frac_w + 0.5)
+        if kind == "absolute":
+            return (min(self.crop_size[0], h), min(self.crop_size[1], w))
+        if kind == "absolute_range":
+            assert self.crop_size[0] <= self.crop_size[1]
+            ch = np.random.randint(min(h, self.crop_size[0]), min(h, self.crop_size[1]) + 1)
+            cw = np.random.randint(min(w, self.crop_size[0]), min(w, self.crop_size[1]) + 1)
+            return ch, cw
+        raise NotImplementedError("Unknown crop type {}".format(self.crop_type))
+class RandomCrop_CategoryAreaConstraint(Augmentation):
+    """
+    Similar to :class:`RandomCrop`, but find a cropping window such that no single category
+    occupies a ratio of more than `single_category_max_area` in semantic segmentation ground
+    truth, which can cause unstability in training. The function attempts to find such a valid
+    cropping window for at most 10 times.
+    """
+    def __init__(
+        self,
+        crop_type: str,
+        crop_size,
+        single_category_max_area: float = 1.0,
+        ignored_category: int = None,
+    ):
+        """
+        Args:
+            crop_type, crop_size: same as in :class:`RandomCrop`
+            single_category_max_area: the maximum allowed area ratio of a
+                category. Set to 1.0 to disable
+            ignored_category: allow this category in the semantic segmentation
+                ground truth to exceed the area ratio. Usually set to the category
+                that's ignored in training.
+        """
+        # Run the base initializer, as the sibling augmentations in this module do.
+        super().__init__()
+        self.crop_aug = RandomCrop(crop_type, crop_size)
+        self._init(locals())
+    def get_transform(self, image, sem_seg):
+        """
+        Args:
+            image: the input image; only used when the area constraint is disabled.
+            sem_seg: 2-D semantic segmentation ground truth used to evaluate windows.
+        Returns:
+            A crop transform. With the constraint active, up to 10 windows are sampled
+            and the first one that satisfies the constraint is used; if none does,
+            the last sampled window is used.
+        """
+        if self.single_category_max_area >= 1.0:
+            return self.crop_aug.get_transform(image)
+        else:
+            h, w = sem_seg.shape
+            for _ in range(10):
+                crop_size = self.crop_aug.get_crop_size((h, w))
+                y0 = np.random.randint(h - crop_size[0] + 1)
+                x0 = np.random.randint(w - crop_size[1] + 1)
+                sem_seg_temp = sem_seg[y0 : y0 + crop_size[0], x0 : x0 + crop_size[1]]
+                labels, cnt = np.unique(sem_seg_temp, return_counts=True)
+                if self.ignored_category is not None:
+                    cnt = cnt[labels != self.ignored_category]
+                # Accept when at least two categories remain and the most frequent one
+                # stays below the allowed share of the counted pixels.
+                if len(cnt) > 1 and np.max(cnt) < np.sum(cnt) * self.single_category_max_area:
+                    break
+            crop_tfm = CropTransform(x0, y0, crop_size[1], crop_size[0])
+            return crop_tfm
+class RandomExtent(Augmentation):
+    """
+    Produce an image by cropping a random "subrect" of the source image.
+    The subrect may reach beyond the source image, in which case the outside
+    pixels are set to zeros (i.e. black). The output size follows the size of
+    the sampled subrect.
+    """
+    def __init__(self, scale_range, shift_range):
+        """
+        Args:
+            scale_range (l, h): Range of input-to-output size scaling factor
+            shift_range (x, y): Range of shifts of the cropped subrect. The rect
+                is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)],
+                where (w, h) is the (width, height) of the input image. Set each
+                component to zero to crop at the image's center.
+        """
+        super().__init__()
+        self._init(locals())
+    def get_transform(self, image):
+        img_h, img_w = image.shape[:2]
+        half_w, half_h = 0.5 * img_w, 0.5 * img_h
+        # Start from a rect covering the whole image, expressed around its center.
+        rect = np.array([-half_w, -half_h, half_w, half_h])
+        # Random scaling about the center.
+        rect *= np.random.uniform(self.scale_range[0], self.scale_range[1])
+        # Random shift: x coordinates sit at even indices, y coordinates at odd ones.
+        rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5)
+        rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5)
+        # Move the origin from the image center back to the image corner.
+        rect[0::2] += 0.5 * img_w
+        rect[1::2] += 0.5 * img_h
+        x0, y0, x1, y1 = rect
+        return ExtentTransform(
+            src_rect=(x0, y0, x1, y1),
+            output_size=(int(y1 - y0), int(x1 - x0)),
+        )
+class RandomContrast(Augmentation):
+    """
+    Randomly change the contrast of an image.
+    The intensity is drawn uniformly from (intensity_min, intensity_max):
+    - intensity < 1 will reduce contrast
+    - intensity = 1 will preserve the input image
+    - intensity > 1 will increase contrast
+    See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html
+    """
+    def __init__(self, intensity_min, intensity_max):
+        """
+        Args:
+            intensity_min (float): Minimum augmentation
+            intensity_max (float): Maximum augmentation
+        """
+        super().__init__()
+        self._init(locals())
+    def get_transform(self, image):
+        intensity = np.random.uniform(self.intensity_min, self.intensity_max)
+        # Blend the image with its own mean value.
+        mean_value = image.mean()
+        return BlendTransform(src_image=mean_value, src_weight=1 - intensity, dst_weight=intensity)
+class RandomBrightness(Augmentation):
+    """
+    Randomly change the brightness of an image.
+    The intensity is drawn uniformly from (intensity_min, intensity_max):
+    - intensity < 1 will reduce brightness
+    - intensity = 1 will preserve the input image
+    - intensity > 1 will increase brightness
+    See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html
+    """
+    def __init__(self, intensity_min, intensity_max):
+        """
+        Args:
+            intensity_min (float): Minimum augmentation
+            intensity_max (float): Maximum augmentation
+        """
+        super().__init__()
+        self._init(locals())
+    def get_transform(self, image):
+        intensity = np.random.uniform(self.intensity_min, self.intensity_max)
+        # Blend the image with a constant 0 source.
+        return BlendTransform(src_image=0, src_weight=1 - intensity, dst_weight=intensity)
+class RandomSaturation(Augmentation):
+    """
+    Randomly change the saturation of an RGB image.
+    Input images are assumed to have 'RGB' channel order.
+    The intensity is drawn uniformly from (intensity_min, intensity_max):
+    - intensity < 1 will reduce saturation (make the image more grayscale)
+    - intensity = 1 will preserve the input image
+    - intensity > 1 will increase saturation
+    See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html
+    """
+    def __init__(self, intensity_min, intensity_max):
+        """
+        Args:
+            intensity_min (float): Minimum augmentation (1 preserves input).
+            intensity_max (float): Maximum augmentation (1 preserves input).
+        """
+        super().__init__()
+        self._init(locals())
+    def get_transform(self, image):
+        assert image.shape[-1] == 3, "RandomSaturation only works on RGB images"
+        intensity = np.random.uniform(self.intensity_min, self.intensity_max)
+        # Weighted channel sum gives a grayscale image; keep a trailing channel axis.
+        luma = image.dot([0.299, 0.587, 0.114])
+        grayscale = luma[:, :, np.newaxis]
+        return BlendTransform(src_image=grayscale, src_weight=1 - intensity, dst_weight=intensity)
+class RandomLighting(Augmentation):
+    """
+    The "lighting" augmentation described in AlexNet, using fixed PCA over ImageNet.
+    Input images are assumed to have 'RGB' channel order.
+    The strength of the color jitter is drawn from a normal distribution whose
+    standard deviation is the `scale` parameter.
+    """
+    def __init__(self, scale):
+        """
+        Args:
+            scale (float): Standard deviation of principal component weighting.
+        """
+        super().__init__()
+        self._init(locals())
+        # Fixed 3x3 eigenvector matrix and the matching eigenvalues.
+        self.eigen_vecs = np.array(
+            [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140], [-0.5836, -0.6948, 0.4203]]
+        )
+        self.eigen_vals = np.array([0.2175, 0.0188, 0.0045])
+    def get_transform(self, image):
+        assert image.shape[-1] == 3, "RandomLighting only works on RGB images"
+        pc_weights = np.random.normal(scale=self.scale, size=3)
+        # Per-channel offset added to the image (both blend weights are 1).
+        rgb_shift = self.eigen_vecs.dot(pc_weights * self.eigen_vals)
+        return BlendTransform(src_image=rgb_shift, src_weight=1.0, dst_weight=1.0)
--- a/detectron2/data/transforms/transform.py
+++ b/detectron2/data/transforms/transform.py
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+"""
+See "Data Augmentation" tutorial for an overview of the system:
+https://detectron2.readthedocs.io/tutorials/augmentation.html
+"""
+import numpy as np
+import torch
+import torch.nn.functional as F
+from fvcore.transforms.transform import (
+    CropTransform,
+    HFlipTransform,
+    NoOpTransform,
+    Transform,
+    TransformList,
+)
+from PIL import Image
+try:
+    import cv2  # noqa
+except ImportError:
+    # OpenCV is an optional dependency at the moment
+    pass
+__all__ = [
+    "ExtentTransform",
+    "ResizeTransform",
+    "RotationTransform",
+    "ColorTransform",
+    "PILColorTransform",
+]
+class ExtentTransform(Transform):
+    """
+    Extracts a subregion from the source image and scales it to the output size.
+    The fill color is used to map pixels from the source rect that fall outside
+    the source image.
+    See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform
+    """
+    def __init__(self, src_rect, output_size, interp=Image.BILINEAR, fill=0):
+        """
+        Args:
+            src_rect (x0, y0, x1, y1): src coordinates
+            output_size (h, w): dst image size
+            interp: PIL interpolation methods
+            fill: Fill color used when src_rect extends outside image
+        """
+        super().__init__()
+        self._set_attributes(locals())
+    def apply_image(self, img, interp=None):
+        """
+        Args:
+            img: image array; an HxWx1 array is handled as a single-channel image.
+            interp: PIL interpolation method overriding the one given at
+                construction; None means "use the constructor's value".
+        Returns:
+            The extracted and rescaled array of size `output_size`, with the
+            trailing channel axis restored for HxWx1 inputs.
+        """
+        h, w = self.output_size
+        squeeze_channel = len(img.shape) > 2 and img.shape[2] == 1
+        if squeeze_channel:
+            pil_image = Image.fromarray(img[:, :, 0], mode="L")
+        else:
+            pil_image = Image.fromarray(img)
+        pil_image = pil_image.transform(
+            size=(w, h),
+            method=Image.EXTENT,
+            data=self.src_rect,
+            # Compare against None explicitly: Image.NEAREST is 0 and therefore falsy, so a
+            # truthiness test would silently replace it with self.interp.
+            resample=interp if interp is not None else self.interp,
+            fill=self.fill,
+        )
+        ret = np.asarray(pil_image)
+        if squeeze_channel:
+            ret = np.expand_dims(ret, -1)
+        return ret
+    def apply_coords(self, coords):
+        """
+        Map Nx2 (x, y) coordinates from the source image into the output image.
+        Returns a new float32 array; the input is not modified.
+        """
+        # Transform image center from source coordinates into output coordinates
+        # and then map the new origin to the corner of the output image.
+        h, w = self.output_size
+        x0, y0, x1, y1 = self.src_rect
+        new_coords = coords.astype(np.float32)
+        new_coords[:, 0] -= 0.5 * (x0 + x1)
+        new_coords[:, 1] -= 0.5 * (y0 + y1)
+        new_coords[:, 0] *= w / (x1 - x0)
+        new_coords[:, 1] *= h / (y1 - y0)
+        new_coords[:, 0] += 0.5 * w
+        new_coords[:, 1] += 0.5 * h
+        return new_coords
+    def apply_segmentation(self, segmentation):
+        """Apply the transform to a label map using nearest-neighbor resampling."""
+        segmentation = self.apply_image(segmentation, interp=Image.NEAREST)
+        return segmentation
+class ResizeTransform(Transform):
+    """
+    Resize the image to a target size.
+    uint8 images are resized with PIL; every other dtype goes through
+    ``torch.nn.functional.interpolate``.
+    """
+    def __init__(self, h, w, new_h, new_w, interp=None):
+        """
+        Args:
+            h, w (int): original image size
+            new_h, new_w (int): new image size
+            interp: PIL interpolation methods, defaults to bilinear.
+        """
+        # TODO decide on PIL vs opencv
+        super().__init__()
+        if interp is None:
+            interp = Image.BILINEAR
+        # NOTE(review): the methods below read self.h, self.w, self.new_h, self.new_w and
+        # self.interp, so _set_attributes presumably stores these locals as attributes.
+        self._set_attributes(locals())
+    def apply_image(self, img, interp=None):
+        """
+        Args:
+            img: array whose first two dimensions equal (h, w) and which has at
+                most 4 dimensions.
+            interp: PIL interpolation method overriding the one given at
+                construction; None means "use the constructor's value".
+        Returns:
+            The resized array with leading dimensions (new_h, new_w).
+        """
+        assert img.shape[:2] == (self.h, self.w)
+        assert len(img.shape) <= 4
+        interp_method = interp if interp is not None else self.interp
+        if img.dtype == np.uint8:
+            # PIL path: an HxWx1 array is resized as a single-channel ("L") image and
+            # gets its trailing channel axis back afterwards.
+            if len(img.shape) > 2 and img.shape[2] == 1:
+                pil_image = Image.fromarray(img[:, :, 0], mode="L")
+            else:
+                pil_image = Image.fromarray(img)
+            pil_image = pil_image.resize((self.new_w, self.new_h), interp_method)
+            ret = np.asarray(pil_image)
+            if len(img.shape) > 2 and img.shape[2] == 1:
+                ret = np.expand_dims(ret, -1)
+        else:
+            # PIL only supports uint8
+            # Arrays with negative strides are copied into contiguous memory before
+            # being handed to torch.
+            if any(x < 0 for x in img.strides):
+                img = np.ascontiguousarray(img)
+            img = torch.from_numpy(img)
+            shape = list(img.shape)
+            # Insert singleton dimensions after (H, W) so the tensor is always 4-D.
+            shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:]
+            img = img.view(shape_4d).permute(2, 3, 0, 1)  # hw(c) -> nchw
+            # Only these three PIL modes have a torch counterpart here; any other
+            # interp value raises KeyError on the lookup below.
+            _PIL_RESIZE_TO_INTERPOLATE_MODE = {
+                Image.NEAREST: "nearest",
+                Image.BILINEAR: "bilinear",
+                Image.BICUBIC: "bicubic",
+            }
+            mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method]
+            align_corners = None if mode == "nearest" else False
+            img = F.interpolate(
+                img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners
+            )
+            shape[:2] = (self.new_h, self.new_w)
+            ret = img.permute(2, 3, 0, 1).view(shape).numpy()  # nchw -> hw(c)
+        return ret
+    def apply_coords(self, coords):
+        """
+        Scale Nx2 (x, y) coordinates by the width and height ratios.
+        The input array is modified in place and returned.
+        """
+        coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w)
+        coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h)
+        return coords
+    def apply_segmentation(self, segmentation):
+        """Resize a label map using nearest-neighbor interpolation."""
+        segmentation = self.apply_image(segmentation, interp=Image.NEAREST)
+        return segmentation
+    def inverse(self):
+        """Return the transform that resizes from (new_h, new_w) back to (h, w)."""
+        return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp)
+class RotationTransform(Transform):
+    """
+    This method returns a copy of this image, rotated the given
+    number of degrees counter clockwise around its center.
+    """
+    def __init__(self, h, w, angle, expand=True, center=None, interp=None):
+        """
+        Args:
+            h, w (int): original image size
+            angle (float): degrees for rotation
+            expand (bool): choose if the image should be resized to fit the whole
+                rotated image (default), or simply cropped
+            center (tuple (width, height)): coordinates of the rotation center
+                if left to None, the center will be fit to the center of each image
+                center has no effect if expand=True because it only affects shifting
+            interp: cv2 interpolation method, default cv2.INTER_LINEAR
+        """
+        super().__init__()
+        image_center = np.array((w / 2, h / 2))
+        if center is None:
+            center = image_center
+        if interp is None:
+            interp = cv2.INTER_LINEAR
+        abs_cos, abs_sin = (abs(np.cos(np.deg2rad(angle))), abs(np.sin(np.deg2rad(angle))))
+        if expand:
+            # find the new width and height bounds
+            bound_w, bound_h = np.rint(
+                [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin]
+            ).astype(int)
+        else:
+            bound_w, bound_h = w, h
+        self._set_attributes(locals())
+        self.rm_coords = self.create_rotation_matrix()
+        # Needed because of this problem https://github.com/opencv/opencv/issues/11784
+        self.rm_image = self.create_rotation_matrix(offset=-0.5)
+    def apply_image(self, img, interp=None):
+        """
+        img should be a numpy array, formatted as Height * Width * Nchannels
+        """
+        if len(img) == 0 or self.angle % 360 == 0:
+            return img
+        assert img.shape[:2] == (self.h, self.w)
+        interp = interp if interp is not None else self.interp
+        return cv2.warpAffine(img, self.rm_image, (self.bound_w, self.bound_h), flags=interp)
+    def apply_coords(self, coords):
+        """
+        coords should be a N * 2 array-like, containing N couples of (x, y) points
+        """
+        coords = np.asarray(coords, dtype=float)
+        if len(coords) == 0 or self.angle % 360 == 0:
+            return coords
+        return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :]
+    def apply_segmentation(self, segmentation):
+        segmentation = self.apply_image(segmentation, interp=cv2.INTER_NEAREST)
+        return segmentation
+    def create_rotation_matrix(self, offset=0):
+        """
+        Build the 2x3 affine rotation matrix for this transform.
+        Args:
+            offset: value added to both coordinates of the rotation center
+                (and of the image center when ``expand`` is set).
+        Returns:
+            The matrix from ``cv2.getRotationMatrix2D``; when ``self.expand`` is
+            true its translation column is shifted so that the image center
+            lands on the center of the (bound_w, bound_h) output.
+        """
+        shifted_center = (self.center[0] + offset, self.center[1] + offset)
+        matrix = cv2.getRotationMatrix2D(tuple(shifted_center), self.angle, 1)
+        if not self.expand:
+            return matrix
+        # Find the coordinates of the center of rotation in the new image
+        # The only point for which we know the future coordinates is the center of the image
+        rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, matrix)[0, 0, :]
+        target_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset
+        # shift the rotation center to the new coordinates
+        matrix[:, 2] += target_center - rot_im_center
+        return matrix
+    def inverse(self):
+        """
+        Build the inverse transform: rotate back by ``-angle`` with expand
+        enabled, then crop the result to the original (w, h) shape.
+        Returns:
+            TransformList: the reverse rotation followed by the crop.
+        Raises:
+            NotImplementedError: if this transform was created without expand,
+                because part of the image is lost and cannot be recovered.
+        """
+        if not self.expand:  # Not possible to inverse if a part of the image is lost
+            raise NotImplementedError()
+        undo_rotation = RotationTransform(
+            self.bound_h, self.bound_w, -self.angle, True, None, self.interp
+        )
+        # Integer offsets of the original-size window inside the re-expanded canvas.
+        offset_x = (undo_rotation.bound_w - self.w) // 2
+        offset_y = (undo_rotation.bound_h - self.h) // 2
+        undo_crop = CropTransform(offset_x, offset_y, self.w, self.h)
+        return TransformList([undo_rotation, undo_crop])
+class ColorTransform(Transform):
+    """
+    Wrap an arbitrary photometric operation as a :class:`Transform`.
+    Only pixel values are touched: coordinates and segmentation maps are
+    returned unchanged, so annotations such as bounding boxes stay valid.
+    """
+    def __init__(self, op):
+        """
+        Args:
+            op (Callable): operation applied to the image; it receives an
+                ndarray and returns an ndarray.
+        Raises:
+            ValueError: if ``op`` is not callable.
+        """
+        if not callable(op):
+            raise ValueError("op parameter should be callable")
+        super().__init__()
+        # locals() is handed over verbatim, so no extra local names are defined here.
+        self._set_attributes(locals())
+    def apply_image(self, img):
+        """Return the result of calling the wrapped operation on ``img``."""
+        return self.op(img)
+    def apply_segmentation(self, segmentation):
+        """Return ``segmentation`` untouched."""
+        return segmentation
+    def apply_coords(self, coords):
+        """Return ``coords`` untouched."""
+        return coords
+    def inverse(self):
+        """Return a :class:`NoOpTransform` as the inverse."""
+        return NoOpTransform()
+class PILColorTransform(ColorTransform):
+    """
+    :class:`ColorTransform` variant whose operation works on PIL images.
+    The input array is converted to a PIL Image before the operation runs
+    and the result is converted back to a numpy array.
+    """
+    def __init__(self, op):
+        """
+        Args:
+            op (Callable): operation applied to the image; it receives a
+                PIL Image and returns a transformed PIL Image.
+                For reference on possible operations see:
+                - https://pillow.readthedocs.io/en/stable/
+        Raises:
+            ValueError: if ``op`` is not callable.
+        """
+        if not callable(op):
+            raise ValueError("op parameter should be callable")
+        super().__init__(op)
+    def apply_image(self, img):
+        """Run the wrapped operation on ``img`` via a PIL round trip."""
+        pil_img = Image.fromarray(img)
+        transformed = super().apply_image(pil_img)
+        return np.asarray(transformed)
+def HFlip_rotated_box(transform, rotated_boxes):
+    """
+    Apply the horizontal flip transform on rotated boxes, in place.
+    Args:
+        transform: the flip transform; only its ``width`` attribute is read.
+        rotated_boxes (ndarray): Nx5 floating point array of
+            (x_center, y_center, width, height, angle_degrees) format
+            in absolute coordinates.
+    Returns:
+        The same ``rotated_boxes`` array, modified in place.
+    """
+    x_center = rotated_boxes[:, 0]
+    angle = rotated_boxes[:, 4]
+    # Mirror the center around the vertical axis and negate the angle.
+    rotated_boxes[:, 0] = transform.width - x_center
+    rotated_boxes[:, 4] = -angle
+    return rotated_boxes
+def Resize_rotated_box(transform, rotated_boxes):
+    """
+    Apply the resizing transform on rotated boxes, in place. For details of how these
+    (approximation) formulas are derived, please refer to :meth:`RotatedBoxes.scale`.
+    Args:
+        transform: the resize transform; its ``w``, ``h``, ``new_w`` and ``new_h``
+            attributes are read.
+        rotated_boxes (ndarray): Nx5 floating point array of
+            (x_center, y_center, width, height, angle_degrees) format
+            in absolute coordinates.
+    Returns:
+        The same ``rotated_boxes`` array, modified in place.
+    """
+    sx = transform.new_w * 1.0 / transform.w
+    sy = transform.new_h * 1.0 / transform.h
+    # Centers scale independently along each axis.
+    rotated_boxes[:, 0] *= sx
+    rotated_boxes[:, 1] *= sy
+    angle_rad = rotated_boxes[:, 4] * np.pi / 180.0
+    cos_a = np.cos(angle_rad)
+    sin_a = np.sin(angle_rad)
+    width_factor = np.sqrt(np.square(sx * cos_a) + np.square(sy * sin_a))
+    height_factor = np.sqrt(np.square(sx * sin_a) + np.square(sy * cos_a))
+    rotated_boxes[:, 2] *= width_factor
+    rotated_boxes[:, 3] *= height_factor
+    rotated_boxes[:, 4] = np.arctan2(sx * sin_a, sy * cos_a) * 180 / np.pi
+    return rotated_boxes
+# Register handlers for the "rotated_box" data type on the flip and resize
+# transforms, using the two functions defined above.
+HFlipTransform.register_type("rotated_box", HFlip_rotated_box)
+ResizeTransform.register_type("rotated_box", Resize_rotated_box)
+# not necessary any more with latest fvcore
+# (the registered handler returns the boxes unchanged)
+NoOpTransform.register_type("rotated_box", lambda t, x: x)
--- a/detectron2/engine/__init__.py
+++ b/detectron2/engine/__init__.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+from .launch import *
+from .train_loop import *
+# Export every name present in the module namespace at this point that does
+# not start with an underscore.
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
+# prefer to let hooks and defaults live in separate namespaces (therefore not in __all__)
+# but still make them available here
+from .hooks import *
+from .defaults import *
--- a/detectron2/engine/defaults.py
+++ b/detectron2/engine/defaults.py
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+"""
+This file contains components with some default boilerplate logic user may need
+in training / testing. They will not work for everyone, but many users may find them useful.
+The behavior of functions/classes in this file is subject to change,
+since they are meant to represent the "common default behavior" people need in their projects.
+"""
+import argparse
+import logging
+import os
+import sys
+import weakref
+from collections import OrderedDict
+from typing import Optional
+import torch
+from fvcore.nn.precise_bn import get_bn_modules
+from omegaconf import OmegaConf
+from torch.nn.parallel import DistributedDataParallel
+import detectron2.data.transforms as T
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import CfgNode, LazyConfig
+from detectron2.data import (
+    MetadataCatalog,
+    build_detection_test_loader,
+    build_detection_train_loader,
+)
+from detectron2.evaluation import (
+    DatasetEvaluator,
+    inference_on_dataset,
+    print_csv_format,
+    verify_results,
+)
+from detectron2.modeling import build_model
+from detectron2.solver import build_lr_scheduler, build_optimizer
+from detectron2.utils import comm
+from detectron2.utils.collect_env import collect_env_info
+from detectron2.utils.env import seed_all_rng
+from detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.logger import setup_logger
+from . import hooks
+from .train_loop import AMPTrainer, SimpleTrainer, TrainerBase
+# Names exported by ``from .defaults import *``.
+__all__ = [
+    "create_ddp_model",
+    "default_argument_parser",
+    "default_setup",
+    "default_writers",
+    "DefaultPredictor",
+    "DefaultTrainer",
+]
+def create_ddp_model(model, *, fp16_compression=False, **kwargs):
+    """
+    Wrap ``model`` in DistributedDataParallel when more than one process is running.
+    Args:
+        model: a torch.nn.Module
+        fp16_compression: add fp16 compression hooks to the ddp object.
+            See more at https://pytorch.org/docs/stable/ddp_comm_hooks.html#torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook
+        kwargs: other arguments of :module:`torch.nn.parallel.DistributedDataParallel`.
+            ``device_ids`` defaults to the local rank of this process when not given.
+    Returns:
+        ``model`` itself when the world size is 1, otherwise the DDP wrapper.
+    """  # noqa
+    if comm.get_world_size() == 1:
+        return model
+    ddp_kwargs = dict(kwargs)
+    if "device_ids" not in ddp_kwargs:
+        # Only query the local rank when the caller did not choose devices.
+        ddp_kwargs["device_ids"] = [comm.get_local_rank()]
+    wrapped = DistributedDataParallel(model, **ddp_kwargs)
+    if not fp16_compression:
+        return wrapped
+    from torch.distributed.algorithms.ddp_comm_hooks import default as comm_hooks
+    wrapped.register_comm_hook(state=None, hook=comm_hooks.fp16_compress_hook)
+    return wrapped
+def default_argument_parser(epilog=None):
+    """
+    Build an argument parser with the command line options commonly used by
+    detectron2 training/evaluation scripts.
+    Args:
+        epilog (str): epilog passed to ArgumentParser describing the usage.
+            A set of usage examples is generated when it is empty or None.
+    Returns:
+        argparse.ArgumentParser:
+    """
+    if not epilog:
+        epilog = f"""
+Examples:
+Run on single machine:
+    $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml
+Change some config options:
+    $ {sys.argv[0]} --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001
+Run on multiple machines:
+    (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url <URL> [--other-flags]
+    (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url <URL> [--other-flags]
+"""
+    parser = argparse.ArgumentParser(
+        epilog=epilog, formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+    parser.add_argument("--config-file", metavar="FILE", default="", help="path to config file")
+    parser.add_argument(
+        "--resume",
+        action="store_true",
+        help="Whether to attempt to resume from the checkpoint directory. "
+        "See documentation of `DefaultTrainer.resume_or_load()` for what it means.",
+    )
+    parser.add_argument("--eval-only", action="store_true", help="perform evaluation only")
+    parser.add_argument("--num-gpus", default=1, type=int, help="number of gpus *per machine*")
+    parser.add_argument("--num-machines", default=1, type=int, help="total number of machines")
+    parser.add_argument(
+        "--machine-rank", default=0, type=int, help="the rank of this machine (unique per machine)"
+    )
+    # PyTorch still may leave orphan processes in multi-gpu training.
+    # Therefore we use a deterministic way to obtain port,
+    # so that users are aware of orphan processes by seeing the port occupied.
+    uid = os.getuid() if sys.platform != "win32" else 1
+    port = 2 ** 15 + 2 ** 14 + hash(uid) % 2 ** 14
+    parser.add_argument(
+        "--dist-url",
+        default=f"tcp://127.0.0.1:{port}",
+        help="initialization URL for pytorch distributed backend. See "
+        "https://pytorch.org/docs/stable/distributed.html for details.",
+    )
+    # Everything left on the command line is collected verbatim into ``opts``.
+    parser.add_argument(
+        "opts",
+        nargs=argparse.REMAINDER,
+        default=None,
+        help="""
+Modify config options at the end of the command. For Yacs configs, use
+space-separated "PATH.KEY VALUE" pairs.
+For python-based LazyConfig, use "path.key=value".
+        """.strip(),
+    )
+    return parser
+def _try_get_key(cfg, *keys, default=None):
+    """
+    Look up ``keys`` in ``cfg`` one after another and return the value of the
+    first key that exists; return ``default`` when none of them does.
+    A ``CfgNode`` is first converted to an OmegaConf config from its ``dump()`` output.
+    """
+    if isinstance(cfg, CfgNode):
+        cfg = OmegaConf.create(cfg.dump())
+    # Fresh sentinel object used to detect that ``select`` fell back to its default.
+    missing = object()
+    for key in keys:
+        value = OmegaConf.select(cfg, key, default=missing)
+        if value is missing:
+            continue
+        return value
+    return default
+def _highlight(code, filename):
+    """
+    Return ``code`` with terminal syntax highlighting applied by pygments.
+    A Python lexer is used when ``filename`` ends with ".py", a YAML lexer
+    otherwise. ``code`` is returned unchanged if pygments cannot be imported.
+    """
+    try:
+        import pygments
+    except ImportError:
+        return code
+    from pygments.lexers import Python3Lexer, YamlLexer
+    from pygments.formatters import Terminal256Formatter
+    lexer_cls = Python3Lexer if filename.endswith(".py") else YamlLexer
+    lexer = lexer_cls()
+    formatter = Terminal256Formatter(style="monokai")
+    return pygments.highlight(code, lexer, formatter)
+def default_setup(cfg, args):
+    """
+    Perform some basic common setups at the beginning of a job, including:
+    1. Set up the detectron2 logger
+    2. Log basic information about environment, cmdline arguments, and config
+    3. Backup the config to the output directory
+    4. Seed the random number generators (per-rank seed when a seed is configured)
+    5. Set ``torch.backends.cudnn.benchmark`` from the config, unless ``args.eval_only`` is set
+    Args:
+        cfg (CfgNode or omegaconf.DictConfig): the full config to be used
+        args (argparse.NameSpace): the command line arguments to be logged
+    """
+    output_dir = _try_get_key(cfg, "OUTPUT_DIR", "output_dir", "train.output_dir")
+    if comm.is_main_process() and output_dir:
+        PathManager.mkdirs(output_dir)
+    rank = comm.get_rank()
+    setup_logger(output_dir, distributed_rank=rank, name="fvcore")
+    logger = setup_logger(output_dir, distributed_rank=rank)
+    logger.info("Rank of current process: {}. World size: {}".format(rank, comm.get_world_size()))
+    logger.info("Environment info:\n" + collect_env_info())
+    logger.info("Command line arguments: " + str(args))
+    if hasattr(args, "config_file") and args.config_file != "":
+        # Read through a context manager so the file handle is closed right away
+        # instead of being left for the garbage collector.
+        with PathManager.open(args.config_file, "r") as f:
+            config_text = f.read()
+        logger.info(
+            "Contents of args.config_file={}:\n{}".format(
+                args.config_file,
+                _highlight(config_text, args.config_file),
+            )
+        )
+    if comm.is_main_process() and output_dir:
+        # Note: some of our scripts may expect the existence of
+        # config.yaml in output directory
+        path = os.path.join(output_dir, "config.yaml")
+        if isinstance(cfg, CfgNode):
+            logger.info("Running with full config:\n{}".format(_highlight(cfg.dump(), ".yaml")))
+            with PathManager.open(path, "w") as f:
+                f.write(cfg.dump())
+        else:
+            LazyConfig.save(cfg, path)
+        logger.info("Full config saved to {}".format(path))
+    # make sure each worker has a different, yet deterministic seed if specified
+    seed = _try_get_key(cfg, "SEED", "train.seed", default=-1)
+    seed_all_rng(None if seed < 0 else seed + rank)
+    # cudnn benchmark has large overhead. It shouldn't be used considering the small size of
+    # typical validation set.
+    if not (hasattr(args, "eval_only") and args.eval_only):
+        torch.backends.cudnn.benchmark = _try_get_key(
+            cfg, "CUDNN_BENCHMARK", "train.cudnn_benchmark", default=False
+        )
+def default_writers(output_dir: str, max_iter: Optional[int] = None):
+    """
+    Create the default set of :class:`EventWriter` objects: a
+    :class:`CommonMetricPrinter`, a :class:`JSONWriter` and a
+    :class:`TensorboardXWriter`, in that order.
+    ``output_dir`` is created first via ``PathManager.mkdirs``.
+    Args:
+        output_dir: directory to store JSON metrics and tensorboard events
+        max_iter: the total number of iterations
+    Returns:
+        list[EventWriter]: a list of :class:`EventWriter` objects.
+    """
+    PathManager.mkdirs(output_dir)
+    metrics_path = os.path.join(output_dir, "metrics.json")
+    # It may not always print what you want to see, since it prints "common" metrics only.
+    printer = CommonMetricPrinter(max_iter)
+    json_writer = JSONWriter(metrics_path)
+    tensorboard_writer = TensorboardXWriter(output_dir)
+    return [printer, json_writer, tensorboard_writer]
+class DefaultPredictor:
+    """
+    A simple end-to-end predictor built from a config, running on a single
+    device for a single input image.
+    On top of calling the model directly, this class:
+    1. Loads the checkpoint from `cfg.MODEL.WEIGHTS`.
+    2. Always takes a BGR image as input and applies the conversion defined by `cfg.INPUT.FORMAT`.
+    3. Applies the resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`.
+    4. Takes one input image and produces a single output, instead of a batch.
+    This is meant for simple demo purposes, so it does the above steps automatically.
+    This is not meant for benchmarks or running complicated inference logic.
+    If you'd like to do anything more complicated, please refer to its source code as
+    examples to build and use the model manually.
+    Attributes:
+        metadata (Metadata): the metadata of the underlying dataset, obtained from
+            cfg.DATASETS.TEST. Only set when cfg.DATASETS.TEST is not empty.
+    Examples:
+    ::
+        pred = DefaultPredictor(cfg)
+        inputs = cv2.imread("input.jpg")
+        outputs = pred(inputs)
+    """
+    def __init__(self, cfg):
+        """
+        Build the model in eval mode, load its weights and prepare the test-time resize.
+        Args:
+            cfg: the config; a clone is kept as ``self.cfg`` and used to build the model.
+        """
+        self.cfg = cfg.clone()  # cfg can be modified by model
+        self.model = build_model(self.cfg)
+        self.model.eval()
+        test_datasets = cfg.DATASETS.TEST
+        if len(test_datasets):
+            self.metadata = MetadataCatalog.get(test_datasets[0])
+        DetectionCheckpointer(self.model).load(cfg.MODEL.WEIGHTS)
+        min_size = cfg.INPUT.MIN_SIZE_TEST
+        self.aug = T.ResizeShortestEdge([min_size, min_size], cfg.INPUT.MAX_SIZE_TEST)
+        self.input_format = cfg.INPUT.FORMAT
+        assert self.input_format in ["RGB", "BGR"], self.input_format
+    def __call__(self, original_image):
+        """
+        Args:
+            original_image (np.ndarray): an image of shape (H, W, C) (in BGR order).
+        Returns:
+            predictions (dict):
+                the output of the model for one image only.
+                See :doc:`/tutorials/models` for details about the format.
+        """
+        with torch.no_grad():  # https://github.com/sphinx-doc/sphinx/issues/4258
+            if self.input_format == "RGB":
+                # The model expects RGB, so reverse the channel axis of the BGR input.
+                original_image = original_image[:, :, ::-1]
+            height, width = original_image.shape[:2]
+            # Apply pre-processing to image.
+            transform = self.aug.get_transform(original_image)
+            resized = transform.apply_image(original_image)
+            # HWC -> CHW float32 tensor.
+            chw = resized.astype("float32").transpose(2, 0, 1)
+            sample = {"image": torch.as_tensor(chw), "height": height, "width": width}
+            return self.model([sample])[0]
+class DefaultTrainer(TrainerBase):
+    """
+    A trainer with default training logic. It does the following:
+    1. Create a :class:`SimpleTrainer` using model, optimizer, dataloader
+       defined by the given config. Create a LR scheduler defined by the config.
+    2. Load the last checkpoint or `cfg.MODEL.WEIGHTS`, if exists, when
+       `resume_or_load` is called.
+    3. Register a few common hooks defined by the config.
+    It is created to simplify the **standard model training workflow** and reduce code boilerplate
+    for users who only need the standard training workflow, with standard features.
+    It means this class makes *many assumptions* about your training logic that
+    may easily become invalid in a new research. In fact, any assumptions beyond those made in the
+    :class:`SimpleTrainer` are too much for research.
+    The code of this class has been annotated about restrictive assumptions it makes.
+    When they do not work for you, you're encouraged to:
+    1. Overwrite methods of this class, OR:
+    2. Use :class:`SimpleTrainer`, which only does minimal SGD training and
+       nothing else. You can then add your own hooks if needed. OR:
+    3. Write your own training loop similar to `tools/plain_train_net.py`.
+    See the :doc:`/tutorials/training` tutorials for more details.
+    Note that the behavior of this class, like other functions/classes in
+    this file, is not stable, since it is meant to represent the "common default behavior".
+    It is only guaranteed to work well with the standard models and training workflow in detectron2.
+    To obtain more stable behavior, write your own training logic with other public APIs.
+    Examples:
+    ::
+        trainer = DefaultTrainer(cfg)
+        trainer.resume_or_load()  # load last checkpoint or MODEL.WEIGHTS
+        trainer.train()
+    Attributes:
+        scheduler: the LR scheduler returned by :meth:`build_lr_scheduler`.
+        checkpointer (DetectionCheckpointer): saves to ``cfg.OUTPUT_DIR``.
+        cfg (CfgNode): the config after :meth:`auto_scale_workers` was applied.
+    """
+    def __init__(self, cfg):
+        """
+        Args:
+            cfg (CfgNode):
+        """
+        super().__init__()
+        logger = logging.getLogger("detectron2")
+        if not logger.isEnabledFor(logging.INFO):  # setup_logger is not called for d2
+            setup_logger()
+        cfg = DefaultTrainer.auto_scale_workers(cfg, comm.get_world_size())
+        # Assume these objects must be constructed in this order.
+        model = self.build_model(cfg)
+        optimizer = self.build_optimizer(cfg, model)
+        data_loader = self.build_train_loader(cfg)
+        model = create_ddp_model(model, broadcast_buffers=False)
+        # Pick the AMP-enabled inner trainer only when the config asks for it.
+        self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(
+            model, data_loader, optimizer
+        )
+        self.scheduler = self.build_lr_scheduler(cfg, optimizer)
+        self.checkpointer = DetectionCheckpointer(
+            # Assume you want to save checkpoints together with logs/statistics
+            model,
+            cfg.OUTPUT_DIR,
+            # A weak proxy is passed rather than ``self`` itself.
+            trainer=weakref.proxy(self),
+        )
+        self.start_iter = 0
+        self.max_iter = cfg.SOLVER.MAX_ITER
+        self.cfg = cfg
+        self.register_hooks(self.build_hooks())
+    def resume_or_load(self, resume=True):
+        """
+        If `resume==True` and `cfg.OUTPUT_DIR` contains the last checkpoint (defined by
+        a `last_checkpoint` file), resume from the file. Resuming means loading all
+        available states (eg. optimizer and scheduler) and update iteration counter
+        from the checkpoint. ``cfg.MODEL.WEIGHTS`` will not be used.
+        Otherwise, this is considered as an independent training. The method will load model
+        weights from the file `cfg.MODEL.WEIGHTS` (but will not load other states) and start
+        from iteration 0.
+        Args:
+            resume (bool): whether to do resume or not
+        """
+        self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume)
+        if resume and self.checkpointer.has_checkpoint():
+            # The checkpoint stores the training iteration that just finished, thus we start
+            # at the next iteration
+            self.start_iter = self.iter + 1
+    def build_hooks(self):
+        """
+        Build a list of default hooks, including timing, evaluation,
+        checkpointing, lr scheduling, precise BN, writing events.
+        The list contains ``None`` in place of the PreciseBN hook when that hook
+        is disabled or the model has no BN modules.
+        Returns:
+            list[HookBase]:
+        """
+        cfg = self.cfg.clone()
+        cfg.defrost()
+        cfg.DATALOADER.NUM_WORKERS = 0  # save some memory and time for PreciseBN
+        ret = [
+            hooks.IterationTimer(),
+            hooks.LRScheduler(),
+            hooks.PreciseBN(
+                # Run at the same freq as (but before) evaluation.
+                cfg.TEST.EVAL_PERIOD,
+                self.model,
+                # Build a new data loader to not affect training
+                self.build_train_loader(cfg),
+                cfg.TEST.PRECISE_BN.NUM_ITER,
+            )
+            if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model)
+            else None,
+        ]
+        # Do PreciseBN before checkpointer, because it updates the model and need to
+        # be saved by checkpointer.
+        # This is not always the best: if checkpointing has a different frequency,
+        # some checkpoints may have more precise statistics than others.
+        if comm.is_main_process():
+            ret.append(hooks.PeriodicCheckpointer(self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD))
+        def test_and_save_results():
+            # Keep the latest results around so train() can verify and return them.
+            self._last_eval_results = self.test(self.cfg, self.model)
+            return self._last_eval_results
+        # Do evaluation after checkpointer, because then if it fails,
+        # we can use the saved checkpoint to debug.
+        ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results))
+        if comm.is_main_process():
+            # Here the default print/log frequency of each writer is used.
+            # run writers in the end, so that evaluation metrics are written
+            ret.append(hooks.PeriodicWriter(self.build_writers(), period=20))
+        return ret
+    def build_writers(self):
+        """
+        Build a list of writers to be used using :func:`default_writers()`.
+        If you'd like a different list of writers, you can overwrite it in
+        your trainer.
+        Returns:
+            list[EventWriter]: a list of :class:`EventWriter` objects.
+        """
+        return default_writers(self.cfg.OUTPUT_DIR, self.max_iter)
+    def train(self):
+        """
+        Run training from ``self.start_iter`` to ``self.max_iter``.
+        Returns:
+            OrderedDict of results, if ``cfg.TEST.EXPECTED_RESULTS`` is non-empty and
+            this is the main process (the results are verified first). Otherwise None.
+        """
+        super().train(self.start_iter, self.max_iter)
+        if len(self.cfg.TEST.EXPECTED_RESULTS) and comm.is_main_process():
+            assert hasattr(
+                self, "_last_eval_results"
+            ), "No evaluation results obtained during training!"
+            verify_results(self.cfg, self._last_eval_results)
+            return self._last_eval_results
+    def run_step(self):
+        """
+        Run one training step by delegating to the wrapped trainer, after
+        copying the current iteration number onto it.
+        """
+        self._trainer.iter = self.iter
+        self._trainer.run_step()
+    @classmethod
+    def build_model(cls, cfg):
+        """
+        Returns:
+            torch.nn.Module:
+        It now calls :func:`detectron2.modeling.build_model`.
+        Overwrite it if you'd like a different model.
+        """
+        model = build_model(cfg)
+        logger = logging.getLogger(__name__)
+        logger.info("Model:\n{}".format(model))
+        return model
+    @classmethod
+    def build_optimizer(cls, cfg, model):
+        """
+        Returns:
+            torch.optim.Optimizer:
+        It now calls :func:`detectron2.solver.build_optimizer`.
+        Overwrite it if you'd like a different optimizer.
+        """
+        return build_optimizer(cfg, model)
+    @classmethod
+    def build_lr_scheduler(cls, cfg, optimizer):
+        """
+        It now calls :func:`detectron2.solver.build_lr_scheduler`.
+        Overwrite it if you'd like a different scheduler.
+        """
+        return build_lr_scheduler(cfg, optimizer)
+    @classmethod
+    def build_train_loader(cls, cfg):
+        """
+        Returns:
+            iterable
+        It now calls :func:`detectron2.data.build_detection_train_loader`.
+        Overwrite it if you'd like a different data loader.
+        """
+        return build_detection_train_loader(cfg)
+    @classmethod
+    def build_test_loader(cls, cfg, dataset_name):
+        """
+        Returns:
+            iterable
+        It now calls :func:`detectron2.data.build_detection_test_loader`.
+        Overwrite it if you'd like a different data loader.
+        """
+        return build_detection_test_loader(cfg, dataset_name)
+    @classmethod
+    def build_evaluator(cls, cfg, dataset_name):
+        """
+        Returns:
+            DatasetEvaluator or None
+        It is not implemented by default.
+        Raises:
+            NotImplementedError: always, unless overridden in a subclass.
+        """
+        raise NotImplementedError(
+            """
+If you want DefaultTrainer to automatically run evaluation,
+please implement `build_evaluator()` in subclasses (see train_net.py for example).
+Alternatively, you can call evaluation functions yourself (see Colab balloon tutorial for example).
+"""
+        )
+    @classmethod
+    def test(cls, cfg, model, evaluators=None):
+        """
+        Evaluate the given model. The given model is expected to already contain
+        weights to evaluate.
+        Args:
+            cfg (CfgNode):
+            model (nn.Module):
+            evaluators (list[DatasetEvaluator] or None): if None, will call
+                :meth:`build_evaluator`. Otherwise, must have the same length as
+                ``cfg.DATASETS.TEST``. A single DatasetEvaluator is also accepted
+                and treated as a one-element list.
+        Returns:
+            dict: a dict of result metrics. With exactly one test dataset the
+            metrics of that dataset are returned directly; otherwise the dict
+            maps each dataset name to its metrics.
+        """
+        logger = logging.getLogger(__name__)
+        if isinstance(evaluators, DatasetEvaluator):
+            evaluators = [evaluators]
+        if evaluators is not None:
+            assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
+                len(cfg.DATASETS.TEST), len(evaluators)
+            )
+        results = OrderedDict()
+        for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
+            data_loader = cls.build_test_loader(cfg, dataset_name)
+            # When evaluators are passed in as arguments,
+            # implicitly assume that evaluators can be created before data_loader.
+            if evaluators is not None:
+                evaluator = evaluators[idx]
+            else:
+                try:
+                    evaluator = cls.build_evaluator(cfg, dataset_name)
+                except NotImplementedError:
+                    # ``Logger.warn`` is a deprecated alias; use ``warning``.
+                    logger.warning(
+                        "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
+                        "or implement its `build_evaluator` method."
+                    )
+                    results[dataset_name] = {}
+                    continue
+            results_i = inference_on_dataset(model, data_loader, evaluator)
+            results[dataset_name] = results_i
+            if comm.is_main_process():
+                assert isinstance(
+                    results_i, dict
+                ), "Evaluator must return a dict on the main process. Got {} instead.".format(
+                    results_i
+                )
+                logger.info("Evaluation results for {} in csv format:".format(dataset_name))
+                print_csv_format(results_i)
+        if len(results) == 1:
+            results = list(results.values())[0]
+        return results
+    @staticmethod
+    def auto_scale_workers(cfg, num_workers: int):
+        """
+        When the config is defined for certain number of workers (according to
+        ``cfg.SOLVER.REFERENCE_WORLD_SIZE``) that's different from the number of
+        workers currently in use, returns a new cfg where the total batch size
+        is scaled so that the per-GPU batch size stays the same as the
+        original ``IMS_PER_BATCH // REFERENCE_WORLD_SIZE``.
+        Other config options are also scaled accordingly:
+        * training steps and warmup steps are scaled inverse proportionally.
+        * learning rate are scaled proportionally, following :paper:`ImageNet in 1h`.
+        For example, with the original config like the following:
+        .. code-block:: yaml
+            IMS_PER_BATCH: 16
+            BASE_LR: 0.1
+            REFERENCE_WORLD_SIZE: 8
+            MAX_ITER: 5000
+            STEPS: (4000,)
+            CHECKPOINT_PERIOD: 1000
+        When this config is used on 16 GPUs instead of the reference number 8,
+        calling this method will return a new config with:
+        .. code-block:: yaml
+            IMS_PER_BATCH: 32
+            BASE_LR: 0.2
+            REFERENCE_WORLD_SIZE: 16
+            MAX_ITER: 2500
+            STEPS: (2000,)
+            CHECKPOINT_PERIOD: 500
+        Note that both the original config and this new config can be trained on 16 GPUs.
+        It's up to user whether to enable this feature (by setting ``REFERENCE_WORLD_SIZE``).
+        Args:
+            cfg (CfgNode): the config to scale; it is not modified.
+            num_workers (int): the number of workers currently in use.
+        Returns:
+            CfgNode: a new config. Same as original if ``cfg.SOLVER.REFERENCE_WORLD_SIZE==0``
+            or if it already equals ``num_workers``.
+        """
+        old_world_size = cfg.SOLVER.REFERENCE_WORLD_SIZE
+        if old_world_size == 0 or old_world_size == num_workers:
+            return cfg
+        cfg = cfg.clone()
+        # Remember the frozen state so it can be restored on the returned clone.
+        frozen = cfg.is_frozen()
+        cfg.defrost()
+        assert (
+            cfg.SOLVER.IMS_PER_BATCH % old_world_size == 0
+        ), "Invalid REFERENCE_WORLD_SIZE in config!"
+        scale = num_workers / old_world_size
+        bs = cfg.SOLVER.IMS_PER_BATCH = int(round(cfg.SOLVER.IMS_PER_BATCH * scale))
+        lr = cfg.SOLVER.BASE_LR = cfg.SOLVER.BASE_LR * scale
+        max_iter = cfg.SOLVER.MAX_ITER = int(round(cfg.SOLVER.MAX_ITER / scale))
+        warmup_iter = cfg.SOLVER.WARMUP_ITERS = int(round(cfg.SOLVER.WARMUP_ITERS / scale))
+        cfg.SOLVER.STEPS = tuple(int(round(s / scale)) for s in cfg.SOLVER.STEPS)
+        cfg.TEST.EVAL_PERIOD = int(round(cfg.TEST.EVAL_PERIOD / scale))
+        cfg.SOLVER.CHECKPOINT_PERIOD = int(round(cfg.SOLVER.CHECKPOINT_PERIOD / scale))
+        cfg.SOLVER.REFERENCE_WORLD_SIZE = num_workers  # maintain invariant
+        logger = logging.getLogger(__name__)
+        logger.info(
+            f"Auto-scaling the config to batch_size={bs}, learning_rate={lr}, "
+            f"max_iter={max_iter}, warmup={warmup_iter}."
+        )
+        if frozen:
+            cfg.freeze()
+        return cfg
+# Access basic attributes from the underlying trainer
+# Each name becomes a read/write property on DefaultTrainer that forwards to
+# the same-named attribute of ``self._trainer``.
+for _attr in ["model", "data_loader", "optimizer"]:
+    setattr(
+        DefaultTrainer,
+        _attr,
+        property(
+            # getter
+            # (``x=_attr`` binds the current name as a default argument, so each
+            # lambda keeps its own attribute name instead of the loop's last value)
+            lambda self, x=_attr: getattr(self._trainer, x),
+            # setter
+            lambda self, value, x=_attr: setattr(self._trainer, x, value),
+        ),
+    )
--- a/detectron2/engine/hooks.py
+++ b/detectron2/engine/hooks.py
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+import datetime
+import itertools
+import logging
+import math
+import operator
+import os
+import tempfile
+import time
+import warnings
+from collections import Counter
+import torch
+from fvcore.common.checkpoint import Checkpointer
+from fvcore.common.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer
+from fvcore.common.param_scheduler import ParamScheduler
+from fvcore.common.timer import Timer
+from fvcore.nn.precise_bn import get_bn_modules, update_bn_stats
+import detectron2.utils.comm as comm
+from detectron2.evaluation.testing import flatten_results_dict
+from detectron2.solver import LRMultiplier
+from detectron2.utils.events import EventStorage, EventWriter
+from detectron2.utils.file_io import PathManager
+from .train_loop import HookBase
+__all__ = [
+    "CallbackHook",
+    "IterationTimer",
+    "PeriodicWriter",
+    "PeriodicCheckpointer",
+    "BestCheckpointer",
+    "LRScheduler",
+    "AutogradProfiler",
+    "EvalHook",
+    "PreciseBN",
+    "TorchProfiler",
+    "TorchMemoryStats",
+]
+"""
+Implement some common hooks.
+"""
+class CallbackHook(HookBase):
+    """
+    A hook assembled from user-supplied callback functions.
+    Every callback is optional; a stage without a callback does nothing.
+    """
+    def __init__(self, *, before_train=None, after_train=None, before_step=None, after_step=None):
+        """
+        Args:
+            before_train, after_train, before_step, after_step (callable or None):
+                each one, if given, is invoked with a single argument: the trainer.
+        """
+        self._before_train, self._after_train = before_train, after_train
+        self._before_step, self._after_step = before_step, after_step
+    def before_train(self):
+        callback = self._before_train
+        if callback:
+            callback(self.trainer)
+    def after_train(self):
+        callback = self._after_train
+        if callback:
+            callback(self.trainer)
+        # The callbacks may be closures that keep the trainer alive, so drop
+        # every stored callback once training is over to avoid reference cycles.
+        for name in ("_before_train", "_after_train", "_before_step", "_after_step"):
+            delattr(self, name)
+    def before_step(self):
+        callback = self._before_step
+        if callback:
+            callback(self.trainer)
+    def after_step(self):
+        callback = self._after_step
+        if callback:
+            callback(self.trainer)
+class IterationTimer(HookBase):
+    """
+    Track the time spent for each iteration (each run_step call in the trainer).
+    Print a summary in the end of training.
+    This hook uses the time between the call to its :meth:`before_step`
+    and :meth:`after_step` methods.
+    Under the convention that :meth:`before_step` of all hooks should only
+    take negligible amount of time, the :class:`IterationTimer` hook should be
+    placed at the beginning of the list of hooks to obtain accurate timing.
+    """
+    def __init__(self, warmup_iter=3):
+        """
+        Args:
+            warmup_iter (int): the number of iterations at the beginning to exclude
+                from timing.
+        """
+        self._warmup_iter = warmup_iter
+        # Times a single step; reset at every before_step.
+        self._step_timer = Timer()
+        # Wall-clock reference for the "Total training time" report.
+        self._start_time = time.perf_counter()
+        # Resumed in before_step and paused in after_step, so it is only meant to
+        # accumulate the time spent between those two calls.
+        self._total_timer = Timer()
+    def before_train(self):
+        """
+        Restart the wall-clock reference and the accumulated step timer, leaving
+        the latter paused until the first :meth:`before_step`.
+        """
+        self._start_time = time.perf_counter()
+        self._total_timer.reset()
+        self._total_timer.pause()
+    def after_train(self):
+        """
+        Log the overall training speed (when at least one post-warmup iteration
+        was timed) and the total training time, including the share spent on hooks.
+        """
+        logger = logging.getLogger(__name__)
+        total_time = time.perf_counter() - self._start_time
+        total_time_minus_hooks = self._total_timer.seconds()
+        # Whatever wall-clock time was not accumulated by the step timer is
+        # attributed to hooks.
+        hook_time = total_time - total_time_minus_hooks
+        # Number of iterations run in this session, excluding the warmup ones.
+        num_iter = self.trainer.storage.iter + 1 - self.trainer.start_iter - self._warmup_iter
+        if num_iter > 0 and total_time_minus_hooks > 0:
+            # Speed is meaningful only after warmup
+            # NOTE this format is parsed by grep in some scripts
+            logger.info(
+                "Overall training speed: {} iterations in {} ({:.4f} s / it)".format(
+                    num_iter,
+                    str(datetime.timedelta(seconds=int(total_time_minus_hooks))),
+                    total_time_minus_hooks / num_iter,
+                )
+            )
+        logger.info(
+            "Total training time: {} ({} on hooks)".format(
+                str(datetime.timedelta(seconds=int(total_time))),
+                str(datetime.timedelta(seconds=int(hook_time))),
+            )
+        )
+    def before_step(self):
+        """
+        Start timing the upcoming step and resume the accumulated timer.
+        """
+        self._step_timer.reset()
+        self._total_timer.resume()
+    def after_step(self):
+        """
+        Record the step time in the trainer's storage once warmup is over;
+        during warmup, restart both totals so warmup iterations are not counted.
+        """
+        # +1 because we're in after_step, the current step is done
+        # but not yet counted
+        iter_done = self.trainer.storage.iter - self.trainer.start_iter + 1
+        if iter_done >= self._warmup_iter:
+            sec = self._step_timer.seconds()
+            self.trainer.storage.put_scalars(time=sec)
+        else:
+            # Still warming up: discard everything measured so far.
+            self._start_time = time.perf_counter()
+            self._total_timer.reset()
+        # Stop accumulating until the next before_step, so time spent in the
+        # remaining hooks is not counted as step time.
+        self._total_timer.pause()
+class PeriodicWriter(HookBase):
+    """
+    Periodically flush events by calling ``writer.write()`` on each writer.
+    The writers run every ``period`` iterations and after the last iteration.
+    ``period`` has no influence on how each writer smooths its data.
+    """
+    def __init__(self, writers, period=20):
+        """
+        Args:
+            writers (list[EventWriter]): the EventWriter objects to drive
+            period (int): number of iterations between two writes
+        """
+        self._writers = writers
+        for candidate in writers:
+            assert isinstance(candidate, EventWriter), candidate
+        self._period = period
+    def after_step(self):
+        cur_iter = self.trainer.iter
+        on_period = (cur_iter + 1) % self._period == 0
+        # Also write on the very last iteration, whatever the period is.
+        if on_period or cur_iter == self.trainer.max_iter - 1:
+            for writer in self._writers:
+                writer.write()
+    def after_train(self):
+        for writer in self._writers:
+            # Other hooks' after_train may have produced new data;
+            # flush it before the writer is closed.
+            writer.write()
+            writer.close()
+class PeriodicCheckpointer(_PeriodicCheckpointer, HookBase):
+    """
+    Hook flavor of :class:`detectron2.checkpoint.PeriodicCheckpointer`.
+    When used as a hook it cannot save any additional data beyond what the
+    given `checkpointer` already defines.
+    It is executed every ``period`` iterations and after the last iteration.
+    """
+    def before_train(self):
+        # Take the final iteration count from the trainer.
+        trainer = self.trainer
+        self.max_iter = trainer.max_iter
+    def after_step(self):
+        # Only the iteration is forwarded: there is no way to pass **kwargs here.
+        current_iter = self.trainer.iter
+        self.step(current_iter)
+class BestCheckpointer(HookBase):
+    """
+    Checkpoints best weights based off given metric.
+    This hook should be used in conjunction to and executed after the hook
+    that produces the metric, e.g. `EvalHook`.
+    """
+    def __init__(
+        self,
+        eval_period: int,
+        checkpointer: Checkpointer,
+        val_metric: str,
+        mode: str = "max",
+        file_prefix: str = "model_best",
+    ) -> None:
+        """
+        Args:
+            eval_period (int): the period `EvalHook` is set to run.
+            checkpointer: the checkpointer object used to save checkpoints.
+            val_metric (str): validation metric to track for best checkpoint, e.g. "bbox/AP50"
+            mode (str): one of {'max', 'min'}. controls whether the chosen val metric should be
+                maximized or minimized, e.g. for "bbox/AP50" it should be "max"
+            file_prefix (str): the prefix of checkpoint's filename, defaults to "model_best"
+        """
+        self._logger = logging.getLogger(__name__)
+        self._period = eval_period
+        self._val_metric = val_metric
+        assert mode in [
+            "max",
+            "min",
+        ], f'Mode "{mode}" to `BestCheckpointer` is unknown. It should be one of {"max", "min"}.'
+        # "Better" means strictly greater in max mode and strictly smaller in min mode.
+        if mode == "max":
+            self._compare = operator.gt
+        else:
+            self._compare = operator.lt
+        self._checkpointer = checkpointer
+        self._file_prefix = file_prefix
+        self.best_metric = None
+        self.best_iter = None
+    def _update_best(self, val, iteration):
+        """
+        Remember ``val`` (seen at ``iteration``) as the best metric so far.
+        Returns:
+            bool: False if ``val`` is NaN or infinite (nothing is stored), True otherwise.
+        """
+        if math.isnan(val) or math.isinf(val):
+            return False
+        self.best_metric = val
+        self.best_iter = iteration
+        return True
+    def _best_checking(self):
+        """
+        Compare the latest stored value of the tracked metric against the best
+        one seen so far and save a checkpoint when it is the first or a better value.
+        """
+        metric_tuple = self.trainer.storage.latest().get(self._val_metric)
+        if metric_tuple is None:
+            # The two literals are concatenated, so the first one must end with a space.
+            self._logger.warning(
+                f"Given val metric {self._val_metric} does not seem to be computed/stored. "
+                "Will not be checkpointing based on it."
+            )
+            return
+        else:
+            latest_metric, metric_iter = metric_tuple
+        if self.best_metric is None:
+            if self._update_best(latest_metric, metric_iter):
+                additional_state = {"iteration": metric_iter}
+                self._checkpointer.save(f"{self._file_prefix}", **additional_state)
+                self._logger.info(
+                    f"Saved first model at {self.best_metric:0.5f} @ {self.best_iter} steps"
+                )
+        elif self._compare(latest_metric, self.best_metric):
+            additional_state = {"iteration": metric_iter}
+            self._checkpointer.save(f"{self._file_prefix}", **additional_state)
+            # Trailing space after "is" keeps the score from being glued to the word.
+            self._logger.info(
+                f"Saved best model as latest eval score for {self._val_metric} is "
+                f"{latest_metric:0.5f}, better than last best score "
+                f"{self.best_metric:0.5f} @ iteration {self.best_iter}."
+            )
+            self._update_best(latest_metric, metric_iter)
+        else:
+            self._logger.info(
+                f"Not saving as latest eval score for {self._val_metric} is {latest_metric:0.5f}, "
+                f"not better than best score {self.best_metric:0.5f} @ iteration {self.best_iter}."
+            )
+    def after_step(self):
+        # same conditions as `EvalHook`
+        next_iter = self.trainer.iter + 1
+        if (
+            self._period > 0
+            and next_iter % self._period == 0
+            and next_iter != self.trainer.max_iter
+        ):
+            self._best_checking()
+    def after_train(self):
+        # same conditions as `EvalHook`
+        if self.trainer.iter + 1 >= self.trainer.max_iter:
+            self._best_checking()
+class LRScheduler(HookBase):
+    """
+    Hook that steps a learning-rate scheduler after every iteration and
+    records the current LR in the trainer's storage.
+    """
+    def __init__(self, optimizer=None, scheduler=None):
+        """
+        Args:
+            optimizer (torch.optim.Optimizer):
+            scheduler (torch.optim.LRScheduler or fvcore.common.param_scheduler.ParamScheduler):
+                a :class:`ParamScheduler` is interpreted as the multiplier applied
+                to the base LR in the optimizer.
+        Any argument left as None is looked up on the trainer instead.
+        """
+        self._optimizer = optimizer
+        self._scheduler = scheduler
+    def before_train(self):
+        self._optimizer = self._optimizer or self.trainer.optimizer
+        sched = self.scheduler
+        if isinstance(sched, ParamScheduler):
+            # Wrap the multiplier schedule in an LRMultiplier bound to the optimizer.
+            self._scheduler = LRMultiplier(
+                self._optimizer,
+                sched,
+                self.trainer.max_iter,
+                last_iter=self.trainer.iter - 1,
+            )
+        self._best_param_group_id = LRScheduler.get_best_param_group_id(self._optimizer)
+    @staticmethod
+    def get_best_param_group_id(optimizer):
+        """
+        Heuristically choose the index of the param group whose LR is summarized:
+        the first group holding the most parameters, or, when every group holds a
+        single parameter, the first group using the most common LR.
+        """
+        groups = optimizer.param_groups
+        max_size = max(len(group["params"]) for group in groups)
+        if max_size == 1:
+            # All groups have one parameter: fall back to the most common LR.
+            lr_histogram = Counter([group["lr"] for group in groups])
+            common_lr = lr_histogram.most_common()[0][0]
+            matches = (idx for idx, group in enumerate(groups) if group["lr"] == common_lr)
+        else:
+            matches = (
+                idx for idx, group in enumerate(groups) if len(group["params"]) == max_size
+            )
+        return next(matches, None)
+    def after_step(self):
+        group = self._optimizer.param_groups[self._best_param_group_id]
+        self.trainer.storage.put_scalar("lr", group["lr"], smoothing_hint=False)
+        self.scheduler.step()
+    @property
+    def scheduler(self):
+        return self._scheduler or self.trainer.scheduler
+    def state_dict(self):
+        sched = self.scheduler
+        if not isinstance(sched, torch.optim.lr_scheduler._LRScheduler):
+            return {}
+        return sched.state_dict()
+    def load_state_dict(self, state_dict):
+        sched = self.scheduler
+        if not isinstance(sched, torch.optim.lr_scheduler._LRScheduler):
+            return
+        logging.getLogger(__name__).info("Loading scheduler from state_dict ...")
+        sched.load_state_dict(state_dict)
+class TorchProfiler(HookBase):
+    """
+    A hook which runs `torch.profiler.profile`.
+    Examples:
+    ::
+        hooks.TorchProfiler(
+             lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR
+        )
+    The above example will run the profiler for iteration 10~20 and dump
+    results to ``OUTPUT_DIR``. We did not profile the first few iterations
+    because they are typically slower than the rest.
+    The result files can be loaded in the ``chrome://tracing`` page in chrome browser,
+    and the tensorboard visualizations can be visualized using
+    ``tensorboard --logdir OUTPUT_DIR/log``
+    """
+    def __init__(self, enable_predicate, output_dir, *, activities=None, save_tensorboard=True):
+        """
+        Args:
+            enable_predicate (callable[trainer -> bool]): a function which takes a trainer,
+                and returns whether to enable the profiler.
+                It will be called once every step, and can be used to select which steps to profile.
+            output_dir (str): the output directory to dump tracing files.
+            activities (iterable): same as in `torch.profiler.profile`.
+            save_tensorboard (bool): whether to save tensorboard visualizations at (output_dir)/log/
+        """
+        self._enable_predicate = enable_predicate
+        self._activities = activities
+        self._output_dir = output_dir
+        self._save_tensorboard = save_tensorboard
+    def before_step(self):
+        """
+        Start a fresh profiler for this step if the predicate selects it;
+        otherwise mark the step as not profiled by setting ``_profiler`` to None.
+        """
+        if self._enable_predicate(self.trainer):
+            if self._save_tensorboard:
+                # One tensorboard trace directory per profiled iteration, with a
+                # per-rank worker name.
+                on_trace_ready = torch.profiler.tensorboard_trace_handler(
+                    os.path.join(
+                        self._output_dir,
+                        "log",
+                        "profiler-tensorboard-iter{}".format(self.trainer.iter),
+                    ),
+                    f"worker{comm.get_rank()}",
+                )
+            else:
+                on_trace_ready = None
+            self._profiler = torch.profiler.profile(
+                activities=self._activities,
+                on_trace_ready=on_trace_ready,
+                record_shapes=True,
+                profile_memory=True,
+                with_stack=True,
+                with_flops=True,
+            )
+            # Entered manually because the matching exit happens in after_step.
+            self._profiler.__enter__()
+        else:
+            self._profiler = None
+    def after_step(self):
+        """
+        Stop the profiler started in :meth:`before_step` (if any) and, when
+        tensorboard saving is off, export a chrome trace into ``output_dir``.
+        """
+        if self._profiler is None:
+            return
+        self._profiler.__exit__(None, None, None)
+        if not self._save_tensorboard:
+            PathManager.mkdirs(self._output_dir)
+            out_file = os.path.join(
+                self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter)
+            )
+            if "://" not in out_file:
+                self._profiler.export_chrome_trace(out_file)
+            else:
+                # Support non-posix filesystems: export to a local temporary file
+                # first, then copy its content to the destination through PathManager.
+                with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d:
+                    tmp_file = os.path.join(d, "tmp.json")
+                    self._profiler.export_chrome_trace(tmp_file)
+                    with open(tmp_file) as f:
+                        content = f.read()
+                with PathManager.open(out_file, "w") as f:
+                    f.write(content)
+class AutogradProfiler(TorchProfiler):
+    """
+    A hook which runs `torch.autograd.profiler.profile`.
+    Examples:
+    ::
+        hooks.AutogradProfiler(
+             lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR
+        )
+    The above example will run the profiler for iteration 10~20 and dump
+    results to ``OUTPUT_DIR``. We did not profile the first few iterations
+    because they are typically slower than the rest.
+    The result files can be loaded in the ``chrome://tracing`` page in chrome browser.
+    Note:
+        When used together with NCCL on older version of GPUs,
+        autograd profiler may cause deadlock because it unnecessarily allocates
+        memory on every device it sees. The memory management calls, if
+        interleaved with NCCL calls, lead to deadlock on GPUs that do not
+        support ``cudaLaunchCooperativeKernelMultiDevice``.
+    """
+    def __init__(self, enable_predicate, output_dir, *, use_cuda=True):
+        """
+        Args:
+            enable_predicate (callable[trainer -> bool]): a function which takes a trainer,
+                and returns whether to enable the profiler.
+                It will be called once every step, and can be used to select which steps to profile.
+            output_dir (str): the output directory to dump tracing files.
+            use_cuda (bool): same as in `torch.autograd.profiler.profile`.
+        """
+        warnings.warn("AutogradProfiler has been deprecated in favor of TorchProfiler.")
+        # The inherited ``after_step`` reads ``_save_tensorboard``; leaving it unset
+        # raised AttributeError after the first profiled step. Initialize the base
+        # class with tensorboard saving off so ``after_step`` exports the chrome
+        # trace into ``output_dir``.
+        super().__init__(enable_predicate, output_dir, save_tensorboard=False)
+        self._use_cuda = use_cuda
+    def before_step(self):
+        """
+        Start an autograd profiler for this step if the predicate selects it;
+        otherwise set ``_profiler`` to None so ``after_step`` does nothing.
+        """
+        if self._enable_predicate(self.trainer):
+            self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda)
+            # Entered manually; the inherited after_step performs the exit.
+            self._profiler.__enter__()
+        else:
+            self._profiler = None
+class EvalHook(HookBase):
+    """
+    Periodically run an evaluation function, and once more when training ends.
+    It fires every ``eval_period`` iterations and after the last iteration.
+    """
+    def __init__(self, eval_period, eval_function):
+        """
+        Args:
+            eval_period (int): how often `eval_function` runs. Use 0 to skip the
+                periodic evaluation (the one after the last iteration still runs).
+            eval_function (callable): takes no arguments and returns a nested
+                dict of evaluation metrics.
+        Note:
+            This hook must be enabled in all or none workers.
+            To let only some workers evaluate, hand the others a no-op
+            function (`eval_function=lambda: None`).
+        """
+        self._period = eval_period
+        self._func = eval_function
+    def _do_eval(self):
+        metrics = self._func()
+        if metrics:
+            assert isinstance(
+                metrics, dict
+            ), "Eval function must return a dict. Got {} instead.".format(metrics)
+            flat = flatten_results_dict(metrics)
+            # Every flattened value has to be convertible to float before it is stored.
+            for name, value in flat.items():
+                try:
+                    float(value)
+                except Exception as err:
+                    raise ValueError(
+                        "[EvalHook] eval_function should return a nested dict of float. "
+                        "Got '{}: {}' instead.".format(name, value)
+                    ) from err
+            self.trainer.storage.put_scalars(**flat, smoothing_hint=False)
+        # Workers may need different amounts of time to evaluate;
+        # the barrier lets them enter the next iteration together.
+        comm.synchronize()
+    def after_step(self):
+        upcoming = self.trainer.iter + 1
+        # The evaluation for the final iteration is left to after_train.
+        if (
+            self._period > 0
+            and upcoming % self._period == 0
+            and upcoming != self.trainer.max_iter
+        ):
+            self._do_eval()
+    def after_train(self):
+        # Skip the evaluation when training stopped early (e.g. it failed).
+        if self.trainer.iter + 1 >= self.trainer.max_iter:
+            self._do_eval()
+        # The function is likely a closure referencing the trainer;
+        # remove it so no circular reference is left behind.
+        del self._func
+class PreciseBN(HookBase):
+    """
+    The standard implementation of BatchNorm uses EMA in inference, which is
+    sometimes suboptimal.
+    This class computes the true average of statistics rather than the moving average,
+    and put true averages to every BN layer in the given model.
+    It is executed every ``period`` iterations and after the last iteration.
+    """
+    def __init__(self, period, model, data_loader, num_iter):
+        """
+        Args:
+            period (int): the period this hook is run, or 0 to not run during training.
+                The hook will always run in the end of training.
+            model (nn.Module): a module whose all BN layers in training mode will be
+                updated by precise BN.
+                Note that user is responsible for ensuring the BN layers to be
+                updated are in training mode when this hook is triggered.
+            data_loader (iterable): it will produce data to be run by `model(data)`.
+            num_iter (int): number of iterations used to compute the precise
+                statistics.
+        """
+        self._logger = logging.getLogger(__name__)
+        # Set every attribute before the early return below. Previously a disabled
+        # hook had no `_period`, so `after_step` raised AttributeError on every
+        # non-final iteration.
+        self._period = period
+        self._num_iter = num_iter
+        self._data_iter = None
+        self._disabled = len(get_bn_modules(model)) == 0
+        if self._disabled:
+            self._logger.info(
+                "PreciseBN is disabled because model does not contain BN layers in training mode."
+            )
+            # A disabled hook never touches the model or the data, so do not keep them.
+            self._model = None
+            self._data_loader = None
+            return
+        self._model = model
+        self._data_loader = data_loader
+    def after_step(self):
+        """
+        Update the BN statistics on the last iteration and, when ``period`` is
+        positive, every ``period`` iterations. Does nothing when disabled.
+        """
+        if self._disabled:
+            return
+        next_iter = self.trainer.iter + 1
+        is_final = next_iter == self.trainer.max_iter
+        if is_final or (self._period > 0 and next_iter % self._period == 0):
+            self.update_stats()
+    def update_stats(self):
+        """
+        Update the model with precise statistics. Users can manually call this method.
+        """
+        if self._disabled:
+            return
+        if self._data_iter is None:
+            self._data_iter = iter(self._data_loader)
+        def data_loader():
+            # Endless generator over the shared iterator; it logs progress every
+            # 100 batches and leaves the stopping decision to the consumer.
+            for num_iter in itertools.count(1):
+                if num_iter % 100 == 0:
+                    self._logger.info(
+                        "Running precise-BN ... {}/{} iterations.".format(num_iter, self._num_iter)
+                    )
+                # This way we can reuse the same iterator
+                yield next(self._data_iter)
+        with EventStorage():  # capture events in a new storage to discard them
+            self._logger.info(
+                "Running precise-BN for {} iterations...  ".format(self._num_iter)
+                + "Note that this could produce different statistics every time."
+            )
+            update_bn_stats(self._model, data_loader(), self._num_iter)
+class TorchMemoryStats(HookBase):
+    """
+    Writes pytorch's cuda memory statistics periodically.
+    """
+    def __init__(self, period=20, max_runs=10):
+        """
+        Args:
+            period (int): Output stats each 'period' iterations
+            max_runs (int): Stop the logging after 'max_runs'
+        """
+        self._logger = logging.getLogger(__name__)
+        self._period = period
+        self._max_runs = max_runs
+        # Number of times the statistics have been logged so far.
+        self._runs = 0
+    def after_step(self):
+        """
+        Log CUDA memory statistics every ``period`` iterations and on the last
+        iteration, at most ``max_runs`` times. The last permitted run also logs
+        the full memory summary. Peak statistics are reset after each log.
+        """
+        # `>=` (not `>`): with `>` the hook logged `max_runs + 1` times and kept
+        # logging once more after the final memory summary had been printed.
+        if self._runs >= self._max_runs:
+            return
+        if (self.trainer.iter + 1) % self._period == 0 or (
+            self.trainer.iter == self.trainer.max_iter - 1
+        ):
+            if torch.cuda.is_available():
+                # Convert byte counts to MB.
+                max_reserved_mb = torch.cuda.max_memory_reserved() / 1024.0 / 1024.0
+                reserved_mb = torch.cuda.memory_reserved() / 1024.0 / 1024.0
+                max_allocated_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0
+                allocated_mb = torch.cuda.memory_allocated() / 1024.0 / 1024.0
+                self._logger.info(
+                    (
+                        " iter: {} "
+                        " max_reserved_mem: {:.0f}MB "
+                        " reserved_mem: {:.0f}MB "
+                        " max_allocated_mem: {:.0f}MB "
+                        " allocated_mem: {:.0f}MB "
+                    ).format(
+                        self.trainer.iter,
+                        max_reserved_mb,
+                        reserved_mb,
+                        max_allocated_mb,
+                        allocated_mb,
+                    )
+                )
+                self._runs += 1
+                if self._runs == self._max_runs:
+                    mem_summary = torch.cuda.memory_summary()
+                    self._logger.info("\n" + mem_summary)
+                # Start a new peak-tracking window for the next report.
+                torch.cuda.reset_peak_memory_stats()
--- a/detectron2/engine/launch.py
+++ b/detectron2/engine/launch.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+from datetime import timedelta
+import torch
+import torch.distributed as dist
+import torch.multiprocessing as mp
+from detectron2.utils import comm
+__all__ = ["DEFAULT_TIMEOUT", "launch"]
+DEFAULT_TIMEOUT = timedelta(minutes=30)
+def _find_free_port():
+    """
+    Ask the OS for a TCP port that is currently unused.
+    Returns:
+        int: a port number that was free when this function was called.
+    """
+    import socket
+    # The context manager closes the socket even if bind()/getsockname() raises.
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+        # Binding to port 0 will cause the OS to find an available port for us
+        sock.bind(("", 0))
+        port = sock.getsockname()[1]
+    # NOTE: there is still a chance the port could be taken by other processes.
+    return port
+def launch(
+    main_func,
+    num_gpus_per_machine,
+    num_machines=1,
+    machine_rank=0,
+    dist_url=None,
+    args=(),
+    timeout=DEFAULT_TIMEOUT,
+):
+    """
+    Launch multi-gpu or distributed training.
+    This function must be called on all machines involved in the training.
+    It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine.
+    When the total number of processes is 1, ``main_func`` is simply called in
+    the current process.
+    Args:
+        main_func: a function that will be called by `main_func(*args)`
+        num_gpus_per_machine (int): number of GPUs per machine
+        num_machines (int): the total number of machines
+        machine_rank (int): the rank of this machine
+        dist_url (str): url to connect to for distributed jobs, including protocol
+                       e.g. "tcp://127.0.0.1:8686".
+                       Can be set to "auto" to automatically select a free port on localhost
+        timeout (timedelta): timeout of the distributed workers
+        args (tuple): arguments passed to main_func
+    """
+    world_size = num_machines * num_gpus_per_machine
+    if world_size > 1:
+        # https://github.com/pytorch/pytorch/pull/14391
+        # TODO prctl in spawned processes
+        if dist_url == "auto":
+            assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs."
+            port = _find_free_port()
+            dist_url = f"tcp://127.0.0.1:{port}"
+        # `dist_url` defaults to None; check the type first so the default does
+        # not crash here with AttributeError in multi-machine jobs.
+        if num_machines > 1 and isinstance(dist_url, str) and dist_url.startswith("file://"):
+            logger = logging.getLogger(__name__)
+            logger.warning(
+                "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://"
+            )
+        # One worker process per local GPU; mp.spawn passes the local rank as the
+        # first argument of `_distributed_worker`.
+        mp.spawn(
+            _distributed_worker,
+            nprocs=num_gpus_per_machine,
+            args=(
+                main_func,
+                world_size,
+                num_gpus_per_machine,
+                machine_rank,
+                dist_url,
+                args,
+                timeout,
+            ),
+            daemon=False,
+        )
+    else:
+        main_func(*args)
+def _distributed_worker(
+    local_rank,
+    main_func,
+    world_size,
+    num_gpus_per_machine,
+    machine_rank,
+    dist_url,
+    args,
+    timeout=DEFAULT_TIMEOUT,
+):
+    """
+    Entry point of each spawned worker: join the global process group, create
+    the per-machine process groups, select this worker's GPU, then run
+    ``main_func(*args)``.
+    """
+    assert torch.cuda.is_available(), "cuda is not available. Please check your installation."
+    global_rank = local_rank + machine_rank * num_gpus_per_machine
+    try:
+        dist.init_process_group(
+            backend="NCCL",
+            init_method=dist_url,
+            world_size=world_size,
+            rank=global_rank,
+            timeout=timeout,
+        )
+    except Exception as e:
+        logging.getLogger(__name__).error("Process group URL: {}".format(dist_url))
+        raise e
+    # Build one process group per machine (the ranks living on that machine)
+    # and keep the one this worker belongs to.
+    assert comm._LOCAL_PROCESS_GROUP is None
+    for machine_idx in range(world_size // num_gpus_per_machine):
+        first_rank = machine_idx * num_gpus_per_machine
+        group = dist.new_group(list(range(first_rank, first_rank + num_gpus_per_machine)))
+        if machine_idx == machine_rank:
+            comm._LOCAL_PROCESS_GROUP = group
+    assert num_gpus_per_machine <= torch.cuda.device_count()
+    torch.cuda.set_device(local_rank)
+    # A synchronization here prevents a possible timeout after init_process_group
+    # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172
+    comm.synchronize()
+    main_func(*args)
--- a/detectron2/engine/train_loop.py
+++ b/detectron2/engine/train_loop.py
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+import numpy as np
+import time
+import weakref
+from typing import List, Mapping, Optional
+import torch
+from torch.nn.parallel import DataParallel, DistributedDataParallel
+import detectron2.utils.comm as comm
+from detectron2.utils.events import EventStorage, get_event_storage
+from detectron2.utils.logger import _log_api_usage
+__all__ = ["HookBase", "TrainerBase", "SimpleTrainer", "AMPTrainer"]
+class HookBase:
+    """
+    Base class for hooks that can be registered with :class:`TrainerBase`.
+    A hook may implement up to 4 methods, which are invoked as shown in the
+    following snippet:
+    ::
+        hook.before_train()
+        for iter in range(start_iter, max_iter):
+            hook.before_step()
+            trainer.run_step()
+            hook.after_step()
+        iter += 1
+        hook.after_train()
+    Notes:
+        1. Inside a hook method, ``self.trainer`` gives access to more
+           properties about the context (e.g., model, current iteration, or config
+           if using :class:`DefaultTrainer`).
+        2. Work done in :meth:`before_step` can often be done equivalently
+           in :meth:`after_step`.
+           A hook that takes non-trivial time is strongly recommended to do its
+           work in :meth:`after_step` rather than :meth:`before_step`.
+           By convention :meth:`before_step` should only take negligible time.
+           Keeping to this convention lets hooks that do care about the difference
+           between :meth:`before_step` and :meth:`after_step` (e.g., timer)
+           function properly.
+    """
+    trainer: "TrainerBase" = None
+    """
+    A weak reference to the trainer object. Set by the trainer when the hook is registered.
+    """
+    def before_train(self):
+        """
+        Called before the first iteration.
+        """
+    def after_train(self):
+        """
+        Called after the last iteration.
+        """
+    def before_step(self):
+        """
+        Called before each iteration.
+        """
+    def after_step(self):
+        """
+        Called after each iteration.
+        """
+    def state_dict(self):
+        """
+        Hooks carry no state by default; implement `state_dict` and
+        `load_state_dict` to make a hook checkpointable.
+        """
+        return dict()
+class TrainerBase:
+    """
+    Skeleton of an iterative trainer that drives a list of hooks.
+    Nothing is assumed beyond "training runs in a loop": a subclass defines
+    what a single iteration does by overriding :meth:`run_step`.
+    This class knows nothing about a dataloader, optimizer, model, etc.
+    Attributes:
+        iter(int): the current iteration.
+        start_iter(int): The iteration to start with.
+            By convention the minimum possible value is 0.
+        max_iter(int): The iteration to end training.
+        storage(EventStorage): An EventStorage that's opened during the course of training.
+    """
+    def __init__(self) -> None:
+        self._hooks: List[HookBase] = []
+        self.iter: int = 0
+        self.start_iter: int = 0
+        # Annotation-only declarations; both get their value in `train`.
+        self.max_iter: int
+        self.storage: EventStorage
+        _log_api_usage("trainer." + self.__class__.__name__)
+    def register_hooks(self, hooks: List[Optional[HookBase]]) -> None:
+        """
+        Attach hooks to this trainer. Hooks run in registration order and
+        ``None`` entries are dropped.
+        Args:
+            hooks (list[Optional[HookBase]]): list of hooks
+        """
+        accepted = [hook for hook in hooks if hook is not None]
+        for hook in accepted:
+            assert isinstance(hook, HookBase)
+            # Hand the hook a weak proxy instead of a strong reference so that
+            # hooks and trainer do not own each other. A reference cycle is
+            # normally harmless but leaks memory when the objects define __del__:
+            # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/
+            hook.trainer = weakref.proxy(self)
+        self._hooks.extend(accepted)
+    def train(self, start_iter: int, max_iter: int):
+        """
+        Run iterations ``start_iter`` .. ``max_iter - 1`` with all hook callbacks.
+        Args:
+            start_iter, max_iter (int): See docs above
+        """
+        log = logging.getLogger(__name__)
+        log.info("Starting training from iteration {}".format(start_iter))
+        self.iter = start_iter
+        self.start_iter = start_iter
+        self.max_iter = max_iter
+        with EventStorage(start_iter) as self.storage:
+            try:
+                self.before_train()
+                for self.iter in range(start_iter, max_iter):
+                    self.before_step()
+                    self.run_step()
+                    self.after_step()
+                # Only reached when no step raised. `after_train` can compare
+                # self.iter with max_iter to tell a completed run from one
+                # that was aborted by an exception.
+                self.iter += 1
+            except Exception:
+                log.exception("Exception during training:")
+                raise
+            finally:
+                # Runs on success and on failure alike.
+                self.after_train()
+    def before_train(self):
+        """Forward ``before_train`` to every hook, in registration order."""
+        for hook in self._hooks:
+            hook.before_train()
+    def after_train(self):
+        """Sync ``storage.iter`` with ``self.iter``, then forward ``after_train``."""
+        self.storage.iter = self.iter
+        for hook in self._hooks:
+            hook.after_train()
+    def before_step(self):
+        """Forward ``before_step`` to every hook."""
+        # Keep storage.iter == trainer.iter for the whole duration of a step.
+        self.storage.iter = self.iter
+        for hook in self._hooks:
+            hook.before_step()
+    def after_step(self):
+        """Forward ``after_step`` to every hook."""
+        for hook in self._hooks:
+            hook.after_step()
+    def run_step(self):
+        """Perform one iteration; must be provided by subclasses."""
+        raise NotImplementedError
+    def state_dict(self):
+        """
+        Returns:
+            dict: ``"iteration"`` plus, when any hook reports a non-empty
+            state, ``"hooks"`` mapping hook class ``__qualname__`` to that state.
+        """
+        state = {"iteration": self.iter}
+        per_hook = {}
+        for hook in self._hooks:
+            hook_state = hook.state_dict()
+            if not hook_state:
+                continue
+            hook_name = type(hook).__qualname__
+            # TODO handle repetitive stateful hooks
+            # (for now only the first hook of a given class is recorded)
+            if hook_name not in per_hook:
+                per_hook[hook_name] = hook_state
+        if per_hook:
+            state["hooks"] = per_hook
+        return state
+    def load_state_dict(self, state_dict):
+        """
+        Restore the iteration counter and hand each saved hook state to the
+        first registered hook whose class ``__qualname__`` equals the saved key.
+        Saved states without a matching hook are skipped with a warning.
+        """
+        log = logging.getLogger(__name__)
+        self.iter = state_dict["iteration"]
+        for key, value in state_dict.get("hooks", {}).items():
+            restored = False
+            for hook in self._hooks:
+                try:
+                    hook_name = type(hook).__qualname__
+                except AttributeError:
+                    continue
+                if hook_name != key:
+                    continue
+                hook.load_state_dict(value)
+                restored = True
+                break
+            if not restored:
+                log.warning(f"Cannot find the hook '{key}', its state_dict is ignored.")
+class SimpleTrainer(TrainerBase):
+    """
+    Trainer for the most common kind of task:
+    single-cost single-optimizer single-data-source iterative optimization,
+    optionally using data-parallelism.
+    Each step is assumed to consist of:
+    1. Compute the loss with a data from the data_loader.
+    2. Compute the gradients with the above loss.
+    3. Update the model with the optimizer.
+    Everything else that happens during training (checkpointing, logging,
+    evaluation, LR schedule) is left to hooks, which are registered through
+    :meth:`TrainerBase.register_hooks`.
+    For anything fancier than this,
+    either subclass TrainerBase and implement your own `run_step`,
+    or write your own training loop.
+    """
+    def __init__(self, model, data_loader, optimizer):
+        """
+        Args:
+            model: a torch Module. Takes a data from data_loader and returns a
+                dict of losses.
+            data_loader: an iterable. Contains data to be used to call model.
+            optimizer: a torch optimizer.
+        """
+        super().__init__()
+        # The trainer puts the model into training mode.
+        # Training a model that is in eval mode is still valid, though: to make
+        # the model (or a submodule of it) behave like evaluation during
+        # training, overwrite its train() method.
+        model.train()
+        self.model = model
+        self.data_loader = data_loader
+        self._data_loader_iter = iter(data_loader)
+        self.optimizer = optimizer
+    def run_step(self):
+        """
+        Run one iteration of the standard training logic described above.
+        """
+        assert self.model.training, "[SimpleTrainer] model was changed to eval mode!"
+        tic = time.perf_counter()
+        # To do something with the data, wrap the dataloader.
+        batch = next(self._data_loader_iter)
+        data_time = time.perf_counter() - tic
+        # To do something with the losses, wrap the model.
+        model_out = self.model(batch)
+        if not isinstance(model_out, torch.Tensor):
+            loss_dict = model_out
+            losses = sum(loss_dict.values())
+        else:
+            # A bare tensor is treated as the total loss.
+            losses = model_out
+            loss_dict = {"total_loss": model_out}
+        # To accumulate gradients or do something similar, wrap the optimizer
+        # with a custom `zero_grad()` method.
+        self.optimizer.zero_grad()
+        losses.backward()
+        self._write_metrics(loss_dict, data_time)
+        # For gradient clipping/scaling or other processing, wrap the optimizer
+        # with a custom `step()` method. But it is suboptimal as explained in
+        # https://arxiv.org/abs/2006.15704 Sec 3.2.4
+        self.optimizer.step()
+    def _write_metrics(
+        self,
+        loss_dict: Mapping[str, torch.Tensor],
+        data_time: float,
+        prefix: str = "",
+    ) -> None:
+        """Instance-level entry point; delegates to :meth:`write_metrics`."""
+        SimpleTrainer.write_metrics(loss_dict, data_time, prefix)
+    @staticmethod
+    def write_metrics(
+        loss_dict: Mapping[str, torch.Tensor],
+        data_time: float,
+        prefix: str = "",
+    ) -> None:
+        """
+        Gather the per-worker metrics and log their reduction on the main process.
+        Args:
+            loss_dict (dict): dict of scalar losses
+            data_time (float): time taken by the dataloader iteration
+            prefix (str): prefix for logging keys
+        Raises:
+            FloatingPointError: on the main process, when the sum of the
+                averaged metrics is not finite.
+        """
+        local_metrics = {}
+        for name, value in loss_dict.items():
+            local_metrics[name] = value.detach().cpu().item()
+        local_metrics["data_time"] = data_time
+        # Gather metrics among all workers for logging
+        # This assumes we do DDP-style training, which is currently the only
+        # supported method in detectron2.
+        gathered = comm.gather(local_metrics)
+        if not comm.is_main_process():
+            return
+        storage = get_event_storage()
+        # data_time among workers can have high variance. The actual latency
+        # caused by data_time is the maximum among workers.
+        slowest_data_time = np.max([worker.pop("data_time") for worker in gathered])
+        storage.put_scalar("data_time", slowest_data_time)
+        # Everything that is left gets averaged over the workers.
+        metrics_dict = {
+            key: np.mean([worker[key] for worker in gathered]) for key in gathered[0]
+        }
+        total_losses_reduced = sum(metrics_dict.values())
+        if not np.isfinite(total_losses_reduced):
+            raise FloatingPointError(
+                f"Loss became infinite or NaN at iteration={storage.iter}!\n"
+                f"loss_dict = {metrics_dict}"
+            )
+        storage.put_scalar("{}total_loss".format(prefix), total_losses_reduced)
+        if len(metrics_dict) > 1:
+            storage.put_scalars(**metrics_dict)
+    def state_dict(self):
+        """Return the base trainer state extended with the optimizer state."""
+        state = super().state_dict()
+        state["optimizer"] = self.optimizer.state_dict()
+        return state
+    def load_state_dict(self, state_dict):
+        """Restore the base trainer state, then the optimizer state."""
+        super().load_state_dict(state_dict)
+        self.optimizer.load_state_dict(state_dict["optimizer"])
+class AMPTrainer(SimpleTrainer):
+    """
+    Variant of :class:`SimpleTrainer` whose training step runs under
+    PyTorch's native automatic mixed precision.
+    """
+    def __init__(self, model, data_loader, optimizer, grad_scaler=None):
+        """
+        Args:
+            model, data_loader, optimizer: same as in :class:`SimpleTrainer`.
+            grad_scaler: torch GradScaler to automatically scale gradients.
+                A default ``GradScaler()`` is created when omitted.
+        """
+        unsupported = "AMPTrainer does not support single-process multi-device training!"
+        if isinstance(model, DistributedDataParallel):
+            device_ids = model.device_ids
+            assert not (device_ids and len(device_ids) > 1), unsupported
+        assert not isinstance(model, DataParallel), unsupported
+        super().__init__(model, data_loader, optimizer)
+        if grad_scaler is not None:
+            self.grad_scaler = grad_scaler
+        else:
+            from torch.cuda.amp import GradScaler
+            self.grad_scaler = GradScaler()
+    def run_step(self):
+        """
+        Run one iteration of the AMP training logic.
+        """
+        assert self.model.training, "[AMPTrainer] model was changed to eval mode!"
+        assert torch.cuda.is_available(), "[AMPTrainer] CUDA is required for AMP training!"
+        from torch.cuda.amp import autocast
+        tic = time.perf_counter()
+        batch = next(self._data_loader_iter)
+        data_time = time.perf_counter() - tic
+        # Only the forward pass and the loss summation run under autocast.
+        with autocast():
+            model_out = self.model(batch)
+            if not isinstance(model_out, torch.Tensor):
+                loss_dict = model_out
+                losses = sum(loss_dict.values())
+            else:
+                # A bare tensor is treated as the total loss.
+                losses = model_out
+                loss_dict = {"total_loss": model_out}
+        self.optimizer.zero_grad()
+        # Backward runs on the scaled loss; the scaler then drives the
+        # optimizer step and refreshes its own scale.
+        self.grad_scaler.scale(losses).backward()
+        self._write_metrics(loss_dict, data_time)
+        self.grad_scaler.step(self.optimizer)
+        self.grad_scaler.update()
+    def state_dict(self):
+        """Return the :class:`SimpleTrainer` state extended with the scaler state."""
+        state = super().state_dict()
+        state["grad_scaler"] = self.grad_scaler.state_dict()
+        return state
+    def load_state_dict(self, state_dict):
+        """Restore the :class:`SimpleTrainer` state, then the scaler state."""
+        super().load_state_dict(state_dict)
+        self.grad_scaler.load_state_dict(state_dict["grad_scaler"])
--- a/detectron2/evaluation/__init__.py
+++ b/detectron2/evaluation/__init__.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator
+from .coco_evaluation import COCOEvaluator
+from .rotated_coco_evaluation import RotatedCOCOEvaluator
+from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset
+from .lvis_evaluation import LVISEvaluator
+from .panoptic_evaluation import COCOPanopticEvaluator
+from .pascal_voc_evaluation import PascalVOCDetectionEvaluator
+from .sem_seg_evaluation import SemSegEvaluator
+from .testing import print_csv_format, verify_results
+# Export every name bound in this module so far that has no leading underscore.
+__all__ = [name for name in globals() if not name.startswith("_")]
--- a/detectron2/evaluation/cityscapes_evaluation.py
+++ b/detectron2/evaluation/cityscapes_evaluation.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+import glob
+import logging
+import numpy as np
+import os
+import tempfile
+from collections import OrderedDict
+import torch
+from PIL import Image
+from detectron2.data import MetadataCatalog
+from detectron2.utils import comm
+from detectron2.utils.file_io import PathManager
+from .evaluator import DatasetEvaluator
+class CityscapesEvaluator(DatasetEvaluator):
+    """
+    Shared base for the evaluators that rely on the cityscapes API.
+    """
+    def __init__(self, dataset_name):
+        """
+        Args:
+            dataset_name (str): the name of the dataset.
+                It must have the following metadata associated with it:
+                "thing_classes", "gt_dir".
+        """
+        self._metadata = MetadataCatalog.get(dataset_name)
+        self._cpu_device = torch.device("cpu")
+        self._logger = logging.getLogger(__name__)
+    def reset(self):
+        """
+        Create a fresh temporary directory and agree on a single results
+        directory: the one reported first by ``comm.all_gather``.
+        """
+        self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_")
+        own_dir = self._working_dir.name
+        self._temp_dir = own_dir
+        # All workers will write to the same results directory
+        # TODO this does not work in distributed training
+        self._temp_dir = comm.all_gather(own_dir)[0]
+        if self._temp_dir != own_dir:
+            # Another worker's directory was chosen; ours is not needed.
+            self._working_dir.cleanup()
+        self._logger.info(
+            "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir)
+        )
+class CityscapesInstanceEvaluator(CityscapesEvaluator):
+    """
+    Instance segmentation evaluation on the cityscapes dataset, done through the
+    cityscapes API.
+    Note:
+        * It does not work in multi-machine distributed training.
+        * It contains a synchronization, therefore has to be used on all ranks.
+        * Only the main process runs evaluation.
+    """
+    def process(self, inputs, outputs):
+        """
+        Write, for every image, a ``<basename>_pred.txt`` index file into the
+        results directory, plus one mask PNG per predicted instance.
+        Args:
+            inputs: iterable of dicts; only ``"file_name"`` is read here.
+            outputs: iterable of dicts; ``"instances"`` is used when present.
+        """
+        from cityscapesscripts.helpers.labels import name2label
+        for sample, result in zip(inputs, outputs):
+            stem = os.path.splitext(os.path.basename(sample["file_name"]))[0]
+            pred_txt = os.path.join(self._temp_dir, stem + "_pred.txt")
+            if "instances" not in result:
+                # Cityscapes requires a prediction file for every ground truth image,
+                # so an empty one is created.
+                with open(pred_txt, "w"):
+                    pass
+                continue
+            instances = result["instances"].to(self._cpu_device)
+            num_instances = len(instances)
+            with open(pred_txt, "w") as fout:
+                for idx in range(num_instances):
+                    pred_class = instances.pred_classes[idx]
+                    class_name = self._metadata.thing_classes[pred_class]
+                    class_id = name2label[class_name].id
+                    score = instances.scores[idx]
+                    mask = instances.pred_masks[idx].numpy().astype("uint8")
+                    png_filename = os.path.join(
+                        self._temp_dir, stem + "_{}_{}.png".format(idx, class_name)
+                    )
+                    # The mask is stored with values 0 / 255.
+                    Image.fromarray(mask * 255).save(png_filename)
+                    # One index line per instance: mask file, label id, score.
+                    fout.write(
+                        "{} {} {}\n".format(os.path.basename(png_filename), class_id, score)
+                    )
+    def evaluate(self):
+        """
+        Returns:
+            dict: has a key "segm", whose value is a dict of "AP" and "AP50".
+            Ranks other than 0 return None.
+        """
+        comm.synchronize()
+        if comm.get_rank() > 0:
+            return
+        import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval
+        self._logger.info("Evaluating results under {} ...".format(self._temp_dir))
+        # The cityscapes evaluation API is configured through module-level
+        # state, which has to be set before evaluating.
+        eval_args = cityscapes_eval.args
+        eval_args.predictionPath = os.path.abspath(self._temp_dir)
+        eval_args.predictionWalk = None
+        eval_args.JSONOutput = False
+        eval_args.colorized = False
+        eval_args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json")
+        # These lines are adopted from
+        # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa
+        gt_dir = PathManager.get_local_path(self._metadata.gt_dir)
+        gt_files = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_instanceIds.png"))
+        assert len(
+            gt_files
+        ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format(
+            eval_args.groundTruthSearch
+        )
+        pred_files = [cityscapes_eval.getPrediction(gt, eval_args) for gt in gt_files]
+        averages = cityscapes_eval.evaluateImgLists(pred_files, gt_files, eval_args)["averages"]
+        # Scores are reported as percentages.
+        ret = OrderedDict(
+            segm={"AP": averages["allAp"] * 100, "AP50": averages["allAp50%"] * 100}
+        )
+        self._working_dir.cleanup()
+        return ret
+class CityscapesSemSegEvaluator(CityscapesEvaluator):
+    """
+    Semantic segmentation evaluation on the cityscapes dataset, done through the
+    cityscapes API.
+    Note:
+        * It does not work in multi-machine distributed training.
+        * It contains a synchronization, therefore has to be used on all ranks.
+        * Only the main process runs evaluation.
+    """
+    def process(self, inputs, outputs):
+        """
+        Write one ``<basename>_pred.png`` label-id image per input into the
+        results directory.
+        Args:
+            inputs: iterable of dicts; only ``"file_name"`` is read here.
+            outputs: iterable of dicts; ``"sem_seg"`` is argmax-ed over dim 0.
+        """
+        from cityscapesscripts.helpers.labels import trainId2label
+        for sample, result in zip(inputs, outputs):
+            stem = os.path.splitext(os.path.basename(sample["file_name"]))[0]
+            pred_filename = os.path.join(self._temp_dir, stem + "_pred.png")
+            train_ids = result["sem_seg"].argmax(dim=0).to(self._cpu_device).numpy()
+            # Start from 255 everywhere; only labels that take part in the
+            # evaluation overwrite it with their label id.
+            pred = 255 * np.ones(train_ids.shape, dtype=np.uint8)
+            for train_id, label in trainId2label.items():
+                if not label.ignoreInEval:
+                    pred[train_ids == train_id] = label.id
+            Image.fromarray(pred).save(pred_filename)
+    def evaluate(self):
+        """
+        Returns:
+            dict: has a key "sem_seg", whose value is a dict of "IoU", "iIoU",
+            "IoU_sup" and "iIoU_sup". Ranks other than 0 return None.
+        """
+        comm.synchronize()
+        if comm.get_rank() > 0:
+            return
+        # The eval script is imported here, on the evaluating rank only.
+        # NOTE(review): an earlier comment said this import must follow setting
+        # the CITYSCAPES_DATASET env var, which the script reads at load time;
+        # nothing in this method sets it - confirm whether that is still needed.
+        import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as cityscapes_eval
+        self._logger.info("Evaluating results under {} ...".format(self._temp_dir))
+        # The cityscapes evaluation API is configured through module-level
+        # state, which has to be set before evaluating.
+        eval_args = cityscapes_eval.args
+        eval_args.predictionPath = os.path.abspath(self._temp_dir)
+        eval_args.predictionWalk = None
+        eval_args.JSONOutput = False
+        eval_args.colorized = False
+        # These lines are adopted from
+        # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py # noqa
+        gt_dir = PathManager.get_local_path(self._metadata.gt_dir)
+        gt_files = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_labelIds.png"))
+        assert len(
+            gt_files
+        ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format(
+            eval_args.groundTruthSearch
+        )
+        # Unlike the instance-level script, this one takes the args first.
+        pred_files = [cityscapes_eval.getPrediction(eval_args, gt) for gt in gt_files]
+        results = cityscapes_eval.evaluateImgLists(pred_files, gt_files, eval_args)
+        # Scores are reported as percentages.
+        ret = OrderedDict(
+            sem_seg={
+                "IoU": 100.0 * results["averageScoreClasses"],
+                "iIoU": 100.0 * results["averageScoreInstClasses"],
+                "IoU_sup": 100.0 * results["averageScoreCategories"],
+                "iIoU_sup": 100.0 * results["averageScoreInstCategories"],
+            }
+        )
+        self._working_dir.cleanup()
+        return ret
--- a/detectron2/evaluation/coco_evaluation.py
+++ b/detectron2/evaluation/coco_evaluation.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+import contextlib
+import copy
+import io
+import itertools
+import json
+import logging
+import numpy as np
+import os
+import pickle
+from collections import OrderedDict
+import pycocotools.mask as mask_util
+import torch
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+from tabulate import tabulate
+import detectron2.utils.comm as comm
+from detectron2.config import CfgNode
+from detectron2.data import MetadataCatalog
+from detectron2.data.datasets.coco import convert_to_coco_json
+from detectron2.evaluation.fast_eval_api import COCOeval_opt
+from detectron2.structures import Boxes, BoxMode, pairwise_iou
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.logger import create_small_table
+from .evaluator import DatasetEvaluator
+class COCOEvaluator(DatasetEvaluator):
+    """
+    Evaluate AR for object proposals, AP for instance detection/segmentation, AP
+    for keypoint detection outputs using COCO's metrics.
+    See http://cocodataset.org/#detection-eval and
+    http://cocodataset.org/#keypoints-eval to understand its metrics.
+    The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means
+    the metric cannot be computed (e.g. due to no predictions made).
+    In addition to COCO, this evaluator is able to support any bounding box detection,
+    instance segmentation, or keypoint detection dataset.
+    """
+    def __init__(
+        self,
+        dataset_name,
+        tasks=None,
+        distributed=True,
+        output_dir=None,
+        *,
+        max_dets_per_image=None,
+        use_fast_impl=True,
+        kpt_oks_sigmas=(),
+    ):
+        """
+        Args:
+            dataset_name (str): name of the dataset to be evaluated.
+                It must have either the following corresponding metadata:
+                    "json_file": the path to the COCO format annotation
+                Or it must be in detectron2's standard dataset format
+                so it can be converted to COCO format automatically.
+            tasks (tuple[str]): tasks that can be evaluated under the given
+                configuration. A task is one of "bbox", "segm", "keypoints".
+                By default, will infer this automatically from predictions.
+            distributed (True): if True, will collect results from all ranks and run evaluation
+                in the main process.
+                Otherwise, will only evaluate the results in the current process.
+            output_dir (str): optional, an output directory to dump all
+                results predicted on the dataset. The dump contains two files:
+                1. "instances_predictions.pth" a file that can be loaded with `torch.load` and
+                   contains all the results in the format they are produced by the model.
+                2. "coco_instances_results.json" a json file in COCO's result format.
+            max_dets_per_image (int): limit on the maximum number of detections per image.
+                By default in COCO, this limit is to 100, but this can be customized
+                to be greater, as is needed in evaluation metrics AP fixed and AP pool
+                (see https://arxiv.org/pdf/2102.01066.pdf)
+                This doesn't affect keypoint evaluation.
+            use_fast_impl (bool): use a fast but **unofficial** implementation to compute AP.
+                Although the results should be very close to the official implementation in COCO
+                API, it is still recommended to compute results with the official API for use in
+                papers. The faster implementation also uses more RAM.
+            kpt_oks_sigmas (list[float]): The sigmas used to calculate keypoint OKS.
+                See http://cocodataset.org/#keypoints-eval
+                When empty, it will use the defaults in COCO.
+                Otherwise it should be the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS.
+        """
+        self._logger = logging.getLogger(__name__)
+        self._distributed = distributed
+        self._output_dir = output_dir
+        self._use_fast_impl = use_fast_impl
+        # COCOeval requires the limit on the number of detections per image (maxDets) to be a list
+        # with at least 3 elements. The default maxDets in COCOeval is [1, 10, 100], in which the
+        # 3rd element (100) is used as the limit on the number of detections per image when
+        # evaluating AP. COCOEvaluator expects an integer for max_dets_per_image, so for COCOeval,
+        # we reformat max_dets_per_image into [1, 10, max_dets_per_image], based on the defaults.
+        if max_dets_per_image is None:
+            max_dets_per_image = [1, 10, 100]
+        else:
+            max_dets_per_image = [1, 10, max_dets_per_image]
+        self._max_dets_per_image = max_dets_per_image
+        if tasks is not None and isinstance(tasks, CfgNode):
+            kpt_oks_sigmas = (
+                tasks.TEST.KEYPOINT_OKS_SIGMAS if not kpt_oks_sigmas else kpt_oks_sigmas
+            )
+            self._logger.warn(
+                "COCO Evaluator instantiated using config, this is deprecated behavior."
+                " Please pass in explicit arguments instead."
+            )
+            self._tasks = None  # Infering it from predictions should be better
+        else:
+            self._tasks = tasks
+        self._cpu_device = torch.device("cpu")
+        self._metadata = MetadataCatalog.get(dataset_name)
+        if not hasattr(self._metadata, "json_file"):
+            if output_dir is None:
+                raise ValueError(
+                    "output_dir must be provided to COCOEvaluator "
+                    "for datasets not in COCO format."
+                )
+            self._logger.info(f"Trying to convert '{dataset_name}' to COCO format ...")
+            cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json")
+            self._metadata.json_file = cache_path
+            convert_to_coco_json(dataset_name, cache_path)
+        json_file = PathManager.get_local_path(self._metadata.json_file)
+        with contextlib.redirect_stdout(io.StringIO()):
+            self._coco_api = COCO(json_file)
+        # Test set json files do not contain annotations (evaluation must be
+        # performed using the COCO evaluation server).
+        self._do_evaluation = "annotations" in self._coco_api.dataset
+        if self._do_evaluation:
+            self._kpt_oks_sigmas = kpt_oks_sigmas
+    def reset(self):
+        self._predictions = []
+    def process(self, inputs, outputs):
+        """
+        Args:
+            inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
+                It is a list of dict. Each dict corresponds to an image and
+                contains keys like "height", "width", "file_name", "image_id".
+            outputs: the outputs of a COCO model. It is a list of dicts with key
+                "instances" that contains :class:`Instances`.
+        """
+        for input, output in zip(inputs, outputs):
+            prediction = {"image_id": input["image_id"]}
+            if "instances" in output:
+                instances = output["instances"].to(self._cpu_device)
+                prediction["instances"] = instances_to_coco_json(instances, input["image_id"])
+            if "proposals" in output:
+                prediction["proposals"] = output["proposals"].to(self._cpu_device)
+            if len(prediction) > 1:
+                self._predictions.append(prediction)
+    def evaluate(self, img_ids=None):
+        """
+        Args:
+            img_ids: a list of image IDs to evaluate on. Default to None for the whole dataset
+        """
+        if self._distributed:
+            comm.synchronize()
+            predictions = comm.gather(self._predictions, dst=0)
+            predictions = list(itertools.chain(*predictions))
+            if not comm.is_main_process():
+                return {}
+        else:
+            predictions = self._predictions
+        if len(predictions) == 0:
+            self._logger.warning("[COCOEvaluator] Did not receive valid predictions.")
+            return {}
+        if self._output_dir:
+            PathManager.mkdirs(self._output_dir)
+            file_path = os.path.join(self._output_dir, "instances_predictions.pth")
+            with PathManager.open(file_path, "wb") as f:
+                torch.save(predictions, f)
+        self._results = OrderedDict()
+        if "proposals" in predictions[0]:
+            self._eval_box_proposals(predictions)
+        if "instances" in predictions[0]:
+            self._eval_predictions(predictions, img_ids=img_ids)
+        # Copy so the caller can do whatever with results
+        return copy.deepcopy(self._results)
+    def _tasks_from_predictions(self, predictions):
+        """
+        Get COCO API "tasks" (i.e. iou_type) from COCO-format predictions.
+        """
+        tasks = {"bbox"}
+        for pred in predictions:
+            if "segmentation" in pred:
+                tasks.add("segm")
+            if "keypoints" in pred:
+                tasks.add("keypoints")
+        return sorted(tasks)
+    def _eval_predictions(self, predictions, img_ids=None):
+        """
+        Evaluate predictions. Fill self._results with the metrics of the tasks.
+        Args:
+            predictions (list[dict]): per-image dicts; each "instances" entry is an
+                iterable of COCO-format result dicts.
+            img_ids: forwarded unchanged to `_evaluate_predictions_on_coco`.
+        Side effects:
+            Rewrites "category_id" of every result dict in place when the metadata has
+            `thing_dataset_id_to_contiguous_id`, and writes
+            "coco_instances_results.json" when `self._output_dir` is set.
+        """
+        self._logger.info("Preparing results for COCO format ...")
+        # Flatten the per-image result lists into a single list of result dicts
+        coco_results = list(itertools.chain(*[x["instances"] for x in predictions]))
+        # Explicitly configured tasks win; otherwise infer them from the result fields
+        tasks = self._tasks or self._tasks_from_predictions(coco_results)
+        # unmap the category ids for COCO
+        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
+            dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id
+            all_contiguous_ids = list(dataset_id_to_contiguous_id.values())
+            num_classes = len(all_contiguous_ids)
+            # The contiguous ids are expected to span exactly [0, num_classes - 1]
+            assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1
+            reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()}
+            for result in coco_results:
+                category_id = result["category_id"]
+                assert category_id < num_classes, (
+                    f"A prediction has class={category_id}, "
+                    f"but the dataset only has {num_classes} classes and "
+                    f"predicted class id should be in [0, {num_classes - 1}]."
+                )
+                # Replace the contiguous id with the dataset's own category id
+                result["category_id"] = reverse_id_mapping[category_id]
+        if self._output_dir:
+            file_path = os.path.join(self._output_dir, "coco_instances_results.json")
+            self._logger.info("Saving results to {}".format(file_path))
+            with PathManager.open(file_path, "w") as f:
+                f.write(json.dumps(coco_results))
+                f.flush()
+        # Results are still dumped above even when no annotations are available
+        if not self._do_evaluation:
+            self._logger.info("Annotations are not available for evaluation.")
+            return
+        self._logger.info(
+            "Evaluating predictions with {} COCO API...".format(
+                "unofficial" if self._use_fast_impl else "official"
+            )
+        )
+        for task in sorted(tasks):
+            assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!"
+            coco_eval = (
+                _evaluate_predictions_on_coco(
+                    self._coco_api,
+                    coco_results,
+                    task,
+                    kpt_oks_sigmas=self._kpt_oks_sigmas,
+                    use_fast_impl=self._use_fast_impl,
+                    img_ids=img_ids,
+                    max_dets_per_image=self._max_dets_per_image,
+                )
+                if len(coco_results) > 0
+                else None  # cocoapi does not handle empty results very well
+            )
+            # A None coco_eval is turned into NaN metrics by _derive_coco_results
+            res = self._derive_coco_results(
+                coco_eval, task, class_names=self._metadata.get("thing_classes")
+            )
+            self._results[task] = res
+    def _eval_box_proposals(self, predictions):
+        """
+        Evaluate the box proposals in predictions.
+        Fill self._results with the metrics for "box_proposals" task.
+        Args:
+            predictions (list[dict]): per-image dicts with an "image_id" and a
+                "proposals" object exposing `proposal_boxes` and `objectness_logits`.
+        Side effects:
+            Pickles the proposals to "box_proposals.pkl" when `self._output_dir` is set.
+        """
+        if self._output_dir:
+            # Saving generated box proposals to file.
+            # Predicted box_proposals are in XYXY_ABS mode.
+            bbox_mode = BoxMode.XYXY_ABS.value
+            ids, boxes, objectness_logits = [], [], []
+            for prediction in predictions:
+                ids.append(prediction["image_id"])
+                boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
+                objectness_logits.append(prediction["proposals"].objectness_logits.numpy())
+            proposal_data = {
+                "boxes": boxes,
+                "objectness_logits": objectness_logits,
+                "ids": ids,
+                "bbox_mode": bbox_mode,
+            }
+            with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
+                pickle.dump(proposal_data, f)
+        # The proposals are still dumped above even when no annotations are available
+        if not self._do_evaluation:
+            self._logger.info("Annotations are not available for evaluation.")
+            return
+        self._logger.info("Evaluating bbox proposals ...")
+        res = {}
+        # Maps each area name to the suffix used in the metric key (e.g. "ARs@100")
+        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
+        for limit in [100, 1000]:
+            for area, suffix in areas.items():
+                stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit)
+                key = "AR{}@{:d}".format(suffix, limit)
+                # Average recall is reported as a percentage
+                res[key] = float(stats["ar"].item() * 100)
+        self._logger.info("Proposal metrics: \n" + create_small_table(res))
+        self._results["box_proposals"] = res
+    def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
+        """
+        Derive the desired score numbers from summarized COCOeval.
+        Args:
+            coco_eval (None or COCOEval): None represents no predictions from model.
+            iou_type (str): one of "bbox", "segm" or "keypoints".
+            class_names (None or list[str]): if provided with more than one name,
+                will use it to compute per-category AP.
+        Returns:
+            a dict of {metric name: score}. Scores are percentages; metrics that
+            cannot be computed are NaN.
+        Raises:
+            KeyError: if `iou_type` is not one of the supported types.
+        """
+        metrics = {
+            "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
+            "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"],
+            "keypoints": ["AP", "AP50", "AP75", "APm", "APl"],
+        }[iou_type]
+        if coco_eval is None:
+            # Logger.warn is a deprecated alias; use Logger.warning like the rest of the class
+            self._logger.warning("No predictions from the model!")
+            return {metric: float("nan") for metric in metrics}
+        # the standard metrics (negative stats mean "not computable" and become NaN)
+        results = {
+            metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan")
+            for idx, metric in enumerate(metrics)
+        }
+        self._logger.info(
+            "Evaluation results for {}: \n".format(iou_type) + create_small_table(results)
+        )
+        if not np.isfinite(sum(results.values())):
+            self._logger.info("Some metrics cannot be computed and is shown as NaN.")
+        if class_names is None or len(class_names) <= 1:
+            return results
+        # Compute per-category AP
+        # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa
+        precisions = coco_eval.eval["precision"]
+        # precision has dims (iou, recall, cls, area range, max dets)
+        assert len(class_names) == precisions.shape[2]
+        results_per_category = []
+        for idx, name in enumerate(class_names):
+            # area range index 0: all area ranges
+            # max dets index -1: typically 100 per image
+            precision = precisions[:, :, idx, 0, -1]
+            # entries of -1 mark missing values and are excluded from the mean
+            precision = precision[precision > -1]
+            ap = np.mean(precision) if precision.size else float("nan")
+            results_per_category.append(("{}".format(name), float(ap * 100)))
+        # tabulate it: up to three (category, AP) pairs per row
+        N_COLS = min(6, len(results_per_category) * 2)
+        results_flatten = list(itertools.chain(*results_per_category))
+        results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)])
+        table = tabulate(
+            results_2d,
+            tablefmt="pipe",
+            floatfmt=".3f",
+            headers=["category", "AP"] * (N_COLS // 2),
+            numalign="left",
+        )
+        self._logger.info("Per-category {} AP: \n".format(iou_type) + table)
+        results.update({"AP-" + name: ap for name, ap in results_per_category})
+        return results
+def instances_to_coco_json(instances, img_id):
+    """
+    Dump an "Instances" object to a COCO-format json that's used for evaluation.
+    The input `instances` is left unmodified.
+    Args:
+        instances (Instances): predictions for one image. Reads `pred_boxes`,
+            `scores`, `pred_classes` and, when present, `pred_masks` and
+            `pred_keypoints`.
+        img_id (int): the image id
+    Returns:
+        list[dict]: list of json annotations in COCO format.
+    """
+    num_instance = len(instances)
+    if num_instance == 0:
+        return []
+    boxes = instances.pred_boxes.tensor.numpy()
+    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+    boxes = boxes.tolist()
+    scores = instances.scores.tolist()
+    classes = instances.pred_classes.tolist()
+    has_mask = instances.has("pred_masks")
+    if has_mask:
+        # use RLE to encode the masks, because they are too large and takes memory
+        # since this evaluator stores outputs of the entire dataset
+        rles = [
+            mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
+            for mask in instances.pred_masks
+        ]
+        for rle in rles:
+            # "counts" is an array encoded by mask_util as a byte-stream. Python3's
+            # json writer which always produces strings cannot serialize a bytestream
+            # unless you decode it. Thankfully, utf-8 works out (which is also what
+            # the pycocotools/_mask.pyx does).
+            rle["counts"] = rle["counts"].decode("utf-8")
+    has_keypoints = instances.has("pred_keypoints")
+    if has_keypoints:
+        # Work on a private copy: the 0.5 shift below is done in place and must not
+        # leak into the caller's `pred_keypoints` (e.g. when several evaluators
+        # consume the same model output, or this function is called twice).
+        keypoints = torch.as_tensor(instances.pred_keypoints).clone()
+    results = []
+    for k in range(num_instance):
+        result = {
+            "image_id": img_id,
+            "category_id": classes[k],
+            "bbox": boxes[k],
+            "score": scores[k],
+        }
+        if has_mask:
+            result["segmentation"] = rles[k]
+        if has_keypoints:
+            # In COCO annotations,
+            # keypoints coordinates are pixel indices.
+            # However our predictions are floating point coordinates.
+            # Therefore we subtract 0.5 to be consistent with the annotation format.
+            # This is the inverse of data loading logic in `datasets/coco.py`.
+            keypoints[k][:, :2] -= 0.5
+            result["keypoints"] = keypoints[k].flatten().tolist()
+        results.append(result)
+    return results
+# inspired from Detectron:
+# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa
+def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None):
+    """
+    Evaluate detection proposal recall metrics. This function is a much
+    faster alternative to the official COCO API recall evaluation code. However,
+    it produces slightly different results.
+    Args:
+        dataset_predictions (list[dict]): per-image dicts with "image_id" and
+            "proposals" (exposing `proposal_boxes` and `objectness_logits`).
+        coco_api: COCO API object used to look up the ground-truth annotations.
+        thresholds (None or Tensor): IoU thresholds; defaults to 0.5:0.05:0.95.
+        area (str): name of the ground-truth area range to evaluate (a key of `areas`).
+        limit (None or int): if set, keep only the `limit` highest-scoring proposals
+            per image.
+    Returns:
+        dict: with keys "ar" (mean recall over thresholds), "recalls", "thresholds",
+        "gt_overlaps" (sorted best IoU per matched ground-truth box) and "num_pos"
+        (number of ground-truth boxes considered).
+    """
+    # Record max overlap value for each gt box
+    # Return vector of overlap values
+    areas = {
+        "all": 0,
+        "small": 1,
+        "medium": 2,
+        "large": 3,
+        "96-128": 4,
+        "128-256": 5,
+        "256-512": 6,
+        "512-inf": 7,
+    }
+    area_ranges = [
+        [0 ** 2, 1e5 ** 2],  # all
+        [0 ** 2, 32 ** 2],  # small
+        [32 ** 2, 96 ** 2],  # medium
+        [96 ** 2, 1e5 ** 2],  # large
+        [96 ** 2, 128 ** 2],  # 96-128
+        [128 ** 2, 256 ** 2],  # 128-256
+        [256 ** 2, 512 ** 2],  # 256-512
+        [512 ** 2, 1e5 ** 2],  # 512-inf
+    ]
+    assert area in areas, "Unknown area range: {}".format(area)
+    area_range = area_ranges[areas[area]]
+    gt_overlaps = []
+    num_pos = 0
+    for prediction_dict in dataset_predictions:
+        predictions = prediction_dict["proposals"]
+        # sort predictions in descending order
+        # TODO maybe remove this and make it explicit in the documentation
+        inds = predictions.objectness_logits.sort(descending=True)[1]
+        predictions = predictions[inds]
+        ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"])
+        anno = coco_api.loadAnns(ann_ids)
+        # Crowd annotations are excluded from the ground truth
+        gt_boxes = [
+            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
+            for obj in anno
+            if obj["iscrowd"] == 0
+        ]
+        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
+        gt_boxes = Boxes(gt_boxes)
+        gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0])
+        if len(gt_boxes) == 0 or len(predictions) == 0:
+            continue
+        # Keep only the ground-truth boxes whose area falls in the requested range
+        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
+        gt_boxes = gt_boxes[valid_gt_inds]
+        num_pos += len(gt_boxes)
+        if len(gt_boxes) == 0:
+            continue
+        if limit is not None and len(predictions) > limit:
+            predictions = predictions[:limit]
+        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)
+        _gt_overlaps = torch.zeros(len(gt_boxes))
+        # Greedy one-to-one matching: each round pairs the best remaining
+        # (proposal, gt) couple and removes both from further consideration
+        for j in range(min(len(predictions), len(gt_boxes))):
+            # find which proposal box maximally covers each gt box
+            # and get the iou amount of coverage for each gt box
+            max_overlaps, argmax_overlaps = overlaps.max(dim=0)
+            # find which gt box is 'best' covered (i.e. 'best' = most iou)
+            gt_ovr, gt_ind = max_overlaps.max(dim=0)
+            assert gt_ovr >= 0
+            # find the proposal box that covers the best covered gt box
+            box_ind = argmax_overlaps[gt_ind]
+            # record the iou coverage of this gt box
+            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
+            assert _gt_overlaps[j] == gt_ovr
+            # mark the proposal box and the gt box as used
+            overlaps[box_ind, :] = -1
+            overlaps[:, gt_ind] = -1
+        # append recorded iou coverage level
+        gt_overlaps.append(_gt_overlaps)
+    gt_overlaps = (
+        torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32)
+    )
+    gt_overlaps, _ = torch.sort(gt_overlaps)
+    if thresholds is None:
+        step = 0.05
+        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
+    recalls = torch.zeros_like(thresholds)
+    # compute recall for each iou threshold
+    # NOTE(review): when num_pos is 0 this divides by 0.0; presumably the tensor
+    # division yields NaN rather than raising - confirm
+    for i, t in enumerate(thresholds):
+        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
+    # ar = 2 * np.trapz(recalls, thresholds)
+    ar = recalls.mean()
+    return {
+        "ar": ar,
+        "recalls": recalls,
+        "thresholds": thresholds,
+        "gt_overlaps": gt_overlaps,
+        "num_pos": num_pos,
+    }
+def _evaluate_predictions_on_coco(
+    coco_gt,
+    coco_results,
+    iou_type,
+    kpt_oks_sigmas=None,
+    use_fast_impl=True,
+    img_ids=None,
+    max_dets_per_image=None,
+):
+    """
+    Evaluate the coco results using COCOEval API.
+    Args:
+        coco_gt: COCO API object holding the ground truth.
+        coco_results (list[dict]): non-empty list of COCO-format results.
+        iou_type (str): "bbox", "segm" or "keypoints".
+        kpt_oks_sigmas: optional override of the keypoint OKS sigmas.
+        use_fast_impl (bool): use `COCOeval_opt` instead of `COCOeval`.
+        img_ids: if not None, restricts evaluation to these image ids.
+        max_dets_per_image (None or list[int]): custom maxDets (at least 3 entries);
+            defaults to [1, 10, 100].
+    Returns:
+        the evaluator object after evaluate(), accumulate() and summarize() have run.
+    """
+    assert len(coco_results) > 0
+    if iou_type == "segm":
+        # Copy first so the caller's result dicts keep their "bbox" field
+        coco_results = copy.deepcopy(coco_results)
+        # When evaluating mask AP, if the results contain bbox, cocoapi will
+        # use the box area as the area of the instance, instead of the mask area.
+        # This leads to a different definition of small/medium/large.
+        # We remove the bbox field to let mask AP use mask area.
+        for c in coco_results:
+            c.pop("bbox", None)
+    coco_dt = coco_gt.loadRes(coco_results)
+    coco_eval = (COCOeval_opt if use_fast_impl else COCOeval)(coco_gt, coco_dt, iou_type)
+    # For COCO, the default max_dets_per_image is [1, 10, 100].
+    if max_dets_per_image is None:
+        max_dets_per_image = [1, 10, 100]  # Default from COCOEval
+    else:
+        assert (
+            len(max_dets_per_image) >= 3
+        ), "COCOeval requires maxDets (and max_dets_per_image) to have length at least 3"
+        # In the case that user supplies a custom input for max_dets_per_image,
+        # apply COCOevalMaxDets to evaluate AP with the custom input.
+        # NOTE: this replaces the evaluator chosen above, so `use_fast_impl` has no
+        # effect on this path.
+        if max_dets_per_image[2] != 100:
+            coco_eval = COCOevalMaxDets(coco_gt, coco_dt, iou_type)
+    # maxDets is only overridden for bbox/segm; keypoints keep the evaluator's default
+    if iou_type != "keypoints":
+        coco_eval.params.maxDets = max_dets_per_image
+    if img_ids is not None:
+        coco_eval.params.imgIds = img_ids
+    if iou_type == "keypoints":
+        # Use the COCO default keypoint OKS sigmas unless overrides are specified
+        if kpt_oks_sigmas:
+            assert hasattr(coco_eval.params, "kpt_oks_sigmas"), "pycocotools is too old!"
+            coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas)
+        # COCOAPI requires every detection and every gt to have keypoints, so
+        # we just take the first entry from both
+        # (each keypoint occupies 3 consecutive values, hence the division by 3)
+        num_keypoints_dt = len(coco_results[0]["keypoints"]) // 3
+        num_keypoints_gt = len(next(iter(coco_gt.anns.values()))["keypoints"]) // 3
+        num_keypoints_oks = len(coco_eval.params.kpt_oks_sigmas)
+        assert num_keypoints_oks == num_keypoints_dt == num_keypoints_gt, (
+            f"[COCOEvaluator] Prediction contain {num_keypoints_dt} keypoints. "
+            f"Ground truth contains {num_keypoints_gt} keypoints. "
+            f"The length of cfg.TEST.KEYPOINT_OKS_SIGMAS is {num_keypoints_oks}. "
+            "They have to agree with each other. For meaning of OKS, please refer to "
+            "http://cocodataset.org/#keypoints-eval."
+        )
+    coco_eval.evaluate()
+    coco_eval.accumulate()
+    coco_eval.summarize()
+    return coco_eval
+class COCOevalMaxDets(COCOeval):
+    """
+    Modified version of COCOeval for evaluating AP with a custom
+    maxDets (by default for COCO, maxDets is 100)
+    """
+    def summarize(self):
+        """
+        Compute and display summary metrics for evaluation results given
+        a custom value for  max_dets_per_image.
+        Prints one line per metric and stores the values in `self.stats`.
+        Raises:
+            Exception: if accumulate() has not been run yet.
+            ValueError: if `self.params.iouType` is not "segm", "bbox" or "keypoints".
+        """
+        def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100):
+            # Print and return one averaged metric: AP when ap == 1, AR otherwise
+            p = self.params
+            iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
+            titleStr = "Average Precision" if ap == 1 else "Average Recall"
+            typeStr = "(AP)" if ap == 1 else "(AR)"
+            iouStr = (
+                "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
+                if iouThr is None
+                else "{:0.2f}".format(iouThr)
+            )
+            # Indices of the requested area range and maxDets setting
+            aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
+            mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+            if ap == 1:
+                # dimension of precision: [TxRxKxAxM]
+                s = self.eval["precision"]
+                # IoU
+                if iouThr is not None:
+                    t = np.where(iouThr == p.iouThrs)[0]
+                    s = s[t]
+                s = s[:, :, :, aind, mind]
+            else:
+                # dimension of recall: [TxKxAxM]
+                s = self.eval["recall"]
+                if iouThr is not None:
+                    t = np.where(iouThr == p.iouThrs)[0]
+                    s = s[t]
+                s = s[:, :, aind, mind]
+            # Entries of -1 mark missing values; report -1 when nothing is valid
+            if len(s[s > -1]) == 0:
+                mean_s = -1
+            else:
+                mean_s = np.mean(s[s > -1])
+            print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
+            return mean_s
+        def _summarizeDets():
+            stats = np.zeros((12,))
+            # Evaluate AP using the custom limit on maximum detections per image
+            stats[0] = _summarize(1, maxDets=self.params.maxDets[2])
+            stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2])
+            stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2])
+            stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2])
+            stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2])
+            stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2])
+            stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
+            stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
+            stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
+            stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2])
+            stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2])
+            stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2])
+            return stats
+        def _summarizeKps():
+            stats = np.zeros((10,))
+            stats[0] = _summarize(1, maxDets=20)
+            stats[1] = _summarize(1, maxDets=20, iouThr=0.5)
+            stats[2] = _summarize(1, maxDets=20, iouThr=0.75)
+            stats[3] = _summarize(1, maxDets=20, areaRng="medium")
+            stats[4] = _summarize(1, maxDets=20, areaRng="large")
+            stats[5] = _summarize(0, maxDets=20)
+            stats[6] = _summarize(0, maxDets=20, iouThr=0.5)
+            stats[7] = _summarize(0, maxDets=20, iouThr=0.75)
+            stats[8] = _summarize(0, maxDets=20, areaRng="medium")
+            stats[9] = _summarize(0, maxDets=20, areaRng="large")
+            return stats
+        if not self.eval:
+            raise Exception("Please run accumulate() first")
+        iouType = self.params.iouType
+        if iouType == "segm" or iouType == "bbox":
+            summarize = _summarizeDets
+        elif iouType == "keypoints":
+            summarize = _summarizeKps
+        else:
+            # Fail with a clear message instead of an UnboundLocalError below
+            raise ValueError("Unsupported iouType: {}".format(iouType))
+        self.stats = summarize()
+    def __str__(self):
+        """
+        Return the summary text produced by :meth:`summarize`.
+        `__str__` must return a string; the summary is therefore captured from
+        stdout and returned instead of being printed. `self.stats` is refreshed
+        as a side effect of running summarize().
+        """
+        import contextlib
+        import io
+        captured = io.StringIO()
+        with contextlib.redirect_stdout(captured):
+            self.summarize()
+        return captured.getvalue()
--- a/detectron2/evaluation/evaluator.py
+++ b/detectron2/evaluation/evaluator.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+import datetime
+import logging
+import time
+from collections import OrderedDict, abc
+from contextlib import ExitStack, contextmanager
+from typing import List, Union
+import torch
+from torch import nn
+from detectron2.utils.comm import get_world_size, is_main_process
+from detectron2.utils.logger import log_every_n_seconds
+class DatasetEvaluator:
+    """
+    Interface shared by all dataset evaluators.
+    :func:`inference_on_dataset` feeds every sample of a dataset through the model and
+    hands the inputs/outputs to a DatasetEvaluator.
+    An evaluator gathers what it needs from each inputs/outputs pair in :meth:`process`
+    and turns it into final evaluation results in :meth:`evaluate`.
+    All methods of this base class do nothing and return None.
+    """
+    def reset(self):
+        """
+        Prepare for a fresh round of evaluation.
+        Call this before a round of evaluation begins.
+        """
+    def process(self, inputs, outputs):
+        """
+        Consume one pair of inputs and outputs.
+        When they hold batches, iterate over the individual pairs with `zip`:
+        .. code-block:: python
+            for input_, output in zip(inputs, outputs):
+                # do evaluation on single input/output pair
+                ...
+        Args:
+            inputs (list): what the model was called with.
+            outputs (list): what `model(inputs)` returned.
+        """
+    def evaluate(self):
+        """
+        Summarize the performance once every input/output pair has been processed.
+        Returns:
+            dict:
+                Subclasses are free to return a dict in any format the user
+                knows how to consume.
+                Our train_net.py expects the following layout:
+                * key: the name of the task (e.g., bbox)
+                * value: a dict of {metric name: score}, e.g.: {"AP50": 80}
+        """
+class DatasetEvaluators(DatasetEvaluator):
+    """
+    Composite evaluator that bundles several :class:`DatasetEvaluator` instances.
+    Every call made on it is forwarded, in order, to each wrapped
+    :class:`DatasetEvaluator`.
+    """
+    def __init__(self, evaluators):
+        """
+        Args:
+            evaluators (list): the evaluators to combine.
+        """
+        super().__init__()
+        self._evaluators = evaluators
+    def reset(self):
+        """Reset every wrapped evaluator."""
+        for member in self._evaluators:
+            member.reset()
+    def process(self, inputs, outputs):
+        """Hand the same inputs/outputs pair to every wrapped evaluator."""
+        for member in self._evaluators:
+            member.process(inputs, outputs)
+    def evaluate(self):
+        """
+        Run `evaluate()` on every wrapped evaluator and merge their results.
+        Returns:
+            OrderedDict: the union of all result dicts. Results are only merged on
+            the main process, and `None` results are ignored. Two evaluators
+            producing the same key is an assertion error.
+        """
+        merged = OrderedDict()
+        for member in self._evaluators:
+            partial = member.evaluate()
+            # Every evaluator is still run on every process; only merging is restricted
+            if not is_main_process() or partial is None:
+                continue
+            for key, value in partial.items():
+                assert (
+                    key not in merged
+                ), "Different evaluators produce results with the same key {}".format(key)
+                merged[key] = value
+        return merged
+def inference_on_dataset(
+    model, data_loader, evaluator: Union[DatasetEvaluator, List[DatasetEvaluator], None]
+):
+    """
+    Run model on the data_loader and evaluate the metrics with evaluator.
+    Also benchmark the inference speed of `model.__call__` accurately.
+    The model will be used in eval mode.
+    Args:
+        model (callable): a callable which takes an object from
+            `data_loader` and returns some outputs.
+            If it's an nn.Module, it will be temporarily set to `eval` mode.
+            If you wish to evaluate a model in `training` mode instead, you can
+            wrap the given model and override its behavior of `.eval()` and `.train()`.
+        data_loader: an iterable object with a length.
+            The elements it generates will be the inputs to the model.
+        evaluator: the evaluator(s) to run. Use `None` if you only want to benchmark,
+            but don't want to do any evaluation.
+    Returns:
+        The return value of `evaluator.evaluate()`, or an empty dict if that is None.
+    """
+    num_devices = get_world_size()
+    logger = logging.getLogger(__name__)
+    logger.info("Start inference on {} batches".format(len(data_loader)))
+    total = len(data_loader)  # inference data loader must have a fixed length
+    if evaluator is None:
+        # create a no-op evaluator
+        evaluator = DatasetEvaluators([])
+    if isinstance(evaluator, abc.MutableSequence):
+        # A list of evaluators is wrapped into a single composite evaluator
+        evaluator = DatasetEvaluators(evaluator)
+    evaluator.reset()
+    # The first `num_warmup` iterations are excluded from the timing statistics
+    num_warmup = min(5, total - 1)
+    start_time = time.perf_counter()
+    total_data_time = 0
+    total_compute_time = 0
+    total_eval_time = 0
+    with ExitStack() as stack:
+        if isinstance(model, nn.Module):
+            stack.enter_context(inference_context(model))
+        stack.enter_context(torch.no_grad())
+        start_data_time = time.perf_counter()
+        for idx, inputs in enumerate(data_loader):
+            total_data_time += time.perf_counter() - start_data_time
+            if idx == num_warmup:
+                # Warmup is over: restart the clock and drop the accumulated timings
+                start_time = time.perf_counter()
+                total_data_time = 0
+                total_compute_time = 0
+                total_eval_time = 0
+            start_compute_time = time.perf_counter()
+            outputs = model(inputs)
+            # Wait for pending GPU work so the compute time covers the whole forward pass
+            if torch.cuda.is_available():
+                torch.cuda.synchronize()
+            total_compute_time += time.perf_counter() - start_compute_time
+            start_eval_time = time.perf_counter()
+            evaluator.process(inputs, outputs)
+            total_eval_time += time.perf_counter() - start_eval_time
+            # Number of iterations counted since the (possibly restarted) clock began
+            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
+            data_seconds_per_iter = total_data_time / iters_after_start
+            compute_seconds_per_iter = total_compute_time / iters_after_start
+            eval_seconds_per_iter = total_eval_time / iters_after_start
+            total_seconds_per_iter = (time.perf_counter() - start_time) / iters_after_start
+            # Log progress once timings are past warmup, or early when inference is very slow
+            if idx >= num_warmup * 2 or compute_seconds_per_iter > 5:
+                eta = datetime.timedelta(seconds=int(total_seconds_per_iter * (total - idx - 1)))
+                log_every_n_seconds(
+                    logging.INFO,
+                    (
+                        f"Inference done {idx + 1}/{total}. "
+                        f"Dataloading: {data_seconds_per_iter:.4f} s/iter. "
+                        f"Inference: {compute_seconds_per_iter:.4f} s/iter. "
+                        f"Eval: {eval_seconds_per_iter:.4f} s/iter. "
+                        f"Total: {total_seconds_per_iter:.4f} s/iter. "
+                        f"ETA={eta}"
+                    ),
+                    n=5,
+                )
+            start_data_time = time.perf_counter()
+    # Measure the time only for this worker (before the synchronization barrier)
+    total_time = time.perf_counter() - start_time
+    total_time_str = str(datetime.timedelta(seconds=total_time))
+    # NOTE this format is parsed by grep
+    logger.info(
+        "Total inference time: {} ({:.6f} s / iter per device, on {} devices)".format(
+            total_time_str, total_time / (total - num_warmup), num_devices
+        )
+    )
+    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
+    logger.info(
+        "Total inference pure compute time: {} ({:.6f} s / iter per device, on {} devices)".format(
+            total_compute_time_str, total_compute_time / (total - num_warmup), num_devices
+        )
+    )
+    results = evaluator.evaluate()
+    # An evaluator may return None when not in main process.
+    # Replace it by an empty dict instead to make it easier for downstream code to handle
+    if results is None:
+        results = {}
+    return results
+@contextmanager
+def inference_context(model):
+    """
+    A context where the model is temporarily changed to eval mode,
+    and restored to previous mode afterwards, even if the body raises.
+    Args:
+        model: a torch Module
+    """
+    training_mode = model.training
+    model.eval()
+    try:
+        yield
+    finally:
+        # Restore the previous mode on normal exit and on exceptions alike, so a
+        # failure during inference cannot leave the model stuck in eval mode
+        model.train(training_mode)
--- a/detectron2/evaluation/fast_eval_api.py
+++ b/detectron2/evaluation/fast_eval_api.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+import copy
+import logging
+import numpy as np
+import time
+from pycocotools.cocoeval import COCOeval
+from detectron2 import _C
+logger = logging.getLogger(__name__)
+class COCOeval_opt(COCOeval):
+    """
+    This is a slightly modified version of the original COCO API, where the functions evaluateImg()
+    and accumulate() are implemented in C++ to speedup evaluation
+    """
+    def evaluate(self):
+        """
+        Run per image evaluation on given images and store results in self.evalImgs_cpp, a
+        datastructure that isn't readable from Python but is used by a c++ implementation of
+        accumulate().  Unlike the original COCO PythonAPI, we don't populate the datastructure
+        self.evalImgs because this datastructure is a computational bottleneck.
+        :return: None
+        """
+        tic = time.time()
+        p = self.params
+        # add backward compatibility if useSegm is specified in params
+        if p.useSegm is not None:
+            p.iouType = "segm" if p.useSegm == 1 else "bbox"
+        logger.info("Evaluate annotation type *{}*".format(p.iouType))
+        p.imgIds = list(np.unique(p.imgIds))
+        if p.useCats:
+            p.catIds = list(np.unique(p.catIds))
+        p.maxDets = sorted(p.maxDets)
+        self.params = p
+        self._prepare()  # bottleneck
+        # loop through images, area range, max detection number
+        catIds = p.catIds if p.useCats else [-1]
+        if p.iouType == "segm" or p.iouType == "bbox":
+            computeIoU = self.computeIoU
+        elif p.iouType == "keypoints":
+            computeIoU = self.computeOks
+        self.ious = {
+            (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds
+        }  # bottleneck
+        maxDet = p.maxDets[-1]
+        # <<<< Beginning of code differences with original COCO API
+        def convert_instances_to_cpp(instances, is_det=False):
+            # Convert annotations for a list of instances in an image to a format that's fast
+            # to access in C++
+            instances_cpp = []
+            for instance in instances:
+                instance_cpp = _C.InstanceAnnotation(
+                    int(instance["id"]),
+                    instance["score"] if is_det else instance.get("score", 0.0),
+                    instance["area"],
+                    bool(instance.get("iscrowd", 0)),
+                    bool(instance.get("ignore", 0)),
+                )
+                instances_cpp.append(instance_cpp)
+            return instances_cpp
+        # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++
+        ground_truth_instances = [
+            [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds]
+            for imgId in p.imgIds
+        ]
+        detected_instances = [
+            [convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) for catId in p.catIds]
+            for imgId in p.imgIds
+        ]
+        ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds]
+        if not p.useCats:
+            # For each image, flatten per-category lists into a single list
+            ground_truth_instances = [[[o for c in i for o in c]] for i in ground_truth_instances]
+            detected_instances = [[[o for c in i for o in c]] for i in detected_instances]
+        # Call C++ implementation of self.evaluateImgs()
+        self._evalImgs_cpp = _C.COCOevalEvaluateImages(
+            p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances
+        )
+        self._evalImgs = None
+        self._paramsEval = copy.deepcopy(self.params)
+        toc = time.time()
+        logger.info("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic))
+        # >>>> End of code differences with original COCO API
+    def accumulate(self):
+        """
+        Accumulate per image evaluation results and store the result in self.eval.  Does not
+        support changing parameter settings from those used by self.evaluate()
+        After this call, self.eval["recall"], self.eval["precision"] and self.eval["scores"]
+        are numpy arrays reshaped according to self.eval["counts"].
+        """
+        logger.info("Accumulating evaluation results...")
+        tic = time.time()
+        # evaluate() is what sets self._evalImgs_cpp; without it there is nothing to accumulate.
+        assert hasattr(
+            self, "_evalImgs_cpp"
+        ), "evaluate() must be called before accumulate() is called."
+        self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp)
+        # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections
+        # (i.e. "counts" with its second entry, the recall-threshold axis, dropped)
+        self.eval["recall"] = np.array(self.eval["recall"]).reshape(
+            self.eval["counts"][:1] + self.eval["counts"][2:]
+        )
+        # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X
+        # num_area_ranges X num_max_detections
+        self.eval["precision"] = np.array(self.eval["precision"]).reshape(self.eval["counts"])
+        self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"])
+        toc = time.time()
+        logger.info("COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic))
--- a/detectron2/evaluation/lvis_evaluation.py
+++ b/detectron2/evaluation/lvis_evaluation.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+import copy
+import itertools
+import json
+import logging
+import os
+import pickle
+from collections import OrderedDict
+import torch
+import detectron2.utils.comm as comm
+from detectron2.config import CfgNode
+from detectron2.data import MetadataCatalog
+from detectron2.structures import Boxes, BoxMode, pairwise_iou
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.logger import create_small_table
+from .coco_evaluation import instances_to_coco_json
+from .evaluator import DatasetEvaluator
+class LVISEvaluator(DatasetEvaluator):
+    """
+    Evaluate object proposal and instance detection/segmentation outputs using
+    LVIS's metrics and evaluation API.
+    """
+    def __init__(
+        self,
+        dataset_name,
+        tasks=None,
+        distributed=True,
+        output_dir=None,
+        *,
+        max_dets_per_image=None,
+    ):
+        """
+        Args:
+            dataset_name (str): name of the dataset to be evaluated.
+                It must have the following corresponding metadata:
+                "json_file": the path to the LVIS format annotation
+            tasks (tuple[str]): tasks that can be evaluated under the given
+                configuration. A task is one of "bbox", "segm".
+                By default, will infer this automatically from predictions.
+            distributed (True): if True, will collect results from all ranks for evaluation.
+                Otherwise, will evaluate the results in the current process.
+            output_dir (str): optional, an output directory to dump results.
+            max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP
+                This limit, by default of the LVIS dataset, is 300.
+        """
+        from lvis import LVIS
+        self._logger = logging.getLogger(__name__)
+        if tasks is not None and isinstance(tasks, CfgNode):
+            # Logger.warn is a deprecated alias of Logger.warning.
+            self._logger.warning(
+                "COCO Evaluator instantiated using config, this is deprecated behavior."
+                " Please pass in explicit arguments instead."
+            )
+            self._tasks = None  # Inferring it from predictions should be better
+        else:
+            self._tasks = tasks
+        self._distributed = distributed
+        self._output_dir = output_dir
+        self._max_dets_per_image = max_dets_per_image
+        self._cpu_device = torch.device("cpu")
+        self._metadata = MetadataCatalog.get(dataset_name)
+        json_file = PathManager.get_local_path(self._metadata.json_file)
+        self._lvis_api = LVIS(json_file)
+        # Test set json files do not contain annotations (evaluation must be
+        # performed using the LVIS evaluation server).
+        self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0
+    def reset(self):
+        """Discard all predictions accumulated so far."""
+        self._predictions = []
+    def process(self, inputs, outputs):
+        """
+        Args:
+            inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN).
+                It is a list of dict. Each dict corresponds to an image and
+                contains keys like "height", "width", "file_name", "image_id".
+            outputs: the outputs of a LVIS model. It is a list of dicts with key
+                "instances" that contains :class:`Instances`.
+        """
+        # `inp` rather than `input` to avoid shadowing the builtin.
+        for inp, output in zip(inputs, outputs):
+            prediction = {"image_id": inp["image_id"]}
+            if "instances" in output:
+                instances = output["instances"].to(self._cpu_device)
+                prediction["instances"] = instances_to_coco_json(instances, inp["image_id"])
+            if "proposals" in output:
+                prediction["proposals"] = output["proposals"].to(self._cpu_device)
+            self._predictions.append(prediction)
+    def evaluate(self):
+        """
+        Gather predictions (from all ranks when distributed) and evaluate them.
+        Returns:
+            None on non-main processes when distributed; an empty dict if no
+            predictions were received; otherwise a deep copy of the results,
+            keyed by task name ("box_proposals", "bbox", "segm").
+        """
+        if self._distributed:
+            comm.synchronize()
+            predictions = comm.gather(self._predictions, dst=0)
+            predictions = list(itertools.chain(*predictions))
+            if not comm.is_main_process():
+                return
+        else:
+            predictions = self._predictions
+        if len(predictions) == 0:
+            self._logger.warning("[LVISEvaluator] Did not receive valid predictions.")
+            return {}
+        if self._output_dir:
+            PathManager.mkdirs(self._output_dir)
+            file_path = os.path.join(self._output_dir, "instances_predictions.pth")
+            with PathManager.open(file_path, "wb") as f:
+                torch.save(predictions, f)
+        self._results = OrderedDict()
+        # Only the first prediction is inspected to decide which evaluations to run.
+        if "proposals" in predictions[0]:
+            self._eval_box_proposals(predictions)
+        if "instances" in predictions[0]:
+            self._eval_predictions(predictions)
+        # Copy so the caller can do whatever with results
+        return copy.deepcopy(self._results)
+    def _tasks_from_predictions(self, predictions):
+        """Return ("bbox", "segm") if any prediction has a "segmentation" key, else ("bbox",)."""
+        for pred in predictions:
+            if "segmentation" in pred:
+                return ("bbox", "segm")
+        return ("bbox",)
+    def _eval_predictions(self, predictions):
+        """
+        Evaluate predictions. Fill self._results with the metrics of the tasks.
+        Args:
+            predictions (list[dict]): list of outputs from the model
+        """
+        self._logger.info("Preparing results in the LVIS format ...")
+        lvis_results = list(itertools.chain(*[x["instances"] for x in predictions]))
+        tasks = self._tasks or self._tasks_from_predictions(lvis_results)
+        # NOTE: the category id rewrite below mutates the result dicts in place, and those
+        # dicts are the same objects held in `predictions`.
+        # LVIS evaluator can be used to evaluate results for COCO dataset categories.
+        # In this case `_metadata` variable will have a field with COCO-specific category mapping.
+        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
+            reverse_id_mapping = {
+                v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items()
+            }
+            for result in lvis_results:
+                result["category_id"] = reverse_id_mapping[result["category_id"]]
+        else:
+            # unmap the category ids for LVIS (from 0-indexed to 1-indexed)
+            for result in lvis_results:
+                result["category_id"] += 1
+        if self._output_dir:
+            file_path = os.path.join(self._output_dir, "lvis_instances_results.json")
+            self._logger.info("Saving results to {}".format(file_path))
+            with PathManager.open(file_path, "w") as f:
+                f.write(json.dumps(lvis_results))
+                f.flush()
+        if not self._do_evaluation:
+            self._logger.info("Annotations are not available for evaluation.")
+            return
+        self._logger.info("Evaluating predictions ...")
+        for task in sorted(tasks):
+            res = _evaluate_predictions_on_lvis(
+                self._lvis_api,
+                lvis_results,
+                task,
+                max_dets_per_image=self._max_dets_per_image,
+                class_names=self._metadata.get("thing_classes"),
+            )
+            self._results[task] = res
+    def _eval_box_proposals(self, predictions):
+        """
+        Evaluate the box proposals in predictions.
+        Fill self._results with the metrics for "box_proposals" task.
+        """
+        if self._output_dir:
+            # Saving generated box proposals to file.
+            # Predicted box_proposals are in XYXY_ABS mode.
+            bbox_mode = BoxMode.XYXY_ABS.value
+            ids, boxes, objectness_logits = [], [], []
+            for prediction in predictions:
+                ids.append(prediction["image_id"])
+                boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
+                objectness_logits.append(prediction["proposals"].objectness_logits.numpy())
+            proposal_data = {
+                "boxes": boxes,
+                "objectness_logits": objectness_logits,
+                "ids": ids,
+                "bbox_mode": bbox_mode,
+            }
+            with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
+                pickle.dump(proposal_data, f)
+        if not self._do_evaluation:
+            self._logger.info("Annotations are not available for evaluation.")
+            return
+        self._logger.info("Evaluating bbox proposals ...")
+        res = {}
+        # Maps the area range name to the suffix used in the metric key, e.g. "ARs@100".
+        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
+        for limit in [100, 1000]:
+            for area, suffix in areas.items():
+                stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit)
+                key = "AR{}@{:d}".format(suffix, limit)
+                res[key] = float(stats["ar"].item() * 100)
+        self._logger.info("Proposal metrics: \n" + create_small_table(res))
+        self._results["box_proposals"] = res
+# inspired from Detectron:
+# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa
+def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None):
+    """
+    Compute proposal recall metrics against the LVIS ground-truth boxes.
+    This is a much faster alternative to the official LVIS API recall evaluation
+    code, but it produces slightly different results.
+    Args:
+        dataset_predictions: iterable of dicts with keys "proposals" and "image_id".
+        lvis_api: object providing `get_ann_ids` and `load_anns`.
+        thresholds: IoU thresholds tensor; defaults to 0.5 to 0.95 in steps of 0.05.
+        area (str): name of the ground-truth area range to evaluate.
+        limit (None or int): keep at most this many top-scoring proposals per image.
+    Returns:
+        dict with keys "ar", "recalls", "thresholds", "gt_overlaps", "num_pos".
+    """
+    # Ground-truth area window [min, max] selected by each named range.
+    named_area_ranges = {
+        "all": [0 ** 2, 1e5 ** 2],
+        "small": [0 ** 2, 32 ** 2],
+        "medium": [32 ** 2, 96 ** 2],
+        "large": [96 ** 2, 1e5 ** 2],
+        "96-128": [96 ** 2, 128 ** 2],
+        "128-256": [128 ** 2, 256 ** 2],
+        "256-512": [256 ** 2, 512 ** 2],
+        "512-inf": [512 ** 2, 1e5 ** 2],
+    }
+    assert area in named_area_ranges, "Unknown area range: {}".format(area)
+    min_area, max_area = named_area_ranges[area]
+    per_image_overlaps = []
+    num_pos = 0
+    for record in dataset_predictions:
+        proposals = record["proposals"]
+        # Order proposals by objectness, highest first.
+        # TODO maybe remove this and make it explicit in the documentation
+        order = proposals.objectness_logits.sort(descending=True)[1]
+        proposals = proposals[order]
+        anno = lvis_api.load_anns(lvis_api.get_ann_ids(img_ids=[record["image_id"]]))
+        xyxy_boxes = [
+            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno
+        ]
+        # The reshape guards against images with no boxes.
+        gt_boxes = Boxes(torch.as_tensor(xyxy_boxes).reshape(-1, 4))
+        gt_areas = torch.as_tensor([obj["area"] for obj in anno])
+        if len(gt_boxes) == 0 or len(proposals) == 0:
+            continue
+        in_range = (gt_areas >= min_area) & (gt_areas <= max_area)
+        gt_boxes = gt_boxes[in_range]
+        num_gt = len(gt_boxes)
+        num_pos += num_gt
+        if num_gt == 0:
+            continue
+        if limit is not None and len(proposals) > limit:
+            proposals = proposals[:limit]
+        overlaps = pairwise_iou(proposals.proposal_boxes, gt_boxes)
+        matched_iou = torch.zeros(num_gt)
+        # Greedy one-to-one matching: each round takes the best remaining (proposal, gt) pair.
+        for j in range(min(len(proposals), num_gt)):
+            # Best IoU and the proposal achieving it, per gt box.
+            best_iou_per_gt, best_prop_per_gt = overlaps.max(dim=0)
+            # The gt box that is currently covered best.
+            gt_ovr, gt_ind = best_iou_per_gt.max(dim=0)
+            assert gt_ovr >= 0
+            box_ind = best_prop_per_gt[gt_ind]
+            matched_iou[j] = overlaps[box_ind, gt_ind]
+            assert matched_iou[j] == gt_ovr
+            # Retire both the proposal and the gt box from later rounds.
+            overlaps[box_ind, :] = -1
+            overlaps[:, gt_ind] = -1
+        per_image_overlaps.append(matched_iou)
+    if len(per_image_overlaps):
+        gt_overlaps = torch.cat(per_image_overlaps, dim=0)
+    else:
+        gt_overlaps = torch.zeros(0, dtype=torch.float32)
+    gt_overlaps, _ = torch.sort(gt_overlaps)
+    if thresholds is None:
+        step = 0.05
+        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
+    recalls = torch.zeros_like(thresholds)
+    # Recall at each IoU threshold: fraction of counted gt boxes matched at or above it.
+    for idx, iou_thr in enumerate(thresholds):
+        recalls[idx] = (gt_overlaps >= iou_thr).float().sum() / float(num_pos)
+    return {
+        "ar": recalls.mean(),
+        "recalls": recalls,
+        "thresholds": thresholds,
+        "gt_overlaps": gt_overlaps,
+        "num_pos": num_pos,
+    }
+def _evaluate_predictions_on_lvis(
+    lvis_gt, lvis_results, iou_type, max_dets_per_image=None, class_names=None
+):
+    """
+    Run the LVIS evaluation API on a list of results for one task.
+    Args:
+        lvis_gt: ground-truth object passed to `LVISResults` and `LVISEval`.
+        lvis_results (list[dict]): results in the LVIS json format.
+        iou_type (str): "bbox" or "segm".
+        max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP
+            This limit, by default of the LVIS dataset, is 300.
+        class_names (None or list[str]): if provided, will use it to predict
+            per-category AP.
+    Returns:
+        a dict of {metric name: score}. All scores are NaN when `lvis_results` is empty.
+    """
+    metrics = {
+        "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
+        "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
+    }[iou_type]
+    logger = logging.getLogger(__name__)
+    if len(lvis_results) == 0:  # TODO: check if needed
+        # Logger.warn is a deprecated alias of Logger.warning.
+        logger.warning("No predictions from the model!")
+        return {metric: float("nan") for metric in metrics}
+    if iou_type == "segm":
+        # Work on a copy so the caller's result dicts keep their "bbox" field.
+        lvis_results = copy.deepcopy(lvis_results)
+        # When evaluating mask AP, if the results contain bbox, LVIS API will
+        # use the box area as the area of the instance, instead of the mask area.
+        # This leads to a different definition of small/medium/large.
+        # We remove the bbox field to let mask AP use mask area.
+        for c in lvis_results:
+            c.pop("bbox", None)
+    if max_dets_per_image is None:
+        max_dets_per_image = 300  # Default for LVIS dataset
+    from lvis import LVISEval, LVISResults
+    logger.info(f"Evaluating with max detections per image = {max_dets_per_image}")
+    lvis_results = LVISResults(lvis_gt, lvis_results, max_dets=max_dets_per_image)
+    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
+    lvis_eval.run()
+    lvis_eval.print_results()
+    # Pull the standard metrics from the LVIS results
+    results = lvis_eval.get_results()
+    results = {metric: float(results[metric] * 100) for metric in metrics}
+    logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
+    return results
--- a/detectron2/evaluation/panoptic_evaluation.py
+++ b/detectron2/evaluation/panoptic_evaluation.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+import contextlib
+import io
+import itertools
+import json
+import logging
+import numpy as np
+import os
+import tempfile
+from collections import OrderedDict
+from typing import Optional
+from PIL import Image
+from tabulate import tabulate
+from detectron2.data import MetadataCatalog
+from detectron2.utils import comm
+from detectron2.utils.file_io import PathManager
+from .evaluator import DatasetEvaluator
+logger = logging.getLogger(__name__)
+class COCOPanopticEvaluator(DatasetEvaluator):
+    """
+    Compute Panoptic Quality metrics on COCO through PanopticAPI.
+    Panoptic segmentation predictions are saved in `output_dir`.
+    `evaluate` contains a synchronize call, so it has to be called from all workers.
+    """
+    def __init__(self, dataset_name: str, output_dir: Optional[str] = None):
+        """
+        Args:
+            dataset_name: name of the dataset
+            output_dir: output directory to save results for evaluation.
+        """
+        self._metadata = MetadataCatalog.get(dataset_name)
+        # Inverted lookups: contiguous id -> dataset id, for things and for stuff.
+        self._thing_contiguous_id_to_dataset_id = {
+            contiguous_id: dataset_id
+            for dataset_id, contiguous_id in self._metadata.thing_dataset_id_to_contiguous_id.items()
+        }
+        self._stuff_contiguous_id_to_dataset_id = {
+            contiguous_id: dataset_id
+            for dataset_id, contiguous_id in self._metadata.stuff_dataset_id_to_contiguous_id.items()
+        }
+        self._output_dir = output_dir
+        if self._output_dir is not None:
+            PathManager.mkdirs(self._output_dir)
+    def reset(self):
+        """Drop every prediction collected so far."""
+        self._predictions = []
+    def _convert_category_id(self, segment_info):
+        """
+        Map the contiguous "category_id" of a segment back to its dataset id, in place.
+        The "isthing" key is removed; if it was absent the segment is returned untouched.
+        """
+        isthing = segment_info.pop("isthing", None)
+        if isthing is None:
+            # the model produces panoptic category id directly. No more conversion needed
+            return segment_info
+        # Only a literal True selects the thing table; anything else uses the stuff table.
+        if isthing is True:
+            id_table = self._thing_contiguous_id_to_dataset_id
+        else:
+            id_table = self._stuff_contiguous_id_to_dataset_id
+        segment_info["category_id"] = id_table[segment_info["category_id"]]
+        return segment_info
+    def process(self, inputs, outputs):
+        """
+        Encode each panoptic prediction as an in-memory PNG and queue it for evaluation.
+        Args:
+            inputs: list of dicts, each with "file_name" and "image_id".
+            outputs: list of dicts, each with "panoptic_seg" holding
+                (panoptic image, segments_info or None).
+        """
+        from panopticapi.utils import id2rgb
+        for sample, output in zip(inputs, outputs):
+            panoptic_img, segments_info = output["panoptic_seg"]
+            panoptic_img = panoptic_img.cpu().numpy()
+            if segments_info is None:
+                # If "segments_info" is None, we assume "panoptic_img" is a
+                # H*W int32 image storing the panoptic_id in the format of
+                # category_id * label_divisor + instance_id. We reserve -1 for
+                # VOID label, and add 1 to panoptic_img since the official
+                # evaluation script uses 0 for VOID label.
+                label_divisor = self._metadata.label_divisor
+                segments_info = []
+                for panoptic_label in np.unique(panoptic_img):
+                    if panoptic_label != -1:  # -1 marks the VOID region, which is skipped
+                        pred_class = panoptic_label // label_divisor
+                        isthing = (
+                            pred_class in self._metadata.thing_dataset_id_to_contiguous_id.values()
+                        )
+                        segments_info.append(
+                            {
+                                "id": int(panoptic_label) + 1,
+                                "category_id": int(pred_class),
+                                "isthing": bool(isthing),
+                            }
+                        )
+                # Official evaluation script uses 0 for VOID label.
+                panoptic_img += 1
+            base_name = os.path.basename(sample["file_name"])
+            file_name_png = os.path.splitext(base_name)[0] + ".png"
+            with io.BytesIO() as out:
+                Image.fromarray(id2rgb(panoptic_img)).save(out, format="PNG")
+                segments_info = [self._convert_category_id(x) for x in segments_info]
+                record = {
+                    "image_id": sample["image_id"],
+                    "file_name": file_name_png,
+                    "png_string": out.getvalue(),
+                    "segments_info": segments_info,
+                }
+                self._predictions.append(record)
+    def evaluate(self):
+        """
+        Gather predictions from all workers and compute PQ/SQ/RQ with PanopticAPI.
+        Returns:
+            None on non-main processes; otherwise an OrderedDict with key
+            "panoptic_seg" mapping to the metric dict.
+        """
+        comm.synchronize()
+        gathered = comm.gather(self._predictions)
+        self._predictions = list(itertools.chain(*gathered))
+        if not comm.is_main_process():
+            return
+        # PanopticApi requires local files
+        gt_json = PathManager.get_local_path(self._metadata.panoptic_json)
+        gt_folder = PathManager.get_local_path(self._metadata.panoptic_root)
+        with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir:
+            logger.info("Writing all panoptic predictions to {} ...".format(pred_dir))
+            for pred in self._predictions:
+                png_path = os.path.join(pred_dir, pred["file_name"])
+                with open(png_path, "wb") as f:
+                    # pop: the raw PNG bytes must not end up in the predictions json
+                    f.write(pred.pop("png_string"))
+            with open(gt_json, "r") as f:
+                json_data = json.load(f)
+            json_data["annotations"] = self._predictions
+            predictions_json = os.path.join(self._output_dir or pred_dir, "predictions.json")
+            with PathManager.open(predictions_json, "w") as f:
+                f.write(json.dumps(json_data))
+            from panopticapi.evaluation import pq_compute
+            # pq_compute's stdout output is captured and discarded.
+            with contextlib.redirect_stdout(io.StringIO()):
+                pq_res = pq_compute(
+                    gt_json,
+                    PathManager.get_local_path(predictions_json),
+                    gt_folder=gt_folder,
+                    pred_folder=pred_dir,
+                )
+        res = {}
+        # Keys come out as PQ, SQ, RQ, then the _th (Things) and _st (Stuff) variants.
+        for group, suffix in (("All", ""), ("Things", "_th"), ("Stuff", "_st")):
+            for metric in ("pq", "sq", "rq"):
+                res[metric.upper() + suffix] = 100 * pq_res[group][metric]
+        results = OrderedDict({"panoptic_seg": res})
+        _print_panoptic_results(pq_res)
+        return results
+def _print_panoptic_results(pq_res):
+    """
+    Log a table of PQ/SQ/RQ (scaled by 100) and category counts for All, Things and Stuff.
+    Args:
+        pq_res: mapping with keys "All", "Things", "Stuff", each a mapping
+            with keys "pq", "sq", "rq" and "n".
+    """
+    rows = []
+    for group in ("All", "Things", "Stuff"):
+        stats = pq_res[group]
+        rows.append([group, stats["pq"] * 100, stats["sq"] * 100, stats["rq"] * 100, stats["n"]])
+    table = tabulate(
+        rows,
+        headers=["", "PQ", "SQ", "RQ", "#categories"],
+        tablefmt="pipe",
+        floatfmt=".3f",
+        stralign="center",
+        numalign="center",
+    )
+    logger.info("Panoptic Evaluation Results:\n" + table)
+if __name__ == "__main__":
+    # Standalone entry point: score an existing predictions json/folder against ground truth.
+    from detectron2.utils.logger import setup_logger
+    logger = setup_logger()
+    import argparse
+    cli = argparse.ArgumentParser()
+    for flag in ("--gt-json", "--gt-dir", "--pred-json", "--pred-dir"):
+        cli.add_argument(flag)
+    args = cli.parse_args()
+    from panopticapi.evaluation import pq_compute
+    # stdout is redirected to a throwaway buffer while computing and reporting.
+    with contextlib.redirect_stdout(io.StringIO()):
+        pq_res = pq_compute(
+            args.gt_json,
+            args.pred_json,
+            gt_folder=args.gt_dir,
+            pred_folder=args.pred_dir,
+        )
+        _print_panoptic_results(pq_res)
--- a/detectron2/evaluation/pascal_voc_evaluation.py
+++ b/detectron2/evaluation/pascal_voc_evaluation.py
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+import numpy as np
+import os
+import tempfile
+import xml.etree.ElementTree as ET
+from collections import OrderedDict, defaultdict
+from functools import lru_cache
+import torch
+from detectron2.data import MetadataCatalog
+from detectron2.utils import comm
+from detectron2.utils.file_io import PathManager
+from .evaluator import DatasetEvaluator
+class PascalVOCDetectionEvaluator(DatasetEvaluator):
+    """
+    Evaluate Pascal VOC style AP for Pascal VOC dataset.
+    It contains a synchronization, therefore has to be called from all ranks.
+    Note that the concept of AP can be implemented in different ways and may not
+    produce identical results. This class mimics the implementation of the official
+    Pascal VOC Matlab API, and should produce similar but not identical results to the
+    official API.
+    """
+    def __init__(self, dataset_name):
+        """
+        Args:
+            dataset_name (str): name of the dataset, e.g., "voc_2007_test"
+        """
+        self._dataset_name = dataset_name
+        meta = MetadataCatalog.get(dataset_name)
+        # Too many tiny files, download all to local for speed.
+        annotation_dir_local = PathManager.get_local_path(
+            os.path.join(meta.dirname, "Annotations/")
+        )
+        self._anno_file_template = os.path.join(annotation_dir_local, "{}.xml")
+        self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt")
+        self._class_names = meta.thing_classes
+        assert meta.year in [2007, 2012], meta.year
+        # Selects the VOC07 11-point AP metric in evaluate().
+        self._is_2007 = meta.year == 2007
+        self._cpu_device = torch.device("cpu")
+        self._logger = logging.getLogger(__name__)
+    def reset(self):
+        """Discard all predictions accumulated so far."""
+        self._predictions = defaultdict(list)  # class id -> list of prediction strings
+    def process(self, inputs, outputs):
+        """
+        Record one text line per detected box, grouped by predicted class id.
+        Args:
+            inputs: list of dicts, each with an "image_id" key.
+            outputs: list of dicts, each with an "instances" key providing
+                `pred_boxes`, `scores` and `pred_classes`.
+        """
+        # `inp` rather than `input` to avoid shadowing the builtin.
+        for inp, output in zip(inputs, outputs):
+            image_id = inp["image_id"]
+            instances = output["instances"].to(self._cpu_device)
+            boxes = instances.pred_boxes.tensor.numpy()
+            scores = instances.scores.tolist()
+            classes = instances.pred_classes.tolist()
+            for box, score, cls in zip(boxes, scores, classes):
+                xmin, ymin, xmax, ymax = box
+                # The inverse of data loading logic in `datasets/pascal_voc.py`
+                xmin += 1
+                ymin += 1
+                self._predictions[cls].append(
+                    f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}"
+                )
+    def evaluate(self):
+        """
+        Returns:
+            dict: has a key "bbox", whose value is a dict of "AP", "AP50", and "AP75".
+            Returns None on non-main processes.
+        """
+        all_predictions = comm.gather(self._predictions, dst=0)
+        if not comm.is_main_process():
+            return
+        # Merge the per-rank {class id: lines} dicts into a single one.
+        predictions = defaultdict(list)
+        for predictions_per_rank in all_predictions:
+            for clsid, lines in predictions_per_rank.items():
+                predictions[clsid].extend(lines)
+        del all_predictions
+        self._logger.info(
+            "Evaluating {} using {} metric. "
+            "Note that results do not use the official Matlab API.".format(
+                self._dataset_name, 2007 if self._is_2007 else 2012
+            )
+        )
+        with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname:
+            res_file_template = os.path.join(dirname, "{}.txt")
+            aps = defaultdict(list)  # iou -> ap per class
+            for cls_id, cls_name in enumerate(self._class_names):
+                # A class without detections still gets a results file (with empty content).
+                lines = predictions.get(cls_id, [""])
+                with open(res_file_template.format(cls_name), "w") as f:
+                    f.write("\n".join(lines))
+                # IoU thresholds 0.50, 0.55, ..., 0.95, kept as integer percents for dict keys.
+                for thresh in range(50, 100, 5):
+                    # Only the AP is used; recall and precision are discarded.
+                    _, _, ap = voc_eval(
+                        res_file_template,
+                        self._anno_file_template,
+                        self._image_set_path,
+                        cls_name,
+                        ovthresh=thresh / 100.0,
+                        use_07_metric=self._is_2007,
+                    )
+                    aps[thresh].append(ap * 100)
+        ret = OrderedDict()
+        mAP = {iou: np.mean(x) for iou, x in aps.items()}
+        ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]}
+        return ret
+##############################################################################
+#
+# Below code is modified from
+# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py
+# --------------------------------------------------------
+# Fast/er R-CNN
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Bharath Hariharan
+# --------------------------------------------------------
+"""Python implementation of the PASCAL VOC devkit's AP evaluation code."""
+@lru_cache(maxsize=None)
+def parse_rec(filename):
+    """
+    Read a PASCAL VOC xml annotation file.
+    Returns a list with one dict per "object" element, holding its "name", "pose",
+    "truncated", "difficult" and "bbox" ([xmin, ymin, xmax, ymax] as ints).
+    Results are cached per filename.
+    """
+    with PathManager.open(filename) as f:
+        tree = ET.parse(f)
+    objects = []
+    for node in tree.findall("object"):
+        box_node = node.find("bndbox")
+        objects.append(
+            {
+                "name": node.find("name").text,
+                "pose": node.find("pose").text,
+                "truncated": int(node.find("truncated").text),
+                "difficult": int(node.find("difficult").text),
+                "bbox": [
+                    int(box_node.find(tag).text) for tag in ("xmin", "ymin", "xmax", "ymax")
+                ],
+            }
+        )
+    return objects
+def voc_ap(rec, prec, use_07_metric=False):
+    """Return the VOC average precision for the given recall and precision arrays.
+    With use_07_metric set, the VOC 07 11-point method is used; otherwise
+    (the default) the area under the precision envelope is computed.
+    """
+    if not use_07_metric:
+        # Pad both curves with sentinel values at the two ends.
+        padded_rec = np.concatenate(([0.0], rec, [1.0]))
+        envelope = np.concatenate(([0.0], prec, [0.0]))
+        # Precision envelope: running maximum taken from the right.
+        envelope = np.maximum.accumulate(envelope[::-1])[::-1]
+        # Positions where the recall value changes.
+        steps = np.where(padded_rec[1:] != padded_rec[:-1])[0]
+        # Sum of (\Delta recall) * precision over those positions.
+        return np.sum((padded_rec[steps + 1] - padded_rec[steps]) * envelope[steps + 1])
+    # 11 point metric: average the best precision reached at recall >= 0.0, 0.1, ..., 1.0
+    ap = 0.0
+    for t in np.arange(0.0, 1.1, 0.1):
+        reached = rec >= t
+        p = np.max(prec[reached]) if np.sum(reached) != 0 else 0
+        ap = ap + p / 11.0
+    return ap
+def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False):
+    """rec, prec, ap = voc_eval(detpath,
+                                annopath,
+                                imagesetfile,
+                                classname,
+                                [ovthresh],
+                                [use_07_metric])
+    Top level function that does the PASCAL VOC evaluation for one class.
+    Args:
+        detpath: Path template to detections;
+            detpath.format(classname) should produce the detection results file, one
+            detection per line as "image_id confidence x1 y1 x2 y2" (space separated).
+        annopath: Path template to annotations;
+            annopath.format(imagename) should be the xml annotations file.
+        imagesetfile: Text file containing the list of images, one image per line.
+        classname: Category name.
+        ovthresh: Overlap threshold; a detection must exceed it to match (default = 0.5).
+        use_07_metric: Whether to use VOC07's 11 point AP computation (default False).
+    Returns:
+        (rec, prec, ap): cumulative recall and precision arrays, ordered by decreasing
+        detection confidence, and the resulting average precision.
+    """
+    # first load gt
+    # read list of images
+    with PathManager.open(imagesetfile, "r") as f:
+        lines = f.readlines()
+    imagenames = [x.strip() for x in lines]
+    # load annots
+    recs = {}
+    for imagename in imagenames:
+        recs[imagename] = parse_rec(annopath.format(imagename))
+    # extract gt objects for this class
+    class_recs = {}
+    npos = 0
+    for imagename in imagenames:
+        R = [obj for obj in recs[imagename] if obj["name"] == classname]
+        bbox = np.array([x["bbox"] for x in R])
+        # Builtin ``bool`` instead of the ``np.bool`` alias, which NumPy 1.24 removed.
+        difficult = np.array([x["difficult"] for x in R]).astype(bool)
+        det = [False] * len(R)
+        # "difficult" boxes do not count as positives.
+        npos = npos + sum(~difficult)
+        class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det}
+    # read dets
+    detfile = detpath.format(classname)
+    with open(detfile, "r") as f:
+        lines = f.readlines()
+    splitlines = [x.strip().split(" ") for x in lines]
+    image_ids = [x[0] for x in splitlines]
+    confidence = np.array([float(x[1]) for x in splitlines])
+    # reshape keeps BB two-dimensional even when there are no detections
+    BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4)
+    # sort by confidence
+    sorted_ind = np.argsort(-confidence)
+    BB = BB[sorted_ind, :]
+    image_ids = [image_ids[x] for x in sorted_ind]
+    # go down dets and mark TPs and FPs
+    nd = len(image_ids)
+    tp = np.zeros(nd)
+    fp = np.zeros(nd)
+    for d in range(nd):
+        R = class_recs[image_ids[d]]
+        bb = BB[d, :].astype(float)
+        ovmax = -np.inf
+        BBGT = R["bbox"].astype(float)
+        if BBGT.size > 0:
+            # compute overlaps
+            # intersection (the +1.0 terms treat coordinates as inclusive pixel indices)
+            ixmin = np.maximum(BBGT[:, 0], bb[0])
+            iymin = np.maximum(BBGT[:, 1], bb[1])
+            ixmax = np.minimum(BBGT[:, 2], bb[2])
+            iymax = np.minimum(BBGT[:, 3], bb[3])
+            iw = np.maximum(ixmax - ixmin + 1.0, 0.0)
+            ih = np.maximum(iymax - iymin + 1.0, 0.0)
+            inters = iw * ih
+            # union
+            uni = (
+                (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0)
+                + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0)
+                - inters
+            )
+            overlaps = inters / uni
+            ovmax = np.max(overlaps)
+            jmax = np.argmax(overlaps)
+        if ovmax > ovthresh:
+            # A match against a "difficult" gt box is neither a TP nor a FP.
+            if not R["difficult"][jmax]:
+                if not R["det"][jmax]:
+                    tp[d] = 1.0
+                    R["det"][jmax] = 1
+                else:
+                    # gt box already claimed by a higher-confidence detection
+                    fp[d] = 1.0
+        else:
+            fp[d] = 1.0
+    # compute precision recall
+    fp = np.cumsum(fp)
+    tp = np.cumsum(tp)
+    rec = tp / float(npos)
+    # avoid divide by zero in case the first detection matches a difficult
+    # ground truth
+    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
+    ap = voc_ap(rec, prec, use_07_metric)
+    return rec, prec, ap
--- a/detectron2/evaluation/rotated_coco_evaluation.py
+++ b/detectron2/evaluation/rotated_coco_evaluation.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+import itertools
+import json
+import numpy as np
+import os
+import torch
+from pycocotools.cocoeval import COCOeval, maskUtils
+from detectron2.structures import BoxMode, RotatedBoxes, pairwise_iou_rotated
+from detectron2.utils.file_io import PathManager
+from .coco_evaluation import COCOEvaluator
+class RotatedCOCOeval(COCOeval):
+    """COCOeval whose bbox IoU computation also handles 5-element (rotated) boxes."""
+    @staticmethod
+    def is_rotated(box_list):
+        """Tell whether ``box_list`` holds 5-element boxes.
+        An ndarray is judged by the size of its second dimension. A list counts as rotated
+        only when every entry is a list/ndarray of length 5. An empty list, or any other
+        container type, yields False.
+        """
+        kind = type(box_list)
+        if kind == np.ndarray:
+            return box_list.shape[1] == 5
+        if kind != list:
+            return False
+        if box_list == []:  # cannot decide the box_dim
+            return False
+        flags = [(len(box) == 5) and (type(box) in (list, np.ndarray)) for box in box_list]
+        return np.all(np.array(flags))
+    @staticmethod
+    def boxlist_to_tensor(boxlist, output_box_dim):
+        """Turn an ndarray or list of boxes into a tensor with ``output_box_dim`` columns.
+        An empty list becomes a ``(0, output_box_dim)`` float32 tensor. 4-dim input is
+        converted from XYWH_ABS to XYWHA_ABS when 5 columns are requested; any other
+        dimension mismatch, or an unsupported container type, raises ``Exception``.
+        """
+        if type(boxlist) == np.ndarray:
+            tensor = torch.from_numpy(boxlist)
+        elif type(boxlist) == list:
+            if boxlist == []:
+                return torch.zeros((0, output_box_dim), dtype=torch.float32)
+            tensor = torch.FloatTensor(boxlist)
+        else:
+            raise Exception("Unrecognized boxlist type")
+        src_dim = tensor.shape[1]
+        if src_dim == output_box_dim:
+            return tensor
+        if src_dim == 4 and output_box_dim == 5:
+            return BoxMode.convert(tensor, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS)
+        raise Exception(
+            "Unable to convert from {}-dim box to {}-dim box".format(src_dim, output_box_dim)
+        )
+    def compute_iou_dt_gt(self, dt, gt, is_crowd):
+        """Pairwise IoU between detection boxes ``dt`` and ground-truth boxes ``gt``.
+        If either side is rotated, both are converted to 5-dim boxes and compared with
+        ``pairwise_iou_rotated`` (crowd annotations are asserted absent). Otherwise the
+        classical ``maskUtils.iou`` is used.
+        """
+        if not (self.is_rotated(dt) or self.is_rotated(gt)):
+            # This is the same as the classical COCO evaluation
+            return maskUtils.iou(dt, gt, is_crowd)
+        # TODO: take is_crowd into consideration
+        assert all(c == 0 for c in is_crowd)
+        rotated_dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5))
+        rotated_gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5))
+        return pairwise_iou_rotated(rotated_dt, rotated_gt)
+    def computeIoU(self, imgId, catId):
+        """IoUs between the score-sorted detections and the ground truths of one image.
+        Returns ``[]`` when the image/category has neither detections nor ground truths.
+        """
+        params = self.params
+        if params.useCats:
+            gt = self._gts[imgId, catId]
+            dt = self._dts[imgId, catId]
+        else:
+            gt = [ann for cId in params.catIds for ann in self._gts[imgId, cId]]
+            dt = [ann for cId in params.catIds for ann in self._dts[imgId, cId]]
+        if len(gt) == 0 and len(dt) == 0:
+            return []
+        # Stable sort by descending score, then keep at most maxDets[-1] detections.
+        order = np.argsort([-det["score"] for det in dt], kind="mergesort")
+        dt = [dt[i] for i in order]
+        limit = params.maxDets[-1]
+        if len(dt) > limit:
+            dt = dt[0:limit]
+        assert params.iouType == "bbox", "unsupported iouType for iou computation"
+        gt_boxes = [ann["bbox"] for ann in gt]
+        dt_boxes = [ann["bbox"] for ann in dt]
+        # compute iou between each dt and gt region
+        iscrowd = [int(ann["iscrowd"]) for ann in gt]
+        # Note: this function is copied from cocoeval.py in cocoapi
+        # and the major difference is here.
+        return self.compute_iou_dt_gt(dt_boxes, gt_boxes, iscrowd)
+class RotatedCOCOEvaluator(COCOEvaluator):
+    """
+    Evaluate object proposal/instance detection outputs using COCO-like metrics and APIs,
+    with rotated boxes support.
+    Note: this uses IOU only and does not consider angle differences.
+    """
+    def process(self, inputs, outputs):
+        """
+        Record the predictions of one batch in ``self._predictions``.
+        Args:
+            inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
+                It is a list of dict. Each dict corresponds to an image and
+                contains keys like "height", "width", "file_name", "image_id".
+            outputs: the outputs of a COCO model. It is a list of dicts with key
+                "instances" that contains :class:`Instances`.
+        """
+        for sample, result in zip(inputs, outputs):
+            image_id = sample["image_id"]
+            record = {"image_id": image_id}
+            if "instances" in result:
+                cpu_instances = result["instances"].to(self._cpu_device)
+                record["instances"] = self.instances_to_json(cpu_instances, image_id)
+            if "proposals" in result:
+                record["proposals"] = result["proposals"].to(self._cpu_device)
+            self._predictions.append(record)
+    def instances_to_json(self, instances, img_id):
+        """
+        Convert the predicted instances of one image into a list of COCO-style result dicts
+        with keys "image_id", "category_id", "bbox" and "score".
+        """
+        count = len(instances)
+        if count == 0:
+            return []
+        boxes = instances.pred_boxes.tensor.numpy()
+        # Only 4-column boxes are converted (XYXY -> XYWH); other widths pass through as-is.
+        if boxes.shape[1] == 4:
+            boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+        boxes = boxes.tolist()
+        scores = instances.scores.tolist()
+        classes = instances.pred_classes.tolist()
+        return [
+            {
+                "image_id": img_id,
+                "category_id": classes[k],
+                "bbox": boxes[k],
+                "score": scores[k],
+            }
+            for k in range(count)
+        ]
+    def _eval_predictions(self, predictions, img_ids=None):  # img_ids: unused
+        """
+        Evaluate predictions on the given tasks.
+        Fill self._results with the metrics of the tasks.
+        """
+        self._logger.info("Preparing results for COCO format ...")
+        coco_results = [entry for pred in predictions for entry in pred["instances"]]
+        # unmap the category ids for COCO
+        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
+            dataset_to_contiguous = self._metadata.thing_dataset_id_to_contiguous_id
+            contiguous_to_dataset = {v: k for k, v in dataset_to_contiguous.items()}
+            for entry in coco_results:
+                entry["category_id"] = contiguous_to_dataset[entry["category_id"]]
+        if self._output_dir:
+            file_path = os.path.join(self._output_dir, "coco_instances_results.json")
+            self._logger.info("Saving results to {}".format(file_path))
+            with PathManager.open(file_path, "w") as f:
+                f.write(json.dumps(coco_results))
+                f.flush()
+        if not self._do_evaluation:
+            self._logger.info("Annotations are not available for evaluation.")
+            return
+        self._logger.info("Evaluating predictions ...")
+        assert self._tasks is None or set(self._tasks) == {
+            "bbox"
+        }, "[RotatedCOCOEvaluator] Only bbox evaluation is supported"
+        if len(coco_results) > 0:
+            coco_eval = self._evaluate_predictions_on_coco(self._coco_api, coco_results)
+        else:
+            coco_eval = None  # cocoapi does not handle empty results very well
+        task = "bbox"
+        self._results[task] = self._derive_coco_results(
+            coco_eval, task, class_names=self._metadata.get("thing_classes")
+        )
+    def _evaluate_predictions_on_coco(self, coco_gt, coco_results):
+        """
+        Evaluate the coco results using COCOEval API.
+        """
+        assert len(coco_results) > 0
+        coco_dt = coco_gt.loadRes(coco_results)
+        # Only bbox is supported for now
+        evaluator = RotatedCOCOeval(coco_gt, coco_dt, iouType="bbox")
+        evaluator.evaluate()
+        evaluator.accumulate()
+        evaluator.summarize()
+        return evaluator
--- a/detectron2/evaluation/sem_seg_evaluation.py
+++ b/detectron2/evaluation/sem_seg_evaluation.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+import itertools
+import json
+import logging
+import numpy as np
+import os
+from collections import OrderedDict
+import PIL.Image as Image
+import pycocotools.mask as mask_util
+import torch
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.utils.comm import all_gather, is_main_process, synchronize
+from detectron2.utils.file_io import PathManager
+from .evaluator import DatasetEvaluator
+class SemSegEvaluator(DatasetEvaluator):
+    """
+    Evaluate semantic segmentation metrics.
+    """
+    def __init__(
+        self,
+        dataset_name,
+        distributed=True,
+        output_dir=None,
+        *,
+        num_classes=None,
+        ignore_label=None,
+    ):
+        """
+        Args:
+            dataset_name (str): name of the dataset to be evaluated.
+            distributed (bool): if True, will collect results from all ranks for evaluation.
+                Otherwise, will evaluate the results in the current process.
+            output_dir (str): an output directory to dump results.
+            num_classes, ignore_label: deprecated argument
+        """
+        self._logger = logging.getLogger(__name__)
+        # Logger.warn is a deprecated alias; Logger.warning is the supported spelling.
+        if num_classes is not None:
+            self._logger.warning(
+                "SemSegEvaluator(num_classes) is deprecated! It should be obtained from metadata."
+            )
+        if ignore_label is not None:
+            self._logger.warning(
+                "SemSegEvaluator(ignore_label) is deprecated! It should be obtained from metadata."
+            )
+        self._dataset_name = dataset_name
+        self._distributed = distributed
+        self._output_dir = output_dir
+        self._cpu_device = torch.device("cpu")
+        self.input_file_to_gt_file = {
+            dataset_record["file_name"]: dataset_record["sem_seg_file_name"]
+            for dataset_record in DatasetCatalog.get(dataset_name)
+        }
+        meta = MetadataCatalog.get(dataset_name)
+        # Dict that maps contiguous training ids to COCO category ids
+        try:
+            c2d = meta.stuff_dataset_id_to_contiguous_id
+            self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()}
+        except AttributeError:
+            self._contiguous_id_to_dataset_id = None
+        self._class_names = meta.stuff_classes
+        self._num_classes = len(meta.stuff_classes)
+        if num_classes is not None:
+            assert self._num_classes == num_classes, f"{self._num_classes} != {num_classes}"
+        self._ignore_label = ignore_label if ignore_label is not None else meta.ignore_label
+    def reset(self):
+        """Clear the accumulated confusion matrix and predictions."""
+        # One extra row/column collects the pixels carrying the ignore label.
+        self._conf_matrix = np.zeros((self._num_classes + 1, self._num_classes + 1), dtype=np.int64)
+        self._predictions = []
+    def process(self, inputs, outputs):
+        """
+        Args:
+            inputs: the inputs to a model.
+                It is a list of dicts. Each dict corresponds to an image and
+                contains keys like "height", "width", "file_name".
+            outputs: the outputs of a model. It is either list of semantic segmentation predictions
+                (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic
+                segmentation prediction in the same format.
+        """
+        for input, output in zip(inputs, outputs):
+            output = output["sem_seg"].argmax(dim=0).to(self._cpu_device)
+            # Builtin ``int`` replaces the ``np.int`` alias, which NumPy 1.24 removed.
+            pred = np.array(output, dtype=int)
+            with PathManager.open(self.input_file_to_gt_file[input["file_name"]], "rb") as f:
+                gt = np.array(Image.open(f), dtype=int)
+            # Ignored pixels are routed into the extra last column of the confusion matrix.
+            gt[gt == self._ignore_label] = self._num_classes
+            # Encode each (pred, gt) pair as one flat index, then histogram the indices.
+            self._conf_matrix += np.bincount(
+                (self._num_classes + 1) * pred.reshape(-1) + gt.reshape(-1),
+                minlength=self._conf_matrix.size,
+            ).reshape(self._conf_matrix.shape)
+            self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"]))
+    def evaluate(self):
+        """
+        Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval):
+        * Mean intersection-over-union averaged across classes (mIoU)
+        * Frequency Weighted IoU (fwIoU)
+        * Mean pixel accuracy averaged across classes (mACC)
+        * Pixel Accuracy (pACC)
+        Returns:
+            OrderedDict ``{"sem_seg": {metric name: value in percent}}``; ``None`` on
+            non-main processes when running distributed.
+        """
+        if self._distributed:
+            synchronize()
+            conf_matrix_list = all_gather(self._conf_matrix)
+            self._predictions = all_gather(self._predictions)
+            self._predictions = list(itertools.chain(*self._predictions))
+            if not is_main_process():
+                return
+            self._conf_matrix = np.zeros_like(self._conf_matrix)
+            for conf_matrix in conf_matrix_list:
+                self._conf_matrix += conf_matrix
+        if self._output_dir:
+            PathManager.mkdirs(self._output_dir)
+            file_path = os.path.join(self._output_dir, "sem_seg_predictions.json")
+            with PathManager.open(file_path, "w") as f:
+                f.write(json.dumps(self._predictions))
+        # Builtin ``float`` replaces the ``np.float`` alias, which NumPy 1.24 removed.
+        acc = np.full(self._num_classes, np.nan, dtype=float)
+        iou = np.full(self._num_classes, np.nan, dtype=float)
+        # [:-1] drops the extra ignore-label row/column from every statistic.
+        tp = self._conf_matrix.diagonal()[:-1].astype(float)
+        pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(float)
+        class_weights = pos_gt / np.sum(pos_gt)
+        pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(float)
+        acc_valid = pos_gt > 0
+        acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid]
+        iou_valid = (pos_gt + pos_pred) > 0
+        union = pos_gt + pos_pred - tp
+        # IoU is only filled in for classes present in the ground truth (acc_valid), while
+        # the mean below divides by the number of classes present in gt or prediction.
+        iou[acc_valid] = tp[acc_valid] / union[acc_valid]
+        macc = np.sum(acc[acc_valid]) / np.sum(acc_valid)
+        miou = np.sum(iou[acc_valid]) / np.sum(iou_valid)
+        fiou = np.sum(iou[acc_valid] * class_weights[acc_valid])
+        pacc = np.sum(tp) / np.sum(pos_gt)
+        res = {}
+        res["mIoU"] = 100 * miou
+        res["fwIoU"] = 100 * fiou
+        for i, name in enumerate(self._class_names):
+            res["IoU-{}".format(name)] = 100 * iou[i]
+        res["mACC"] = 100 * macc
+        res["pACC"] = 100 * pacc
+        for i, name in enumerate(self._class_names):
+            res["ACC-{}".format(name)] = 100 * acc[i]
+        if self._output_dir:
+            file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth")
+            with PathManager.open(file_path, "wb") as f:
+                torch.save(res, f)
+        results = OrderedDict({"sem_seg": res})
+        self._logger.info(results)
+        return results
+    def encode_json_sem_seg(self, sem_seg, input_file_name):
+        """
+        Convert semantic segmentation to COCO stuff format with segments encoded as RLEs.
+        See http://cocodataset.org/#format-results
+        Returns:
+            list of dicts, one per label present in ``sem_seg``, with keys "file_name",
+            "category_id" and "segmentation".
+        """
+        json_list = []
+        for label in np.unique(sem_seg):
+            if self._contiguous_id_to_dataset_id is not None:
+                assert (
+                    label in self._contiguous_id_to_dataset_id
+                ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name)
+                dataset_id = self._contiguous_id_to_dataset_id[label]
+            else:
+                dataset_id = int(label)
+            mask = (sem_seg == label).astype(np.uint8)
+            mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0]
+            # The RLE counts come back as bytes; decode them so the dict is JSON-serializable.
+            mask_rle["counts"] = mask_rle["counts"].decode("utf-8")
+            json_list.append(
+                {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle}
+            )
+        return json_list
--- a/detectron2/evaluation/testing.py
+++ b/detectron2/evaluation/testing.py
+# Copyright (c) Facebook, Inc. and its affiliates.
+import logging
+import numpy as np
+import pprint
+import sys
+from collections.abc import Mapping
+def print_csv_format(results):
+    """
+    Print main metrics in a format similar to Detectron,
+    so that they are easy to copypaste into a spreadsheet.
+    Args:
+        results (OrderedDict[dict]): task_name -> {metric -> score}
+            unordered dict can also be printed, but in arbitrary order.
+            An empty container of any type is accepted and prints nothing.
+    """
+    assert isinstance(results, Mapping) or not len(results), results
+    if not len(results):
+        # The assert lets empty non-mapping containers (e.g. ``[]``) through; they have
+        # no ``.items()``, so return before iterating.
+        return
+    logger = logging.getLogger(__name__)
+    for task, res in results.items():
+        if isinstance(res, Mapping):
+            # Don't print "AP-category" metrics since they are usually not tracked.
+            important_res = [(k, v) for k, v in res.items() if "-" not in k]
+            logger.info("copypaste: Task: {}".format(task))
+            logger.info("copypaste: " + ",".join([k[0] for k in important_res]))
+            logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res]))
+        else:
+            logger.info(f"copypaste: {task}={res}")
+def verify_results(cfg, results):
+    """
+    Compare ``results`` against ``cfg.TEST.EXPECTED_RESULTS``.
+    Each expected entry is ``(task, metric, expected, tolerance)``; it passes when the
+    actual metric exists, is finite, and differs from ``expected`` by at most ``tolerance``.
+    On any failure the mismatch is logged and the process exits with status 1.
+    Args:
+        results (OrderedDict[dict]): task_name -> {metric -> score}
+    Returns:
+        bool: whether the verification succeeds or not
+    """
+    expected_results = cfg.TEST.EXPECTED_RESULTS
+    if len(expected_results) == 0:
+        return True
+    def _matches(task, metric, expected, tolerance):
+        actual = results[task].get(metric, None)
+        if actual is None or not np.isfinite(actual):
+            return False
+        return not (abs(actual - expected) > tolerance)
+    # Build the full list first so every entry is looked up even after a failure.
+    ok = all(
+        [
+            _matches(task, metric, expected, tolerance)
+            for task, metric, expected, tolerance in expected_results
+        ]
+    )
+    logger = logging.getLogger(__name__)
+    if ok:
+        logger.info("Results verification passed.")
+    else:
+        logger.error("Result verification failed!")
+        logger.error("Expected Results: " + str(expected_results))
+        logger.error("Actual Results: " + pprint.pformat(results))
+        sys.exit(1)
+    return ok
+def flatten_results_dict(results):
+    """
+    Collapse a nested mapping of scalars into a single-level dict.
+    Nested keys are joined with "/": if results[k1][k2][k3] = v, the returned
+    dict has the entry {"k1/k2/k3": v}. Key order follows the input.
+    Args:
+        results (dict):
+    """
+    flat = {}
+    for key, value in results.items():
+        if not isinstance(value, Mapping):
+            flat[key] = value
+            continue
+        nested = flatten_results_dict(value)
+        flat.update((key + "/" + sub_key, leaf) for sub_key, leaf in nested.items())
+    return flat
--- a/detectron2/export/README.md
+++ b/detectron2/export/README.md
+This directory contains code to prepare a detectron2 model for deployment.
+Currently it supports exporting a detectron2 model to Caffe2 format through ONNX.
+Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage.
+### Acknowledgements
+Thanks to the Mobile Vision team at Facebook for developing the Caffe2 conversion tools.
+Thanks to the Computing Platform Department - PAI team at Alibaba Group (@bddpqq, @chenbohua3) who
+helped export Detectron2 models to TorchScript.
--- a/detectron2/export/__init__.py
+++ b/detectron2/export/__init__.py
+# -*- coding: utf-8 -*-
+from .api import *
+from .flatten import TracingAdapter
+from .torchscript import scripting_with_instances, dump_torchscript_IR
+# Export every name currently in the module namespace that does not start with "_".
+__all__ = [k for k in globals().keys() if not k.startswith("_")]