Commit f23248c0 authored by facebook-github-bot

Initial commit

fbshipit-source-id: f4a8ba78691d8cf46e003ef0bd2e95f170932778
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import atexit
import contextlib
import json
import logging
import os
import shutil
import tempfile
from collections import defaultdict
import numpy as np
import torch.utils.data as data
from d2go.config import temp_defrost
from d2go.data.datasets import register_dataset_split, ANN_FN, IM_DIR
from detectron2.data import DatasetCatalog, MetadataCatalog
from fvcore.common.file_io import PathManager
logger = logging.getLogger(__name__)
class AdhocDatasetManager:
# mapping from the new dataset name to an AdhocDataset instance
_REGISTERED = {}
@staticmethod
def add(adhoc_ds):
assert isinstance(adhoc_ds, AdhocDataset)
if adhoc_ds.new_ds_name in AdhocDatasetManager._REGISTERED:
logger.warning(
"Adhoc dataset {} has already been added, skip adding it".format(
adhoc_ds.new_ds_name
)
)
else:
logger.info("Adding new adhoc dataset {} ...".format(adhoc_ds.new_ds_name))
AdhocDatasetManager._REGISTERED[adhoc_ds.new_ds_name] = adhoc_ds
adhoc_ds.register_catalog()
@staticmethod
def remove(adhoc_ds):
try:
assert isinstance(adhoc_ds, AdhocDataset)
if adhoc_ds.new_ds_name not in AdhocDatasetManager._REGISTERED:
logger.warning(
"Adhoc dataset {} has already been removed, skip removing it".format(
adhoc_ds.new_ds_name
)
)
else:
logger.info("Remove adhoc dataset {} ...".format(adhoc_ds.new_ds_name))
del AdhocDatasetManager._REGISTERED[adhoc_ds.new_ds_name]
finally:
adhoc_ds.cleanup()
@staticmethod
@atexit.register
def _atexit():
for ds in AdhocDatasetManager._REGISTERED.values():
logger.info("Remove remaining adhoc dataset: {}".format(ds.new_ds_name))
ds.cleanup()
class AdhocDataset(object):
def __init__(self, new_ds_name):
assert isinstance(new_ds_name, str)
self.new_ds_name = new_ds_name
def register_catalog(self):
raise NotImplementedError()
def cleanup(self):
raise NotImplementedError()
class CallFuncWithJsonFile(object):
"""
An instance of this class is a parameterless callable that calls its `func` with its
`json_file`. It can be registered in DatasetCatalog, which later provides
access to the json file.
"""
def __init__(self, func, json_file):
self.func = func
self.json_file = json_file
def __call__(self):
return self.func(self.json_file)
class AdhocCOCODataset(AdhocDataset):
def __init__(self, src_ds_name, new_ds_name):
super().__init__(new_ds_name)
# NOTE: only support single source dataset now
assert isinstance(src_ds_name, str)
self.src_ds_name = src_ds_name
def new_json_dict(self, json_dict):
raise NotImplementedError()
def register_catalog(self):
"""
An adhoc COCO (json) dataset assumes the derived dataset can be created by only
changing the json file. Two sources are currently supported: 1) the dataset is
registered using the standard COCO registration functions in D2 or
register_dataset_split from D2Go, in which case the `json_file` from the metadata
is used to access the json file; 2) the load func in DatasetCatalog is an instance
of CallFuncWithJsonFile, which gives access to the json_file. In both cases,
the metadata stays the same except for `name` and potentially `json_file`.
"""
logger.info("Register {} from {}".format(self.new_ds_name, self.src_ds_name))
metadata = MetadataCatalog.get(self.src_ds_name)
load_func = DatasetCatalog[self.src_ds_name]
src_json_file = (
load_func.json_file
if isinstance(load_func, CallFuncWithJsonFile)
else metadata.json_file
)
# TODO cache ?
with PathManager.open(src_json_file) as f:
json_dict = json.load(f)
assert "images" in json_dict, "Only support COCO-style json!"
json_dict = self.new_json_dict(json_dict)
self.tmp_dir = tempfile.mkdtemp(prefix="detectron2go_tmp_datasets")
tmp_file = os.path.join(self.tmp_dir, "{}.json".format(self.new_ds_name))
with open(tmp_file, "w") as f:
json.dump(json_dict, f)
# re-register DatasetCatalog
if isinstance(load_func, CallFuncWithJsonFile):
new_func = CallFuncWithJsonFile(func=load_func.func, json_file=tmp_file)
DatasetCatalog.register(self.new_ds_name, new_func)
else:
# NOTE: only supports COCODataset as DS_TYPE since we cannot reconstruct
# the split_dict
register_dataset_split(
self.new_ds_name,
split_dict={ANN_FN: tmp_file, IM_DIR: metadata.image_root},
)
# re-register MetadataCatalog
metadata_dict = metadata.as_dict()
metadata_dict["name"] = self.new_ds_name
if "json_file" in metadata_dict:
metadata_dict["json_file"] = tmp_file
MetadataCatalog.remove(self.new_ds_name)
MetadataCatalog.get(self.new_ds_name).set(**metadata_dict)
def cleanup(self):
# remove temporarily registered dataset and json file
DatasetCatalog.pop(self.new_ds_name, None)
MetadataCatalog.pop(self.new_ds_name, None)
if hasattr(self, "tmp_dir"):
shutil.rmtree(self.tmp_dir)
class COCOSubsetWithNImages(AdhocCOCODataset):
_SUPPORTED_SAMPLING = ["frontmost", "random"]
def __init__(self, src_ds_name, num_images, sampling):
super().__init__(
src_ds_name=src_ds_name,
new_ds_name="{}_{}{}".format(src_ds_name, sampling, num_images),
)
self.num_images = num_images
self.sampling = sampling
def new_json_dict(self, json_dict):
all_images = json_dict["images"]
if self.sampling == "frontmost":
new_images = all_images[: self.num_images]
elif self.sampling == "random":
# use fixed seed so results are repeatable
indices = np.random.RandomState(seed=42).permutation(len(all_images))
new_images = [all_images[i] for i in indices[: self.num_images]]
else:
raise NotImplementedError(
"COCOSubsetWithNImages doesn't support sampling method: {}".format(
self.sampling
)
)
new_image_ids = {im["id"] for im in new_images}
new_annotations = [
ann for ann in json_dict["annotations"] if ann["image_id"] in new_image_ids
]
json_dict["images"] = new_images
json_dict["annotations"] = new_annotations
return json_dict
class COCOSubsetWithGivenImages(AdhocCOCODataset):
def __init__(self, src_ds_name, file_names, prefix="given"):
super().__init__(
src_ds_name=src_ds_name,
new_ds_name="{}_{}{}".format(src_ds_name, prefix, len(file_names)),
)
self.file_names = file_names
def new_json_dict(self, json_dict):
all_images = json_dict["images"]
file_name_to_im = {im["file_name"]: im for im in all_images}
new_images = [file_name_to_im[file_name] for file_name in self.file_names]
# re-assign image id to keep the order (COCO loads images by id order)
old_id_to_new_id = {im["id"]: i for i, im in enumerate(new_images)}
new_annotations = [
ann
for ann in json_dict["annotations"]
if ann["image_id"] in old_id_to_new_id
]
# update image id
for im in new_images:
im["id"] = old_id_to_new_id[im["id"]]
for anno in new_annotations:
anno["image_id"] = old_id_to_new_id[anno["image_id"]]
json_dict["images"] = new_images
json_dict["annotations"] = new_annotations
return json_dict
class COCOWithClassesToUse(AdhocCOCODataset):
def __init__(self, src_ds_name, classes_to_use):
super().__init__(
src_ds_name=src_ds_name,
new_ds_name="{}@{}classes".format(src_ds_name, len(classes_to_use)),
)
self.classes_to_use = classes_to_use
def new_json_dict(self, json_dict):
categories = json_dict["categories"]
new_categories = [
cat for cat in categories if cat["name"] in self.classes_to_use
]
new_category_ids = {cat["id"] for cat in new_categories}
new_annotations = [
ann
for ann in json_dict["annotations"]
if ann["category_id"] in new_category_ids
]
json_dict["categories"] = new_categories
json_dict["annotations"] = new_annotations
return json_dict
class ClipLengthGroupedDataset(data.IterableDataset):
"""
Batch data that have the same clip length and similar aspect ratio.
In this implementation, clips with the same length whose aspect
ratio is < 1 (or > 1) are batched together.
This makes training with different clip lengths possible and improves
training speed because the images then need less padding to form a batch.
"""
def __init__(self, dataset, batch_size):
"""
Args:
dataset: an iterable. Each element must be a dict with keys
"width" and "height", which will be used to batch data.
batch_size (int):
"""
self.dataset = dataset
self.batch_size = batch_size
self._buckets = defaultdict(list)
def __iter__(self):
for d in self.dataset:
clip_length = len(d["frames"])
h, w = d["height"], d["width"]
aspect_ratio_bucket_id = 0 if h > w else 1
bucket = self._buckets[(clip_length, aspect_ratio_bucket_id)]
bucket.append(d)
if len(bucket) == self.batch_size:
yield bucket[:]
del bucket[:]
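# A minimal usage sketch (hypothetical values), kept as a comment: the wrapped
# iterable must yield dicts carrying "frames", "height" and "width"; a batch is
# emitted once a (clip_length, aspect_ratio_bucket) bucket reaches `batch_size`.
#
#   clips = [{"frames": [None] * 4, "height": 720, "width": 1280} for _ in range(8)]
#   loader = ClipLengthGroupedDataset(clips, batch_size=4)
#   for batch in loader:
#       assert len(batch) == 4
#       assert len({len(d["frames"]) for d in batch}) == 1  # same clip length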
@contextlib.contextmanager
def register_sub_dataset_with_n_images(dataset_name, num_images, sampling):
"""
Temporarily register a sub-dataset created from `dataset_name`, containing
`num_images` images selected according to `sampling`.
"""
# when `num_images` is not larger than 0, return original dataset
if num_images <= 0:
yield dataset_name
return
# only support coco for now
assert sampling in COCOSubsetWithNImages._SUPPORTED_SAMPLING
new_dataset = COCOSubsetWithNImages(dataset_name, num_images, sampling)
AdhocDatasetManager.add(new_dataset)
try:
yield new_dataset.new_ds_name
finally:
AdhocDatasetManager.remove(new_dataset)
@contextlib.contextmanager
def register_sub_dataset_with_given_images(*args, **kwargs):
new_dataset = COCOSubsetWithGivenImages(*args, **kwargs)
AdhocDatasetManager.add(new_dataset)
try:
yield new_dataset.new_ds_name
finally:
AdhocDatasetManager.remove(new_dataset)
@contextlib.contextmanager
def maybe_subsample_n_images(cfg, is_train=False):
"""
Create a new config whose train/test datasets only take a subsample of at most
`max_images` images. Use all images (no-op) when `max_images` <= 0.
"""
max_images = cfg.D2GO_DATA.TEST.MAX_IMAGES
sampling = cfg.D2GO_DATA.TEST.SUBSET_SAMPLING
with contextlib.ExitStack() as stack: # python 3.3+
new_splits = tuple(
stack.enter_context(
register_sub_dataset_with_n_images(ds, max_images, sampling)
)
for ds in (cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST)
)
new_cfg = cfg.clone()
with temp_defrost(new_cfg):
if is_train:
new_cfg.DATASETS.TRAIN = new_splits
else:
new_cfg.DATASETS.TEST = new_splits
yield new_cfg
def update_cfg_if_using_adhoc_dataset(cfg):
if cfg.D2GO_DATA.DATASETS.TRAIN_CATEGORIES:
new_train_datasets = [
COCOWithClassesToUse(name, cfg.D2GO_DATA.DATASETS.TRAIN_CATEGORIES)
for name in cfg.DATASETS.TRAIN
]
for new_ds in new_train_datasets:
    AdhocDatasetManager.add(new_ds)
with temp_defrost(cfg):
cfg.DATASETS.TRAIN = tuple(ds.new_ds_name for ds in new_train_datasets)
if cfg.D2GO_DATA.DATASETS.TEST_CATEGORIES:
new_test_datasets = [
COCOWithClassesToUse(ds, cfg.D2GO_DATA.DATASETS.TEST_CATEGORIES)
for ds in cfg.DATASETS.TEST
]
for new_ds in new_test_datasets:
    AdhocDatasetManager.add(new_ds)
with temp_defrost(cfg):
cfg.DATASETS.TEST = tuple(ds.new_ds_name for ds in new_test_datasets)
return cfg
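A brief usage sketch for the context managers above (the `evaluate_fn` callable is a placeholder, not part of this module): inside the `with` block the config points at temporary subsampled splits, which are unregistered and their json files removed on exit.

def run_quick_eval(cfg, evaluate_fn):
    # `evaluate_fn` is a hypothetical callable that runs evaluation given a config.
    with maybe_subsample_n_images(cfg, is_train=False) as subsampled_cfg:
        # DATASETS.TEST now names temporary "<src>_<sampling><N>" splits
        # (or the original splits when D2GO_DATA.TEST.MAX_IMAGES <= 0).
        return evaluate_fn(subsampled_cfg)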
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Similar to detectron2.engine.launch, with a few additions:
- support for get_local_rank.
- support for other backends like GLOO.
"""
import logging
import tempfile
import detectron2.utils.comm as comm
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
from d2go.config import CfgNode, temp_defrost
from d2go.utils.launch_environment import get_launch_environment
logger = logging.getLogger(__name__)
_LOCAL_RANK = 0
_NUM_PROCESSES_PER_MACHINE = 1
def _set_local_rank(local_rank):
global _LOCAL_RANK
_LOCAL_RANK = local_rank
def _set_num_processes_per_machine(num_processes):
global _NUM_PROCESSES_PER_MACHINE
_NUM_PROCESSES_PER_MACHINE = num_processes
def get_local_rank():
return _LOCAL_RANK
def get_num_processes_per_machine():
return _NUM_PROCESSES_PER_MACHINE
def launch(
main_func,
num_processes_per_machine,
num_machines=1,
machine_rank=0,
dist_url=None,
backend="NCCL",
always_spawn=False,
args=(),
):
logger.info(
f"Launch with num_processes_per_machine: {num_processes_per_machine},"
f" num_machines: {num_machines}, machine_rank: {machine_rank},"
f" dist_url: {dist_url}, backend: {backend}."
)
if get_launch_environment() == "local" and not torch.cuda.is_available():
assert len(args) > 0, args
cfg = args[0]
assert isinstance(cfg, CfgNode)
if cfg.MODEL.DEVICE == "cuda":
logger.warning(
"Detected that CUDA is not available on this machine, set MODEL.DEVICE"
" to cpu and backend to GLOO"
)
with temp_defrost(cfg):
cfg.MODEL.DEVICE = "cpu"
backend = "GLOO"
if backend == "NCCL":
assert (
num_processes_per_machine <= torch.cuda.device_count()
), "num_processes_per_machine is greater than device count: {} vs {}".format(
num_processes_per_machine, torch.cuda.device_count()
)
world_size = num_machines * num_processes_per_machine
if world_size > 1 or always_spawn:
# https://github.com/pytorch/pytorch/pull/14391
# TODO prctl in spawned processes
prefix = f"detectron2go_{main_func.__module__}.{main_func.__name__}_return"
with tempfile.NamedTemporaryFile(prefix=prefix, suffix=".pth") as f:
return_file = f.name
mp.spawn(
_distributed_worker,
nprocs=num_processes_per_machine,
args=(
main_func,
world_size,
num_processes_per_machine,
machine_rank,
dist_url,
backend,
return_file,
args,
),
daemon=False,
)
if machine_rank == 0:
return torch.load(return_file)
else:
return main_func(*args)
def _distributed_worker(
local_rank,
main_func,
world_size,
num_processes_per_machine,
machine_rank,
dist_url,
backend,
return_file,
args,
):
assert backend in ["NCCL", "GLOO"]
_set_local_rank(local_rank)
_set_num_processes_per_machine(num_processes_per_machine)
# NOTE: this is wrong if using a different number of processes across machines
global_rank = machine_rank * num_processes_per_machine + local_rank
try:
dist.init_process_group(
backend=backend,
init_method=dist_url,
world_size=world_size,
rank=global_rank,
)
except Exception as e:
logger.error("Process group URL: {}".format(dist_url))
raise e
# synchronize is needed here to prevent a possible timeout after calling
# init_process_group
# See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172
comm.synchronize()
if backend in ["NCCL"]:
torch.cuda.set_device(local_rank)
# Setup the local process group (which contains ranks within the same machine)
assert comm._LOCAL_PROCESS_GROUP is None
num_machines = world_size // num_processes_per_machine
for i in range(num_machines):
ranks_on_i = list(
range(i * num_processes_per_machine, (i + 1) * num_processes_per_machine)
)
pg = dist.new_group(ranks_on_i)
if i == machine_rank:
comm._LOCAL_PROCESS_GROUP = pg
ret = main_func(*args)
if global_rank == 0:
logger.info(
"Save {}.{} return to: {}".format(
main_func.__module__, main_func.__name__, return_file
)
)
torch.save(ret, return_file)
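A hedged example of calling `launch` (the entry point, address and process counts below are placeholders): each spawned process runs `main_func`, and on machine rank 0 the return value of global rank 0 is read back from the temporary file.

def _main(cfg):
    # hypothetical per-process entry point; real callers would build and run a trainer
    return {"local_rank": get_local_rank()}

def run_distributed(cfg, num_gpus):
    # `cfg` is assumed to be a d2go CfgNode prepared by the caller
    return launch(
        _main,
        num_processes_per_machine=num_gpus,
        num_machines=1,
        machine_rank=0,
        dist_url="tcp://127.0.0.1:50123",  # placeholder address for a single-machine run
        backend="NCCL",  # requires num_gpus <= torch.cuda.device_count()
        args=(cfg,),
    )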
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .prediction_count_evaluation import PredictionCountEvaluator # noqa
__all__ = [k for k in globals().keys() if not k.startswith("_")]
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
from collections import OrderedDict
import numpy as np
from detectron2.evaluation import DatasetEvaluator
logger = logging.getLogger(__name__)
class PredictionCountEvaluator(DatasetEvaluator):
"""
Custom Detectron2 evaluator class to simply count the number of predictions
e.g. on a dataset of hard negatives where there are no annotations, and
summarize results into interpretable metrics.
See class pattern from detectron2.evaluation.evaluator.py, especially
:func:`inference_on_dataset` to see how this class will be called.
"""
def reset(self):
self.prediction_counts = []
self.confidence_scores = []
def process(self, inputs, outputs):
"""
Params:
    inputs: the inputs that are used to call the model.
    outputs: the return value of `model(inputs)`.
"""
# outputs format:
# [{
# "instances": Instances(
# num_instances=88,
# fields=[scores = tensor([list of len num_instances])]
# ), ...
# },
# ... other dicts
# ]
for output_dict in outputs:
instances = output_dict["instances"]
self.prediction_counts.append(len(instances))
self.confidence_scores.extend(instances.get("scores").tolist())
def evaluate(self):
"""
Returns:
In detectron2.tools.train_net.py, the following format is expected:
dict:
* key: the name of the task (e.g., bbox)
* value: a dict of {metric name: score}, e.g.: {"AP50": 80}
"""
mpi = np.mean(self.prediction_counts)
mcp = np.mean(self.confidence_scores)
output_metrics = OrderedDict(
{
"false_positives": {
"predictions_per_image": mpi,
"confidence_per_prediction": mcp,
}
}
)
logger.info(f"mean predictions per image: {mpi}")
logger.info(f"mean confidence per prediction: {mcp}")
return output_metrics
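A short usage sketch following the `inference_on_dataset` pattern mentioned in the class docstring; `model` and `data_loader` are assumed to be built elsewhere.

from detectron2.evaluation import inference_on_dataset

def count_predictions(model, data_loader):
    evaluator = PredictionCountEvaluator()
    results = inference_on_dataset(model, data_loader, evaluator)
    # e.g. {"false_positives": {"predictions_per_image": ...,
    #                           "confidence_per_prediction": ...}}
    return results["false_positives"]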
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import heapq
import itertools
import logging
from contextlib import contextmanager
from detectron2.data import MetadataCatalog
from detectron2.evaluation import DatasetEvaluator, SemSegEvaluator
from detectron2.utils.comm import all_gather, synchronize
logger = logging.getLogger(__name__)
class MultiSemSegEvaluator(DatasetEvaluator):
"""
Evaluate multiple results for the same target. SemSegEvaluator requires the
outputs of the model to be like:
[
{"sem_seg": Tensor},
]
This evaluator allows evaluating multiple predictions; it may take outputs like:
[
{
"prediction_1": {"sem_seg": Tensor},
"prediction_2": {"sem_seg": Tensor},
}
]
"""
_DUMMY_KEY_PREFIX = "dummy_eval"
def __init__(self, dataset_name, *args, distributed, output_dir=None, **kwargs):
self._distributed = distributed
self._output_dir = output_dir
self.evaluators = {}
self.dataset_name = dataset_name
self.init_args = args
self.init_kwargs = kwargs
def _get_evaluator(self, key, superclass_name=None):
if key in self.evaluators:
return self.evaluators[key]
def create_evaluator_and_reset(dataset_name):
logger.info(
"Create an instance of SemSegEvaluator for {} on dataset {} ...".format(
key, dataset_name
)
)
evaluator = SemSegEvaluator(
dataset_name,
*self.init_args,
**self.init_kwargs,
distributed=self._distributed,
output_dir=self._output_dir,
)
evaluator.reset()
return evaluator
if superclass_name is None:
self.evaluators[key] = create_evaluator_and_reset(self.dataset_name)
else:
# NOTE: create temporary single-super-class dataset and use standard
# evaluator for the dataset
metadata = MetadataCatalog.get(self.dataset_name)
tmp_dataset_name = "__AUTOGEN__{}@{}".format(
self.dataset_name, superclass_name
)
from d2go.datasets.builtin_dataset_people_ai_person_segmentation import (
_register_person_sem_seg,
)
if tmp_dataset_name not in MetadataCatalog:
_register_person_sem_seg(
tmp_dataset_name,
metadata.mcs_metadata[superclass_name],
image_root=metadata.image_root,
sem_seg_root=metadata.sem_seg_root,
instances_json=metadata.json_file,
mask_dir="{}_mask".format(superclass_name),
)
self.evaluators[key] = create_evaluator_and_reset(tmp_dataset_name)
return self.evaluators[key]
def reset(self):
for evaluator in self.evaluators.values():
evaluator.reset()
def process(self, inputs, outputs):
if "sem_seg" in outputs[0].keys():
# normal eval is compatible with SemSegEvaluator
self._get_evaluator("sem_seg").process(inputs, outputs)
else:
# only the file_name of inputs is needed for SemSegEvaluator
inputs_ = [{"file_name": inp["file_name"]} for inp in inputs]
for frame_name in outputs[0].keys():
if isinstance(outputs[0]["detect"]["sem_seg"], dict): # multi-class
for superclass_name in outputs[0]["detect"]["sem_seg"]:
outputs_ = []
for outp in outputs:
x = outp[frame_name]
x = {"sem_seg": x["sem_seg"][superclass_name]}
outputs_.append(x)
self._get_evaluator(
"sem_seg-{}-{}".format(frame_name, superclass_name),
superclass_name=superclass_name,
).process(inputs_, outputs_)
else:
# convert the output to SemSegEvaluator's format
outputs_ = [outp[frame_name] for outp in outputs]
self._get_evaluator("sem_seg-{}".format(frame_name)).process(
inputs_, outputs_
)
def evaluate(self):
results = {}
# The evaluation will sometimes get stuck if the following code is not used.
# `SemSegEvaluator` does synchronization between processes when computing
# the metrics. In some cases the number of self.evaluators will not be the
# same across processes and the code will get stuck during synchronization.
# For example, when evaluating 10 images on 8 GPUs, only 5 GPUs will be used
# for evaluation (2 images each); the remaining 3 GPUs will have zero
# self.evaluators, since the evaluators are constructed on-the-fly in
# self.process().
# We create additional evaluators so that all processes have the same number
# of evaluators and the synchronization does not get stuck.
evaluator_size = len(self.evaluators)
synchronize()
evaluator_size_list = all_gather(evaluator_size)
max_evaluator_size = max(evaluator_size_list)
if evaluator_size < max_evaluator_size:
# create additional evaluators so that all processes have the same
# size of evaluators
metadata = MetadataCatalog.get(self.dataset_name)
mcs_metadata = metadata.get("mcs_metadata")
for idx in range(max_evaluator_size - evaluator_size):
dummy_key = f"{self._DUMMY_KEY_PREFIX}_{idx}"
assert dummy_key not in self.evaluators
if mcs_metadata:
for k in mcs_metadata:
self._get_evaluator(dummy_key, superclass_name=k).reset()
else:
self._get_evaluator(dummy_key).reset()
for name, evaluator in self.evaluators.items():
result = evaluator.evaluate()
# NOTE: .evaluate() returns None for non-main process
if result is not None:
results[name] = result["sem_seg"]
return results
class MultiSemSegVidEvaluator(MultiSemSegEvaluator):
"""
Evaluate semantic segmentation results for video clips. MultiSemSegVidEvaluator
requires the inputs to the model to be like:
[
{"file_names": Tensor},
]
"""
def process(self, inputs, outputs):
assert "file_names" in inputs[0]
inputs_ = []
for batch_id in range(len(inputs)):
for frame_i in range(len(inputs[batch_id]["file_names"])):
inputs_.append(
{"file_name": inputs[batch_id]["file_names"][frame_i]}
)
for name in outputs[0].keys():
# convert the output to SemSegEvaluator's format
outputs_ = [outp[name] for outp in outputs]
self.evaluators["sem_seg_{}".format(name)].process(inputs_, outputs_)
@contextmanager
def all_logging_disabled(highest_level=logging.CRITICAL):
"""
A context manager that will prevent any logging messages
triggered during the body from being processed.
:param highest_level: the maximum logging level in use.
This would only need to be changed if a custom level greater than CRITICAL
is defined.
"""
# two kind-of hacks here:
# * can't get the highest logging level in effect => delegate to the user
# * can't get the current module-level override => use an undocumented
# (but non-private!) interface
previous_level = logging.root.manager.disable
logging.disable(highest_level)
try:
yield
finally:
logging.disable(previous_level)
class PerImageEvaluator(object):
def __init__(
self,
evaluator,
callback,
distributed=True,
playback_criterion=None,
playback_limit=0,
):
self._evaluator = evaluator
self._evaluator._distributed = False
self._evaluator._output_dir = None
self._distributed = distributed
self.callback = callback
self.results_per_image = []
# record the N most interesting results for playback
self.playback_heap = []
self.playback_criterion = playback_criterion
self.playback_limit = playback_limit
def reset(self):
self._evaluator.reset()
def process(self, inputs, outputs):
self._evaluator.process(inputs, outputs)
assert len(inputs) == 1
with all_logging_disabled():
result = self._evaluator.evaluate()
self.results_per_image.append((inputs[0], result))
if self.playback_criterion:
score = self.playback_criterion(result)
heapq.heappush(self.playback_heap, (score, inputs[0], outputs[0], result))
if len(self.playback_heap) > self.playback_limit:
heapq.heappop(self.playback_heap)
self._evaluator.reset()
def evaluate(self):
if self._distributed:
synchronize()
results_per_image = all_gather(self.results_per_image)
self.results_per_image = list(itertools.chain(*results_per_image))
playback_heap = all_gather(self.playback_heap)
playback_heap = list(itertools.chain(*playback_heap))
# each GPU has its local N minimums; sort and take the global minimums
playback_heap = sorted(playback_heap, key=lambda x: x[0])
self.playback_heap = playback_heap[: self.playback_limit]
self.callback(self)
return {}
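A sketch of wrapping an existing evaluator with `PerImageEvaluator`; the dataset name, callback and playback criterion below are illustrative only. The data loader must yield batches of size 1, and `callback` receives the PerImageEvaluator itself once results have been gathered.

def report_playback(per_image_evaluator):
    # hypothetical callback: the heap holds (score, input, output, result) tuples
    for score, inp, _output, _result in per_image_evaluator.playback_heap:
        logger.info("playback image {} with score {}".format(inp["file_name"], score))

def build_per_image_evaluator():
    # "my_sem_seg_val" is a placeholder dataset name registered elsewhere
    return PerImageEvaluator(
        SemSegEvaluator("my_sem_seg_val", distributed=False),
        callback=report_playback,
        playback_criterion=lambda result: -result["sem_seg"]["mIoU"],  # assumes SemSegEvaluator's result keys
        playback_limit=10,
    )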
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import torch
import os
from torch import nn
from typing import Dict, Tuple
from detectron2.export.api import Caffe2Model
from detectron2.export.caffe2_export import (
export_caffe2_detection_model,
run_and_save_graph,
)
from d2go.export.logfiledb import export_to_logfiledb
logger = logging.getLogger(__name__)
def export_caffe2(
caffe2_compatible_model: nn.Module,
tensor_inputs: Tuple[str, torch.Tensor],
output_dir: str,
save_pb: bool = True,
save_logdb: bool = False,
) -> Tuple[Caffe2Model, Dict[str, str]]:
predict_net, init_net = export_caffe2_detection_model(
caffe2_compatible_model,
# pyre-fixme[6]: Expected `List[torch.Tensor]` for 2nd param but got
# `Tuple[str, torch.Tensor]`.
tensor_inputs,
)
caffe2_model = Caffe2Model(predict_net, init_net)
caffe2_export_paths = {}
if save_pb:
caffe2_model.save_protobuf(output_dir)
caffe2_export_paths.update({
"predict_net_path": os.path.join(output_dir, "model.pb"),
"init_net_path": os.path.join(output_dir, "model_init.pb"),
})
graph_save_path = os.path.join(output_dir, "model_def.svg")
ws_blobs = run_and_save_graph(
predict_net,
init_net,
tensor_inputs,
graph_save_path=graph_save_path,
)
caffe2_export_paths.update({
"model_def_path": graph_save_path,
})
if save_logdb:
logfiledb_path = os.path.join(output_dir, "model.logfiledb")
export_to_logfiledb(predict_net, init_net, logfiledb_path, ws_blobs)
caffe2_export_paths.update({
"logfiledb_path": logfiledb_path if save_logdb else None,
})
return caffe2_model, caffe2_export_paths
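A hedged usage sketch of the export helper above; preparing the caffe2-compatible model and its example tensor inputs is assumed to happen elsewhere (e.g. via detectron2's caffe2 tracing utilities), so only the call itself is shown.

def export(caffe2_compatible_model, tensor_inputs, output_dir):
    caffe2_model, paths = export_caffe2(
        caffe2_compatible_model,
        tensor_inputs,
        output_dir,
        save_pb=True,      # writes model.pb / model_init.pb and a model_def.svg graph
        save_logdb=False,  # skip the logfiledb export
    )
    # paths contains "predict_net_path", "init_net_path" and "model_def_path"
    return caffe2_model, paths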
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
from detectron2.export.caffe2_inference import ProtobufDetectionModel
from d2go.config import temp_defrost
logger = logging.getLogger(__name__)
def infer_mask_on(model: ProtobufDetectionModel):
# ideally the assembler should provide this information; currently use a heuristic
possible_blob_names = {"mask_fcn_probs"}
return any(
possible_blob_names.intersection(op.output)
for op in model.protobuf_model.net.Proto().op
)
def infer_keypoint_on(model: ProtobufDetectionModel):
# ideally the assembler should provide this information; currently use a heuristic
possible_blob_names = {"kps_score"}
return any(
possible_blob_names.intersection(op.output)
for op in model.protobuf_model.net.Proto().op
)
def infer_densepose_on(model: ProtobufDetectionModel):
possible_blob_names = {"AnnIndex", "Index_UV", "U_estimated", "V_estimated"}
return any(
possible_blob_names.intersection(op.output)
for op in model.protobuf_model.net.Proto().op
)
def _update_if_true(cfg, key, value):
if not value:
return
keys = key.split(".")
ref_value = cfg
while len(keys):
ref_value = getattr(ref_value, keys.pop(0))
if ref_value != value:
logger.warning(
"There's conflict between cfg and model, overwrite config {} from {} to {}"
.format(key, ref_value, value)
)
cfg.merge_from_list([key, value])
def update_cfg_from_pb_model(cfg, model):
"""
Update cfg statically based on the given caffe2 model. In case of a conflict
between the caffe2 model and the cfg, the caffe2 model has higher priority.
"""
with temp_defrost(cfg):
_update_if_true(cfg, "MODEL.MASK_ON", infer_mask_on(model))
_update_if_true(cfg, "MODEL.KEYPOINT_ON", infer_keypoint_on(model))
_update_if_true(cfg, "MODEL.DENSEPOSE_ON", infer_densepose_on(model))
return cfg
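A short sketch of how this might be wired up, assuming the caller has already loaded the exported predict/init NetDefs and that ProtobufDetectionModel can be constructed directly from them (that construction detail is an assumption, not taken from this file):

def build_predictor_and_sync_cfg(cfg, predict_net, init_net):
    # predict_net / init_net: caffe2 NetDef protobufs loaded by the caller
    pb_model = ProtobufDetectionModel(predict_net, init_net)
    # flips MODEL.MASK_ON / KEYPOINT_ON / DENSEPOSE_ON based on the net's output blobs
    cfg = update_cfg_from_pb_model(cfg, pb_model)
    return pb_model, cfg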
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# NOTE: making necessary imports to register with the Registry
from . import backbone # noqa
from . import modeldef # noqa