Commit c732df65 authored by limm

push v0.1.3 version commit bd2ea47

parent 5b3792fc
_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml"
MODEL:
WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl"
DATASETS:
TEST: ("coco_2017_val_100",)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]]
_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
MODEL:
WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
DATASETS:
TEST: ("coco_2017_val_100",)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]]
AUG:
ENABLED: True
MIN_SIZES: (700, 800) # to save some time
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
DATASETS:
TRAIN: ("coco_2017_val_100",)
TEST: ("coco_2017_val_100",)
SOLVER:
BASE_LR: 0.005
STEPS: (30,)
MAX_ITER: 40
IMS_PER_BATCH: 4
DATALOADER:
NUM_WORKERS: 2
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
ROI_HEADS:
BATCH_SIZE_PER_IMAGE: 256
MASK_ON: True
DATASETS:
TRAIN: ("coco_2017_val",)
TEST: ("coco_2017_val",)
INPUT:
MIN_SIZE_TRAIN: (600,)
MAX_SIZE_TRAIN: 1000
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1000
SOLVER:
WARMUP_FACTOR: 0.3333333
WARMUP_ITERS: 100
STEPS: (5500, 5800)
MAX_ITER: 6000
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 42.0, 1.6], ["segm", "AP", 35.4, 1.25]]
_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"
MODEL:
WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl"
DATASETS:
TEST: ("coco_2017_val_100_panoptic_separated",)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]]
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "PanopticFPN"
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
SEM_SEG_HEAD:
LOSS_WEIGHT: 0.5
DATASETS:
TRAIN: ("coco_2017_val_100_panoptic_separated",)
TEST: ("coco_2017_val_100_panoptic_separated",)
SOLVER:
BASE_LR: 0.005
STEPS: (30,)
MAX_ITER: 40
IMS_PER_BATCH: 4
DATALOADER:
NUM_WORKERS: 1
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "PanopticFPN"
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
SEM_SEG_HEAD:
LOSS_WEIGHT: 0.5
DATASETS:
TRAIN: ("coco_2017_val_panoptic_separated",)
TEST: ("coco_2017_val_panoptic_separated",)
SOLVER:
BASE_LR: 0.01
WARMUP_FACTOR: 0.001
WARMUP_ITERS: 500
STEPS: (5500,)
MAX_ITER: 7000
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 38.73, 0.7], ["sem_seg", "mIoU", 64.73, 1.2], ["panoptic_seg", "PQ", 48.13, 0.8]]
_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml"
MODEL:
WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/137849486/model_final_4cafe0.pkl"
DATASETS:
TEST: ("coco_2017_val_100",)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 44.36, 0.02]]
_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
DATASETS:
TRAIN: ("coco_2017_val_100",)
TEST: ("coco_2017_val_100",)
SOLVER:
BASE_LR: 0.005
STEPS: (30,)
MAX_ITER: 40
IMS_PER_BATCH: 4
DATALOADER:
NUM_WORKERS: 2
_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
MODEL:
WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl"
DATASETS:
TEST: ("coco_2017_val_100",)
TEST:
EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]]
_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
DATASETS:
TRAIN: ("coco_2017_val_100",)
TEST: ("coco_2017_val_100",)
SOLVER:
STEPS: (30,)
MAX_ITER: 40
BASE_LR: 0.005
IMS_PER_BATCH: 4
DATALOADER:
NUM_WORKERS: 2
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "SemanticSegmentor"
WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl"
RESNETS:
DEPTH: 50
DATASETS:
TEST: ("coco_2017_val_100_panoptic_stuffonly",)
TEST:
EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]]
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "SemanticSegmentor"
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DATASETS:
TRAIN: ("coco_2017_val_100_panoptic_stuffonly",)
TEST: ("coco_2017_val_100_panoptic_stuffonly",)
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
SOLVER:
BASE_LR: 0.005
STEPS: (30,)
MAX_ITER: 40
IMS_PER_BATCH: 4
DATALOADER:
NUM_WORKERS: 2
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "SemanticSegmentor"
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DATASETS:
TRAIN: ("coco_2017_val_panoptic_stuffonly",)
TEST: ("coco_2017_val_panoptic_stuffonly",)
SOLVER:
BASE_LR: 0.01
WARMUP_FACTOR: 0.001
WARMUP_ITERS: 300
STEPS: (5500,)
MAX_ITER: 7000
TEST:
EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]]
INPUT:
# no scale augmentation
MIN_SIZE_TRAIN: (800, )
# Setup Builtin Datasets
Detectron2 has builtin support for a few datasets.
The datasets are assumed to exist in a directory specified by the environment variable
`DETECTRON2_DATASETS`.
Under this directory, detectron2 expects to find datasets in the structure described below.
You can set the location of the builtin datasets with `export DETECTRON2_DATASETS=/path/to/datasets`.
If left unset, the default is `./datasets` relative to your current working directory.
The [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md)
contains configs and models that use these builtin datasets.
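As a quick check that the environment variable is picked up, here is a minimal sketch (the path below is a placeholder, and the COCO val2017 files described in the next section are assumed to be in place). Note that `DETECTRON2_DATASETS` must be set before `detectron2.data` is imported, because the builtin datasets are registered relative to this root at import time:
```
import os

# Placeholder path; set this before the import below.
os.environ["DETECTRON2_DATASETS"] = "/path/to/datasets"

from detectron2.data import DatasetCatalog, MetadataCatalog

# Load one of the builtin splits and inspect its metadata.
dicts = DatasetCatalog.get("coco_2017_val")
meta = MetadataCatalog.get("coco_2017_val")
print(len(dicts), "images,", len(meta.thing_classes), "thing classes")
```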
## Expected dataset structure for COCO instance/keypoint detection:
```
coco/
annotations/
instances_{train,val}2017.json
person_keypoints_{train,val}2017.json
{train,val}2017/
# image files that are mentioned in the corresponding json
```
You can use the 2014 version of the dataset as well.
Some of the builtin tests (`dev/run_*_tests.sh`) use a tiny version of the COCO dataset,
which you can download with `./prepare_for_tests.sh`.
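If you keep your own data in the same COCO json format but under a different path, it can be registered with detectron2's `register_coco_instances` helper. A minimal sketch (the dataset name and file paths below are placeholders):
```
from detectron2.data.datasets import register_coco_instances

# Hypothetical name and paths; the json must follow the COCO instances format
# shown above, and the image root is the directory containing the image files.
register_coco_instances(
    "my_coco_train",                                # new dataset name
    {},                                             # extra metadata (may be empty)
    "/path/to/annotations/instances_train.json",    # COCO-format annotations
    "/path/to/train_images",                        # image directory
)
```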
## Expected dataset structure for PanopticFPN:
```
coco/
annotations/
panoptic_{train,val}2017.json
panoptic_{train,val}2017/ # png annotations
panoptic_stuff_{train,val}2017/ # generated by the script mentioned below
```
Install panopticapi by:
```
pip install git+https://github.com/cocodataset/panopticapi.git
```
Then run `python prepare_panoptic_fpn.py` to extract semantic annotations from the panoptic annotations.
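As a rough sanity check (not part of the official scripts): the generated PNGs map every "thing" pixel to 0, stuff classes to contiguous ids starting from 1, and unlabeled pixels to 255, so you can inspect one output file like this (the file name is just an example):
```
import numpy as np
from PIL import Image

# Example output of prepare_panoptic_fpn.py; pick any png that exists.
mask = np.asarray(Image.open("coco/panoptic_stuff_val2017/000000000139.png"))
print(sorted(np.unique(mask).tolist()))  # expect 0 (things), 1..N (stuff), 255 (unlabeled)
```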
## Expected dataset structure for LVIS instance segmentation:
```
coco/
{train,val,test}2017/
lvis/
lvis_v0.5_{train,val}.json
lvis_v0.5_image_info_test.json
```
Install lvis-api by:
```
pip install git+https://github.com/lvis-dataset/lvis-api.git
```
Run `python prepare_cocofied_lvis.py` to prepare "cocofied" LVIS annotations for evaluation of models trained on the COCO dataset.
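Once the cocofied json exists, COCO-trained models can be scored with `lvis-api` directly. A minimal sketch, assuming detection results have already been dumped to a COCO-style `results.json` (both file paths here are illustrative):
```
from lvis import LVIS, LVISEval, LVISResults

# Ground truth: the cocofied annotations produced by prepare_cocofied_lvis.py.
lvis_gt = LVIS("lvis/lvis_v0.5_val_cocofied.json")
# Detections: a COCO-style results json produced by your model (illustrative path).
lvis_dt = LVISResults(lvis_gt, "results.json")

lvis_eval = LVISEval(lvis_gt, lvis_dt, iou_type="bbox")
lvis_eval.run()
lvis_eval.print_results()
```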
## Expected dataset structure for cityscapes:
```
cityscapes/
gtFine/
train/
aachen/
color.png, instanceIds.png, labelIds.png, polygons.json,
labelTrainIds.png
...
val/
test/
leftImg8bit/
train/
val/
test/
```
Install cityscapes scripts by:
```
pip install git+https://github.com/mcordts/cityscapesScripts.git
```
Note: the labelTrainIds.png files are created by cityscapesScripts with:
```
CITYSCAPES_DATASET=$DETECTRON2_DATASETS/cityscapes python cityscapesscripts/preparation/createTrainIdLabelImgs.py
```
They are not needed for instance segmentation.
## Expected dataset structure for Pascal VOC:
```
VOC20{07,12}/
Annotations/
ImageSets/
Main/
trainval.txt
test.txt
# train.txt or val.txt, if you use these splits
JPEGImages/
```
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import json
import os
from collections import defaultdict
# This mapping is extracted from the official LVIS mapping:
# https://github.com/lvis-dataset/lvis-api/blob/master/data/coco_to_synset.json
COCO_SYNSET_CATEGORIES = [
{"synset": "person.n.01", "coco_cat_id": 1},
{"synset": "bicycle.n.01", "coco_cat_id": 2},
{"synset": "car.n.01", "coco_cat_id": 3},
{"synset": "motorcycle.n.01", "coco_cat_id": 4},
{"synset": "airplane.n.01", "coco_cat_id": 5},
{"synset": "bus.n.01", "coco_cat_id": 6},
{"synset": "train.n.01", "coco_cat_id": 7},
{"synset": "truck.n.01", "coco_cat_id": 8},
{"synset": "boat.n.01", "coco_cat_id": 9},
{"synset": "traffic_light.n.01", "coco_cat_id": 10},
{"synset": "fireplug.n.01", "coco_cat_id": 11},
{"synset": "stop_sign.n.01", "coco_cat_id": 13},
{"synset": "parking_meter.n.01", "coco_cat_id": 14},
{"synset": "bench.n.01", "coco_cat_id": 15},
{"synset": "bird.n.01", "coco_cat_id": 16},
{"synset": "cat.n.01", "coco_cat_id": 17},
{"synset": "dog.n.01", "coco_cat_id": 18},
{"synset": "horse.n.01", "coco_cat_id": 19},
{"synset": "sheep.n.01", "coco_cat_id": 20},
{"synset": "beef.n.01", "coco_cat_id": 21},
{"synset": "elephant.n.01", "coco_cat_id": 22},
{"synset": "bear.n.01", "coco_cat_id": 23},
{"synset": "zebra.n.01", "coco_cat_id": 24},
{"synset": "giraffe.n.01", "coco_cat_id": 25},
{"synset": "backpack.n.01", "coco_cat_id": 27},
{"synset": "umbrella.n.01", "coco_cat_id": 28},
{"synset": "bag.n.04", "coco_cat_id": 31},
{"synset": "necktie.n.01", "coco_cat_id": 32},
{"synset": "bag.n.06", "coco_cat_id": 33},
{"synset": "frisbee.n.01", "coco_cat_id": 34},
{"synset": "ski.n.01", "coco_cat_id": 35},
{"synset": "snowboard.n.01", "coco_cat_id": 36},
{"synset": "ball.n.06", "coco_cat_id": 37},
{"synset": "kite.n.03", "coco_cat_id": 38},
{"synset": "baseball_bat.n.01", "coco_cat_id": 39},
{"synset": "baseball_glove.n.01", "coco_cat_id": 40},
{"synset": "skateboard.n.01", "coco_cat_id": 41},
{"synset": "surfboard.n.01", "coco_cat_id": 42},
{"synset": "tennis_racket.n.01", "coco_cat_id": 43},
{"synset": "bottle.n.01", "coco_cat_id": 44},
{"synset": "wineglass.n.01", "coco_cat_id": 46},
{"synset": "cup.n.01", "coco_cat_id": 47},
{"synset": "fork.n.01", "coco_cat_id": 48},
{"synset": "knife.n.01", "coco_cat_id": 49},
{"synset": "spoon.n.01", "coco_cat_id": 50},
{"synset": "bowl.n.03", "coco_cat_id": 51},
{"synset": "banana.n.02", "coco_cat_id": 52},
{"synset": "apple.n.01", "coco_cat_id": 53},
{"synset": "sandwich.n.01", "coco_cat_id": 54},
{"synset": "orange.n.01", "coco_cat_id": 55},
{"synset": "broccoli.n.01", "coco_cat_id": 56},
{"synset": "carrot.n.01", "coco_cat_id": 57},
{"synset": "frank.n.02", "coco_cat_id": 58},
{"synset": "pizza.n.01", "coco_cat_id": 59},
{"synset": "doughnut.n.02", "coco_cat_id": 60},
{"synset": "cake.n.03", "coco_cat_id": 61},
{"synset": "chair.n.01", "coco_cat_id": 62},
{"synset": "sofa.n.01", "coco_cat_id": 63},
{"synset": "pot.n.04", "coco_cat_id": 64},
{"synset": "bed.n.01", "coco_cat_id": 65},
{"synset": "dining_table.n.01", "coco_cat_id": 67},
{"synset": "toilet.n.02", "coco_cat_id": 70},
{"synset": "television_receiver.n.01", "coco_cat_id": 72},
{"synset": "laptop.n.01", "coco_cat_id": 73},
{"synset": "mouse.n.04", "coco_cat_id": 74},
{"synset": "remote_control.n.01", "coco_cat_id": 75},
{"synset": "computer_keyboard.n.01", "coco_cat_id": 76},
{"synset": "cellular_telephone.n.01", "coco_cat_id": 77},
{"synset": "microwave.n.02", "coco_cat_id": 78},
{"synset": "oven.n.01", "coco_cat_id": 79},
{"synset": "toaster.n.02", "coco_cat_id": 80},
{"synset": "sink.n.01", "coco_cat_id": 81},
{"synset": "electric_refrigerator.n.01", "coco_cat_id": 82},
{"synset": "book.n.01", "coco_cat_id": 84},
{"synset": "clock.n.01", "coco_cat_id": 85},
{"synset": "vase.n.01", "coco_cat_id": 86},
{"synset": "scissors.n.01", "coco_cat_id": 87},
{"synset": "teddy.n.01", "coco_cat_id": 88},
{"synset": "hand_blower.n.01", "coco_cat_id": 89},
{"synset": "toothbrush.n.01", "coco_cat_id": 90},
]
def cocofy_lvis(input_filename, output_filename):
"""
Filter LVIS instance segmentation annotations to remove all categories that are not included in
COCO. The new json files can be used to evaluate COCO AP using `lvis-api`. The category ids in
the output json are the incontiguous COCO dataset ids.
Args:
input_filename (str): path to the LVIS json file.
output_filename (str): path to the COCOfied json file.
"""
with open(input_filename, "r") as f:
lvis_json = json.load(f)
lvis_annos = lvis_json.pop("annotations")
cocofied_lvis = copy.deepcopy(lvis_json)
lvis_json["annotations"] = lvis_annos
# Mapping from lvis cat id to coco cat id via synset
lvis_cat_id_to_synset = {cat["id"]: cat["synset"] for cat in lvis_json["categories"]}
synset_to_coco_cat_id = {x["synset"]: x["coco_cat_id"] for x in COCO_SYNSET_CATEGORIES}
# Synsets that we will keep in the dataset
synsets_to_keep = set(synset_to_coco_cat_id.keys())
coco_cat_id_with_instances = defaultdict(int)
new_annos = []
ann_id = 1
for ann in lvis_annos:
lvis_cat_id = ann["category_id"]
synset = lvis_cat_id_to_synset[lvis_cat_id]
if synset not in synsets_to_keep:
continue
coco_cat_id = synset_to_coco_cat_id[synset]
new_ann = copy.deepcopy(ann)
new_ann["category_id"] = coco_cat_id
new_ann["id"] = ann_id
ann_id += 1
new_annos.append(new_ann)
coco_cat_id_with_instances[coco_cat_id] += 1
cocofied_lvis["annotations"] = new_annos
for image in cocofied_lvis["images"]:
for key in ["not_exhaustive_category_ids", "neg_category_ids"]:
new_category_list = []
for lvis_cat_id in image[key]:
synset = lvis_cat_id_to_synset[lvis_cat_id]
if synset not in synsets_to_keep:
continue
coco_cat_id = synset_to_coco_cat_id[synset]
new_category_list.append(coco_cat_id)
coco_cat_id_with_instances[coco_cat_id] += 1
image[key] = new_category_list
coco_cat_id_with_instances = set(coco_cat_id_with_instances.keys())
new_categories = []
for cat in lvis_json["categories"]:
synset = cat["synset"]
if synset not in synsets_to_keep:
continue
coco_cat_id = synset_to_coco_cat_id[synset]
if coco_cat_id not in coco_cat_id_with_instances:
continue
new_cat = copy.deepcopy(cat)
new_cat["id"] = coco_cat_id
new_categories.append(new_cat)
cocofied_lvis["categories"] = new_categories
with open(output_filename, "w") as f:
json.dump(cocofied_lvis, f)
print("{} is COCOfied and stored in {}.".format(input_filename, output_filename))
if __name__ == "__main__":
dataset_dir = os.path.join(os.path.dirname(__file__), "lvis")
for s in ["lvis_v0.5_train", "lvis_v0.5_val"]:
print("Start COCOfing {}.".format(s))
cocofy_lvis(
os.path.join(dataset_dir, "{}.json".format(s)),
os.path.join(dataset_dir, "{}_cocofied.json".format(s)),
)
#!/bin/bash -e
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# Download some files needed for running tests.
cd "${0%/*}"
BASE=https://dl.fbaipublicfiles.com/detectron2
mkdir -p coco/annotations
for anno in instances_val2017_100 \
person_keypoints_val2017_100 \
instances_minival2014_100 \
person_keypoints_minival2014_100; do
dest=coco/annotations/$anno.json
[[ -s $dest ]] && {
echo "$dest exists. Skipping ..."
} || {
wget $BASE/annotations/coco/$anno.json -O $dest
}
done
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import functools
import json
import multiprocessing as mp
import numpy as np
import os
import time
from fvcore.common.download import download
from PIL import Image
from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
from panopticapi.utils import rgb2id
def _process_panoptic_to_semantic(input_panoptic, output_semantic, segments, id_map):
panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32)
panoptic = rgb2id(panoptic)
output = np.zeros_like(panoptic, dtype=np.uint8) + 255
for seg in segments:
cat_id = seg["category_id"]
new_cat_id = id_map[cat_id]
output[panoptic == seg["id"]] = new_cat_id
Image.fromarray(output).save(output_semantic)
def separate_coco_semantic_from_panoptic(panoptic_json, panoptic_root, sem_seg_root, categories):
"""
Create semantic segmentation annotations from panoptic segmentation
annotations, to be used by PanopticFPN.
It maps all thing categories to class 0, and maps all unlabeled pixels to class 255.
It maps all stuff categories to contiguous ids starting from 1.
Args:
panoptic_json (str): path to the panoptic json file, in COCO's format.
panoptic_root (str): a directory with panoptic annotation files, in COCO's format.
sem_seg_root (str): a directory to output semantic annotation files
categories (list[dict]): category metadata. Each dict needs to have:
"id": corresponds to the "category_id" in the json annotations
"isthing": 0 or 1
"""
os.makedirs(sem_seg_root, exist_ok=True)
stuff_ids = [k["id"] for k in categories if k["isthing"] == 0]
thing_ids = [k["id"] for k in categories if k["isthing"] == 1]
id_map = {} # map from category id to id in the output semantic annotation
assert len(stuff_ids) <= 254
for i, stuff_id in enumerate(stuff_ids):
id_map[stuff_id] = i + 1
for thing_id in thing_ids:
id_map[thing_id] = 0
id_map[0] = 255
with open(panoptic_json) as f:
obj = json.load(f)
pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4))
def iter_annotations():
for anno in obj["annotations"]:
file_name = anno["file_name"]
segments = anno["segments_info"]
input = os.path.join(panoptic_root, file_name)
output = os.path.join(sem_seg_root, file_name)
yield input, output, segments
print("Start writing to {} ...".format(sem_seg_root))
start = time.time()
pool.starmap(
functools.partial(_process_panoptic_to_semantic, id_map=id_map),
iter_annotations(),
chunksize=100,
)
print("Finished. time: {:.2f}s".format(time.time() - start))
if __name__ == "__main__":
dataset_dir = os.path.join(os.path.dirname(__file__), "coco")
for s in ["val2017", "train2017"]:
separate_coco_semantic_from_panoptic(
os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)),
os.path.join(dataset_dir, "panoptic_{}".format(s)),
os.path.join(dataset_dir, "panoptic_stuff_{}".format(s)),
COCO_CATEGORIES,
)
# Prepare val2017_100 for quick testing:
dest_dir = os.path.join(dataset_dir, "annotations/")
URL_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/"
download(URL_PREFIX + "annotations/coco/panoptic_val2017_100.json", dest_dir)
with open(os.path.join(dest_dir, "panoptic_val2017_100.json")) as f:
obj = json.load(f)
def link_val100(dir_full, dir_100):
print("Creating " + dir_100 + " ...")
os.makedirs(dir_100, exist_ok=True)
for img in obj["images"]:
basename = os.path.splitext(img["file_name"])[0]
src = os.path.join(dir_full, basename + ".png")
dst = os.path.join(dir_100, basename + ".png")
src = os.path.relpath(src, start=dir_100)
os.symlink(src, dst)
link_val100(
os.path.join(dataset_dir, "panoptic_val2017"),
os.path.join(dataset_dir, "panoptic_val2017_100"),
)
link_val100(
os.path.join(dataset_dir, "panoptic_stuff_val2017"),
os.path.join(dataset_dir, "panoptic_stuff_val2017_100"),
)
## Detectron2 Demo
We provide a command line tool to run a simple demo of builtin models.
The usage is explained in [GETTING_STARTED.md](../GETTING_STARTED.md).
See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-)
for a high-quality demo generated with this tool.
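For reference, a typical invocation looks like the following (the input images and output directory are placeholders; the flags correspond to the argument parser in `demo.py` below, and the config is one of the quick-schedule files shown earlier in this commit):
```
python demo/demo.py \
  --config-file configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml \
  --input input1.jpg input2.jpg \
  --output results/ \
  --confidence-threshold 0.5
```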
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import argparse
import glob
import multiprocessing as mp
import os
import time
import cv2
import tqdm
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.utils.logger import setup_logger
from predictor import VisualizationDemo
# constants
WINDOW_NAME = "COCO detections"
def setup_cfg(args):
# load config from file and command-line arguments
cfg = get_cfg()
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
# Set score_threshold for builtin models
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
cfg.freeze()
return cfg
def get_parser():
parser = argparse.ArgumentParser(description="Detectron2 demo for builtin models")
parser.add_argument(
"--config-file",
default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml",
metavar="FILE",
help="path to config file",
)
parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.")
parser.add_argument("--video-input", help="Path to video file.")
parser.add_argument(
"--input",
nargs="+",
help="A list of space separated input images; "
"or a single glob pattern such as 'directory/*.jpg'",
)
parser.add_argument(
"--output",
help="A file or directory to save output visualizations. "
"If not given, will show output in an OpenCV window.",
)
parser.add_argument(
"--confidence-threshold",
type=float,
default=0.5,
help="Minimum score for instance predictions to be shown",
)
parser.add_argument(
"--opts",
help="Modify config options using the command-line 'KEY VALUE' pairs",
default=[],
nargs=argparse.REMAINDER,
)
return parser
if __name__ == "__main__":
mp.set_start_method("spawn", force=True)
args = get_parser().parse_args()
setup_logger(name="fvcore")
logger = setup_logger()
logger.info("Arguments: " + str(args))
cfg = setup_cfg(args)
demo = VisualizationDemo(cfg)
if args.input:
if len(args.input) == 1:
args.input = glob.glob(os.path.expanduser(args.input[0]))
assert args.input, "The input path(s) was not found"
for path in tqdm.tqdm(args.input, disable=not args.output):
# use PIL, to be consistent with evaluation
img = read_image(path, format="BGR")
start_time = time.time()
predictions, visualized_output = demo.run_on_image(img)
logger.info(
"{}: {} in {:.2f}s".format(
path,
"detected {} instances".format(len(predictions["instances"]))
if "instances" in predictions
else "finished",
time.time() - start_time,
)
)
if args.output:
if os.path.isdir(args.output):
assert os.path.isdir(args.output), args.output
out_filename = os.path.join(args.output, os.path.basename(path))
else:
assert len(args.input) == 1, "Please specify a directory with args.output"
out_filename = args.output
visualized_output.save(out_filename)
else:
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
if cv2.waitKey(0) == 27:
break # esc to quit
elif args.webcam:
assert args.input is None, "Cannot have both --input and --webcam!"
assert args.output is None, "output not yet supported with --webcam!"
cam = cv2.VideoCapture(0)
for vis in tqdm.tqdm(demo.run_on_video(cam)):
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, vis)
if cv2.waitKey(1) == 27:
break # esc to quit
cam.release()
cv2.destroyAllWindows()
elif args.video_input:
video = cv2.VideoCapture(args.video_input)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
frames_per_second = video.get(cv2.CAP_PROP_FPS)
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
basename = os.path.basename(args.video_input)
if args.output:
if os.path.isdir(args.output):
output_fname = os.path.join(args.output, basename)
output_fname = os.path.splitext(output_fname)[0] + ".mkv"
else:
output_fname = args.output
assert not os.path.isfile(output_fname), output_fname
output_file = cv2.VideoWriter(
filename=output_fname,
# some installation of opencv may not support x264 (due to its license),
# you can try other format (e.g. MPEG)
fourcc=cv2.VideoWriter_fourcc(*"x264"),
fps=float(frames_per_second),
frameSize=(width, height),
isColor=True,
)
assert os.path.isfile(args.video_input)
for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
if args.output:
output_file.write(vis_frame)
else:
cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
cv2.imshow(basename, vis_frame)
if cv2.waitKey(1) == 27:
break # esc to quit
video.release()
if args.output:
output_file.release()
else:
cv2.destroyAllWindows()