Commit 80a37498 authored by yongshk's avatar yongshk
Browse files

Initial commit

parents
Pipeline #3463 failed with stages
in 0 seconds
_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"
MODEL:
WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl"
DATASETS:
TEST: ("coco_2017_val_100_panoptic_separated",)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]]
FLOAT32_PRECISION: "highest"
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "PanopticFPN"
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
SEM_SEG_HEAD:
LOSS_WEIGHT: 0.5
DATASETS:
TRAIN: ("coco_2017_val_100_panoptic_separated",)
TEST: ("coco_2017_val_100_panoptic_separated",)
SOLVER:
BASE_LR: 0.005
STEPS: (30,)
MAX_ITER: 40
IMS_PER_BATCH: 4
DATALOADER:
NUM_WORKERS: 1
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "PanopticFPN"
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
MASK_ON: True
RESNETS:
DEPTH: 50
SEM_SEG_HEAD:
LOSS_WEIGHT: 0.5
DATASETS:
TRAIN: ("coco_2017_val_panoptic_separated",)
TEST: ("coco_2017_val_panoptic_separated",)
SOLVER:
BASE_LR: 0.01
WARMUP_FACTOR: 0.001
WARMUP_ITERS: 500
STEPS: (5500,)
MAX_ITER: 7000
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]]
_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml"
MODEL:
WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl"
DATASETS:
TEST: ("coco_2017_val_100",)
TEST:
EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]]
_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
DATASETS:
TRAIN: ("coco_2017_val_100",)
TEST: ("coco_2017_val_100",)
SOLVER:
BASE_LR: 0.005
STEPS: (30,)
MAX_ITER: 40
IMS_PER_BATCH: 4
DATALOADER:
NUM_WORKERS: 2
_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
MODEL:
WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl"
DATASETS:
TEST: ("coco_2017_val_100",)
TEST:
EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]]
FLOAT32_PRECISION: "highest"
_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
MODEL:
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
DATASETS:
TRAIN: ("coco_2017_val_100",)
TEST: ("coco_2017_val_100",)
SOLVER:
STEPS: (30,)
MAX_ITER: 40
BASE_LR: 0.005
IMS_PER_BATCH: 4
DATALOADER:
NUM_WORKERS: 2
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "SemanticSegmentor"
WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl"
RESNETS:
DEPTH: 50
DATASETS:
TEST: ("coco_2017_val_100_panoptic_stuffonly",)
TEST:
EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]]
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "SemanticSegmentor"
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DATASETS:
TRAIN: ("coco_2017_val_100_panoptic_stuffonly",)
TEST: ("coco_2017_val_100_panoptic_stuffonly",)
INPUT:
MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
SOLVER:
BASE_LR: 0.005
STEPS: (30,)
MAX_ITER: 40
IMS_PER_BATCH: 4
DATALOADER:
NUM_WORKERS: 2
_BASE_: "../Base-RCNN-FPN.yaml"
MODEL:
META_ARCHITECTURE: "SemanticSegmentor"
WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
RESNETS:
DEPTH: 50
DATASETS:
TRAIN: ("coco_2017_val_panoptic_stuffonly",)
TEST: ("coco_2017_val_panoptic_stuffonly",)
SOLVER:
BASE_LR: 0.01
WARMUP_FACTOR: 0.001
WARMUP_ITERS: 300
STEPS: (5500,)
MAX_ITER: 7000
TEST:
EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]]
INPUT:
# no scale augmentation
MIN_SIZE_TRAIN: (800, )
# Use Builtin Datasets
A dataset can be used by accessing [DatasetCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.DatasetCatalog)
for its data, or [MetadataCatalog](https://detectron2.readthedocs.io/modules/data.html#detectron2.data.MetadataCatalog) for its metadata (class names, etc).
This document explains how to setup the builtin datasets so they can be used by the above APIs.
[Use Custom Datasets](https://detectron2.readthedocs.io/tutorials/datasets.html) gives a deeper dive on how to use `DatasetCatalog` and `MetadataCatalog`,
and how to add new datasets to them.
Detectron2 has builtin support for a few datasets.
The datasets are assumed to exist in a directory specified by the environment variable
`DETECTRON2_DATASETS`.
Under this directory, detectron2 will look for datasets in the structure described below, if needed.
```
$DETECTRON2_DATASETS/
coco/
lvis/
cityscapes/
VOC20{07,12}/
```
You can set the location for builtin datasets by `export DETECTRON2_DATASETS=/path/to/datasets`.
If left unset, the default is `./datasets` relative to your current working directory.
The [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md)
contains configs and models that use these builtin datasets.
## Expected dataset structure for [COCO instance/keypoint detection](https://cocodataset.org/#download):
```
coco/
annotations/
instances_{train,val}2017.json
person_keypoints_{train,val}2017.json
{train,val}2017/
# image files that are mentioned in the corresponding json
```
You can use the 2014 version of the dataset as well.
Some of the builtin tests (`dev/run_*_tests.sh`) uses a tiny version of the COCO dataset,
which you can download with `./datasets/prepare_for_tests.sh`.
## Expected dataset structure for PanopticFPN:
Extract panoptic annotations from [COCO website](https://cocodataset.org/#download)
into the following structure:
```
coco/
annotations/
panoptic_{train,val}2017.json
panoptic_{train,val}2017/ # png annotations
panoptic_stuff_{train,val}2017/ # generated by the script mentioned below
```
Install panopticapi by:
```
pip install git+https://github.com/cocodataset/panopticapi.git
```
Then, run `python datasets/prepare_panoptic_fpn.py`, to extract semantic annotations from panoptic annotations.
## Expected dataset structure for [LVIS instance segmentation](https://www.lvisdataset.org/dataset):
```
coco/
{train,val,test}2017/
lvis/
lvis_v0.5_{train,val}.json
lvis_v0.5_image_info_test.json
lvis_v1_{train,val}.json
lvis_v1_image_info_test{,_challenge}.json
```
Install lvis-api by:
```
pip install git+https://github.com/lvis-dataset/lvis-api.git
```
To evaluate models trained on the COCO dataset using LVIS annotations,
run `python datasets/prepare_cocofied_lvis.py` to prepare "cocofied" LVIS annotations.
## Expected dataset structure for [cityscapes](https://www.cityscapes-dataset.com/downloads/):
```
cityscapes/
gtFine/
train/
aachen/
color.png, instanceIds.png, labelIds.png, polygons.json,
labelTrainIds.png
...
val/
test/
# below are generated Cityscapes panoptic annotation
cityscapes_panoptic_train.json
cityscapes_panoptic_train/
cityscapes_panoptic_val.json
cityscapes_panoptic_val/
cityscapes_panoptic_test.json
cityscapes_panoptic_test/
leftImg8bit/
train/
val/
test/
```
Install cityscapes scripts by:
```
pip install git+https://github.com/mcordts/cityscapesScripts.git
```
Note: to create labelTrainIds.png, first prepare the above structure, then run cityscapesescript with:
```
CITYSCAPES_DATASET=/path/to/abovementioned/cityscapes python cityscapesscripts/preparation/createTrainIdLabelImgs.py
```
These files are not needed for instance segmentation.
Note: to generate Cityscapes panoptic dataset, run cityscapesescript with:
```
CITYSCAPES_DATASET=/path/to/abovementioned/cityscapes python cityscapesscripts/preparation/createPanopticImgs.py
```
These files are not needed for semantic and instance segmentation.
## Expected dataset structure for [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/index.html):
```
VOC20{07,12}/
Annotations/
ImageSets/
Main/
trainval.txt
test.txt
# train.txt or val.txt, if you use these splits
JPEGImages/
```
## Expected dataset structure for [ADE20k Scene Parsing](http://sceneparsing.csail.mit.edu/):
```
ADEChallengeData2016/
annotations/
annotations_detectron2/
images/
objectInfo150.txt
```
The directory `annotations_detectron2` is generated by running `python datasets/prepare_ade20k_sem_seg.py`.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
import numpy as np
import os
from pathlib import Path
import tqdm
from PIL import Image
def convert(input, output):
img = np.asarray(Image.open(input))
assert img.dtype == np.uint8
img = img - 1 # 0 (ignore) becomes 255. others are shifted by 1
Image.fromarray(img).save(output)
if __name__ == "__main__":
dataset_dir = Path(os.getenv("DETECTRON2_DATASETS", "datasets")) / "ADEChallengeData2016"
for name in ["training", "validation"]:
annotation_dir = dataset_dir / "annotations" / name
output_dir = dataset_dir / "annotations_detectron2" / name
output_dir.mkdir(parents=True, exist_ok=True)
for file in tqdm.tqdm(list(annotation_dir.iterdir())):
output_file = output_dir / file.name
convert(file, output_file)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
import copy
import json
import os
from collections import defaultdict
# This mapping is extracted from the official LVIS mapping:
# https://github.com/lvis-dataset/lvis-api/blob/master/data/coco_to_synset.json
COCO_SYNSET_CATEGORIES = [
{"synset": "person.n.01", "coco_cat_id": 1},
{"synset": "bicycle.n.01", "coco_cat_id": 2},
{"synset": "car.n.01", "coco_cat_id": 3},
{"synset": "motorcycle.n.01", "coco_cat_id": 4},
{"synset": "airplane.n.01", "coco_cat_id": 5},
{"synset": "bus.n.01", "coco_cat_id": 6},
{"synset": "train.n.01", "coco_cat_id": 7},
{"synset": "truck.n.01", "coco_cat_id": 8},
{"synset": "boat.n.01", "coco_cat_id": 9},
{"synset": "traffic_light.n.01", "coco_cat_id": 10},
{"synset": "fireplug.n.01", "coco_cat_id": 11},
{"synset": "stop_sign.n.01", "coco_cat_id": 13},
{"synset": "parking_meter.n.01", "coco_cat_id": 14},
{"synset": "bench.n.01", "coco_cat_id": 15},
{"synset": "bird.n.01", "coco_cat_id": 16},
{"synset": "cat.n.01", "coco_cat_id": 17},
{"synset": "dog.n.01", "coco_cat_id": 18},
{"synset": "horse.n.01", "coco_cat_id": 19},
{"synset": "sheep.n.01", "coco_cat_id": 20},
{"synset": "beef.n.01", "coco_cat_id": 21},
{"synset": "elephant.n.01", "coco_cat_id": 22},
{"synset": "bear.n.01", "coco_cat_id": 23},
{"synset": "zebra.n.01", "coco_cat_id": 24},
{"synset": "giraffe.n.01", "coco_cat_id": 25},
{"synset": "backpack.n.01", "coco_cat_id": 27},
{"synset": "umbrella.n.01", "coco_cat_id": 28},
{"synset": "bag.n.04", "coco_cat_id": 31},
{"synset": "necktie.n.01", "coco_cat_id": 32},
{"synset": "bag.n.06", "coco_cat_id": 33},
{"synset": "frisbee.n.01", "coco_cat_id": 34},
{"synset": "ski.n.01", "coco_cat_id": 35},
{"synset": "snowboard.n.01", "coco_cat_id": 36},
{"synset": "ball.n.06", "coco_cat_id": 37},
{"synset": "kite.n.03", "coco_cat_id": 38},
{"synset": "baseball_bat.n.01", "coco_cat_id": 39},
{"synset": "baseball_glove.n.01", "coco_cat_id": 40},
{"synset": "skateboard.n.01", "coco_cat_id": 41},
{"synset": "surfboard.n.01", "coco_cat_id": 42},
{"synset": "tennis_racket.n.01", "coco_cat_id": 43},
{"synset": "bottle.n.01", "coco_cat_id": 44},
{"synset": "wineglass.n.01", "coco_cat_id": 46},
{"synset": "cup.n.01", "coco_cat_id": 47},
{"synset": "fork.n.01", "coco_cat_id": 48},
{"synset": "knife.n.01", "coco_cat_id": 49},
{"synset": "spoon.n.01", "coco_cat_id": 50},
{"synset": "bowl.n.03", "coco_cat_id": 51},
{"synset": "banana.n.02", "coco_cat_id": 52},
{"synset": "apple.n.01", "coco_cat_id": 53},
{"synset": "sandwich.n.01", "coco_cat_id": 54},
{"synset": "orange.n.01", "coco_cat_id": 55},
{"synset": "broccoli.n.01", "coco_cat_id": 56},
{"synset": "carrot.n.01", "coco_cat_id": 57},
{"synset": "frank.n.02", "coco_cat_id": 58},
{"synset": "pizza.n.01", "coco_cat_id": 59},
{"synset": "doughnut.n.02", "coco_cat_id": 60},
{"synset": "cake.n.03", "coco_cat_id": 61},
{"synset": "chair.n.01", "coco_cat_id": 62},
{"synset": "sofa.n.01", "coco_cat_id": 63},
{"synset": "pot.n.04", "coco_cat_id": 64},
{"synset": "bed.n.01", "coco_cat_id": 65},
{"synset": "dining_table.n.01", "coco_cat_id": 67},
{"synset": "toilet.n.02", "coco_cat_id": 70},
{"synset": "television_receiver.n.01", "coco_cat_id": 72},
{"synset": "laptop.n.01", "coco_cat_id": 73},
{"synset": "mouse.n.04", "coco_cat_id": 74},
{"synset": "remote_control.n.01", "coco_cat_id": 75},
{"synset": "computer_keyboard.n.01", "coco_cat_id": 76},
{"synset": "cellular_telephone.n.01", "coco_cat_id": 77},
{"synset": "microwave.n.02", "coco_cat_id": 78},
{"synset": "oven.n.01", "coco_cat_id": 79},
{"synset": "toaster.n.02", "coco_cat_id": 80},
{"synset": "sink.n.01", "coco_cat_id": 81},
{"synset": "electric_refrigerator.n.01", "coco_cat_id": 82},
{"synset": "book.n.01", "coco_cat_id": 84},
{"synset": "clock.n.01", "coco_cat_id": 85},
{"synset": "vase.n.01", "coco_cat_id": 86},
{"synset": "scissors.n.01", "coco_cat_id": 87},
{"synset": "teddy.n.01", "coco_cat_id": 88},
{"synset": "hand_blower.n.01", "coco_cat_id": 89},
{"synset": "toothbrush.n.01", "coco_cat_id": 90},
]
def cocofy_lvis(input_filename, output_filename):
"""
Filter LVIS instance segmentation annotations to remove all categories that are not included in
COCO. The new json files can be used to evaluate COCO AP using `lvis-api`. The category ids in
the output json are the incontiguous COCO dataset ids.
Args:
input_filename (str): path to the LVIS json file.
output_filename (str): path to the COCOfied json file.
"""
with open(input_filename, "r") as f:
lvis_json = json.load(f)
lvis_annos = lvis_json.pop("annotations")
cocofied_lvis = copy.deepcopy(lvis_json)
lvis_json["annotations"] = lvis_annos
# Mapping from lvis cat id to coco cat id via synset
lvis_cat_id_to_synset = {cat["id"]: cat["synset"] for cat in lvis_json["categories"]}
synset_to_coco_cat_id = {x["synset"]: x["coco_cat_id"] for x in COCO_SYNSET_CATEGORIES}
# Synsets that we will keep in the dataset
synsets_to_keep = set(synset_to_coco_cat_id.keys())
coco_cat_id_with_instances = defaultdict(int)
new_annos = []
ann_id = 1
for ann in lvis_annos:
lvis_cat_id = ann["category_id"]
synset = lvis_cat_id_to_synset[lvis_cat_id]
if synset not in synsets_to_keep:
continue
coco_cat_id = synset_to_coco_cat_id[synset]
new_ann = copy.deepcopy(ann)
new_ann["category_id"] = coco_cat_id
new_ann["id"] = ann_id
ann_id += 1
new_annos.append(new_ann)
coco_cat_id_with_instances[coco_cat_id] += 1
cocofied_lvis["annotations"] = new_annos
for image in cocofied_lvis["images"]:
for key in ["not_exhaustive_category_ids", "neg_category_ids"]:
new_category_list = []
for lvis_cat_id in image[key]:
synset = lvis_cat_id_to_synset[lvis_cat_id]
if synset not in synsets_to_keep:
continue
coco_cat_id = synset_to_coco_cat_id[synset]
new_category_list.append(coco_cat_id)
coco_cat_id_with_instances[coco_cat_id] += 1
image[key] = new_category_list
coco_cat_id_with_instances = set(coco_cat_id_with_instances.keys())
new_categories = []
for cat in lvis_json["categories"]:
synset = cat["synset"]
if synset not in synsets_to_keep:
continue
coco_cat_id = synset_to_coco_cat_id[synset]
if coco_cat_id not in coco_cat_id_with_instances:
continue
new_cat = copy.deepcopy(cat)
new_cat["id"] = coco_cat_id
new_categories.append(new_cat)
cocofied_lvis["categories"] = new_categories
with open(output_filename, "w") as f:
json.dump(cocofied_lvis, f)
print("{} is COCOfied and stored in {}.".format(input_filename, output_filename))
if __name__ == "__main__":
dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "lvis")
for s in ["lvis_v0.5_train", "lvis_v0.5_val"]:
print("Start COCOfing {}.".format(s))
cocofy_lvis(
os.path.join(dataset_dir, "{}.json".format(s)),
os.path.join(dataset_dir, "{}_cocofied.json".format(s)),
)
#!/bin/bash -e
# Copyright (c) Facebook, Inc. and its affiliates.
# Download the mini dataset (coco val2017_100, with only 100 images)
# to be used in unittests & integration tests.
cd "${0%/*}"
BASE=https://dl.fbaipublicfiles.com/detectron2
ROOT=${DETECTRON2_DATASETS:-./}
ROOT=${ROOT/#\~/$HOME} # expand ~ to HOME
mkdir -p $ROOT/coco/annotations
for anno in instances_val2017_100 \
person_keypoints_val2017_100 ; do
dest=$ROOT/coco/annotations/$anno.json
[[ -s $dest ]] && {
echo "$dest exists. Skipping ..."
} || {
wget $BASE/annotations/coco/$anno.json -O $dest
}
done
dest=$ROOT/coco/val2017_100.tgz
[[ -d $ROOT/coco/val2017 ]] && {
echo "$ROOT/coco/val2017 exists. Skipping ..."
} || {
wget $BASE/annotations/coco/val2017_100.tgz -O $dest
tar xzf $dest -C $ROOT/coco/ && rm -f $dest
}
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
import functools
import json
import multiprocessing as mp
import numpy as np
import os
import time
from fvcore.common.download import download
from panopticapi.utils import rgb2id
from PIL import Image
from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
def _process_panoptic_to_semantic(input_panoptic, output_semantic, segments, id_map):
panoptic = np.asarray(Image.open(input_panoptic), dtype=np.uint32)
panoptic = rgb2id(panoptic)
output = np.zeros_like(panoptic, dtype=np.uint8) + 255
for seg in segments:
cat_id = seg["category_id"]
new_cat_id = id_map[cat_id]
output[panoptic == seg["id"]] = new_cat_id
Image.fromarray(output).save(output_semantic)
def separate_coco_semantic_from_panoptic(panoptic_json, panoptic_root, sem_seg_root, categories):
"""
Create semantic segmentation annotations from panoptic segmentation
annotations, to be used by PanopticFPN.
It maps all thing categories to class 0, and maps all unlabeled pixels to class 255.
It maps all stuff categories to contiguous ids starting from 1.
Args:
panoptic_json (str): path to the panoptic json file, in COCO's format.
panoptic_root (str): a directory with panoptic annotation files, in COCO's format.
sem_seg_root (str): a directory to output semantic annotation files
categories (list[dict]): category metadata. Each dict needs to have:
"id": corresponds to the "category_id" in the json annotations
"isthing": 0 or 1
"""
os.makedirs(sem_seg_root, exist_ok=True)
stuff_ids = [k["id"] for k in categories if k["isthing"] == 0]
thing_ids = [k["id"] for k in categories if k["isthing"] == 1]
id_map = {} # map from category id to id in the output semantic annotation
assert len(stuff_ids) <= 254
for i, stuff_id in enumerate(stuff_ids):
id_map[stuff_id] = i + 1
for thing_id in thing_ids:
id_map[thing_id] = 0
id_map[0] = 255
with open(panoptic_json) as f:
obj = json.load(f)
pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4))
def iter_annotations():
for anno in obj["annotations"]:
file_name = anno["file_name"]
segments = anno["segments_info"]
input = os.path.join(panoptic_root, file_name)
output = os.path.join(sem_seg_root, file_name)
yield input, output, segments
print("Start writing to {} ...".format(sem_seg_root))
start = time.time()
pool.starmap(
functools.partial(_process_panoptic_to_semantic, id_map=id_map),
iter_annotations(),
chunksize=100,
)
print("Finished. time: {:.2f}s".format(time.time() - start))
if __name__ == "__main__":
dataset_dir = os.path.join(os.getenv("DETECTRON2_DATASETS", "datasets"), "coco")
for s in ["val2017", "train2017"]:
separate_coco_semantic_from_panoptic(
os.path.join(dataset_dir, "annotations/panoptic_{}.json".format(s)),
os.path.join(dataset_dir, "panoptic_{}".format(s)),
os.path.join(dataset_dir, "panoptic_stuff_{}".format(s)),
COCO_CATEGORIES,
)
# Prepare val2017_100 for quick testing:
dest_dir = os.path.join(dataset_dir, "annotations/")
URL_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/"
download(URL_PREFIX + "annotations/coco/panoptic_val2017_100.json", dest_dir)
with open(os.path.join(dest_dir, "panoptic_val2017_100.json")) as f:
obj = json.load(f)
def link_val100(dir_full, dir_100):
print("Creating " + dir_100 + " ...")
os.makedirs(dir_100, exist_ok=True)
for img in obj["images"]:
basename = os.path.splitext(img["file_name"])[0]
src = os.path.join(dir_full, basename + ".png")
dst = os.path.join(dir_100, basename + ".png")
src = os.path.relpath(src, start=dir_100)
os.symlink(src, dst)
link_val100(
os.path.join(dataset_dir, "panoptic_val2017"),
os.path.join(dataset_dir, "panoptic_val2017_100"),
)
link_val100(
os.path.join(dataset_dir, "panoptic_stuff_val2017"),
os.path.join(dataset_dir, "panoptic_stuff_val2017_100"),
)
## Detectron2 Demo
We provide a command line tool to run a simple demo of builtin configs.
The usage is explained in [GETTING_STARTED.md](../GETTING_STARTED.md).
See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-)
for a high-quality demo generated with this tool.
# Copyright (c) Facebook, Inc. and its affiliates.
import argparse
import glob
import multiprocessing as mp
import numpy as np
import os
import tempfile
import time
import warnings
import cv2
import tqdm
from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.utils.logger import setup_logger
from vision.fair.detectron2.demo.predictor import VisualizationDemo
# constants
WINDOW_NAME = "COCO detections"
def setup_cfg(args):
# load config from file and command-line arguments
cfg = get_cfg()
# To use demo for Panoptic-DeepLab, please uncomment the following two lines.
# from detectron2.projects.panoptic_deeplab import add_panoptic_deeplab_config # noqa
# add_panoptic_deeplab_config(cfg)
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
# Set score_threshold for builtin models
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
cfg.freeze()
return cfg
def get_parser():
parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
parser.add_argument(
"--config-file",
default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml",
metavar="FILE",
help="path to config file",
)
parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.")
parser.add_argument("--video-input", help="Path to video file.")
parser.add_argument(
"--input",
nargs="+",
help="A list of space separated input images; "
"or a single glob pattern such as 'directory/*.jpg'",
)
parser.add_argument(
"--output",
help="A file or directory to save output visualizations. "
"If not given, will show output in an OpenCV window.",
)
parser.add_argument(
"--confidence-threshold",
type=float,
default=0.5,
help="Minimum score for instance predictions to be shown",
)
parser.add_argument(
"--opts",
help="Modify config options using the command-line 'KEY VALUE' pairs",
default=[],
nargs=argparse.REMAINDER,
)
return parser
def test_opencv_video_format(codec, file_ext):
with tempfile.TemporaryDirectory(prefix="video_format_test") as dir:
filename = os.path.join(dir, "test_file" + file_ext)
writer = cv2.VideoWriter(
filename=filename,
fourcc=cv2.VideoWriter_fourcc(*codec),
fps=float(30),
frameSize=(10, 10),
isColor=True,
)
[writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)]
writer.release()
if os.path.isfile(filename):
return True
return False
def main() -> None:
mp.set_start_method("spawn", force=True)
args = get_parser().parse_args()
setup_logger(name="fvcore")
logger = setup_logger()
logger.info("Arguments: " + str(args))
cfg = setup_cfg(args)
demo = VisualizationDemo(cfg)
if args.input:
if len(args.input) == 1:
args.input = glob.glob(os.path.expanduser(args.input[0]))
assert args.input, "The input path(s) was not found"
for path in tqdm.tqdm(args.input, disable=not args.output):
# use PIL, to be consistent with evaluation
img = read_image(path, format="BGR")
start_time = time.time()
predictions, visualized_output = demo.run_on_image(img)
logger.info(
"{}: {} in {:.2f}s".format(
path,
(
"detected {} instances".format(len(predictions["instances"]))
if "instances" in predictions
else "finished"
),
time.time() - start_time,
)
)
if args.output:
if os.path.isdir(args.output):
assert os.path.isdir(args.output), args.output
out_filename = os.path.join(args.output, os.path.basename(path))
else:
assert len(args.input) == 1, "Please specify a directory with args.output"
out_filename = args.output
visualized_output.save(out_filename)
else:
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
if cv2.waitKey(0) == 27:
break # esc to quit
elif args.webcam:
assert args.input is None, "Cannot have both --input and --webcam!"
assert args.output is None, "output not yet supported with --webcam!"
cam = cv2.VideoCapture(0)
for vis in tqdm.tqdm(demo.run_on_video(cam)):
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, vis)
if cv2.waitKey(1) == 27:
break # esc to quit
cam.release()
cv2.destroyAllWindows()
elif args.video_input:
video = cv2.VideoCapture(args.video_input)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
frames_per_second = video.get(cv2.CAP_PROP_FPS)
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
basename = os.path.basename(args.video_input)
codec, file_ext = (
("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4")
)
if codec == ".mp4v":
warnings.warn("x264 codec not available, switching to mp4v")
if args.output:
if os.path.isdir(args.output):
output_fname = os.path.join(args.output, basename)
output_fname = os.path.splitext(output_fname)[0] + file_ext
else:
output_fname = args.output
assert not os.path.isfile(output_fname), output_fname
output_file = cv2.VideoWriter(
filename=output_fname,
# some installation of opencv may not support x264 (due to its license),
# you can try other format (e.g. MPEG)
fourcc=cv2.VideoWriter_fourcc(*codec),
fps=float(frames_per_second),
frameSize=(width, height),
isColor=True,
)
assert os.path.isfile(args.video_input)
for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
if args.output:
output_file.write(vis_frame)
else:
cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
cv2.imshow(basename, vis_frame)
if cv2.waitKey(1) == 27:
break # esc to quit
video.release()
if args.output:
output_file.release()
else:
cv2.destroyAllWindows()
if __name__ == "__main__":
main() # pragma: no cover
# Copyright (c) Facebook, Inc. and its affiliates.
import atexit
import bisect
import multiprocessing as mp
from collections import deque
import cv2
import torch
from detectron2.data import MetadataCatalog
from detectron2.engine.defaults import DefaultPredictor
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.utils.visualizer import ColorMode, Visualizer
class VisualizationDemo:
def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
"""
Args:
cfg (CfgNode):
instance_mode (ColorMode):
parallel (bool): whether to run the model in different processes from visualization.
Useful since the visualization logic can be slow.
"""
self.metadata = MetadataCatalog.get(
cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
)
self.cpu_device = torch.device("cpu")
self.instance_mode = instance_mode
self.parallel = parallel
if parallel:
num_gpu = torch.cuda.device_count()
self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
else:
self.predictor = DefaultPredictor(cfg)
def run_on_image(self, image):
"""
Args:
image (np.ndarray): an image of shape (H, W, C) (in BGR order).
This is the format used by OpenCV.
Returns:
predictions (dict): the output of the model.
vis_output (VisImage): the visualized image output.
"""
vis_output = None
predictions = self.predictor(image)
# Convert image from OpenCV BGR format to Matplotlib RGB format.
image = image[:, :, ::-1]
visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode)
if "panoptic_seg" in predictions:
panoptic_seg, segments_info = predictions["panoptic_seg"]
vis_output = visualizer.draw_panoptic_seg_predictions(
panoptic_seg.to(self.cpu_device), segments_info
)
else:
if "sem_seg" in predictions:
vis_output = visualizer.draw_sem_seg(
predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
)
if "instances" in predictions:
instances = predictions["instances"].to(self.cpu_device)
vis_output = visualizer.draw_instance_predictions(predictions=instances)
return predictions, vis_output
def _frame_from_video(self, video):
while video.isOpened():
success, frame = video.read()
if success:
yield frame
else:
break
def run_on_video(self, video):
"""
Visualizes predictions on frames of the input video.
Args:
video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
either a webcam or a video file.
Yields:
ndarray: BGR visualizations of each video frame.
"""
video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
def process_predictions(frame, predictions):
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
if "panoptic_seg" in predictions:
panoptic_seg, segments_info = predictions["panoptic_seg"]
vis_frame = video_visualizer.draw_panoptic_seg_predictions(
frame, panoptic_seg.to(self.cpu_device), segments_info
)
elif "instances" in predictions:
predictions = predictions["instances"].to(self.cpu_device)
vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
elif "sem_seg" in predictions:
vis_frame = video_visualizer.draw_sem_seg(
frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
)
# Converts Matplotlib RGB format to OpenCV BGR format
vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
return vis_frame
frame_gen = self._frame_from_video(video)
if self.parallel:
buffer_size = self.predictor.default_buffer_size
frame_data = deque()
for cnt, frame in enumerate(frame_gen):
frame_data.append(frame)
self.predictor.put(frame)
if cnt >= buffer_size:
frame = frame_data.popleft()
predictions = self.predictor.get()
yield process_predictions(frame, predictions)
while len(frame_data):
frame = frame_data.popleft()
predictions = self.predictor.get()
yield process_predictions(frame, predictions)
else:
for frame in frame_gen:
yield process_predictions(frame, self.predictor(frame))
class AsyncPredictor:
"""
A predictor that runs the model asynchronously, possibly on >1 GPUs.
Because rendering the visualization takes considerably amount of time,
this helps improve throughput a little bit when rendering videos.
"""
class _StopToken:
pass
class _PredictWorker(mp.Process):
def __init__(self, cfg, task_queue, result_queue):
self.cfg = cfg
self.task_queue = task_queue
self.result_queue = result_queue
super().__init__()
def run(self):
predictor = DefaultPredictor(self.cfg)
while True:
task = self.task_queue.get()
if isinstance(task, AsyncPredictor._StopToken):
break
idx, data = task
result = predictor(data)
self.result_queue.put((idx, result))
def __init__(self, cfg, num_gpus: int = 1):
"""
Args:
cfg (CfgNode):
num_gpus (int): if 0, will run on CPU
"""
num_workers = max(num_gpus, 1)
self.task_queue = mp.Queue(maxsize=num_workers * 3)
self.result_queue = mp.Queue(maxsize=num_workers * 3)
self.procs = []
for gpuid in range(max(num_gpus, 1)):
cfg = cfg.clone()
cfg.defrost()
cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu"
self.procs.append(
AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue)
)
self.put_idx = 0
self.get_idx = 0
self.result_rank = []
self.result_data = []
for p in self.procs:
p.start()
atexit.register(self.shutdown)
def put(self, image):
self.put_idx += 1
self.task_queue.put((self.put_idx, image))
def get(self):
self.get_idx += 1 # the index needed for this request
if len(self.result_rank) and self.result_rank[0] == self.get_idx:
res = self.result_data[0]
del self.result_data[0], self.result_rank[0]
return res
while True:
# make sure the results are returned in the correct order
idx, res = self.result_queue.get()
if idx == self.get_idx:
return res
insert = bisect.bisect(self.result_rank, idx)
self.result_rank.insert(insert, idx)
self.result_data.insert(insert, res)
def __len__(self):
return self.put_idx - self.get_idx
def __call__(self, image):
self.put(image)
return self.get()
def shutdown(self):
for _ in self.procs:
self.task_queue.put(AsyncPredictor._StopToken())
@property
def default_buffer_size(self):
return len(self.procs) * 5
# Copyright (c) Facebook, Inc. and its affiliates.
from .utils.env import setup_environment
setup_environment()
# This line will be programatically read/write by setup.py.
# Leave them at the bottom of this file and don't touch them.
__version__ = "0.6"
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
# File:
from . import catalog as _UNUSED # register the handler
from .detection_checkpoint import DetectionCheckpointer
from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer
__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment