Commit 82a8e0a0 authored by Hang Zhang's avatar Hang Zhang Committed by Facebook GitHub Bot

Add Demo and Quick Start Instructions

Summary: Pull Request resolved: https://github.com/facebookresearch/d2go/pull/5

Reviewed By: wat3rBro

Differential Revision: D26780956

Pulled By: zhanghang1989

fbshipit-source-id: 26af80bbdf6bcb6af4a8b5d27e655826b34db26a
parent 465cdb84
......@@ -6,8 +6,17 @@ This page holds a reference for example configs, pretrained models and training/
### How to
- Train: the "name" column contains a link to the config file. Running `tools/train_net.py --num-gpus 8` with the config file will reproduce the corresponding model.
- Evaluation: Running `tools/train_net.py --eval-only` with the config file and pretrained model will evaluate the results.
- Get pretrained models in python (see the inference sketch after this list):
```python
from d2go.model_zoo import model_zoo
model = model_zoo.get('faster_rcnn_fbnetv3a_C4.yaml', trained=True)
```
- Train: the "name" column contains a link to the config file. Running `d2go.train_net --config-file` with the config file will reproduce the corresponding model.
- Evaluation: Running `d2go.train_net --config-file path/to/the/config --eval-only MODEL.WEIGHTS path/to/the/model/weights` with the config file and pretrained model will evaluate the results. See details in [Getting Started](./demo/README.md).
- Training curves and other statistics can be found in `metrics` for each model.
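
To sanity-check a downloaded model in Python, the sketch below runs it on one image using the `DemoPredictor` helper from `d2go.utils.demo_predictor` (added in this commit); the image path is only a placeholder.

```python
import cv2
from d2go.model_zoo import model_zoo
from d2go.utils.demo_predictor import DemoPredictor

# Fetch the pretrained model (same call as above) and wrap it in the demo predictor,
# which handles resizing and tensor conversion.
model = model_zoo.get('faster_rcnn_fbnetv3a_C4.yaml', trained=True)
predictor = DemoPredictor(model)

img = cv2.imread('input.jpg')  # placeholder path; OpenCV reads images in BGR order
predictions = predictor(img)
print(predictions["instances"].pred_boxes)
print(predictions["instances"].scores)
```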
### Backbone Models
......@@ -31,7 +40,11 @@ FBNet series are efficient mobile backbones discovered via neural architecture s
| ------------------------------------------------------------ | ------ | ------- | --------- | ------------------------------------------------------------ |
| [Faster-RCNN-FBNetV3A](./configs/faster_rcnn_fbnetv3a_C4.yaml) | 22.99 | 59ms | 246823121 | [model](https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/246823121/model_0479999.pth) \|[metrics](https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/246823121/metrics.json) |
| [Faster-RCNN-FBNetV3A-dsmask](./configs/faster_rcnn_fbnetv3a_dsmask_C4.yaml) | 21.06 | 30ms | 250414811 | [model](https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/250414811/model_0399999.pth) \|[metrics](https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/250414811/metrics.json) |
| [Faster-RCNN-FBNetV3G-FPN](./configs/faster_rcnn_fbnetv3g_fpn.yaml) | 43.13 | | 250356938 | [model](https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/250356938/model_0374999.pth) \|[metrics](https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/250356938/metrics.json) |
| [Faster-RCNN-FBNetV3G-FPN](./configs/faster_rcnn_fbnetv3g_fpn.yaml) | 43.13 | 132ms | 250356938 | [model](https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/250356938/model_0374999.pth) \|[metrics](https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/250356938/metrics.json) |
(Latencies are measured on a Pixel phone running Android.)
## COCO Instance Segmentation
| name | box AP | mask AP | model id | download |
......
......@@ -38,7 +38,7 @@ cd d2go & python -m pip install .
## Get Started
- Getting Started tutorial Coming soon.
- [Getting Started with D2Go](./demo).
- See our [model zoo](./MODEL_ZOO.md) for example configs and pretrained models.
......
......@@ -31,6 +31,7 @@ MODEL:
NAME: "MaskRCNNConvUpsampleHead"
NUM_CONV: 4
POOLER_RESOLUTION: 14
NORM: "naiveSyncBN"
MODEL_EMA:
ENABLED: True
DECAY: 0.9998
......
......@@ -31,6 +31,7 @@ MODEL:
NAME: "MaskRCNNConvUpsampleHead"
NUM_CONV: 4
POOLER_RESOLUTION: 14
NORM: "naiveSyncBN"
MODEL_EMA:
ENABLED: True
DECAY: 0.9998
......
......@@ -38,6 +38,7 @@ MODEL:
NAME: "MaskRCNNConvUpsampleHead"
NUM_CONV: 4
POOLER_RESOLUTION: 14
NORM: "naiveSyncBN"
MODEL_EMA:
ENABLED: True
DECAY: 0.9998
......
_BASE_: "faster_rcnn_fbnetv3a_C4.yaml"
SOLVER:
BASE_LR: 0.0001
MAX_ITER: 50
IMS_PER_BATCH: 48 # for 8GPUs
QUANTIZATION:
  BACKEND: "qnnpack"  # quantized backend suited to ARM / mobile deployment
  QAT:
    ENABLED: True
    START_ITER: 0             # iteration at which QAT (fake quantization) begins
    ENABLE_OBSERVER_ITER: 0   # iteration at which observers start collecting statistics
    DISABLE_OBSERVER_ITER: 5  # iteration after which observer statistics stop updating
    FREEZE_BN_ITER: 7         # iteration after which BatchNorm statistics are frozen
# Copyright (c) Facebook, Inc. and its affiliates.
from collections import deque
import cv2
import torch
from detectron2.data import MetadataCatalog
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.utils.visualizer import ColorMode, Visualizer
import detectron2.data.transforms as T
from d2go.model_zoo import model_zoo
class DemoPredictor:
def __init__(self, model, min_size_test=224, max_size_test=320, input_format="RGB"):
self.model = model
self.model.eval()
self.aug = T.ResizeShortestEdge(
[min_size_test, min_size_test], max_size_test
)
self.input_format = input_format
def __call__(self, original_image):
"""
Args:
original_image (np.ndarray): an image of shape (H, W, C) (in BGR order).
Returns:
predictions (dict):
the output of the model for one image only.
See :doc:`/tutorials/models` for details about the format.
"""
with torch.no_grad(): # https://github.com/sphinx-doc/sphinx/issues/4258
# Apply pre-processing to image.
if self.input_format == "RGB":
# whether the model expects BGR inputs or RGB
original_image = original_image[:, :, ::-1]
height, width = original_image.shape[:2]
image = self.aug.get_transform(original_image).apply_image(original_image)
image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
inputs = {"image": image, "height": height, "width": width}
predictions = self.model([inputs])[0]
return predictions
class VisualizationDemo(object):
def __init__(self, cfg, config_file, instance_mode=ColorMode.IMAGE, parallel=False):
"""
Args:
cfg (CfgNode):
instance_mode (ColorMode):
parallel (bool): whether to run the model in different processes from visualization.
Useful since the visualization logic can be slow.
"""
self.metadata = MetadataCatalog.get(
cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused"
)
self.cpu_device = torch.device("cpu")
self.instance_mode = instance_mode
self.parallel = parallel
        model = model_zoo.get(config_file, trained=True)  # alternatively: runner.build_model(cfg)
self.predictor = DemoPredictor(model)
def run_on_image(self, image):
"""
Args:
image (np.ndarray): an image of shape (H, W, C) (in BGR order).
This is the format used by OpenCV.
Returns:
predictions (dict): the output of the model.
vis_output (VisImage): the visualized image output.
"""
vis_output = None
predictions = self.predictor(image)
# Convert image from OpenCV BGR format to Matplotlib RGB format.
image = image[:, :, ::-1]
visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode)
if "panoptic_seg" in predictions:
panoptic_seg, segments_info = predictions["panoptic_seg"]
vis_output = visualizer.draw_panoptic_seg_predictions(
panoptic_seg.to(self.cpu_device), segments_info
)
else:
if "sem_seg" in predictions:
vis_output = visualizer.draw_sem_seg(
predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
)
if "instances" in predictions:
instances = predictions["instances"].to(self.cpu_device)
vis_output = visualizer.draw_instance_predictions(predictions=instances)
return predictions, vis_output
def _frame_from_video(self, video):
while video.isOpened():
success, frame = video.read()
if success:
yield frame
else:
break
def run_on_video(self, video):
"""
Visualizes predictions on frames of the input video.
Args:
video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
either a webcam or a video file.
Yields:
ndarray: BGR visualizations of each video frame.
"""
video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
def process_predictions(frame, predictions):
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
if "panoptic_seg" in predictions:
panoptic_seg, segments_info = predictions["panoptic_seg"]
vis_frame = video_visualizer.draw_panoptic_seg_predictions(
frame, panoptic_seg.to(self.cpu_device), segments_info
)
elif "instances" in predictions:
predictions = predictions["instances"].to(self.cpu_device)
vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
elif "sem_seg" in predictions:
vis_frame = video_visualizer.draw_sem_seg(
frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)
)
# Converts Matplotlib RGB format to OpenCV BGR format
vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
return vis_frame
frame_gen = self._frame_from_video(video)
        if self.parallel:
            # NOTE: parallel mode expects an asynchronous predictor exposing
            # default_buffer_size/put/get (e.g. detectron2's AsyncPredictor);
            # DemoPredictor defined above does not implement these.
            buffer_size = self.predictor.default_buffer_size
frame_data = deque()
for cnt, frame in enumerate(frame_gen):
frame_data.append(frame)
self.predictor.put(frame)
if cnt >= buffer_size:
frame = frame_data.popleft()
predictions = self.predictor.get()
yield process_predictions(frame, predictions)
while len(frame_data):
frame = frame_data.popleft()
predictions = self.predictor.get()
yield process_predictions(frame, predictions)
else:
for frame in frame_gen:
yield process_predictions(frame, self.predictor(frame))
# Getting Started with D2Go
This document provides a brief introduction to the built-in command-line tools in D2Go.
For a tutorial that involves coding with the API, see our [Jupyter Notebook](./d2go_beginner.ipynb), which covers 1) how to run inference with an existing model, 2) how to train a builtin model on a custom dataset, and 3) how to apply quantization to the model for int8 deployment.
## Inference Demo with Pre-trained Models
- Choose a model from the [model zoo](https://github.com/facebookresearch/d2go/blob/master/MODEL_ZOO.md), e.g. `faster_rcnn_fbnetv3a_C4.yaml`.
- Use the provided `demo.py` to run the demo on an input image:
```bash
cd demo/
python demo.py --config-file faster_rcnn_fbnetv3a_C4.yaml --input input1.jpg --output output1.jpg
```
- To run on a video, replace the `--input` argument with `--video-input video.mp4`. A Python sketch of the image demo is shown below.
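
The demo can also be driven from Python. Below is a minimal sketch of the image case using the `VisualizationDemo` helper that ships with this demo (`d2go/utils/demo_predictor.py`); the input/output file names simply mirror the command above.

```python
import cv2
from d2go.model_zoo import model_zoo
from d2go.utils.demo_predictor import VisualizationDemo

config_file = "faster_rcnn_fbnetv3a_C4.yaml"
cfg = model_zoo.get_config(config_file)     # config from the model zoo
demo = VisualizationDemo(cfg, config_file)  # loads the pretrained weights internally

img = cv2.imread("input1.jpg")  # BGR, the format demo.py reads
predictions, vis_output = demo.run_on_image(img)
cv2.imwrite("output1.jpg", vis_output.get_image()[:, :, ::-1])  # RGB -> BGR for OpenCV
```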
## Training & Evaluation
D2Go is built on top of the Detectron2 toolkit. Please follow the Detectron2 [instructions](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md) to set up the builtin datasets before training.
- To train a model:
```bash
d2go.train_net --config-file ./configs/faster_rcnn_fbnetv3a_C4.yaml
```
- To evaluate a model checkpoint:
```bash
d2go.train_net --config-file ./configs/faster_rcnn_fbnetv3a_C4.yaml --eval-only \
MODEL.WEIGHTS https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/246823121/model_0479999.pth
```
(change the URL to a local path if evaluating a locally trained checkpoint)
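
Training and evaluation can also be driven programmatically through the runner API used elsewhere in this commit; the following is a minimal sketch (the config path relative to the repo root is an assumption) that builds the config and model without the CLI.

```python
from d2go.runner import create_runner

# Same runner class as used in the test utilities added by this commit.
runner = create_runner("d2go.runner.GeneralizedRCNNRunner")
cfg = runner.get_default_cfg()
cfg.merge_from_file("configs/faster_rcnn_fbnetv3a_C4.yaml")  # assumed config path
model = runner.build_model(cfg)
print(type(model).__name__)
```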
## Export to TorchScript & Int8 Model
- Export to a TorchScript model:
```bash
d2go.exporter --config-file configs/faster_rcnn_fbnetv3a_C4.yaml \
--output-dir ./ --predictor-type torchscript \
MODEL.WEIGHTS https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/246823121/model_0479999.pth
```
- Export to Int8 model (using post-training quantization):
```bash
d2go.exporter --config-file configs/faster_rcnn_fbnetv3a_C4.yaml \
--output-dir ./ --predictor-type torchscript_int8 \
MODEL.WEIGHTS https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/246823121/model_0479999.pth
```
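
The exporter writes a TorchScript module that can be loaded back with `torch.jit.load`; the path below is only an assumption for illustration, so check the exporter's `--output-dir` for the actual predictor folder and file name.

```python
import torch

# Hypothetical path; the exporter decides the exact layout under --output-dir.
scripted = torch.jit.load("./torchscript_int8/model.ts")
scripted.eval()
print(scripted)  # inspect the exported module structure
```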
## Quantization-aware Training
The previous method exports int8 models using post-training quantization, which is easy to use but may suffer an accuracy drop. Quantization-aware training emulates inference-time quantization during training, so that the resulting lower-precision model retains more accuracy when deployed.
To apply quantization-aware training, we need to resume from a pretrained checkpoint:
```bash
d2go.train_net --config-file configs/qat_faster_rcnn_fbnetv3a_C4.yaml \
MODEL.WEIGHTS https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/246823121/model_0479999.pth
```
Please see the [config file](./configs/qat_faster_rcnn_fbnetv3a_C4.yaml) for relevant hyper-params.
# Adapted from https://github.com/facebookresearch/detectron2/
# Copyright (c) Facebook, Inc. and its affiliates.
import argparse
import glob
import multiprocessing as mp
import os
import time
import cv2
import tqdm
from detectron2.data.detection_utils import read_image
from detectron2.utils.logger import setup_logger
from d2go.model_zoo import model_zoo
from d2go.utils.demo_predictor import VisualizationDemo
# constants
WINDOW_NAME = "COCO detections"
def setup_cfg(cfg, args):
# Set score_threshold for builtin models
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold
cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold
cfg.freeze()
return cfg
def get_parser():
parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs")
parser.add_argument(
"--config-file",
default='keypoint_rcnn_fbnetv3a_dsmask_C4.yaml',
metavar="FILE",
help="path to config file",
)
parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.")
parser.add_argument("--video-input", help="Path to video file.")
parser.add_argument(
"--input",
nargs="+",
help="A list of space separated input images; "
"or a single glob pattern such as 'directory/*.jpg'",
)
parser.add_argument(
"--output",
help="A file or directory to save output visualizations. "
"If not given, will show output in an OpenCV window.",
)
parser.add_argument(
"--confidence-threshold",
type=float,
default=0.5,
help="Minimum score for instance predictions to be shown",
)
parser.add_argument(
"--opts",
help="Modify config options using the command-line 'KEY VALUE' pairs",
default=[],
nargs=argparse.REMAINDER,
)
return parser
def main():
mp.set_start_method("spawn", force=True)
args = get_parser().parse_args()
setup_logger(name="fvcore")
logger = setup_logger()
logger.info("Arguments: " + str(args))
cfg = model_zoo.get_config(args.config_file)
cfg = setup_cfg(cfg, args)
demo = VisualizationDemo(cfg, args.config_file)
if args.input:
if len(args.input) == 1:
args.input = glob.glob(os.path.expanduser(args.input[0]))
assert args.input, "The input path(s) was not found"
for path in tqdm.tqdm(args.input, disable=not args.output):
# use PIL, to be consistent with evaluation
img = read_image(path, format="BGR")
start_time = time.time()
predictions, visualized_output = demo.run_on_image(img)
logger.info(
"{}: {} in {:.2f}s".format(
path,
"detected {} instances".format(len(predictions["instances"]))
if "instances" in predictions
else "finished",
time.time() - start_time,
)
)
if args.output:
if os.path.isdir(args.output):
assert os.path.isdir(args.output), args.output
out_filename = os.path.join(args.output, os.path.basename(path))
else:
assert len(args.input) == 1, "Please specify a directory with args.output"
out_filename = args.output
visualized_output.save(out_filename)
else:
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1])
if cv2.waitKey(0) == 27:
break # esc to quit
elif args.video_input:
video = cv2.VideoCapture(args.video_input)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
frames_per_second = video.get(cv2.CAP_PROP_FPS)
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
basename = os.path.basename(args.video_input)
if args.output:
if os.path.isdir(args.output):
output_fname = os.path.join(args.output, basename)
output_fname = os.path.splitext(output_fname)[0] + ".mkv"
else:
output_fname = args.output
assert not os.path.isfile(output_fname), output_fname
output_file = cv2.VideoWriter(
filename=output_fname,
# some installation of opencv may not support x264 (due to its license),
# you can try other format (e.g. MPEG)
fourcc=cv2.VideoWriter_fourcc(*"x264"),
fps=float(frames_per_second),
frameSize=(width, height),
isColor=True,
)
assert os.path.isfile(args.video_input)
for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
if args.output:
output_file.write(vis_frame)
else:
cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
cv2.imshow(basename, vis_frame)
if cv2.waitKey(1) == 27:
break # esc to quit
video.release()
if args.output:
output_file.release()
else:
cv2.destroyAllWindows()
if __name__ == "__main__":
main()
......@@ -9,6 +9,7 @@ import os
import uuid
from d2go.data.datasets import register_dataset_split
from d2go.runner import create_runner
from detectron2.data import DatasetCatalog, MetadataCatalog
from mobile_cv.common.misc.file_utils import make_temp_directory
from PIL import Image
......@@ -165,3 +166,31 @@ class LocalImageGenerator:
def prepare_image(self, i):
image = Image.new("RGB", (self._width, self._height))
image.save(os.path.join(self._image_dir, self.get_image_dict(i)["file_name"]))
@contextlib.contextmanager
def create_fake_detection_data_loader(height, width, is_train):
with make_temp_directory("detectron2go_tmp_dataset") as dataset_dir:
runner = create_runner("d2go.runner.GeneralizedRCNNRunner")
cfg = runner.get_default_cfg()
cfg.DATASETS.TRAIN = ["default_dataset_train"]
cfg.DATASETS.TEST = ["default_dataset_test"]
with make_temp_directory("detectron2go_tmp_dataset") as dataset_dir:
image_dir = os.path.join(dataset_dir, "images")
os.makedirs(image_dir)
image_generator = LocalImageGenerator(image_dir, width=width, height=height)
if is_train:
with register_toy_dataset(
"default_dataset_train", image_generator, num_images=3
):
train_loader = runner.build_detection_train_loader(cfg)
yield train_loader
else:
with register_toy_dataset(
"default_dataset_test", image_generator, num_images=3
):
test_loader = runner.build_detection_test_loader(
cfg, dataset_name="default_dataset_test"
)
yield test_loader