Commit b9dc151a authored by Yanghan Wang, committed by Facebook GitHub Bot

support force exporting gpu model for rcnn meta_arch

Summary:
Pull Request resolved: https://github.com/facebookresearch/d2go/pull/191

When exporting a model to TorchScript (with `MODEL.DEVICE = "cpu"`), the pixel mean/std are baked in as constants instead of model parameters. As a result, after casting the TorchScript model to CUDA, the mean/std remain on CPU, which breaks inference on GPU.
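A minimal sketch of the failure mode (not from this diff; exact behavior can vary across PyTorch versions): a tensor held as a plain attribute, rather than a parameter or registered buffer, is baked into the traced graph as a constant, so casting the module to CUDA does not move it.

```python
import torch

class Normalizer(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # plain attribute: not an nn.Parameter and not registered as a buffer
        self.pixel_mean = torch.tensor([103.53, 116.28, 123.675])

    def forward(self, x):
        return x - self.pixel_mean

traced = torch.jit.trace(Normalizer(), torch.rand(3))
traced.cuda()  # moves parameters/buffers; the traced-in constant stays on CPU
# traced(torch.rand(3, device="cuda"))  # device-mismatch error at runtime
```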

The fix is to export the model with `MODEL.DEVICE = "cuda"`. However, D2Go internally uses "cpu" during export by default (via CLI: https://fburl.com/code/4mpk153i, via workflow: https://fburl.com/code/zcj5ud4u). For the CLI, the user can set `--device` manually, but for the workflow this is hard to do. Furthermore, it's hard to support mixed models with a single `--device` option. So this diff adds special handling in the RCNN's `default_prepare_for_export` to bypass the `--device` option.
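As a hypothetical usage sketch (assuming d2go's `convert_and_export_predictor` entry point; `cfg`, `model`, and `data_loader` come from the usual d2go runner setup), a caller opts into the GPU export through the predictor type rather than `--device`:

```python
from d2go.export.api import convert_and_export_predictor

predictor_path = convert_and_export_predictor(
    cfg,                # cfg.MODEL.DEVICE may remain "cpu"
    model,
    "torchscript_gpu",  # handled by default_rcnn_prepare_for_export: the model
                        # is cast to CUDA and the "_gpu" suffix is stripped
    "./export_output",
    data_loader,        # provides example inputs for tracing
)
```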

Reviewed By: zhanghang1989

Differential Revision: D35097613

fbshipit-source-id: df9f44f49af1f0fd4baf3d7ccae6c31e341f3ef6
parent a781894c
@@ -2,6 +2,7 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+import inspect
 import logging
 import torch
@@ -59,6 +60,16 @@ class GeneralizedRCNNPatch:
 @RCNN_PREPARE_FOR_EXPORT_REGISTRY.register()
 def default_rcnn_prepare_for_export(self, cfg, inputs, predictor_type):
+    pytorch_model = self
+    # NOTE: currently Exporter doesn't support specifying exporting GPU model via
+    # `model_export_method` in a general way. For RCNN model, we only need to cast
+    # the model to GPU and trace the model (scripting might not work) normally to
+    # get the GPU torchscripts.
+    if "_gpu" in predictor_type:
+        pytorch_model = _cast_detection_model(pytorch_model, "cuda")
+        predictor_type = predictor_type.replace("_gpu", "", 1)
+
     if (
         "@c2_ops" in predictor_type
         or "caffe2" in predictor_type
@@ -67,7 +78,7 @@ def default_rcnn_prepare_for_export(self, cfg, inputs, predictor_type):
         from detectron2.export.caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP

         C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[cfg.MODEL.META_ARCHITECTURE]
-        c2_compatible_model = C2MetaArch(cfg, self)
+        c2_compatible_model = C2MetaArch(cfg, pytorch_model)

         preprocess_info = FuncInfo.gen_func_info(
             D2Caffe2MetaArchPreprocessFunc,
@@ -100,8 +111,9 @@ def default_rcnn_prepare_for_export(self, cfg, inputs, predictor_type):
     )
     preprocess_func = preprocess_info.instantiate()

     return PredictorExportConfig(
-        model=D2RCNNInferenceWrapper(self),
+        model=D2RCNNInferenceWrapper(pytorch_model),
         data_generator=lambda x: (preprocess_func(x),),
+        model_export_method=predictor_type,
         preprocess_info=preprocess_info,
         postprocess_info=FuncInfo.gen_func_info(
             D2RCNNInferenceWrapper.Postprocess, params={}
@@ -430,3 +442,28 @@ class D2RCNNInferenceWrapper(nn.Module):
            width, height = batch[0]["width"], batch[0]["height"]
            r = detector_postprocess(outputs, height, width)
            return [{"instances": r}]
+
+
+# TODO: model.to(device) might not work for detection meta-arch, this function is the
+# workaround, in general, we might need a meta-arch API for this if needed.
+def _cast_detection_model(model, device):
+    # check model is an instance of one of the meta arch
+    from detectron2.export.caffe2_modeling import Caffe2MetaArch
+    from detectron2.modeling import META_ARCH_REGISTRY
+
+    if isinstance(model, Caffe2MetaArch):
+        model._wrapped_model = _cast_detection_model(model._wrapped_model, device)
+        return model
+
+    assert isinstance(model, tuple(META_ARCH_REGISTRY._obj_map.values()))
+    model.to(device)
+    # cast normalizer separately
+    if hasattr(model, "normalizer") and not (
+        hasattr(model, "pixel_mean") and hasattr(model, "pixel_std")
+    ):
+        pixel_mean = inspect.getclosurevars(model.normalizer).nonlocals["pixel_mean"]
+        pixel_std = inspect.getclosurevars(model.normalizer).nonlocals["pixel_std"]
+        pixel_mean = pixel_mean.to(device)
+        pixel_std = pixel_std.to(device)
+        model.normalizer = lambda x: (x - pixel_mean) / pixel_std
+    return model
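For reference, the `inspect.getclosurevars` trick above can be sketched standalone (hypothetical names, not part of this diff): it recovers the tensors captured by the `normalizer` closure so they can be moved and re-captured.

```python
import inspect
import torch

def make_normalizer(pixel_mean, pixel_std):
    return lambda x: (x - pixel_mean) / pixel_std

normalizer = make_normalizer(torch.zeros(3), torch.ones(3))

# Recover the closed-over tensors by name...
captured = inspect.getclosurevars(normalizer).nonlocals
pixel_mean = captured["pixel_mean"].to("cpu")  # "cuda" in the real use case
pixel_std = captured["pixel_std"].to("cpu")
# ...and rebuild the closure around the moved tensors.
normalizer = lambda x: (x - pixel_mean) / pixel_std
```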
@@ -3,7 +3,6 @@
 import copy
-import inspect
 import logging
 import math
 from typing import Tuple
@@ -137,27 +136,12 @@ def add_quantization_default_configs(_C):

 # TODO: model.to(device) might not work for detection meta-arch, this function is the
 # workaround, in general, we might need a meta-arch API for this if needed.
-def _cast_detection_model(model, device):
-    # check model is an instance of one of the meta arch
-    from detectron2.export.caffe2_modeling import Caffe2MetaArch
-    from detectron2.modeling import META_ARCH_REGISTRY
-
-    if isinstance(model, Caffe2MetaArch):
-        model._wrapped_model = _cast_detection_model(model._wrapped_model, device)
-        return model
-
-    assert isinstance(model, tuple(META_ARCH_REGISTRY._obj_map.values()))
-    model.to(device)
-    # cast normalizer separately
-    if hasattr(model, "normalizer") and not (
-        hasattr(model, "pixel_mean") and hasattr(model, "pixel_std")
-    ):
-        pixel_mean = inspect.getclosurevars(model.normalizer).nonlocals["pixel_mean"]
-        pixel_std = inspect.getclosurevars(model.normalizer).nonlocals["pixel_std"]
-        pixel_mean = pixel_mean.to(device)
-        pixel_std = pixel_std.to(device)
-        model.normalizer = lambda x: (x - pixel_mean) / pixel_std
-    return model
+def _cast_model_to_device(model, device):
+    from d2go.modeling.meta_arch.rcnn import _cast_detection_model
+    from detectron2.modeling import GeneralizedRCNN
+
+    assert isinstance(model, GeneralizedRCNN), "Currently only availabe for RCNN"
+    return _cast_detection_model(model, device)


 def add_d2_quant_mapping(mappings):
def add_d2_quant_mapping(mappings):
@@ -304,7 +288,7 @@ def post_training_quantize(cfg, model, data_loader):
     if calibration_force_on_gpu:
         # NOTE: model.to(device) may not handle cases such as normalizer, FPN, only
         # do move to GPU if specified.
-        _cast_detection_model(model, "cuda")
+        _cast_model_to_device(model, "cuda")

     calibration_iters = cfg.QUANTIZATION.PTQ.CALIBRATION_NUM_IMAGES
     for idx, inputs in enumerate(data_loader):
@@ -327,7 +311,7 @@ def post_training_quantize(cfg, model, data_loader):
     # cast model back to the original device
     if calibration_force_on_gpu:
-        _cast_detection_model(model, cfg.MODEL.DEVICE)
+        _cast_model_to_device(model, cfg.MODEL.DEVICE)

     return model
@@ -180,8 +180,10 @@ class MockRCNNInference(object):
         return results


-def _validate_outputs(inputs, outputs):
+def _validate_outputs(inputs, outputs, is_gpu=False):
     assert len(inputs) == len(outputs)
+    if is_gpu:
+        assert outputs[0]["instances"].pred_classes.device.type == "cuda"

     # TODO: figure out how to validate outputs
@@ -311,7 +313,8 @@ class RCNNBaseTestCases:
            predictor = create_predictor(predictor_path)
            predictor_outputs = predictor(inputs)
-           _validate_outputs(inputs, predictor_outputs)
+           is_gpu = self.cfg.MODEL.DEVICE != "cpu" or "_gpu" in predictor_type
+           _validate_outputs(inputs, predictor_outputs, is_gpu=is_gpu)

            if compare_match:
                with torch.no_grad():
@@ -19,6 +19,14 @@ from mobile_cv.common.misc.file_utils import make_temp_directory
 patch_d2_meta_arch()


+def _maybe_skip_test(self, predictor_type):
+    if os.getenv("OSSRUN") == "1" and "@c2_ops" in predictor_type:
+        self.skipTest("Caffe2 is not available for OSS")
+    if not torch.cuda.is_available() and "_gpu" in predictor_type:
+        self.skipTest("GPU is not available for exporting GPU model")
+
+
 class TestFBNetV3MaskRCNNFP32(RCNNBaseTestCases.TemplateTestCase):
     def setup_custom_test(self):
         super().setup_custom_test()
@@ -31,13 +39,13 @@ class TestFBNetV3MaskRCNNFP32(RCNNBaseTestCases.TemplateTestCase):
        [
            ["torchscript@c2_ops", True],
            ["torchscript", True],
+           ["torchscript_gpu", False],  # can't compare across device
            ["torchscript_int8@c2_ops", False],
            ["torchscript_int8", False],
        ]
    )
    def test_export(self, predictor_type, compare_match):
-       if os.getenv("OSSRUN") == "1" and "@c2_ops" in predictor_type:
-           self.skipTest("Caffe2 is not available for OSS")
+       _maybe_skip_test(self, predictor_type)
        self._test_export(predictor_type, compare_match=compare_match)
@@ -58,8 +66,7 @@ class TestFBNetV3MaskRCNNFPNFP32(RCNNBaseTestCases.TemplateTestCase):
        ]
    )
    def test_export(self, predictor_type, compare_match):
-       if os.getenv("OSSRUN") == "1" and "@c2_ops" in predictor_type:
-           self.skipTest("Caffe2 is not available for OSS")
+       _maybe_skip_test(self, predictor_type)
        self._test_export(predictor_type, compare_match=compare_match)
@@ -89,8 +96,7 @@ class TestFBNetV3MaskRCNNQATEager(RCNNBaseTestCases.TemplateTestCase):
        ]
    )
    def test_export(self, predictor_type, compare_match):
-       if os.getenv("OSSRUN") == "1" and "@c2_ops" in predictor_type:
-           self.skipTest("Caffe2 is not available for OSS")
+       _maybe_skip_test(self, predictor_type)
        self._test_export(predictor_type, compare_match=compare_match)