Commit f23248c0 authored by facebook-github-bot

Initial commit

fbshipit-source-id: f4a8ba78691d8cf46e003ef0bd2e95f170932778
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import sys
import json
import importlib
import dataclasses
from caffe2.proto import caffe2_pb2
from detectron2.export.caffe2_modeling import (
META_ARCH_CAFFE2_EXPORT_TYPE_MAP,
convert_batched_inputs_to_c2_format,
)
from detectron2.export.shared import get_pb_arg_vali, get_pb_arg_vals
from detectron2.modeling.postprocessing import detector_postprocess
class D2Caffe2MetaArchPreprocessFunc(object):
def __init__(self, size_divisibility, device):
self.size_divisibility = size_divisibility
self.device = device
def __call__(self, inputs):
data, im_info = convert_batched_inputs_to_c2_format(
inputs, self.size_divisibility, self.device
)
return (data, im_info)
@staticmethod
def get_params(cfg, model):
fake_predict_net = caffe2_pb2.NetDef()
model.encode_additional_info(fake_predict_net, None)
size_divisibility = get_pb_arg_vali(fake_predict_net, "size_divisibility", 0)
device = get_pb_arg_vals(fake_predict_net, "device", b"cpu").decode("ascii")
return {
"size_divisibility": size_divisibility,
"device": device,
}
class D2Caffe2MetaArchPostprocessFunc(object):
def __init__(self, external_input, external_output, encoded_info):
self.external_input = external_input
self.external_output = external_output
self.encoded_info = encoded_info
def __call__(self, inputs, tensor_inputs, tensor_outputs):
encoded_info = self.encoded_info.encode("ascii")
fake_predict_net = caffe2_pb2.NetDef().FromString(encoded_info)
meta_architecture = get_pb_arg_vals(fake_predict_net, "meta_architecture", None)
meta_architecture = meta_architecture.decode("ascii")
model_class = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_architecture]
convert_outputs = model_class.get_outputs_converter(fake_predict_net, None)
c2_inputs = tensor_inputs
c2_results = dict(zip(self.external_output, tensor_outputs))
return convert_outputs(inputs, c2_inputs, c2_results)
@staticmethod
def get_params(cfg, model):
# NOTE: the post processing has different values for different meta
# architectures; here we simply rely on the Caffe2 meta architecture to encode
# the info into a NetDef and store it as a whole.
fake_predict_net = caffe2_pb2.NetDef()
model.encode_additional_info(fake_predict_net, None)
encoded_info = fake_predict_net.SerializeToString().decode("ascii")
# HACK: Caffe2MetaArch's post processing requires the blob names of the model
# output; this information is missing for torchscript. There's no easy way to
# know this unless using NamedTuple for tracing.
external_input = ["data", "im_info"]
if cfg.MODEL.META_ARCHITECTURE == "GeneralizedRCNN":
external_output = ["bbox_nms", "score_nms", "class_nms"]
if cfg.MODEL.MASK_ON:
external_output.extend(["mask_fcn_probs"])
if cfg.MODEL.KEYPOINT_ON:
if cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT:
external_output.extend(["keypoints_out"])
else:
external_output.extend(["kps_score"])
else:
raise NotImplementedError("")
return {
"external_input": external_input,
"external_output": external_output,
"encoded_info": encoded_info,
}
def dataclass_object_dump(ob):
datacls = type(ob)
if not dataclasses.is_dataclass(datacls):
raise TypeError(f"Expected dataclass instance, got '{datacls!r}' object")
mod = sys.modules.get(datacls.__module__)
if mod is None or not hasattr(mod, datacls.__qualname__):
raise ValueError(f"Can't resolve '{datacls!r}' reference")
ref = f"{datacls.__module__}.{datacls.__qualname__}"
fields = (f.name for f in dataclasses.fields(ob))
return {**{f: getattr(ob, f) for f in fields}, "__dataclass__": ref}
def dataclass_object_load(d):
ref = d.pop("__dataclass__", None)
if ref is None:
return d
try:
modname, hasdot, qualname = ref.rpartition(".")
module = importlib.import_module(modname)
datacls = getattr(module, qualname)
if not dataclasses.is_dataclass(datacls) or not isinstance(datacls, type):
raise ValueError
return datacls(**d)
except (ModuleNotFoundError, ValueError, AttributeError, TypeError):
raise ValueError(f"Invalid dataclass reference {ref!r}") from None
class D2TracingAdapterPreprocessFunc(object):
def __call__(self, inputs):
assert len(inputs) == 1, "only support single batch"
return inputs[0]["image"]
class D2TracingAdapterPostFunc(object):
def __init__(self, outputs_schema_json):
self.outputs_schema = json.loads(
outputs_schema_json, object_hook=dataclass_object_load
)
def __call__(self, inputs, tensor_inputs, tensor_outputs):
results_per_image = self.outputs_schema(tensor_outputs)
assert len(inputs) == 1, "only support single batch"
width, height = inputs[0]["width"], inputs[0]["height"]
r = detector_postprocess(results_per_image, height, width)
return [{"instances": r}]
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import logging
import detectron2.utils.comm as comm
import mobile_cv.lut.lib.pt.flops_utils as flops_utils
from d2go.utils.helper import run_once
logger = logging.getLogger(__name__)
def print_flops(model, first_batch):
logger.info("Evaluating model's number of parameters and FLOPS")
model_flops = copy.deepcopy(model)
model_flops.eval()
fest = flops_utils.FlopsEstimation(model_flops)
with fest.enable():
model_flops(first_batch)
fest.add_flops_info()
model_str = str(model_flops)
logger.info(model_str)
return model_str
# NOTE: the logging can be too long and messy when printing flops multiple
# times, especially when running eval during training, thus `run_once` is used
# to limit it. TODO: log the flops more concisely.
@run_once()
def add_print_flops_callback(cfg, model, disable_after_callback=True):
def _print_flops_callback(self, model, model_data):
self.add_flops_info()
logger.info("Callback: model flops info:\n{}".format(model))
def _guess_batch_size():
# Inputs are meta-arch dependent; the most general solution would be
# adding a function like `get_batch_size()` to each meta arch
ret = 1
try:
model_input_shapes = model_data(model)["input_shapes"]
assert isinstance(model_input_shapes, list)
assert len(model_input_shapes) > 0
# assuming the first input is a list of images
ret = len(model_input_shapes[0])
except Exception:
ret = cfg.SOLVER.IMS_PER_BATCH // comm.get_world_size()
logger.warning(
"Could not get batch size, compute from"
f" `cfg.SOLVER.IMS_PER_BATCH`={ret}"
)
pass
return ret
nparams, nflops = self.get_flops()
batch_size = _guess_batch_size()
nflops_single = nflops / batch_size
logger.info(
f"Model parameters (M): {nparams}, "
f"MFlops (batch_size={batch_size}): {nflops}, "
f"MFlops (batch_size=1): {nflops_single}"
)
if disable_after_callback:
self.set_enable(False)
fest = flops_utils.FlopsEstimation(model).set_callback(_print_flops_callback)
logger.info("Added callback to log flops info after the first inference")
fest.set_enable(True)
return fest
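# --- Usage sketch (not part of the original file) -----------------------------
# Attach the run-once callback and trigger it with a single forward pass; the
# `cfg`, `model` and `first_batch` arguments are whatever the caller already has.
def _example_log_flops_once(cfg, model, first_batch):
    add_print_flops_callback(cfg, model)
    # The callback fires during this forward pass and then disables itself
    # (disable_after_callback defaults to True).
    return model(first_batch)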
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from d2go.config import CfgNode as CN
from d2go.data.build import (
add_weighted_training_sampler_default_configs,
)
from d2go.data.config import add_d2go_data_default_configs
from d2go.modeling.backbone.fbnet_cfg import (
add_bifpn_default_configs,
add_fbnet_v2_default_configs,
)
from d2go.modeling import kmeans_anchors, model_ema
from d2go.modeling.model_freezing_utils import add_model_freezing_configs
from d2go.modeling.quantization import add_quantization_default_configs
from d2go.modeling.subclass import add_subclass_configs
def add_tensorboard_default_configs(_C):
_C.TENSORBOARD = CN()
# Output from dataloader will be written to tensorboard at this frequency
_C.TENSORBOARD.TRAIN_LOADER_VIS_WRITE_PERIOD = 20
# This controls the max number of images over all batches; be careful when
# increasing this number because it takes disk space and slows down training
_C.TENSORBOARD.TRAIN_LOADER_VIS_MAX_IMAGES = 16
# Max number of images per dataset to visualize in tensorboard during evaluation
_C.TENSORBOARD.TEST_VIS_MAX_IMAGES = 16
# TENSORBOARD.LOG_DIR will be determined solely by OUTPUT_DIR
_C.register_deprecated_key("TENSORBOARD.LOG_DIR")
def add_abnormal_checker_configs(_C):
_C.ABNORMAL_CHECKER = CN()
# check and log the iteration with bad losses if enabled
_C.ABNORMAL_CHECKER.ENABLED = False
def get_default_cfg(_C):
# _C.MODEL.FBNET...
add_fbnet_v2_default_configs(_C)
# _C.MODEL.FROZEN_LAYER_REG_EXP
add_model_freezing_configs(_C)
# _C.MODEL other models
model_ema.add_model_ema_configs(_C)
# _C.D2GO_DATA...
add_d2go_data_default_configs(_C)
# _C.TENSORBOARD...
add_tensorboard_default_configs(_C)
# _C.MODEL.KMEANS...
kmeans_anchors.add_kmeans_anchors_cfg(_C)
# _C.QUANTIZATION
add_quantization_default_configs(_C)
# _C.DATASETS.TRAIN_REPEAT_FACTOR
add_weighted_training_sampler_default_configs(_C)
# _C.ABNORMAL_CHECKER
add_abnormal_checker_configs(_C)
# _C.MODEL.SUBCLASS
add_subclass_configs(_C)
# Set find_unused_parameters for DistributedDataParallel.
_C.MODEL.DDP_FIND_UNUSED_PARAMETERS = False
# Set default optimizer
_C.SOLVER.OPTIMIZER = "sgd"
_C.SOLVER.LR_MULTIPLIER_OVERWRITE = []
# Default world size in D2 is 0, which means scaling is not applied. For D2Go,
# auto scaling is encouraged, so set it to 8
assert _C.SOLVER.REFERENCE_WORLD_SIZE == 0
_C.SOLVER.REFERENCE_WORLD_SIZE = 8
# Besides scaling default D2 configs, also scale quantization configs
_C.SOLVER.AUTO_SCALING_METHODS = [
"default_scale_d2_configs",
"default_scale_quantization_configs",
]
return _C
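# --- Usage sketch (not part of the original file) -----------------------------
# `get_default_cfg` mutates and returns the base config node that the runner
# passes in; `base_cfg` is assumed to be a compatible CfgNode with detectron2's
# default keys already populated.
def _example_build_default_cfg(base_cfg):
    cfg = get_default_cfg(base_cfg)
    assert cfg.SOLVER.OPTIMIZER == "sgd"
    assert cfg.SOLVER.REFERENCE_WORLD_SIZE == 8
    return cfg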
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import errno
import importlib
import inspect
import logging
import math
import os
import re
import tempfile
import zipfile
import pickle
import signal
import sys
import threading
import time
import traceback
import typing
import warnings
import pkg_resources
from contextlib import contextmanager
from functools import partial
from random import random
import six
from functools import wraps
from typing import (
Any,
Callable,
Iterable,
List,
Mapping,
NamedTuple,
Optional,
Tuple,
Type,
TypeVar,
Union,
)
import torch
import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
from detectron2.evaluation import (
CityscapesInstanceEvaluator,
CityscapesSemSegEvaluator,
COCOEvaluator,
COCOPanopticEvaluator,
DatasetEvaluators,
LVISEvaluator,
PascalVOCDetectionEvaluator,
SemSegEvaluator,
verify_results,
)
T = TypeVar("T")
CallbackMapping = Mapping[Callable, Optional[Iterable[Any]]]
FuncType = Callable[..., Any]
F = TypeVar("F", bound=FuncType)
RT = TypeVar("RT")
NT = TypeVar("T", bound=NamedTuple)
from detectron2.utils.events import TensorboardXWriter
class MultipleFunctionCallError(Exception):
pass
def run_once(
raise_on_multiple: bool = False,
# pyre-fixme[34]: `Variable[T]` isn't present in the function's parameters.
) -> Callable[[Callable[..., T]], Callable[..., T]]:
"""
A decorator to wrap a function such that it only ever runs once
Useful, for example, with exit handlers that could be run via atexit or
via a signal handler. The decorator will cache the result of the first call
and return it on subsequent calls. If `raise_on_multiple` is set, any call
to the function after the first one will raise a
`MultipleFunctionCallError`.
"""
def decorator(func: Callable[..., T]) -> Callable[..., T]:
signal: List[T] = []
@wraps(func)
def wrapper(*args, **kwargs) -> T:
if signal:
if raise_on_multiple:
raise MultipleFunctionCallError(
"Function %s was called multiple times" % func.__name__
)
return signal[0]
signal.append(func(*args, **kwargs))
return signal[0]
return wrapper
return decorator
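# --- Example sketch (not part of the original file) ---------------------------
# The decorated function runs only on the first call; later calls return the
# cached result (or raise MultipleFunctionCallError when raise_on_multiple=True).
@run_once()
def _example_expensive_init() -> int:
    return 42


def _example_run_once_usage() -> int:
    first = _example_expensive_init()
    second = _example_expensive_init()  # cached; the body does not run again
    assert first == second == 42
    return second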
class retryable(object):
"""Fake retryable function
"""
def __init__(self, num_tries=1, sleep_time=0.1):
pass
def __call__(self, func: F) -> F:
return func
# pyre-fixme[3]: Return type must be annotated.
# pyre-fixme[2]: Parameter must be annotated.
def get_dir_path(relative_path):
"""Return a path for a directory in this package, extracting if necessary
For an entire directory within the par file (zip, fastzip) or lpar
structure, this function will check to see if the contents are extracted;
extracting each file that has not been extracted. It returns the path of
a directory containing the expected contents, making sure permissions are
correct.
Returns a string path, throws an exception on error
"""
return os.path.dirname(importlib.import_module(relative_path).__file__)
# copy util function for oss
def alias(x, name, is_backward=False):
if not torch.onnx.is_in_onnx_export():
return x
assert isinstance(x, torch.Tensor)
return torch.ops._caffe2.AliasWithName(x, name, is_backward=is_backward)
class D2Trainer(DefaultTrainer):
@classmethod
def build_evaluator(cls, cfg, dataset_name, output_folder=None):
"""
Create evaluator(s) for a given dataset.
This uses the special metadata "evaluator_type" associated with each builtin dataset.
For your own dataset, you can simply create an evaluator manually in your
script and do not have to worry about the hacky if-else logic here.
"""
if output_folder is None:
output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
evaluator_list = []
evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
if evaluator_type in ["sem_seg", "coco_panoptic_seg"]:
evaluator_list.append(
SemSegEvaluator(
dataset_name,
distributed=True,
output_dir=output_folder,
)
)
if evaluator_type in ["coco", "coco_panoptic_seg"]:
evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder))
if evaluator_type == "coco_panoptic_seg":
evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
if evaluator_type == "cityscapes_instance":
assert (
torch.cuda.device_count() >= comm.get_rank()
), "CityscapesEvaluator currently do not work with multiple machines."
return CityscapesInstanceEvaluator(dataset_name)
if evaluator_type == "cityscapes_sem_seg":
assert (
torch.cuda.device_count() >= comm.get_rank()
), "CityscapesEvaluator currently do not work with multiple machines."
return CityscapesSemSegEvaluator(dataset_name)
elif evaluator_type == "pascal_voc":
return PascalVOCDetectionEvaluator(dataset_name)
elif evaluator_type == "lvis":
return LVISEvaluator(dataset_name, output_dir=output_folder)
if len(evaluator_list) == 0:
raise NotImplementedError(
"no Evaluator for the dataset {} with the type {}".format(
dataset_name, evaluator_type
)
)
elif len(evaluator_list) == 1:
return evaluator_list[0]
return DatasetEvaluators(evaluator_list)
def reroute_config_path(path: str) -> str:
"""
Supporting rerouting the config files for convenience:
d2go:// -> mobile-vision/d2go/...
detectron2go:// -> mobile-vision/d2go/configs/...
detectron2:// -> vision/fair/detectron2/configs/...
flow:// -> fblearner/flow/projects/mobile_vision/detectron2go/...
mv_experimental:// -> mobile-vision/experimental/...
(see //mobile-vision/experimental:mv_experimental_d2go_yaml_files)
These configs are considered code, so they'll reflect your current checkout;
try using canary if you have local changes.
"""
if path.startswith("d2go://"):
rel_path = path[len("d2go://") :]
config_in_resource = pkg_resources.resource_filename(
"d2go.model_zoo", os.path.join("configs", rel_path)
)
return config_in_resource
elif path.startswith("detectron2go://"):
rel_path = path[len("detectron2go://") :]
config_in_resource = pkg_resources.resource_filename(
"d2go.model_zoo", os.path.join("configs", rel_path)
)
return config_in_resource
elif path.startswith("detectron2://"):
rel_path = path[len("detectron2://") :]
config_in_resource = pkg_resources.resource_filename(
"detectron2.model_zoo", os.path.join("configs", rel_path)
)
return config_in_resource
elif path.startswith("mv_experimental://"):
rel_path = path[len("mv_experimental://") :]
# pyre-fixme[21]: Could not find module `mv_experimental_d2go_yaml_files`.
import mv_experimental_d2go_yaml_files
package_path = get_dir_path(mv_experimental_d2go_yaml_files.__name__)
return os.path.join(package_path, rel_path)
return path
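# --- Usage sketch (not part of the original file) -----------------------------
# A "d2go://" or "detectron2://" prefix is resolved to the corresponding
# packaged config directory; any other path is returned unchanged. The yaml
# name below is only a hypothetical example.
def _example_reroute_config():
    resolved = reroute_config_path("d2go://faster_rcnn_fbnetv3a_C4.yaml")
    assert resolved.endswith("faster_rcnn_fbnetv3a_C4.yaml")
    assert reroute_config_path("/tmp/local_config.yaml") == "/tmp/local_config.yaml"
    return resolved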
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
def get_launch_environment():
return "local"
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import os
from typing import Dict
import warnings
import detectron2.utils.comm as comm
from d2go.config import CfgNode
from fvcore.common.file_io import PathManager
from tabulate import tabulate
from .tensorboard_log_util import get_tensorboard_log_dir
logger = logging.getLogger(__name__)
def check_version(library, min_version, warning_only=False):
"""Check the version of the library satisfies the provided minimum version.
An exception is thrown if the check does not pass.
Parameters
----------
min_version : str
Minimum version
warning_only : bool
Printing a warning instead of throwing an exception.
"""
from distutils.version import LooseVersion
version = library.__version__
bad_version = LooseVersion(version) < LooseVersion(min_version)
if bad_version:
msg = f'Installed {library.__name__} version {version} does not satisfy the ' \
f'minimum required version {min_version}'
if warning_only:
warnings.warn(msg)
else:
raise AssertionError(msg)
return False
return True
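# --- Usage sketch (not part of the original file) -----------------------------
# Warn, rather than raise, when the installed torch is older than an assumed
# minimum version ("1.5.0" here is only an example).
def _example_check_torch_version() -> bool:
    import torch
    return check_version(torch, "1.5.0", warning_only=True)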
def metrics_dict_to_metrics_table(dic):
assert isinstance(dic, dict)
ret = []
for key in sorted(dic.keys()):
value = dic[key]
if isinstance(value, dict):
for sub_metrics in metrics_dict_to_metrics_table(value):
ret.append([key] + sub_metrics)
else:
ret.append([key, value])
return ret
def print_metrics_table(metrics_dic):
metrics_table = metrics_dict_to_metrics_table(metrics_dic)
metrics_tabulate = tabulate(
metrics_table,
tablefmt="pipe",
headers=["model", "dataset", "task", "metric", "score"],
)
logger.info("Metrics table: \n" + metrics_tabulate)
def dump_trained_model_configs(output_dir: str, trained_cfgs: Dict[str, CfgNode]) -> Dict[str, str]:
"""Writes trained model config files to output_dir.
Args:
output_dir: output file directory.
trained_cfgs: map from model name to the config of trained model.
Returns:
A map of model name to model config path.
"""
trained_model_configs = {}
trained_model_config_dir = os.path.join(output_dir, "trained_model_configs")
PathManager.mkdirs(trained_model_config_dir)
for name, trained_cfg in trained_cfgs.items():
config_file = os.path.join(trained_model_config_dir, "{}.yaml".format(name))
trained_model_configs[name] = config_file
if comm.is_main_process():
with PathManager.open(config_file, "w") as f:
f.write(trained_cfg.dump())
return trained_model_configs
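# --- Usage sketch (not part of the original file) -----------------------------
# Dump the config of a single trained model; the output directory and model
# name are hypothetical.
def _example_dump_single_config(cfg):
    paths = dump_trained_model_configs("/tmp/d2go_output", {"default": cfg})
    return paths["default"]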
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import json
import logging
import torch
from d2go.export.api import PredictorExportConfig
from detectron2.export.caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP
from mobile_cv.predictor.api import FuncInfo
from detectron2.export.flatten import TracingAdapter
from detectron2.export.torchscript_patch import patch_builtin_len
from d2go.utils.export_utils import (D2Caffe2MetaArchPreprocessFunc,
D2Caffe2MetaArchPostprocessFunc, D2TracingAdapterPreprocessFunc, D2TracingAdapterPostFunc,
dataclass_object_dump)
logger = logging.getLogger(__name__)
def d2_meta_arch_prepare_for_export(self, cfg, inputs, export_scheme):
if "torchscript" in export_scheme and "@tracing" in export_scheme:
def inference_func(model, image):
inputs = [{"image": image}]
return model.inference(inputs, do_postprocess=False)[0]
def data_generator(x):
return (x[0]["image"],)
image = data_generator(inputs)[0]
wrapper = TracingAdapter(self, image, inference_func)
wrapper.eval()
# HACK: outputs_schema can only be obtained after running tracing, but
# PredictorExportConfig requires a pre-defined postprocessing function, this
# causes tracing to run twice.
logger.info("tracing the model to get outputs_schema ...")
with torch.no_grad(), patch_builtin_len():
_ = torch.jit.trace(wrapper, (image,))
outputs_schema_json = json.dumps(
wrapper.outputs_schema, default=dataclass_object_dump
)
return PredictorExportConfig(
model=wrapper,
data_generator=data_generator,
preprocess_info=FuncInfo.gen_func_info(
D2TracingAdapterPreprocessFunc, params={}
),
postprocess_info=FuncInfo.gen_func_info(
D2TracingAdapterPostFunc,
params={"outputs_schema_json": outputs_schema_json},
),
)
if cfg.MODEL.META_ARCHITECTURE in META_ARCH_CAFFE2_EXPORT_TYPE_MAP:
C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[cfg.MODEL.META_ARCHITECTURE]
c2_compatible_model = C2MetaArch(cfg, self)
preprocess_info = FuncInfo.gen_func_info(
D2Caffe2MetaArchPreprocessFunc,
params=D2Caffe2MetaArchPreprocessFunc.get_params(cfg, c2_compatible_model),
)
postprocess_info = FuncInfo.gen_func_info(
D2Caffe2MetaArchPostprocessFunc,
params=D2Caffe2MetaArchPostprocessFunc.get_params(cfg, c2_compatible_model),
)
preprocess_func = preprocess_info.instantiate()
return PredictorExportConfig(
model=c2_compatible_model,
# Caffe2MetaArch takes a single tuple as input (which is the return of
# preprocess_func), data_generator requires all positional args as a tuple.
data_generator=lambda x: (preprocess_func(x),),
preprocess_info=preprocess_info,
postprocess_info=postprocess_info,
)
raise NotImplementedError("Can't determine prepare_for_tracing!")
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import os
from functools import lru_cache
def get_tensorboard_log_dir(output_dir):
return output_dir
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import logging
import os
import re
import time
from detectron2.checkpoint import DetectionCheckpointer
from fvcore.common.file_io import PathManager
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
logger = logging.getLogger(__name__)
def fetch_checkpoints_till_final(checkpoint_dir):
"""
A generator that yields all checkpoint paths under the given directory; it
keeps refreshing until model_final is found.
"""
MIN_SLEEP_INTERVAL = 1.0 # in seconds
MAX_SLEEP_INTERVAL = 60.0 # in seconds
sleep_interval = MIN_SLEEP_INTERVAL
finished_checkpoints = set()
def _add_and_log(path):
finished_checkpoints.add(path)
logger.info("Found checkpoint: {}".format(path))
return path
def _log_and_sleep(sleep_interval):
logger.info(
"Sleep {} seconds while waiting for model_final.pth".format(sleep_interval)
)
time.sleep(sleep_interval)
return min(sleep_interval * 2, MAX_SLEEP_INTERVAL)
def _get_lightning_checkpoints(path: str):
return [
os.path.join(path, x)
for x in PathManager.ls(path)
if x.endswith(ModelCheckpoint.FILE_EXTENSION)
and not x.startswith(ModelCheckpoint.CHECKPOINT_NAME_LAST)
]
while True:
if not PathManager.exists(checkpoint_dir):
sleep_interval = _log_and_sleep(sleep_interval)
continue
checkpoint_paths = DetectionCheckpointer(
None, save_dir=checkpoint_dir
).get_all_checkpoint_files()
checkpoint_paths.extend(_get_lightning_checkpoints(checkpoint_dir))
final_model_path = None
periodic_checkpoints = []
for path in sorted(checkpoint_paths):
if path.endswith("model_final.pth") or path.endswith("model_final.ckpt"):
final_model_path = path
continue
if path.endswith(ModelCheckpoint.FILE_EXTENSION):
# Lightning checkpoint
model_iter = int(
re.findall(
r"(?<=step=)\d+(?={})".format(ModelCheckpoint.FILE_EXTENSION),
path,
)[0]
)
else:
model_iter = int(re.findall(r"(?<=model_)\d+(?=\.pth)", path)[0])
periodic_checkpoints.append((path, model_iter))
periodic_checkpoints = [
pc for pc in periodic_checkpoints if pc[0] not in finished_checkpoints
]
periodic_checkpoints = sorted(periodic_checkpoints, key=lambda x: x[1])
for pc in periodic_checkpoints:
yield _add_and_log(pc[0])
sleep_interval = MIN_SLEEP_INTERVAL
if final_model_path is None:
sleep_interval = _log_and_sleep(sleep_interval)
else:
yield _add_and_log(final_model_path)
break
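# --- Usage sketch (not part of the original file) -----------------------------
# Consume checkpoints as they appear; the generator only stops after
# model_final has been yielded.
def _example_consume_checkpoints(checkpoint_dir):
    for ckpt_path in fetch_checkpoints_till_final(checkpoint_dir):
        logger.info("Would evaluate checkpoint: %s", ckpt_path)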
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.data import DatasetCatalog, MetadataCatalog, detection_utils as utils
from detectron2.evaluation import DatasetEvaluator
from detectron2.modeling import META_ARCH_REGISTRY
from detectron2.utils.events import get_event_storage
from detectron2.utils.visualizer import Visualizer
class VisualizerWrapper(object):
"""
D2's Visualizer provides low-level APIs to draw common structures, such as
draw_instance_predictions/draw_sem_seg/overlay_instances. This class provides
the high-level interface for visualizing.
"""
def __init__(self, cfg):
self.cfg = cfg
def _get_meta_arch_class(self):
return META_ARCH_REGISTRY.get(self.cfg.MODEL.META_ARCHITECTURE)
def visualize_train_input(self, input_dict):
"""
Visualize a single model input image (also the output from the train loader)
used for training; this includes the data augmentation.
"""
per_image = input_dict
cfg = self.cfg
# customization
if hasattr(self._get_meta_arch_class(), "visualize_train_input"):
return self._get_meta_arch_class().visualize_train_input(self, input_dict)
img = per_image["image"].permute(1, 2, 0).cpu().detach().numpy()
img = utils.convert_image_to_rgb(img, cfg.INPUT.FORMAT)
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
scale = 2.0
visualizer = Visualizer(img, metadata=metadata, scale=scale)
if "instances" in per_image:
target_fields = per_image["instances"].get_fields()
labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]]
vis = visualizer.overlay_instances(
labels=labels,
boxes=target_fields.get("gt_boxes", None),
masks=target_fields.get("gt_masks", None),
keypoints=target_fields.get("gt_keypoints", None),
)
if "sem_seg" in per_image:
vis = visualizer.draw_sem_seg(
per_image["sem_seg"], area_threshold=0, alpha=0.5
)
return vis.get_image()
def visualize_test_output(
self, dataset_name, dataset_mapper, input_dict, output_dict
):
"""
Visualize the output of the model
"""
# customization
if hasattr(self._get_meta_arch_class(), "visualize_test_output"):
return self._get_meta_arch_class().visualize_test_output(
self, dataset_name, dataset_mapper, input_dict, output_dict
)
image = dataset_mapper._read_image(input_dict, "RGB")
visualizer = Visualizer(image, metadata=MetadataCatalog.get(dataset_name))
if "panoptic_seg" in output_dict:
# NOTE: refer to https://fburl.com/diffusion/evarrhbh
raise NotImplementedError()
if "instances" in output_dict:
visualizer.draw_instance_predictions(output_dict["instances"].to("cpu"))
if "sem_seg" in output_dict:
visualizer.draw_sem_seg(
output_dict["sem_seg"].argmax(dim=0).to("cpu"),
area_threshold=0,
alpha=0.5,
)
return visualizer.get_output().get_image()
def visualize_dataset_dict(self, dataset_name, dataset_mapper, dataset_dict):
"""
Visualize the dataset_dict
"""
image = dataset_mapper._read_image(dataset_dict, "RGB")
visualizer = Visualizer(image, metadata=MetadataCatalog.get(dataset_name))
visualizer.draw_dataset_dict(dataset_dict)
return visualizer.get_output().get_image()
class DataLoaderVisWrapper:
"""
Wrap the data loader to visualize its output via TensorBoardX at a given frequency.
"""
def __init__(self, cfg, tbx_writer, data_loader):
self.tbx_writer = tbx_writer
self.data_loader = data_loader
self._visualizer = VisualizerWrapper(cfg)
self.log_frequency = cfg.TENSORBOARD.TRAIN_LOADER_VIS_WRITE_PERIOD
self.log_limit = cfg.TENSORBOARD.TRAIN_LOADER_VIS_MAX_IMAGES
assert self.log_frequency >= 0
assert self.log_limit >= 0
self._remaining = self.log_limit
def __iter__(self):
for data in self.data_loader:
self._maybe_write_vis(data)
yield data
def _maybe_write_vis(self, data):
try:
storage = get_event_storage()
except AssertionError:
# wrapped data loader might be used outside EventStorage, don't visualize
# anything
return
if (
self.log_frequency == 0
or not storage.iter % self.log_frequency == 0
or self._remaining <= 0
):
return
length = min(len(data), self._remaining)
data = data[:length]
self._remaining -= length
for i, per_image in enumerate(data):
vis_image = self._visualizer.visualize_train_input(per_image)
tag = "train_loader_batch_{}/".format(storage.iter)
if "dataset_name" in per_image:
tag += per_image["dataset_name"] + "/"
if "file_name" in per_image:
tag += "img_{}/{}".format(i, per_image["file_name"])
self.tbx_writer._writer.add_image(
tag=tag,
img_tensor=vis_image,
global_step=storage.iter,
dataformats="HWC",
)
class VisualizationEvaluator(DatasetEvaluator):
"""
Visualize GT and predictions during evaluation. It doesn't calculate any
metrics; it just uses the evaluator's interface as a hook.
"""
# NOTE: the evaluator will be created for every eval (during training and
# after training), so the images will be logged multiple times, use a global
# counter to differentiate them in TB.
_counter = 0
def __init__(
self, cfg, tbx_writer, dataset_mapper, dataset_name, train_iter=None, tag_postfix=None
):
self.tbx_writer = tbx_writer
self.dataset_mapper = dataset_mapper
self.dataset_name = dataset_name
self._visualizer = VisualizerWrapper(cfg)
self.train_iter = train_iter or VisualizationEvaluator._counter
self.tag_postfix = tag_postfix or ""
self.log_limit = max(cfg.TENSORBOARD.TEST_VIS_MAX_IMAGES, 0)
if self.log_limit > 0:
self._metadata = MetadataCatalog.get(dataset_name)
# NOTE: Since there's no GT from test loader, we need to get GT from
# the dataset_dict, this assumes the test data loader uses the item from
# dataset_dict in the default way.
self._dataset_dict = DatasetCatalog.get(dataset_name)
self._file_name_to_dataset_dict = {
dic["file_name"]: dic for dic in self._dataset_dict
}
VisualizationEvaluator._counter += 1
self.reset()
def reset(self):
self._iter = 0
self._log_remaining = self.log_limit
def process(self, inputs, outputs):
for input, output in zip(inputs, outputs):
if self._log_remaining <= 0:
return
file_name = input["file_name"]
dataset_dict = self._file_name_to_dataset_dict[file_name]
gt_img = self._visualizer.visualize_dataset_dict(
self.dataset_name, self.dataset_mapper, dataset_dict
)
pred_img = self._visualizer.visualize_test_output(
self.dataset_name, self.dataset_mapper, input, output
)
tag_base = f"{self.dataset_name}{self.tag_postfix}/eval_iter_{self._iter}/{file_name}"
self.tbx_writer._writer.add_image(
f"{tag_base}/GT",
gt_img,
self.train_iter,
dataformats="HWC",
)
if not isinstance(pred_img, dict):
pred_img = {"Pred": pred_img}
for img_type in pred_img.keys():
self.tbx_writer._writer.add_image(
f"{tag_base}/{img_type}",
pred_img[img_type],
self.train_iter,
dataformats="HWC",
)
self._log_remaining -= 1
self._iter += 1
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
DETR
Copyright 2020 - present, Facebook, Inc
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Deformable DETR
Copyright 2020 SenseTime
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
# DETR and Deformable DETR in D2Go
This project extends D2Go with [DETR](https://github.com/facebookresearch/detr) and [Deformable DETR](https://github.com/fundamentalvision/Deformable-DETR) models. Pretrained models with efficient backbones are provided.
## Usage
Please install D2Go following the [instructions](../README.md). Then install this extension:
```bash
cd projects/detr/
python setup.py install
```
### Evaluating Pretrained Models
Please use `tools/train_net.py` in the main directory as the entry point. A pretrained model can be evaluated using
```bash
python train_net.py --runner detr.runner.DETRRunner --eval-only --config configs/deformable_detr_fbnetv3a_bs16.yaml MODEL.WEIGHTS https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/252811934/model_final.pth
```
### Training
Please use `tools/train_net.py` in the main directory as the entry point and pass `detr.runner.DETRRunner` as the runner.
```bash
python train_net.py --runner detr.runner.DETRRunner --config configs/deformable_detr_fbnetv3a_bs16.yaml
```
### Pretrained Models
| name | box AP | model id | download |
| ------------------------------------------------------------ | ------ | --------- | ------------------------------------------------------------ |
| [Deformable-DETR-FBNetV3A](./configs/deformable_detr_fbnetv3a_bs16.yaml) | 27.53 | 252811934 | [model](https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/252811934/model_final.pth)\|[metrics](https://mobile-cv.s3-us-west-2.amazonaws.com/d2go/models/252811934/metrics.json) |
MODEL:
META_ARCHITECTURE: "Detr"
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
MASK_ON: False
BACKBONE:
NAME: "FBNetV2C4Backbone"
FBNET_V2:
ARCH: "FBNetV3_A_dsmask_C5"
NORM: "sync_bn"
WIDTH_DIVISOR: 8
SCALE_FACTOR: 1.0
OUT_FEATURES: ["trunk4"]
DETR:
NUM_CLASSES: 80
DEFORMABLE: True
CLS_WEIGHT: 2.0
DIM_FEEDFORWARD: 1024
GIOU_WEIGHT: 2.0
L1_WEIGHT: 5.0
NUM_OBJECT_QUERIES: 300
CENTERED_POSITION_ENCODIND: True
USE_FOCAL_LOSS: True
NUM_FEATURE_LEVELS: 1
DATASETS:
TRAIN: ("coco_2017_train",)
TEST: ("coco_2017_val",)
SOLVER:
IMS_PER_BATCH: 16
BASE_LR: 0.0002
STEPS: (887040,)
MAX_ITER: 1108800
WARMUP_FACTOR: 1.0
WARMUP_ITERS: 10
WEIGHT_DECAY: 0.0001
OPTIMIZER: "ADAMW"
CLIP_GRADIENTS:
ENABLED: True
CLIP_TYPE: "full_model"
CLIP_VALUE: 0.1
NORM_TYPE: 2.0
LR_MULTIPLIER_OVERWRITE: [{'backbone': 0.1}, {'reference_points': 0.1, 'sampling_offsets': 0.1}]
INPUT:
MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
CROP:
ENABLED: True
TYPE: "absolute_range"
SIZE: (384, 600)
FORMAT: "RGB"
D2GO_DATA:
MAPPER:
NAME: "DETRDatasetMapper"
TEST:
EVAL_PERIOD: 4000
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 4
VERSION: 2
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from . import models, util, datasets
__all__ = ['models', 'util', 'datasets']
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from .config import add_detr_config
from .detr import Detr
from .dataset_mapper import DetrDatasetMapper
__all__ = ['add_detr_config', 'Detr', 'DetrDatasetMapper']
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from detectron2.config import CfgNode as CN
def add_detr_config(cfg):
"""
Add config for DETR.
"""
cfg.MODEL.DETR = CN()
cfg.MODEL.DETR.NUM_CLASSES = 80
# FBNet
cfg.MODEL.FBNET_V2.OUT_FEATURES = ["trunk3"]
# For Segmentation
cfg.MODEL.DETR.FROZEN_WEIGHTS = ''
# LOSS
cfg.MODEL.DETR.DEFORMABLE = False
cfg.MODEL.DETR.USE_FOCAL_LOSS = False
cfg.MODEL.DETR.CENTERED_POSITION_ENCODIND = False
cfg.MODEL.DETR.CLS_WEIGHT = 1.0
cfg.MODEL.DETR.NUM_FEATURE_LEVELS = 4
cfg.MODEL.DETR.GIOU_WEIGHT = 2.0
cfg.MODEL.DETR.L1_WEIGHT = 5.0
cfg.MODEL.DETR.DEEP_SUPERVISION = True
cfg.MODEL.DETR.NO_OBJECT_WEIGHT = 0.1
# TRANSFORMER
cfg.MODEL.DETR.NHEADS = 8
cfg.MODEL.DETR.DROPOUT = 0.1
cfg.MODEL.DETR.DIM_FEEDFORWARD = 2048
cfg.MODEL.DETR.ENC_LAYERS = 6
cfg.MODEL.DETR.DEC_LAYERS = 6
cfg.MODEL.DETR.PRE_NORM = False
cfg.MODEL.DETR.HIDDEN_DIM = 256
cfg.MODEL.DETR.NUM_OBJECT_QUERIES = 100
cfg.SOLVER.OPTIMIZER = "ADAMW"
cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1
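# --- Usage sketch (not part of the original file) -----------------------------
# `base_cfg` is assumed to be a D2Go default config that already contains the
# MODEL.FBNET_V2 node, since add_detr_config also overrides
# MODEL.FBNET_V2.OUT_FEATURES.
def _example_add_detr_config(base_cfg):
    add_detr_config(base_cfg)
    assert base_cfg.MODEL.DETR.NUM_OBJECT_QUERIES == 100
    return base_cfg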
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import logging
import numpy as np
import torch
from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T
__all__ = ["DetrDatasetMapper"]
def build_transform_gen(cfg, is_train):
"""
Create a list of :class:`TransformGen` from config.
Returns:
list[TransformGen]
"""
if is_train:
min_size = cfg.INPUT.MIN_SIZE_TRAIN
max_size = cfg.INPUT.MAX_SIZE_TRAIN
sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING
else:
min_size = cfg.INPUT.MIN_SIZE_TEST
max_size = cfg.INPUT.MAX_SIZE_TEST
sample_style = "choice"
if sample_style == "range":
assert len(min_size) == 2, "more than 2 ({}) min_size(s) are provided for ranges".format(len(min_size))
logger = logging.getLogger(__name__)
tfm_gens = []
if is_train:
tfm_gens.append(T.RandomFlip())
tfm_gens.append(T.ResizeShortestEdge(min_size, max_size, sample_style))
if is_train:
logger.info("TransformGens used in training: " + str(tfm_gens))
return tfm_gens
class DetrDatasetMapper:
"""
A callable which takes a dataset dict in Detectron2 Dataset format,
and maps it into a format used by DETR.
The callable currently does the following:
1. Reads the image from "file_name"
2. Applies geometric transforms to the image and annotations
3. Finds and applies suitable cropping to the image and annotations
4. Prepares the image and annotations as Tensors
"""
def __init__(self, cfg, is_train=True):
if cfg.INPUT.CROP.ENABLED and is_train:
self.crop_gen = [
T.ResizeShortestEdge([400, 500, 600], sample_style="choice"),
T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE),
]
else:
self.crop_gen = None
self.mask_on = cfg.MODEL.MASK_ON
self.tfm_gens = build_transform_gen(cfg, is_train)
logging.getLogger(__name__).info(
"Full TransformGens used in training: {}, crop: {}".format(str(self.tfm_gens), str(self.crop_gen))
)
self.img_format = cfg.INPUT.FORMAT
self.is_train = is_train
def __call__(self, dataset_dict):
"""
Args:
dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
Returns:
dict: a format that builtin models in detectron2 accept
"""
dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
utils.check_image_size(dataset_dict, image)
if self.crop_gen is None:
image, transforms = T.apply_transform_gens(self.tfm_gens, image)
else:
if np.random.rand() > 0.5:
image, transforms = T.apply_transform_gens(self.tfm_gens, image)
else:
image, transforms = T.apply_transform_gens(
self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image
)
image_shape = image.shape[:2] # h, w
# Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
# but not efficient on large generic data structures due to the use of pickle & mp.Queue.
# Therefore it's important to use torch.Tensor.
dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
if not self.is_train:
# USER: Modify this if you want to keep them for some reason.
dataset_dict.pop("annotations", None)
return dataset_dict
if "annotations" in dataset_dict:
# USER: Modify this if you want to keep them for some reason.
for anno in dataset_dict["annotations"]:
if not self.mask_on:
anno.pop("segmentation", None)
anno.pop("keypoints", None)
# USER: Implement additional transformations if you have other types of data
annos = [
utils.transform_instance_annotations(obj, transforms, image_shape)
for obj in dataset_dict.pop("annotations")
if obj.get("iscrowd", 0) == 0
]
instances = utils.annotations_to_instances(annos, image_shape)
dataset_dict["instances"] = utils.filter_empty_instances(instances)
return dataset_dict
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from detectron2.layers import ShapeSpec
from detectron2.modeling import META_ARCH_REGISTRY, build_backbone, detector_postprocess
from detectron2.structures import Boxes, ImageList, Instances, BitMasks
from detr.models.backbone import Joiner
from detr.models.detr import DETR
from detr.models.deformable_detr import DeformableDETR
from detr.models.setcriterion import SetCriterion, FocalLossSetCriterion
from detr.models.matcher import HungarianMatcher
from detr.models.position_encoding import PositionEmbeddingSine
from detr.models.transformer import Transformer
from detr.models.deformable_transformer import DeformableTransformer
from detr.models.segmentation import DETRsegm, PostProcessSegm
from detr.util.box_ops import box_cxcywh_to_xyxy, box_xyxy_to_cxcywh
from detr.util.misc import NestedTensor
from detr.datasets.coco import convert_coco_poly_to_mask
__all__ = ["Detr"]
class ResNetMaskedBackbone(nn.Module):
""" This is a thin wrapper around D2's backbone to provide padding masking"""
def __init__(self, cfg):
super().__init__()
self.backbone = build_backbone(cfg)
backbone_shape = self.backbone.output_shape()
if cfg.MODEL.DETR.NUM_FEATURE_LEVELS > 1:
self.strides = [8, 16, 32]
else:
self.strides = [32]
if cfg.MODEL.RESNETS.RES5_DILATION == 2:
# fix dilation from d2
self.backbone.stages[-1][0].conv2.dilation = (1, 1)
self.backbone.stages[-1][0].conv2.padding = (1, 1)
self.strides[-1] = self.strides[-1] // 2
self.feature_strides = [backbone_shape[f].stride for f in backbone_shape.keys()]
self.num_channels = [backbone_shape[k].channels for k in backbone_shape.keys()]
def forward(self, images):
features = self.backbone(images.tensor)
masks = self.mask_out_padding(
[features_per_level.shape for features_per_level in features.values()],
images.image_sizes,
images.tensor.device,
)
assert len(features) == len(masks)
for i, k in enumerate(features.keys()):
features[k] = NestedTensor(features[k], masks[i])
return features
def mask_out_padding(self, feature_shapes, image_sizes, device):
masks = []
assert len(feature_shapes) == len(self.feature_strides)
for idx, shape in enumerate(feature_shapes):
N, _, H, W = shape
masks_per_feature_level = torch.ones((N, H, W), dtype=torch.bool, device=device)
for img_idx, (h, w) in enumerate(image_sizes):
masks_per_feature_level[
img_idx,
: int(np.ceil(float(h) / self.feature_strides[idx])),
: int(np.ceil(float(w) / self.feature_strides[idx])),
] = 0
masks.append(masks_per_feature_level)
return masks
class FBNetMaskedBackbone(nn.Module):
""" This is a thin wrapper around D2's backbone to provide padding masking"""
def __init__(self, cfg):
super().__init__()
self.backbone = build_backbone(cfg)
self.out_features = cfg.MODEL.FBNET_V2.OUT_FEATURES
self.feature_strides = list(self.backbone._out_feature_strides.values())
self.num_channels = [self.backbone._out_feature_channels[k] for k in self.out_features]
self.strides = [self.backbone._out_feature_strides[k] for k in self.out_features]
def forward(self, images):
features = self.backbone(images.tensor)
masks = self.mask_out_padding(
[features_per_level.shape for features_per_level in features.values()],
images.image_sizes,
images.tensor.device,
)
assert len(features) == len(masks)
ret_features = {}
for i, k in enumerate(features.keys()):
if k in self.out_features:
ret_features[k] = NestedTensor(features[k], masks[i])
return ret_features
def mask_out_padding(self, feature_shapes, image_sizes, device):
masks = []
assert len(feature_shapes) == len(self.feature_strides)
for idx, shape in enumerate(feature_shapes):
N, _, H, W = shape
masks_per_feature_level = torch.ones((N, H, W), dtype=torch.bool, device=device)
for img_idx, (h, w) in enumerate(image_sizes):
masks_per_feature_level[
img_idx,
: int(np.ceil(float(h) / self.feature_strides[idx])),
: int(np.ceil(float(w) / self.feature_strides[idx])),
] = 0
masks.append(masks_per_feature_level)
return masks
@META_ARCH_REGISTRY.register()
class Detr(nn.Module):
"""
Implement Detr
"""
def __init__(self, cfg):
super().__init__()
self.device = torch.device(cfg.MODEL.DEVICE)
self.num_classes = cfg.MODEL.DETR.NUM_CLASSES
self.mask_on = cfg.MODEL.MASK_ON
hidden_dim = cfg.MODEL.DETR.HIDDEN_DIM
num_queries = cfg.MODEL.DETR.NUM_OBJECT_QUERIES
# Transformer parameters:
nheads = cfg.MODEL.DETR.NHEADS
dropout = cfg.MODEL.DETR.DROPOUT
dim_feedforward = cfg.MODEL.DETR.DIM_FEEDFORWARD
enc_layers = cfg.MODEL.DETR.ENC_LAYERS
dec_layers = cfg.MODEL.DETR.DEC_LAYERS
pre_norm = cfg.MODEL.DETR.PRE_NORM
# Loss parameters:
giou_weight = cfg.MODEL.DETR.GIOU_WEIGHT
l1_weight = cfg.MODEL.DETR.L1_WEIGHT
cls_weight = cfg.MODEL.DETR.CLS_WEIGHT
deep_supervision = cfg.MODEL.DETR.DEEP_SUPERVISION
no_object_weight = cfg.MODEL.DETR.NO_OBJECT_WEIGHT
centered_position_encoding = cfg.MODEL.DETR.CENTERED_POSITION_ENCODIND
num_feature_levels = cfg.MODEL.DETR.NUM_FEATURE_LEVELS
N_steps = hidden_dim // 2
if 'resnet' in cfg.MODEL.BACKBONE.NAME.lower():
d2_backbone = ResNetMaskedBackbone(cfg)
elif 'fbnet' in cfg.MODEL.BACKBONE.NAME.lower():
d2_backbone = FBNetMaskedBackbone(cfg)
else:
raise NotImplementedError
backbone = Joiner(d2_backbone, PositionEmbeddingSine(N_steps, normalize=True, centered=centered_position_encoding))
backbone.num_channels = d2_backbone.num_channels
self.use_focal_loss = cfg.MODEL.DETR.USE_FOCAL_LOSS
if cfg.MODEL.DETR.DEFORMABLE:
transformer = DeformableTransformer(
d_model=hidden_dim,
nhead=nheads,
num_encoder_layers=enc_layers,
num_decoder_layers=dec_layers,
dim_feedforward=dim_feedforward,
dropout=dropout,
activation="relu",
return_intermediate_dec=True,
num_feature_levels=num_feature_levels,
dec_n_points=4,
enc_n_points=4,
two_stage=False,
two_stage_num_proposals=num_queries,
)
self.detr = DeformableDETR(
backbone, transformer, num_classes=self.num_classes, num_queries=num_queries,
num_feature_levels=num_feature_levels, aux_loss=deep_supervision,
)
else:
transformer = Transformer(
d_model=hidden_dim,
dropout=dropout,
nhead=nheads,
dim_feedforward=dim_feedforward,
num_encoder_layers=enc_layers,
num_decoder_layers=dec_layers,
normalize_before=pre_norm,
return_intermediate_dec=deep_supervision,
)
self.detr = DETR(
backbone, transformer, num_classes=self.num_classes, num_queries=num_queries,
aux_loss=deep_supervision, use_focal_loss=self.use_focal_loss,
)
if self.mask_on:
frozen_weights = cfg.MODEL.DETR.FROZEN_WEIGHTS
if frozen_weights != '':
print("LOAD pre-trained weights")
weight = torch.load(frozen_weights, map_location=lambda storage, loc: storage)['model']
new_weight = {}
for k, v in weight.items():
if 'detr.' in k:
new_weight[k.replace('detr.', '')] = v
else:
print(f"Skipping loading weight {k} from frozen model")
del weight
self.detr.load_state_dict(new_weight)
del new_weight
self.detr = DETRsegm(self.detr, freeze_detr=(frozen_weights != ''))
self.seg_postprocess = PostProcessSegm
self.detr.to(self.device)
# building criterion
matcher = HungarianMatcher(cost_class=cls_weight, cost_bbox=l1_weight,
cost_giou=giou_weight, use_focal_loss=self.use_focal_loss)
weight_dict = {"loss_ce": cls_weight, "loss_bbox": l1_weight}
weight_dict["loss_giou"] = giou_weight
if deep_supervision:
aux_weight_dict = {}
for i in range(dec_layers - 1):
aux_weight_dict.update({k + f"_{i}": v for k, v in weight_dict.items()})
weight_dict.update(aux_weight_dict)
losses = ["labels", "boxes", "cardinality"]
if self.mask_on:
losses += ["masks"]
if self.use_focal_loss:
self.criterion = FocalLossSetCriterion(
self.num_classes, matcher=matcher, weight_dict=weight_dict, losses=losses,
)
else:
self.criterion = SetCriterion(
self.num_classes, matcher=matcher, weight_dict=weight_dict, eos_coef=no_object_weight, losses=losses,
)
self.criterion.to(self.device)
pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
self.normalizer = lambda x: (x - pixel_mean) / pixel_std
self.to(self.device)
def forward(self, batched_inputs):
"""
Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
Each item in the list contains the inputs for one image.
For now, each item in the list is a dict that contains:
* image: Tensor, image in (C, H, W) format.
* instances: Instances
Other information that's included in the original dicts, such as:
* "height", "width" (int): the output resolution of the model, used in inference.
                See :func:`detectron2.modeling.postprocessing.detector_postprocess` for details.
        Returns:
            During training, a dict[str, Tensor] mapping each named loss to a tensor
            storing that loss. During inference, a list of dicts with one "instances"
            entry per image holding the post-processed predictions.
        """
images = self.preprocess_image(batched_inputs)
output = self.detr(images)
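        # `output` follows the DETR convention: "pred_logits" with shape
        # (batch, num_queries, num_classes[+1]), "pred_boxes" with normalized
        # (cx, cy, w, h) boxes, plus "aux_outputs" when deep supervision is on
        # and "pred_masks" when the mask head is enabled.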
if self.training:
gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
targets = self.prepare_targets(gt_instances)
loss_dict = self.criterion(output, targets)
weight_dict = self.criterion.weight_dict
for k in loss_dict.keys():
if k in weight_dict:
loss_dict[k] *= weight_dict[k]
return loss_dict
else:
box_cls = output["pred_logits"]
box_pred = output["pred_boxes"]
mask_pred = output["pred_masks"] if self.mask_on else None
results = self.inference(box_cls, box_pred, mask_pred, images.image_sizes)
processed_results = []
for results_per_image, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes):
height = input_per_image.get("height", image_size[0])
width = input_per_image.get("width", image_size[1])
r = detector_postprocess(results_per_image, height, width)
processed_results.append({"instances": r})
return processed_results
def prepare_targets(self, targets):
new_targets = []
for targets_per_image in targets:
h, w = targets_per_image.image_size
image_size_xyxy = torch.as_tensor([w, h, w, h], dtype=torch.float, device=self.device)
gt_classes = targets_per_image.gt_classes
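            # DETR's matcher and box losses expect normalized (cx, cy, w, h) boxes,
            # so divide the absolute xyxy ground truth by (w, h, w, h) before converting.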
gt_boxes = targets_per_image.gt_boxes.tensor / image_size_xyxy
gt_boxes = box_xyxy_to_cxcywh(gt_boxes)
new_targets.append({"labels": gt_classes, "boxes": gt_boxes})
if self.mask_on and hasattr(targets_per_image, 'gt_masks'):
gt_masks = targets_per_image.gt_masks
gt_masks = convert_coco_poly_to_mask(gt_masks.polygons, h, w)
new_targets[-1].update({'masks': gt_masks})
return new_targets
def inference(self, box_cls, box_pred, mask_pred, image_sizes):
"""
Arguments:
box_cls (Tensor): tensor of shape (batch_size, num_queries, K).
The tensor predicts the classification probability for each query.
            box_pred (Tensor): tensor of shape (batch_size, num_queries, 4).
                The tensor predicts 4-vector (cx, cy, w, h) box
                regression values for every query.
            mask_pred (Tensor or None): tensor of shape (batch_size, num_queries, H, W)
                holding mask logits when the mask head is enabled, otherwise None.
            image_sizes (List[torch.Size]): the input image sizes
Returns:
results (List[Instances]): a list of #images elements.
"""
assert len(box_cls) == len(image_sizes)
results = []
        # For each box we assign the best class, or the second best if the best one is `no_object`.
if self.use_focal_loss:
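            # With sigmoid (focal-loss) classification there is no explicit
            # background class; rank all query/class pairs jointly, keep the top
            # 100, then split the flat index back into a query index and a label.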
prob = box_cls.sigmoid()
            # TODO: make top-100 selection an option for the non-focal-loss path as well
scores, topk_indexes = torch.topk(prob.view(box_cls.shape[0], -1), 100, dim=1)
topk_boxes = topk_indexes // box_cls.shape[2]
labels = topk_indexes % box_cls.shape[2]
else:
scores, labels = F.softmax(box_cls, dim=-1)[:, :, :-1].max(-1)
for i, (scores_per_image, labels_per_image, box_pred_per_image, image_size) in enumerate(zip(
scores, labels, box_pred, image_sizes
)):
result = Instances(image_size)
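            # DETR predicts boxes as normalized (cx, cy, w, h); convert to xyxy here
            # and scale to absolute pixel coordinates a few lines below.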
boxes = box_cxcywh_to_xyxy(box_pred_per_image)
if self.use_focal_loss:
                # Use this image's own top-k indices so inference also works with batch size > 1.
                boxes = torch.gather(
                    boxes.unsqueeze(0), 1, topk_boxes[i : i + 1].unsqueeze(-1).repeat(1, 1, 4)
                ).squeeze(0)
result.pred_boxes = Boxes(boxes)
result.pred_boxes.scale(scale_x=image_size[1], scale_y=image_size[0])
if self.mask_on:
mask = F.interpolate(mask_pred[i].unsqueeze(0), size=image_size, mode='bilinear', align_corners=False)
mask = mask[0].sigmoid() > 0.5
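                # detector_postprocess expects per-instance ROI masks, so the binarized
                # full-image masks are re-sampled into fixed 32x32 crops aligned with
                # each predicted box.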
B, N, H, W = mask_pred.shape
mask = BitMasks(mask.cpu()).crop_and_resize(result.pred_boxes.tensor.cpu(), 32)
result.pred_masks = mask.unsqueeze(1).to(mask_pred[0].device)
result.scores = scores_per_image
result.pred_classes = labels_per_image
results.append(result)
return results
def preprocess_image(self, batched_inputs):
"""
Normalize, pad and batch the input images.
"""
images = [self.normalizer(x["image"].to(self.device)) for x in batched_inputs]
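        # ImageList.from_tensors pads the batch to a common (H, W); the masked
        # backbone later marks those padded pixels so the transformer ignores them.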
images = ImageList.from_tensors(images)
return images
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch.utils.data
import torchvision
from .coco import build as build_coco
def get_coco_api_from_dataset(dataset):
    # Unwrap up to 10 levels of torch.utils.data.Subset nesting to reach the
    # underlying torchvision CocoDetection dataset and return its COCO API object.
    for _ in range(10):
if isinstance(dataset, torch.utils.data.Subset):
dataset = dataset.dataset
if isinstance(dataset, torchvision.datasets.CocoDetection):
return dataset.coco
def build_dataset(image_set, args):
if args.dataset_file == 'coco':
return build_coco(image_set, args)
if args.dataset_file == 'coco_panoptic':
# to avoid making panopticapi required for coco
from .coco_panoptic import build as build_coco_panoptic
return build_coco_panoptic(image_set, args)
raise ValueError(f'dataset {args.dataset_file} not supported')
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
COCO dataset which returns image_id for evaluation.
Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
"""
from pathlib import Path
import torch
import torch.utils.data
import torchvision
from pycocotools import mask as coco_mask
import detr.datasets.transforms as T
class CocoDetection(torchvision.datasets.CocoDetection):
def __init__(self, img_folder, ann_file, transforms, return_masks):
super(CocoDetection, self).__init__(img_folder, ann_file)
self._transforms = transforms
self.prepare = ConvertCocoPolysToMask(return_masks)
def __getitem__(self, idx):
img, target = super(CocoDetection, self).__getitem__(idx)
image_id = self.ids[idx]
target = {'image_id': image_id, 'annotations': target}
img, target = self.prepare(img, target)
if self._transforms is not None:
img, target = self._transforms(img, target)
return img, target
def convert_coco_poly_to_mask(segmentations, height, width):
masks = []
for polygons in segmentations:
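        # frPyObjects converts one instance's polygon list into COCO RLEs and
        # decode yields an (H, W, num_polygons) binary array; any() over the last
        # dimension merges the polygons into a single instance mask.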
rles = coco_mask.frPyObjects(polygons, height, width)
mask = coco_mask.decode(rles)
if len(mask.shape) < 3:
mask = mask[..., None]
mask = torch.as_tensor(mask, dtype=torch.uint8)
mask = mask.any(dim=2)
masks.append(mask)
if masks:
masks = torch.stack(masks, dim=0)
else:
masks = torch.zeros((0, height, width), dtype=torch.uint8)
return masks
class ConvertCocoPolysToMask(object):
def __init__(self, return_masks=False):
self.return_masks = return_masks
def __call__(self, image, target):
w, h = image.size
image_id = target["image_id"]
image_id = torch.tensor([image_id])
anno = target["annotations"]
anno = [obj for obj in anno if 'iscrowd' not in obj or obj['iscrowd'] == 0]
boxes = [obj["bbox"] for obj in anno]
# guard against no boxes via resizing
boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
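        # COCO stores boxes as (x, y, w, h); convert them to (x0, y0, x1, y1) and
        # clamp to the image bounds before degenerate boxes are filtered out below.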
boxes[:, 2:] += boxes[:, :2]
boxes[:, 0::2].clamp_(min=0, max=w)
boxes[:, 1::2].clamp_(min=0, max=h)
classes = [obj["category_id"] for obj in anno]
classes = torch.tensor(classes, dtype=torch.int64)
if self.return_masks:
segmentations = [obj["segmentation"] for obj in anno]
masks = convert_coco_poly_to_mask(segmentations, h, w)
keypoints = None
if anno and "keypoints" in anno[0]:
keypoints = [obj["keypoints"] for obj in anno]
keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
num_keypoints = keypoints.shape[0]
if num_keypoints:
keypoints = keypoints.view(num_keypoints, -1, 3)
keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
boxes = boxes[keep]
classes = classes[keep]
if self.return_masks:
masks = masks[keep]
if keypoints is not None:
keypoints = keypoints[keep]
target = {}
target["boxes"] = boxes
target["labels"] = classes
if self.return_masks:
target["masks"] = masks
target["image_id"] = image_id
if keypoints is not None:
target["keypoints"] = keypoints
# for conversion to coco api
area = torch.tensor([obj["area"] for obj in anno])
iscrowd = torch.tensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
target["area"] = area[keep]
target["iscrowd"] = iscrowd[keep]
target["orig_size"] = torch.as_tensor([int(h), int(w)])
target["size"] = torch.as_tensor([int(h), int(w)])
return image, target
def make_coco_transforms(image_set):
normalize = T.Compose([
T.ToTensor(),
T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
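    # DETR-style multi-scale training: the shorter side is randomly resized to one
    # of these scales (longer side capped at 1333), optionally preceded by a
    # random resize + crop, as assembled below.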
if image_set == 'train':
return T.Compose([
T.RandomHorizontalFlip(),
T.RandomSelect(
T.RandomResize(scales, max_size=1333),
T.Compose([
T.RandomResize([400, 500, 600]),
T.RandomSizeCrop(384, 600),
T.RandomResize(scales, max_size=1333),
])
),
normalize,
])
if image_set == 'val':
return T.Compose([
T.RandomResize([800], max_size=1333),
normalize,
])
    raise ValueError(f'unknown image_set {image_set!r}')
def build(image_set, args):
root = Path(args.coco_path)
assert root.exists(), f'provided COCO path {root} does not exist'
mode = 'instances'
PATHS = {
"train": (root / "train2017", root / "annotations" / f'{mode}_train2017.json'),
"val": (root / "val2017", root / "annotations" / f'{mode}_val2017.json'),
}
img_folder, ann_file = PATHS[image_set]
dataset = CocoDetection(img_folder, ann_file, transforms=make_coco_transforms(image_set), return_masks=args.masks)
return dataset
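# Example usage (hypothetical argparse namespace, for illustration only):
#   from argparse import Namespace
#   args = Namespace(coco_path="/path/to/coco", masks=False)
#   train_set = build("train", args)  # CocoDetection with DETR train-time augmentation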