Commit 1345fab2 authored by luopl's avatar luopl
Browse files

Initial commit

parents
Pipeline #1263 canceled with stages
This source diff could not be displayed because it is too large. You can view the blob instead.
import argparse
import os
from functools import partial
from test import create_test_data_loader
from typing import Dict, List, Tuple
import accelerate
import cv2
import numpy as np
import torch
import torch.utils.data as data
from accelerate import Accelerator
from PIL import Image
from tqdm import tqdm
from util.lazy_load import Config
from util.logger import setup_logger
from util.utils import load_checkpoint, load_state_dict
from util.visualize import plot_bounding_boxes_on_image_cv2
def is_image(file_path):
    """Return True if *file_path* can be opened as an image by PIL, else False.

    Any failure (missing file, unsupported or corrupt format) is treated as
    "not an image" — this is used only to filter a directory listing.
    """
    try:
        # `with` guarantees the file handle is closed even when decoding the
        # header fails midway; the original open()/close() pair leaked on error.
        with Image.open(file_path):
            return True
    except Exception:
        # A bare `except:` (original code) would also swallow
        # KeyboardInterrupt/SystemExit; `Exception` keeps the best-effort
        # behavior without masking interpreter-level signals.
        return False
def parse_args():
    """Define and parse the command-line interface of the inference script."""
    cli = argparse.ArgumentParser(description="Inference a detector")
    # dataset: where to read images and how many loader workers to use
    cli.add_argument("--image-dir", type=str, required=True)
    cli.add_argument("--workers", type=int, default=0)
    # model definition and weights
    cli.add_argument("--model-config", type=str, required=True)
    cli.add_argument("--checkpoint", type=str, required=True)
    # visualization output directory and confidence threshold
    cli.add_argument("--show-dir", type=str, default=None)
    cli.add_argument("--show-conf", type=float, default=0.5)
    # drawing options forwarded to the plotting helper
    cli.add_argument("--font-scale", type=float, default=1.0)
    cli.add_argument("--box-thick", type=int, default=1)
    cli.add_argument("--fill-alpha", type=float, default=0.2)
    cli.add_argument("--text-box-color", type=int, nargs="+", default=(255, 255, 255))
    cli.add_argument("--text-font-color", type=int, nargs="+", default=None)
    cli.add_argument("--text-alpha", type=float, default=1.0)
    # engine
    cli.add_argument("--seed", type=int, default=42)
    return cli.parse_args()
class InferenceDataset(data.Dataset):
    """Dataset yielding one raw RGB CHW uint8 tensor per image file in a directory."""

    def __init__(self, root):
        # list the directory once and keep only entries PIL can open as images
        candidates = (os.path.join(root, name) for name in os.listdir(root))
        self.images = [path for path in candidates if is_image(path)]
        assert len(self.images) > 0, "No images found"

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        # disable OpenCV threading/OpenCL inside DataLoader workers to avoid
        # oversubscription and fork-related issues
        cv2.setNumThreads(0)
        cv2.ocl.setUseOpenCL(False)
        # np.fromfile + imdecode handles paths that cv2.imread cannot (e.g. non-ASCII)
        raw = np.fromfile(self.images[index], dtype=np.uint8)
        decoded = cv2.imdecode(raw, -1)
        # OpenCV decodes as BGR HWC; convert to RGB and move channels first
        chw = cv2.cvtColor(decoded, cv2.COLOR_BGR2RGB).transpose(2, 0, 1)
        return torch.tensor(chw)
def inference():
    """Run detector inference over a directory of images.

    Loads the model from --model-config/--checkpoint, predicts on every image
    under --image-dir, and (when --show-dir is given) writes visualization
    images as a side effect of a second DataLoader pass.
    """
    args = parse_args()
    # set fixed seed and deterministic_algorithms
    accelerator = Accelerator()
    accelerate.utils.set_seed(args.seed, device_specific=False)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # deterministic in low version pytorch leads to RuntimeError
    # torch.use_deterministic_algorithms(True, warn_only=True)
    # setup logger
    for logger_name in ["py.warnings", "accelerate", os.path.basename(os.getcwd())]:
        setup_logger(distributed_rank=accelerator.local_process_index, name=logger_name)
    dataset = InferenceDataset(args.image_dir)
    data_loader = create_test_data_loader(
        dataset, accelerator=accelerator, batch_size=1, num_workers=args.workers
    )
    # get inference results from model output
    model = Config(args.model_config).model.eval()
    checkpoint = load_checkpoint(args.checkpoint)
    # training checkpoints wrap the weights under a "model" key; unwrap if so
    if isinstance(checkpoint, Dict) and "model" in checkpoint:
        checkpoint = checkpoint["model"]
    load_state_dict(model, checkpoint)
    model = accelerator.prepare_model(model)
    with torch.inference_mode():
        predictions = []
        for index, images in enumerate(tqdm(data_loader)):
            prediction = model(images)[0]
            # change torch.Tensor to CPU
            for key in prediction:
                prediction[key] = prediction[key].to("cpu", non_blocking=True)
            # NOTE(review): indexing dataset.images by batch index assumes
            # batch_size=1 and no shuffling, which matches the loader above
            image_name = data_loader.dataset.images[index]
            image = images[0].to("cpu", non_blocking=True)
            prediction = {"image_name": image_name, "image": image, "output": prediction}
            predictions.append(prediction)
    # save visualization results
    if args.show_dir:
        os.makedirs(args.show_dir, exist_ok=True)
        # create a dummy dataset for visualization with multi-workers
        data_loader = create_test_data_loader(
            predictions, accelerator=accelerator, batch_size=1, num_workers=args.workers
        )
        data_loader.collate_fn = partial(_visualize_batch_for_infer, classes=model.CLASSES, **vars(args))
        # iterating the loader triggers the collate_fn, whose side effect is
        # writing the visualization images; the list itself is discarded
        [None for _ in tqdm(data_loader)]
def _visualize_batch_for_infer(
    batch: Tuple[Dict],
    classes: List[str],
    show_conf: float = 0.0,
    show_dir: str = None,
    font_scale: float = 1.0,
    box_thick: int = 3,
    fill_alpha: float = 0.2,
    text_box_color: Tuple[int] = (255, 255, 255),
    text_font_color: Tuple[int] = None,
    text_alpha: float = 0.5,
    **kwargs,  # Not useful
):
    """Collate hook that renders one prediction and saves it under *show_dir*.

    Installed as a DataLoader collate_fn in `inference` so drawing can happen
    in loader worker processes; *batch* is a 1-tuple holding one prediction
    dict with keys image_name / image / output.
    """
    # relies on dict insertion order (image_name, image, output) — matches
    # how `inference` builds each prediction dict
    image_name, image, output = batch[0].values()
    # plot bounding boxes on image
    image = image.numpy().transpose(1, 2, 0)  # CHW tensor -> HWC numpy array
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # back to OpenCV's BGR order
    image = plot_bounding_boxes_on_image_cv2(
        image=image,
        boxes=output["boxes"],
        labels=output["labels"],
        scores=output.get("scores", None),
        classes=classes,
        show_conf=show_conf,
        font_scale=font_scale,
        box_thick=box_thick,
        fill_alpha=fill_alpha,
        text_box_color=text_box_color,
        text_font_color=text_font_color,
        text_alpha=text_alpha,
    )
    # save next to the original basename inside show_dir
    cv2.imwrite(os.path.join(show_dir, os.path.basename(image_name)), image)
# script entry point: run inference when executed directly
if __name__ == "__main__":
    inference()
import argparse
import datetime
import os
import pprint
import re
import time
import accelerate
import torch
from accelerate import Accelerator, DistributedDataParallelKwargs
from accelerate.logging import get_logger
from accelerate.tracking import TensorBoardTracker
from accelerate.utils import ProjectConfiguration
from torch.utils import data
from util.collate_fn import collate_fn
from util.engine import evaluate_acc, train_one_epoch_acc
from util.group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups
from util.lazy_load import Config
from util.misc import default_setup, encode_labels, fixed_generator, seed_worker
from util.utils import HighestCheckpoint, load_checkpoint, load_state_dict
def parse_args():
    """Define and parse the command-line interface of the training script."""
    parser = argparse.ArgumentParser(description="Train a detector")
    parser.add_argument("--config-file", default="configs/train_config.py")
    parser.add_argument(
        "--mixed-precision",
        type=str,
        default=None,
        choices=["no", "fp16", "bf16", "fp8"],
        # the fragments are joined with explicit trailing spaces; the original
        # implicit concatenation rendered as "Choosebetween ... 1.10.and"
        help="Whether to use mixed precision. Choose "
        "between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >= 1.10. "
        "and an Nvidia Ampere GPU.",
    )
    parser.add_argument(
        "--accumulate-steps", type=int, default=1, help="Steps to accumulate gradients"
    )
    parser.add_argument("--seed", type=int, help="Random seed")
    parser.add_argument("--use-deterministic-algorithms", action="store_true")
    # backends accepted by accelerate's torch.compile integration
    dynamo_backend = ["no", "eager", "aot_eager", "inductor", "aot_ts_nvfuser", "nvprims_nvfuser"]
    dynamo_backend += ["cudagraphs", "ofi", "fx2trt", "onnxrt", "tensorrt", "ipex", "tvm"]
    parser.add_argument(
        "--dynamo-backend",
        type=str,
        default="no",
        choices=dynamo_backend,
        help="""
        Set to one of the possible dynamo backends to optimize the training with torch dynamo.
        See https://pytorch.org/docs/stable/torch.compiler.html and
        https://huggingface.co/docs/accelerate/main/en/package_reference/utilities#accelerate.utils.DynamoBackend
        """,
    )
    args = parser.parse_args()
    return args
def train():
    """End-to-end training loop driven by a lazy config file.

    Resolves the output/resume directories, builds data loaders, model,
    optimizer and scheduler from the config, wraps everything with
    HuggingFace Accelerate, then trains and evaluates for cfg.num_epochs.
    """
    args = parse_args()
    cfg = Config(args.config_file, partials=("lr_scheduler", "optimizer", "param_dicts"))
    # modify output directory
    if getattr(cfg, "output_dir", None) is None:
        if hasattr(cfg, "resume_from_checkpoint") and os.path.isdir(str(cfg.resume_from_checkpoint)):
            # default path: xxxx-xx-xx-yy_yy_yy/checkpoints/{checkpoint_1}
            if "checkpoints" in os.listdir(cfg.resume_from_checkpoint):
                # if given output_dir, find the newest checkpoint under checkpoints directory
                output_dir = os.path.join(cfg.resume_from_checkpoint, "checkpoints")
                folders = [os.path.join(output_dir, folder) for folder in os.listdir(output_dir)]
                # sort by the trailing integer in each checkpoint folder name
                folders.sort(
                    key=lambda folder:
                    list(map(int, re.findall(r"[\/]?([0-9]+)(?=[^\/]*$)", folder)))[0]
                )
                cfg.resume_from_checkpoint = folders[-1]
            # .../output_dir/checkpoints/checkpoint_N -> output_dir
            if "checkpoints" in os.path.dirname(cfg.resume_from_checkpoint):
                cfg.output_dir = os.path.dirname(os.path.dirname(cfg.resume_from_checkpoint))
        else:
            # make sure all processes have same output directory
            accelerate.utils.wait_for_everyone()
            cfg.output_dir = os.path.join(
                "checkpoints",
                os.path.basename(cfg.model_path).split(".")[0],
                "train",
                datetime.datetime.now().strftime("%Y-%m-%d-%H_%M_%S"),
            )
    # Initialize accelerator
    project_config = ProjectConfiguration(
        project_dir=cfg.output_dir, total_limit=5, automatic_checkpoint_naming=True
    )
    tensorboard_tracker = TensorBoardTracker(run_name="tf_log", logging_dir=cfg.output_dir)
    kwargs = DistributedDataParallelKwargs(find_unused_parameters=cfg.find_unused_parameters)
    accelerator = Accelerator(
        log_with=tensorboard_tracker,
        project_config=project_config,
        mixed_precision=args.mixed_precision,
        gradient_accumulation_steps=args.accumulate_steps,
        dynamo_backend=args.dynamo_backend,
        step_scheduler_with_optimizer=False,
        kwargs_handlers=[kwargs],
    )
    accelerator.init_trackers("det_train")
    default_setup(args, cfg, accelerator)
    # instantiate dataset
    params = dict(num_workers=cfg.num_workers, collate_fn=collate_fn)
    params.update(dict(pin_memory=cfg.pin_memory, persistent_workers=True))
    if args.use_deterministic_algorithms:
        # set using deterministic algorithms
        torch.backends.cudnn.benchmark = False
        torch.use_deterministic_algorithms(True, warn_only=True)
        params.update({"worker_init_fn": seed_worker, "generator": fixed_generator()})
    # we use group_based sampler, which increases training speed slightly
    group_ids = create_aspect_ratio_groups(cfg.train_dataset, k=3)
    train_batch_sampler = GroupedBatchSampler(
        data.RandomSampler(cfg.train_dataset), group_ids, cfg.batch_size
    )
    train_loader = data.DataLoader(cfg.train_dataset, batch_sampler=train_batch_sampler, **params)
    test_loader = data.DataLoader(cfg.test_dataset, 1, shuffle=False, **params)
    # instantiate model, optimizer and lr_scheduler
    model = Config(cfg.model_path).model
    if accelerator.use_distributed:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    optimizer = cfg.optimizer(cfg.param_dicts(model))
    lr_scheduler = cfg.lr_scheduler(optimizer)
    # register dataset class information into the model, useful for inference
    cat_ids = list(range(max(cfg.train_dataset.coco.cats.keys()) + 1))
    classes = tuple(cfg.train_dataset.coco.cats.get(c, {"name": "none"})["name"] for c in cat_ids)
    model.register_buffer("_classes_", torch.tensor(encode_labels(classes)))
    # log the configurations
    logger = get_logger(os.path.basename(os.getcwd()) + "." + __name__)
    # prepare for distributed training
    model, optimizer, train_loader, test_loader, lr_scheduler = accelerator.prepare(
        model, optimizer, train_loader, test_loader, lr_scheduler
    )
    if getattr(cfg, "resume_from_checkpoint", None) is not None:
        if os.path.isdir(str(cfg.resume_from_checkpoint)):
            # a directory means a full Accelerate state (model + optimizer + epoch)
            accelerator.load_state(cfg.resume_from_checkpoint)
            path = os.path.basename(cfg.resume_from_checkpoint)
            # checkpoint folders are named ..._{epoch}; resume from the next epoch
            cfg.starting_epoch = int(path.split("_")[-1]) + 1
            accelerator.project_configuration.iteration = cfg.starting_epoch
            logger.info(f"resume training of {cfg.output_dir}, from {path}")
        elif os.path.isfile(str(cfg.resume_from_checkpoint)):
            # a file means pretrained weights only
            checkpoint = load_checkpoint(cfg.resume_from_checkpoint)
            checkpoint = checkpoint["model"] if "model" in checkpoint else checkpoint
            load_state_dict(accelerator.unwrap_model(model), checkpoint)
            # overwrite _classes_ in checkpoint with current datasets categories
            model.register_buffer("_classes_", torch.tensor(encode_labels(classes)))
            logger.info(
                f"load pretrained from {cfg.resume_from_checkpoint}, output_dir is {cfg.output_dir}"
            )
        else:
            # NOTE(review): logger.warn is deprecated; prefer logger.warning
            logger.warn("resume_from_checkpoint is not a path or a file, skip loading")
    else:
        n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        logger.info("model parameters: {}".format(n_params))
    logger.info("optimizer: {}".format(optimizer))
    logger.info("lr_scheduler: {}".format(pprint.pformat(lr_scheduler.state_dict())))
    # save dataset name, useful for inference
    if accelerator.is_main_process:
        label_file = os.path.join(cfg.output_dir, "label_names.txt")
        with open(label_file, "w") as f:
            caid_name = [f"{k} {v['name']}" for k, v in cfg.train_dataset.coco.cats.items()]
            caid_name = "\n".join(caid_name)
            f.write(caid_name)
        logger.info(f"Label names is saved to {label_file}")
    logger.info("Start training")
    start_time = time.perf_counter()
    highest_checkpoint = HighestCheckpoint(accelerator, model)
    for epoch in range(cfg.starting_epoch, cfg.num_epochs):
        train_one_epoch_acc(
            model=model,
            optimizer=optimizer,
            data_loader=train_loader,
            epoch=epoch,
            print_freq=cfg.print_freq,
            max_grad_norm=cfg.max_norm,
            accelerator=accelerator,
        )
        lr_scheduler.step()
        # we save model and labels together
        accelerator.save_state(safe_serialization=False)
        logger.info("Start evaluation")
        coco_evaluator = evaluate_acc(model, test_loader, epoch, accelerator)
        # save best results
        cur_ap, cur_ap50 = coco_evaluator.coco_eval["bbox"].stats[:2]
        highest_checkpoint.update(ap=cur_ap, ap50=cur_ap50)
    total_time = time.perf_counter() - start_time
    total_time = str(datetime.timedelta(seconds=int(total_time)))
    logger.info("Training time: {}".format(total_time))
    accelerator.end_training()


# script entry point: run training when executed directly
if __name__ == "__main__":
    train()
# 模型唯一标识
modelCode=730
# 模型名称
modelName=salience_detr_pytorch
# 模型描述
modelDescription=Salience_DETR:用层次显著性滤波细化增强检测变换器的推理和训练
# 应用场景
appScenario=训练,推理,科研,制造,医疗,家居,教育
# 框架类型
frameType=Pytorch
import inspect
import logging
import os
from typing import Dict
from omegaconf import DictConfig
from torch import nn
from util.utils import load_state_dict as _load_state_dict
class BaseBackbone:
    """Shared helpers for backbone factory classes.

    Subclasses provide a ``model_arch`` mapping of architecture names to
    configs; this base supplies weight loading, module freezing, and the
    default/preset/override parameter-merging logic.
    """

    @staticmethod
    def load_state_dict(model: nn.Module, state_dict: Dict):
        """Load *state_dict* into *model*; a ``None`` state_dict is a no-op."""
        if state_dict is None:
            return
        assert isinstance(state_dict, Dict), "state_dict must be OrderedDict."
        _load_state_dict(model, state_dict)

    @staticmethod
    def freeze_module(module: nn.Module):
        """Switch *module* to eval mode and disable gradients on all its parameters."""
        module.eval()
        for param in module.parameters():
            param.requires_grad = False

    def get_instantiate_config(self, func_name, arch, extra_params):
        """Build the instantiation config for *arch*, merging parameter sources.

        For every parameter of *func_name*'s signature, the value is chosen
        with precedence: extra_params > arch preset > signature default, and
        written back into the arch config (plain Dict or omegaconf DictConfig).
        """
        # log some necessary information about backbone
        logger = logging.getLogger(os.path.basename(os.getcwd()) + "." + __name__)
        assert arch is None or arch in self.model_arch, \
            f"Expected architecture in {self.model_arch.keys()} but got {arch}"
        logger.info(f"Backbone architecture: {arch}")
        # start from the arch preset (empty when no arch is requested)
        model_config = {} if arch is None else self.model_arch[arch]
        for name, param in inspect.signature(func_name).parameters.items():
            # signature default is the lowest-priority fallback
            chosen = None if param.default is inspect.Parameter.empty else param.default
            if isinstance(model_config, Dict):
                preset = model_config.get(name, None)
            elif isinstance(model_config, DictConfig):
                preset = getattr(model_config, name, None)
            else:
                preset = None
            # arch preset overrides the default; explicit extras override both
            if preset is not None:
                chosen = preset
            override = extra_params.get(name, None)
            if override is not None:
                chosen = override
            # write the winning value back into the config container
            if isinstance(model_config, Dict):
                model_config[name] = chosen
            elif isinstance(model_config, DictConfig):
                setattr(model_config, name, chosen)
            else:
                raise TypeError("Only Dict and DictConfig supported.")
        return model_config
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
import torch
from torch import Tensor, nn
from torch.nn import functional as F
from torchvision.models.feature_extraction import create_feature_extractor
from torchvision.ops.stochastic_depth import StochasticDepth
from models.backbones.base_backbone import BaseBackbone
from models.bricks.misc import Conv2dNormActivation, Permute
from util.lazy_load import LazyCall as L
from util.lazy_load import instantiate
from util.utils import load_checkpoint
class LayerNorm2d(nn.LayerNorm):
    """LayerNorm applied over the channel dimension of NCHW feature maps."""

    def forward(self, x: Tensor) -> Tensor:
        # NCHW -> NHWC so the trailing dimension matches normalized_shape
        nhwc = x.permute(0, 2, 3, 1)
        normed = F.layer_norm(nhwc, self.normalized_shape, self.weight, self.bias, self.eps)
        # restore the original NCHW layout
        return normed.permute(0, 3, 1, 2)
class CNBlock(nn.Module):
    """ConvNeXt residual block.

    Depthwise 7x7 conv -> LayerNorm -> pointwise MLP (4x expansion, GELU),
    scaled by a learnable per-channel factor and regularized with stochastic
    depth before the residual addition.
    """

    def __init__(
        self,
        dim,
        layer_scale: float,
        stochastic_depth_prob: float,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if norm_layer is None:
            norm_layer = partial(nn.LayerNorm, eps=1e-6)
        self.block = nn.Sequential(
            # depthwise convolution (groups=dim keeps channels independent)
            nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim, bias=True),
            # NCHW -> NHWC so LayerNorm/Linear operate on the channel dim
            Permute([0, 2, 3, 1]),
            norm_layer(dim),
            nn.Linear(in_features=dim, out_features=4 * dim, bias=True),
            nn.GELU(),
            nn.Linear(in_features=4 * dim, out_features=dim, bias=True),
            # NHWC -> NCHW
            Permute([0, 3, 1, 2]),
        )
        # learnable per-channel scaling of the residual branch
        self.layer_scale = nn.Parameter(torch.ones(dim, 1, 1) * layer_scale)
        self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")

    def forward(self, input: Tensor) -> Tensor:
        result = self.layer_scale * self.block(input)
        # randomly drops the whole branch per sample during training only
        result = self.stochastic_depth(result)
        result += input
        return result
class CNBlockConfig:
    """Per-stage ConvNeXt configuration (Section 3 of the ConvNeXt paper).

    ``out_channels`` of ``None`` marks the final stage, which has no
    downsampling layer after it.
    """

    def __init__(
        self,
        input_channels: int,
        out_channels: Optional[int],
        num_layers: int,
    ) -> None:
        self.input_channels = input_channels
        self.out_channels = out_channels
        self.num_layers = num_layers

    def __repr__(self) -> str:
        fields = (
            "input_channels={input_channels}",
            ", out_channels={out_channels}",
            ", num_layers={num_layers}",
        )
        template = self.__class__.__name__ + "(" + "".join(fields) + ")"
        return template.format(**self.__dict__)
class ConvNeXt(nn.Module):
    """ConvNeXt classification network built from per-stage CNBlockConfig entries.

    Structure: 4x4/stride-4 patchify stem, then for each config entry a stack
    of ``num_layers`` blocks followed (when out_channels is set) by a
    LayerNorm + 2x2/stride-2 downsampling conv, and a norm+linear classifier.
    """

    def __init__(
        self,
        block_setting: List[CNBlockConfig],
        stochastic_depth_prob: float = 0.0,
        layer_scale: float = 1e-6,
        num_classes: int = 1000,
        block: Optional[Callable[..., nn.Module]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        **kwargs: Any,  # absorbs unused config keys; not forwarded anywhere
    ) -> None:
        super().__init__()
        if not block_setting:
            raise ValueError("The block_setting should not be empty")
        elif not (
            isinstance(block_setting, Sequence) and all([isinstance(s, CNBlockConfig) for s in block_setting])
        ):
            raise TypeError("The block_setting should be List[CNBlockConfig]")
        if block is None:
            block = CNBlock
        if norm_layer is None:
            norm_layer = partial(LayerNorm2d, eps=1e-6)
        layers: List[nn.Module] = []
        # Stem: patchify with a 4x4 conv of stride 4
        firstconv_output_channels = block_setting[0].input_channels
        layers.append(
            Conv2dNormActivation(
                3,
                firstconv_output_channels,
                kernel_size=4,
                stride=4,
                padding=0,
                norm_layer=norm_layer,
                activation_layer=None,
                bias=True,
            )
        )
        total_stage_blocks = sum(cnf.num_layers for cnf in block_setting)
        stage_block_id = 0
        for cnf in block_setting:
            # Bottlenecks
            stage: List[nn.Module] = []
            for _ in range(cnf.num_layers):
                # adjust stochastic depth probability based on the depth of the stage block
                sd_prob = stochastic_depth_prob * stage_block_id / (total_stage_blocks - 1.0)
                stage.append(block(cnf.input_channels, layer_scale, sd_prob))
                stage_block_id += 1
            layers.append(nn.Sequential(*stage))
            if cnf.out_channels is not None:
                # Downsampling between stages: norm then 2x2 stride-2 conv
                layers.append(
                    nn.Sequential(
                        norm_layer(cnf.input_channels),
                        nn.Conv2d(cnf.input_channels, cnf.out_channels, kernel_size=2, stride=2),
                    )
                )
        self.features = nn.Sequential(*layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        lastblock = block_setting[-1]
        # the final stage may have no downsampling, so its width is input_channels
        lastconv_output_channels = (
            lastblock.out_channels if lastblock.out_channels is not None else lastblock.input_channels
        )
        self.classifier = nn.Sequential(
            norm_layer(lastconv_output_channels), nn.Flatten(1),
            nn.Linear(lastconv_output_channels, num_classes)
        )
        # truncated-normal init for all conv/linear weights, zero biases
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.trunc_normal_(m.weight, std=0.02)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = self.classifier(x)
        return x

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)
class ConvNeXtBackbone(BaseBackbone):
    """Factory producing a ConvNeXt feature extractor for detection.

    NOTE(review): ``__new__`` is used as a factory — it returns a torchvision
    feature extractor, not a ConvNeXtBackbone instance, and its first
    parameter is named ``self`` where ``cls`` would be conventional.
    """
    # yapf: disable
    model_weights = {
        # The following weights are from torchvision
        "conv_t": "https://download.pytorch.org/models/convnext_tiny-983f1562.pth",
        "conv_s": "https://download.pytorch.org/models/convnext_small-0c510722.pth",
        "conv_b": "https://download.pytorch.org/models/convnext_base-6075fbad.pth",
        "conv_l": "https://download.pytorch.org/models/convnext_large-ea097f82.pth",
    }
    # lazy configs per architecture; the extra "url" key is popped in __new__
    model_arch = {
        "conv_t": L(ConvNeXt)(
            block_setting=[
                CNBlockConfig(96, 192, 3),
                CNBlockConfig(192, 384, 3),
                CNBlockConfig(384, 768, 9),
                CNBlockConfig(768, None, 3),
            ],
            stochastic_depth_prob=0.1,
            url=model_weights["conv_t"],
        ),
        "conv_s": L(ConvNeXt)(
            block_setting=[
                CNBlockConfig(96, 192, 3),
                CNBlockConfig(192, 384, 3),
                CNBlockConfig(384, 768, 27),
                CNBlockConfig(768, None, 3),
            ],
            stochastic_depth_prob=0.4,
            url=model_weights["conv_s"],
        ),
        "conv_b": L(ConvNeXt)(
            block_setting=[
                CNBlockConfig(128, 256, 3),
                CNBlockConfig(256, 512, 3),
                CNBlockConfig(512, 1024, 27),
                CNBlockConfig(1024, None, 3),
            ],
            stochastic_depth_prob=0.5,
            url=model_weights["conv_b"],
        ),
        "conv_l": L(ConvNeXt)(
            block_setting=[
                CNBlockConfig(192, 384, 3),
                CNBlockConfig(384, 768, 3),
                CNBlockConfig(768, 1536, 27),
                CNBlockConfig(1536, None, 3),
            ],
            stochastic_depth_prob=0.5,
            url=model_weights["conv_l"],
        )
    }
    # yapf: enable

    def __new__(
        self,
        arch: str,
        weights: Union[str, Dict] = None,
        return_indices: Tuple[int] = (0, 1, 2, 3),
        freeze_indices: Tuple = (),
        **kwargs,
    ):
        """Build a ConvNeXt, load weights, freeze stages, and wrap it as a
        multi-scale feature extractor returning the stages in *return_indices*.
        """
        # get parameters and instantiate backbone
        model_config = self.get_instantiate_config(self, ConvNeXt, arch, kwargs)
        default_weight = model_config.pop("url", None)
        convnext = instantiate(model_config)
        # load state dict (checkpoints may wrap weights under a "model" key)
        weights = load_checkpoint(default_weight if weights is None else weights)
        if isinstance(weights, Dict):
            weights = weights["model"] if "model" in weights else weights
        self.load_state_dict(convnext, weights)
        # freeze stages
        self._freeze_stages(self, convnext, freeze_indices)
        # create feature extractor; in self.features, stage blocks sit at odd
        # indices (2*idx+1) interleaved with stem/downsample layers
        return_layers = [f"features.{2 * idx + 1}" for idx in return_indices]
        convnext = create_feature_extractor(convnext, return_layers)
        convnext.num_channels = [model_config.block_setting[i].input_channels for i in return_indices]
        return convnext

    def _freeze_stages(self, model: nn.Module, freeze_indices: Tuple[int]):
        """Freeze the stem plus the stages (and their downsample layers) in *freeze_indices*."""
        # freeze stem
        if len(freeze_indices) > 0:
            self.freeze_module(model.features[0])
        for idx in freeze_indices:
            # freeze layers
            self.freeze_module(model.features[2 * idx + 1])
            # freeze downsample layers
            if 2 * idx + 2 < len(model.features):
                self.freeze_module(model.features[2 * idx + 2])
This diff is collapsed.
from typing import Callable, Dict, List, Optional, Tuple, Type, Union
import torch
from torch import Tensor, nn
from torchvision.models.feature_extraction import create_feature_extractor
from models.bricks.misc import FrozenBatchNorm2d
from models.backbones.base_backbone import BaseBackbone
from models.bricks.deform_conv2d_pack import DeformConv2dPack
from util.lazy_load import LazyCall as L
from util.lazy_load import instantiate
from util.utils import load_checkpoint
def conv3x3(
    in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1
) -> nn.Conv2d:
    """3x3 convolution with padding"""
    conv_kwargs = dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,  # padding == dilation keeps spatial size for stride 1
        groups=groups,
        dilation=dilation,
        bias=False,  # a norm layer always follows, making the bias redundant
    )
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
def conv3x3_dcn(
    in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1
) -> DeformConv2dPack:
    """3x3 deformable convolution with padding.

    Mirrors :func:`conv3x3` but uses DeformConv2dPack, which additionally
    learns per-position sampling offsets.
    """
    return DeformConv2dPack(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,  # same padding rule as conv3x3
        groups=groups,
        bias=False,
        dilation=dilation,
    )
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """1x1 convolution"""
    # pointwise channel projection; bias omitted because a norm layer follows
    return nn.Conv2d(in_planes, out_planes, stride=stride, kernel_size=1, bias=False)
class BasicBlock(nn.Module):
    """ResNet basic residual block: two 3x3 convs with a skip connection.

    When ``with_dcn`` is set, the second conv is replaced by a deformable
    convolution. Used by ResNet-18/34 style networks.
    """
    # output channels = planes * expansion (no expansion for BasicBlock)
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        with_dcn: bool = False,
    ) -> None:
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError("BasicBlock only supports groups=1 and base_width=64")
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        if with_dcn:
            self.conv2 = conv3x3_dcn(planes, planes)
        else:
            self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # project the identity when spatial size or channels changed
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out
class Bottleneck(nn.Module):
    """ResNet bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand, with skip.

    When ``with_dcn`` is set, the 3x3 conv is replaced by a deformable
    convolution.
    """
    # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
    # while original implementation places the stride at the first 1x1 convolution(self.conv1)
    # according to "Deep residual learning for image recognition" https://arxiv.org/abs/1512.03385.
    # This variant is also known as ResNet V1.5 and improves accuracy according to
    # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
    expansion: int = 4

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        with_dcn: bool = False,
    ) -> None:
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        # inner width scales with base_width and groups (ResNeXt/wide variants)
        width = int(planes * (base_width / 64.0)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        if with_dcn:
            self.conv2 = conv3x3_dcn(width, width, stride, groups, dilation)
        else:
            self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        # project the identity when spatial size or channels changed
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out
class ResNet(nn.Module):
    """torchvision-style ResNet with an extra ``stage_with_dcn`` switch that
    enables deformable convolutions per stage (layer1..layer4)."""

    def __init__(
        self,
        block: Type[Union[BasicBlock, Bottleneck]],
        layers: List[int],
        num_classes: int = 1000,
        zero_init_residual: bool = False,
        groups: int = 1,
        width_per_group: int = 64,
        replace_stride_with_dilation: Optional[List[bool]] = None,
        stage_with_dcn: Optional[List[bool]] = None,  # we only add an extra parameter
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if stage_with_dcn is None:
            stage_with_dcn = [False] * 4
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError(
                "replace_stride_with_dilation should be None "
                f"or a 3-element tuple, got {replace_stride_with_dilation}"
            )
        self.groups = groups
        self.base_width = width_per_group
        # stem: 7x7/stride-2 conv then 3x3/stride-2 max pool
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], with_dcn=stage_with_dcn[0])
        self.layer2 = self._make_layer(
            block,
            128,
            layers[1],
            stride=2,
            dilate=replace_stride_with_dilation[0],
            with_dcn=stage_with_dcn[1],
        )
        self.layer3 = self._make_layer(
            block,
            256,
            layers[2],
            stride=2,
            dilate=replace_stride_with_dilation[1],
            with_dcn=stage_with_dcn[2],
        )
        self.layer4 = self._make_layer(
            block,
            512,
            layers[3],
            stride=2,
            dilate=replace_stride_with_dilation[2],
            with_dcn=stage_with_dcn[3],
        )
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        # standard kaiming/constant initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck) and m.bn3.weight is not None:
                    nn.init.constant_(m.bn3.weight, 0)  # type: ignore[arg-type]
                elif isinstance(m, BasicBlock) and m.bn2.weight is not None:
                    nn.init.constant_(m.bn2.weight, 0)  # type: ignore[arg-type]

    def _make_layer(
        self,
        block: Type[Union[BasicBlock, Bottleneck]],
        planes: int,
        blocks: int,
        stride: int = 1,
        dilate: bool = False,
        with_dcn: bool = False,
    ) -> nn.Sequential:
        """Build one ResNet stage of *blocks* residual blocks; the first block
        carries the stride (or dilation) and the channel projection."""
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            # trade stride for dilation to keep spatial resolution
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            # 1x1 projection so the identity matches the block output
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )
        layers = []
        layers.append(
            block(
                self.inplanes,
                planes,
                stride,
                downsample,
                self.groups,
                self.base_width,
                previous_dilation,
                norm_layer,
                with_dcn,
            )
        )
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(
                    self.inplanes,
                    planes,
                    groups=self.groups,
                    base_width=self.base_width,
                    dilation=self.dilation,
                    norm_layer=norm_layer,
                    with_dcn=with_dcn,
                )
            )
        return nn.Sequential(*layers)

    def _forward_impl(self, x: Tensor) -> Tensor:
        # See note [TorchScript super()]
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)
class ResNetBackbone(BaseBackbone):
    """Factory producing a ResNet feature extractor for detection.

    NOTE(review): ``__new__`` is used as a factory — it returns a torchvision
    feature extractor, not a ResNetBackbone instance, and its first parameter
    is named ``self`` where ``cls`` would be conventional.
    """
    # yapf: disable
    model_weights = {
        # The following weights are from torchvision
        "resnet18": "https://download.pytorch.org/models/resnet18-f37072fd.pth",
        "resnet34": "https://download.pytorch.org/models/resnet34-b627a593.pth",
        "resnet50_v1": "https://download.pytorch.org/models/resnet50-0676ba61.pth",
        "resnet50_v2": "https://download.pytorch.org/models/resnet50-11ad3fa6.pth",
        "resnet101_v1": "https://download.pytorch.org/models/resnet101-63fe2227.pth",
        "resnet101_v2": "https://download.pytorch.org/models/resnet101-cd907fc2.pth",
        "resnet152_v1": "https://download.pytorch.org/models/resnet152-394f9c45.pth",
        "resnet152_v2": "https://download.pytorch.org/models/resnet152-f82ba261.pth",
        "resnext50_32x4d_v1": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
        "resnext50_32x4d_v2": "https://download.pytorch.org/models/resnext50_32x4d-1a0047aa.pth",
        "resnext101_32x8d_v1": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
        "resnext101_32x8d_v2": "https://download.pytorch.org/models/resnext101_32x8d-110c445d.pth",
        "resnext101_64x4d": "https://download.pytorch.org/models/resnext101_64x4d-173b62eb.pth",
        "wide_resnet50_2_v1": "https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth",
        "wide_resnet50_2_v2": "https://download.pytorch.org/models/wide_resnet50_2-9ba9bcbe.pth",
        "wide_resnet101_2_v1": "https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth",
        "wide_resnet101_2_v2": "https://download.pytorch.org/models/wide_resnet101_2-d733dc28.pth",
        # The following weights are transformed from mmpretrain
        "resnext101_32x4d":
            "https://github.com/xiuqhou/pretrained_weights/releases/download/v1.0.1-beta/resnext101_32x4d-e0fa3dd5.pth",
    }
    # lazy configs per architecture; the extra "url" key is popped in __new__
    model_arch = {
        "resnet18": L(ResNet)(block=BasicBlock, layers=(2, 2, 2, 2), url=model_weights["resnet18"]),
        "resnet34": L(ResNet)(block=BasicBlock, layers=(3, 4, 6, 3), url=model_weights["resnet34"]),
        "resnet50": L(ResNet)(block=Bottleneck, layers=(3, 4, 6, 3), url=model_weights["resnet50_v2"]),
        "resnet101": L(ResNet)(block=Bottleneck, layers=(3, 4, 23, 3), url=model_weights["resnet101_v2"]),
        "resnet152": L(ResNet)(block=Bottleneck, layers=(3, 8, 36, 3), url=model_weights["resnet152_v2"]),
        "resnext50_32x4d": L(ResNet)(
            block=Bottleneck,
            layers=(3, 4, 6, 3),
            groups=32,
            width_per_group=4,
            url=model_weights["resnext50_32x4d_v2"],
        ),
        "resnext101_32x4d": L(ResNet)(
            block=Bottleneck,
            layers=(3, 4, 23, 3),
            groups=32,
            width_per_group=4,
            url=model_weights["resnext101_32x4d"],
        ),
        "resnext101_32x8d": L(ResNet)(
            block=Bottleneck,
            layers=(3, 4, 23, 3),
            groups=32,
            width_per_group=8,
            url=model_weights["resnext101_32x8d_v2"],
        ),
        "resnext101_64x4d": L(ResNet)(
            block=Bottleneck,
            layers=(3, 4, 23, 3),
            groups=64,
            width_per_group=4,
            url=model_weights["resnext101_64x4d"],
        ),
        "wide_resnet50_2": L(ResNet)(
            block=Bottleneck,
            layers=(3, 4, 6, 3),
            width_per_group=64 * 2,
            url=model_weights["wide_resnet50_2_v2"],
        ),
        "wide_resnet101_2": L(ResNet)(
            block=Bottleneck,
            layers=(3, 4, 23, 3),
            width_per_group=64 * 2,
            url=model_weights["wide_resnet101_2_v2"],
        ),
    }
    # yapf: enable

    def __new__(
        self,
        arch: str,
        weights: Dict = None,
        return_indices: Tuple[int] = (0, 1, 2, 3),
        freeze_indices: Tuple = (),
        **kwargs,
    ):
        """Build a ResNet, load weights, freeze stages, and wrap it as a
        multi-scale feature extractor returning layer{i+1} for *return_indices*.
        """
        # get parameters and instantiate backbone
        model_config = self.get_instantiate_config(self, ResNet, arch, kwargs)
        default_weight = model_config.pop("url", None)
        resnet = instantiate(model_config)
        # load state dict (checkpoints may wrap weights under a "model" key)
        weights = load_checkpoint(default_weight if weights is None else weights)
        if isinstance(weights, Dict):
            weights = weights["model"] if "model" in weights else weights
        self.load_state_dict(resnet, weights)
        # freeze stages
        self._freeze_stages(self, resnet, freeze_indices)
        # create feature extractor; FrozenBatchNorm2d must be kept as a leaf
        # so FX tracing does not descend into it
        return_layers = [f"layer{idx + 1}" for idx in return_indices]
        resnet = create_feature_extractor(
            resnet, return_layers, tracer_kwargs={"leaf_modules": [FrozenBatchNorm2d]}
        )
        # layer{i+1} outputs 64 * expansion * 2**i channels
        resnet.num_channels = [64 * model_config.block.expansion * 2**idx for idx in return_indices]
        return resnet

    def _freeze_stages(self, model: nn.Module, freeze_indices: Tuple[int]):
        """Freeze the stem plus the residual stages listed in *freeze_indices*."""
        # freeze stem
        if len(freeze_indices) > 0:
            self.freeze_module(model.conv1)
            self.freeze_module(model.bn1)
        # freeze layers
        for idx in freeze_indices:
            self.freeze_module(model.get_submodule(f"layer{idx+1}"))
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment