Commit 2136e796 authored by mashun1's avatar mashun1
Browse files

codeformer

parents
Pipeline #699 canceled with stages
import torch
import sys
# Put the yolov5face package directory first on the import path.
# Presumably needed so that unpickling the checkpoint can resolve its model
# classes — TODO confirm.
sys.path.insert(0,'./facelib/detection/yolov5face')
# Load the checkpoint on CPU and keep only the object stored under 'model'.
# NOTE(review): torch.load unpickles arbitrary objects; only use trusted files.
model = torch.load('facelib/detection/yolov5face/yolov5n-face.pt', map_location='cpu')['model']
# Re-save only the model's state_dict under weights/facelib.
torch.save(model.state_dict(),'weights/facelib/yolov5n-face.pth')
\ No newline at end of file
import math
import time
import numpy as np
import torch
import torchvision
def check_img_size(img_size, s=32):
    """Return ``img_size`` rounded up to a multiple of the stride ``s``.

    The stride is truncated to ``int`` before rounding.
    """
    stride = int(s)
    return make_divisible(img_size, stride)
def make_divisible(x, divisor):
    """Return the smallest multiple of ``divisor`` that is >= ``x``."""
    multiples_needed = math.ceil(x / divisor)
    return multiples_needed * divisor
def xyxy2xywh(x):
    """Convert nx4 boxes from corner form to centre form.

    Input columns are [x1, y1, x2, y2] (top-left, bottom-right); output
    columns are [x_center, y_center, width, height]. The input is not
    modified: a clone (torch) or copy (numpy) is returned.
    """
    out = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    left, top, right, bottom = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    out[:, 0] = (left + right) / 2  # x center
    out[:, 1] = (top + bottom) / 2  # y center
    out[:, 2] = right - left  # width
    out[:, 3] = bottom - top  # height
    return out
def xywh2xyxy(x):
    """Convert nx4 boxes from centre form to corner form.

    Input columns are [x_center, y_center, width, height]; output columns are
    [x1, y1, x2, y2] (top-left, bottom-right). The input is not modified: a
    clone (torch) or copy (numpy) is returned.
    """
    out = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    cx, cy = x[:, 0], x[:, 1]
    half_w, half_h = x[:, 2] / 2, x[:, 3] / 2
    out[:, 0] = cx - half_w  # top left x
    out[:, 1] = cy - half_h  # top left y
    out[:, 2] = cx + half_w  # bottom right x
    out[:, 3] = cy + half_h  # bottom right y
    return out
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale xyxy ``coords`` in place from ``img1_shape`` to ``img0_shape``.

    Shapes are indexed as (height, width). When ``ratio_pad`` is given, the
    gain is ``ratio_pad[0][0]`` and the (x, y) padding is ``ratio_pad[1]``;
    otherwise both are derived from the two shapes. The result is clipped to
    ``img0_shape`` and the same ``coords`` object is returned.
    """
    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:
        # gain = old / new, using the tighter of the two axes
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad_x = (img1_shape[1] - img0_shape[1] * gain) / 2
        pad_y = (img1_shape[0] - img0_shape[0] * gain) / 2
        pad = pad_x, pad_y

    coords[:, [0, 2]] -= pad[0]  # remove x padding
    coords[:, [1, 3]] -= pad[1]  # remove y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords
def clip_coords(boxes, img_shape):
    """Clamp xyxy ``boxes`` in place to an image of shape (height, width).

    Uses the in-place ``clamp_`` on each of the first four columns, so
    ``boxes`` must support that method (e.g. a torch tensor).
    """
    height, width = img_shape[0], img_shape[1]
    # columns are x1, y1, x2, y2 -> x limits are the width, y limits the height
    for col, upper in enumerate((width, height, width, height)):
        boxes[:, col].clamp_(0, upper)
def box_iou(box1, box2):
    """Return the pairwise intersection-over-union (Jaccard index) of boxes.

    Adapted from https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in box1 and box2
    """

    def _area(boxes):
        return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # broadcast box1 against box2 to get every pair's overlap rectangle
    overlap_min = torch.max(box1[:, None, :2], box2[:, :2])
    overlap_max = torch.min(box1[:, None, 2:], box2[:, 2:])
    inter = (overlap_max - overlap_min).clamp(0).prod(2)
    union = _area(box1)[:, None] + _area(box2) - inter
    return inter / union
def non_max_suppression_face(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()):
    """Perform Non-Maximum Suppression (NMS) on face-detection inference results.

    Every candidate row of ``prediction`` is laid out as
    ``[cx, cy, w, h, obj_conf, 10 landmark values, class scores...]``, which is
    why class scores start at column 15.

    Args:
        prediction: tensor indexed as (image, candidate, 15 + num_classes).
        conf_thres: candidates/classes with confidence not above this are dropped.
        iou_thres: IoU threshold passed to ``torchvision.ops.nms``.
        classes: optional iterable of class ids to keep.
        agnostic: if True, boxes of different classes suppress each other.
        labels: optional per-image a-priori labels; each row is
            ``[class, cx, cy, w, h]`` and is appended as a confident candidate.

    Returns:
        A list with one tensor per image, each of shape (n, 16):
        (x1, y1, x2, y2, conf, 10 landmark values, cls).
    """
    nc = prediction.shape[2] - 15  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    max_wh = 4096  # (pixels) maximum box width and height
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    # One independent empty tensor per image (``[tensor] * n`` would alias a single object).
    output = [torch.zeros((0, 16), device=prediction.device) for _ in range(prediction.shape[0])]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            label = labels[xi]
            v = torch.zeros((len(label), nc + 15), device=x.device)
            v[:, :4] = label[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(label)), label[:, 0].long() + 15] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 15:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx16 (xyxy, conf, landmarks, cls)
        if multi_label:
            i, j = (x[:, 15:] > conf_thres).nonzero(as_tuple=False).T
            # Landmarks must be gathered with the same row index ``i`` as the
            # boxes, otherwise the row counts differ and the concat fails.
            x = torch.cat((box[i], x[i, j + 15, None], x[i, 5:15], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 15:].max(1, keepdim=True)
            x = torch.cat((box, conf, x[:, 5:15], j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class (the class id lives in column 15; columns 5..14 are landmarks)
        if classes is not None:
            x = x[(x[:, 15:16] == torch.tensor(classes, device=x.device)).any(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Batched NMS
        c = x[:, 15:16] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS

        if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()):
    """Perform Non-Maximum Suppression (NMS) on inference results.

    Every candidate row of ``prediction`` is laid out as
    ``[cx, cy, w, h, obj_conf, class scores...]``.

    Args:
        prediction: tensor indexed as (image, candidate, 5 + num_classes).
        conf_thres: candidates/classes with confidence not above this are dropped.
        iou_thres: IoU threshold passed to ``torchvision.ops.nms``.
        classes: optional iterable of class ids to keep.
        agnostic: if True, boxes of different classes suppress each other.
        labels: optional per-image a-priori labels; each row is
            ``[class, cx, cy, w, h]`` and is appended as a confident candidate.

    Returns:
        A list with one tensor per image, each of shape (n, 6):
        (x1, y1, x2, y2, conf, cls).
    """
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    max_wh = 4096  # (pixels) maximum box width and height
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    # One independent empty tensor per image (``[tensor] * n`` would alias a single object).
    output = [torch.zeros((0, 6), device=prediction.device) for _ in range(prediction.shape[0])]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            label_id = labels[xi]
            v = torch.zeros((len(label_id), nc + 5), device=x.device)
            v[:, :4] = label_id[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(label_id)), label_id[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue

        x = x[x[:, 4].argsort(descending=True)]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f"WARNING: NMS time limit {time_limit}s exceeded")
            break  # time limit exceeded

    return output
def scale_coords_landmarks(img1_shape, coords, img0_shape, ratio_pad=None):
    """Rescale five (x, y) landmark pairs in place from ``img1_shape`` to ``img0_shape``.

    ``coords`` holds ten columns alternating x and y. Shapes are indexed as
    (height, width). When ``ratio_pad`` is given, the gain is
    ``ratio_pad[0][0]`` and the (x, y) padding is ``ratio_pad[1]``; otherwise
    both are derived from the two shapes. Each column is clamped to the
    target image and the same ``coords`` object is returned.
    """
    if ratio_pad is not None:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]
    else:
        # gain = old / new, using the tighter of the two axes
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2

    x_cols = [0, 2, 4, 6, 8]
    y_cols = [1, 3, 5, 7, 9]
    coords[:, x_cols] -= pad[0]  # remove x padding
    coords[:, y_cols] -= pad[1]  # remove y padding
    coords[:, :10] /= gain

    height, width = img0_shape[0], img0_shape[1]
    for col in range(10):
        # even columns are x values (bounded by width), odd ones y values (height)
        coords[:, col].clamp_(0, width if col % 2 == 0 else height)
    return coords
import torch
from torch import nn
def fuse_conv_and_bn(conv, bn):
    """Fuse a convolution and the batch-norm that follows it into one Conv2d.

    See https://tehnokv.com/posts/fusing-batchnorm-and-conv/

    The fusion uses the batch-norm running statistics, so the result matches
    ``bn(conv(x))`` with ``bn`` in eval mode.

    Args:
        conv: the ``nn.Conv2d`` to fuse.
        bn: the batch-norm layer applied to ``conv``'s output.

    Returns:
        A new ``nn.Conv2d`` (with bias, ``requires_grad`` disabled) on the
        same device as ``conv.weight``.
    """
    fusedconv = (
        nn.Conv2d(
            conv.in_channels,
            conv.out_channels,
            kernel_size=conv.kernel_size,
            stride=conv.stride,
            padding=conv.padding,
            # carry the dilation over; without it dilated convs fuse into a
            # layer with a different receptive field and output size
            dilation=conv.dilation,
            groups=conv.groups,
            bias=True,
        )
        .requires_grad_(False)
        .to(conv.weight.device)
    )

    # prepare filters: scale every output channel by gamma / sqrt(var + eps)
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))

    # prepare spatial bias: the conv bias goes through the same scaling, then the BN shift is added
    b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)

    return fusedconv
def copy_attr(a, b, include=(), exclude=()):
    """Copy instance attributes from ``b`` onto ``a``.

    Names starting with an underscore and names listed in ``exclude`` are
    skipped. If ``include`` is non-empty, only names listed in it are copied.
    """
    for name, value in b.__dict__.items():
        selected = not include or name in include
        if selected and not name.startswith("_") and name not in exclude:
            setattr(a, name, value)
import torch
from facelib.utils import load_file_from_url
from .bisenet import BiSeNet
from .parsenet import ParseNet
def init_parsing_model(model_name='bisenet', half=False, device='cuda'):
    """Build a face-parsing network, load its released weights and return it.

    Args:
        model_name: 'bisenet' or 'parsenet'.
        half: accepted but not used by this function.
        device: device the model is moved to after loading.

    Returns:
        The model in eval mode on ``device``.

    Raises:
        NotImplementedError: for any other ``model_name``.
    """
    if model_name not in ('bisenet', 'parsenet'):
        raise NotImplementedError(f'{model_name} is not implemented.')

    if model_name == 'bisenet':
        model = BiSeNet(num_class=19)
        model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/parsing_bisenet.pth'
    else:
        model = ParseNet(in_size=512, out_size=512, parsing_ch=19)
        model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/parsing_parsenet.pth'

    # fetch (or reuse) the checkpoint under weights/facelib and load it onto CPU storage
    model_path = load_file_from_url(url=model_url, model_dir='weights/facelib', progress=True, file_name=None)
    state = torch.load(model_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state, strict=True)
    model.eval()
    return model.to(device)
import torch
import torch.nn as nn
import torch.nn.functional as F
from .resnet import ResNet18
class ConvBNReLU(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d and ReLU."""

    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1):
        super().__init__()
        self.conv = nn.Conv2d(in_chan, out_chan, kernel_size=ks, stride=stride, padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(out_chan)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU to ``x``."""
        return F.relu(self.bn(self.conv(x)))
class BiSeNetOutput(nn.Module):
    """Output head: a 3x3 ConvBNReLU followed by a bias-free 1x1 conv to ``num_class`` channels."""

    def __init__(self, in_chan, mid_chan, num_class):
        super().__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
        self.conv_out = nn.Conv2d(mid_chan, num_class, kernel_size=1, bias=False)

    def forward(self, x):
        """Return ``(class_map, intermediate_feature)`` for ``x``."""
        hidden = self.conv(x)
        return self.conv_out(hidden), hidden
class AttentionRefinementModule(nn.Module):
    """Re-weight the channels of a ConvBNReLU feature with a globally pooled sigmoid gate."""

    def __init__(self, in_chan, out_chan):
        super().__init__()
        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
        self.bn_atten = nn.BatchNorm2d(out_chan)
        self.sigmoid_atten = nn.Sigmoid()

    def forward(self, x):
        """Return the conv feature multiplied by its per-channel attention."""
        feat = self.conv(x)
        # average over the full spatial extent -> one value per channel
        pooled = F.avg_pool2d(feat, feat.shape[2:])
        gate = self.sigmoid_atten(self.bn_atten(self.conv_atten(pooled)))
        return torch.mul(feat, gate)
class ContextPath(nn.Module):
    """ResNet18 backbone whose two deepest features are refined and merged top-down."""

    def __init__(self):
        super().__init__()
        self.resnet = ResNet18()
        self.arm16 = AttentionRefinementModule(256, 128)
        self.arm32 = AttentionRefinementModule(512, 128)
        self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
        self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)

    def forward(self, x):
        """Return ``(feat8, feat16_up, feat32_up)``.

        ``feat16_up`` is resized to ``feat8``'s spatial size and ``feat32_up``
        to ``feat16``'s.
        """
        feat8, feat16, feat32 = self.resnet(x)
        size8 = tuple(feat8.shape[2:])
        size16 = tuple(feat16.shape[2:])
        size32 = tuple(feat32.shape[2:])

        # global context: pool feat32 to 1x1, project, broadcast back to feat32's size
        global_ctx = self.conv_avg(F.avg_pool2d(feat32, feat32.shape[2:]))
        global_ctx_up = F.interpolate(global_ctx, size32, mode='nearest')

        merged32 = self.arm32(feat32) + global_ctx_up
        feat32_up = self.conv_head32(F.interpolate(merged32, size16, mode='nearest'))

        merged16 = self.arm16(feat16) + feat32_up
        feat16_up = self.conv_head16(F.interpolate(merged16, size8, mode='nearest'))

        return feat8, feat16_up, feat32_up  # x8, x8, x16
class FeatureFusionModule(nn.Module):
    """Fuse two feature maps by concatenation, 1x1 ConvBNReLU and a channel-attention residual."""

    def __init__(self, in_chan, out_chan):
        super().__init__()
        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
        self.conv1 = nn.Conv2d(out_chan, out_chan // 4, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv2 = nn.Conv2d(out_chan // 4, out_chan, kernel_size=1, stride=1, padding=0, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, fsp, fcp):
        """Return the fused feature of ``fsp`` and ``fcp`` (concatenated on dim 1)."""
        fused = self.convblk(torch.cat([fsp, fcp], dim=1))
        # squeeze to one value per channel, then bottleneck -> sigmoid gate
        gate = F.avg_pool2d(fused, fused.shape[2:])
        gate = self.sigmoid(self.conv2(self.relu(self.conv1(gate))))
        return torch.mul(fused, gate) + fused
class BiSeNet(nn.Module):
    """Face-parsing network: context path + feature fusion + three output heads."""

    def __init__(self, num_class):
        super().__init__()
        self.cp = ContextPath()
        self.ffm = FeatureFusionModule(256, 256)
        self.conv_out = BiSeNetOutput(256, 256, num_class)
        self.conv_out16 = BiSeNetOutput(128, 64, num_class)
        self.conv_out32 = BiSeNetOutput(128, 64, num_class)

    def forward(self, x, return_feat=False):
        """Return three class maps bilinearly resized to the input's spatial size.

        With ``return_feat=True`` the three intermediate head features,
        resized the same way, are appended:
        ``(out, out16, out32, feat, feat16, feat32)``.
        """
        target_size = tuple(x.shape[2:])

        def _resize(t):
            return F.interpolate(t, target_size, mode='bilinear', align_corners=True)

        # the res3b1 feature from the context path stands in for the spatial path
        feat_res8, feat_cp8, feat_cp16 = self.cp(x)
        feat_fuse = self.ffm(feat_res8, feat_cp8)

        out, feat = self.conv_out(feat_fuse)
        out16, feat16 = self.conv_out16(feat_cp8)
        out32, feat32 = self.conv_out32(feat_cp16)

        outputs = (_resize(out), _resize(out16), _resize(out32))
        if not return_feat:
            return outputs
        return outputs + (_resize(feat), _resize(feat16), _resize(feat32))
"""Modified from https://github.com/chaofengc/PSFRGAN
"""
import numpy as np
import torch.nn as nn
from torch.nn import functional as F
class NormLayer(nn.Module):
    """Normalization layer selected by name.

    Args:
        channels: input channels, for batch norm, instance norm and group norm.
        normalize_shape: input shape without batch size, for layer norm.
        norm_type: one of 'bn', 'in', 'gn', 'pixel', 'layer', 'none'
            (case-insensitive).
    """

    def __init__(self, channels, normalize_shape=None, norm_type='bn'):
        super().__init__()
        norm_type = norm_type.lower()
        self.norm_type = norm_type

        def _pixel_norm(x):
            return F.normalize(x, p=2, dim=1)

        def _passthrough(x):
            return x * 1.0

        # factories are lazy so only the selected layer is ever constructed
        factories = {
            'bn': lambda: nn.BatchNorm2d(channels, affine=True),
            'in': lambda: nn.InstanceNorm2d(channels, affine=False),
            'gn': lambda: nn.GroupNorm(32, channels, affine=True),
            'pixel': lambda: _pixel_norm,
            'layer': lambda: nn.LayerNorm(normalize_shape),
            'none': lambda: _passthrough,
        }
        factory = factories.get(norm_type)
        if factory is None:
            assert 1 == 0, f'Norm type {norm_type} not support.'
        else:
            self.norm = factory()

    def forward(self, x, ref=None):
        """Normalize ``x``; ``ref`` is only forwarded when ``norm_type`` is 'spade'."""
        if self.norm_type != 'spade':
            return self.norm(x)
        return self.norm(x, ref)
class ReluLayer(nn.Module):
    """Activation layer selected by name.

    Args:
        channels: number of parameters for PReLU (ignored by the other types).
        relu_type: case-insensitive name, candidates are
            - ReLU
            - LeakyReLU: default relu slope 0.2
            - PRelu
            - SELU
            - none: direct pass
    """

    def __init__(self, channels, relu_type='relu'):
        super().__init__()
        relu_type = relu_type.lower()

        def _passthrough(x):
            return x * 1.0

        # factories are lazy so only the selected activation is ever constructed
        factories = {
            'relu': lambda: nn.ReLU(True),
            'leakyrelu': lambda: nn.LeakyReLU(0.2, inplace=True),
            'prelu': lambda: nn.PReLU(channels),
            'selu': lambda: nn.SELU(True),
            'none': lambda: _passthrough,
        }
        factory = factories.get(relu_type)
        if factory is None:
            assert 1 == 0, f'Relu type {relu_type} not support.'
        else:
            self.func = factory()

    def forward(self, x):
        """Apply the selected activation to ``x``."""
        return self.func(x)
class ConvLayer(nn.Module):
    """Optional 2x resize + reflection pad + Conv2d + norm + activation.

    Args:
        in_channels: input channels of the convolution.
        out_channels: output channels of the convolution.
        kernel_size: convolution kernel size.
        scale: 'down' uses stride 2, 'up' applies a nearest-neighbour 2x
            upsample before the conv, anything else leaves the size alone.
        norm_type: name passed to ``NormLayer``; 'bn' disables the conv bias.
        relu_type: name passed to ``ReluLayer``.
        use_pad: whether to reflection-pad before the convolution.
        bias: whether the convolution has a bias (forced off for 'bn').
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 scale='none',
                 norm_type='none',
                 relu_type='none',
                 use_pad=True,
                 bias=True):
        super().__init__()
        self.use_pad = use_pad
        self.norm_type = norm_type
        conv_bias = False if norm_type in ['bn'] else bias
        conv_stride = 2 if scale == 'down' else 1

        def _identity(x):
            return x

        def _upsample2x(x):
            return nn.functional.interpolate(x, scale_factor=2, mode='nearest')

        self.scale_func = _upsample2x if scale == 'up' else _identity

        pad_width = int(np.ceil((kernel_size - 1.) / 2))
        self.reflection_pad = nn.ReflectionPad2d(pad_width)
        self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size, conv_stride, bias=conv_bias)

        self.relu = ReluLayer(out_channels, relu_type)
        self.norm = NormLayer(out_channels, norm_type=norm_type)

    def forward(self, x):
        """Run resize -> (pad) -> conv -> norm -> activation."""
        out = self.scale_func(x)
        if self.use_pad:
            out = self.reflection_pad(out)
        return self.relu(self.norm(self.conv2d(out)))
class ResidualBlock(nn.Module):
    """Two-conv residual block with an optional up/down-scaling shortcut.

    Residual block recommended in: http://torch.ch/blog/2016/02/04/resnets.html

    The shortcut is the identity when ``scale == 'none'`` and the channel
    counts match; otherwise it is a plain ``ConvLayer`` with the same scale.
    """

    def __init__(self, c_in, c_out, relu_type='prelu', norm_type='bn', scale='none'):
        super().__init__()
        needs_projection = scale != 'none' or c_in != c_out
        if needs_projection:
            self.shortcut_func = ConvLayer(c_in, c_out, 3, scale)
        else:
            self.shortcut_func = lambda x: x

        # (scale of conv1, scale of conv2): downscale last, upscale first
        per_conv_scale = {'down': ['none', 'down'], 'up': ['up', 'none'], 'none': ['none', 'none']}
        first_scale, second_scale = per_conv_scale[scale]

        self.conv1 = ConvLayer(c_in, c_out, 3, first_scale, norm_type=norm_type, relu_type=relu_type)
        self.conv2 = ConvLayer(c_out, c_out, 3, second_scale, norm_type=norm_type, relu_type='none')

    def forward(self, x):
        """Return ``shortcut(x) + conv2(conv1(x))``."""
        identity = self.shortcut_func(x)
        return identity + self.conv2(self.conv1(x))
class ParseNet(nn.Module):
    """Encoder / residual-body / decoder network producing a parsing mask and an image.

    Args:
        in_size: input resolution used to derive the number of downsampling blocks.
        out_size: output resolution used to derive the number of upsampling blocks.
        min_feat_size: resolution at the bottleneck (capped at ``in_size``).
        base_ch: channels after the first convolution.
        parsing_ch: channels of the mask output.
        res_depth: number of residual blocks in the body.
        relu_type: activation name passed to the residual blocks.
        norm_type: normalization name passed to the residual blocks.
        ch_range: ``(min_channels, max_channels)`` used to clip every block's width.
    """

    def __init__(self,
                 in_size=128,
                 out_size=128,
                 min_feat_size=32,
                 base_ch=64,
                 parsing_ch=19,
                 res_depth=10,
                 relu_type='LeakyReLU',
                 norm_type='bn',
                 ch_range=(32, 256)):
        super().__init__()
        self.res_depth = res_depth
        act_args = {'norm_type': norm_type, 'relu_type': relu_type}
        min_ch, max_ch = ch_range

        def ch_clip(ch):
            # keep channel counts inside [min_ch, max_ch]
            return max(min_ch, min(ch, max_ch))

        min_feat_size = min(in_size, min_feat_size)

        down_steps = int(np.log2(in_size // min_feat_size))
        up_steps = int(np.log2(out_size // min_feat_size))

        # =============== define encoder-body-decoder ====================
        # Layers are collected in local lists and assigned once as nn.Sequential,
        # in the order encoder, body, decoder.
        encoder = [ConvLayer(3, base_ch, 3, 1)]
        head_ch = base_ch
        for _ in range(down_steps):
            cin, cout = ch_clip(head_ch), ch_clip(head_ch * 2)
            encoder.append(ResidualBlock(cin, cout, scale='down', **act_args))
            head_ch = head_ch * 2

        body = [ResidualBlock(ch_clip(head_ch), ch_clip(head_ch), **act_args) for _ in range(res_depth)]

        decoder = []
        for _ in range(up_steps):
            cin, cout = ch_clip(head_ch), ch_clip(head_ch // 2)
            decoder.append(ResidualBlock(cin, cout, scale='up', **act_args))
            head_ch = head_ch // 2

        self.encoder = nn.Sequential(*encoder)
        self.body = nn.Sequential(*body)
        self.decoder = nn.Sequential(*decoder)
        self.out_img_conv = ConvLayer(ch_clip(head_ch), 3)
        self.out_mask_conv = ConvLayer(ch_clip(head_ch), parsing_ch)

    def forward(self, x):
        """Return ``(out_mask, out_img)`` for the input batch ``x``."""
        feat = self.encoder(x)
        x = feat + self.body(feat)  # residual connection around the whole body
        x = self.decoder(x)
        out_img = self.out_img_conv(x)
        out_mask = self.out_mask_conv(x)
        return out_mask, out_img
import torch.nn as nn
import torch.nn.functional as F
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with padding 1."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
class BasicBlock(nn.Module):
    """Two 3x3 conv/BN layers with a residual shortcut.

    The shortcut is a 1x1 conv + BN when the channel count or stride changes,
    and the identity otherwise.
    """

    def __init__(self, in_chan, out_chan, stride=1):
        super().__init__()
        self.conv1 = conv3x3(in_chan, out_chan, stride)
        self.bn1 = nn.BatchNorm2d(out_chan)
        self.conv2 = conv3x3(out_chan, out_chan)
        self.bn2 = nn.BatchNorm2d(out_chan)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = None
        shape_changes = in_chan != out_chan or stride != 1
        if shape_changes:
            projection = nn.Conv2d(in_chan, out_chan, kernel_size=1, stride=stride, bias=False)
            self.downsample = nn.Sequential(projection, nn.BatchNorm2d(out_chan))

    def forward(self, x):
        """Return ``relu(shortcut(x) + residual(x))``."""
        residual = F.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))

        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(shortcut + residual)
def create_layer_basic(in_chan, out_chan, bnum, stride=1):
    """Stack ``bnum`` BasicBlocks; only the first one changes channels/stride."""
    blocks = [BasicBlock(in_chan, out_chan, stride=stride)]
    blocks.extend(BasicBlock(out_chan, out_chan, stride=1) for _ in range(bnum - 1))
    return nn.Sequential(*blocks)
class ResNet18(nn.Module):
    """ResNet18-style backbone returning the features of its last three stages."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
        self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
        self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
        self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)

    def forward(self, x):
        """Return ``(feat8, feat16, feat32)`` at 1/8, 1/16 and 1/32 resolution."""
        stem = self.maxpool(F.relu(self.bn1(self.conv1(x))))
        feat8 = self.layer2(self.layer1(stem))  # 1/8
        feat16 = self.layer3(feat8)  # 1/16
        feat32 = self.layer4(feat16)  # 1/32
        return feat8, feat16, feat32
from .face_utils import align_crop_face_landmarks, compute_increased_bbox, get_valid_bboxes, paste_face_back
from .misc import img2tensor, load_file_from_url, download_pretrained_models, scandir
# Public API of this package: the names imported above from .face_utils and .misc.
__all__ = [
'align_crop_face_landmarks', 'compute_increased_bbox', 'get_valid_bboxes', 'load_file_from_url',
'download_pretrained_models', 'paste_face_back', 'img2tensor', 'scandir'
]
import cv2
import numpy as np
import os
import torch
from torchvision.transforms.functional import normalize
from facelib.detection import init_detection_model
from facelib.parsing import init_parsing_model
from facelib.utils.misc import img2tensor, imwrite, is_gray, bgr2gray, adain_npy
from basicsr.utils.download_util import load_file_from_url
from basicsr.utils.misc import get_device
# Download URLs of the dlib model files, keyed by role; both are read by
# FaceRestoreHelper.__init__ when det_model == 'dlib'.
dlib_model_url = {
'face_detector': 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/mmod_human_face_detector-4cb19393.dat',
'shape_predictor_5': 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/shape_predictor_5_face_landmarks-c4b1e980.dat'
}
def get_largest_face(det_faces, h, w):
    """Return ``(face, index)`` of the detection with the largest in-image area.

    Each detection is indexed as [x1, y1, x2, y2, ...]; coordinates are
    clamped to ``[0, w]`` / ``[0, h]`` before the area is computed. On ties
    the first detection wins.
    """

    def _clamp(val, length):
        return 0 if val < 0 else (length if val > length else val)

    face_areas = []
    for face in det_faces:
        left = _clamp(face[0], w)
        right = _clamp(face[2], w)
        top = _clamp(face[1], h)
        bottom = _clamp(face[3], h)
        face_areas.append((right - left) * (bottom - top))

    largest_idx = face_areas.index(max(face_areas))
    return det_faces[largest_idx], largest_idx
def get_center_face(det_faces, h=0, w=0, center=None):
    """Return ``(face, index)`` of the detection whose box centre is nearest to a point.

    The reference point is ``center`` when given, otherwise the image centre
    ``(w / 2, h / 2)``. On ties the first detection wins.
    """
    target = np.array([w / 2, h / 2]) if center is None else np.array(center)

    distances = []
    for face in det_faces:
        box_center = np.array([(face[0] + face[2]) / 2, (face[1] + face[3]) / 2])
        distances.append(np.linalg.norm(box_center - target))

    center_idx = distances.index(min(distances))
    return det_faces[center_idx], center_idx
class FaceRestoreHelper(object):
"""Helper for the face restoration pipeline (base class)."""
def __init__(self,
             upscale_factor,
             face_size=512,
             crop_ratio=(1, 1),
             det_model='retinaface_resnet50',
             save_ext='png',
             template_3points=False,
             pad_blur=False,
             use_parse=False,
             device=None):
    """Set up the landmark template, per-image buffers and the detection/parsing models.

    Args:
        upscale_factor: output upscaling factor (stored as ``int``).
        face_size: side length of the square aligned face before ``crop_ratio``.
        crop_ratio: ``(h, w)`` ratio of the cropped face relative to the square face; both >= 1.
        det_model: detector name; ``'dlib'`` selects the dlib detector and shape predictor,
            any other value is passed to ``init_detection_model``.
        save_ext: file extension used when cropped faces are saved.
        template_3points: use a 3-point landmark template (ignored when ``pad_blur`` is True).
        pad_blur: pad and blur the input around each face before alignment.
        use_parse: stored as ``self.use_parse``; the parsing model is created regardless.
        device: torch device; ``get_device()`` is used when None.
    """
    self.pad_blur = pad_blur
    # Padding with blur reads the mouth landmarks, so it needs the 5-point
    # layout. Resolve this BEFORE choosing the template; otherwise a 3-point
    # template would be paired with 5-point landmarks during alignment.
    self.template_3points = False if self.pad_blur is True else template_3points  # improve robustness
    self.upscale_factor = int(upscale_factor)
    # the cropped face ratio based on the square face
    self.crop_ratio = crop_ratio  # (h, w)
    assert (self.crop_ratio[0] >= 1 and self.crop_ratio[1] >= 1), 'crop ration only supports >=1'
    self.face_size = (int(face_size * self.crop_ratio[1]), int(face_size * self.crop_ratio[0]))
    self.det_model = det_model

    if self.det_model == 'dlib':
        # standard 5 landmarks for FFHQ faces with 1024 x 1024
        self.face_template = np.array([[686.77227723, 488.62376238], [586.77227723, 493.59405941],
                                       [337.91089109, 488.38613861], [437.95049505, 493.51485149],
                                       [513.58415842, 678.5049505]])
        # NOTE(review): integer division, and the template is rescaled again
        # below; looks correct only for face_size == 512 — confirm.
        self.face_template = self.face_template / (1024 // face_size)
    elif self.template_3points:
        self.face_template = np.array([[192, 240], [319, 240], [257, 371]])
    else:
        # standard 5 landmarks for FFHQ faces with 512 x 512
        # facexlib
        self.face_template = np.array([[192.98138, 239.94708], [318.90277, 240.1936], [256.63416, 314.01935],
                                       [201.26117, 371.41043], [313.08905, 371.15118]])

    self.face_template = self.face_template * (face_size / 512.0)
    # shift the template so the face stays centred inside an enlarged crop
    if self.crop_ratio[0] > 1:
        self.face_template[:, 1] += face_size * (self.crop_ratio[0] - 1) / 2
    if self.crop_ratio[1] > 1:
        self.face_template[:, 0] += face_size * (self.crop_ratio[1] - 1) / 2
    self.save_ext = save_ext

    # per-image working buffers
    self.all_landmarks_5 = []
    self.det_faces = []
    self.affine_matrices = []
    self.inverse_affine_matrices = []
    self.cropped_faces = []
    self.restored_faces = []
    self.pad_input_imgs = []

    if device is None:
        self.device = get_device()
    else:
        self.device = device

    # init face detection model
    if self.det_model == 'dlib':
        self.face_detector, self.shape_predictor_5 = self.init_dlib(dlib_model_url['face_detector'], dlib_model_url['shape_predictor_5'])
    else:
        self.face_detector = init_detection_model(det_model, half=False, device=self.device)

    # init face parsing model
    self.use_parse = use_parse
    self.face_parse = init_parsing_model(model_name='parsenet', device=self.device)
def set_upscale_factor(self, upscale_factor):
self.upscale_factor = upscale_factor
def read_image(self, img):
    """Load ``img`` into ``self.input_img`` and record whether it is grayscale.

    Args:
        img: an image path or an already loaded cv2 image.

    Raises:
        ValueError: if ``img`` is a path that ``cv2.imread`` cannot read.
    """
    # self.input_img is Numpy array, (h, w, c), BGR, uint8, [0, 255]
    if isinstance(img, str):
        img_path = img
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising
        if img is None:
            raise ValueError(f'Failed to read image: {img_path}')

    if np.max(img) > 256:  # 16-bit image
        img = img / 65535 * 255
    if len(img.shape) == 2:  # gray image
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif img.shape[2] == 4:  # BGRA image with alpha channel
        img = img[:, :, 0:3]

    self.input_img = img
    self.is_gray = is_gray(img, threshold=10)
    if self.is_gray:
        print('Grayscale input: True')

    # upscale small inputs so that the shorter side is 512 pixels
    if min(self.input_img.shape[:2])<512:
        f = 512.0/min(self.input_img.shape[:2])
        self.input_img = cv2.resize(self.input_img, (0,0), fx=f, fy=f, interpolation=cv2.INTER_LINEAR)
def init_dlib(self, detection_path, landmark5_path):
    """Initialize the dlib detectors and predictors.

    Args:
        detection_path: URL of the CNN face-detector weights.
        landmark5_path: URL of the 5-landmark shape-predictor weights.

    Returns:
        ``(face_detector, shape_predictor_5)``.

    Raises:
        ImportError: if dlib is not installed.
    """
    try:
        import dlib
    except ImportError:
        print('Please install dlib by running:' 'conda install -c conda-forge dlib')
        # Re-raise: continuing would download the weights and then fail on
        # the unbound name ``dlib`` anyway.
        raise
    detection_path = load_file_from_url(url=detection_path, model_dir='weights/dlib', progress=True, file_name=None)
    landmark5_path = load_file_from_url(url=landmark5_path, model_dir='weights/dlib', progress=True, file_name=None)
    face_detector = dlib.cnn_face_detection_model_v1(detection_path)
    shape_predictor_5 = dlib.shape_predictor(landmark5_path)
    return face_detector, shape_predictor_5
def get_face_landmarks_5_dlib(self,
only_keep_largest=False,
scale=1):
det_faces = self.face_detector(self.input_img, scale)
if len(det_faces) == 0:
print('No face detected. Try to increase upsample_num_times.')
return 0
else:
if only_keep_largest:
print('Detect several faces and only keep the largest.')
face_areas = []
for i in range(len(det_faces)):
face_area = (det_faces[i].rect.right() - det_faces[i].rect.left()) * (
det_faces[i].rect.bottom() - det_faces[i].rect.top())
face_areas.append(face_area)
largest_idx = face_areas.index(max(face_areas))
self.det_faces = [det_faces[largest_idx]]
else:
self.det_faces = det_faces
if len(self.det_faces) == 0:
return 0
for face in self.det_faces:
shape = self.shape_predictor_5(self.input_img, face.rect)
landmark = np.array([[part.x, part.y] for part in shape.parts()])
self.all_landmarks_5.append(landmark)
return len(self.all_landmarks_5)
def get_face_landmarks_5(self,
only_keep_largest=False,
only_center_face=False,
resize=None,
blur_ratio=0.01,
eye_dist_threshold=None):
if self.det_model == 'dlib':
return self.get_face_landmarks_5_dlib(only_keep_largest)
if resize is None:
scale = 1
input_img = self.input_img
else:
h, w = self.input_img.shape[0:2]
scale = resize / min(h, w)
scale = max(1, scale) # always scale up
h, w = int(h * scale), int(w * scale)
interp = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR
input_img = cv2.resize(self.input_img, (w, h), interpolation=interp)
with torch.no_grad():
bboxes = self.face_detector.detect_faces(input_img)
if bboxes is None or bboxes.shape[0] == 0:
return 0
else:
bboxes = bboxes / scale
for bbox in bboxes:
# remove faces with too small eye distance: side faces or too small faces
eye_dist = np.linalg.norm([bbox[6] - bbox[8], bbox[7] - bbox[9]])
if eye_dist_threshold is not None and (eye_dist < eye_dist_threshold):
continue
if self.template_3points:
landmark = np.array([[bbox[i], bbox[i + 1]] for i in range(5, 11, 2)])
else:
landmark = np.array([[bbox[i], bbox[i + 1]] for i in range(5, 15, 2)])
self.all_landmarks_5.append(landmark)
self.det_faces.append(bbox[0:5])
if len(self.det_faces) == 0:
return 0
if only_keep_largest:
h, w, _ = self.input_img.shape
self.det_faces, largest_idx = get_largest_face(self.det_faces, h, w)
self.all_landmarks_5 = [self.all_landmarks_5[largest_idx]]
elif only_center_face:
h, w, _ = self.input_img.shape
self.det_faces, center_idx = get_center_face(self.det_faces, h, w)
self.all_landmarks_5 = [self.all_landmarks_5[center_idx]]
# pad blurry images
if self.pad_blur:
self.pad_input_imgs = []
for landmarks in self.all_landmarks_5:
# get landmarks
eye_left = landmarks[0, :]
eye_right = landmarks[1, :]
eye_avg = (eye_left + eye_right) * 0.5
mouth_avg = (landmarks[3, :] + landmarks[4, :]) * 0.5
eye_to_eye = eye_right - eye_left
eye_to_mouth = mouth_avg - eye_avg
# Get the oriented crop rectangle
# x: half width of the oriented crop rectangle
x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
# - np.flipud(eye_to_mouth) * [-1, 1]: rotate 90 clockwise
# norm with the hypotenuse: get the direction
x /= np.hypot(*x) # get the hypotenuse of a right triangle
rect_scale = 1.5
x *= max(np.hypot(*eye_to_eye) * 2.0 * rect_scale, np.hypot(*eye_to_mouth) * 1.8 * rect_scale)
# y: half height of the oriented crop rectangle
y = np.flipud(x) * [-1, 1]
# c: center
c = eye_avg + eye_to_mouth * 0.1
# quad: (left_top, left_bottom, right_bottom, right_top)
quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
# qsize: side length of the square
qsize = np.hypot(*x) * 2
border = max(int(np.rint(qsize * 0.1)), 3)
# get pad
# pad: (width_left, height_top, width_right, height_bottom)
pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
int(np.ceil(max(quad[:, 1]))))
pad = [
max(-pad[0] + border, 1),
max(-pad[1] + border, 1),
max(pad[2] - self.input_img.shape[0] + border, 1),
max(pad[3] - self.input_img.shape[1] + border, 1)
]
if max(pad) > 1:
# pad image
pad_img = np.pad(self.input_img, ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
# modify landmark coords
landmarks[:, 0] += pad[0]
landmarks[:, 1] += pad[1]
# blur pad images
h, w, _ = pad_img.shape
y, x, _ = np.ogrid[:h, :w, :1]
mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0],
np.float32(w - 1 - x) / pad[2]),
1.0 - np.minimum(np.float32(y) / pad[1],
np.float32(h - 1 - y) / pad[3]))
blur = int(qsize * blur_ratio)
if blur % 2 == 0:
blur += 1
blur_img = cv2.boxFilter(pad_img, 0, ksize=(blur, blur))
# blur_img = cv2.GaussianBlur(pad_img, (blur, blur), 0)
pad_img = pad_img.astype('float32')
pad_img += (blur_img - pad_img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
pad_img += (np.median(pad_img, axis=(0, 1)) - pad_img) * np.clip(mask, 0.0, 1.0)
pad_img = np.clip(pad_img, 0, 255) # float32, [0, 255]
self.pad_input_imgs.append(pad_img)
else:
self.pad_input_imgs.append(np.copy(self.input_img))
return len(self.all_landmarks_5)
def align_warp_face(self, save_cropped_path=None, border_mode='constant'):
    """Warp every detected face onto the face template.

    For each entry of ``self.all_landmarks_5`` a partial affine transform to
    ``self.face_template`` is estimated, stored in ``self.affine_matrices``
    and used to cut a ``self.face_size`` crop that is appended to
    ``self.cropped_faces``.

    Args:
        save_cropped_path (str | None): When given, every crop is written to
            ``<path without extension>_<idx>.<self.save_ext>``.
        border_mode (str): 'constant', 'reflect101' or 'reflect'. Any other
            value is handed to ``cv2.warpAffine`` unchanged.
    """
    if self.pad_blur:
        assert len(self.pad_input_imgs) == len(
            self.all_landmarks_5), f'Mismatched samples: {len(self.pad_input_imgs)} and {len(self.all_landmarks_5)}'

    # Translate the textual border mode into the OpenCV flag once.
    cv_border_flags = {
        'constant': cv2.BORDER_CONSTANT,
        'reflect101': cv2.BORDER_REFLECT101,
        'reflect': cv2.BORDER_REFLECT,
    }
    border_mode = cv_border_flags.get(border_mode, border_mode)

    for face_idx, landmark_5 in enumerate(self.all_landmarks_5):
        # use 5 landmarks to get affine matrix
        # use cv2.LMEDS method for the equivalence to skimage transform
        # ref: https://blog.csdn.net/yichxi/article/details/115827338
        warp_matrix = cv2.estimateAffinePartial2D(landmark_5, self.face_template, method=cv2.LMEDS)[0]
        self.affine_matrices.append(warp_matrix)

        # With pad_blur each face has its own padded source image.
        source_img = self.pad_input_imgs[face_idx] if self.pad_blur else self.input_img
        face_crop = cv2.warpAffine(
            source_img, warp_matrix, self.face_size, borderMode=border_mode, borderValue=(135, 133, 132))  # gray
        self.cropped_faces.append(face_crop)

        if save_cropped_path is None:
            continue
        path_stem = os.path.splitext(save_cropped_path)[0]
        imwrite(face_crop, f'{path_stem}_{face_idx:02d}.{self.save_ext}')
def get_inverse_affine(self, save_inverse_affine_path=None):
    """Invert every stored affine matrix and scale it by ``self.upscale_factor``.

    The results are appended to ``self.inverse_affine_matrices``.

    Args:
        save_inverse_affine_path (str | None): When given, each inverse
            matrix is saved with ``torch.save`` to
            ``<path without extension>_<idx>.pth``.
    """
    for matrix_idx, forward_matrix in enumerate(self.affine_matrices):
        backward_matrix = cv2.invertAffineTransform(forward_matrix)
        backward_matrix *= self.upscale_factor
        self.inverse_affine_matrices.append(backward_matrix)
        if save_inverse_affine_path is None:
            continue
        path_stem = os.path.splitext(save_inverse_affine_path)[0]
        torch.save(backward_matrix, f'{path_stem}_{matrix_idx:02d}.pth')
def add_restored_face(self, restored_face, input_face=None):
    """Append a restored face to ``self.restored_faces``.

    When ``self.is_gray`` is set the face is first converted with
    ``bgr2gray`` and, if ``input_face`` is given, passed through
    ``adain_npy`` with ``input_face`` as the reference.

    Args:
        restored_face: The restored face image.
        input_face: Optional reference face for the statistics transfer.
    """
    face = restored_face
    if self.is_gray:
        face = bgr2gray(face)  # convert img into grayscale
        # NOTE(review): the transfer is kept inside the grayscale branch;
        # confirm this nesting against the intended behaviour.
        if input_face is not None:
            face = adain_npy(face, input_face)  # transfer the color
    self.restored_faces.append(face)
def paste_faces_to_input_image(self, save_path=None, upsample_img=None, draw_box=False, face_upsampler=None):
"""Paste the restored faces back onto the (upscaled) input image.

Every face in ``self.restored_faces`` is warped with its matrix from
``self.inverse_affine_matrices`` and blended into the background through a
soft mask. The background is ``upsample_img`` when given, otherwise
``self.input_img``; either one is resized to the input size multiplied by
``self.upscale_factor``.

Args:
save_path (str | None): If given, the result is written with
``imwrite`` to this path with its extension replaced by
``self.save_ext``.
upsample_img (ndarray | None): Optional background image.
draw_box (bool): If True, a green border is drawn around each face.
face_upsampler: Optional object whose ``enhance(face, outscale=...)``
returns the upsampled face as its first element.

Returns:
ndarray: The blended image.
"""
# self.input_img must be 3-dimensional: its shape is unpacked into three values.
h, w, _ = self.input_img.shape
h_up, w_up = int(h * self.upscale_factor), int(w * self.upscale_factor)
if upsample_img is None:
# simply resize the background
# upsample_img = cv2.resize(self.input_img, (w_up, h_up), interpolation=cv2.INTER_LANCZOS4)
upsample_img = cv2.resize(self.input_img, (w_up, h_up), interpolation=cv2.INTER_LINEAR)
else:
upsample_img = cv2.resize(upsample_img, (w_up, h_up), interpolation=cv2.INTER_LANCZOS4)
assert len(self.restored_faces) == len(
self.inverse_affine_matrices), ('length of restored_faces and affine_matrices are different.')
# Border masks collected per face; only filled when draw_box is set.
inv_mask_borders = []
for restored_face, inverse_affine in zip(self.restored_faces, self.inverse_affine_matrices):
# NOTE(review): the `/=`, `*=` and `+=` below operate in place on the
# matrices held in self.inverse_affine_matrices, so calling this method a
# second time without rebuilding them applies the adjustment again.
if face_upsampler is not None:
restored_face = face_upsampler.enhance(restored_face, outscale=self.upscale_factor)[0]
# Undo the upscale factor on the whole matrix, then re-apply it to the
# translation column only.
inverse_affine /= self.upscale_factor
inverse_affine[:, 2] *= self.upscale_factor
face_size = (self.face_size[0]*self.upscale_factor, self.face_size[1]*self.upscale_factor)
else:
# Add an offset to inverse affine matrix, for more precise back alignment
if self.upscale_factor > 1:
extra_offset = 0.5 * self.upscale_factor
else:
extra_offset = 0
inverse_affine[:, 2] += extra_offset
face_size = self.face_size
# Warp the restored face into the coordinates of the upscaled image.
inv_restored = cv2.warpAffine(restored_face, inverse_affine, (w_up, h_up))
# if draw_box or not self.use_parse: # use square parse maps
# mask = np.ones(face_size, dtype=np.float32)
# inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
# # remove the black borders
# inv_mask_erosion = cv2.erode(
# inv_mask, np.ones((int(2 * self.upscale_factor), int(2 * self.upscale_factor)), np.uint8))
# pasted_face = inv_mask_erosion[:, :, None] * inv_restored
# total_face_area = np.sum(inv_mask_erosion) # // 3
# # add border
# if draw_box:
# h, w = face_size
# mask_border = np.ones((h, w, 3), dtype=np.float32)
# border = int(1400/np.sqrt(total_face_area))
# mask_border[border:h-border, border:w-border,:] = 0
# inv_mask_border = cv2.warpAffine(mask_border, inverse_affine, (w_up, h_up))
# inv_mask_borders.append(inv_mask_border)
# if not self.use_parse:
# # compute the fusion edge based on the area of face
# w_edge = int(total_face_area**0.5) // 20
# erosion_radius = w_edge * 2
# inv_mask_center = cv2.erode(inv_mask_erosion, np.ones((erosion_radius, erosion_radius), np.uint8))
# blur_size = w_edge * 2
# inv_soft_mask = cv2.GaussianBlur(inv_mask_center, (blur_size + 1, blur_size + 1), 0)
# if len(upsample_img.shape) == 2: # upsample_img is gray image
# upsample_img = upsample_img[:, :, None]
# inv_soft_mask = inv_soft_mask[:, :, None]
# always use square mask
mask = np.ones(face_size, dtype=np.float32)
inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
# remove the black borders
inv_mask_erosion = cv2.erode(
inv_mask, np.ones((int(2 * self.upscale_factor), int(2 * self.upscale_factor)), np.uint8))
pasted_face = inv_mask_erosion[:, :, None] * inv_restored
# Sum of the eroded mask, used below as the face area.
total_face_area = np.sum(inv_mask_erosion) # // 3
# add border
if draw_box:
# NOTE: h and w are rebound here to the face size.
h, w = face_size
mask_border = np.ones((h, w, 3), dtype=np.float32)
# Border thickness shrinks as the pasted face area grows.
border = int(1400/np.sqrt(total_face_area))
mask_border[border:h-border, border:w-border,:] = 0
inv_mask_border = cv2.warpAffine(mask_border, inverse_affine, (w_up, h_up))
inv_mask_borders.append(inv_mask_border)
# compute the fusion edge based on the area of face
w_edge = int(total_face_area**0.5) // 20
erosion_radius = w_edge * 2
inv_mask_center = cv2.erode(inv_mask_erosion, np.ones((erosion_radius, erosion_radius), np.uint8))
blur_size = w_edge * 2
# blur_size is even, so blur_size + 1 is an odd kernel size.
inv_soft_mask = cv2.GaussianBlur(inv_mask_center, (blur_size + 1, blur_size + 1), 0)
if len(upsample_img.shape) == 2: # upsample_img is gray image
upsample_img = upsample_img[:, :, None]
inv_soft_mask = inv_soft_mask[:, :, None]
# parse mask
if self.use_parse:
# inference
face_input = cv2.resize(restored_face, (512, 512), interpolation=cv2.INTER_LINEAR)
face_input = img2tensor(face_input.astype('float32') / 255., bgr2rgb=True, float32=True)
normalize(face_input, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
face_input = torch.unsqueeze(face_input, 0).to(self.device)
with torch.no_grad():
out = self.face_parse(face_input)[0]
# Per-pixel label index from the parsing output.
out = out.argmax(dim=1).squeeze().cpu().numpy()
parse_mask = np.zeros(out.shape)
# Mask value per label index: labels 0, 14, 16, 17 and 18 map to 0, all
# other labels to 255 (label semantics are defined by the parsing model;
# not visible here).
MASK_COLORMAP = [0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 255, 0, 0, 0]
for idx, color in enumerate(MASK_COLORMAP):
parse_mask[out == idx] = color
# blur the mask
parse_mask = cv2.GaussianBlur(parse_mask, (101, 101), 11)
parse_mask = cv2.GaussianBlur(parse_mask, (101, 101), 11)
# remove the black borders
thres = 10
parse_mask[:thres, :] = 0
parse_mask[-thres:, :] = 0
parse_mask[:, :thres] = 0
parse_mask[:, -thres:] = 0
# Rescale from [0, 255] to [0, 1].
parse_mask = parse_mask / 255.
parse_mask = cv2.resize(parse_mask, face_size)
# flags=3 presumably selects an OpenCV interpolation mode -- TODO confirm.
parse_mask = cv2.warpAffine(parse_mask, inverse_affine, (w_up, h_up), flags=3)
inv_soft_parse_mask = parse_mask[:, :, None]
# pasted_face = inv_restored
# Element-wise minimum of the parsing mask and the square soft mask.
fuse_mask = (inv_soft_parse_mask<inv_soft_mask).astype('int')
inv_soft_mask = inv_soft_parse_mask*fuse_mask + inv_soft_mask*(1-fuse_mask)
if len(upsample_img.shape) == 3 and upsample_img.shape[2] == 4: # alpha channel
# Blend only the first three channels and re-attach the alpha channel.
alpha = upsample_img[:, :, 3:]
upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img[:, :, 0:3]
upsample_img = np.concatenate((upsample_img, alpha), axis=2)
else:
upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img
# NOTE(review): the 16-bit decision is a heuristic on the maximum value,
# and the threshold is 256 rather than 255 -- confirm this is intended.
if np.max(upsample_img) > 256: # 16-bit image
upsample_img = upsample_img.astype(np.uint16)
else:
upsample_img = upsample_img.astype(np.uint8)
# draw bounding box
if draw_box:
# upsample_input_img = cv2.resize(input_img, (w_up, h_up))
# Solid colour image with only channel 1 set to 255 (green).
img_color = np.ones([*upsample_img.shape], dtype=np.float32)
img_color[:,:,0] = 0
img_color[:,:,1] = 255
img_color[:,:,2] = 0
for inv_mask_border in inv_mask_borders:
# This blend multiplies by float32 masks, so the result is no longer an
# integer-typed array.
upsample_img = inv_mask_border * img_color + (1 - inv_mask_border) * upsample_img
# upsample_input_img = inv_mask_border * img_color + (1 - inv_mask_border) * upsample_input_img
if save_path is not None:
# The extension of save_path is replaced by self.save_ext.
path = os.path.splitext(save_path)[0]
save_path = f'{path}.{self.save_ext}'
imwrite(upsample_img, save_path)
return upsample_img
def clean_all(self):
    """Reset every per-image buffer to a fresh, independent empty list."""
    buffer_names = (
        'all_landmarks_5',
        'restored_faces',
        'affine_matrices',
        'cropped_faces',
        'inverse_affine_matrices',
        'det_faces',
        'pad_input_imgs',
    )
    for buffer_name in buffer_names:
        setattr(self, buffer_name, [])
\ No newline at end of file
import cv2
import numpy as np
import torch
def compute_increased_bbox(bbox, increase_area, preserve_aspect=True):
    """Enlarge a ``(left, top, right, bot)`` box on all four sides.

    Args:
        bbox: Box as ``(left, top, right, bot)``.
        increase_area (float): Minimum growth per side, as a fraction of the
            box width (horizontal) or height (vertical).
        preserve_aspect (bool): If True, the shorter side is grown further so
            that both sides reach ``(1 + 2 * increase_area)`` times the
            longer side (before integer truncation). If False, both
            directions grow by exactly ``increase_area``.

    Returns:
        tuple[int, int, int, int]: Enlarged box; each coordinate is truncated
        with ``int``.
    """
    x0, y0, x1, y1 = bbox
    box_w = x1 - x0
    box_h = y1 - y0

    if not preserve_aspect:
        grow_w = grow_h = increase_area
    else:
        grow_w = max(increase_area, ((1 + 2 * increase_area) * box_h - box_w) / (2 * box_w))
        grow_h = max(increase_area, ((1 + 2 * increase_area) * box_w - box_h) / (2 * box_h))

    return (
        int(x0 - grow_w * box_w),
        int(y0 - grow_h * box_h),
        int(x1 + grow_w * box_w),
        int(y1 + grow_h * box_h),
    )
def get_valid_bboxes(bboxes, h, w):
    """Clamp a ``(left, top, right, bottom)`` box to an image of size h x w.

    Args:
        bboxes: Box as ``(left, top, right, bottom)``.
        h: Image height; upper bound for ``bottom``.
        w: Image width; upper bound for ``right``.

    Returns:
        tuple: ``(left, top, right, bottom)`` with left/top raised to at
        least 0 and right/bottom lowered to at most w/h.
    """
    x0, y0, x1, y1 = bboxes[0], bboxes[1], bboxes[2], bboxes[3]
    return (max(x0, 0), max(y0, 0), min(x1, w), min(y1, h))
def align_crop_face_landmarks(img,
                              landmarks,
                              output_size,
                              transform_size=None,
                              enable_padding=True,
                              return_inverse_affine=False,
                              shrink_ratio=(1, 1)):
    """Align and crop face with landmarks.

    The output_size and transform_size are based on width. The height is
    adjusted based on shrink_ratio_h/shrink_ratio_w.

    Modified from:
    https://github.com/NVlabs/ffhq-dataset/blob/master/download_ffhq.py

    Args:
        img (Numpy array): Input image.
        landmarks (Numpy array): 5 or 68 or 98 landmarks.
        output_size (int): Output face size.
        transform_size (int): Transform size. Usually four times the
            output_size. Default: None (``output_size * 4``).
        enable_padding (bool): Whether to reflect-pad and blur the image when
            the crop rectangle reaches the image border. Default: True.
        return_inverse_affine (bool): Whether to also return the matrix that
            maps the output face back to the original image. Default: False.
        shrink_ratio (float | tuple[float] | list[float]): Shrink the whole
            face for height and width (crop larger area). Default: (1, 1).

    Returns:
        tuple: ``(cropped_face, inverse_affine)``; ``inverse_affine`` is None
        unless ``return_inverse_affine`` is True.

    Raises:
        ValueError: If the number of landmarks is not 5, 68 or 98.
    """
    lm_type = 'retinaface_5'  # Options: dlib_5, retinaface_5

    if isinstance(shrink_ratio, (float, int)):
        shrink_ratio = (shrink_ratio, shrink_ratio)
    if transform_size is None:
        transform_size = output_size * 4

    # Parse landmarks
    lm = np.array(landmarks)
    if lm.shape[0] == 5 and lm_type == 'retinaface_5':
        eye_left = lm[0]
        eye_right = lm[1]
        mouth_avg = (lm[3] + lm[4]) * 0.5
    elif lm.shape[0] == 5 and lm_type == 'dlib_5':
        lm_eye_left = lm[2:4]
        lm_eye_right = lm[0:2]
        eye_left = np.mean(lm_eye_left, axis=0)
        eye_right = np.mean(lm_eye_right, axis=0)
        mouth_avg = lm[4]
    elif lm.shape[0] == 68:
        lm_eye_left = lm[36:42]
        lm_eye_right = lm[42:48]
        eye_left = np.mean(lm_eye_left, axis=0)
        eye_right = np.mean(lm_eye_right, axis=0)
        mouth_avg = (lm[48] + lm[54]) * 0.5
    elif lm.shape[0] == 98:
        lm_eye_left = lm[60:68]
        lm_eye_right = lm[68:76]
        eye_left = np.mean(lm_eye_left, axis=0)
        eye_right = np.mean(lm_eye_right, axis=0)
        mouth_avg = (lm[76] + lm[82]) * 0.5
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(f'Unsupported number of landmarks: {lm.shape[0]}. Expected 5, 68 or 98.')

    eye_avg = (eye_left + eye_right) * 0.5
    eye_to_eye = eye_right - eye_left
    eye_to_mouth = mouth_avg - eye_avg

    # Get the oriented crop rectangle
    # x: half width of the oriented crop rectangle
    x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
    #  - np.flipud(eye_to_mouth) * [-1, 1]: rotate 90 clockwise
    # norm with the hypotenuse: get the direction
    x /= np.hypot(*x)  # get the hypotenuse of a right triangle
    rect_scale = 1  # TODO: you can edit it to get larger rect
    x *= max(np.hypot(*eye_to_eye) * 2.0 * rect_scale, np.hypot(*eye_to_mouth) * 1.8 * rect_scale)
    # y: half height of the oriented crop rectangle
    y = np.flipud(x) * [-1, 1]

    x *= shrink_ratio[1]  # width
    y *= shrink_ratio[0]  # height

    # c: center
    c = eye_avg + eye_to_mouth * 0.1
    # quad: (left_top, left_bottom, right_bottom, right_top)
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
    # qsize: side length of the square
    qsize = np.hypot(*x) * 2

    # Keep the rectangle in original-image coordinates for the inverse affine.
    quad_ori = np.copy(quad)

    # Shrink, for large face
    # TODO: do we really need shrink
    shrink = int(np.floor(qsize / output_size * 0.5))
    if shrink > 1:
        h, w = img.shape[0:2]
        rsize = (int(np.rint(float(w) / shrink)), int(np.rint(float(h) / shrink)))
        img = cv2.resize(img, rsize, interpolation=cv2.INTER_AREA)
        quad /= shrink
        qsize /= shrink

    # Crop
    h, w = img.shape[0:2]
    border = max(int(np.rint(qsize * 0.1)), 3)
    crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
            int(np.ceil(max(quad[:, 1]))))
    crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, w), min(crop[3] + border, h))
    if crop[2] - crop[0] < w or crop[3] - crop[1] < h:
        img = img[crop[1]:crop[3], crop[0]:crop[2], :]
        quad -= crop[0:2]

    # Pad
    # pad: (width_left, height_top, width_right, height_bottom)
    h, w = img.shape[0:2]
    pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
           int(np.ceil(max(quad[:, 1]))))
    pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - w + border, 0), max(pad[3] - h + border, 0))
    if enable_padding and max(pad) > border - 4:
        pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
        img = np.pad(img, ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
        h, w = img.shape[0:2]
        # NOTE: x and y are reused here as pixel coordinate grids.
        y, x, _ = np.ogrid[:h, :w, :1]
        # mask is 1 at the image edge and falls to 0 at the end of the padding.
        mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0],
                                           np.float32(w - 1 - x) / pad[2]),
                          1.0 - np.minimum(np.float32(y) / pad[1],
                                           np.float32(h - 1 - y) / pad[3]))
        blur = int(qsize * 0.02)
        if blur % 2 == 0:
            blur += 1
        blur_img = cv2.boxFilter(img, 0, ksize=(blur, blur))

        img = img.astype('float32')
        img += (blur_img - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
        img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
        img = np.clip(img, 0, 255)  # float32, [0, 255]
        quad += pad[:2]

    # Transform use cv2
    h_ratio = shrink_ratio[0] / shrink_ratio[1]
    dst_h, dst_w = int(transform_size * h_ratio), transform_size
    template = np.array([[0, 0], [0, dst_h], [dst_w, dst_h], [dst_w, 0]])
    # use cv2.LMEDS method for the equivalence to skimage transform
    # ref: https://blog.csdn.net/yichxi/article/details/115827338
    affine_matrix = cv2.estimateAffinePartial2D(quad, template, method=cv2.LMEDS)[0]
    cropped_face = cv2.warpAffine(
        img, affine_matrix, (dst_w, dst_h), borderMode=cv2.BORDER_CONSTANT, borderValue=(135, 133, 132))  # gray

    if output_size < transform_size:
        cropped_face = cv2.resize(
            cropped_face, (output_size, int(output_size * h_ratio)), interpolation=cv2.INTER_LINEAR)

    if return_inverse_affine:
        dst_h, dst_w = int(output_size * h_ratio), output_size
        # Same corner order as quad. The left-bottom corner is (0, dst_h), so
        # the target stays a rectangle for non-square shrink ratios.
        template = np.array([[0, 0], [0, dst_h], [dst_w, dst_h], [dst_w, 0]])
        # use cv2.LMEDS method for the equivalence to skimage transform
        # ref: https://blog.csdn.net/yichxi/article/details/115827338
        affine_matrix = cv2.estimateAffinePartial2D(quad_ori, template, method=cv2.LMEDS)[0]
        inverse_affine = cv2.invertAffineTransform(affine_matrix)
    else:
        inverse_affine = None
    return cropped_face, inverse_affine
def paste_face_back(img, face, inverse_affine):
    """Warp ``face`` with ``inverse_affine`` and blend it into ``img``.

    Args:
        img (ndarray): Destination image.
        face (ndarray): Face image to paste.
        inverse_affine (ndarray): Affine matrix handed to ``cv2.warpAffine``
            to map the face into ``img`` coordinates.

    Returns:
        ndarray: Blended image (float32, [0, 255]).
    """
    canvas_h, canvas_w = img.shape[0:2]
    canvas_size = (canvas_w, canvas_h)
    face_h, face_w = face.shape[0:2]

    warped_face = cv2.warpAffine(face, inverse_affine, canvas_size)
    warped_mask = cv2.warpAffine(np.ones((face_h, face_w, 3), dtype=np.float32), inverse_affine, canvas_size)

    # Erode the mask slightly to drop the black border left by the warp.
    hard_mask = cv2.erode(warped_mask, np.ones((2, 2), np.uint8))
    bordered_face = hard_mask * warped_face

    # The mask has 3 identical channels, hence the division by 3.
    face_area = np.sum(hard_mask) // 3
    # Width of the fusion edge, derived from the area of the face.
    edge = int(face_area**0.5) // 20
    kernel_side = edge * 2
    core_mask = cv2.erode(hard_mask, np.ones((kernel_side, kernel_side), np.uint8))
    soft_mask = cv2.GaussianBlur(core_mask, (kernel_side + 1, kernel_side + 1), 0)

    return soft_mask * bordered_face + (1 - soft_mask) * img
if __name__ == '__main__':
    # Demo: detect the largest face of one image, align/crop it, then paste
    # the crop back and write both results under tmp/.
    import os

    from facelib.detection import init_detection_model
    from facelib.utils.face_restoration_helper import get_largest_face

    img_path = '/home/wxt/datasets/ffhq/ffhq_wild/00009.png'
    # splitext lives in os.path, not in os.
    img_name = os.path.splitext(os.path.basename(img_path))[0]

    # initialize model
    det_net = init_detection_model('retinaface_resnet50', half=False)
    img_ori = cv2.imread(img_path)
    h, w = img_ori.shape[0:2]
    # if larger than 800, scale it
    scale = max(h / 800, w / 800)
    if scale > 1:
        img = cv2.resize(img_ori, (int(w / scale), int(h / scale)), interpolation=cv2.INTER_LINEAR)
    else:
        # Small images are used as they are; without this `img` is undefined.
        img = img_ori

    with torch.no_grad():
        bboxes = det_net.detect_faces(img, 0.97)
    if scale > 1:
        bboxes *= scale  # the score is incorrect
    bboxes = get_largest_face(bboxes, h, w)[0]

    # Entries 5..14 of the detection hold the five (x, y) landmark pairs.
    landmarks = np.array([[bboxes[i], bboxes[i + 1]] for i in range(5, 15, 2)])

    cropped_face, inverse_affine = align_crop_face_landmarks(
        img_ori,
        landmarks,
        output_size=512,
        transform_size=None,
        enable_padding=True,
        return_inverse_affine=True,
        shrink_ratio=(1, 1))

    # cv2.imwrite does not create missing directories.
    os.makedirs('tmp', exist_ok=True)
    cv2.imwrite(f'tmp/{img_name}_cropeed_face.png', cropped_face)
    img = paste_face_back(img_ori, cropped_face, inverse_affine)
    cv2.imwrite(f'tmp/{img_name}_back.png', img)
import cv2
import os
import os.path as osp
import numpy as np
from PIL import Image
import torch
from torch.hub import download_url_to_file, get_dir
from urllib.parse import urlparse
# from basicsr.utils.download_util import download_file_from_google_drive
# Directory three levels above this file's absolute path; used as the base
# for relative model directories in load_file_from_url.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def download_pretrained_models(file_ids, save_path_root):
    """Download Google Drive files with ``gdown``.

    Args:
        file_ids (dict): Maps the file name to save under to its Google
            Drive file id.
        save_path_root (str): Destination folder; created if missing.

    Raises:
        ValueError: If an existing file is found and the user answers the
            overwrite prompt with anything other than Y/N.
    """
    import gdown

    os.makedirs(save_path_root, exist_ok=True)

    for file_name, file_id in file_ids.items():
        file_url = 'https://drive.google.com/uc?id='+file_id
        save_path = osp.abspath(osp.join(save_path_root, file_name))

        if not osp.exists(save_path):
            print(f'Downloading {file_name} to {save_path}')
            gdown.download(file_url, save_path, quiet=False)
            continue

        # The file is already there: ask before overwriting it.
        answer = input(f'{file_name} already exist. Do you want to cover it? Y/N\n').lower()
        if answer == 'n':
            print(f'Skipping {file_name}')
        elif answer == 'y':
            print(f'Covering {file_name} to {save_path}')
            gdown.download(file_url, save_path, quiet=False)
        else:
            raise ValueError('Wrong input. Only accepts Y/N.')
def imwrite(img, file_path, params=None, auto_mkdir=True):
    """Write an image to disk with OpenCV.

    Args:
        img (ndarray): Image array to be written.
        file_path (str): Image file path.
        params (None or list): Same as opencv's :func:`imwrite` interface.
        auto_mkdir (bool): Whether to create the parent folder of
            `file_path` when it does not exist.

    Returns:
        bool: Successful or not.
    """
    if auto_mkdir:
        parent_dir = os.path.dirname(file_path)
        os.makedirs(os.path.abspath(parent_dir), exist_ok=True)
    return cv2.imwrite(file_path, img, params)
def img2tensor(imgs, bgr2rgb=True, float32=True):
    """Convert HWC numpy image(s) to CHW tensor(s).

    Args:
        imgs (list[ndarray] | ndarray): Input images.
        bgr2rgb (bool): Whether to change bgr to rgb (3-channel images only).
        float32 (bool): Whether to change to float32.

    Returns:
        list[tensor] | tensor: A list of tensors for a list input, otherwise
        a single tensor.
    """

    def _convert(array):
        # Channel swap applies to 3-channel images only; float64 input is
        # cast to float32 before being handed to cv2.cvtColor.
        if array.shape[2] == 3 and bgr2rgb:
            if array.dtype == 'float64':
                array = array.astype('float32')
            array = cv2.cvtColor(array, cv2.COLOR_BGR2RGB)
        tensor = torch.from_numpy(array.transpose(2, 0, 1))  # HWC -> CHW
        return tensor.float() if float32 else tensor

    if not isinstance(imgs, list):
        return _convert(imgs)
    return [_convert(single) for single in imgs]
def load_file_from_url(url, model_dir=None, progress=True, file_name=None):
    """Download ``url`` into a local model directory unless already cached.

    Ref:https://github.com/1adrianb/face-alignment/blob/master/face_alignment/utils.py

    Args:
        url (str): Address of the file.
        model_dir (str | None): Target directory, joined onto ``ROOT_DIR``.
            Defaults to ``<torch hub dir>/checkpoints``.
        progress (bool): Forwarded to ``download_url_to_file``.
        file_name (str | None): Name to store the file under. Defaults to
            the last path component of ``url``.

    Returns:
        str: Absolute path of the cached file.
    """
    if model_dir is None:
        model_dir = os.path.join(get_dir(), 'checkpoints')
    os.makedirs(os.path.join(ROOT_DIR, model_dir), exist_ok=True)

    # Default to the name found in the URL; an explicit file_name wins.
    local_name = os.path.basename(urlparse(url).path)
    if file_name is not None:
        local_name = file_name

    cached_file = os.path.abspath(os.path.join(ROOT_DIR, model_dir, local_name))
    if os.path.exists(cached_file):
        return cached_file

    print(f'Downloading: "{url}" to {cached_file}\n')
    download_url_to_file(url, cached_file, hash_prefix=None, progress=progress)
    return cached_file
def scandir(dir_path, suffix=None, recursive=False, full_path=False):
    """Scan a directory to find the interested files.

    Files whose name starts with '.' are never yielded. With ``recursive``
    set, every sub-directory is descended into; entries that are neither a
    visible regular file nor a directory (hidden files, broken symlinks, ...)
    are skipped.

    Args:
        dir_path (str): Path of the directory.
        suffix (str | tuple(str), optional): File suffix that we are
            interested in. Default: None.
        recursive (bool, optional): If set to True, recursively scan the
            directory. Default: False.
        full_path (bool, optional): If set to True, include the dir_path.
            Default: False.

    Returns:
        A generator for all the interested files with relative paths.

    Raises:
        TypeError: If ``suffix`` is neither None, a string nor a tuple.
    """
    if (suffix is not None) and not isinstance(suffix, (str, tuple)):
        raise TypeError('"suffix" must be a string or tuple of strings')

    root = dir_path

    def _scandir(dir_path, suffix, recursive):
        for entry in os.scandir(dir_path):
            if not entry.name.startswith('.') and entry.is_file():
                return_path = entry.path if full_path else osp.relpath(entry.path, root)
                if suffix is None or return_path.endswith(suffix):
                    yield return_path
            elif recursive and entry.is_dir():
                # Only directories can be scanned; recursing into a hidden
                # file would raise NotADirectoryError.
                yield from _scandir(entry.path, suffix=suffix, recursive=recursive)

    return _scandir(dir_path, suffix=suffix, recursive=recursive)
def is_gray(img, threshold=10):
    """Decide whether an image is (close to) grayscale.

    Single-band images are gray by definition. Otherwise the variances of
    the pairwise differences between the first three channels are averaged
    and compared against ``threshold``.

    Args:
        img (ndarray): Image array accepted by ``PIL.Image.fromarray``.
        threshold (float): Largest mean variance still regarded as gray.

    Returns:
        bool: True if the image is regarded as grayscale.
    """
    pil_img = Image.fromarray(img)
    if len(pil_img.getbands()) == 1:
        return True

    # int16 so that channel differences can go negative without wrapping.
    ch0, ch1, ch2 = (np.asarray(pil_img.getchannel(channel=k), dtype=np.int16) for k in range(3))
    mean_var = ((ch0 - ch1).var() + (ch1 - ch2).var() + (ch2 - ch0).var()) / 3.0
    return bool(mean_var <= threshold)
def rgb2gray(img, out_channel=3):
    """Convert an RGB image (H, W, C) to gray with 0.2989/0.5870/0.1140 weights.

    Args:
        img (ndarray): Image whose first three channels are R, G, B.
        out_channel (int): If 3, the gray plane is repeated into three
            channels; any other value returns the 2-D plane.

    Returns:
        ndarray: Gray image of shape (H, W, 3) or (H, W).
    """
    red, green, blue = img[:, :, 0], img[:, :, 1], img[:, :, 2]
    luma = 0.2989 * red + 0.5870 * green + 0.1140 * blue
    if out_channel != 3:
        return luma
    return luma[:, :, np.newaxis].repeat(3, axis=2)
def bgr2gray(img, out_channel=3):
    """Convert a BGR image (H, W, C) to gray with 0.2989/0.5870/0.1140 weights.

    Args:
        img (ndarray): Image whose first three channels are B, G, R.
        out_channel (int): If 3, the gray plane is repeated into three
            channels; any other value returns the 2-D plane.

    Returns:
        ndarray: Gray image of shape (H, W, 3) or (H, W).
    """
    blue, green, red = img[:, :, 0], img[:, :, 1], img[:, :, 2]
    luma = 0.2989 * red + 0.5870 * green + 0.1140 * blue
    if out_channel != 3:
        return luma
    return luma[:, :, np.newaxis].repeat(3, axis=2)
def calc_mean_std(feat, eps=1e-5):
    """Per-channel mean and standard deviation of a 3D feature.

    Args:
        feat (numpy): 3D array with the channel as the last axis.
        eps (float): Added to the variance before the square root.

    Returns:
        tuple: ``(mean, std)``, each reshaped to (1, 1, c).
    """
    shape = feat.shape
    assert len(shape) == 3, 'The input feature should be 3D tensor.'
    channels = shape[2]
    # Collapse the two leading axes so statistics are taken per channel.
    flat = feat.reshape(-1, channels)
    std = np.sqrt(flat.var(axis=0) + eps).reshape(1, 1, channels)
    mean = flat.mean(axis=0).reshape(1, 1, channels)
    return mean, std
def adain_npy(content_feat, style_feat):
    """Adaptive instance normalization for numpy.

    The content feature is normalized with its own per-channel statistics
    and re-scaled with those of the style feature.

    Args:
        content_feat (numpy): The input feature.
        style_feat (numpy): The reference feature.

    Returns:
        numpy: Array with the shape of ``content_feat``.
    """
    target_shape = content_feat.shape

    def _expand(stat):
        # Stretch a (1, 1, c) statistic to the full content shape.
        return np.broadcast_to(stat, target_shape)

    style_mean, style_std = calc_mean_std(style_feat)
    content_mean, content_std = calc_mean_std(content_feat)
    whitened = (content_feat - _expand(content_mean)) / _expand(content_std)
    return whitened * _expand(style_std) + _expand(style_mean)
\ No newline at end of file
import os
import cv2
import argparse
import glob
import torch
from torchvision.transforms.functional import normalize
from basicsr.utils import imwrite, img2tensor, tensor2img
from basicsr.utils.download_util import load_file_from_url
from basicsr.utils.misc import gpu_is_available, get_device
from facelib.utils.face_restoration_helper import FaceRestoreHelper
from facelib.utils.misc import is_gray
from basicsr.utils.registry import ARCH_REGISTRY
# Download URLs of the pretrained checkpoints, keyed by task name.
pretrain_model_url = {
'restoration': 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth',
}
def set_realesrgan():
    """Build the x2 RealESRGAN upsampler.

    Half precision is enabled only when CUDA is available and the name of
    GPU 0 contains neither '1650' nor '1660'. The tile size is read from the
    module-level ``args.bg_tile``.

    Returns:
        RealESRGANer: The configured upsampler.
    """
    from basicsr.archs.rrdbnet_arch import RRDBNet
    from basicsr.utils.realesrgan_utils import RealESRGANer

    use_half = False
    if torch.cuda.is_available():  # set False in CPU/MPS mode
        gpu_name = torch.cuda.get_device_name(0)
        # set False for GPUs that don't support f16
        use_half = not any(tag in gpu_name for tag in ('1650', '1660'))

    model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
    upsampler = RealESRGANer(
        scale=2,
        model_path="https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/RealESRGAN_x2plus.pth",
        model=model,
        tile=args.bg_tile,
        tile_pad=40,
        pre_pad=0,
        half=use_half,
    )

    if gpu_is_available():
        return upsampler

    # CPU
    import warnings
    warnings.warn('Running on CPU now! Make sure your PyTorch version matches your CUDA.'
                  'The unoptimized RealESRGAN is slow on CPU. '
                  'If you want to disable it, please remove `--bg_upsampler` and `--face_upsample` in command.',
                  category=RuntimeWarning)
    return upsampler
if __name__ == '__main__':
    # Command-line entry point: restore faces in an image, a folder of images
    # or a video with CodeFormer, optionally upsampling background and faces
    # with RealESRGAN.
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device = get_device()
    parser = argparse.ArgumentParser()

    parser.add_argument('-i', '--input_path', type=str, default='./inputs/whole_imgs',
            help='Input image, video or folder. Default: inputs/whole_imgs')
    parser.add_argument('-o', '--output_path', type=str, default=None,
            help='Output folder. Default: results/<input_name>_<w>')
    parser.add_argument('-w', '--fidelity_weight', type=float, default=0.5,
            help='Balance the quality and fidelity. Default: 0.5')
    parser.add_argument('-s', '--upscale', type=int, default=2,
            help='The final upsampling scale of the image. Default: 2')
    parser.add_argument('--has_aligned', action='store_true', help='Input are cropped and aligned faces. Default: False')
    parser.add_argument('--only_center_face', action='store_true', help='Only restore the center face. Default: False')
    parser.add_argument('--draw_box', action='store_true', help='Draw the bounding box for the detected faces. Default: False')
    # large det_model: 'YOLOv5l', 'retinaface_resnet50'
    # small det_model: 'YOLOv5n', 'retinaface_mobile0.25'
    parser.add_argument('--detection_model', type=str, default='retinaface_resnet50',
            help='Face detector. Optional: retinaface_resnet50, retinaface_mobile0.25, YOLOv5l, YOLOv5n, dlib. '
                 'Default: retinaface_resnet50')
    parser.add_argument('--bg_upsampler', type=str, default='None', help='Background upsampler. Optional: realesrgan')
    parser.add_argument('--face_upsample', action='store_true', help='Face upsampler after enhancement. Default: False')
    parser.add_argument('--bg_tile', type=int, default=400, help='Tile size for background sampler. Default: 400')
    parser.add_argument('--suffix', type=str, default=None, help='Suffix of the restored faces. Default: None')
    parser.add_argument('--save_video_fps', type=float, default=None, help='Frame rate for saving video. Default: None')

    args = parser.parse_args()

    # ------------------------ input & output ------------------------
    w = args.fidelity_weight
    input_video = False
    if args.input_path.endswith(('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')):  # input single img path
        input_img_list = [args.input_path]
        result_root = f'results/test_img_{w}'
    elif args.input_path.endswith(('mp4', 'mov', 'avi', 'MP4', 'MOV', 'AVI')):  # input video path
        from basicsr.utils.video_util import VideoReader, VideoWriter
        # For videos the list holds the decoded frames instead of file paths.
        input_img_list = []
        vidreader = VideoReader(args.input_path)
        image = vidreader.get_frame()
        while image is not None:
            input_img_list.append(image)
            image = vidreader.get_frame()
        audio = vidreader.get_audio()
        fps = vidreader.get_fps() if args.save_video_fps is None else args.save_video_fps
        video_name = os.path.basename(args.input_path)[:-4]
        result_root = f'results/{video_name}_{w}'
        input_video = True
        vidreader.close()
    else:  # input img folder
        if args.input_path.endswith('/'):  # solve when path ends with /
            args.input_path = args.input_path[:-1]
        # scan all the jpg and png images
        input_img_list = sorted(glob.glob(os.path.join(args.input_path, '*.[jpJP][pnPN]*[gG]')))
        result_root = f'results/{os.path.basename(args.input_path)}_{w}'

    if not args.output_path is None:  # set output path
        result_root = args.output_path

    test_img_num = len(input_img_list)
    if test_img_num == 0:
        raise FileNotFoundError('No input image/video is found...\n'
            '\tNote that --input_path for video should end with .mp4|.mov|.avi')

    # ------------------ set up background upsampler ------------------
    if args.bg_upsampler == 'realesrgan':
        bg_upsampler = set_realesrgan()
    else:
        bg_upsampler = None

    # ------------------ set up face upsampler ------------------
    if args.face_upsample:
        # Reuse the background upsampler when there is one.
        if bg_upsampler is not None:
            face_upsampler = bg_upsampler
        else:
            face_upsampler = set_realesrgan()
    else:
        face_upsampler = None

    # ------------------ set up CodeFormer restorer -------------------
    net = ARCH_REGISTRY.get('CodeFormer')(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9,
                                            connect_list=['32', '64', '128', '256']).to(device)

    # ckpt_path = 'experiments/20240106_012817_CodeFormer_stage3/models/net_g_150000.pth'
    ckpt_path = load_file_from_url(url=pretrain_model_url['restoration'],
                                    model_dir='weights/CodeFormer', progress=True, file_name=None)
    # Deserialize onto the CPU so the checkpoint also loads on hosts without
    # CUDA; load_state_dict copies the values into the model's parameters.
    checkpoint = torch.load(ckpt_path, map_location='cpu')['params_ema']
    net.load_state_dict(checkpoint)
    net.eval()

    # ------------------ set up FaceRestoreHelper -------------------
    # large det_model: 'YOLOv5l', 'retinaface_resnet50'
    # small det_model: 'YOLOv5n', 'retinaface_mobile0.25'
    if not args.has_aligned:
        print(f'Face detection model: {args.detection_model}')
    if bg_upsampler is not None:
        print(f'Background upsampling: True, Face upsampling: {args.face_upsample}')
    else:
        print(f'Background upsampling: False, Face upsampling: {args.face_upsample}')

    face_helper = FaceRestoreHelper(
        args.upscale,
        face_size=512,
        crop_ratio=(1, 1),
        det_model = args.detection_model,
        save_ext='png',
        use_parse=True,
        device=device)

    # -------------------- start to processing ---------------------
    for i, img_path in enumerate(input_img_list):
        # clean all the intermediate results to process the next image
        face_helper.clean_all()

        if isinstance(img_path, str):
            img_name = os.path.basename(img_path)
            basename, ext = os.path.splitext(img_name)
            print(f'[{i+1}/{test_img_num}] Processing: {img_name}')
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        else:  # for video processing
            basename = str(i).zfill(6)
            img_name = f'{video_name}_{basename}' if input_video else basename
            print(f'[{i+1}/{test_img_num}] Processing: {img_name}')
            img = img_path

        if args.has_aligned:
            # the input faces are already cropped and aligned
            img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)
            face_helper.is_gray = is_gray(img, threshold=10)
            if face_helper.is_gray:
                print('Grayscale input: True')
            face_helper.cropped_faces = [img]
        else:
            face_helper.read_image(img)
            # get face landmarks for each face
            num_det_faces = face_helper.get_face_landmarks_5(
                only_center_face=args.only_center_face, resize=640, eye_dist_threshold=5)
            print(f'\tdetect {num_det_faces} faces')
            # align and warp each face
            face_helper.align_warp_face()

        # face restoration for each cropped face
        for idx, cropped_face in enumerate(face_helper.cropped_faces):
            # prepare data
            cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True)
            normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
            cropped_face_t = cropped_face_t.unsqueeze(0).to(device)

            try:
                with torch.no_grad():
                    output = net(cropped_face_t, w=w, adain=True)[0]
                    restored_face = tensor2img(output, rgb2bgr=True, min_max=(-1, 1))
                del output
                torch.cuda.empty_cache()
            except Exception as error:
                # Best effort: fall back to the unrestored crop and continue.
                print(f'\tFailed inference for CodeFormer: {error}')
                restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1))

            restored_face = restored_face.astype('uint8')
            face_helper.add_restored_face(restored_face, cropped_face)

        # paste_back
        if not args.has_aligned:
            # upsample the background
            if bg_upsampler is not None:
                # Now only support RealESRGAN for upsampling background
                bg_img = bg_upsampler.enhance(img, outscale=args.upscale)[0]
            else:
                bg_img = None
            face_helper.get_inverse_affine(None)
            # paste each restored face to the input image
            if args.face_upsample and face_upsampler is not None:
                restored_img = face_helper.paste_faces_to_input_image(upsample_img=bg_img, draw_box=args.draw_box, face_upsampler=face_upsampler)
            else:
                restored_img = face_helper.paste_faces_to_input_image(upsample_img=bg_img, draw_box=args.draw_box)

        # save faces
        for idx, (cropped_face, restored_face) in enumerate(zip(face_helper.cropped_faces, face_helper.restored_faces)):
            # save cropped face
            if not args.has_aligned:
                save_crop_path = os.path.join(result_root, 'cropped_faces', f'{basename}_{idx:02d}.png')
                imwrite(cropped_face, save_crop_path)
            # save restored face
            if args.has_aligned:
                save_face_name = f'{basename}.png'
            else:
                save_face_name = f'{basename}_{idx:02d}.png'
            if args.suffix is not None:
                save_face_name = f'{save_face_name[:-4]}_{args.suffix}.png'
            save_restore_path = os.path.join(result_root, 'restored_faces', save_face_name)
            imwrite(restored_face, save_restore_path)

        # save restored img
        if not args.has_aligned and restored_img is not None:
            if args.suffix is not None:
                basename = f'{basename}_{args.suffix}'
            save_restore_path = os.path.join(result_root, 'final_results', f'{basename}.png')
            imwrite(restored_img, save_restore_path)

    # save enhanced video
    if input_video:
        print('Video Saving...')
        # load images
        video_frames = []
        img_list = sorted(glob.glob(os.path.join(result_root, 'final_results', '*.[jp][pn]g')))
        for img_path in img_list:
            img = cv2.imread(img_path)
            video_frames.append(img)
        # write images to video
        height, width = video_frames[0].shape[:2]
        if args.suffix is not None:
            # No image extension here: '.mp4' is appended just below.
            video_name = f'{video_name}_{args.suffix}'
        save_restore_path = os.path.join(result_root, f'{video_name}.mp4')
        vidwriter = VideoWriter(save_restore_path, height, width, fps, audio)

        for f in video_frames:
            vidwriter.write_frame(f)
        vidwriter.close()

    print(f'\nAll results are saved in {result_root}')
import os
import cv2
import argparse
import glob
import torch
from torchvision.transforms.functional import normalize
from basicsr.utils import imwrite, img2tensor, tensor2img
from basicsr.utils.download_util import load_file_from_url
from basicsr.utils.misc import get_device
from basicsr.utils.registry import ARCH_REGISTRY
# Download location of the colorization checkpoint loaded below.
pretrain_model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer_colorization.pth'

if __name__ == '__main__':
    # Command-line entry point: colorize every aligned 512x512 face image found
    # at --input_path and write the results as PNG files under the output folder.
    device = get_device()
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_path', type=str, default='./inputs/gray_faces',
                        help='Input image or folder. Default: inputs/gray_faces')
    parser.add_argument('-o', '--output_path', type=str, default=None,
                        help='Output folder. Default: results/<input_name>')
    parser.add_argument('--suffix', type=str, default=None,
                        help='Suffix of the restored faces. Default: None')
    args = parser.parse_args()

    # ------------------------ input & output ------------------------
    print('[NOTE] The input face images should be aligned and cropped to a resolution of 512x512.')
    if args.input_path.endswith(('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')):  # input single img path
        input_img_list = [args.input_path]
        result_root = 'results/test_colorization_img'
    else:  # input img folder
        if args.input_path.endswith('/'):  # solve when path ends with /
            args.input_path = args.input_path[:-1]
        # scan all the jpg and png images
        input_img_list = sorted(glob.glob(os.path.join(args.input_path, '*.[jpJP][pnPN]*[gG]')))
        result_root = f'results/{os.path.basename(args.input_path)}'
    if args.output_path is not None:  # set output path
        result_root = args.output_path

    test_img_num = len(input_img_list)
    if test_img_num == 0:
        print(f'\tNo input images found in {args.input_path}')

    # ------------------ set up CodeFormer restorer -------------------
    net = ARCH_REGISTRY.get('CodeFormer')(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9,
                                          connect_list=['32', '64', '128']).to(device)
    ckpt_path = load_file_from_url(url=pretrain_model_url,
                                   model_dir='weights/CodeFormer', progress=True, file_name=None)
    # Deserialize onto the CPU so the checkpoint also loads on hosts without
    # the device it was saved from; load_state_dict copies the values into
    # the parameters of `net`, which already live on `device`.
    checkpoint = torch.load(ckpt_path, map_location='cpu')['params_ema']
    net.load_state_dict(checkpoint)
    net.eval()

    # -------------------- start to processing ---------------------
    for i, img_path in enumerate(input_img_list):
        img_name = os.path.basename(img_path)
        basename = os.path.splitext(img_name)[0]
        print(f'[{i+1}/{test_img_num}] Processing: {img_name}')

        input_face = cv2.imread(img_path)
        if input_face is None:
            # cv2.imread returned None instead of an image array; skip this
            # file rather than failing on the `.shape` access below.
            print(f'\tFailed to read image: {img_path}')
            continue
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if input_face.shape[:2] != (512, 512):
            raise ValueError('Input resolution must be 512x512 for colorization.')

        # Scale pixel values to [0, 1], convert to a tensor, then normalize
        # with mean 0.5 / std 0.5 (i.e. to [-1, 1]) in place.
        input_face = img2tensor(input_face / 255., bgr2rgb=True, float32=True)
        normalize(input_face, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
        input_face = input_face.unsqueeze(0).to(device)

        try:
            with torch.no_grad():
                # w is fixed to 0 since we didn't train the Stage III for colorization
                output_face = net(input_face, w=0, adain=True)[0]
                save_face = tensor2img(output_face, rgb2bgr=True, min_max=(-1, 1))
            del output_face
            torch.cuda.empty_cache()
        except Exception as error:
            # Best-effort fallback: report the failure and save the input unchanged.
            print(f'\tFailed inference for CodeFormer: {error}')
            save_face = tensor2img(input_face, rgb2bgr=True, min_max=(-1, 1))
        save_face = save_face.astype('uint8')

        # save face
        save_name = basename if args.suffix is None else f'{basename}_{args.suffix}'
        save_restore_path = os.path.join(result_root, f'{save_name}.png')
        imwrite(save_face, save_restore_path)

    print(f'\nAll results are saved in {result_root}')
import os
import cv2
import argparse
import glob
import torch
from torchvision.transforms.functional import normalize
from basicsr.utils import imwrite, img2tensor, tensor2img
from basicsr.utils.download_util import load_file_from_url
from basicsr.utils.misc import get_device
from basicsr.utils.registry import ARCH_REGISTRY
# Download location of the inpainting checkpoint loaded below.
pretrain_model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer_inpainting.pth'

if __name__ == '__main__':
    # Command-line entry point: inpaint every aligned 512x512 face image found
    # at --input_path and write the results as PNG files under the output folder.
    device = get_device()
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_path', type=str, default='./inputs/masked_faces',
                        help='Input image or folder. Default: inputs/masked_faces')
    parser.add_argument('-o', '--output_path', type=str, default=None,
                        help='Output folder. Default: results/<input_name>')
    parser.add_argument('--suffix', type=str, default=None,
                        help='Suffix of the restored faces. Default: None')
    args = parser.parse_args()

    # ------------------------ input & output ------------------------
    print('[NOTE] The input face images should be aligned and cropped to a resolution of 512x512.')
    if args.input_path.endswith(('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')):  # input single img path
        input_img_list = [args.input_path]
        result_root = 'results/test_inpainting_img'
    else:  # input img folder
        if args.input_path.endswith('/'):  # solve when path ends with /
            args.input_path = args.input_path[:-1]
        # scan all the jpg and png images
        input_img_list = sorted(glob.glob(os.path.join(args.input_path, '*.[jpJP][pnPN]*[gG]')))
        result_root = f'results/{os.path.basename(args.input_path)}'
    if args.output_path is not None:  # set output path
        result_root = args.output_path

    test_img_num = len(input_img_list)
    if test_img_num == 0:
        print(f'\tNo input images found in {args.input_path}')

    # ------------------ set up CodeFormer restorer -------------------
    net = ARCH_REGISTRY.get('CodeFormer')(dim_embd=512, codebook_size=512, n_head=8, n_layers=9,
                                          connect_list=['32', '64', '128']).to(device)
    ckpt_path = load_file_from_url(url=pretrain_model_url,
                                   model_dir='weights/CodeFormer', progress=True, file_name=None)
    # Deserialize onto the CPU so the checkpoint also loads on hosts without
    # the device it was saved from; load_state_dict copies the values into
    # the parameters of `net`, which already live on `device`.
    checkpoint = torch.load(ckpt_path, map_location='cpu')['params_ema']
    net.load_state_dict(checkpoint)
    net.eval()

    # -------------------- start to processing ---------------------
    for i, img_path in enumerate(input_img_list):
        img_name = os.path.basename(img_path)
        basename = os.path.splitext(img_name)[0]
        print(f'[{i+1}/{test_img_num}] Processing: {img_name}')

        input_face = cv2.imread(img_path)
        if input_face is None:
            # cv2.imread returned None instead of an image array; skip this
            # file rather than failing on the `.shape` access below.
            print(f'\tFailed to read image: {img_path}')
            continue
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if input_face.shape[:2] != (512, 512):
            raise ValueError('Input resolution must be 512x512 for inpainting.')

        # Scale pixel values to [0, 1], convert to a tensor, then normalize
        # with mean 0.5 / std 0.5 (i.e. to [-1, 1]) in place.
        input_face = img2tensor(input_face / 255., bgr2rgb=True, float32=True)
        normalize(input_face, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
        input_face = input_face.unsqueeze(0).to(device)

        try:
            with torch.no_grad():
                # Region to fill: pixels whose three normalized channels sum
                # to exactly 3, i.e. every channel is at the maximum value 1.0
                # (pure white in the input image).
                # The mask is derived directly from input_face so it lives on
                # the same device, avoiding indexing a CPU tensor with a
                # boolean tensor located on `device`.
                m_ind = torch.sum(input_face[0], dim=0)
                mask = (m_ind == 3).float().view(1, 1, 512, 512)
                # w is fixed to 1, adain=False for inpainting
                output_face = net(input_face, w=1, adain=False)[0]
                # Keep the original pixels outside the mask and take the
                # network output only inside it.
                output_face = (1 - mask) * input_face + mask * output_face
                save_face = tensor2img(output_face, rgb2bgr=True, min_max=(-1, 1))
            del output_face
            torch.cuda.empty_cache()
        except Exception as error:
            # Best-effort fallback: report the failure and save the input unchanged.
            print(f'\tFailed inference for CodeFormer: {error}')
            save_face = tensor2img(input_face, rgb2bgr=True, min_max=(-1, 1))
        save_face = save_face.astype('uint8')

        # save face
        save_name = basename if args.suffix is None else f'{basename}_{args.suffix}'
        save_restore_path = os.path.join(result_root, f'{save_name}.png')
        imwrite(save_face, save_restore_path)

    print(f'\nAll results are saved in {result_root}')
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment