# -*- coding: utf-8 -*-
# @Organization : insightface.ai
# @Author : Jia Guo
# @Time : 2021-05-04
# @Function :
import os
import os.path as osp
import glob
import onnxruntime
from .arcface_onnx import *
from .retinaface import *
#from .scrfd import *
from .landmark import *
from .attribute import Attribute
from .inswapper import INSwapper
from ..utils import download_onnx
__all__ = ['get_model']
class PickableInferenceSession(onnxruntime.InferenceSession):
    # This is a wrapper to make the current InferenceSession class picklable.
def __init__(self, model_path, **kwargs):
super().__init__(model_path, **kwargs)
self.model_path = model_path
def __getstate__(self):
return {'model_path': self.model_path}
def __setstate__(self, values):
model_path = values['model_path']
self.__init__(model_path)
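# Hedged usage sketch (hypothetical 'model.onnx' path; not called anywhere):
# a plain onnxruntime.InferenceSession cannot be pickled, but this wrapper
# survives a round-trip because __setstate__ re-creates the session from the
# stored model path.
def _example_pickle_session():
    import pickle
    sess = PickableInferenceSession('model.onnx')  # hypothetical local file
    restored = pickle.loads(pickle.dumps(sess))    # rebuilt via __setstate__
    print(restored.model_path)                     # -> model.onnx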
class ModelRouter:
def __init__(self, onnx_file):
self.onnx_file = onnx_file
def get_model(self, **kwargs):
session = PickableInferenceSession(self.onnx_file, **kwargs)
# print(f'Applied providers: {session._providers}, with options: {session._provider_options}')
inputs = session.get_inputs()
input_cfg = inputs[0]
input_shape = input_cfg.shape
outputs = session.get_outputs()
if len(outputs)>=5:
return RetinaFace(model_file=self.onnx_file, session=session)
elif input_shape[2]==192 and input_shape[3]==192:
return Landmark(model_file=self.onnx_file, session=session)
elif input_shape[2]==96 and input_shape[3]==96:
return Attribute(model_file=self.onnx_file, session=session)
elif len(inputs)==2 and input_shape[2]==128 and input_shape[3]==128:
return INSwapper(model_file=self.onnx_file, session=session)
elif input_shape[2]==input_shape[3] and input_shape[2]>=112 and input_shape[2]%16==0:
return ArcFaceONNX(model_file=self.onnx_file, session=session)
else:
#raise RuntimeError('error on model routing')
return None
def find_onnx_file(dir_path):
if not os.path.exists(dir_path):
return None
paths = glob.glob("%s/*.onnx" % dir_path)
if len(paths) == 0:
return None
paths = sorted(paths)
return paths[-1]
def get_default_providers():
return ['CUDAExecutionProvider', 'CoreMLExecutionProvider', 'CPUExecutionProvider']
def get_default_provider_options():
return None
def get_model(name, **kwargs):
root = kwargs.get('root', '~/.insightface')
root = os.path.expanduser(root)
model_root = osp.join(root, 'models')
allow_download = kwargs.get('download', False)
download_zip = kwargs.get('download_zip', False)
if not name.endswith('.onnx'):
model_dir = os.path.join(model_root, name)
model_file = find_onnx_file(model_dir)
if model_file is None:
return None
else:
model_file = name
if not osp.exists(model_file) and allow_download:
model_file = download_onnx('models', model_file, root=root, download_zip=download_zip)
assert osp.exists(model_file), 'model_file %s should exist'%model_file
assert osp.isfile(model_file), 'model_file %s should be a file'%model_file
router = ModelRouter(model_file)
providers = kwargs.get('providers', get_default_providers())
provider_options = kwargs.get('provider_options', get_default_provider_options())
model = router.get_model(providers=providers, provider_options=provider_options)
return model
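# Hedged usage sketch (assumes the standard 'buffalo_l' model pack is already
# unpacked under ~/.insightface/models/buffalo_l): when `name` is a directory
# name, get_model() picks the alphabetically last .onnx inside it and routes
# it to the matching wrapper class via ModelRouter.
def _example_get_model():
    model = get_model('buffalo_l', providers=['CPUExecutionProvider'])
    if model is not None:
        print(type(model).__name__, model.taskname)  # e.g. ArcFaceONNX recognition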
# -*- coding: utf-8 -*-
# @Organization : insightface.ai
# @Author : Jia Guo
# @Time : 2021-09-18
# @Function :
from __future__ import division
import datetime
import numpy as np
import onnx
import onnxruntime
import os
import os.path as osp
import cv2
import sys
def softmax(z):
assert len(z.shape) == 2
s = np.max(z, axis=1)
s = s[:, np.newaxis] # necessary step to do broadcasting
e_x = np.exp(z - s)
div = np.sum(e_x, axis=1)
    div = div[:, np.newaxis] # ditto
return e_x / div
def distance2bbox(points, distance, max_shape=None):
"""Decode distance prediction to bounding box.
Args:
points (Tensor): Shape (n, 2), [x, y].
distance (Tensor): Distance from the given point to 4
boundaries (left, top, right, bottom).
max_shape (tuple): Shape of the image.
Returns:
Tensor: Decoded bboxes.
"""
x1 = points[:, 0] - distance[:, 0]
y1 = points[:, 1] - distance[:, 1]
x2 = points[:, 0] + distance[:, 2]
y2 = points[:, 1] + distance[:, 3]
if max_shape is not None:
x1 = x1.clamp(min=0, max=max_shape[1])
y1 = y1.clamp(min=0, max=max_shape[0])
x2 = x2.clamp(min=0, max=max_shape[1])
y2 = y2.clamp(min=0, max=max_shape[0])
return np.stack([x1, y1, x2, y2], axis=-1)
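# Tiny numeric check of the decoding (note: the max_shape branch above uses
# .clamp(), which exists on torch tensors but not numpy arrays; the detector
# below always calls this with max_shape=None on numpy inputs): an anchor
# center at (100, 100) with distances (10, 20, 30, 40) to the
# left/top/right/bottom edges decodes to the box (90, 80, 130, 140).
def _example_distance2bbox():
    pts = np.array([[100., 100.]])
    dist = np.array([[10., 20., 30., 40.]])
    print(distance2bbox(pts, dist))  # [[ 90.  80. 130. 140.]]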
def distance2kps(points, distance, max_shape=None):
"""Decode distance prediction to bounding box.
Args:
points (Tensor): Shape (n, 2), [x, y].
distance (Tensor): Distance from the given point to 4
boundaries (left, top, right, bottom).
max_shape (tuple): Shape of the image.
Returns:
Tensor: Decoded bboxes.
"""
preds = []
for i in range(0, distance.shape[1], 2):
px = points[:, i%2] + distance[:, i]
py = points[:, i%2+1] + distance[:, i+1]
if max_shape is not None:
px = px.clamp(min=0, max=max_shape[1])
py = py.clamp(min=0, max=max_shape[0])
preds.append(px)
preds.append(py)
return np.stack(preds, axis=-1)
class RetinaFace:
def __init__(self, model_file=None, session=None):
import onnxruntime
self.model_file = model_file
self.session = session
self.taskname = 'detection'
if self.session is None:
assert self.model_file is not None
assert osp.exists(self.model_file)
self.session = onnxruntime.InferenceSession(self.model_file, None)
self.center_cache = {}
self.nms_thresh = 0.4
self.det_thresh = 0.5
self._init_vars()
def _init_vars(self):
input_cfg = self.session.get_inputs()[0]
input_shape = input_cfg.shape
#print(input_shape)
if isinstance(input_shape[2], str):
self.input_size = None
else:
self.input_size = tuple(input_shape[2:4][::-1])
#print('image_size:', self.image_size)
input_name = input_cfg.name
self.input_shape = input_shape
outputs = self.session.get_outputs()
output_names = []
for o in outputs:
output_names.append(o.name)
self.input_name = input_name
self.output_names = output_names
self.input_mean = 127.5
self.input_std = 128.0
#print(self.output_names)
#assert len(outputs)==10 or len(outputs)==15
self.use_kps = False
self._anchor_ratio = 1.0
self._num_anchors = 1
if len(outputs)==6:
self.fmc = 3
self._feat_stride_fpn = [8, 16, 32]
self._num_anchors = 2
elif len(outputs)==9:
self.fmc = 3
self._feat_stride_fpn = [8, 16, 32]
self._num_anchors = 2
self.use_kps = True
elif len(outputs)==10:
self.fmc = 5
self._feat_stride_fpn = [8, 16, 32, 64, 128]
self._num_anchors = 1
elif len(outputs)==15:
self.fmc = 5
self._feat_stride_fpn = [8, 16, 32, 64, 128]
self._num_anchors = 1
self.use_kps = True
def prepare(self, ctx_id, **kwargs):
if ctx_id<0:
self.session.set_providers(['CPUExecutionProvider'])
nms_thresh = kwargs.get('nms_thresh', None)
if nms_thresh is not None:
self.nms_thresh = nms_thresh
det_thresh = kwargs.get('det_thresh', None)
if det_thresh is not None:
self.det_thresh = det_thresh
input_size = kwargs.get('input_size', None)
if input_size is not None:
if self.input_size is not None:
                print('warning: det_size is already set in detection model, ignoring input_size')
else:
self.input_size = input_size
def forward(self, img, threshold):
scores_list = []
bboxes_list = []
kpss_list = []
input_size = tuple(img.shape[0:2][::-1])
blob = cv2.dnn.blobFromImage(img, 1.0/self.input_std, input_size, (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
net_outs = self.session.run(self.output_names, {self.input_name : blob})
input_height = blob.shape[2]
input_width = blob.shape[3]
fmc = self.fmc
for idx, stride in enumerate(self._feat_stride_fpn):
scores = net_outs[idx]
bbox_preds = net_outs[idx+fmc]
bbox_preds = bbox_preds * stride
if self.use_kps:
kps_preds = net_outs[idx+fmc*2] * stride
height = input_height // stride
width = input_width // stride
K = height * width
key = (height, width, stride)
if key in self.center_cache:
anchor_centers = self.center_cache[key]
else:
#solution-1, c style:
#anchor_centers = np.zeros( (height, width, 2), dtype=np.float32 )
#for i in range(height):
# anchor_centers[i, :, 1] = i
#for i in range(width):
# anchor_centers[:, i, 0] = i
#solution-2:
#ax = np.arange(width, dtype=np.float32)
#ay = np.arange(height, dtype=np.float32)
#xv, yv = np.meshgrid(np.arange(width), np.arange(height))
#anchor_centers = np.stack([xv, yv], axis=-1).astype(np.float32)
#solution-3:
anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
#print(anchor_centers.shape)
anchor_centers = (anchor_centers * stride).reshape( (-1, 2) )
if self._num_anchors>1:
anchor_centers = np.stack([anchor_centers]*self._num_anchors, axis=1).reshape( (-1,2) )
if len(self.center_cache)<100:
self.center_cache[key] = anchor_centers
pos_inds = np.where(scores>=threshold)[0]
bboxes = distance2bbox(anchor_centers, bbox_preds)
pos_scores = scores[pos_inds]
pos_bboxes = bboxes[pos_inds]
scores_list.append(pos_scores)
bboxes_list.append(pos_bboxes)
if self.use_kps:
kpss = distance2kps(anchor_centers, kps_preds)
#kpss = kps_preds
kpss = kpss.reshape( (kpss.shape[0], -1, 2) )
pos_kpss = kpss[pos_inds]
kpss_list.append(pos_kpss)
return scores_list, bboxes_list, kpss_list
def detect(self, img, input_size = None, max_num=0, metric='default'):
assert input_size is not None or self.input_size is not None
input_size = self.input_size if input_size is None else input_size
im_ratio = float(img.shape[0]) / img.shape[1]
model_ratio = float(input_size[1]) / input_size[0]
if im_ratio>model_ratio:
new_height = input_size[1]
new_width = int(new_height / im_ratio)
else:
new_width = input_size[0]
new_height = int(new_width * im_ratio)
det_scale = float(new_height) / img.shape[0]
resized_img = cv2.resize(img, (new_width, new_height))
det_img = np.zeros( (input_size[1], input_size[0], 3), dtype=np.uint8 )
det_img[:new_height, :new_width, :] = resized_img
scores_list, bboxes_list, kpss_list = self.forward(det_img, self.det_thresh)
scores = np.vstack(scores_list)
scores_ravel = scores.ravel()
order = scores_ravel.argsort()[::-1]
bboxes = np.vstack(bboxes_list) / det_scale
if self.use_kps:
kpss = np.vstack(kpss_list) / det_scale
pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
pre_det = pre_det[order, :]
keep = self.nms(pre_det)
det = pre_det[keep, :]
if self.use_kps:
kpss = kpss[order,:,:]
kpss = kpss[keep,:,:]
else:
kpss = None
if max_num > 0 and det.shape[0] > max_num:
area = (det[:, 2] - det[:, 0]) * (det[:, 3] -
det[:, 1])
img_center = img.shape[0] // 2, img.shape[1] // 2
offsets = np.vstack([
(det[:, 0] + det[:, 2]) / 2 - img_center[1],
(det[:, 1] + det[:, 3]) / 2 - img_center[0]
])
offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
if metric=='max':
values = area
else:
values = area - offset_dist_squared * 2.0 # some extra weight on the centering
bindex = np.argsort(
values)[::-1] # some extra weight on the centering
bindex = bindex[0:max_num]
det = det[bindex, :]
if kpss is not None:
kpss = kpss[bindex, :]
return det, kpss
def nms(self, dets):
thresh = self.nms_thresh
x1 = dets[:, 0]
y1 = dets[:, 1]
x2 = dets[:, 2]
y2 = dets[:, 3]
scores = dets[:, 4]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (areas[i] + areas[order[1:]] - inter)
inds = np.where(ovr <= thresh)[0]
order = order[inds + 1]
return keep
def get_retinaface(name, download=False, root='~/.insightface/models', **kwargs):
if not download:
assert os.path.exists(name)
return RetinaFace(name)
else:
from .model_store import get_model_file
_file = get_model_file("retinaface_%s" % name, root=root)
        return RetinaFace(_file)
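# Hedged end-to-end sketch (hypothetical './det_10g.onnx' and 't1.jpg' paths):
# detect() returns an (N, 5) array of [x1, y1, x2, y2, score] plus an
# (N, 5, 2) keypoint array when the model predicts landmarks.
def _example_retinaface_detect():
    detector = RetinaFace(model_file='./det_10g.onnx')
    detector.prepare(ctx_id=-1, input_size=(640, 640), det_thresh=0.5)
    img = cv2.imread('t1.jpg')
    dets, kpss = detector.detect(img)
    print(dets.shape, None if kpss is None else kpss.shape)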
# -*- coding: utf-8 -*-
# @Organization : insightface.ai
# @Author : Jia Guo
# @Time : 2021-05-04
# @Function :
from __future__ import division
import datetime
import numpy as np
import onnx
import onnxruntime
import os
import os.path as osp
import cv2
import sys
def softmax(z):
assert len(z.shape) == 2
s = np.max(z, axis=1)
s = s[:, np.newaxis] # necessary step to do broadcasting
e_x = np.exp(z - s)
div = np.sum(e_x, axis=1)
    div = div[:, np.newaxis] # ditto
return e_x / div
def distance2bbox(points, distance, max_shape=None):
"""Decode distance prediction to bounding box.
Args:
points (Tensor): Shape (n, 2), [x, y].
distance (Tensor): Distance from the given point to 4
boundaries (left, top, right, bottom).
max_shape (tuple): Shape of the image.
Returns:
Tensor: Decoded bboxes.
"""
x1 = points[:, 0] - distance[:, 0]
y1 = points[:, 1] - distance[:, 1]
x2 = points[:, 0] + distance[:, 2]
y2 = points[:, 1] + distance[:, 3]
if max_shape is not None:
x1 = x1.clamp(min=0, max=max_shape[1])
y1 = y1.clamp(min=0, max=max_shape[0])
x2 = x2.clamp(min=0, max=max_shape[1])
y2 = y2.clamp(min=0, max=max_shape[0])
return np.stack([x1, y1, x2, y2], axis=-1)
def distance2kps(points, distance, max_shape=None):
"""Decode distance prediction to bounding box.
Args:
points (Tensor): Shape (n, 2), [x, y].
distance (Tensor): Distance from the given point to 4
boundaries (left, top, right, bottom).
max_shape (tuple): Shape of the image.
Returns:
Tensor: Decoded bboxes.
"""
preds = []
for i in range(0, distance.shape[1], 2):
px = points[:, i%2] + distance[:, i]
py = points[:, i%2+1] + distance[:, i+1]
if max_shape is not None:
px = px.clamp(min=0, max=max_shape[1])
py = py.clamp(min=0, max=max_shape[0])
preds.append(px)
preds.append(py)
return np.stack(preds, axis=-1)
class SCRFD:
def __init__(self, model_file=None, session=None):
import onnxruntime
self.model_file = model_file
self.session = session
self.taskname = 'detection'
self.batched = False
if self.session is None:
assert self.model_file is not None
assert osp.exists(self.model_file)
self.session = onnxruntime.InferenceSession(self.model_file, None)
self.center_cache = {}
self.nms_thresh = 0.4
self.det_thresh = 0.5
self._init_vars()
def _init_vars(self):
input_cfg = self.session.get_inputs()[0]
input_shape = input_cfg.shape
#print(input_shape)
if isinstance(input_shape[2], str):
self.input_size = None
else:
self.input_size = tuple(input_shape[2:4][::-1])
#print('image_size:', self.image_size)
input_name = input_cfg.name
self.input_shape = input_shape
outputs = self.session.get_outputs()
if len(outputs[0].shape) == 3:
self.batched = True
output_names = []
for o in outputs:
output_names.append(o.name)
self.input_name = input_name
self.output_names = output_names
self.input_mean = 127.5
self.input_std = 128.0
#print(self.output_names)
#assert len(outputs)==10 or len(outputs)==15
self.use_kps = False
self._anchor_ratio = 1.0
self._num_anchors = 1
if len(outputs)==6:
self.fmc = 3
self._feat_stride_fpn = [8, 16, 32]
self._num_anchors = 2
elif len(outputs)==9:
self.fmc = 3
self._feat_stride_fpn = [8, 16, 32]
self._num_anchors = 2
self.use_kps = True
elif len(outputs)==10:
self.fmc = 5
self._feat_stride_fpn = [8, 16, 32, 64, 128]
self._num_anchors = 1
elif len(outputs)==15:
self.fmc = 5
self._feat_stride_fpn = [8, 16, 32, 64, 128]
self._num_anchors = 1
self.use_kps = True
def prepare(self, ctx_id, **kwargs):
if ctx_id<0:
self.session.set_providers(['CPUExecutionProvider'])
nms_thresh = kwargs.get('nms_thresh', None)
if nms_thresh is not None:
self.nms_thresh = nms_thresh
det_thresh = kwargs.get('det_thresh', None)
if det_thresh is not None:
self.det_thresh = det_thresh
input_size = kwargs.get('input_size', None)
if input_size is not None:
if self.input_size is not None:
                print('warning: det_size is already set in scrfd model, ignoring input_size')
else:
self.input_size = input_size
def forward(self, img, threshold):
scores_list = []
bboxes_list = []
kpss_list = []
input_size = tuple(img.shape[0:2][::-1])
blob = cv2.dnn.blobFromImage(img, 1.0/self.input_std, input_size, (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
net_outs = self.session.run(self.output_names, {self.input_name : blob})
input_height = blob.shape[2]
input_width = blob.shape[3]
fmc = self.fmc
for idx, stride in enumerate(self._feat_stride_fpn):
            # If the model supports a batch dim, take the first output
if self.batched:
scores = net_outs[idx][0]
bbox_preds = net_outs[idx + fmc][0]
bbox_preds = bbox_preds * stride
if self.use_kps:
kps_preds = net_outs[idx + fmc * 2][0] * stride
            # If the model doesn't support batching, take the output as is
else:
scores = net_outs[idx]
bbox_preds = net_outs[idx + fmc]
bbox_preds = bbox_preds * stride
if self.use_kps:
kps_preds = net_outs[idx + fmc * 2] * stride
height = input_height // stride
width = input_width // stride
K = height * width
key = (height, width, stride)
if key in self.center_cache:
anchor_centers = self.center_cache[key]
else:
#solution-1, c style:
#anchor_centers = np.zeros( (height, width, 2), dtype=np.float32 )
#for i in range(height):
# anchor_centers[i, :, 1] = i
#for i in range(width):
# anchor_centers[:, i, 0] = i
#solution-2:
#ax = np.arange(width, dtype=np.float32)
#ay = np.arange(height, dtype=np.float32)
#xv, yv = np.meshgrid(np.arange(width), np.arange(height))
#anchor_centers = np.stack([xv, yv], axis=-1).astype(np.float32)
#solution-3:
anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
#print(anchor_centers.shape)
anchor_centers = (anchor_centers * stride).reshape( (-1, 2) )
if self._num_anchors>1:
anchor_centers = np.stack([anchor_centers]*self._num_anchors, axis=1).reshape( (-1,2) )
if len(self.center_cache)<100:
self.center_cache[key] = anchor_centers
pos_inds = np.where(scores>=threshold)[0]
bboxes = distance2bbox(anchor_centers, bbox_preds)
pos_scores = scores[pos_inds]
pos_bboxes = bboxes[pos_inds]
scores_list.append(pos_scores)
bboxes_list.append(pos_bboxes)
if self.use_kps:
kpss = distance2kps(anchor_centers, kps_preds)
#kpss = kps_preds
kpss = kpss.reshape( (kpss.shape[0], -1, 2) )
pos_kpss = kpss[pos_inds]
kpss_list.append(pos_kpss)
return scores_list, bboxes_list, kpss_list
def detect(self, img, input_size = None, max_num=0, metric='default'):
assert input_size is not None or self.input_size is not None
input_size = self.input_size if input_size is None else input_size
im_ratio = float(img.shape[0]) / img.shape[1]
model_ratio = float(input_size[1]) / input_size[0]
if im_ratio>model_ratio:
new_height = input_size[1]
new_width = int(new_height / im_ratio)
else:
new_width = input_size[0]
new_height = int(new_width * im_ratio)
det_scale = float(new_height) / img.shape[0]
resized_img = cv2.resize(img, (new_width, new_height))
det_img = np.zeros( (input_size[1], input_size[0], 3), dtype=np.uint8 )
det_img[:new_height, :new_width, :] = resized_img
scores_list, bboxes_list, kpss_list = self.forward(det_img, self.det_thresh)
scores = np.vstack(scores_list)
scores_ravel = scores.ravel()
order = scores_ravel.argsort()[::-1]
bboxes = np.vstack(bboxes_list) / det_scale
if self.use_kps:
kpss = np.vstack(kpss_list) / det_scale
pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
pre_det = pre_det[order, :]
keep = self.nms(pre_det)
det = pre_det[keep, :]
if self.use_kps:
kpss = kpss[order,:,:]
kpss = kpss[keep,:,:]
else:
kpss = None
if max_num > 0 and det.shape[0] > max_num:
area = (det[:, 2] - det[:, 0]) * (det[:, 3] -
det[:, 1])
img_center = img.shape[0] // 2, img.shape[1] // 2
offsets = np.vstack([
(det[:, 0] + det[:, 2]) / 2 - img_center[1],
(det[:, 1] + det[:, 3]) / 2 - img_center[0]
])
offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
if metric=='max':
values = area
else:
values = area - offset_dist_squared * 2.0 # some extra weight on the centering
bindex = np.argsort(
values)[::-1] # some extra weight on the centering
bindex = bindex[0:max_num]
det = det[bindex, :]
if kpss is not None:
kpss = kpss[bindex, :]
return det, kpss
def nms(self, dets):
thresh = self.nms_thresh
x1 = dets[:, 0]
y1 = dets[:, 1]
x2 = dets[:, 2]
y2 = dets[:, 3]
scores = dets[:, 4]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (areas[i] + areas[order[1:]] - inter)
inds = np.where(ovr <= thresh)[0]
order = order[inds + 1]
return keep
def get_scrfd(name, download=False, root='~/.insightface/models', **kwargs):
if not download:
assert os.path.exists(name)
return SCRFD(name)
else:
from .model_store import get_model_file
_file = get_model_file("scrfd_%s" % name, root=root)
return SCRFD(_file)
def scrfd_2p5gkps(**kwargs):
return get_scrfd("2p5gkps", download=True, **kwargs)
if __name__ == '__main__':
import glob
detector = SCRFD(model_file='./det.onnx')
detector.prepare(-1)
img_paths = ['tests/data/t1.jpg']
for img_path in img_paths:
img = cv2.imread(img_path)
for _ in range(1):
ta = datetime.datetime.now()
#bboxes, kpss = detector.detect(img, 0.5, input_size = (640, 640))
bboxes, kpss = detector.detect(img, 0.5)
tb = datetime.datetime.now()
print('all cost:', (tb-ta).total_seconds()*1000)
print(img_path, bboxes.shape)
if kpss is not None:
print(kpss.shape)
for i in range(bboxes.shape[0]):
bbox = bboxes[i]
            x1, y1, x2, y2, score = bbox.astype(int)  # np.int was removed in NumPy 1.24
cv2.rectangle(img, (x1,y1) , (x2,y2) , (255,0,0) , 2)
if kpss is not None:
kps = kpss[i]
for kp in kps:
                    kp = kp.astype(int)
cv2.circle(img, tuple(kp) , 1, (0,0,255) , 2)
filename = img_path.split('/')[-1]
print('output:', filename)
cv2.imwrite('./outputs/%s'%filename, img)
from __future__ import absolute_import
from .storage import download, ensure_available, download_onnx
from .filesystem import get_model_dir
from .filesystem import makedirs, try_import_dali
from .constant import *
"""
This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/utils/download.py
"""
import os
import hashlib
import requests
from tqdm import tqdm
def check_sha1(filename, sha1_hash):
"""Check whether the sha1 hash of the file content matches the expected hash.
Parameters
----------
filename : str
Path to the file.
sha1_hash : str
Expected sha1 hash in hexadecimal digits.
Returns
-------
bool
Whether the file content matches the expected hash.
"""
sha1 = hashlib.sha1()
with open(filename, 'rb') as f:
while True:
data = f.read(1048576)
if not data:
break
sha1.update(data)
    sha1_file = sha1.hexdigest()
    l = min(len(sha1_file), len(sha1_hash))
    return sha1_file[0:l] == sha1_hash[0:l]
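# Hedged example (hypothetical file name and digest): the function compares
# only the shared prefix of the two digests, so a truncated hash such as an
# 8-character prefix still validates.
def _example_check_sha1():
    ok = check_sha1('det_10g.onnx', '5838f7fe')  # hypothetical values
    print('hash ok' if ok else 'hash mismatch')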
def download_file(url, path=None, overwrite=False, sha1_hash=None):
"""Download an given URL
Parameters
----------
url : str
URL to download
path : str, optional
Destination path to store downloaded file. By default stores to the
current directory with same name as in url.
overwrite : bool, optional
Whether to overwrite destination file if already exists.
sha1_hash : str, optional
Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified
but doesn't match.
Returns
-------
str
The file path of the downloaded file.
"""
if path is None:
fname = url.split('/')[-1]
else:
path = os.path.expanduser(path)
if os.path.isdir(path):
fname = os.path.join(path, url.split('/')[-1])
else:
fname = path
if overwrite or not os.path.exists(fname) or (
sha1_hash and not check_sha1(fname, sha1_hash)):
dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
if not os.path.exists(dirname):
os.makedirs(dirname)
print('Downloading %s from %s...' % (fname, url))
r = requests.get(url, stream=True)
if r.status_code != 200:
raise RuntimeError("Failed downloading url %s" % url)
total_length = r.headers.get('content-length')
with open(fname, 'wb') as f:
if total_length is None: # no content length header
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
else:
total_length = int(total_length)
for chunk in tqdm(r.iter_content(chunk_size=1024),
total=int(total_length / 1024. + 0.5),
unit='KB',
unit_scale=False,
dynamic_ncols=True):
f.write(chunk)
if sha1_hash and not check_sha1(fname, sha1_hash):
raise UserWarning('File {} is downloaded but the content hash does not match. ' \
'The repo may be outdated or download may be incomplete. ' \
'If the "repo_url" is overridden, consider switching to ' \
'the default repo.'.format(fname))
return fname
import cv2
import numpy as np
from skimage import transform as trans
arcface_dst = np.array(
[[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
[41.5493, 92.3655], [70.7299, 92.2041]],
dtype=np.float32)
def estimate_norm(lmk, image_size=112,mode='arcface'):
assert lmk.shape == (5, 2)
assert image_size%112==0 or image_size%128==0
if image_size%112==0:
ratio = float(image_size)/112.0
diff_x = 0
else:
ratio = float(image_size)/128.0
diff_x = 8.0*ratio
dst = arcface_dst * ratio
dst[:,0] += diff_x
tform = trans.SimilarityTransform()
tform.estimate(lmk, dst)
M = tform.params[0:2, :]
return M
def norm_crop(img, landmark, image_size=112, mode='arcface'):
M = estimate_norm(landmark, image_size, mode)
warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
return warped
def norm_crop2(img, landmark, image_size=112, mode='arcface'):
M = estimate_norm(landmark, image_size, mode)
warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
return warped, M
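# Hedged usage sketch (hypothetical 'face.jpg'): align a face to the 112x112
# ArcFace template from the five landmarks a detector returns, ordered as
# left eye, right eye, nose tip, left mouth corner, right mouth corner.
def _example_norm_crop():
    img = cv2.imread('face.jpg')
    kps = np.array([[38.3, 51.7], [73.5, 51.5], [56.0, 71.7],
                    [41.5, 92.4], [70.7, 92.2]], dtype=np.float32)
    aligned = norm_crop(img, kps, image_size=112)  # (112, 112, 3) BGR crop
    return aligned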
def square_crop(im, S):
if im.shape[0] > im.shape[1]:
height = S
width = int(float(im.shape[1]) / im.shape[0] * S)
scale = float(S) / im.shape[0]
else:
width = S
height = int(float(im.shape[0]) / im.shape[1] * S)
scale = float(S) / im.shape[1]
resized_im = cv2.resize(im, (width, height))
det_im = np.zeros((S, S, 3), dtype=np.uint8)
det_im[:resized_im.shape[0], :resized_im.shape[1], :] = resized_im
return det_im, scale
def transform(data, center, output_size, scale, rotation):
scale_ratio = scale
rot = float(rotation) * np.pi / 180.0
#translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio)
t1 = trans.SimilarityTransform(scale=scale_ratio)
cx = center[0] * scale_ratio
cy = center[1] * scale_ratio
t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy))
t3 = trans.SimilarityTransform(rotation=rot)
t4 = trans.SimilarityTransform(translation=(output_size / 2,
output_size / 2))
t = t1 + t2 + t3 + t4
M = t.params[0:2]
cropped = cv2.warpAffine(data,
M, (output_size, output_size),
borderValue=0.0)
return cropped, M
def trans_points2d(pts, M):
new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
for i in range(pts.shape[0]):
pt = pts[i]
new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
new_pt = np.dot(M, new_pt)
#print('new_pt', new_pt.shape, new_pt)
new_pts[i] = new_pt[0:2]
return new_pts
def trans_points3d(pts, M):
scale = np.sqrt(M[0][0] * M[0][0] + M[0][1] * M[0][1])
#print(scale)
new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
for i in range(pts.shape[0]):
pt = pts[i]
new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
new_pt = np.dot(M, new_pt)
#print('new_pt', new_pt.shape, new_pt)
new_pts[i][0:2] = new_pt[0:2]
new_pts[i][2] = pts[i][2] * scale
return new_pts
def trans_points(pts, M):
if pts.shape[1] == 2:
return trans_points2d(pts, M)
else:
return trans_points3d(pts, M)
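# Quick numeric check: trans_points2d applies the 2x3 affine matrix M to each
# point. With a pure 2x scaling, (10, 20) maps to (20, 40).
def _example_trans_points2d():
    M = np.array([[2., 0., 0.],
                  [0., 2., 0.]], dtype=np.float32)
    print(trans_points2d(np.array([[10., 20.]], dtype=np.float32), M))  # [[20. 40.]]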
"""
This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/utils/filesystem.py
"""
import os
import os.path as osp
import errno
def get_model_dir(name, root='~/.insightface'):
root = os.path.expanduser(root)
model_dir = osp.join(root, 'models', name)
return model_dir
def makedirs(path):
"""Create directory recursively if not exists.
    Similar to `mkdir -p`, you can skip checking existence before this function.
Parameters
----------
path : str
Path of the desired dir
"""
try:
os.makedirs(path)
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
def try_import(package, message=None):
"""Try import specified package, with custom message support.
Parameters
----------
package : str
The name of the targeting package.
message : str, default is None
If not None, this function will raise customized error message when import error is found.
Returns
-------
module if found, raise ImportError otherwise
"""
try:
return __import__(package)
except ImportError as e:
if not message:
raise e
raise ImportError(message)
def try_import_cv2():
"""Try import cv2 at runtime.
Returns
-------
cv2 module if found. Raise ImportError otherwise
"""
msg = "cv2 is required, you can install by package manager, e.g. 'apt-get', \
or `pip install opencv-python --user` (note that this is unofficial PYPI package)."
return try_import('cv2', msg)
def try_import_mmcv():
"""Try import mmcv at runtime.
Returns
-------
mmcv module if found. Raise ImportError otherwise
"""
msg = "mmcv is required, you can install by first `pip install Cython --user` \
and then `pip install mmcv --user` (note that this is unofficial PYPI package)."
return try_import('mmcv', msg)
def try_import_rarfile():
"""Try import rarfile at runtime.
Returns
-------
rarfile module if found. Raise ImportError otherwise
"""
msg = "rarfile is required, you can install by first `sudo apt-get install unrar` \
and then `pip install rarfile --user` (note that this is unofficial PYPI package)."
return try_import('rarfile', msg)
def import_try_install(package, extern_url=None):
"""Try import the specified package.
If the package not installed, try use pip to install and import if success.
Parameters
----------
package : str
The name of the package trying to import.
extern_url : str or None, optional
The external url if package is not hosted on PyPI.
For example, you can install a package using:
"pip install git+http://github.com/user/repo/tarball/master/egginfo=xxx".
In this case, you can pass the url to the extern_url.
Returns
-------
<class 'Module'>
The imported python module.
"""
try:
return __import__(package)
except ImportError:
try:
from pip import main as pipmain
except ImportError:
from pip._internal import main as pipmain
# trying to install package
url = package if extern_url is None else extern_url
pipmain(['install', '--user',
url]) # will raise SystemExit Error if fails
# trying to load again
try:
return __import__(package)
except ImportError:
import sys
import site
user_site = site.getusersitepackages()
if user_site not in sys.path:
sys.path.append(user_site)
return __import__(package)
return __import__(package)
def try_import_dali():
"""Try import NVIDIA DALI at runtime.
"""
try:
dali = __import__('nvidia.dali', fromlist=['pipeline', 'ops', 'types'])
dali.Pipeline = dali.pipeline.Pipeline
except ImportError:
class dali:
class Pipeline:
def __init__(self):
raise NotImplementedError(
"DALI not found, please check if you installed it correctly."
)
return dali
import os
import os.path as osp
import zipfile
from .download import download_file
BASE_REPO_URL = 'https://github.com/deepinsight/insightface/releases/download/v0.7'
def download(sub_dir, name, force=False, root='~/.insightface'):
_root = os.path.expanduser(root)
dir_path = os.path.join(_root, sub_dir, name)
if osp.exists(dir_path) and not force:
return dir_path
print('download_path:', dir_path)
zip_file_path = os.path.join(_root, sub_dir, name + '.zip')
model_url = "%s/%s.zip"%(BASE_REPO_URL, name)
download_file(model_url,
path=zip_file_path,
overwrite=True)
if not os.path.exists(dir_path):
os.makedirs(dir_path)
with zipfile.ZipFile(zip_file_path) as zf:
zf.extractall(dir_path)
#os.remove(zip_file_path)
return dir_path
def ensure_available(sub_dir, name, root='~/.insightface'):
return download(sub_dir, name, force=False, root=root)
def download_onnx(sub_dir, model_file, force=False, root='~/.insightface', download_zip=False):
_root = os.path.expanduser(root)
model_root = osp.join(_root, sub_dir)
new_model_file = osp.join(model_root, model_file)
if osp.exists(new_model_file) and not force:
return new_model_file
if not osp.exists(model_root):
os.makedirs(model_root)
print('download_path:', new_model_file)
if not download_zip:
model_url = "%s/%s"%(BASE_REPO_URL, model_file)
download_file(model_url,
path=new_model_file,
overwrite=True)
else:
model_url = "%s/%s.zip"%(BASE_REPO_URL, model_file)
zip_file_path = new_model_file+".zip"
download_file(model_url,
path=zip_file_path,
overwrite=True)
with zipfile.ZipFile(zip_file_path) as zf:
zf.extractall(model_root)
return new_model_file
import cv2
import math
import numpy as np
from skimage import transform as trans
def transform(data, center, output_size, scale, rotation):
scale_ratio = scale
rot = float(rotation) * np.pi / 180.0
#translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio)
t1 = trans.SimilarityTransform(scale=scale_ratio)
cx = center[0] * scale_ratio
cy = center[1] * scale_ratio
t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy))
t3 = trans.SimilarityTransform(rotation=rot)
t4 = trans.SimilarityTransform(translation=(output_size / 2,
output_size / 2))
t = t1 + t2 + t3 + t4
M = t.params[0:2]
cropped = cv2.warpAffine(data,
M, (output_size, output_size),
borderValue=0.0)
return cropped, M
def trans_points2d(pts, M):
new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
for i in range(pts.shape[0]):
pt = pts[i]
new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
new_pt = np.dot(M, new_pt)
#print('new_pt', new_pt.shape, new_pt)
new_pts[i] = new_pt[0:2]
return new_pts
def trans_points3d(pts, M):
scale = np.sqrt(M[0][0] * M[0][0] + M[0][1] * M[0][1])
#print(scale)
new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
for i in range(pts.shape[0]):
pt = pts[i]
new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
new_pt = np.dot(M, new_pt)
#print('new_pt', new_pt.shape, new_pt)
new_pts[i][0:2] = new_pt[0:2]
new_pts[i][2] = pts[i][2] * scale
return new_pts
def trans_points(pts, M):
if pts.shape[1] == 2:
return trans_points2d(pts, M)
else:
return trans_points3d(pts, M)
def estimate_affine_matrix_3d23d(X, Y):
''' Using least-squares solution
Args:
X: [n, 3]. 3d points(fixed)
Y: [n, 3]. corresponding 3d points(moving). Y = PX
Returns:
        P_Affine: (3, 4). Affine camera matrix (the homogeneous bottom row [0, 0, 0, 1] is omitted).
'''
X_homo = np.hstack((X, np.ones([X.shape[0],1]))) #n x 4
    P = np.linalg.lstsq(X_homo, Y, rcond=None)[0].T # Affine matrix. 3 x 4
return P
def P2sRt(P):
''' decompositing camera matrix P
Args:
P: (3, 4). Affine Camera Matrix.
Returns:
s: scale factor.
R: (3, 3). rotation matrix.
t: (3,). translation.
'''
t = P[:, 3]
R1 = P[0:1, :3]
R2 = P[1:2, :3]
s = (np.linalg.norm(R1) + np.linalg.norm(R2))/2.0
r1 = R1/np.linalg.norm(R1)
r2 = R2/np.linalg.norm(R2)
r3 = np.cross(r1, r2)
R = np.concatenate((r1, r2, r3), 0)
return s, R, t
def matrix2angle(R):
''' get three Euler angles from Rotation Matrix
Args:
R: (3,3). rotation matrix
Returns:
x: pitch
y: yaw
z: roll
'''
sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
singular = sy < 1e-6
if not singular :
x = math.atan2(R[2,1] , R[2,2])
y = math.atan2(-R[2,0], sy)
z = math.atan2(R[1,0], R[0,0])
else :
x = math.atan2(-R[1,2], R[1,1])
y = math.atan2(-R[2,0], sy)
z = 0
# rx, ry, rz = np.rad2deg(x), np.rad2deg(y), np.rad2deg(z)
rx, ry, rz = x*180/np.pi, y*180/np.pi, z*180/np.pi
return rx, ry, rz
# coding: utf-8
"""
face detection and alignment using InsightFace
"""
import numpy as np
from .rprint import rlog as log
from .dependencies.insightface.app import FaceAnalysis
from .dependencies.insightface.app.common import Face
from .timer import Timer
def sort_by_direction(faces, direction: str = 'large-small', face_center=None):
if len(faces) <= 0:
return faces
if direction == 'left-right':
return sorted(faces, key=lambda face: face['bbox'][0])
if direction == 'right-left':
return sorted(faces, key=lambda face: face['bbox'][0], reverse=True)
if direction == 'top-bottom':
return sorted(faces, key=lambda face: face['bbox'][1])
if direction == 'bottom-top':
return sorted(faces, key=lambda face: face['bbox'][1], reverse=True)
if direction == 'small-large':
return sorted(faces, key=lambda face: (face['bbox'][2] - face['bbox'][0]) * (face['bbox'][3] - face['bbox'][1]))
if direction == 'large-small':
return sorted(faces, key=lambda face: (face['bbox'][2] - face['bbox'][0]) * (face['bbox'][3] - face['bbox'][1]), reverse=True)
if direction == 'distance-from-retarget-face':
return sorted(faces, key=lambda face: (((face['bbox'][2]+face['bbox'][0])/2-face_center[0])**2+((face['bbox'][3]+face['bbox'][1])/2-face_center[1])**2)**0.5)
return faces
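# Hedged usage sketch: each face is a dict-like object holding a 'bbox' of
# [x1, y1, x2, y2]; 'large-small' (the default) puts the biggest face first.
def _example_sort_by_direction():
    faces = [{'bbox': [0, 0, 50, 50]}, {'bbox': [0, 0, 200, 200]}]
    ordered = sort_by_direction(faces, 'large-small')
    print(ordered[0]['bbox'])  # [0, 0, 200, 200]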
class FaceAnalysisDIY(FaceAnalysis):
def __init__(self, name='buffalo_l', root='~/.insightface', allowed_modules=None, **kwargs):
super().__init__(name=name, root=root, allowed_modules=allowed_modules, **kwargs)
self.timer = Timer()
def get(self, img_bgr, **kwargs):
        max_num = kwargs.get('max_face_num', 0)  # the maximum number of faces to detect, 0 means no limit
        flag_do_landmark_2d_106 = kwargs.get('flag_do_landmark_2d_106', True)  # whether to run the 106-point 2D landmark model
        direction = kwargs.get('direction', 'large-small')  # how to sort the detected faces
face_center = None
bboxes, kpss = self.det_model.detect(img_bgr, max_num=max_num, metric='default')
if bboxes.shape[0] == 0:
return []
ret = []
for i in range(bboxes.shape[0]):
bbox = bboxes[i, 0:4]
det_score = bboxes[i, 4]
kps = None
if kpss is not None:
kps = kpss[i]
face = Face(bbox=bbox, kps=kps, det_score=det_score)
for taskname, model in self.models.items():
if taskname == 'detection':
continue
if (not flag_do_landmark_2d_106) and taskname == 'landmark_2d_106':
continue
# print(f'taskname: {taskname}')
model.get(img_bgr, face)
ret.append(face)
ret = sort_by_direction(ret, direction, face_center)
return ret
def warmup(self):
self.timer.tic()
img_bgr = np.zeros((512, 512, 3), dtype=np.uint8)
self.get(img_bgr)
elapse = self.timer.toc()
log(f'FaceAnalysisDIY warmup time: {elapse:.3f}s')
# coding: utf-8
import torch
import numpy as np
from pykalman import KalmanFilter
def smooth(x_d_lst, shape, device, observation_variance=1e-7, process_variance=1e-5):
x_d_lst_reshape = [x.reshape(-1) for x in x_d_lst]
x_d_stacked = np.vstack(x_d_lst_reshape)
kf = KalmanFilter(
initial_state_mean=x_d_stacked[0],
n_dim_obs=x_d_stacked.shape[1],
transition_covariance=process_variance * np.eye(x_d_stacked.shape[1]),
observation_covariance=observation_variance * np.eye(x_d_stacked.shape[1])
)
smoothed_state_means, _ = kf.smooth(x_d_stacked)
x_d_lst_smooth = [torch.tensor(state_mean.reshape(shape[-2:]), dtype=torch.float32, device=device) for state_mean in smoothed_state_means]
return x_d_lst_smooth
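# Hedged usage sketch: smooth a short sequence of per-frame motion arrays
# (here 2x3, standing in for flattened keypoint deltas) with the Kalman
# smoother above; device 'cpu' keeps the example device-agnostic.
def _example_smooth():
    frames = [np.random.rand(2, 3).astype(np.float32) for _ in range(10)]
    smoothed = smooth(frames, shape=(1, 2, 3), device='cpu')
    print(len(smoothed), smoothed[0].shape)  # 10 torch.Size([2, 3])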
# coding: utf-8
"""
utility functions and classes to handle feature extraction and model loading
"""
import os
import os.path as osp
import torch
from collections import OrderedDict
import numpy as np
import cv2
from ..modules.spade_generator import SPADEDecoder
from ..modules.warping_network import WarpingNetwork
from ..modules.motion_extractor import MotionExtractor
from ..modules.appearance_feature_extractor import AppearanceFeatureExtractor
from ..modules.stitching_retargeting_network import StitchingRetargetingNetwork
def suffix(filename):
"""a.jpg -> jpg"""
pos = filename.rfind(".")
if pos == -1:
return ""
return filename[pos + 1:]
def prefix(filename):
"""a.jpg -> a"""
pos = filename.rfind(".")
if pos == -1:
return filename
return filename[:pos]
def basename(filename):
"""a/b/c.jpg -> c"""
return prefix(osp.basename(filename))
def remove_suffix(filepath):
"""a/b/c.jpg -> a/b/c"""
return osp.join(osp.dirname(filepath), basename(filepath))
def is_image(file_path):
image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff')
return file_path.lower().endswith(image_extensions)
def is_video(file_path):
if file_path.lower().endswith((".mp4", ".mov", ".avi", ".webm")) or osp.isdir(file_path):
return True
return False
def is_template(file_path):
if file_path.endswith(".pkl"):
return True
return False
def mkdir(d, log=False):
    # return the input `d` unchanged, so the call can be used inline
if not osp.exists(d):
os.makedirs(d, exist_ok=True)
if log:
print(f"Make dir: {d}")
return d
def squeeze_tensor_to_numpy(tensor):
out = tensor.data.squeeze(0).cpu().numpy()
return out
def dct2device(dct: dict, device):
for key in dct:
dct[key] = torch.tensor(dct[key]).to(device)
return dct
def concat_feat(kp_source: torch.Tensor, kp_driving: torch.Tensor) -> torch.Tensor:
"""
kp_source: (bs, k, 3)
kp_driving: (bs, k, 3)
Return: (bs, 2k*3)
"""
bs_src = kp_source.shape[0]
bs_dri = kp_driving.shape[0]
assert bs_src == bs_dri, 'batch size must be equal'
feat = torch.cat([kp_source.view(bs_src, -1), kp_driving.view(bs_dri, -1)], dim=1)
return feat
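# Quick shape check: with a batch of 2 and k = 21 3D keypoints per side, the
# concatenated feature is (2, 2*21*3) = (2, 126).
def _example_concat_feat():
    kp_s = torch.zeros(2, 21, 3)
    kp_d = torch.zeros(2, 21, 3)
    print(concat_feat(kp_s, kp_d).shape)  # torch.Size([2, 126])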
def remove_ddp_dumplicate_key(state_dict):
state_dict_new = OrderedDict()
for key in state_dict.keys():
state_dict_new[key.replace('module.', '')] = state_dict[key]
return state_dict_new
def load_model(ckpt_path, model_config, device, model_type):
model_params = model_config['model_params'][f'{model_type}_params']
if model_type == 'appearance_feature_extractor':
model = AppearanceFeatureExtractor(**model_params).to(device)
elif model_type == 'motion_extractor':
model = MotionExtractor(**model_params).to(device)
elif model_type == 'warping_module':
model = WarpingNetwork(**model_params).to(device)
elif model_type == 'spade_generator':
model = SPADEDecoder(**model_params).to(device)
elif model_type == 'stitching_retargeting_module':
# Special handling for stitching and retargeting module
config = model_config['model_params']['stitching_retargeting_module_params']
checkpoint = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
stitcher = StitchingRetargetingNetwork(**config.get('stitching'))
stitcher.load_state_dict(remove_ddp_dumplicate_key(checkpoint['retarget_shoulder']))
stitcher = stitcher.to(device)
stitcher.eval()
retargetor_lip = StitchingRetargetingNetwork(**config.get('lip'))
retargetor_lip.load_state_dict(remove_ddp_dumplicate_key(checkpoint['retarget_mouth']))
retargetor_lip = retargetor_lip.to(device)
retargetor_lip.eval()
retargetor_eye = StitchingRetargetingNetwork(**config.get('eye'))
retargetor_eye.load_state_dict(remove_ddp_dumplicate_key(checkpoint['retarget_eye']))
retargetor_eye = retargetor_eye.to(device)
retargetor_eye.eval()
return {
'stitching': stitcher,
'lip': retargetor_lip,
'eye': retargetor_eye
}
else:
raise ValueError(f"Unknown model type: {model_type}")
model.load_state_dict(torch.load(ckpt_path, map_location=lambda storage, loc: storage))
model.eval()
return model
def load_description(fp):
with open(fp, 'r', encoding='utf-8') as f:
content = f.read()
return content
def is_square_video(video_path):
video = cv2.VideoCapture(video_path)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
video.release()
# if width != height:
# gr.Info(f"Uploaded video is not square, force do crop (driving) to be True")
return width == height
# coding: utf-8
import os.path as osp
import imageio
import numpy as np
import pickle
import cv2; cv2.setNumThreads(0); cv2.ocl.setUseOpenCL(False)
from .helper import mkdir, suffix
def load_image_rgb(image_path: str):
if not osp.exists(image_path):
raise FileNotFoundError(f"Image not found: {image_path}")
img = cv2.imread(image_path, cv2.IMREAD_COLOR)
return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
def load_video(video_info, n_frames=-1):
reader = imageio.get_reader(video_info, "ffmpeg")
ret = []
for idx, frame_rgb in enumerate(reader):
if n_frames > 0 and idx >= n_frames:
break
ret.append(frame_rgb)
reader.close()
return ret
def contiguous(obj):
if not obj.flags.c_contiguous:
obj = obj.copy(order="C")
return obj
def resize_to_limit(img: np.ndarray, max_dim=1920, division=2):
"""
ajust the size of the image so that the maximum dimension does not exceed max_dim, and the width and the height of the image are multiples of n.
:param img: the image to be processed.
:param max_dim: the maximum dimension constraint.
:param n: the number that needs to be multiples of.
:return: the adjusted image.
"""
h, w = img.shape[:2]
    # adjust the size of the image according to the maximum dimension
if max_dim > 0 and max(h, w) > max_dim:
if h > w:
new_h = max_dim
new_w = int(w * (max_dim / h))
else:
new_w = max_dim
new_h = int(h * (max_dim / w))
img = cv2.resize(img, (new_w, new_h))
    # ensure that the image dimensions are multiples of `division`
division = max(division, 1)
new_h = img.shape[0] - (img.shape[0] % division)
new_w = img.shape[1] - (img.shape[1] % division)
if new_h == 0 or new_w == 0:
        # when the width or height is less than `division`, no need to process
return img
if new_h != img.shape[0] or new_w != img.shape[1]:
img = img[:new_h, :new_w]
return img
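# Hedged numeric example: a 3000x4000 image is scaled so its longest side is
# 1920 (giving 1440x1920); both sides are already even, so nothing is trimmed
# afterwards.
def _example_resize_to_limit():
    img = np.zeros((3000, 4000, 3), dtype=np.uint8)
    out = resize_to_limit(img, max_dim=1920, division=2)
    print(out.shape)  # (1440, 1920, 3)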
def load_img_online(obj, mode="bgr", **kwargs):
max_dim = kwargs.get("max_dim", 1920)
n = kwargs.get("n", 2)
if isinstance(obj, str):
if mode.lower() == "gray":
img = cv2.imread(obj, cv2.IMREAD_GRAYSCALE)
else:
img = cv2.imread(obj, cv2.IMREAD_COLOR)
else:
img = obj
# Resize image to satisfy constraints
img = resize_to_limit(img, max_dim=max_dim, division=n)
if mode.lower() == "bgr":
return contiguous(img)
elif mode.lower() == "rgb":
return contiguous(img[..., ::-1])
else:
raise Exception(f"Unknown mode {mode}")
def load(fp):
suffix_ = suffix(fp)
if suffix_ == "npy":
return np.load(fp)
elif suffix_ == "pkl":
return pickle.load(open(fp, "rb"))
else:
raise Exception(f"Unknown type: {suffix}")
def dump(wfp, obj):
wd = osp.split(wfp)[0]
if wd != "" and not osp.exists(wd):
mkdir(wd)
_suffix = suffix(wfp)
if _suffix == "npy":
np.save(wfp, obj)
elif _suffix == "pkl":
pickle.dump(obj, open(wfp, "wb"))
else:
raise Exception("Unknown type: {}".format(_suffix))
# coding: utf-8
import os.path as osp
import cv2; cv2.setNumThreads(0); cv2.ocl.setUseOpenCL(False)
import torch
import numpy as np
import onnxruntime
from .timer import Timer
from .rprint import rlog
from .crop import crop_image, _transform_pts
def make_abs_path(fn):
return osp.join(osp.dirname(osp.realpath(__file__)), fn)
def to_ndarray(obj):
if isinstance(obj, torch.Tensor):
return obj.cpu().numpy()
elif isinstance(obj, np.ndarray):
return obj
else:
return np.array(obj)
class LandmarkRunner(object):
"""landmark runner"""
def __init__(self, **kwargs):
ckpt_path = kwargs.get('ckpt_path')
        onnx_provider = kwargs.get('onnx_provider', 'cuda')  # defaults to CUDA
device_id = kwargs.get('device_id', 0)
self.dsize = kwargs.get('dsize', 224)
self.timer = Timer()
if onnx_provider.lower() == 'cuda':
self.session = onnxruntime.InferenceSession(
ckpt_path, providers=[
('CUDAExecutionProvider', {'device_id': device_id})
]
)
elif onnx_provider.lower() == 'mps':
self.session = onnxruntime.InferenceSession(
ckpt_path, providers=[
'CoreMLExecutionProvider'
]
)
else:
opts = onnxruntime.SessionOptions()
            opts.intra_op_num_threads = 4  # default to 4 intra-op threads
self.session = onnxruntime.InferenceSession(
ckpt_path, providers=['CPUExecutionProvider'],
sess_options=opts
)
def _run(self, inp):
out = self.session.run(None, {'input': inp})
return out
def run(self, img_rgb: np.ndarray, lmk=None):
if lmk is not None:
crop_dct = crop_image(img_rgb, lmk, dsize=self.dsize, scale=1.5, vy_ratio=-0.1)
img_crop_rgb = crop_dct['img_crop']
else:
            # NOTE: force resize to 224x224, NOT RECOMMENDED!
img_crop_rgb = cv2.resize(img_rgb, (self.dsize, self.dsize))
scale = max(img_rgb.shape[:2]) / self.dsize
crop_dct = {
'M_c2o': np.array([
[scale, 0., 0.],
[0., scale, 0.],
[0., 0., 1.],
], dtype=np.float32),
}
        inp = (img_crop_rgb.astype(np.float32) / 255.).transpose(2, 0, 1)[None, ...]  # HxWx3 (RGB) -> 1x3xHxW
out_lst = self._run(inp)
out_pts = out_lst[2]
# 2d landmarks 203 points
lmk = to_ndarray(out_pts[0]).reshape(-1, 2) * self.dsize # scale to 0-224
lmk = _transform_pts(lmk, M=crop_dct['M_c2o'])
return lmk
def warmup(self):
self.timer.tic()
dummy_image = np.zeros((1, 3, self.dsize, self.dsize), dtype=np.float32)
_ = self._run(dummy_image)
elapse = self.timer.toc()
rlog(f'LandmarkRunner warmup time: {elapse:.3f}s')
"""
Functions to compute distance ratios between specific pairs of facial landmarks
"""
import numpy as np
def calculate_distance_ratio(lmk: np.ndarray, idx1: int, idx2: int, idx3: int, idx4: int, eps: float = 1e-6) -> np.ndarray:
return (np.linalg.norm(lmk[:, idx1] - lmk[:, idx2], axis=1, keepdims=True) /
(np.linalg.norm(lmk[:, idx3] - lmk[:, idx4], axis=1, keepdims=True) + eps))
def calc_eye_close_ratio(lmk: np.ndarray, target_eye_ratio: np.ndarray = None) -> np.ndarray:
lefteye_close_ratio = calculate_distance_ratio(lmk, 6, 18, 0, 12)
righteye_close_ratio = calculate_distance_ratio(lmk, 30, 42, 24, 36)
if target_eye_ratio is not None:
return np.concatenate([lefteye_close_ratio, righteye_close_ratio, target_eye_ratio], axis=1)
else:
return np.concatenate([lefteye_close_ratio, righteye_close_ratio], axis=1)
def calc_lip_close_ratio(lmk: np.ndarray) -> np.ndarray:
return calculate_distance_ratio(lmk, 90, 102, 48, 66)
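# Hedged shape check: lmk is assumed to be a batch of the 203-point 2D
# landmarks produced by LandmarkRunner, shaped (batch, 203, 2); the
# hard-coded indices above rely on that layout.
def _example_ratios():
    lmk = np.random.rand(1, 203, 2).astype(np.float32)
    print(calc_eye_close_ratio(lmk).shape)  # (1, 2)
    print(calc_lip_close_ratio(lmk).shape)  # (1, 1)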
# coding: utf-8
"""
custom print and log functions
"""
__all__ = ['rprint', 'rlog']
try:
from rich.console import Console
console = Console()
rprint = console.print
rlog = console.log
except ImportError:
rprint = print
rlog = print
# coding: utf-8
"""
tools to measure elapsed time
"""
import time
class Timer(object):
"""A simple timer."""
def __init__(self):
self.total_time = 0.
self.calls = 0
self.start_time = 0.
self.diff = 0.
def tic(self):
        # using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
self.start_time = time.time()
def toc(self, average=True):
self.diff = time.time() - self.start_time
return self.diff
def clear(self):
self.start_time = 0.
self.diff = 0.
# coding: utf-8
"""
Functions for processing video
ATTENTION: you need to install ffmpeg and ffprobe in your env!
"""
import os.path as osp
import numpy as np
import subprocess
import imageio
import cv2
from rich.progress import track
from .rprint import rlog as log
from .rprint import rprint as print
from .helper import prefix
def exec_cmd(cmd):
return subprocess.run(cmd, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
def images2video(images, wfp, **kwargs):
fps = kwargs.get('fps', 30)
video_format = kwargs.get('format', 'mp4') # default is mp4 format
codec = kwargs.get('codec', 'libx264') # default is libx264 encoding
quality = kwargs.get('quality') # video quality
pixelformat = kwargs.get('pixelformat', 'yuv420p') # video pixel format
image_mode = kwargs.get('image_mode', 'rgb')
macro_block_size = kwargs.get('macro_block_size', 2)
ffmpeg_params = ['-crf', str(kwargs.get('crf', 18))]
writer = imageio.get_writer(
wfp, fps=fps, format=video_format,
codec=codec, quality=quality, ffmpeg_params=ffmpeg_params, pixelformat=pixelformat, macro_block_size=macro_block_size
)
n = len(images)
for i in track(range(n), description='Writing', transient=True):
if image_mode.lower() == 'bgr':
writer.append_data(images[i][..., ::-1])
else:
writer.append_data(images[i])
writer.close()
def video2gif(video_fp, fps=30, size=256):
if osp.exists(video_fp):
d = osp.split(video_fp)[0]
fn = prefix(osp.basename(video_fp))
palette_wfp = osp.join(d, 'palette.png')
gif_wfp = osp.join(d, f'{fn}.gif')
# generate the palette
cmd = f'ffmpeg -i "{video_fp}" -vf "fps={fps},scale={size}:-1:flags=lanczos,palettegen" "{palette_wfp}" -y'
exec_cmd(cmd)
# use the palette to generate the gif
cmd = f'ffmpeg -i "{video_fp}" -i "{palette_wfp}" -filter_complex "fps={fps},scale={size}:-1:flags=lanczos[x];[x][1:v]paletteuse" "{gif_wfp}" -y'
exec_cmd(cmd)
else:
print(f'video_fp: {video_fp} not exists!')
def merge_audio_video(video_fp, audio_fp, wfp):
if osp.exists(video_fp) and osp.exists(audio_fp):
cmd = f'ffmpeg -i "{video_fp}" -i "{audio_fp}" -c:v copy -c:a aac "{wfp}" -y'
exec_cmd(cmd)
print(f'merge {video_fp} and {audio_fp} to {wfp}')
else:
print(f'video_fp: {video_fp} or audio_fp: {audio_fp} not exists!')
def blend(img: np.ndarray, mask: np.ndarray, background_color=(255, 255, 255)):
mask_float = mask.astype(np.float32) / 255.
background_color = np.array(background_color).reshape([1, 1, 3])
bg = np.ones_like(img) * background_color
img = np.clip(mask_float * img + (1 - mask_float) * bg, 0, 255).astype(np.uint8)
return img
def concat_frames(driving_image_lst, source_image_lst, I_p_lst):
# TODO: add more concat style, e.g., left-down corner driving
out_lst = []
h, w, _ = I_p_lst[0].shape
source_image_resized_lst = [cv2.resize(img, (w, h)) for img in source_image_lst]
for idx, _ in track(enumerate(I_p_lst), total=len(I_p_lst), description='Concatenating result...'):
I_p = I_p_lst[idx]
source_image_resized = source_image_resized_lst[idx] if len(source_image_lst) > 1 else source_image_resized_lst[0]
if driving_image_lst is None:
out = np.hstack((source_image_resized, I_p))
else:
driving_image = driving_image_lst[idx]
driving_image_resized = cv2.resize(driving_image, (w, h))
out = np.hstack((driving_image_resized, source_image_resized, I_p))
out_lst.append(out)
return out_lst
class VideoWriter:
def __init__(self, **kwargs):
self.fps = kwargs.get('fps', 30)
self.wfp = kwargs.get('wfp', 'video.mp4')
self.video_format = kwargs.get('format', 'mp4')
self.codec = kwargs.get('codec', 'libx264')
self.quality = kwargs.get('quality')
self.pixelformat = kwargs.get('pixelformat', 'yuv420p')
self.image_mode = kwargs.get('image_mode', 'rgb')
self.ffmpeg_params = kwargs.get('ffmpeg_params')
self.writer = imageio.get_writer(
self.wfp, fps=self.fps, format=self.video_format,
codec=self.codec, quality=self.quality,
ffmpeg_params=self.ffmpeg_params, pixelformat=self.pixelformat
)
def write(self, image):
if self.image_mode.lower() == 'bgr':
self.writer.append_data(image[..., ::-1])
else:
self.writer.append_data(image)
def close(self):
if self.writer is not None:
self.writer.close()
def change_video_fps(input_file, output_file, fps=20, codec='libx264', crf=12):
cmd = f'ffmpeg -i "{input_file}" -c:v {codec} -crf {crf} -r {fps} "{output_file}" -y'
exec_cmd(cmd)
def get_fps(filepath, default_fps=25):
try:
fps = cv2.VideoCapture(filepath).get(cv2.CAP_PROP_FPS)
if fps in (0, None):
fps = default_fps
except Exception as e:
log(e)
fps = default_fps
return fps
def has_audio_stream(video_path: str) -> bool:
"""
Check if the video file contains an audio stream.
:param video_path: Path to the video file
:return: True if the video contains an audio stream, False otherwise
"""
if osp.isdir(video_path):
return False
cmd = [
'ffprobe',
'-v', 'error',
'-select_streams', 'a',
'-show_entries', 'stream=codec_type',
'-of', 'default=noprint_wrappers=1:nokey=1',
f'"{video_path}"'
]
try:
# result = subprocess.run(cmd, capture_output=True, text=True)
result = exec_cmd(' '.join(cmd))
if result.returncode != 0:
log(f"Error occurred while probing video: {result.stderr}")
return False
# Check if there is any output from ffprobe command
return bool(result.stdout.strip())
except Exception as e:
log(
f"Error occurred while probing video: {video_path}, "
"you may need to install ffprobe! (https://ffmpeg.org/download.html) "
"Now set audio to false!",
style="bold red"
)
return False
def add_audio_to_video(silent_video_path: str, audio_video_path: str, output_video_path: str):
cmd = [
'ffmpeg',
'-y',
'-i', f'"{silent_video_path}"',
'-i', f'"{audio_video_path}"',
'-map', '0:v',
'-map', '1:a',
'-c:v', 'copy',
'-shortest',
f'"{output_video_path}"'
]
try:
exec_cmd(' '.join(cmd))
log(f"Video with audio generated successfully: {output_video_path}")
except subprocess.CalledProcessError as e:
log(f"Error occurred: {e}")
def bb_intersection_over_union(boxA, boxB):
xA = max(boxA[0], boxB[0])
yA = max(boxA[1], boxB[1])
xB = min(boxA[2], boxB[2])
yB = min(boxA[3], boxB[3])
interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
iou = interArea / float(boxAArea + boxBArea - interArea)
return iou
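# Tiny numeric check: two 100x100 boxes offset by 50 px share a 50x50 overlap
# region, so IoU = 2500 / (10000 + 10000 - 2500) ≈ 0.14.
def _example_iou():
    print(bb_intersection_over_union([0, 0, 99, 99], [50, 50, 149, 149]))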