absl-py==1.4.0
accelerate==0.22.0
aiofiles==23.2.1
aiohttp==3.8.5
aiosignal==1.3.1
altair==5.0.1
annotated-types==0.5.0
antlr4-python3-runtime==4.9.3
anyio==3.7.1
async-timeout==4.0.3
attrs==23.1.0
cachetools==5.3.1
certifi==2023.7.22
charset-normalizer==3.2.0
click==8.1.7
cmake==3.27.2
contourpy==1.1.0
cycler==0.11.0
datasets==2.14.4
dill==0.3.7
einops==0.6.1
exceptiongroup==1.1.3
fastapi==0.103.0
ffmpy==0.3.1
filelock==3.12.2
fonttools==4.42.1
frozenlist==1.4.0
fsspec==2023.6.0
google-auth==2.22.0
google-auth-oauthlib==1.0.0
gradio==3.41.2
gradio-client==0.5.0
grpcio==1.57.0
h11==0.14.0
httpcore==0.17.3
httpx==0.24.1
huggingface-hub==0.16.4
idna==3.4
importlib-metadata==6.8.0
importlib-resources==6.0.1
jinja2==3.1.2
joblib==1.3.2
jsonschema==4.19.0
jsonschema-specifications==2023.7.1
kiwisolver==1.4.5
lightning-utilities==0.9.0
lit==16.0.6
markdown==3.4.4
markupsafe==2.1.3
matplotlib==3.7.2
mpmath==1.3.0
multidict==6.0.4
multiprocess==0.70.15
networkx==3.1
numpy==1.24.4
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-cupti-cu11==11.7.101
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
nvidia-cufft-cu11==10.9.0.58
nvidia-curand-cu11==10.2.10.91
nvidia-cusolver-cu11==11.4.0.1
nvidia-cusparse-cu11==11.7.4.91
nvidia-nccl-cu11==2.14.3
nvidia-nvtx-cu11==11.7.91
oauthlib==3.2.2
omegaconf==2.3.0
opencv-python==4.8.0.76
orjson==3.9.5
pandas==2.0.3
pillow==9.5.0
pkgutil-resolve-name==1.3.10
protobuf==4.24.2
psutil==5.9.5
pyarrow==13.0.0
pyasn1==0.5.0
pyasn1-modules==0.3.0
pydantic==2.3.0
pydantic-core==2.6.3
pydub==0.25.1
pyparsing==3.0.9
python-multipart==0.0.6
pytorch-lightning==2.0.7
pytz==2023.3
pyyaml==6.0.1
referencing==0.30.2
regex==2023.8.8
requests==2.31.0
requests-oauthlib==1.3.1
rpds-py==0.9.2
rsa==4.9
safetensors==0.3.3
semantic-version==2.10.0
sniffio==1.3.0
starlette==0.27.0
sympy==1.12
tensorboard==2.14.0
tensorboard-data-server==0.7.1
tokenizers==0.13.3
toolz==0.12.0
torchmetrics==1.1.0
tqdm==4.66.1
transformers==4.32.0
triton==2.0.0
tzdata==2023.3
urllib3==1.26.16
uvicorn==0.23.2
websockets==11.0.3
werkzeug==2.3.7
xxhash==3.3.0
yarl==1.9.2
zipp==3.16.2
decord
imageio==2.9.0
imageio-ffmpeg==0.4.3
timm
scipy
scikit-image
av
imgaug
lpips
ffmpeg-python
torch==2.0.1
torchvision==0.15.2
xformers==0.0.22
diffusers==0.21.4
python-slugify
git+https://github.com/facebookresearch/detectron2@main#subdirectory=projects/DensePose
git+https://github.com/facebookresearch/detectron2.git
# Copyright 2023 ByteDance and/or its affiliates.
#
# Copyright (2023) MagicAnimate Authors
#
# ByteDance, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from ByteDance or
# its affiliates is strictly prohibited.
import os
import socket
import warnings
import torch
from torch import distributed as dist

def distributed_init(args):
    if dist.is_initialized():
        warnings.warn("Distributed is already initialized, cannot initialize twice!")
        args.rank = dist.get_rank()
    else:
        print(
            f"Distributed Init (Rank {args.rank}): "
            f"{args.init_method}"
        )
        dist.init_process_group(
            backend='nccl',
            init_method=args.init_method,
            world_size=args.world_size,
            rank=args.rank,
        )
        print(
            f"Initialized Host {socket.gethostname()} as Rank "
            f"{args.rank}"
        )

        if "MASTER_ADDR" not in os.environ or "MASTER_PORT" not in os.environ:
            # Set for onboxdataloader support
            split = args.init_method.split("//")
            assert len(split) == 2, (
                "host url for distributed should be split by '//' "
                + "into exactly two elements"
            )

            split = split[1].split(":")
            assert (
                len(split) == 2
            ), "host url should be of the form <host_url>:<host_port>"
            os.environ["MASTER_ADDR"] = split[0]
            os.environ["MASTER_PORT"] = split[1]

        # perform a dummy all-reduce to initialize the NCCL communicator
        dist.all_reduce(torch.zeros(1).cuda())

        suppress_output(is_master())
        args.rank = dist.get_rank()
    return args.rank


def get_rank():
    if not dist.is_available():
        return 0
    if not dist.is_nccl_available():
        return 0
    if not dist.is_initialized():
        return 0
    return dist.get_rank()


def is_master():
    return get_rank() == 0


def synchronize():
    if dist.is_initialized():
        dist.barrier()


def suppress_output(is_master):
    """Suppress printing on the current device. Force printing with `force=True`."""
    import builtins as __builtin__

    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop("force", False)
        if is_master or force:
            builtin_print(*args, **kwargs)

    __builtin__.print = print

    import warnings

    builtin_warn = warnings.warn

    def warn(*args, **kwargs):
        force = kwargs.pop("force", False)
        if is_master or force:
            builtin_warn(*args, **kwargs)

    # Log warnings only once
    warnings.warn = warn
    warnings.simplefilter("once", UserWarning)
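

if __name__ == "__main__":
    # Minimal usage sketch (added for illustration, not part of the original
    # file). The fields read from `args` above are rank, world_size, and
    # init_method; the values below are hypothetical and describe a
    # single-node, single-process NCCL run.
    from argparse import Namespace

    example_args = Namespace(rank=0, world_size=1,
                             init_method="tcp://127.0.0.1:29500")
    # distributed_init(example_args)  # requires a CUDA device and the NCCL backend
    print("rank:", get_rank(), "is master:", is_master())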
# *************************************************************************
# This file may have been modified by Bytedance Inc. (“Bytedance Inc.'s Mo-
# difications”). All Bytedance Inc.'s Modifications are Copyright (2023) B-
# ytedance Inc..
# *************************************************************************
# Adapted from https://github.com/guoyww/AnimateDiff
import os
import imageio
import numpy as np
import torch
import torchvision
from PIL import Image
from typing import Union
from tqdm import tqdm
from einops import rearrange

def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=6, fps=25):
    videos = rearrange(videos, "b c t h w -> t b c h w")
    outputs = []
    for x in videos:
        x = torchvision.utils.make_grid(x, nrow=n_rows)
        x = x.transpose(0, 1).transpose(1, 2).squeeze(-1)
        if rescale:
            x = (x + 1.0) / 2.0  # -1,1 -> 0,1
        x = (x * 255).numpy().astype(np.uint8)
        outputs.append(x)

    os.makedirs(os.path.dirname(path), exist_ok=True)
    imageio.mimsave(path, outputs, fps=fps)


def save_images_grid(images: torch.Tensor, path: str):
    assert images.shape[2] == 1  # no time dimension
    images = images.squeeze(2)
    grid = torchvision.utils.make_grid(images)
    grid = (grid * 255).numpy().transpose(1, 2, 0).astype(np.uint8)
    os.makedirs(os.path.dirname(path), exist_ok=True)
    Image.fromarray(grid).save(path)


# DDIM Inversion
@torch.no_grad()
def init_prompt(prompt, pipeline):
    uncond_input = pipeline.tokenizer(
        [""], padding="max_length", max_length=pipeline.tokenizer.model_max_length,
        return_tensors="pt"
    )
    uncond_embeddings = pipeline.text_encoder(uncond_input.input_ids.to(pipeline.device))[0]
    text_input = pipeline.tokenizer(
        [prompt],
        padding="max_length",
        max_length=pipeline.tokenizer.model_max_length,
        truncation=True,
        return_tensors="pt",
    )
    text_embeddings = pipeline.text_encoder(text_input.input_ids.to(pipeline.device))[0]
    context = torch.cat([uncond_embeddings, text_embeddings])
    return context


def next_step(model_output: Union[torch.FloatTensor, np.ndarray], timestep: int,
              sample: Union[torch.FloatTensor, np.ndarray], ddim_scheduler):
    timestep, next_timestep = min(
        timestep - ddim_scheduler.config.num_train_timesteps // ddim_scheduler.num_inference_steps, 999), timestep
    alpha_prod_t = ddim_scheduler.alphas_cumprod[timestep] if timestep >= 0 else ddim_scheduler.final_alpha_cumprod
    alpha_prod_t_next = ddim_scheduler.alphas_cumprod[next_timestep]
    beta_prod_t = 1 - alpha_prod_t
    next_original_sample = (sample - beta_prod_t ** 0.5 * model_output) / alpha_prod_t ** 0.5
    next_sample_direction = (1 - alpha_prod_t_next) ** 0.5 * model_output
    next_sample = alpha_prod_t_next ** 0.5 * next_original_sample + next_sample_direction
    return next_sample


def get_noise_pred_single(latents, t, context, unet):
    noise_pred = unet(latents, t, encoder_hidden_states=context)["sample"]
    return noise_pred


@torch.no_grad()
def ddim_loop(pipeline, ddim_scheduler, latent, num_inv_steps, prompt):
    context = init_prompt(prompt, pipeline)
    uncond_embeddings, cond_embeddings = context.chunk(2)
    all_latent = [latent]
    latent = latent.clone().detach()
    for i in tqdm(range(num_inv_steps)):
        t = ddim_scheduler.timesteps[len(ddim_scheduler.timesteps) - i - 1]
        noise_pred = get_noise_pred_single(latent, t, cond_embeddings, pipeline.unet)
        latent = next_step(noise_pred, t, latent, ddim_scheduler)
        all_latent.append(latent)
    return all_latent


@torch.no_grad()
def ddim_inversion(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt=""):
    ddim_latents = ddim_loop(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt)
    return ddim_latents


def video2images(path, step=4, length=16, start=0):
    reader = imageio.get_reader(path)
    frames = []
    for frame in reader:
        frames.append(np.array(frame))
    frames = frames[start::step][:length]
    return frames


def images2video(video, path, fps=8):
    imageio.mimsave(path, video, fps=fps)
    return


tensor_interpolation = None


def get_tensor_interpolation_method():
    return tensor_interpolation


def set_tensor_interpolation_method(is_slerp):
    global tensor_interpolation
    tensor_interpolation = slerp if is_slerp else linear


def linear(v1, v2, t):
    return (1.0 - t) * v1 + t * v2


def slerp(
    v0: torch.Tensor, v1: torch.Tensor, t: float, DOT_THRESHOLD: float = 0.9995
) -> torch.Tensor:
    u0 = v0 / v0.norm()
    u1 = v1 / v1.norm()
    dot = (u0 * u1).sum()
    if dot.abs() > DOT_THRESHOLD:
        # logger.info(f'warning: v0 and v1 close to parallel, using linear interpolation instead.')
        return (1.0 - t) * v0 + t * v1
    omega = dot.acos()
    return (((1.0 - t) * omega).sin() * v0 + (t * omega).sin() * v1) / omega.sin()
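

if __name__ == "__main__":
    # Minimal usage sketch (added for illustration, not part of the original
    # file). save_videos_grid expects a (batch, channels, time, height, width)
    # tensor with values in [0, 1] (or [-1, 1] when rescale=True); the output
    # path below is a hypothetical example and requires imageio-ffmpeg.
    dummy = torch.rand(2, 3, 8, 64, 64)  # 2 clips, 8 frames each, 64x64 RGB
    save_videos_grid(dummy, "outputs/dummy_grid.mp4", n_rows=2, fps=8)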
# *************************************************************************
# This file may have been modified by Bytedance Inc. (“Bytedance Inc.'s Mo-
# difications”). All Bytedance Inc.'s Modifications are Copyright (2023) B-
# ytedance Inc..
# *************************************************************************
# Copyright 2022 ByteDance and/or its affiliates.
#
# Copyright (2022) PV3D Authors
#
# ByteDance, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from ByteDance or
# its affiliates is strictly prohibited.
import av, gc
import torch
import warnings
import numpy as np
_CALLED_TIMES = 0
_GC_COLLECTION_INTERVAL = 20
# remove warnings
av.logging.set_level(av.logging.ERROR)


class VideoReader():
    """
    Simple wrapper around PyAV that exposes a few useful functions for
    dealing with video reading. PyAV is a Pythonic binding for the FFmpeg libraries.
    Acknowledgement: code is borrowed from Bruno Korbar
    """
    def __init__(self, video, num_frames=float("inf"), decode_lossy=False, audio_resample_rate=None, bi_frame=False):
        """
        Arguments:
            video (str or file-like): path or bytes of the video to be loaded
        """
        self.container = av.open(video)
        self.num_frames = num_frames
        self.bi_frame = bi_frame

        self.resampler = None
        if audio_resample_rate is not None:
            self.resampler = av.AudioResampler(rate=audio_resample_rate)

        if self.container.streams.video:
            # enable multi-threaded video decoding
            if decode_lossy:
                warnings.warn('VideoReader| thread_type==AUTO can yield potential frame dropping!', RuntimeWarning)
                self.container.streams.video[0].thread_type = 'AUTO'
            self.video_stream = self.container.streams.video[0]
        else:
            self.video_stream = None

        self.fps = self._get_video_frame_rate()

    def seek(self, pts, backward=True, any_frame=False):
        stream = self.video_stream
        self.container.seek(pts, any_frame=any_frame, backward=backward, stream=stream)

    def _occasional_gc(self):
        # there are a lot of reference cycles in PyAV, so need to manually call
        # the garbage collector from time to time
        global _CALLED_TIMES, _GC_COLLECTION_INTERVAL
        _CALLED_TIMES += 1
        if _CALLED_TIMES % _GC_COLLECTION_INTERVAL == _GC_COLLECTION_INTERVAL - 1:
            gc.collect()

    def _read_video(self, offset):
        self._occasional_gc()

        pts = self.container.duration * offset
        time_ = pts / float(av.time_base)
        self.container.seek(int(pts))

        video_frames = []
        count = 0
        for _, frame in enumerate(self._iter_frames()):
            if frame.pts * frame.time_base >= time_:
                video_frames.append(frame)
            if count >= self.num_frames - 1:
                break
            count += 1
        return video_frames

    def _iter_frames(self):
        for packet in self.container.demux(self.video_stream):
            for frame in packet.decode():
                yield frame

    def _compute_video_stats(self):
        if self.video_stream is None or self.container is None:
            return 0
        num_of_frames = self.container.streams.video[0].frames
        if num_of_frames == 0:
            # fall back to duration * fps when the frame count is not stored in the stream
            num_of_frames = self.fps * float(self.container.streams.video[0].duration * self.video_stream.time_base)
        self.seek(0, backward=False)
        count = 0
        time_base = 512
        for p in self.container.decode(video=0):
            count = count + 1
            if count == 1:
                start_pts = p.pts
            elif count == 2:
                time_base = p.pts - start_pts
                break
        return start_pts, time_base, num_of_frames

    def _get_video_frame_rate(self):
        return float(self.container.streams.video[0].guessed_rate)

    def sample(self, debug=False):
        if self.container is None:
            raise RuntimeError('video stream not found')
        sample = dict()
        _, _, total_num_frames = self._compute_video_stats()
        offset = torch.randint(max(1, total_num_frames - self.num_frames - 1), [1]).item()
        video_frames = self._read_video(offset / total_num_frames)
        video_frames = np.array([np.uint8(f.to_rgb().to_ndarray()) for f in video_frames])
        sample["frames"] = video_frames
        sample["frame_idx"] = [offset]
        if self.bi_frame:
            # sample two frame indices from beta distributions skewed towards the two ends of the clip
            frames = [np.random.beta(2, 1, size=1), np.random.beta(1, 2, size=1)]
            frames = [int(frames[0] * self.num_frames), int(frames[1] * self.num_frames)]
            frames.sort()
            video_frames = np.array([video_frames[min(frames)], video_frames[max(frames)]])
            Ts = [min(frames) / (self.num_frames - 1), max(frames) / (self.num_frames - 1)]
            sample["frames"] = video_frames
            sample["real_t"] = torch.tensor(Ts, dtype=torch.float32)
            sample["frame_idx"] = [offset + min(frames), offset + max(frames)]
            return sample

        return sample

    def read_frames(self, frame_indices):
        self.num_frames = frame_indices[1] - frame_indices[0]
        video_frames = self._read_video(frame_indices[0] / self.get_num_frames())
        video_frames = np.array([
            np.uint8(video_frames[0].to_rgb().to_ndarray()),
            np.uint8(video_frames[-1].to_rgb().to_ndarray())
        ])
        return video_frames

    def read(self):
        video_frames = self._read_video(0)
        video_frames = np.array([np.uint8(f.to_rgb().to_ndarray()) for f in video_frames])
        return video_frames

    def get_num_frames(self):
        _, _, total_num_frames = self._compute_video_stats()
        return total_num_frames
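

if __name__ == "__main__":
    # Minimal usage sketch (added for illustration, not part of the original
    # class). "example.mp4" is a placeholder path; the block only runs if such
    # a file exists. read() decodes frames from the start of the file and
    # returns a uint8 RGB array of shape (num_frames, H, W, 3).
    import os
    if os.path.exists("example.mp4"):
        reader = VideoReader("example.mp4", num_frames=16)
        frames = reader.read()
        print("fps:", reader.fps, "frames:", frames.shape)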
# Magic Animate Installation And Usage Tutorial
## Introduction
Web UI of Magic Animate Tab:
![image](https://github.com/modelscope/facechain/tree/main/facechain_animate/resources/MagicAnimate/magicanimate_snapshot.jpg)
The function of this tab is based on [MagicAnimate](https://showlab.github.io/magicanimate/).
## 1. Install the latest modelscope
Make sure that the modelscope version you have installed is greater than 1.9.1; otherwise an error will be thrown. Upgrade as follows:
```
pip install -U modelscope
```
Or install from source:
```
pip uninstall modelscope -y
GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/modelscope/modelscope.git
cd modelscope
pip install -r requirements.txt
pip install .
```
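After upgrading, you can verify the installed version with a one-liner (this assumes the `modelscope` package exposes `__version__`, which recent releases do):
```
python -c "import modelscope; print(modelscope.__version__)"
```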
## 2. Install Dependencies
You will need to install the following additional Python dependencies. First go to the base folder of this project: `cd /path/to/facechain/`, and then run:
```
pip install -r facechain_animate/magicanimate/requirements.txt
pip install -r facechain_animate/magicanimate/requirements_additional.txt
```
## Usage
1. Go to the base folder of this project: `cd /path/to/facechain/`, then run the command `python -m facechain_animate.app`.
2. You can upload a photo from your local computer or select one from previously generated images as the source image.
3. You can upload a motion sequence video (in mp4 format) from your local computer, or upload an original video and generate a motion sequence video from it.
4. In the right pane, set parameters.
5. Click the generate button and wait for the result. The first run downloads the model, so please be patient. Subsequent generations usually take about 5 minutes (on a V100 GPU).
6. Alternatively, you can run the command `python -m facechain_animate.magicanimate.pipelines.animation --config facechain_animate/magicanimate/configs/prompts/animation.yaml` directly from the command line. You can use `--videos_dir` and `--images_dir` to choose the motion sequence directory and source image directory for inference; a full example command is shown below.
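For instance, an invocation with both optional directories set might look like the following (the two directory paths are placeholders; replace them with your own):
```
python -m facechain_animate.magicanimate.pipelines.animation \
    --config facechain_animate/magicanimate/configs/prompts/animation.yaml \
    --videos_dir /path/to/motion_sequences \
    --images_dir /path/to/source_images
```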
## Additional Information
1. Based on current test results, videos generated from motion sequences uploaded by users are not particularly good (indicating limited generalization). Consistency is better when using the motion sequences provided with the templates.
2. Even so, the consistency of facial and hand features still needs improvement; this is an aspect the project aims to enhance on top of MagicAnimate in the future.
# Character Motion Video Generation Module: Installation and Usage Tutorial
## Introduction
Web UI of the character motion video generation tab:
![image](https://github.com/modelscope/facechain/tree/main/facechain_animate/resources/MagicAnimate/magicanimate_snapshot.jpg)
This tab generates character motion videos based on [MagicAnimate](https://showlab.github.io/magicanimate/).
## 1. Install the latest modelscope
Make sure that the modelscope version you have installed is greater than 1.9.1; otherwise an error will be thrown. Upgrade as follows:
```
pip install -U modelscope
```
Or install from source:
```
pip uninstall modelscope -y
GIT_LFS_SKIP_SMUDGE=1 git clone https://github.com/modelscope/modelscope.git
cd modelscope
pip install -r requirements.txt
pip install .
```
## 2. Install Dependencies
First go to the base folder of facechain: `cd /path/to/facechain/`, and then install the following additional dependencies:
```
pip install -r facechain_animate/magicanimate/requirements.txt
pip install -r facechain_animate/magicanimate/requirements_additional.txt
```
## Usage
1. Go to the base folder of facechain: `cd /path/to/facechain/`, then run the command `python -m facechain_animate.app`.
2. You can upload a photo from your local computer or select one from previously generated images as the source image.
3. You can upload a motion sequence video (in mp4 format) from your local computer, or generate a motion sequence video from an original video.
4. Set the parameters in the right pane.
5. Click the generate button and wait for the result. The first run downloads the model, so please be patient. Subsequent generations usually take about 5 minutes (on a V100 GPU).
6. Alternatively, you can run `python -m facechain_animate.magicanimate.pipelines.animation --config facechain_animate/magicanimate/configs/prompts/animation.yaml` directly from the command line. The optional `--videos_dir` and `--images_dir` arguments specify the motion sequence video directory and the source image directory used for inference.
## Additional Information
1. Based on current test results, videos generated from motion sequences uploaded by users are not particularly good (i.e., generalization is still limited). Consistency is better when using the motion sequences provided with the templates.
2. Even so, face consistency still needs improvement; this is an aspect the project will enhance on top of MagicAnimate in the future.