Commit 30af93f2 authored by chenpangpang's avatar chenpangpang
Browse files

feat: gpu初始提交

parent 68e98ab8
Pipeline #2159 canceled with stages
import os

import numpy as np
import torch
import torchvision
from PIL import Image, ImageDraw, ImageFont
from torch import Tensor
from torchvision.transforms.functional import to_tensor
from torchvision.utils import make_grid
from tqdm import tqdm
def frames_to_mp4(frame_dir, output_path, fps):
    """Encode the image frames in *frame_dir* into an H.264 MP4 at *output_path*.

    Frames are read in lexicographic filename order, converted to float
    tensors in [0, 1], then rescaled to uint8 THWC for torchvision's writer.
    """

    def _load_frames(directory: os.PathLike, num_frames: int):
        # A falsy num_frames (None/0) means "take every frame in the directory".
        names = sorted(os.listdir(directory))
        if num_frames:
            names = names[:num_frames]
        frames = [
            to_tensor(Image.open(os.path.join(directory, name))) for name in names
        ]
        return torch.stack(frames)

    clip = _load_frames(frame_dir, num_frames=None)
    # [T, C, H, W] float in [0, 1] -> [T, H, W, C] uint8 in [0, 255]
    clip = clip.mul(255).to(torch.uint8).permute(0, 2, 3, 1)
    torchvision.io.write_video(
        output_path, clip, fps=fps, video_codec="h264", options={"crf": "10"}
    )
def tensor_to_mp4(video, savepath, fps, rescale=True, nrow=None):
    """Write a batched video tensor to an H.264 MP4 as one tiled grid video.

    video: torch.Tensor, b,c,t,h,w, 0-1
    if -1~1, enable rescale=True
    """
    batch = video.shape[0]
    if nrow is None:
        nrow = int(np.sqrt(batch))
    # [b, c, t, h, w] -> [t, b, c, h, w]: iterate once per timestep.
    per_step = video.permute(2, 0, 1, 3, 4)
    sheets = []
    for sheet in per_step:
        # Each sheet tiles the batch into a single [3, grid_h, grid_w] image.
        sheets.append(torchvision.utils.make_grid(sheet, nrow=nrow))
    grid = torch.stack(sheets, dim=0)  # [T, 3, grid_h, grid_w]
    grid = torch.clamp(grid.float(), -1.0, 1.0)
    if rescale:
        grid = (grid + 1.0) / 2.0  # map [-1, 1] -> [0, 1]
    # torchvision.io expects uint8 frames shaped [T, H, W, C]
    grid = (grid * 255).to(torch.uint8).permute(0, 2, 3, 1)
    torchvision.io.write_video(
        savepath, grid, fps=fps, video_codec="h264", options={"crf": "10"}
    )
def tensor2videogrids(video, root, filename, fps, rescale=True, clamp=True):
    """Tile a batched [b, c, t, h, w] video tensor into a grid video and save
    it as root/filename (H.264, crf 10)."""
    assert video.dim() == 5  # b,c,t,h,w
    assert isinstance(video, torch.Tensor)
    video = video.detach().cpu()
    if clamp:
        video = torch.clamp(video, -1.0, 1.0)
    batch = video.shape[0]
    nrow = int(np.sqrt(batch))
    per_step = video.permute(2, 0, 1, 3, 4)  # t,n,c,h,w
    # One [3, grid_h, grid_w] sheet per timestep, stacked along time.
    grid = torch.stack(
        [torchvision.utils.make_grid(sheet, nrow=nrow) for sheet in per_step],
        dim=0,
    )  # [T, 3, grid_h, grid_w]
    if rescale:
        grid = (grid + 1.0) / 2.0  # map [-1, 1] -> [0, 1]
    # [T, 3, grid_h, grid_w] -> uint8 [T, grid_h, grid_w, 3] for the writer
    grid = (grid * 255).to(torch.uint8).permute(0, 2, 3, 1)
    out_path = os.path.join(root, filename)
    torchvision.io.write_video(
        out_path, grid, fps=fps, video_codec="h264", options={"crf": "10"}
    )
# print('Finish!')
def log_txt_as_img(wh, xc, size=10):
    """Render a batch of caption strings onto white images for logging.

    Fix: this function uses ImageDraw and ImageFont, which were never
    imported (the file only imported PIL.Image) — every call raised
    NameError. Both names are now imported at the top of the file.

    Args:
        wh: (width, height) tuple for each caption image.
        xc: list of caption strings, one per batch element.
        size: font size in points.

    Returns:
        torch.Tensor of shape [b, 3, h, w] with values in [-1, 1].
    """
    b = len(xc)
    txts = list()
    for bi in range(b):
        txt = Image.new("RGB", wh, color="white")
        draw = ImageDraw.Draw(txt)
        # NOTE(review): font path is hard-coded relative to the CWD — confirm
        # data/DejaVuSans.ttf exists wherever this is invoked.
        font = ImageFont.truetype("data/DejaVuSans.ttf", size=size)
        # Hard-wrap long captions: ~40 chars per line at 256 px, scaled by width.
        nc = int(40 * (wh[0] / 256))
        lines = "\n".join(
            xc[bi][start : start + nc] for start in range(0, len(xc[bi]), nc)
        )
        try:
            draw.text((0, 0), lines, fill="black", font=font)
        except UnicodeEncodeError:
            print("Cant encode string for logging. Skipping.")
        # HWC uint8 [0, 255] -> CHW float [-1, 1]
        txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0
        txts.append(txt)
    txts = np.stack(txts)
    txts = torch.tensor(txts)
    return txts
def log_local(batch_logs, save_dir, filename, save_fps=10, rescale=True):
    """Save the entries of a log dict to disk; the value type picks the format.

    Per key in batch_logs:
      - list[str]            -> "<key>-<filename>.txt" (one caption per line)
      - Tensor [b,c,t,h,w]   -> "<key>-<filename>.mp4" (vertical grid video)
      - Tensor [b,c,h,w]     -> "<key>-<filename>.jpg" (vertical image grid)
    Tensors whose channel count is neither 1 nor 3 are skipped; any other
    value type is ignored. rescale=True maps values from [-1, 1] to [0, 1].

    Fixes: removed a redundant f.close() inside a `with` block, dropped two
    unused `n` locals, and guarded against an empty caption list.
    """
    if batch_logs is None:
        return None

    def save_img_grid(grid, path, rescale):
        # grid: [c, h, w] image grid; written as an 8-bit image file.
        if rescale:
            grid = (grid + 1.0) / 2.0  # -1,1 -> 0,1
        grid = grid.transpose(0, 1).transpose(1, 2).squeeze(-1)  # c,h,w -> h,w,c
        grid = grid.numpy()
        grid = (grid * 255).astype(np.uint8)
        os.makedirs(os.path.split(path)[0], exist_ok=True)
        Image.fromarray(grid).save(path)

    for key in batch_logs:
        value = batch_logs[key]
        if isinstance(value, list) and value and isinstance(value[0], str):
            # a batch of captions
            path = os.path.join(save_dir, "%s-%s.txt" % (key, filename))
            with open(path, "w") as f:
                for i, txt in enumerate(value):
                    f.write(f"idx={i}, txt={txt}\n")
        elif isinstance(value, torch.Tensor) and value.dim() == 5:
            # save video grids; only grayscale or rgb are supported
            video = value  # b,c,t,h,w
            if video.shape[1] != 1 and video.shape[1] != 3:
                continue
            video = video.permute(2, 0, 1, 3, 4)  # t,n,c,h,w
            # nrow=1 stacks the batch vertically: each frame is [3, n*h, w]
            frame_grids = [
                torchvision.utils.make_grid(framesheet, nrow=1)
                for framesheet in video
            ]
            grid = torch.stack(frame_grids, dim=0)  # [t, 3, n*h, w]
            if rescale:
                grid = (grid + 1.0) / 2.0
            grid = (grid * 255).to(torch.uint8).permute(0, 2, 3, 1)
            path = os.path.join(save_dir, "%s-%s.mp4" % (key, filename))
            torchvision.io.write_video(
                path, grid, fps=save_fps, video_codec="h264", options={"crf": "10"}
            )
        elif isinstance(value, torch.Tensor) and value.dim() == 4:
            img = value
            if img.shape[1] != 1 and img.shape[1] != 3:
                continue
            grid = torchvision.utils.make_grid(img, nrow=1)
            path = os.path.join(save_dir, "%s-%s.jpg" % (key, filename))
            save_img_grid(grid, path, rescale)
        else:
            pass  # unsupported value type: silently ignored (original behavior)
def prepare_to_log(batch_logs, max_images=100000, clamp=True):
    """Truncate, detach, and optionally clamp every entry of a log dict.

    Each value is cut to at most max_images along its first dimension;
    tensor values are moved to CPU and, when clamp=True, clamped to [-1, 1].
    Returns the (mutated) dict, or None when given None.
    """
    if batch_logs is None:
        return None
    for key, value in batch_logs.items():
        # Works for tensors (shape) and plain sequences (len) alike.
        size = value.shape[0] if hasattr(value, "shape") else len(value)
        value = value[: min(size, max_images)]
        if isinstance(value, torch.Tensor):
            value = value.detach().cpu()
            if clamp:
                try:
                    value = torch.clamp(value.float(), -1.0, 1.0)
                except RuntimeError:
                    print("clamp_scalar_cpu not implemented for Half")
        batch_logs[key] = value
    return batch_logs
def fill_with_black_squares(video, desired_len: int) -> Tensor:
    """Pad a [t, c, h, w] clip with all-zero (black) frames up to desired_len.

    Returns the clip unchanged when it is already long enough.
    """
    missing = desired_len - len(video)
    if missing <= 0:
        return video
    # One black frame shaped like the first frame, repeated to fill the gap.
    padding = torch.zeros_like(video[0]).unsqueeze(0).repeat(missing, 1, 1, 1)
    return torch.cat([video, padding], dim=0)
def load_num_videos(data_path, num_videos):
    """Load an [N, T, H, W, C] video array and keep the first num_videos.

    Args:
        data_path: path to a .npz file holding key "arr_0", or an ndarray
            that is passed through unchanged.
        num_videos: number of videos to keep; None keeps all of them.

    Returns:
        np.ndarray of shape [min(N, num_videos), T, H, W, C].

    Raises:
        TypeError: if data_path is neither a str path nor an ndarray
            (was a bare `raise Exception` with no message).
    """
    if isinstance(data_path, str):
        videos = np.load(data_path)["arr_0"]  # NTHWC
    elif isinstance(data_path, np.ndarray):
        videos = data_path
    else:
        raise TypeError(
            f"data_path must be str or np.ndarray, got {type(data_path)!r}"
        )
    if num_videos is not None:
        videos = videos[:num_videos]
    return videos
def npz_to_video_grid(
    data_path, out_path, num_frames, fps, num_videos=None, nrow=None, verbose=True
):
    """Render an NTHWC .npz video array (or ndarray) as one MP4 grid video.

    Args:
        data_path: .npz path (key "arr_0") or an ndarray [N, T, H, W, C].
        out_path: destination .mp4 path; parent dirs are created as needed.
        num_frames: pad every clip with black frames up to this length.
        fps: output frame rate.
        num_videos: optional cap on the number of clips (path input only).
        nrow: grid columns; defaults to ceil(sqrt(N)).
        verbose: show tqdm progress bars while padding and tiling.

    Raises:
        TypeError: if data_path is neither a str path nor an ndarray
            (was a bare `raise Exception`; now consistent with
            load_num_videos).
    """
    if isinstance(data_path, str):
        videos = load_num_videos(data_path, num_videos)
    elif isinstance(data_path, np.ndarray):
        videos = data_path
    else:
        raise TypeError(
            f"data_path must be str or np.ndarray, got {type(data_path)!r}"
        )
    n, t, h, w, c = videos.shape
    # Convert every clip to a [T, C, H, W] float tensor in [0, 1].
    videos_th = []
    for i in range(n):
        video = videos[i, :, :, :, :]
        images = [video[j, :, :, :] for j in range(t)]
        images = [to_tensor(img) for img in images]
        video = torch.stack(images)
        videos_th.append(video)
    # Pad short clips with black frames so all clips share num_frames.
    if verbose:
        videos = [
            fill_with_black_squares(v, num_frames)
            for v in tqdm(videos_th, desc="Adding empty frames")
        ]  # NTCHW
    else:
        videos = [fill_with_black_squares(v, num_frames) for v in videos_th]  # NTCHW
    frame_grids = torch.stack(videos).permute(1, 0, 2, 3, 4)  # [T, N, C, H, W]
    if nrow is None:
        nrow = int(np.ceil(np.sqrt(n)))
    if verbose:
        frame_grids = [
            make_grid(fs, nrow=nrow) for fs in tqdm(frame_grids, desc="Making grids")
        ]
    else:
        frame_grids = [make_grid(fs, nrow=nrow) for fs in frame_grids]
    if os.path.dirname(out_path) != "":
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # [T, 3, H, W] float in [0, 1] -> uint8 [T, H, W, C] for the writer.
    frame_grids = (
        (torch.stack(frame_grids) * 255).to(torch.uint8).permute(0, 2, 3, 1)
    )  # [T, H, W, C]
    torchvision.io.write_video(
        out_path, frame_grids, fps=fps, video_codec="h264", options={"crf": "10"}
    )
import importlib
import numpy as np
import torch
import torch.distributed as dist
def count_params(model, verbose=False):
    """Return the total number of parameters in *model*.

    With verbose=True, also print the count in millions.
    """
    n_params = 0
    for p in model.parameters():
        n_params += p.numel()
    if verbose:
        print(f"{model.__class__.__name__} has {n_params*1.e-6:.2f} M params.")
    return n_params
def check_istarget(name, para_list):
    """Return True if any partial target name occurs in *name*.

    name: full name of source para
    para_list: partial name of target para
    """
    return any(partial in name for partial in para_list)
def instantiate_from_config(config):
    """Build an object from a config dict of the form
    {"target": "pkg.mod.Class", "params": {...}}.

    The sentinel strings "__is_first_stage__" / "__is_unconditional__"
    deliberately map to None.

    Raises:
        KeyError: if config lacks a "target" key and is not a sentinel.
    """
    # Idiom fix: `"target" not in config` instead of `not "target" in config`.
    if "target" not in config:
        if config == "__is_first_stage__":
            return None
        elif config == "__is_unconditional__":
            return None
        raise KeyError("Expected key `target` to instantiate.")
    return get_obj_from_str(config["target"])(**config.get("params", dict()))
def get_obj_from_str(string, reload=False):
    """Resolve a dotted path "pkg.mod.Name" to the named attribute.

    With reload=True, re-import the owning module first (useful while
    iterating on code).
    """
    module_path, attr_name = string.rsplit(".", 1)
    if reload:
        importlib.reload(importlib.import_module(module_path))
    target_module = importlib.import_module(module_path, package=None)
    return getattr(target_module, attr_name)
def load_npz_from_dir(data_dir):
    """Concatenate the "arr_0" arrays of every file in data_dir along axis 0.

    NOTE(review): relies on `os` being importable at module level in this
    file — confirm the module imports it.
    """
    arrays = []
    for fname in os.listdir(data_dir):
        arrays.append(np.load(os.path.join(data_dir, fname))["arr_0"])
    return np.concatenate(arrays, axis=0)
def load_npz_from_paths(data_paths):
    """Concatenate the "arr_0" arrays of the given .npz files along axis 0,
    in the order the paths are given."""
    chunks = [np.load(path)["arr_0"] for path in data_paths]
    return np.concatenate(chunks, axis=0)
def setup_dist(args):
    """Initialise NCCL distributed training for this process (idempotent).

    Binds this process to the GPU given by args.local_rank and joins the
    process group via the env:// rendezvous (MASTER_ADDR/PORT, RANK,
    WORLD_SIZE environment variables).
    """
    if dist.is_initialized():
        return  # already set up by an earlier call
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group("nccl", init_method="env://")
# Fetch the pretrained NVComposer checkpoint from the Hugging Face Hub into
# ./models; hf_hub_download returns the local file path of the checkpoint.
from huggingface_hub import hf_hub_download
ckpt = hf_hub_download(
    repo_id="TencentARC/NVComposer", filename="NVComposer-V0.1.ckpt", repo_type="model", local_dir="./models"
)
#!/bin/bash
# Launch the NVComposer Gradio demo from its install directory.
cd /root/NVComposer
python app.py
{
"cells": [
{
"cell_type": "markdown",
"id": "e5c5a211-2ccd-4341-af10-ac546484b91f",
"metadata": {
"tags": []
},
"source": [
"## 项目介绍\n",
"- 原项目地址:https://huggingface.co/spaces/TencentARC/NVComposer\n",
"- NVComposer是一个多视图图像增强生成新视图合成的模型。上传单、多张视图可生成类似相机移动拍摄的视频。 \n",
"- 项目在L20显卡,cuda12.2上进行适配\n",
"## 使用说明\n",
"- 启动和重启 Notebook 点上方工具栏中的「重启并运行所有单元格」。出现如下内容就算成功了:\n",
" - `Running on local URL: http://0.0.0.0:7860`\n",
" - `Running on public URL: https://xxxxxxxxxxxxxxx.gradio.live`\n",
"- 通过以下方式开启页面:\n",
" - 在控制台打开「自定义服务」,将访问端口号设置为7860\n",
" - 直接打开显示的公开链接`public URL`\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53a96614-e2d2-4710-a82b-0d5ca9cb9872",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 启动\n",
"!sh start.sh"
]
},
{
"cell_type": "markdown",
"source": [
"---\n",
"**扫码关注公众号,获取更多资讯**<br>\n",
"<div align=center>\n",
"<img src=\"assets/二维码.jpeg\" width = 20% />\n",
"</div>\n"
],
"metadata": {
"collapsed": false
},
"id": "2f54158c2967bc25"
},
{
"cell_type": "code",
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
},
"id": "6dc59fbbcf222b6b"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment