Commit f7264a94 authored by chenpangpang

feat: fix bug

parent 08a21d59
-FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.3.1-py3.10-cuda11.8-ubuntu22.04-devel as base
+FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.5.1-py3.10-cuda11.8-ubuntu22.04-devel as base
 ARG IMAGE=ruyi-mini-7b
-ARG IMAGE_UPPER=Ruyi-Mini-7B
+ARG IMAGE_UPPER=Ruyi-Models
 ARG BRANCH=gpu
 RUN cd /root && git clone -b $BRANCH http://developer.hpccube.com/codes/chenpangpang/$IMAGE.git
 WORKDIR /root/$IMAGE/$IMAGE_UPPER
@@ -10,13 +10,14 @@ RUN pip install -r requirements.txt
 #########
 # Prod #
 #########
-FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.3.1-py3.10-cuda11.8-ubuntu22.04-devel
+FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.5.1-py3.10-cuda11.8-ubuntu22.04-devel
 ARG IMAGE=ruyi-mini-7b
-ARG IMAGE_UPPER=Ruyi-Mini-7B
+ARG IMAGE_UPPER=Ruyi-Models
 COPY chenyh/$IMAGE/frpc_linux_amd64_* /opt/conda/lib/python3.10/site-packages/gradio/
 RUN chmod +x /opt/conda/lib/python3.10/site-packages/gradio/frpc_linux_amd64_*
-COPY chenyh/$IMAGE/ /root/Ruyi-Models/models
+COPY chenyh/$IMAGE/ /root/$IMAGE_UPPER/models
+RUN apt-get update && apt-get install -y libgl1-mesa-glx libglib2.0-0 libsm6 libxrender1 libxext6
 COPY --from=base /opt/conda/lib/python3.10/site-packages /opt/conda/lib/python3.10/site-packages
-COPY --from=base /root/$IMAGE/$IMAGE_UPPER /root/Ruyi-Models
+COPY --from=base /root/$IMAGE/$IMAGE_UPPER /root/$IMAGE_UPPER
 COPY --from=base /root/$IMAGE/启动器.ipynb /root/$IMAGE/start.sh /root/
 COPY --from=base /root/$IMAGE/assets/ /root/assets/
\ No newline at end of file
import torch
import os
from PIL import Image
from predict_i2v import try_setup_pipeline, get_control_embeddings
from ruyi.data.bucket_sampler import ASPECT_RATIO_512, get_closest_ratio
from ruyi.utils.lora_utils import merge_lora, unmerge_lora
from ruyi.utils.utils import get_image_to_video_latent, save_videos_grid
from diffusers import (EulerDiscreteScheduler, EulerAncestralDiscreteScheduler,
DPMSolverMultistepScheduler, PNDMScheduler, DDIMScheduler)
import gradio as gr
from omegaconf import OmegaConf
# Model settings
config_path = "config/default.yaml"
model_name = "Ruyi-Mini-7B"
model_path = f"models/{model_name}"  # (Down)load model in this path
# LoRA settings
lora_path = None
lora_weight = 1.0
# GPU memory settings
low_gpu_memory_mode = True # Low gpu memory mode
gpu_offload_steps = 5  # Choose from [0, 10, 7, 5, 1]; values later in the list use less GPU memory but take longer
# Random seed
seed = 42 # The Answer to the Ultimate Question of Life, The Universe, and Everything
output_video_path = "outputs/example_01.mp4"
# Other settings
weight_dtype = torch.bfloat16
device = torch.device("cuda")
# Load config
config = OmegaConf.load(config_path)
# Check for update
repo_id = f"IamCreateAI/{model_name}"
# Init model
pipeline = try_setup_pipeline(model_path, weight_dtype, config)
if pipeline is None:
    message = (f"[Load Model Failed] "
               f"Please download Ruyi model from huggingface repo '{repo_id}', "
               f"And put it into '{model_path}'.")
    raise FileNotFoundError(message)

# Setup GPU memory mode
if low_gpu_memory_mode:
    pipeline.enable_sequential_cpu_offload()
else:
    pipeline.enable_model_cpu_offload()
# Set hidden states offload steps
pipeline.transformer.hidden_cache_size = gpu_offload_steps
def run_inference(start_image_path=None, end_image_path=None, video_length=-1, base_resolution=-1, cfg=-1,
                  steps=-1, scheduler_name=None, motion=None, camera_direction=None, aspect_ratio=None,
                  progress=gr.Progress(track_tqdm=True)):
    global pipeline

    # Load images
    start_img = [Image.open(start_image_path).convert("RGB")]
    end_img = [Image.open(end_image_path).convert("RGB")] if end_image_path is not None else None

    # Prepare LoRA config
    loras = {
        'models': [lora_path] if lora_path is not None else [],
        'weights': [lora_weight] if lora_path is not None else [],
    }

    video_size = None

    # Count most suitable height and width
    if video_size is None:
        aspect_ratio_sample_size = {key: [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]] for key in
                                    ASPECT_RATIO_512.keys()}
        original_width, original_height = start_img[0].size if type(start_img) is list else Image.open(start_img).size
        closest_size, closest_ratio = get_closest_ratio(original_height, original_width,
                                                        ratios=aspect_ratio_sample_size)
        height, width = [int(x / 16) * 16 for x in closest_size]
    else:
        height, width = video_size

    # Load Sampler
    if scheduler_name == "DPM++":
        noise_scheduler = DPMSolverMultistepScheduler.from_pretrained(model_path, subfolder='scheduler')
    elif scheduler_name == "Euler":
        noise_scheduler = EulerDiscreteScheduler.from_pretrained(model_path, subfolder='scheduler')
    elif scheduler_name == "Euler A":
        noise_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(model_path, subfolder='scheduler')
    elif scheduler_name == "PNDM":
        noise_scheduler = PNDMScheduler.from_pretrained(model_path, subfolder='scheduler')
    elif scheduler_name == "DDIM":
        noise_scheduler = DDIMScheduler.from_pretrained(model_path, subfolder='scheduler')
    pipeline.scheduler = noise_scheduler

    # Set random seed
    generator = torch.Generator(device).manual_seed(seed)

    # Load control embeddings
    embeddings = get_control_embeddings(pipeline, aspect_ratio, motion, camera_direction)

    with torch.no_grad():
        video_length = int(
            video_length // pipeline.vae.mini_batch_encoder * pipeline.vae.mini_batch_encoder) if video_length != 1 else 1
        input_video, input_video_mask, clip_image = get_image_to_video_latent(start_img, end_img,
                                                                              video_length=video_length,
                                                                              sample_size=(height, width))

        for _lora_path, _lora_weight in zip(loras.get("models", []), loras.get("weights", [])):
            pipeline = merge_lora(pipeline, _lora_path, _lora_weight)

        sample = pipeline(
            prompt_embeds=embeddings["positive_embeds"],
            prompt_attention_mask=embeddings["positive_attention_mask"],
            prompt_embeds_2=embeddings["positive_embeds_2"],
            prompt_attention_mask_2=embeddings["positive_attention_mask_2"],
            negative_prompt_embeds=embeddings["negative_embeds"],
            negative_prompt_attention_mask=embeddings["negative_attention_mask"],
            negative_prompt_embeds_2=embeddings["negative_embeds_2"],
            negative_prompt_attention_mask_2=embeddings["negative_attention_mask_2"],
            video_length=video_length,
            height=height,
            width=width,
            generator=generator,
            guidance_scale=cfg,
            num_inference_steps=steps,
            video=input_video,
            mask_video=input_video_mask,
            clip_image=clip_image,
        ).videos

        for _lora_path, _lora_weight in zip(loras.get("models", []), loras.get("weights", [])):
            pipeline = unmerge_lora(pipeline, _lora_path, _lora_weight)

    # Save the video
    output_folder = os.path.dirname(output_video_path)
    if output_folder != '':
        os.makedirs(output_folder, exist_ok=True)
    save_videos_grid(sample, output_video_path, fps=24)

    return output_video_path
with gr.Blocks() as demo:
    gr.HTML(
        """
        <div style="text-align: center;">
            <h1 style="text-align: center; color: #333333;">📸 Ruyi-Mini-7B </h1>
        </div>
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Accordion("Video Settings", open=True):
                video_length = gr.Slider(
                    minimum=24,
                    maximum=120,
                    step=1,
                    value=120,
                    label="Video Length(frames)",
                )
                base_resolution = gr.Slider(
                    minimum=384,
                    maximum=640,
                    step=1,
                    value=640,
                    label="Base Resolution",
                )
                aspect_ratio = gr.Radio(
                    choices=["16:9", "9:16"],
                    label="Aspect Ratio",
                    value="16:9",
                    interactive=True,
                )
            with gr.Accordion("Control settings", open=True):
                motion = gr.Radio(
                    choices=["1", "2", "3", "4", "auto"],
                    label="motion",
                    value="auto",
                    interactive=True,
                )
                camera_direction = gr.Radio(
                    choices=["static", "left", "right", "up", "down", "auto"],
                    label="Camera Direction",
                    value="static",
                    interactive=True,
                )
            with gr.Accordion("Advanced Sampling Settings", open=False):
                steps = gr.Slider(
                    value=25,
                    label="Steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                )
                cfg_scale = gr.Slider(
                    value=7.0,
                    label="Classifier-Free Guidance Scale",
                    minimum=1,
                    maximum=10,
                    step=0.1,
                )
                scheduler_name = gr.Radio(
                    choices=["Euler", "Euler A", "DPM++", "PNDM", "DDIM"],
                    label="Scheduler",
                    value="DDIM",
                    interactive=True,
                )
        with gr.Column(scale=1):
            with gr.Accordion("Input Image(s)", open=True):
                num_images_slider = gr.Slider(
                    minimum=1,
                    maximum=2,
                    step=1,
                    value=1,
                    label="Number of Input Image(s)",
                )
                condition_image_1 = gr.Image(label="Input Image 1", type="filepath")
                condition_image_2 = gr.Image(label="Input Image 2", type="filepath", visible=False)
                condition_image_3 = gr.Image(label="Input Image 3", type="filepath", visible=False)
                condition_image_4 = gr.Image(label="Input Image 4", type="filepath", visible=False)
        with gr.Column(scale=1):
            with gr.Accordion("Output Video", open=True):
                output_video = gr.Video(label="Output Video")
                run_btn = gr.Button("Generate")

    # Update visibility of condition images based on the slider
    def update_visible_images(num_images):
        return [
            gr.update(visible=num_images >= 2),
            gr.update(visible=num_images >= 3),
            gr.update(visible=num_images >= 4),
        ]

    # Trigger visibility update when the slider value changes
    num_images_slider.change(
        fn=update_visible_images,
        inputs=num_images_slider,
        outputs=[condition_image_2, condition_image_3, condition_image_4],
    )

    run_btn.click(
        fn=run_inference,
        inputs=[
            condition_image_1,
            condition_image_2,
            video_length,
            base_resolution,
            cfg_scale,
            steps,
            scheduler_name,
            motion,
            camera_direction,
            aspect_ratio
        ],
        outputs=output_video,
    )
demo.launch(share=True, server_name="0.0.0.0")
@@ -23,7 +23,7 @@ output_video_path = "outputs/example_01.mp4"
 # Video settings
 video_length = 120 # The max video length is 120 frames (24 frames per second)
-base_resolution = 512 # # The pixels in the generated video are approximately 512 x 512. Values in the range of [384, 896] typically produce good video quality.
+base_resolution = 640 # # The pixels in the generated video are approximately 512 x 512. Values in the range of [384, 896] typically produce good video quality.
 video_size = None # Override base_resolution. Format: [height, width], e.g., [384, 672]

 # Control settings
 aspect_ratio = "16:9" # Choose in ["16:9", "9:16"], note that this is only the hint
@@ -46,8 +46,8 @@ config_path = "config/default.yaml"
 model_name = "Ruyi-Mini-7B"
 model_type = "Inpaint"
 model_path = f"models/{model_name}" # (Down)load mode in this path
-auto_download = True # Automatically download the model if the pipeline creation fails
-auto_update = True # If auto_download is enabled, check for updates and update the model if necessary
+auto_download = False # Automatically download the model if the pipeline creation fails
+auto_update = False # If auto_download is enabled, check for updates and update the model if necessary

 # LoRA settings
 lora_path = None
@@ -146,117 +146,117 @@ def try_setup_pipeline(model_path, weight_dtype, config):
     return None
-# Load config
-config = OmegaConf.load(config_path)
-
-# Load images
-start_img = [Image.open(start_image_path).convert("RGB")]
-end_img = [Image.open(end_image_path).convert("RGB")] if end_image_path is not None else None
-
-# Check for update
-repo_id = f"IamCreateAI/{model_name}"
-if auto_download and auto_update:
-    print(f"Checking for {model_name} updates ...")
-
-    # Download the model
-    snapshot_download(repo_id=repo_id, local_dir=model_path)
-
-# Init model
-pipeline = try_setup_pipeline(model_path, weight_dtype, config)
-if pipeline is None and auto_download:
-    print(f"Downloading {model_name} ...")
-
-    # Download the model
-    snapshot_download(repo_id=repo_id, local_dir=model_path)
-
-    pipeline = try_setup_pipeline(model_path, weight_dtype, config)
-
-if pipeline is None:
-    message = (f"[Load Model Failed] "
-               f"Please download Ruyi model from huggingface repo '{repo_id}', "
-               f"And put it into '{model_path}'.")
-    if not auto_download:
-        message += "\nOr just set auto_download to 'True'."
-    raise FileNotFoundError(message)
-
-# Setup GPU memory mode
-if low_gpu_memory_mode:
-    pipeline.enable_sequential_cpu_offload()
-else:
-    pipeline.enable_model_cpu_offload()
-
-# Prepare LoRA config
-loras = {
-    'models': [lora_path] if lora_path is not None else [],
-    'weights': [lora_weight] if lora_path is not None else [],
-}
-
-# Count most suitable height and width
-if video_size is None:
-    aspect_ratio_sample_size = {key : [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]] for key in ASPECT_RATIO_512.keys()}
-    original_width, original_height = start_img[0].size if type(start_img) is list else Image.open(start_img).size
-    closest_size, closest_ratio = get_closest_ratio(original_height, original_width, ratios=aspect_ratio_sample_size)
-    height, width = [int(x / 16) * 16 for x in closest_size]
-else:
-    height, width = video_size
-
-# Set hidden states offload steps
-pipeline.transformer.hidden_cache_size = gpu_offload_steps
-
-# Load Sampler
-if scheduler_name == "DPM++":
-    noise_scheduler = DPMSolverMultistepScheduler.from_pretrained(model_path, subfolder='scheduler')
-elif scheduler_name == "Euler":
-    noise_scheduler = EulerDiscreteScheduler.from_pretrained(model_path, subfolder='scheduler')
-elif scheduler_name == "Euler A":
-    noise_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(model_path, subfolder='scheduler')
-elif scheduler_name == "PNDM":
-    noise_scheduler = PNDMScheduler.from_pretrained(model_path, subfolder='scheduler')
-elif scheduler_name == "DDIM":
-    noise_scheduler = DDIMScheduler.from_pretrained(model_path, subfolder='scheduler')
-pipeline.scheduler = noise_scheduler
-
-# Set random seed
-generator= torch.Generator(device).manual_seed(seed)
-
-# Load control embeddings
-embeddings = get_control_embeddings(pipeline, aspect_ratio, motion, camera_direction)
-
-with torch.no_grad():
-    video_length = int(video_length // pipeline.vae.mini_batch_encoder * pipeline.vae.mini_batch_encoder) if video_length != 1 else 1
-    input_video, input_video_mask, clip_image = get_image_to_video_latent(start_img, end_img, video_length=video_length, sample_size=(height, width))
-
-    for _lora_path, _lora_weight in zip(loras.get("models", []), loras.get("weights", [])):
-        pipeline = merge_lora(pipeline, _lora_path, _lora_weight)
-
-    sample = pipeline(
-        prompt_embeds = embeddings["positive_embeds"],
-        prompt_attention_mask = embeddings["positive_attention_mask"],
-        prompt_embeds_2 = embeddings["positive_embeds_2"],
-        prompt_attention_mask_2 = embeddings["positive_attention_mask_2"],
-
-        negative_prompt_embeds = embeddings["negative_embeds"],
-        negative_prompt_attention_mask = embeddings["negative_attention_mask"],
-        negative_prompt_embeds_2 = embeddings["negative_embeds_2"],
-        negative_prompt_attention_mask_2 = embeddings["negative_attention_mask_2"],
-
-        video_length = video_length,
-        height = height,
-        width = width,
-        generator = generator,
-        guidance_scale = cfg,
-        num_inference_steps = steps,
-
-        video = input_video,
-        mask_video = input_video_mask,
-        clip_image = clip_image,
-    ).videos
-
-    for _lora_path, _lora_weight in zip(loras.get("models", []), loras.get("weights", [])):
-        pipeline = unmerge_lora(pipeline, _lora_path, _lora_weight)
-
-# Save the video
-output_folder = os.path.dirname(output_video_path)
-if output_folder != '':
-    os.makedirs(output_folder, exist_ok=True)
-save_videos_grid(sample, output_video_path, fps=24)
+# # Load config
+# config = OmegaConf.load(config_path)
+#
+# # Load images
+# start_img = [Image.open(start_image_path).convert("RGB")]
+# end_img = [Image.open(end_image_path).convert("RGB")] if end_image_path is not None else None
+#
+# # Check for update
+# repo_id = f"IamCreateAI/{model_name}"
+# if auto_download and auto_update:
+# print(f"Checking for {model_name} updates ...")
+#
+# # Download the model
+# snapshot_download(repo_id=repo_id, local_dir=model_path)
+#
+# # Init model
+# pipeline = try_setup_pipeline(model_path, weight_dtype, config)
+# if pipeline is None and auto_download:
+# print(f"Downloading {model_name} ...")
+#
+# # Download the model
+# snapshot_download(repo_id=repo_id, local_dir=model_path)
+#
+# pipeline = try_setup_pipeline(model_path, weight_dtype, config)
+#
+# if pipeline is None:
+# message = (f"[Load Model Failed] "
+# f"Please download Ruyi model from huggingface repo '{repo_id}', "
+# f"And put it into '{model_path}'.")
+# if not auto_download:
+# message += "\nOr just set auto_download to 'True'."
+# raise FileNotFoundError(message)
+#
+# # Setup GPU memory mode
+# if low_gpu_memory_mode:
+# pipeline.enable_sequential_cpu_offload()
+# else:
+# pipeline.enable_model_cpu_offload()
+#
+# # Prepare LoRA config
+# loras = {
+# 'models': [lora_path] if lora_path is not None else [],
+# 'weights': [lora_weight] if lora_path is not None else [],
+# }
+#
+# # Count most suitable height and width
+# if video_size is None:
+# aspect_ratio_sample_size = {key : [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]] for key in ASPECT_RATIO_512.keys()}
+# original_width, original_height = start_img[0].size if type(start_img) is list else Image.open(start_img).size
+# closest_size, closest_ratio = get_closest_ratio(original_height, original_width, ratios=aspect_ratio_sample_size)
+# height, width = [int(x / 16) * 16 for x in closest_size]
+# else:
+# height, width = video_size
+#
+# # Set hidden states offload steps
+# pipeline.transformer.hidden_cache_size = gpu_offload_steps
+#
+# # Load Sampler
+# if scheduler_name == "DPM++":
+# noise_scheduler = DPMSolverMultistepScheduler.from_pretrained(model_path, subfolder='scheduler')
+# elif scheduler_name == "Euler":
+# noise_scheduler = EulerDiscreteScheduler.from_pretrained(model_path, subfolder='scheduler')
+# elif scheduler_name == "Euler A":
+# noise_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(model_path, subfolder='scheduler')
+# elif scheduler_name == "PNDM":
+# noise_scheduler = PNDMScheduler.from_pretrained(model_path, subfolder='scheduler')
+# elif scheduler_name == "DDIM":
+# noise_scheduler = DDIMScheduler.from_pretrained(model_path, subfolder='scheduler')
+# pipeline.scheduler = noise_scheduler
+#
+# # Set random seed
+# generator= torch.Generator(device).manual_seed(seed)
+#
+# # Load control embeddings
+# embeddings = get_control_embeddings(pipeline, aspect_ratio, motion, camera_direction)
+#
+# with torch.no_grad():
+# video_length = int(video_length // pipeline.vae.mini_batch_encoder * pipeline.vae.mini_batch_encoder) if video_length != 1 else 1
+# input_video, input_video_mask, clip_image = get_image_to_video_latent(start_img, end_img, video_length=video_length, sample_size=(height, width))
+#
+# for _lora_path, _lora_weight in zip(loras.get("models", []), loras.get("weights", [])):
+# pipeline = merge_lora(pipeline, _lora_path, _lora_weight)
+#
+# sample = pipeline(
+# prompt_embeds = embeddings["positive_embeds"],
+# prompt_attention_mask = embeddings["positive_attention_mask"],
+# prompt_embeds_2 = embeddings["positive_embeds_2"],
+# prompt_attention_mask_2 = embeddings["positive_attention_mask_2"],
+#
+# negative_prompt_embeds = embeddings["negative_embeds"],
+# negative_prompt_attention_mask = embeddings["negative_attention_mask"],
+# negative_prompt_embeds_2 = embeddings["negative_embeds_2"],
+# negative_prompt_attention_mask_2 = embeddings["negative_attention_mask_2"],
+#
+# video_length = video_length,
+# height = height,
+# width = width,
+# generator = generator,
+# guidance_scale = cfg,
+# num_inference_steps = steps,
+#
+# video = input_video,
+# mask_video = input_video_mask,
+# clip_image = clip_image,
+# ).videos
+#
+# for _lora_path, _lora_weight in zip(loras.get("models", []), loras.get("weights", [])):
+# pipeline = unmerge_lora(pipeline, _lora_path, _lora_weight)
+#
+# # Save the video
+# output_folder = os.path.dirname(output_video_path)
+# if output_folder != '':
+# os.makedirs(output_folder, exist_ok=True)
+# save_videos_grid(sample, output_video_path, fps=24)
@@ -62,7 +62,7 @@ def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=6, f
     os.makedirs(os.path.dirname(path), exist_ok=True)
     if imageio_backend:
         if path.endswith("mp4"):
-            imageio.mimsave(path, outputs, fps=fps)
+            imageio.mimsave(path, outputs, fps=fps, quality=8)
         else:
            imageio.mimsave(path, outputs, duration=(1000 * 1/fps))
     else: