Commit 08a21d59 authored by chenpangpang's avatar chenpangpang
Browse files

feat: 初始提交

parent 1a6b26f1
Pipeline #2165 failed with stages
in 0 seconds
.idea
chenyh
#########
# Build #
#########
FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.3.1-py3.10-cuda11.8-ubuntu22.04-devel AS base

# Build-time parameters: repo name, model directory inside the repo, and git branch.
ARG IMAGE=ruyi-mini-7b
ARG IMAGE_UPPER=Ruyi-Mini-7B
ARG BRANCH=gpu

# Clone the application repository. WORKDIR instead of `RUN cd ...` (hadolint DL3003).
WORKDIR /root
RUN git clone -b $BRANCH http://developer.hpccube.com/codes/chenpangpang/$IMAGE.git

# Install Python dependencies in the build stage only; the resulting
# site-packages tree is copied into the production stage below.
# --no-cache-dir keeps pip's download cache out of the layer (hadolint DL3042).
WORKDIR /root/$IMAGE/$IMAGE_UPPER
RUN pip install --no-cache-dir -r requirements.txt

#########
# Prod #
#########
FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.3.1-py3.10-cuda11.8-ubuntu22.04-devel

# ARGs declared before the first FROM are not visible in later stages; redeclare.
ARG IMAGE=ruyi-mini-7b
ARG IMAGE_UPPER=Ruyi-Mini-7B

# Gradio's frpc tunnelling helper binary, supplied from the build context,
# must be present and executable inside gradio's package directory.
COPY chenyh/$IMAGE/frpc_linux_amd64_* /opt/conda/lib/python3.10/site-packages/gradio/
RUN chmod +x /opt/conda/lib/python3.10/site-packages/gradio/frpc_linux_amd64_*

# Pre-downloaded model weights from the build context.
COPY chenyh/$IMAGE/ /root/Ruyi-Models/models

# Pull the installed dependencies and application files from the build stage;
# the git history and build tooling never reach the final image.
COPY --from=base /opt/conda/lib/python3.10/site-packages /opt/conda/lib/python3.10/site-packages
COPY --from=base /root/$IMAGE/$IMAGE_UPPER /root/Ruyi-Models
COPY --from=base /root/$IMAGE/启动器.ipynb /root/$IMAGE/start.sh /root/
COPY --from=base /root/$IMAGE/assets/ /root/assets/
\ No newline at end of file
# Ruyi-Mini-7B
# NVComposer
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
# Ruyi-Models
English | [简体中文](./README_CN.md)
Welcome to Ruyi-Models!
Ruyi is an image-to-video model capable of generating cinematic-quality videos at a **resolution of 768**, with a frame rate of **24 frames per second**, totaling **5 seconds and 120 frames**. It supports **lens control** and **motion amplitude control**. Using a **RTX 3090 or RTX 4090**, you can generate 512 resolution, 120 frames (or 768 resolution, ~72 frames) videos **without any loss of quality**.
## Table of Contents
- [Installation Instructions](#installation-instructions)
- [Download Model (Optional)](#download-model-optional)
- [How to Use](#how-to-use)
- [Showcase](#showcase)
- [GPU Memory Optimization](#gpu-memory-optimization)
- [License](#license)
## Installation Instructions
The installation instructions are simple. Just clone the repo and install the requirements.
```shell
git clone https://github.com/IamCreateAI/Ruyi-Models
cd Ruyi-Models
pip install -r requirements.txt
```
### For ComfyUI Users
#### Method (1): Installation via ComfyUI Manager
Download and install [ComfyUI-Manager](https://github.com/ltdrdata/ComfyUI-Manager).
```shell
cd ComfyUI/custom_nodes/
git clone https://github.com/ltdrdata/ComfyUI-Manager.git
# install requirements
pip install -r ComfyUI-Manager/requirements.txt
```
Next, start ComfyUI and open the Manager. Select Custom Nodes Manager, then search for "Ruyi". You should see ComfyUI-Ruyi as shown in the screenshot below. Click "Install" to proceed.
<div align=center>
<img src="https://github.com/user-attachments/assets/10dda65f-13d5-4da8-9437-9c98b114536c"></img>
</div>
Finally, search for "ComfyUI-VideoHelperSuite" and install it as well.
#### Method (2): Manual Installation
Download and save this repository to the path *ComfyUI/custom_nodes/Ruyi-Models*.
```shell
# download the repo
cd ComfyUI/custom_nodes/
git clone https://github.com/IamCreateAI/Ruyi-Models.git
# install requirements
pip install -r Ruyi-Models/requirements.txt
```
Install the dependency [ComfyUI-VideoHelperSuite](https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite) to display video output (skip this step if already installed).
```shell
# download ComfyUI-VideoHelperSuite
cd ComfyUI/custom_nodes/
git clone https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git
# install requirements
pip install -r ComfyUI-VideoHelperSuite/requirements.txt
```
##### For Windows Users
When using the Windows operating system, a common distribution is [ComfyUI_windows_portable_nvidia](https://github.com/comfyanonymous/ComfyUI/releases). When launched with `run_nvidia_gpu.bat`, it utilizes the embedded Python interpreter included with the package. Therefore, the environment needs to be set up within this built-in Python.
For example, if the extracted directory of the distribution is ComfyUI_windows_portable, you can typically use the following command to download the repository and install the runtime environment:
```shell
# download the repo
cd ComfyUI_windows_portable\ComfyUI\custom_nodes
git clone https://github.com/IamCreateAI/Ruyi-Models.git
# install requirements using embedded Python interpreter
..\..\python_embeded\python.exe -m pip install -r Ruyi-Models\requirements.txt
```
## Download Model (Optional)
Download the model and save it to a certain path. To directly run our model, it is recommended to save the models into the _Ruyi-Models/models_ folder. For ComfyUI users, the path should be _ComfyUI/models/Ruyi_.
| Model Name | Type | Resolution | Max Frames | Frames per Second | Storage Space | Download |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| Ruyi-Mini-7B | Image to Video | 512 & 768 | 120 | 24 | 17 GB | [🤗](https://huggingface.co/IamCreateAI/Ruyi-Mini-7B) |
For example, after downloading Ruyi-Mini-7B, the file path structure should be:
```
📦 Ruyi-Models/models/ or ComfyUI/models/Ruyi/
├── 📂 Ruyi-Mini-7B/
│ ├── 📂 transformers/
│ ├── 📂 vae/
│ └── 📂 ...
```
> This repository **supports automatic model downloading**, but manual downloading provides more control. For instance, you can download the model to another location and then link it to the *ComfyUI/models/Ruyi* path using symbolic links or similar methods.
## How to Use
We provide two ways to run our model. The first is directly using python code.
```
python3 predict_i2v.py
```
Specifically, the script downloads the model to the _Ruyi-Models/models_ folder and uses images from the [_assets_](./assets/) folder as the start and end frames for video inference. You can modify the variables in the script to replace the input images and set parameters such as video length and resolution.
For users with more than 24GB of GPU memory, you can use predict_i2v_80g.py to enhance generation speed. For those with less GPU memory, we offer parameters to optimize memory usage, enabling the generation of higher resolution and longer videos by extending the inference time. The effects of these parameters can be found in the [GPU memory optimization](#gpu-memory-optimization) section below.
Or use ComfyUI wrapper in our github repo, the detail of ComfyUI nodes is described in [_comfyui/README.md_](./comfyui/README.md).
## Showcase
### Image to Video Effects
<table>
<tr>
<td><video src="https://github.com/user-attachments/assets/4dedf40b-82f2-454c-9a67-5f4ed243f5ea"></video></td>
<td><video src="https://github.com/user-attachments/assets/905fef17-8c5d-49b0-a49a-6ae7e212fa07"></video></td>
<td><video src="https://github.com/user-attachments/assets/20daab12-b510-448a-9491-389d7bdbbf2e"></video></td>
<td><video src="https://github.com/user-attachments/assets/f1bb0a91-d52a-4611-bac2-8fcf9658cac0"></video></td>
</tr>
</table>
### Camera Control
<table>
<tr>
<td align=center><img src="https://github.com/user-attachments/assets/8aedcea6-3b8e-4c8b-9fed-9ceca4d41954" height=200></img>input</td>
<td align=center><video src="https://github.com/user-attachments/assets/d9d027d4-0d4f-45f5-9d46-49860b562c69"></video>left</td>
<td align=center><video src="https://github.com/user-attachments/assets/7716a67b-1bb8-4d44-b128-346cbc35e4ee"></video>right</td>
</tr>
<tr>
<td align=center><video src="https://github.com/user-attachments/assets/cc1f1928-cab7-4c4b-90af-928936102e66"></video>static</td>
<td align=center><video src="https://github.com/user-attachments/assets/c742ea2c-503a-454f-a61a-10b539100cd9"></video>up</td>
<td align=center><video src="https://github.com/user-attachments/assets/442839fa-cc53-4b75-b015-909e44c065e0"></video>down</td>
</tr>
</table>
### Motion Amplitude Control
<table>
<tr>
<td align=center><video src="https://github.com/user-attachments/assets/0020bd54-0ff6-46ad-91ee-d9f0df013772"></video>motion 1</td>
<td align=center><video src="https://github.com/user-attachments/assets/d1c26419-54e3-4b86-8ae3-98e12de3022e"></video>motion 2</td>
<td align=center><video src="https://github.com/user-attachments/assets/535147a2-049a-4afc-8d2a-017bc778977e"></video>motion 3</td>
<td align=center><video src="https://github.com/user-attachments/assets/bf893d53-2e11-406f-bb9a-2aacffcecd44"></video>motion 4</td>
</tr>
</table>
## GPU Memory Optimization
We provide the options **`GPU_memory_mode` and `GPU_offload_steps` to reduce GPU memory usage**, catering to different user needs.
Generally speaking, using **less GPU memory requires more RAM and results in longer generation times**. Below is a reference table of expected GPU memory usage and generation times. Note that, the GPU memory reported below is the `max_memory_allocated()` value. The values read from nvidia-smi may be higher than the reported values because CUDA occupies some GPU memory (usually between 500 - 800 MiB), and PyTorch's caching mechanism also requests additional GPU memory.
Additionally, the community and we have created a detailed table featuring various resolutions and option combinations, which can be found in the [gpu_memory_appendix.md](assets/gpu_memory_appendix.md). We encourage community members to help us complete the table.
### A100 Results
- Resolution of 512
| Num frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps | low_gpu_mode + 0 steps |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| 24 frames | 16119MiB <br> _01:01s_ | 15535MiB <br> _01:07s_ | 15340MiB <br> _01:13s_ | 15210MiB <br> _01:20s_ | 14950MiB <br> _01:32s_ | 4216MiB <br> _05:14s_ |
| 48 frames | 18398MiB <br> _01:53s_ | 17230MiB <br> _02:15s_ | 16840MiB <br> _02:29s_ | 16580MiB <br> _02:32s_ | 16060MiB <br> _02:54s_ | 4590MiB <br> _09:59s_ |
| 72 frames | 20678MiB <br> _03:00s_ | 18925MiB <br> _03:31s_ | 18340MiB <br> _03:53s_ | 17951MiB <br> _03:57s_ | 17171MiB <br> _04:25s_ | 6870MiB <br> _14:42s_ |
| 96 frames | 22958MiB <br> _04:11s_ | 20620MiB <br> _04:54s_ | 19841MiB <br> _05:10s_ | 19321MiB <br> _05:14s_ | 18281MiB <br> _05:47s_ | 9150MiB <br> _19:17s_ |
| 120 frames | 25238MiB <br> _05:42s_ | 22315MiB <br> _06:34s_ | 21341MiB <br> _06:59s_ | 20691MiB <br> _07:07s_ | 19392MiB <br> _07:41s_ | 11430MiB <br> _24:08s_ |
- Resolution of 768
| Num frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps | low_gpu_mode + 0 steps |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| 24 frames | 18971MiB <br> _02:06s_ | 17655MiB <br> _02:40s_ | 17217MiB <br> _02:39s_ | 16925MiB <br> _02:41s_ | 16339MiB <br> _03:13s_ | 5162MiB <br> _13:42s_ |
| 48 frames | 24101MiB <br> _04:52s_ | 21469MiB <br> _05:44s_ | 20592MiB <br> _05:51s_ | 20008MiB <br> _06:00s_ | 18837MiB <br> _06:49s_ | 10292MiB <br> _20:58s_ |
| 72 frames | 29230MiB <br> _08:24s_ | 25283MiB <br> _09:45s_ | 25283MiB <br> _09:45s_ | 23091MiB <br> _10:10s_ | 21335MiB <br> _11:10s_ | 15421MiB <br> _39:12s_ |
| 96 frames | 34360MiB <br> _12:49s_ | 29097MiB <br> _14:41s_ | 27343MiB <br> _15:33s_ | 26174MiB <br> _15:44s_ | 23834MiB <br> _16:33s_ | 20550MiB <br> _43:47s_ |
| 120 frames | 39489MiB <br> _18:21s_ | 32911MiB <br> _20:39s_ | 30719MiB <br> _21:34s_ | 29257MiB <br> _21:48s_ | 26332MiB <br> _23:02s_ | 25679MiB <br> _63:01s_ |
### RTX 4090 Results
The values marked with `---` in the table indicate that an out-of-memory (OOM) error occurred, preventing generation.
- Resolution of 512
| Num frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps | low_gpu_mode + 0 steps |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| 24 frames | 16366MiB <br> _01:18s_ | 15805MiB <br> _01:26s_ | 15607MiB <br> _01:37s_ | 15475MiB <br> _01:36s_ | 15211MiB <br> _01:39s_ | 4211MiB <br> _03:57s_ |
| 48 frames | 18720MiB <br> _02:21s_ | 17532MiB <br> _02:49s_ | 17136MiB <br> _02:55s_ | 16872MiB <br> _02:58s_ | 16344MiB <br> _03:01s_ | 4666MiB <br> _05:01s_ |
| 72 frames | 21036MiB <br> _03:41s_ | 19254MiB <br> _04:25s_ | 18660MiB <br> _04:34s_ | 18264MiB <br> _04:36s_ | 17472MiB <br> _04:51s_ | 6981MiB <br> _06:36s_ |
| 96 frames | -----MiB <br> _--:--s_ | 20972MiB <br> _06:18s_ | 20180MiB <br> _06:24s_ | 19652MiB <br> _06:36s_ | 18596MiB <br> _06:56s_ | 9298MiB <br> _10:03s_ |
| 120 frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 21704MiB <br> _08:50s_ | 21044MiB <br> _08:53s_ | 19724MiB <br> _09:08s_ | 11613MiB <br> _13:57s_ |
- Resolution of 768
| Num frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps | low_gpu_mode + 0 steps |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| 24 frames | 19223MiB <br> _02:38s_ | 17900MiB <br> _03:06s_ | 17448MiB <br> _03:18s_ | 17153MiB <br> _03:23s_ | 16624MiB <br> _03:34s_ | 5251MiB <br> _05:54s_ |
| 48 frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 20946MiB <br> _07:28s_ | 20352MiB <br> _07:35s_ | 19164MiB <br> _08:04s_ | 10457MiB <br> _10:55s_ |
| 72 frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 15671MiB <br> _18:52s_ |
## License
We’re releasing the model under a permissive **Apache 2.0 license**.
## BibTeX
```
@misc{createai2024ruyi,
title={Ruyi-Mini-7B},
author={CreateAI Team},
year={2024},
publisher = {GitHub},
journal = {GitHub repository},
howpublished={\url{https://github.com/IamCreateAI/Ruyi-Models}}
}
```
## Welcome Feedback and Collaborative Optimization
We sincerely welcome everyone to actively provide valuable feedback and suggestions, and we hope to work together to optimize our services and products. Your words will help us better understand user needs, allowing us to continuously enhance the user experience. Thank you for your support and attention to our work!
You are welcome to join our [Discord](https://discord.com/invite/nueQFQwwGw) or WeChat Group (scan the QR code to add Ruyi Assistant and join the official group) for further discussion!
<img src="https://github.com/user-attachments/assets/cc5e25c6-34ab-4be1-a59b-7d5789264a9c" style="width:300px"></img>
# Ruyi-Models
欢迎使用 Ruyi-Models!
Ruyi 是一款图生视频模型,能够生成 **768 分辨率、每秒 24 帧、总计 5 秒 120 帧的影视级视频**,支持**镜头控制**和**运动幅度控制**。使用 **RTX 3090** 或 **RTX 4090**,可以**无精度损失**地生成 512 分辨率、120 帧(768 分辨率、~72 帧)的视频。
## 安装方法
克隆本仓库并安装所需的依赖。
```shell
git clone https://github.com/IamCreateAI/Ruyi-Models
cd Ruyi-Models
pip install -r requirements.txt
```
### ComfyUI 的安装方法
#### 方法(1):通过 ComfyUI Manager 安装
下载并安装 [ComfyUI-Manager](https://github.com/ltdrdata/ComfyUI-Manager)
```shell
cd ComfyUI/custom_nodes/
git clone https://github.com/ltdrdata/ComfyUI-Manager.git
# install requirements
pip install -r ComfyUI-Manager/requirements.txt
```
启动 ComfyUI 并打开 Manager。选择 Custom Nodes Manager,然后搜索 “Ruyi”。选择搜索结果中的 ComfyUI-Ruyi(如下方截图所示),点击 “Install” 按钮安装。
<div align=center>
<img src="https://github.com/user-attachments/assets/10dda65f-13d5-4da8-9437-9c98b114536c"></img>
</div>
最后,搜索 “ComfyUI-VideoHelperSuite” 并安装。
#### 方法(2):手动安装
- 下载并保存本仓库到 *ComfyUI/custom_nodes/Ruyi-Models* 路径。
```shell
# download the repo
cd ComfyUI/custom_nodes/
git clone https://github.com/IamCreateAI/Ruyi-Models.git
# install requirements
pip install -r Ruyi-Models/requirements.txt
```
- 安装依赖项 [ComfyUI-VideoHelperSuite](https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite) 以显示视频输出(如已安装,请跳过本步骤)。
```shell
# download ComfyUI-VideoHelperSuite
cd ComfyUI/custom_nodes/
git clone https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git
# install requirements
pip install -r ComfyUI-VideoHelperSuite/requirements.txt
```
##### Windows 操作系统下的安装方法
在使用Windows操作系统时,[ComfyUI_windows_portable_nvidia](https://github.com/comfyanonymous/ComfyUI/releases) 是一种常见的发行版。当通过 `run_nvidia_gpu.bat` 启动时,会使用其中嵌入的 Python 解释器。因此,需要在这个内置的 Python 环境中安装运行环境。
例如,如果提取后的发行版目录是 _ComfyUI_windows_portable_,通常可使用以下命令下载仓库并安装运行时环境:
```shell
# download the repo
cd ComfyUI_windows_portable\ComfyUI\custom_nodes
git clone https://github.com/IamCreateAI/Ruyi-Models.git
# install requirements using embedded Python interpreter
..\..\python_embeded\python.exe -m pip install -r Ruyi-Models\requirements.txt
```
## 下载模型(可选)
下载模型并将其保存到指定路径。为了直接运行 Ruyi 模型,建议将模型保存到 _Ruyi-Models/models_ 文件夹中。对于 ComfyUI 用户,路径应为 _ComfyUI/models/Ruyi_。
| 名称 | 类型 | 分辨率 | 最大帧数 | 每秒帧数 | 存储空间 | 下载地址 |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| Ruyi-Mini-7B | 图生视频 | 512 & 768 | 120 | 24 | 17 GB | [🤗](https://huggingface.co/IamCreateAI/Ruyi-Mini-7B) |
例如,下载 Ruyi-Mini-7B 后,文件的路径结构应该为:
```
📦 Ruyi-Models/models/ or ComfyUI/models/Ruyi/
├── 📂 Ruyi-Mini-7B/
│ ├── 📂 transformers/
│ ├── 📂 vae/
│ └── 📂 ...
```
> 本仓库**支持自动下载模型功能**,但手动下载提供了更多的可控性。例如,可以下载模型到其他位置,再通过软链接等方式链接到 *ComfyUI/models/Ruyi* 路径。
## 使用方法
我们提供两种运行模型的方法。第一种是直接使用Python代码。
```shell
python3 predict_i2v.py
```
具体来说,该脚本将模型下载到 _Ruyi-Models/models_ 文件夹,并使用 _[assets](./assets)_ 文件夹中的图像作为视频推理的起始帧和结束帧。您可以修改脚本中的变量来替换输入图像,并设置视频长度和分辨率等参数。
对于显存超过 24GB 的用户,可以使用 predict_i2v_80g.py 来提高生成速度。对于显存较少的用户,提供了优化显存使用的参数,这些参数可以通过延长推理时间来生成更高分辨率和更长时长的视频。这些参数的影响可以在下面的显存优化选项小节找到。
或者,您可以使用我们 GitHub 仓库中的 ComfyUI 封装,ComfyUI 节点的详细信息在 _[comfyui/README_CN.md](comfyui/README_CN.md)_ 中描述。
## 效果展示
### 图生视频效果
<table>
<tr>
<td><video src="https://github.com/user-attachments/assets/4dedf40b-82f2-454c-9a67-5f4ed243f5ea"></video></td>
<td><video src="https://github.com/user-attachments/assets/905fef17-8c5d-49b0-a49a-6ae7e212fa07"></video></td>
<td><video src="https://github.com/user-attachments/assets/20daab12-b510-448a-9491-389d7bdbbf2e"></video></td>
<td><video src="https://github.com/user-attachments/assets/f1bb0a91-d52a-4611-bac2-8fcf9658cac0"></video></td>
</tr>
</table>
### 镜头控制
<table>
<tr>
<td align=center><img src="https://github.com/user-attachments/assets/8aedcea6-3b8e-4c8b-9fed-9ceca4d41954" height=200></img>input</td>
<td align=center><video src="https://github.com/user-attachments/assets/d9d027d4-0d4f-45f5-9d46-49860b562c69"></video>left</td>
<td align=center><video src="https://github.com/user-attachments/assets/7716a67b-1bb8-4d44-b128-346cbc35e4ee"></video>right</td>
</tr>
<tr>
<td align=center><video src="https://github.com/user-attachments/assets/cc1f1928-cab7-4c4b-90af-928936102e66"></video>static</td>
<td align=center><video src="https://github.com/user-attachments/assets/c742ea2c-503a-454f-a61a-10b539100cd9"></video>up</td>
<td align=center><video src="https://github.com/user-attachments/assets/442839fa-cc53-4b75-b015-909e44c065e0"></video>down</td>
</tr>
</table>
### 运动幅度控制
<table>
<tr>
<td align=center><video src="https://github.com/user-attachments/assets/0020bd54-0ff6-46ad-91ee-d9f0df013772"></video>motion 1</td>
<td align=center><video src="https://github.com/user-attachments/assets/d1c26419-54e3-4b86-8ae3-98e12de3022e"></video>motion 2</td>
<td align=center><video src="https://github.com/user-attachments/assets/535147a2-049a-4afc-8d2a-017bc778977e"></video>motion 3</td>
<td align=center><video src="https://github.com/user-attachments/assets/bf893d53-2e11-406f-bb9a-2aacffcecd44"></video>motion 4</td>
</tr>
</table>
## 显存优化选项
提供了 **`GPU_memory_mode` 和 `GPU_offload_steps`** 选项以**降低显存占用**,满足不同用户的需求。
通常来说,**使用更少的显存,需要更多的内存和更长的生成时间**。以下列出预期显存使用和生成时间的参考表格。请注意,下面报告的显存是 `max_memory_allocated()` 的返回值,而 nvidia-smi 的显存数值通常会高于报告的数值。因为 CUDA 会占用一些显存(通常在500 - 800 MiB之间),而 PyTorch 的缓存机制也会请求额外的显存。
此外,在社区的帮助下创建了一张详细的表格 [gpu_memory_appendix.md](assets/gpu_memory_appendix.md),包含了更多分辨率和选项组合。欢迎社区成员帮助完善表格。
### A100 的显存占用与运行时间
- Resolution of 512
| 帧数 | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps | low_gpu_mode + 0 steps |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| 24 frames | 16119MiB <br> _01:01s_ | 15535MiB <br> _01:07s_ | 15340MiB <br> _01:13s_ | 15210MiB <br> _01:20s_ | 14950MiB <br> _01:32s_ | 4216MiB <br> _05:14s_ |
| 48 frames | 18398MiB <br> _01:53s_ | 17230MiB <br> _02:15s_ | 16840MiB <br> _02:29s_ | 16580MiB <br> _02:32s_ | 16060MiB <br> _02:54s_ | 4590MiB <br> _09:59s_ |
| 72 frames | 20678MiB <br> _03:00s_ | 18925MiB <br> _03:31s_ | 18340MiB <br> _03:53s_ | 17951MiB <br> _03:57s_ | 17171MiB <br> _04:25s_ | 6870MiB <br> _14:42s_ |
| 96 frames | 22958MiB <br> _04:11s_ | 20620MiB <br> _04:54s_ | 19841MiB <br> _05:10s_ | 19321MiB <br> _05:14s_ | 18281MiB <br> _05:47s_ | 9150MiB <br> _19:17s_ |
| 120 frames | 25238MiB <br> _05:42s_ | 22315MiB <br> _06:34s_ | 21341MiB <br> _06:59s_ | 20691MiB <br> _07:07s_ | 19392MiB <br> _07:41s_ | 11430MiB <br> _24:08s_ |
- Resolution of 768
| 帧数 | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps | low_gpu_mode + 0 steps |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| 24 frames | 18971MiB <br> _02:06s_ | 17655MiB <br> _02:40s_ | 17217MiB <br> _02:39s_ | 16925MiB <br> _02:41s_ | 16339MiB <br> _03:13s_ | 5162MiB <br> _13:42s_ |
| 48 frames | 24101MiB <br> _04:52s_ | 21469MiB <br> _05:44s_ | 20592MiB <br> _05:51s_ | 20008MiB <br> _06:00s_ | 18837MiB <br> _06:49s_ | 10292MiB <br> _20:58s_ |
| 72 frames | 29230MiB <br> _08:24s_ | 25283MiB <br> _09:45s_ | 25283MiB <br> _09:45s_ | 23091MiB <br> _10:10s_ | 21335MiB <br> _11:10s_ | 15421MiB <br> _39:12s_ |
| 96 frames | 34360MiB <br> _12:49s_ | 29097MiB <br> _14:41s_ | 27343MiB <br> _15:33s_ | 26174MiB <br> _15:44s_ | 23834MiB <br> _16:33s_ | 20550MiB <br> _43:47s_ |
| 120 frames | 39489MiB <br> _18:21s_ | 32911MiB <br> _20:39s_ | 30719MiB <br> _21:34s_ | 29257MiB <br> _21:48s_ | 26332MiB <br> _23:02s_ | 25679MiB <br> _63:01s_ |
### RTX 4090 的显存占用与运行时间
表格中以 `---` 显示的值表示触发了显存溢出(OOM),无法生成视频。
- Resolution of 512
| 帧数 | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps | low_gpu_mode + 0 steps |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| 24 frames | 16366MiB <br> _01:18s_ | 15805MiB <br> _01:26s_ | 15607MiB <br> _01:37s_ | 15475MiB <br> _01:36s_ | 15211MiB <br> _01:39s_ | 4211MiB <br> _03:57s_ |
| 48 frames | 18720MiB <br> _02:21s_ | 17532MiB <br> _02:49s_ | 17136MiB <br> _02:55s_ | 16872MiB <br> _02:58s_ | 16344MiB <br> _03:01s_ | 4666MiB <br> _05:01s_ |
| 72 frames | 21036MiB <br> _03:41s_ | 19254MiB <br> _04:25s_ | 18660MiB <br> _04:34s_ | 18264MiB <br> _04:36s_ | 17472MiB <br> _04:51s_ | 6981MiB <br> _06:36s_ |
| 96 frames | -----MiB <br> _--:--s_ | 20972MiB <br> _06:18s_ | 20180MiB <br> _06:24s_ | 19652MiB <br> _06:36s_ | 18596MiB <br> _06:56s_ | 9298MiB <br> _10:03s_ |
| 120 frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 21704MiB <br> _08:50s_ | 21044MiB <br> _08:53s_ | 19724MiB <br> _09:08s_ | 11613MiB <br> _13:57s_ |
- Resolution of 768
| 帧数 | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps | low_gpu_mode + 0 steps |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| 24 frames | 19223MiB <br> _02:38s_ | 17900MiB <br> _03:06s_ | 17448MiB <br> _03:18s_ | 17153MiB <br> _03:23s_ | 16624MiB <br> _03:34s_ | 5251MiB <br> _05:54s_ |
| 48 frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 20946MiB <br> _07:28s_ | 20352MiB <br> _07:35s_ | 19164MiB <br> _08:04s_ | 10457MiB <br> _10:55s_ |
| 72 frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 15671MiB <br> _18:52s_ |
## 许可证
我们将以宽松的 **Apache 2.0** 许可证发布该模型。
## 引用
```
@misc{createai2024ruyi,
title={Ruyi-Mini-7B},
author={CreateAI Team},
year={2024},
publisher = {GitHub},
journal = {GitHub repository},
howpublished={\url{https://github.com/IamCreateAI/Ruyi-Models}}
}
```
## 欢迎建议反馈与协同优化
我们真诚欢迎大家积极提供宝贵的反馈和建议,希望能够共同努力优化我们的服务和产品。您的意见将帮助我们更好地理解用户需求,从而不断提升用户体验。感谢您对我们工作的支持和关注!欢迎加入我们的[Discord](https://discord.com/invite/nueQFQwwGw) 或者微信群(扫描下方二维码,添加 Ruyi 小助手,加入官方交流群)!
<img src="https://github.com/user-attachments/assets/cc5e25c6-34ab-4be1-a59b-7d5789264a9c" style="width:300px"></img>
# Re-export the node registration tables from the comfyui submodule so that
# ComfyUI can discover this package's custom nodes (NODE_CLASS_MAPPINGS /
# NODE_DISPLAY_NAME_MAPPINGS are the names ComfyUI's plugin loader looks for).
from .comfyui.comfyui_nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS

# Limit `from <package> import *` to exactly the two registration mappings.
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
# GPU Memory Appendix
On this page, we will display the required memory and execution time under different combinations of memory optimization parameters. We would also like to thank the community for their support in helping us enrich the table data.
Please note that due to variations in the running environment, the content in the table may differ from actual conditions and is for reference only.
## RTX 3090 Results
### Results from [bmgjet](https://github.com/bmgjet)
- Environments
| CPU | Ram | Disk | GPU |
| :---: | :---: | :---: | :---: |
| 7950X | 64GB 6200mhz, 30,32,32 | NVME 990 Pro | 3090 (Water Cooled 500W PL, +1000mhz Vram) Sits about 2.1ghz on core |
Memory usage reported from GPUz logs.
Torch was not compiled with flash attention.
Windows 11 Using Python Environment from Forge (webui_forge_cu124_torch24.7z) Feb 5 2024.
- Resolution of 512
| Num frames | Parameters | GPU memory | Time |
| :---: | :---: | :---: | :---: |
| 24 frames | normal 0 | 18117mb | 1:32s - 3.80s/it |
| 48 frames | normal 0 | 19995mb | 4:06s - 9.98s/it |
| 72 frames | normal 0 | 22133mb | 9:57s - 23.88s/it |
| 96 frames | normal 10 | 22200mb | 12:51s - 30.84s/it |
| 120 frames | normal 7 | 23195mb | 15:57s - 38.26s/it |
- Resolution of 768
| Num frames | Parameters | GPU memory | Time |
| :---: | :---: | :---: | :---: |
| 24 frames | normal 0 | 22105mb | 4:07s - 9.94s/it |
| 48 frames | normal 7 | 22195mb | 14:34s - 34.98s/it |
| 72 frames | lowmem 0 | 17998mb | 38:29s - 91.99s/it |
| 96 frames | lowmem 0 | 23000mb | 1:04:16s - 153.74s/it |
| 120 frames | lowmem 0 | 28195mb | 5:29:54s - 797.28s/it <br> (froze system during vram overflows for a few sec each pass) |
## RTX 4090
### Results from CreateAI
> The GPU memory reported below is the `max_memory_allocated()` value. The values read from nvidia-smi may be higher than the reported values because CUDA occupies some GPU memory (usually between 500 - 800 MiB), and PyTorch's caching mechanism also requests additional GPU memory.
- Resolution of 384
| Num <br> frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 15387MiB <br> _00:42s_ | 15052MiB <br> _00:50s_ | 14941MiB <br> _00:50s_ | 14867MiB <br> _00:57s_ | 14718MiB <br> _00:58s_ |
| 48 <br> frames | 16686MiB <br> _01:15s_ | 16018MiB <br> _01:33s_ | 15795MiB <br> _01:35s_ | 15647MiB <br> _01:38s_ | 15350MiB <br> _01:40s_ |
| 72 <br> frames | 17996MiB <br> _01:51s_ | 16993MiB <br> _02:12s_ | 16659MiB <br> _02:21s_ | 16436MiB <br> _02:22s_ | 15990MiB <br> _02:31s_ |
| 96 <br> frames | 19297MiB <br> _02:31s_ | 17959MiB <br> _02:58s_ | 17514MiB <br> _03:09s_ | 17216MiB <br> _03:13s_ | 16621MiB <br> _03:27s_ |
| 120 <br> frames | 20599MiB <br> _03:17s_ | 18928MiB <br> _03:50s_ | 18371MiB <br> _04:09s_ | 17999MiB <br> _04:14s_ | 17256MiB <br> _04:29s_ |
| Num <br> frames | low_gpu_mode + 0 steps | low_gpu_mode + 10 steps | low_gpu_mode + 7 steps | low_gpu_mode + 5 steps | low_gpu_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 4096MiB <br> _02:51s_ | 4096MiB <br> _03:25s_ | 4096MiB <br> _03:35s_ | 4096MiB <br> _02:45s_ | 4095MiB <br> _03:13s_ |
| 48 <br> frames | 4148MiB <br> _03:37s_ | 4148MiB <br> _04:01s_ | 4148MiB <br> _04:19s_ | 4148MiB <br> _03:43s_ | 4147MiB <br> _04:52s_ |
| 72 <br> frames | 4200MiB <br> _05:01s_ | 4200MiB <br> _05:40s_ | 4200MiB <br> _07:51s_ | 4200MiB <br> _05:01s_ | 4200MiB <br> _06:18s_ |
| 96 <br> frames | 5248MiB <br> _05:59s_ | 4249MiB <br> _06:49s_ | 4249MiB <br> _08:43s_ | 4249MiB <br> _06:03s_ | 4249MiB <br> _07:09s_ |
| 120 <br> frames | 6549MiB <br> _07:24s_ | 4876MiB <br> _08:12s_ | 4321MiB <br> _10:45s_ | 4299MiB <br> _08:09s_ | 4299MiB <br> _08:32s_ |
- Resolution of 512
| Num <br> frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 16366MiB <br> _01:18s_ | 15805MiB <br> _01:26s_ | 15607MiB <br> _01:37s_ | 15475MiB <br> _01:36s_ | 15211MiB <br> _01:39s_ |
| 48 <br> frames | 18720MiB <br> _02:21s_ | 17532MiB <br> _02:49s_ | 17136MiB <br> _02:55s_ | 16872MiB <br> _02:58s_ | 16344MiB <br> _03:01s_ |
| 72 <br> frames | 21036MiB <br> _03:41s_ | 19254MiB <br> _04:25s_ | 18660MiB <br> _04:34s_ | 18264MiB <br> _04:36s_ | 17472MiB <br> _04:51s_ |
| 96 <br> frames | -----MiB <br> _--:--s_ | 20972MiB <br> _06:18s_ | 20180MiB <br> _06:24s_ | 19652MiB <br> _06:36s_ | 18596MiB <br> _06:56s_ |
| 120 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 21704MiB <br> _08:50s_ | 21044MiB <br> _08:53s_ | 19724MiB <br> _09:08s_ |
| Num <br> frames | low_gpu_mode + 0 steps | low_gpu_mode + 10 steps | low_gpu_mode + 7 steps | low_gpu_mode + 5 steps | low_gpu_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 4212MiB <br> _04:31s_ | 4212MiB <br> _04:44s_ | 4211MiB <br> _05:12s_ | 4211MiB <br> _04:51s_ | 4212MiB <br> _04:14s_ |
| 48 <br> frames | 4666MiB <br> _05:17s_ | 4401MiB <br> _06:11s_ | 4400MiB <br> _06:33s_ | 4399MiB <br> _08:29s_ | 4401MiB <br> _06:18s_ |
| 72 <br> frames | 6981MiB <br> _07:27s_ | 5199MiB <br> _08:00s_ | 4605MiB <br> _08:57s_ | 4598MiB <br> _11:02s_ | 4598MiB <br> _08:46s_ |
| 96 <br> frames | 9298MiB <br> _09:33s_ | 6922MiB <br> _10:13s_ | 6130MiB <br> _11:31s_ | 5602MiB <br> _13:46s_ | 4794MiB <br> _11:29s_ |
| 120 <br> frames | 11613MiB <br> _11:33s_ | 8643MiB <br> _13:11s_ | 7653MiB <br> _14:18s_ | 6993MiB <br> _14:23s_ | 5673MiB <br> _14:16s_ |
- Resolution of 640
| Num <br> frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 17671MiB <br> _01:47s_ | 16739MiB <br> _02:09s_ | 16429MiB <br> _02:17s_ | 16234MiB <br> _02:23s_ | 15850MiB <br> _02:28s_ |
| 48 <br> frames | 21324MiB <br> _03:49s_ | 19468MiB <br> _04:23s_ | 18849MiB <br> _04:55s_ | 18437MiB <br> _04:59s_ | 17612MiB <br> _05:14s_ |
| 72 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 21230MiB <br> _07:57s_ | 20611MiB <br> _08:13s_ | 19373MiB <br> _08:25s_ |
| 96 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 21134MiB <br> _11:57s_ |
| 120 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ |
| Num <br> frames | low_gpu_mode + 0 steps | low_gpu_mode + 10 steps | low_gpu_mode + 7 steps | low_gpu_mode + 5 steps | low_gpu_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 4301MiB <br> _03:49s_ | 4301MiB <br> _04:31s_ | 4301MiB <br> _04:55s_ | 4301MiB <br> _06:33s_ | 4301MiB <br> _04:52s_ |
| 48 <br> frames | 7271MiB <br> _07:05s_ | 5416MiB <br> _08:04s_ | 4798MiB <br> _08:07s_ | 4589MiB <br> _09:42s_ | 4589MiB <br> _09:18s_ |
| 72 <br> frames | 10889MiB <br> _10:38s_ | 8106MiB <br> _11:54s_ | 7179MiB <br> _11:58s_ | 6560MiB <br> _12:52s_ | 5322MiB <br> _13:13s_ |
| 96 <br> frames | 14509MiB <br> _14:13s_ | 10795MiB <br> _15:50s_ | 9557MiB <br> _16:10s_ | 8732MiB <br> _17:08s_ | 7082MiB <br> _17:39s_ |
| 120 <br> frames | 18128MiB <br> _18:26s_ | 13487MiB <br> _19:57s_ | 11942MiB <br> _21:12s_ | 10910MiB <br> _21:55s_ | 8847MiB <br> _26:20s_ |
- Resolution of 768
| Num <br> frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 19223MiB <br> _02:38s_ | 17900MiB <br> _03:06s_ | 17448MiB <br> _03:18s_ | 17153MiB <br> _03:23s_ | 16624MiB <br> _03:34s_ |
| 48 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 20946MiB <br> _07:28s_ | 20352MiB <br> _07:35s_ | 19164MiB <br> _08:04s_ |
| 72 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ |
| 96 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ |
| 120 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ |
| Num <br> frames | low_gpu_mode + 0 steps | low_gpu_mode + 10 steps | low_gpu_mode + 7 steps | low_gpu_mode + 5 steps | low_gpu_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 5251MiB <br> _05:19s_ | 4398MiB <br> _05:55s_ | 4398MiB <br> _06:52s_ | 4398MiB <br> _07:24s_ | 4399MiB <br> _07:35s_ |
| 48 <br> frames | 10457MiB <br> _09:38s_ | 7786MiB <br> _11:09s_ | 6896MiB <br> _12:43s_ | 6304MiB <br> _12:52s_ | 5114MiB <br> _13:20s_ |
| 72 <br> frames | 15671MiB <br> _15:14s_ | 11661MiB <br> _17:50s_ | 10325MiB <br> _18:22s_ | 9433MiB <br> _19:32s_ | 7652MiB <br> _19:19s_ |
| 96 <br> frames | -----MiB <br> _--:--s_ | 15534MiB <br> _24:30s_ | 13752MiB <br> _24:28s_ | 12564MiB <br> _25:54s_ | 10188MiB <br> _29:19s_ |
| 120 <br> frames | -----MiB <br> _--:--s_ | 19406MiB <br> _33:04s_ | 17179MiB <br> _33:14s_ | 15694MiB <br> _35:01s_ | 12724MiB <br> _39:11s_ |
- Resolution of 896
| Num <br> frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 21085MiB <br> _03:46s_ | 19265MiB <br> _04:25s_ | 18659MiB <br> _04:39s_ | 18248MiB <br> _04:43s_ | 17543MiB <br> _04:59s_ |
| 48 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 20994MiB <br> _11:34s_ |
| 72 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ |
| 96 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ |
| 120 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ |
| Num <br> frames | low_gpu_mode + 0 steps | low_gpu_mode + 10 steps | low_gpu_mode + 7 steps | low_gpu_mode + 5 steps | low_gpu_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 7128MiB <br> _07:26s_ | 5309MiB <br> _07:45s_ | 4703MiB <br> _07:50s_ | 4517MiB <br> _08:29s_ | 4517MiB <br> _09:31s_ |
| 48 <br> frames | 14220MiB <br> _14:05s_ | 10582MiB <br> _15:01s_ | 9370MiB <br> _15:43s_ | 8562MiB <br> _16:34s_ | 6946MiB <br> _17:36s_ |
| 72 <br> frames | -----MiB <br> _--:--s_ | 15857MiB <br> _24:53s_ | 14038MiB <br> _25:06s_ | 12826MiB <br> _26:49s_ | 10401MiB <br> _27:30s_ |
| 96 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 17087MiB <br> _38:52s_ | 13853MiB <br> _45:19s_ |
| 120 <br> frames | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | -----MiB <br> _--:--s_ | 17310MiB <br> _55:50s_ |
## A100
### Results from CreateAI
> The GPU memory reported below is the `max_memory_allocated()` value. The values read from nvidia-smi may be higher than the reported values because CUDA occupies some GPU memory (usually between 500 - 800 MiB), and PyTorch's caching mechanism also requests additional GPU memory.
- Resolution of 384
| Num <br> frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 15387MiB <br> _00:36s_ | 15052MiB <br> _00:43s_ | 14941MiB <br> _00:50s_ | 14867MiB <br> _00:50s_ | 14717MiB <br> _00:56s_ |
| 48 <br> frames | 16686MiB <br> _00:57s_ | 16018MiB <br> _01:10s_ | 15795MiB <br> _01:28s_ | 15647MiB <br> _01:16s_ | 15350MiB <br> _01:21s_ |
| 72 <br> frames | 17996MiB <br> _01:29s_ | 16993MiB <br> _01:43s_ | 16659MiB <br> _01:53s_ | 16436MiB <br> _01:53s_ | 15990MiB <br> _01:57s_ |
| 96 <br> frames | 19297MiB <br> _02:01s_ | 17959MiB <br> _02:21s_ | 17514MiB <br> _02:29s_ | 17216MiB <br> _02:33s_ | 16621MiB <br> _02:54s_ |
| 120 <br> frames | 20599MiB <br> _02:33s_ | 18928MiB <br> _03:04s_ | 18371MiB <br> _03:12s_ | 17999MiB <br> _03:16s_ | 17256MiB <br> _03:25s_ |
| Num <br> frames | low_gpu_mode + 0 steps | low_gpu_mode + 10 steps | low_gpu_mode + 7 steps | low_gpu_mode + 5 steps | low_gpu_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 4096MiB <br> _03:56s_ | 4096MiB <br> _04:24s_ | 4096MiB <br> _04:16s_ | 4096MiB <br> _04:14s_ | 4095MiB <br> _04:41s_ |
| 48 <br> frames | 4148MiB <br> _04:45s_ | 4148MiB <br> _05:27s_ | 4148MiB <br> _05:29s_ | 4148MiB <br> _05:32s_ | 4147MiB <br> _06:06s_ |
| 72 <br> frames | 4200MiB <br> _06:10s_ | 4200MiB <br> _07:10s_ | 4200MiB <br> _07:13s_ | 4200MiB <br> _07:19s_ | 4200MiB <br> _07:24s_ |
| 96 <br> frames | 5248MiB <br> _07:53s_ | 4249MiB <br> _08:16s_ | 4249MiB <br> _08:30s_ | 4249MiB <br> _08:28s_ | 4249MiB <br> _09:05s_ |
| 120 <br> frames | 6547MiB <br> _08:43s_ | 4876MiB <br> _09:54s_ | 4321MiB <br> _10:16s_ | 4299MiB <br> _10:20s_ | 4299MiB <br> _11:26s_ |
- Resolution of 512
| Num <br> frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 16399MiB <br> _00:59s_ | 15805MiB <br> _01:07s_ | 15607MiB <br> _01:13s_ | 15475MiB <br> _01:18s_ | 15211MiB <br> _01:16s_ |
| 48 <br> frames | 18720MiB <br> _01:47s_ | 17532MiB <br> _02:10s_ | 17136MiB <br> _02:25s_ | 16872MiB <br> _02:22s_ | 16344MiB <br> _02:24s_ |
| 72 <br> frames | 21036MiB <br> _02:55s_ | 19254MiB <br> _03:27s_ | 18660MiB <br> _03:36s_ | 18264MiB <br> _03:41s_ | 17472MiB <br> _03:51s_ |
| 96 <br> frames | 23348MiB <br> _04:08s_ | 20972MiB <br> _04:46s_ | 20180MiB <br> _05:05s_ | 19652MiB <br> _05:12s_ | 18596MiB <br> _05:38s_ |
| 120 <br> frames | 25664MiB <br> _05:42s_ | 22694MiB <br> _06:28s_ | 21704MiB <br> _06:47s_ | 21044MiB <br> _06:50s_ | 19724MiB <br> _07:06s_ |
| Num <br> frames | low_gpu_mode + 0 steps | low_gpu_mode + 10 steps | low_gpu_mode + 7 steps | low_gpu_mode + 5 steps | low_gpu_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 4212MiB <br> _05:03s_ | 4213MiB <br> _05:31s_ | 4212MiB <br> _05:23s_ | 4212MiB <br> _05:50s_ | 4212MiB <br> _05:51s_ |
| 48 <br> frames | 4666MiB <br> _06:47s_ | 4401MiB <br> _07:13s_ | 4401MiB <br> _07:34s_ | 4400MiB <br> _08:21s_ | 4400MiB <br> _08:19s_ |
| 72 <br> frames | 6981MiB <br> _08:41s_ | 5199MiB <br> _09:44s_ | 4605MiB <br> _10:10s_ | 4598MiB <br> _11:13s_ | 4598MiB <br> _12:00s_ |
| 96 <br> frames | 9298MiB <br> _10:59s_ | 6922MiB <br> _12:10s_ | 6130MiB <br> _12:16s_ | 5602MiB <br> _13:27s_ | 4794MiB <br> _15:17s_ |
| 120 <br> frames | 11613MiB <br> _13:27s_ | 8643MiB <br> _14:55s_ | 7653MiB <br> _15:22s_ | 6993MiB <br> _16:04s_ | 5673MiB <br> _18:04s_ |
- Resolution of 640
| Num <br> frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 17707MiB <br> _01:26s_ | 16778MiB <br> _01:45s_ | 16470MiB <br> _01:48s_ | 16263MiB <br> _01:51s_ | 15850MiB <br> _01:53s_ |
| 48 <br> frames | 21324MiB <br> _03:09s_ | 19468MiB <br> _03:36s_ | 18849MiB <br> _03:53s_ | 18437MiB <br> _03:53s_ | 17612MiB <br> _04:03s_ |
| 72 <br> frames | 24943MiB <br> _05:06s_ | 22158MiB <br> _05:54s_ | 21230MiB <br> _06:12s_ | 20612MiB <br> _06:18s_ | 19374MiB <br> _06:35s_ |
| 96 <br> frames | 28562MiB <br> _07:35s_ | 24848MiB <br> _08:36s_ | 23611MiB <br> _09:04s_ | 22785MiB <br> _09:07s_ | 21134MiB <br> _09:35s_ |
| 120 <br> frames | 32182MiB <br> _10:32s_ | 27541MiB <br> _11:54s_ | 25994MiB <br> _12:20s_ | 24962MiB <br> _12:37s_ | 22899MiB <br> _13:05s_ |
| Num <br> frames | low_gpu_mode + 0 steps | low_gpu_mode + 10 steps | low_gpu_mode + 7 steps | low_gpu_mode + 5 steps | low_gpu_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 4301MiB <br> _05:40s_ | 4301MiB <br> _06:07s_ | 4301MiB <br> _06:27s_ | 4301MiB <br> _06:33s_ | 4301MiB <br> _06:57s_ |
| 48 <br> frames | 7271MiB <br> _09:18s_ | 5416MiB <br> _09:51s_ | 4798MiB <br> _10:26s_ | 4589MiB <br> _10:36s_ | 4589MiB <br> _11:52s_ |
| 72 <br> frames | 10889MiB <br> _13:03s_ | 8106MiB <br> _14:07s_ | 7179MiB <br> _14:35s_ | 6561MiB <br> _17:30s_ | 5322MiB <br> _16:33s_ |
| 96 <br> frames | 14509MiB <br> _16:57s_ | 10795MiB <br> _18:12s_ | 9557MiB <br> _18:29s_ | 8732MiB <br> _18:55s_ | 7082MiB <br> _21:10s_ |
| 120 <br> frames | 18128MiB <br> _22:05s_ | 13487MiB <br> _22:49s_ | 11942MiB <br> _23:47s_ | 10910MiB <br> _23:50s_ | 8847MiB <br> _32:46s_ |
- Resolution of 768
| Num <br> frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 19300MiB <br> _02:09s_ | 17962MiB <br> _02:32s_ | 17517MiB <br> _02:37s_ | 17219MiB <br> _02:39s_ | 16624MiB <br> _02:46s_ |
| 48 <br> frames | 24510MiB <br> _04:46s_ | 21837MiB <br> _05:32s_ | 20946MiB <br> _05:51s_ | 20352MiB <br> _05:59s_ | 19164MiB <br> _06:10s_ |
| 72 <br> frames | 29725MiB <br> _08:23s_ | 25715MiB <br> _09:35s_ | 24378MiB <br> _09:56s_ | 23486MiB <br> _10:05s_ | 21703MiB <br> _10:40s_ |
| 96 <br> frames | 34925MiB <br> _12:52s_ | 29579MiB <br> _14:33s_ | 27797MiB <br> _15:01s_ | 26609MiB <br> _15:13s_ | 24233MiB <br> _15:53s_ |
| 120 <br> frames | 40134MiB <br> _18:29s_ | 33451MiB <br> _20:39s_ | 31223MiB <br> _21:31s_ | 29737MiB <br> _21:42s_ | 26766MiB <br> _22:51s_ |
| Num <br> frames | low_gpu_mode + 0 steps | low_gpu_mode + 10 steps | low_gpu_mode + 7 steps | low_gpu_mode + 5 steps | low_gpu_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 5251MiB <br> _08:04s_ | 4398MiB <br> _08:15s_ | 4398MiB <br> _08:21s_ | 4398MiB <br> _08:48s_ | 4399MiB <br> _09:14s_ |
| 48 <br> frames | 10457MiB <br> _12:42s_ | 7786MiB <br> _13:54s_ | 6896MiB <br> _14:17s_ | 6304MiB <br> _14:37s_ | 5114MiB <br> _16:21s_ |
| 72 <br> frames | 15671MiB <br> _18:18s_ | 11661MiB <br> _19:44s_ | 10325MiB <br> _20:53s_ | 9435MiB <br> _20:47s_ | 7652MiB <br> _23:42s_ |
| 96 <br> frames | 20880MiB <br> _24:55s_ | 15534MiB <br> _27:07s_ | 13752MiB <br> _28:13s_ | 12564MiB <br> _28:44s_ | 10188MiB <br> _30:30s_ |
| 120 <br> frames | 26092MiB <br> _31:21s_ | 19406MiB <br> _34:09s_ | 17179MiB <br> _35:13s_ | 15694MiB <br> _35:15s_ | 12724MiB <br> _32:55s_ |
- Resolution of 896
| Num <br> frames | normal_mode + 0 steps | normal_mode + 10 steps | normal_mode + 7 steps | normal_mode + 5 steps | normal_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 21181MiB <br> _02:58s_ | 19362MiB <br> _03:32s_ | 18755MiB <br> _03:48s_ | 18351MiB <br> _03:47s_ | 17543MiB <br> _03:54s_ |
| 48 <br> frames | 28271MiB <br> _07:18s_ | 24633MiB <br> _08:24s_ | 23420MiB <br> _08:49s_ | 22611MiB <br> _08:53s_ | 20994MiB <br> _09:10s_ |
| 72 <br> frames | 35360MiB <br> _13:22s_ | 29902MiB <br> _14:57s_ | 28083MiB <br> _15:33s_ | 26870MiB <br> _15:46s_ | 24445MiB <br> _16:13s_ |
| 96 <br> frames | 42447MiB <br> _21:08s_ | 35169MiB <br> _23:48s_ | 32744MiB <br> _24:41s_ | 31126MiB <br> _25:08s_ | 27891MiB <br> _26:33s_ |
| 120 <br> frames | 49538MiB <br> _30:55s_ | 40442MiB <br> _34:03s_ | 37410MiB <br> _35:11s_ | 35388MiB <br> _35:31s_ | 31345MiB <br> _36:40s_ |
| Num <br> frames | low_gpu_mode + 0 steps | low_gpu_mode + 10 steps | low_gpu_mode + 7 steps | low_gpu_mode + 5 steps | low_gpu_mode + 1 steps |
| :---: | :---: | :---: | :---: | :---: | :---: |
| 24 <br> frames | 7128MiB <br> _08:52s_ | 5309MiB <br> _09:30s_ | 4702MiB <br> _10:37s_ | 4517MiB <br> _10:55s_ | 4517MiB <br> _12:09s_ |
| 48 <br> frames | 14220MiB <br> _15:56s_ | 10582MiB <br> _17:04s_ | 9370MiB <br> _18:49s_ | 8562MiB <br> _19:08s_ | 6946MiB <br> _21:50s_ |
| 72 <br> frames | 21315MiB <br> _24:58s_ | 15857MiB <br> _26:54s_ | 14038MiB <br> _29:10s_ | 12826MiB <br> _29:42s_ | 10401MiB <br> _30:58s_ |
| 96 <br> frames | 28407MiB <br> _31:16s_ | 21130MiB <br> _34:22s_ | 18704MiB <br> _36:45s_ | 17087MiB <br> _36:34s_ | 13853MiB <br> _43:48s_ |
| 120 <br> frames | 35504MiB <br> _45:00s_ | 26405MiB <br> _48:05s_ | 23373MiB <br> _49:27s_ | 21352MiB <br> _50:34s_ | 17310MiB <br> _59:38s_ |
## ComfyUI Nodes
After installation, you can find the three nodes provided by this repository in the *Add Node - Ruyi* menu, as shown in the image below:
<div align=center>
<img src="https://github.com/user-attachments/assets/0760399c-c57f-465d-9685-ef910b60421c"></img>
</div>
The following sections will introduce the functions and parameters of each node.
> Note: The new version of ComfyUI nodes is displayed in the NODE LIBRARY on the left side of the interface.
### Load Model
The Load Model node is used to load the model from disk. It also provides the functionality for **automatic model downloading** (auto_download parameter).
<div align=center>
<img src="https://github.com/user-attachments/assets/2e9fcbb5-6f7c-451c-b742-5b844e949a01"></img>
</div>
- **model**: Select which model to use. Currently, Ruyi-Mini-7B is the only option.
- **auto_download**: Whether to automatically download. Defaults to yes. If the model is detected as missing or incomplete, it will automatically download the model to the *ComfyUI/models/Ruyi* path.
- **auto_update**: Whether to automatically check for and update the current model. Defaults to yes. When auto_download is enabled, the system will automatically check for updates to the model and download any updates to the *ComfyUI/models/Ruyi* directory. Please note that this feature relies on the caching mechanism of huggingface_hub, so do not delete the *.cache* folder in the model directory to ensure a smooth update process.
### Load LoRA
The Load LoRA node is used to load LoRA models, which need to be placed in the *ComfyUI/models/loras* path.
<div align=center>
<img src="https://github.com/user-attachments/assets/33f860ca-94c9-4686-afe4-7e00de15b0ce"></img>
</div>
- **lora_name**: The LoRA to be loaded; it will automatically search and display all model files in the *ComfyUI/models/loras* path.
- **strength_model**: The degree of influence of the LoRA, typically set between 1.0 and 1.4 for optimal results based on experience.
### Sampler for Image to Video
The Sampler for Image to Video node is used to generate videos based on input images. The starting frame image (start_img) is a required input, while the ending frame image (end_img) is optional. This node also supports **camera control** (camera_direction parameter) and **motion amplitude control** of the video subject (motion parameter).
<div align=center>
<img src="https://github.com/user-attachments/assets/e5078e95-7100-4e9b-9baa-0d534b085e7a"></img>
</div>
- **start_img**: The starting frame image.
- **end_img**: The ending frame image, optional input.
- **video_length**: The length of the video, which must be divisible by 8, with a maximum of 120 frames.
- **base_resolution**: The video resolution, such as 512, indicates that the generated video will have pixel dimensions close to 512 x 512. The model will automatically select the closest output video aspect ratio based on the input image.
- **seed**: A random number; different random numbers usually generate different videos. If the generated video does not meet requirements, this value can be adjusted to try other generation possibilities.
- **control_after_generate**: The method of changing the random number after each generation.
- **Fixed** indicates the seed is fixed.
- **Increment** indicates the seed is increased by one each time.
- **Decrement** indicates the seed is decreased by one each time.
- **Randomize** indicates the seed is randomly set each time.
- **steps**: The number of iterations for video generation. **More iterations require more time**. Typically, 25 iterations yield good results.
- **cfg**: The guidance of instructions (such as input images). A higher value indicates better guidance, with values between 7 and 10 usually achieving better generation results.
- **motion**: Controls the motion amplitude of the video subject.
- **1** indicates minimal motion, which means the video subject is nearly static.
  - **2** indicates normal motion, which could be used in most cases.
- **3** indicates significant motion. The video subject is trying to move as much as possible.
- **4** indicates a very large motion. Sometimes the video subject may move out of the camera frame.
- **Auto** indicates the motion is automatically determined by the model.
- **camera_direction**: Camera movement.
- **Static** indicates a stationary camera.
- **Left** indicates the camera moves left.
- **Right** indicates the camera moves right.
- **Up** indicates the camera moves up.
- **Down** indicates the camera moves down.
- **Auto** indicates automatic determination.
- **GPU_memory_mode**: Determines how GPU memory is utilized.
- **normal_mode** is the default mode, using more GPU memory and generating faster.
- **low_memory_mode** is the low memory mode, which significantly reduces GPU memory usage but severely impacts generation speed.
- **GPU_offload_steps**: Used to **optimize GPU memory usage** by moving some temporary variables from GPU memory to RAM, which **increases system RAM usage and decreases generation speed**.
- **0** indicates no optimization.
- **1 - 10**, where 1 has the least GPU memory usage and the slowest generation speed; 10 has the most GPU memory usage (less than the non-optimized case) and the fastest generation speed.
- Generally, with 24GB of GPU memory, you can use 7 to generate a 512 resolution, 120 frame video. For more detailed data, please refer to the following.
## Workflow Example
This section presents an example workflow for generating videos from images. You can import the workflow using the *Load* button in the (bottom right) menu. Note that the new version of ComfyUI moves the menu to the top left, and allows you to load workflows through the *Workflow - Open* option.
The workflows are located in the *[comfyui/workflows/](workflows/)* directory, while the assets can be found in the *[assets/](../assets/)* directory.
After importing the workflow, you **need to manually re-specify the input image for the LoadImage input node**. Since the workflow file can only record the names of input files, it does require manual configuration.
### Image to Video (Starting Frame)
The workflow corresponds to the *[workflow-ruyi-i2v-start-frame.json](workflows/workflow-ruyi-i2v-start-frame.json)* file. For users with larger GPU memory, you can also use *[workflow-ruyi-i2v-start-frame-80g.json](workflows/workflow-ruyi-i2v-start-frame-80g.json)* to enhance the generation speed.
<div align=center>
<img style="width:80%" src="https://github.com/user-attachments/assets/4c0a58b8-ea04-4656-bf1f-b8665a3802a3"></img>
</div>
### Image to Video (Starting and Ending Frames)
The workflow corresponds to the *[workflow-ruyi-i2v-start-end-frames.json](workflows/workflow-ruyi-i2v-start-end-frames.json)* file. For users with larger GPU memory, you can also use *[workflow-ruyi-i2v-start-end-frames-80g.json](workflows/workflow-ruyi-i2v-start-end-frames-80g.json)* to enhance the generation speed.
<div align=center>
<img style="width:80%" src="https://github.com/user-attachments/assets/42b685a4-35ad-4dd8-afa2-79ded3e936cd"></img>
</div>
## Frequently Asked Questions
### Model loading error: LoadModel: ConnectionError (MaxRetryError)
This is usually caused by network issues leading to a failure in downloading from huggingface_hub. If the network is functioning properly, simply rerunning the LoadModel node should resolve the issue.
### Video generation speed is slow and far below expectations
- First, check if low_memory_mode is selected for the GPU_memory_mode option of the Sampler for Image to Video node. This mode significantly reduces video generation speed.
- Second, verify the version of PyTorch. PyTorch version 2.2 supports FlashAttention-2 ([link](https://pytorch.org/blog/pytorch2-2/)), which can greatly enhance computational efficiency. Installing the latest version of PyTorch can effectively improve generation speed.
## 节点功能
安装完成后,可在 ComfyUI 的 *Add Node - Ruyi* 菜单找到本仓库提供的 3 个节点,如下图所示:
<div align=center>
<img src="https://github.com/user-attachments/assets/0760399c-c57f-465d-9685-ef910b60421c"></img>
</div>
下文依次介绍各个节点的功能与参数。
> 注:新版 ComfyUI 节点在界面左侧 NODE LIBRARY 中显示。
### Load Model
用于加载模型,并提供**自动下载模型**(通过 auto_download 选项设置)的功能。
<div align=center>
<img src="https://github.com/user-attachments/assets/2e9fcbb5-6f7c-451c-b742-5b844e949a01"></img>
</div>
- **model**: 选择使用哪个模型,目前只有 Ruyi-Mini-7B 一个选项。
- **auto_download**: 是否自动下载,默认为 yes,检测到模型不存在(或不完整)时,将自动下载模型到 *ComfyUI/models/Ruyi* 路径。
- **auto_update**: 是否自动检查并更新当前模型,默认为 yes。当启用 auto_download 时,系统将自动检查模型是否有更新,并将更新内容下载到 *ComfyUI/models/Ruyi* 路径。请注意,此功能依赖于 huggingface_hub 的缓存机制,因此请勿删除模型路径中的 *.cache* 文件夹,以确保更新过程顺利进行。
### Load LoRA
用于加载 LoRA 模型,LoRA 模型需要放在 *ComfyUI/models/loras* 路径下。
<div align=center>
<img src="https://github.com/user-attachments/assets/33f860ca-94c9-4686-afe4-7e00de15b0ce"></img>
</div>
- **lora_name**: 需要加载的 LoRA,将自动搜索并显示 *ComfyUI/models/loras* 路径下所有模型文件。
- **strength_model**: LoRA 的影响程度,根据经验通常设置在 1.0 ~ 1.4 效果较好。
### Sampler for Image to Video
用于根据输入图片生成视频,首帧图片(start_img)为必须输入,尾帧图片(end_img)为可选输入。同时,该节点支持**镜头控制**(camera_direction)与**视频主体的运动幅度控制**(motion)。
<div align=center>
<img src="https://github.com/user-attachments/assets/e5078e95-7100-4e9b-9baa-0d534b085e7a"></img>
</div>
- **start_img**: 首帧图片。
- **end_img**: 尾帧图片,可选输入。
- **video_length**: 视频长度,必须能**被 8 整除**,**最大支持 120 帧**。
- **base_resolution**: 视频分辨率,**模型将根据输入图片的长宽比自动选择最接近的输出视频长宽**
- **512** 表示生成的视频像素数接近 512 * 512。
- **768** 表示生成的视频像素数接近 768 * 768。
- **seed**: 随机种子,用于控制随机数生成器产生随机数的序列。**不同的随机种子通常能生成不同的视频**,当生成的视频不符合需求时候,可调整此值以尝试其他的生成可能。
- **control_after_generate**: 每次生成后随机种子的变化方式。
- **Fixed** 表示随机种子固定不变。
- **Increment** 表示随机种子每次增加一。
- **Decrement** 表示随机种子每次减少一。
- **Randomize** 表示随机种子运行后随机设置。
- **steps**: 视频生成的迭代次数,**迭代次数越多,需要的时间越久**,通常 25 次能够得到较好的结果。
- **cfg**: 指令(如输入图片)的遵循程度,数值越大遵循程度越好,取值 7 ~ 10 通常能取得较好的生成效果。
- **motion**: 控制视频主体的运动幅度。
- **1** 基本不运动,适用于静态场景。
- **2** 正常运动幅度,适用于谈话、转头等常见场合。
- **3** 运动幅度较大,可能出现转身、走动等情况。
- **4** 运动幅度非常大,可能出现视频主体离开画面的情况。
- **Auto** 表示模型自动判断运动幅度大小。
- **camera_direction**: 镜头运动。
- **Static** 表示静止镜头。
- **Left** 表示镜头向左移动。
- **Right** 表示镜头向右移动。
- **Up** 表示镜头向上移动。
- **Down** 表示镜头向下移动。
- **Auto** 表示模型自动判断镜头运动方向。
- **GPU_memory_mode**:
- **normal_mode** 是默认模式,**使用显存较多,生成速度较快**
- **low_memory_mode** 是低显存模式,**能大幅降低显存用量,但严重影响生成速度**
- **GPU_offload_steps**: 用于**优化显存占用**,通过将部分临时变量从显存移动到内存而实现,会**增加内存的占用并降低生成速度**
- **0** 表示不优化。
- **1 - 10**,1 显存占用最小,生成速度最慢;10 显存占用最多(少于不优化情况),生成速度最快。
- 通常情况下,24G 显存可以使用 7 生成 512 分辨率,120 帧视频。更详细数据请参考下文。
## 工作流样例
本节展示了图生视频的工作流样例,可通过菜单中的 *Load* 按钮导入工作流。新版 ComfyUI 可通过左上方菜单的 *Workflow - Open* 加载工作流。
工作流位于 *[comfyui/workflows/](workflows/)* 目录中,素材位于 *[assets/](../assets/)* 目录中。
导入工作流后,**需要手动重新指定输入节点 LoadImage 的输入图片**,由于工作流文件只能记录输入文件的名字,所以目前只能手动设置。
### 图生视频(首帧)
工作流对应 *[workflow-ruyi-i2v-start-frame.json](workflows/workflow-ruyi-i2v-start-frame.json)* 文件。对于显存较大的用户,也可以使用 *[workflow-ruyi-i2v-start-frame-80g.json](workflows/workflow-ruyi-i2v-start-frame-80g.json)* 以提高生成速度。
<div align=center>
<img style="width:80%" src="https://github.com/user-attachments/assets/4c0a58b8-ea04-4656-bf1f-b8665a3802a3"></img>
</div>
### 图生视频(首尾帧)
工作流对应 *[workflow-ruyi-i2v-start-end-frames.json](workflows/workflow-ruyi-i2v-start-end-frames.json)* 文件。对于显存较大的用户,也可以使用 *[workflow-ruyi-i2v-start-end-frames-80g.json](workflows/workflow-ruyi-i2v-start-end-frames-80g.json)* 以提高生成速度。
<div align=center>
<img style="width:80%" src="https://github.com/user-attachments/assets/42b685a4-35ad-4dd8-afa2-79ded3e936cd"></img>
</div>
## 常见问题
### 模型加载错误,LoadModel: ConnectionError (MaxRetryError)
通常是网络问题导致 huggingface_hub 下载失败。网络正常的情况下,再次运行 LoadModel 节点即可解决。
### 视频生成速度慢、远低于预期
- 首先,请检查是否开启 Sampler for Image to Video 节点中 GPU_memory_mode 的 low_memory_mode,此模式会大幅降低视频生成速度。
- 其次,请检查 PyTorch 版本。PyTorch 在 2.2 版本支持了 FlashAttention-2([链接](https://pytorch.org/blog/pytorch2-2/)),能大幅提升计算效率。安装新版本的 PyTorch 能够有效提升生成速度。
"""Modified from https://github.com/kijai/ComfyUI-EasyAnimateWrapper/blob/main/nodes.py
"""
import gc
import os
import comfy.model_management as mm
import cv2
import folder_paths
import numpy as np
import torch
from comfy.utils import ProgressBar
from diffusers import (DDIMScheduler, DPMSolverMultistepScheduler, PNDMScheduler,
EulerAncestralDiscreteScheduler, EulerDiscreteScheduler)
from einops import rearrange
from omegaconf import OmegaConf
from PIL import Image
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
from safetensors.torch import load_file as load_safetensors
from huggingface_hub import snapshot_download
from ..ruyi.data.bucket_sampler import ASPECT_RATIO_512, get_closest_ratio
from ..ruyi.models.autoencoder_magvit import AutoencoderKLMagvit
from ..ruyi.models.transformer3d import HunyuanTransformer3DModel
from ..ruyi.pipeline.pipeline_ruyi_inpaint import RuyiInpaintPipeline
from ..ruyi.utils.lora_utils import merge_lora, unmerge_lora
from ..ruyi.utils.utils import get_image_to_video_latent
# The directory of scripts
# Resolves to the parent of this file's directory (the node-package root);
# used by Ruyi_LoadModel.load_model to locate config/default.yaml.
script_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
class Ruyi_LoadModel:
    """ComfyUI node that loads the Ruyi video-generation pipeline.

    Optionally downloads/updates the model weights from the Hugging Face Hub
    into ComfyUI's ``models/Ruyi`` directory, then builds a
    ``RuyiInpaintPipeline`` and returns it wrapped in a RUYI_MODEL dict.
    """

    @classmethod
    def INPUT_TYPES(s):
        # ComfyUI input schema: model selector plus yes/no toggles for
        # automatic download and automatic update.
        return {
            "required": {
                "model": (
                    [
                        "Ruyi-Mini-7B",
                    ],
                    { "default": "Ruyi-Mini-7B", }
                ),
                "auto_download": (
                    ["yes", "no"],
                    {
                        "default": "yes"
                    }
                ),
                "auto_update": (
                    ["yes", "no"],
                    {
                        "default": "yes"
                    }
                )
            },
        }

    # ComfyUI node metadata.
    RETURN_TYPES = ("RUYI_MODEL",)
    RETURN_NAMES = ("ruyi_model",)
    FUNCTION = "load_model"
    CATEGORY = "Ruyi"

    # Maps model name -> pipeline type.
    RUYI_MODEL_TYPE_DICT = {
        "Ruyi-Mini-7B": "Inpaint"
    }

    def get_model_type(self, model_name):
        """Return the pipeline type for ``model_name`` ("Inpaint" if unknown)."""
        return self.RUYI_MODEL_TYPE_DICT.get(model_name, "Inpaint")

    def try_setup_pipeline(self, model_path, weight_dtype, config):
        """Build a ``RuyiInpaintPipeline`` from a local model directory.

        Loads the VAE, transformer, CLIP image encoder/processor, scheduler
        and precomputed prompt embeddings from subfolders of ``model_path``.
        Returns the pipeline, or ``None`` if any component fails to load
        (e.g. the model directory is missing or incomplete).
        """
        try:
            # Init processbar
            # NOTE(review): the bar is created with 5 steps but only updated
            # 4 times below — confirm whether this is intended.
            pbar = ProgressBar(5)
            # Get Vae
            vae = AutoencoderKLMagvit.from_pretrained(
                model_path,
                subfolder="vae"
            ).to(weight_dtype)
            # Update pbar
            pbar.update(1)
            # Get Transformer
            transformer_additional_kwargs = OmegaConf.to_container(config['transformer_additional_kwargs'])
            transformer = HunyuanTransformer3DModel.from_pretrained_2d(
                model_path,
                subfolder="transformer",
                transformer_additional_kwargs=transformer_additional_kwargs
            ).to(weight_dtype)
            # Update pbar
            pbar.update(1)
            # Load Clip
            clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
                model_path, subfolder="image_encoder"
            ).to(weight_dtype)
            clip_image_processor = CLIPImageProcessor.from_pretrained(
                model_path, subfolder="image_encoder"
            )
            # Update pbar
            pbar.update(1)
            # Load sampler and create pipeline
            Choosen_Scheduler = DDIMScheduler
            scheduler = Choosen_Scheduler.from_pretrained(
                model_path,
                subfolder="scheduler"
            )
            pipeline = RuyiInpaintPipeline.from_pretrained(
                model_path,
                vae=vae,
                transformer=transformer,
                scheduler=scheduler,
                torch_dtype=weight_dtype,
                clip_image_encoder=clip_image_encoder,
                clip_image_processor=clip_image_processor,
            )
            # Load embeddings
            # Control embeddings consumed later by
            # Ruyi_I2VSampler.get_control_embeddings.
            embeddings = load_safetensors(os.path.join(model_path, "embeddings.safetensors"))
            pipeline.embeddings = embeddings
            # Update pbar
            pbar.update(1)
            return pipeline
        except Exception as e:
            # Deliberate best-effort: the caller treats None as
            # "model missing/incomplete" and may retry after downloading.
            print("[Ruyi] Setup pipeline failed:", e)
            return None

    def load_model(self, model, auto_download, auto_update):
        """Node entry point (see ``FUNCTION``).

        Parameters
        ----------
        model : str
            Model name; currently only "Ruyi-Mini-7B".
        auto_download : str
            "yes"/"no" — download the model when it cannot be loaded.
        auto_update : str
            "yes"/"no" — check the Hugging Face repo for updates first
            (only has an effect when auto_download is also "yes").

        Returns
        -------
        tuple
            One-element tuple holding a RUYI_MODEL dict (pipeline, dtype,
            model_path, model_type, and empty LoRA bookkeeping lists).

        Raises
        ------
        FileNotFoundError
            If the pipeline cannot be built and downloading is disabled or
            did not help.
        """
        # Init weight_dtype and device
        # NOTE(review): device and offload_device are not used in this
        # method — confirm whether they can be removed.
        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        # Init model name and type
        model_name = model
        model_type = self.get_model_type(model_name)
        weight_dtype = torch.bfloat16
        # Load config
        config_path = os.path.join(script_directory, "config", "default.yaml")
        config = OmegaConf.load(config_path)
        # Check for update
        repo_id = f"IamCreateAI/{model_name}"
        model_path = os.path.join(folder_paths.models_dir, "Ruyi", model_name)
        if auto_download == "yes" and auto_update == "yes":
            print(f"Checking for {model_name} updates ...")
            # Download the model
            # snapshot_download reuses the HF cache, so unchanged files
            # are not re-fetched.
            snapshot_download(repo_id=repo_id, local_dir=model_path)
        # Init model
        pipeline = self.try_setup_pipeline(model_path, weight_dtype, config)
        if pipeline is None and auto_download == "yes":
            # First attempt failed: free memory, download, then retry once.
            mm.soft_empty_cache()
            gc.collect()
            # Download the model
            snapshot_download(repo_id=repo_id, local_dir=model_path)
            pipeline = self.try_setup_pipeline(model_path, weight_dtype, config)
        if pipeline is None:
            message = (f"[ Load Model {model_name} Failed ]\n"
                       f"Please download Ruyi model from huggingface repo '{repo_id}',\n"
                       f"And put it into '{model_path}'.")
            if auto_download == "no":
                message += "\n\nOr just set auto_download to 'yes'."
            raise FileNotFoundError(message)
        # Default GPU memory mode; the sampler node may switch it later.
        pipeline.enable_model_cpu_offload()
        ruyi_model = {
            'pipeline': pipeline,
            'dtype': weight_dtype,
            'model_path': model_path,
            'model_type': model_type,
            'loras': [],
            'strength_model': [],
        }
        return (ruyi_model,)
class Ruyi_LoadLora:
    """ComfyUI node that queues a LoRA (path + strength) onto a RUYI_MODEL.

    The weights are not merged here; the sampler node merges and unmerges
    them around inference.
    """

    @classmethod
    def INPUT_TYPES(s):
        # ComfyUI input schema: incoming model, a LoRA file picked from
        # ComfyUI/models/loras, and its merge strength.
        return {
            "required": {
                "ruyi_model": ("RUYI_MODEL",),
                "lora_name": (folder_paths.get_filename_list("loras"), {"default": None,}),
                "strength_model": ("FLOAT", {"default": 1.0, "min": -100.0, "max": 100.0, "step": 0.01}),
            }
        }

    # ComfyUI node metadata.
    RETURN_TYPES = ("RUYI_MODEL",)
    RETURN_NAMES = ("ruyi_model",)
    FUNCTION = "load_lora"
    CATEGORY = "Ruyi"

    def load_lora(self, ruyi_model, lora_name, strength_model):
        """Return ruyi_model extended with the resolved LoRA path and strength.

        When no LoRA is selected, the incoming dict is passed through
        unchanged; otherwise a fresh dict is returned so the input is not
        mutated.
        """
        if lora_name is None:
            return (ruyi_model,)
        lora_path = folder_paths.get_full_path("loras", lora_name)
        extended = {
            'pipeline': ruyi_model["pipeline"],
            'dtype': ruyi_model["dtype"],
            'model_path': ruyi_model["model_path"],
            'model_type': ruyi_model["model_type"],
            'loras': ruyi_model.get("loras", []) + [lora_path],
            'strength_model': ruyi_model.get("strength_model", []) + [strength_model],
        }
        return (extended,)
class Ruyi_I2VSampler:
    """ComfyUI node that generates a video from a start (and optional end) frame.

    Motion amplitude and camera direction are controlled by selecting
    precomputed prompt embeddings stored on the pipeline; GPU memory use
    is tunable via GPU_memory_mode and GPU_offload_steps.
    """

    @classmethod
    def INPUT_TYPES(s):
        # ComfyUI input schema; parameter semantics are documented in the
        # repository README.
        return {
            "required": {
                "ruyi_model": (
                    "RUYI_MODEL",
                ),
                "video_length": (
                    "INT", {"default": 72, "min": 8, "max": 120, "step": 8}
                ),
                "base_resolution": (
                    "INT", {"default": 512, "min": 384, "max": 1024, "step": 16}
                ),
                "seed": (
                    "INT", {"default": 42, "min": 0, "max": 0xffffffffffffffff}
                ),
                "steps": (
                    "INT", {"default": 25, "min": 1, "max": 200, "step": 1}
                ),
                "cfg": (
                    "FLOAT", {"default": 7.0, "min": 1.0, "max": 20.0, "step": 0.01}
                ),
                "scheduler": (
                    [
                        "Euler",
                        "Euler A",
                        "DPM++",
                        "PNDM",
                        "DDIM",
                    ],
                    {
                        "default": 'DDIM'
                    }
                ),
                "motion": (
                    [ "1", "2", "3", "4", "auto" ],
                    { "default": "2" }
                ),
                "camera_direction": (
                    [ "static", "left", "right", "up", "down", "auto" ],
                    { "default": "static" }
                ),
                "GPU_memory_mode": (
                    ["normal_mode", "low_memory_mode"],
                    {
                        "default": "normal_mode",
                    }
                ),
                "GPU_offload_steps": (
                    [ "0", "1", "5", "7", "10" ],
                    { "default": "0" }
                ),
                "start_img": (
                    "IMAGE",
                ),
            },
            "optional":{
                "end_img": ("IMAGE",),
            },
        }

    # ComfyUI node metadata.
    RETURN_TYPES = ("IMAGE",)
    RETURN_NAMES =("images",)
    FUNCTION = "process"
    CATEGORY = "Ruyi"

    def tensor2pil(self, image):
        # Convert a float tensor scaled to [0, 1] into an 8-bit PIL image.
        return Image.fromarray(np.clip(255. * image.cpu().numpy(), 0, 255).astype(np.uint8))

    def numpy2pil(self, image):
        # Convert a float numpy array scaled to [0, 1] into an 8-bit PIL image.
        return Image.fromarray(np.clip(255. * image, 0, 255).astype(np.uint8))

    def to_pil(self, image):
        """Coerce a PIL image, torch tensor, or numpy array to PIL.Image."""
        if isinstance(image, Image.Image):
            return image
        if isinstance(image, torch.Tensor):
            return self.tensor2pil(image)
        if isinstance(image, np.ndarray):
            return self.numpy2pil(image)
        raise ValueError(f"Cannot convert {type(image)} to PIL.Image")

    def get_control_embeddings(self, pipeline, aspect_ratio, motion, camera_direction):
        """Select positive/negative prompt embeddings for the control settings.

        Looks up keys of the form "p.<ratio>movie<motion><direction>.*" in
        ``pipeline.embeddings`` (loaded from embeddings.safetensors) and
        falls back to the "p.default" entries when the combination is not
        present. Negative embeddings always come from "n.default".
        """
        # Default keys
        p_default_key = "p.default"
        n_default_key = "n.default"
        # Load embeddings
        # "auto" motion is encoded as "0" in the embedding keys.
        if motion == "auto":
            motion = "0"
        p_key = f"p.{aspect_ratio.replace(':', 'x')}movie{motion}{camera_direction}"
        embeddings = pipeline.embeddings
        # Get embeddings
        positive_embeds = embeddings.get(f"{p_key}.emb1", embeddings[f"{p_default_key}.emb1"])
        positive_attention_mask = embeddings.get(f"{p_key}.mask1", embeddings[f"{p_default_key}.mask1"])
        positive_embeds_2 = embeddings.get(f"{p_key}.emb2", embeddings[f"{p_default_key}.emb2"])
        positive_attention_mask_2 = embeddings.get(f"{p_key}.mask2", embeddings[f"{p_default_key}.mask2"])
        negative_embeds = embeddings[f"{n_default_key}.emb1"]
        negative_attention_mask = embeddings[f"{n_default_key}.mask1"]
        negative_embeds_2 = embeddings[f"{n_default_key}.emb2"]
        negative_attention_mask_2 = embeddings[f"{n_default_key}.mask2"]
        return {
            "positive_embeds": positive_embeds,
            "positive_attention_mask": positive_attention_mask,
            "positive_embeds_2": positive_embeds_2,
            "positive_attention_mask_2": positive_attention_mask_2,
            "negative_embeds": negative_embeds,
            "negative_attention_mask": negative_attention_mask,
            "negative_embeds_2": negative_embeds_2,
            "negative_attention_mask_2": negative_attention_mask_2,
        }

    def get_scheduler(self, model_path, scheduler_name):
        """Instantiate the diffusers noise scheduler selected in the UI.

        NOTE(review): an unrecognized ``scheduler_name`` would leave
        ``noise_scheduler`` unbound and raise NameError; unreachable via
        the UI since the choices come from a fixed list.
        """
        if scheduler_name == "DPM++":
            noise_scheduler = DPMSolverMultistepScheduler.from_pretrained(model_path, subfolder='scheduler')
        elif scheduler_name == "Euler":
            noise_scheduler = EulerDiscreteScheduler.from_pretrained(model_path, subfolder='scheduler')
        elif scheduler_name == "Euler A":
            noise_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(model_path, subfolder='scheduler')
        elif scheduler_name == "PNDM":
            noise_scheduler = PNDMScheduler.from_pretrained(model_path, subfolder='scheduler')
        elif scheduler_name == "DDIM":
            noise_scheduler = DDIMScheduler.from_pretrained(model_path, subfolder='scheduler')
        return noise_scheduler

    def process(
        self, ruyi_model, video_length, base_resolution, seed, steps, cfg, scheduler,
        motion, camera_direction, GPU_memory_mode, GPU_offload_steps,
        start_img, end_img=None
    ):
        """Node entry point (see ``FUNCTION``): run image-to-video inference.

        Converts the inputs to PIL, picks the output resolution from
        aspect-ratio buckets, configures the pipeline (memory mode, offload
        steps, scheduler, seed, control embeddings, LoRAs), runs inference,
        and returns a one-element tuple of frames rearranged to
        (frames, height, width, channels).
        """
        device = mm.get_torch_device()
        # NOTE(review): offload_device is not used in this method.
        offload_device = mm.unet_offload_device()
        mm.soft_empty_cache()
        gc.collect()
        # Normalize inputs to lists of PIL images.
        start_img = [self.to_pil(_start_img) for _start_img in start_img] if start_img is not None else None
        end_img = [self.to_pil(_end_img) for _end_img in end_img] if end_img is not None else None
        # Count most suitable height and width
        # Scale the 512-based aspect-ratio buckets to the requested base resolution.
        aspect_ratio_sample_size = {key : [x / 512 * base_resolution for x in ASPECT_RATIO_512[key]] for key in ASPECT_RATIO_512.keys()}
        # NOTE(review): start_img is always a list here (converted above),
        # so the Image.open branch looks unreachable — confirm.
        original_width, original_height = start_img[0].size if type(start_img) is list else Image.open(start_img).size
        closest_size, closest_ratio = get_closest_ratio(original_height, original_width, ratios=aspect_ratio_sample_size)
        # Snap the chosen bucket size down to a multiple of 16.
        height, width = [int(x / 16) * 16 for x in closest_size]
        # Only used to pick the embedding key in get_control_embeddings.
        aspect_ratio = "16:9" if width > height else "9:16"
        # Get Pipeline
        pipeline = ruyi_model['pipeline']
        model_path = ruyi_model['model_path']
        # Set GPU memory mode
        # Only switch when the current mode differs from the requested one.
        if GPU_memory_mode == "low_memory_mode" and pipeline.model_cpu_offload_flag:
            # Switch to low_memory_mode
            pipeline.enable_sequential_cpu_offload()
        elif GPU_memory_mode == "normal_mode" and not pipeline.model_cpu_offload_flag:
            # Switch to normal_mode
            pipeline.enable_model_cpu_offload()
        # Set GPU offload steps
        pipeline.transformer.hidden_cache_size = int(GPU_offload_steps)
        # Load Sampler
        pipeline.scheduler = self.get_scheduler(model_path, scheduler)
        # Set random seed
        generator= torch.Generator(device).manual_seed(seed)
        # Load control embeddings
        embeddings = self.get_control_embeddings(pipeline, aspect_ratio, motion, camera_direction)
        # Inference
        with torch.no_grad():
            # Round video_length down to a multiple of the VAE mini-batch
            # encoder size (except for single-frame generation).
            video_length = int(video_length // pipeline.vae.mini_batch_encoder * pipeline.vae.mini_batch_encoder) if video_length != 1 else 1
            input_video, input_video_mask, clip_image = get_image_to_video_latent(start_img, end_img, video_length=video_length, sample_size=(height, width))
            # Temporarily merge any queued LoRAs into the pipeline weights.
            for _lora_path, _lora_weight in zip(ruyi_model.get("loras", []), ruyi_model.get("strength_model", [])):
                pipeline = merge_lora(pipeline, _lora_path, _lora_weight)
            sample = pipeline(
                prompt_embeds = embeddings["positive_embeds"],
                prompt_attention_mask = embeddings["positive_attention_mask"],
                prompt_embeds_2 = embeddings["positive_embeds_2"],
                prompt_attention_mask_2 = embeddings["positive_attention_mask_2"],
                negative_prompt_embeds = embeddings["negative_embeds"],
                negative_prompt_attention_mask = embeddings["negative_attention_mask"],
                negative_prompt_embeds_2 = embeddings["negative_embeds_2"],
                negative_prompt_attention_mask_2 = embeddings["negative_attention_mask_2"],
                video_length = video_length,
                height = height,
                width = width,
                generator = generator,
                guidance_scale = cfg,
                num_inference_steps = steps,
                video = input_video,
                mask_video = input_video_mask,
                clip_image = clip_image,
                comfyui_progressbar = True,
            ).videos
            videos = rearrange(sample, "b c t h w -> (b t) h w c")
            # Restore the original weights after inference.
            for _lora_path, _lora_weight in zip(ruyi_model.get("loras", []), ruyi_model.get("strength_model", [])):
                pipeline = unmerge_lora(pipeline, _lora_path, _lora_weight)
        return (videos,)
# Registration table read by ComfyUI to discover this package's nodes.
NODE_CLASS_MAPPINGS = {
    "Ruyi_LoadModel": Ruyi_LoadModel,
    "Ruyi_LoadLora": Ruyi_LoadLora,
    "Ruyi_I2VSampler": Ruyi_I2VSampler,
}
# Human-readable titles shown in the ComfyUI "Add Node - Ruyi" menu.
NODE_DISPLAY_NAME_MAPPINGS = {
    "Ruyi_LoadModel": "Load Model",
    "Ruyi_LoadLora": "Load LoRA",
    "Ruyi_I2VSampler": "Sampler for Image to Video",
}
{
"last_node_id": 5,
"last_link_id": 4,
"nodes": [
{
"id": 1,
"type": "Ruyi_LoadModel",
"pos": {
"0": 295,
"1": 238
},
"size": {
"0": 315,
"1": 82
},
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "ruyi_model",
"type": "RUYI_MODEL",
"links": [
2
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "Ruyi_LoadModel"
},
"widgets_values": [
"Ruyi-Mini-7B",
"yes",
"yes"
]
},
{
"id": 2,
"type": "Ruyi_I2VSampler",
"pos": {
"0": 753,
"1": 326
},
"size": {
"0": 327.5999755859375,
"1": 338
},
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "ruyi_model",
"type": "RUYI_MODEL",
"link": 2
},
{
"name": "start_img",
"type": "IMAGE",
"link": 3
},
{
"name": "end_img",
"type": "IMAGE",
"link": 4,
"shape": 7
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"links": [
1
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "Ruyi_I2VSampler"
},
"widgets_values": [
120,
512,
112876945129710,
"randomize",
25,
7,
"DDIM",
"auto",
"auto",
"normal_mode",
"0"
]
},
{
"id": 5,
"type": "VHS_VideoCombine",
"pos": {
"0": 1183,
"1": 89
},
"size": [
317,
758.0769230769231
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 1
},
{
"name": "audio",
"type": "AUDIO",
"link": null,
"shape": 7
},
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null,
"shape": 7
},
{
"name": "vae",
"type": "VAE",
"link": null,
"shape": 7
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
},
"widgets_values": {
"frame_rate": 24,
"loop_count": 0,
"filename_prefix": "Ruyi-I2V-StartEndFrames",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"crf": 19,
"save_metadata": true,
"pingpong": false,
"save_output": true,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "Ruyi-I2V-StartEndFrames_00001.mp4",
"subfolder": "",
"type": "output",
"format": "video/h264-mp4",
"frame_rate": 24
},
"muted": false
}
}
},
{
"id": 3,
"type": "LoadImage",
"pos": {
"0": 180,
"1": 398
},
"size": {
"0": 240.88999938964844,
"1": 419.6499938964844
},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
3
],
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": null
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"girl_01.jpg",
"image"
]
},
{
"id": 4,
"type": "LoadImage",
"pos": {
"0": 453,
"1": 398
},
"size": [
242.0999999999999,
421.6500000000001
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
4
],
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": null
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"girl_02.jpg",
"image"
]
}
],
"links": [
[
1,
2,
0,
5,
0,
"IMAGE"
],
[
2,
1,
0,
2,
0,
"RUYI_MODEL"
],
[
3,
3,
0,
2,
1,
"IMAGE"
],
[
4,
4,
0,
2,
2,
"IMAGE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 1,
"offset": [
-43.00999999999996,
33.75000000000006
]
}
},
"version": 0.4
}
\ No newline at end of file
{
"last_node_id": 5,
"last_link_id": 4,
"nodes": [
{
"id": 1,
"type": "Ruyi_LoadModel",
"pos": {
"0": 295,
"1": 238
},
"size": {
"0": 315,
"1": 82
},
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "ruyi_model",
"type": "RUYI_MODEL",
"links": [
2
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "Ruyi_LoadModel"
},
"widgets_values": [
"Ruyi-Mini-7B",
"yes",
"yes"
]
},
{
"id": 5,
"type": "VHS_VideoCombine",
"pos": {
"0": 1183,
"1": 89
},
"size": [
317,
758.0769230769231
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 1
},
{
"name": "audio",
"type": "AUDIO",
"link": null,
"shape": 7
},
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null,
"shape": 7
},
{
"name": "vae",
"type": "VAE",
"link": null,
"shape": 7
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
},
"widgets_values": {
"frame_rate": 24,
"loop_count": 0,
"filename_prefix": "Ruyi-I2V-StartEndFrames",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"crf": 19,
"save_metadata": true,
"pingpong": false,
"save_output": true,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "Ruyi-I2V-StartEndFrames_00001.mp4",
"subfolder": "",
"type": "output",
"format": "video/h264-mp4",
"frame_rate": 24
},
"muted": false
}
}
},
{
"id": 3,
"type": "LoadImage",
"pos": {
"0": 180,
"1": 398
},
"size": {
"0": 240.88999938964844,
"1": 419.6499938964844
},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
3
],
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": null
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"girl_01.jpg",
"image"
]
},
{
"id": 4,
"type": "LoadImage",
"pos": {
"0": 453,
"1": 398
},
"size": [
242.0999999999999,
421.6500000000001
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
4
],
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": null
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"girl_02.jpg",
"image"
]
},
{
"id": 2,
"type": "Ruyi_I2VSampler",
"pos": {
"0": 753,
"1": 326
},
"size": {
"0": 327.5999755859375,
"1": 338
},
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "ruyi_model",
"type": "RUYI_MODEL",
"link": 2
},
{
"name": "start_img",
"type": "IMAGE",
"link": 3
},
{
"name": "end_img",
"type": "IMAGE",
"link": 4,
"shape": 7
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"links": [
1
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "Ruyi_I2VSampler"
},
"widgets_values": [
120,
512,
112876945129710,
"randomize",
25,
7,
"DDIM",
"auto",
"auto",
"normal_mode",
"5"
]
}
],
"links": [
[
1,
2,
0,
5,
0,
"IMAGE"
],
[
2,
1,
0,
2,
0,
"RUYI_MODEL"
],
[
3,
3,
0,
2,
1,
"IMAGE"
],
[
4,
4,
0,
2,
2,
"IMAGE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 1,
"offset": [
-43.00999999999996,
33.75000000000006
]
}
},
"version": 0.4
}
\ No newline at end of file
{
"last_node_id": 6,
"last_link_id": 3,
"nodes": [
{
"id": 2,
"type": "Ruyi_I2VSampler",
"pos": {
"0": 628,
"1": 284
},
"size": {
"0": 327.5999755859375,
"1": 338
},
"flags": {},
"order": 2,
"mode": 0,
"inputs": [
{
"name": "ruyi_model",
"type": "RUYI_MODEL",
"link": 1
},
{
"name": "start_img",
"type": "IMAGE",
"link": 2
},
{
"name": "end_img",
"type": "IMAGE",
"link": null,
"shape": 7
}
],
"outputs": [
{
"name": "images",
"type": "IMAGE",
"links": [
3
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "Ruyi_I2VSampler"
},
"widgets_values": [
120,
512,
925247271358454,
"randomize",
25,
7,
"DDIM",
"2",
"static",
"normal_mode",
"0"
]
},
{
"id": 1,
"type": "Ruyi_LoadModel",
"pos": {
"0": 210,
"1": 162
},
"size": {
"0": 315,
"1": 82
},
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "ruyi_model",
"type": "RUYI_MODEL",
"links": [
1
],
"slot_index": 0
}
],
"properties": {
"Node name for S&R": "Ruyi_LoadModel"
},
"widgets_values": [
"Ruyi-Mini-7B",
"yes",
"yes"
]
},
{
"id": 4,
"type": "VHS_VideoCombine",
"pos": {
"0": 1045,
"1": 133
},
"size": [
404.73553466796875,
601.8645528157551
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 3
},
{
"name": "audio",
"type": "AUDIO",
"link": null,
"shape": 7
},
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null,
"shape": 7
},
{
"name": "vae",
"type": "VAE",
"link": null,
"shape": 7
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
},
"widgets_values": {
"frame_rate": 24,
"loop_count": 0,
"filename_prefix": "Ruyi-I2V-StartFrame",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"crf": 19,
"save_metadata": true,
"pingpong": false,
"save_output": true,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "Ruyi-I2V-StartFrame_00001.mp4",
"subfolder": "",
"type": "output",
"format": "video/h264-mp4",
"frame_rate": 24
},
"muted": false
}
}
},
{
"id": 3,
"type": "LoadImage",
"pos": {
"0": 200,
"1": 439
},
"size": {
"0": 315,
"1": 314
},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
2
],
"slot_index": 0
},
{
"name": "MASK",
"type": "MASK",
"links": null
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"example_03.jpg",
"image"
]
}
],
"links": [
[
1,
1,
0,
2,
0,
"RUYI_MODEL"
],
[
2,
3,
0,
2,
1,
"IMAGE"
],
[
3,
2,
0,
4,
0,
"IMAGE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 1,
"offset": [
0,
0
]
}
},
"version": 0.4
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment