Commit 1f5da520 authored by yangzhong

git init

A serene scene of a panda bear playing a guitar at sunset unfolds by a tranquil lake. The panda, with its black-and-white fur, strums the guitar while seated on a rock. Behind, a breathtaking mountain range glows under the orange and pink hues of the setting sun, contrasting beautifully with the lake's deep blue. The composition highlights the panda's relaxed interaction with the guitar, set against the stunning natural landscape, creating depth and peaceful harmony.
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
We provide two versions of the I2VGen-XL-based model: `heavy_deg.pt` for heavily degraded videos and `light_deg.pt` for lightly degraded videos (e.g., low-resolution videos downloaded from video websites).
You can put the weights into `pretrained_weight/`.
This repository provides a script to quickly download the model from Hugging Face; run the following command:
python /STAR/down_model.py
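If you prefer to fetch the weights manually, here is a minimal sketch using `huggingface_hub` (the repo ID below is a placeholder, not the actual location; check `down_model.py` for the real one):
```
# Minimal sketch of a Hugging Face download, assuming the two checkpoints
# named above; the repo ID is hypothetical and must be replaced.
from huggingface_hub import hf_hub_download

for filename in ["heavy_deg.pt", "light_deg.pt"]:
    path = hf_hub_download(
        repo_id="<org>/<model-repo>",   # hypothetical repo ID
        filename=filename,
        local_dir="pretrained_weight",  # target directory mentioned above
    )
    print(f"Downloaded {filename} to {path}")
```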
accelerate
av
torch==2.0.1
torchvision==0.15.2
torchaudio==2.0.2
opencv-python==4.10.0.84
easydict==1.13
einops==0.8.0
open-clip-torch==2.20.0
xformers==0.0.21
fairscale==0.4.13
torchsde==0.2.6
pytorch-lightning==2.0.1
diffusers==0.30.0
huggingface_hub==0.23.3
peft==0.5.0
gradio==4.41.0
numpy==1.24
Save the result files
## Generate Training Data
### Step 1: Create the environment
```
conda create -n make_data python=3.10
conda activate make_data
bash build.sh
```
### Step 2: Prepare CSV File
Create a CSV file listing the paths to ground-truth (GT) videos and their corresponding text descriptions. Use the following format:
```
path,text
/xxx/xxx/dog.mp4, A dog is sitting on the couch.
...
```
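If your captions already live in Python, here is a minimal sketch for writing this CSV with the standard `csv` module (the caption dict and output filename are illustrative):
```
# A minimal sketch for generating the CSV from a caption mapping;
# adapt the caption source to your own data.
import csv

captions = {"/xxx/xxx/dog.mp4": "A dog is sitting on the couch."}  # illustrative

with open("gt_videos.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["path", "text"])
    for path, text in captions.items():
        writer.writerow([path, text])
```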
### Step 3: Configure Paths
Open `make_paired_data.sh` and modify the following variables:
- `INPUT_CSV`: Path to your CSV file
- `SAVE_PATH`: Directory to save the generated paired data
### Step 4: Run the Script
```
bash make_paired_data.sh
```
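For reference, based on `make_paired_data.py`, the results land in three parallel subdirectories under `SAVE_PATH`, indexed per sample (clip extensions depend on `save_sample`; `.mp4` is assumed here):
```
SAVE_PATH/
├── gt/    # ground-truth clips: 0.mp4, 1.mp4, ...
├── lq/    # degraded counterparts, upsampled x4 back to GT size
└── text/  # matching captions: 0.txt, 1.txt, ...
```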
⚠️ **Notice:** The current version of `make_paired_data.sh` only supports `batch_size=1`.
To process data in parallel, you can split the CSV file into multiple parts and run the script separately on each part.
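A minimal splitting sketch, assuming the CSV format from Step 2 (round-robin split into `N` parts; note that `make_paired_data.sh` hardcodes `--master_port=29501`, so give each parallel run its own port):
```
# Split the input CSV into N parts for parallel runs; filenames are illustrative.
import csv

N = 4  # number of parts / parallel workers

with open("gt_videos.csv", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    header, rows = next(reader), list(reader)

for i in range(N):
    with open(f"gt_videos_part{i}.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows[i::N])  # round-robin assignment
```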
pip3 install --upgrade pip
pip3 install greenlet==1.1.3
pip3 install gevent==22.8.0
pip3 install torch
pip3 install torchvision
pip3 install ftfy
pip3 install numpy
pip3 install tqdm
pip3 install psutil
pip3 install pre-commit
pip3 install rich
pip3 install click
pip3 install fabric
pip3 install contexttimer
pip3 install safetensors
pip3 install einops
pip3 install pydantic
pip3 install ray
pip3 install protobuf
pip3 install gdown
pip3 install pyav
pip3 install tensorboard
pip3 install timm
pip3 install matplotlib
pip3 install accelerate
pip3 install diffusers
pip3 install transformers
pip3 install ipdb
pip3 install opencv-python
pip3 install webdataset
pip3 install gateloop_transformer
pip3 install kornia
pip3 install scipy
sudo apt-get install -y libgl1-mesa-dev
# install flash attention (optional)
# set enable_flashattn=False in config to avoid using flash attention
pip3 install packaging
pip3 install ninja
pip3 install flash-attn --no-build-isolation
# install apex (optional)
# set enable_layernorm_kernel=False in config to avoid using apex
pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" git+https://github.com/NVIDIA/apex.git
# install xformers
#pip install -U xformers --index-url https://download.pytorch.org/whl/cu121
# cp -r /mnt/bn/videodataset/VSR/data/compile/xformers-0.0.25.post1-cp39-cp39-manylinux2014_x86_64.whl .
# pip install xformers-0.0.25.post1-cp39-cp39-manylinux2014_x86_64.whl
# install this project
git clone https://github.com/hpcaitech/Open-Sora
cd Open-Sora
pip install -v .
pip uninstall colossalai -y
pip install colossalai==0.3.7
cd ..
# Define dataset
dataset = dict(
    type="VideoTextDataset",
    data_path=None,  # filled from --data-path at runtime
    num_frames=32,
    frame_interval=2,
    image_size=(720, 1280),
)

data_path = ''  # input CSV, set via --data-path in make_paired_data.sh
save_path = ''  # output directory, set via --save_path in make_paired_data.sh
dtype = "bf16"
num_workers = 2
batch_size = 1  # currently only batch_size=1 is supported
seed = 42
import os

import colossalai
import torch
import torch.distributed as dist
import torch.nn.functional as F
from colossalai.cluster import DistCoordinator
from einops import rearrange
from mmengine.runner import set_random_seed
from tqdm import tqdm

from opensora.acceleration.parallel_states import set_sequence_parallel_group
from opensora.datasets import save_sample, prepare_dataloader
from opensora.datasets.high_order.degrade_video import degradation_process
from opensora.registry import MODELS, SCHEDULERS, build_module, DATASETS
from opensora.utils.config_utils import parse_configs
from opensora.utils.misc import to_torch_dtype


def main():
    # ======================================================
    # 1. cfg and init distributed env
    # ======================================================
    cfg = parse_configs(training=False)
    print(cfg)

    # init distributed
    if os.environ.get("WORLD_SIZE", None):
        use_dist = True
        colossalai.launch_from_torch({})
        coordinator = DistCoordinator()
        if coordinator.world_size > 1:
            set_sequence_parallel_group(dist.group.WORLD)
            enable_sequence_parallelism = True
        else:
            enable_sequence_parallelism = False
    else:
        use_dist = False
        enable_sequence_parallelism = False

    # ======================================================
    # 2. runtime variables
    # ======================================================
    torch.set_grad_enabled(False)
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = to_torch_dtype(cfg.dtype)
    set_random_seed(seed=cfg.seed)

    # ======================================================
    # 3. build dataset & dataloader
    # ======================================================
    cfg.dataset['data_path'] = cfg.data_path
    dataset = build_module(cfg.dataset, DATASETS)
    dataloader_args = dict(
        dataset=dataset,
        batch_size=cfg.batch_size,
        num_workers=cfg.num_workers,
        seed=cfg.seed,
        shuffle=True,
        drop_last=True,
        pin_memory=False,
    )
    dataloader = prepare_dataloader(**dataloader_args)
    dataloader_iter = iter(dataloader)

    # ======================================================
    # 4. inference
    # ======================================================
    sample_idx = 0
    save_dir_gt = cfg.save_path + '/gt'
    save_dir_lq = cfg.save_path + '/lq'
    save_dir_txt = cfg.save_path + '/text'
    os.makedirs(save_dir_gt, exist_ok=True)
    os.makedirs(save_dir_lq, exist_ok=True)
    os.makedirs(save_dir_txt, exist_ok=True)

    # 4.1. batch generation with progress bar
    for _, batch in tqdm(enumerate(dataloader_iter), total=len(dataloader), desc="Processing 10K Batches"):
        x = batch.pop("video").to(device, dtype)  # [B, C, T, H, W], HR video
        fps = batch.pop('fps')

        # generate the LR video via the degradation pipeline
        lr, x = degradation_process(x)
        _, _, t, _, _ = lr.shape
        # upsample the LR clip back to the GT resolution (x4, bicubic) so the pair shares one size
        lr = rearrange(
            F.interpolate(rearrange(lr, "B C T H W -> (B T) C H W"), scale_factor=4, mode='bicubic'),
            "(B T) C H W -> B C T H W",
            T=t,
        )
        y = batch.pop("text")

        # 4.2. save samples
        if not use_dist or coordinator.is_master():
            for i in range(0, lr.shape[0]):
                save_dir_gt_ = os.path.join(save_dir_gt, f"{sample_idx}")
                save_dir_lq_ = os.path.join(save_dir_lq, f"{sample_idx}")
                save_dir_txt_ = os.path.join(save_dir_txt, f"{sample_idx}.txt")
                save_sample(x[i], fps=fps / cfg.dataset['frame_interval'], save_path=save_dir_gt_)
                save_sample(lr[i], fps=fps / cfg.dataset['frame_interval'], save_path=save_dir_lq_)
                with open(save_dir_txt_, 'w', encoding='utf-8') as file:
                    file.write(y[i])
                sample_idx += 1


if __name__ == "__main__":
    main()
#!/bin/bash

# Define environment variables
export CUDA_VISIBLE_DEVICES="0"
export TOKENIZERS_PARALLELISM=false

# Define paths
INPUT_CSV=""
SAVE_PATH=""

# Run the script on the full CSV file
torchrun --nnodes=1 --nproc_per_node=1 --master_port=29501 \
    make_paired_data.py \
    --config "./make_data_config.py" \
    --data-path "$INPUT_CSV" \
    --save_path "$SAVE_PATH"
from .acceleration import *
from .datasets import *
# from .models import *
from .registry import *