Commit 5f9377aa authored by mashun1
Browse files

latte

parent 5bd891c3
......@@ -46,6 +46,23 @@
pip install timm --no-deps
### Anaconda (方法三)
1、关于本项目DCU显卡所需的特殊深度学习库可从光合开发者社区下载安装:
https://developer.hpccube.com/tool/
DTK驱动:dtk23.04
python:python3.10
torch:2.1.0 (whl.zip文件中)
torchvision:0.16.0
Tips:以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应
2、其它非特殊库参照requirements.txt安装
pip install -r requirements.txt
pip install torchvision==0.16.0 --no-deps
pip install timm --no-deps
## 数据集
......@@ -74,19 +91,12 @@
│   └── v_BandMarching_g01_c01.avi
## 训练
# 训练UCF-101
torchrun --nnodes=1 --nproc_per_node=N train.py --config ./configs/ucf101/ucf101_train.yaml
# 使用集群训练
sbatch slurm_scripts/ucf101.slurm
# 视频-图像联合训练
torchrun --nnodes=1 --nproc_per_node=N train_with_img.py --config ./configs/ucf101/ucf101_img_train.yaml
注意:训练前需要准备相应的预训练模型,具体参考`推理-模型下载`
注意:训练前需要准备相应的预训练模型,具体参考`推理-模型下载`
## 推理
......@@ -106,6 +116,9 @@ https://hf-mirror.com/PixArt-alpha/PixArt-XL-2-512x512/tree/main/transformer
├── sd-vae-ft-ema
│ ├── config.json
│ └── diffusion_pytorch_model.bin
├── sd-vae-ft-mse
│ ├── config.json
│ └── diffusion_pytorch_model.bin
├── ....
├── t2v_required_models
│ ├── model_index.json
......
......@@ -2,7 +2,8 @@
dataset: "ucf101_img"
data_path: "train_datasets/UCF-101_tiny"
frame_data_txt: "/path/to/datasets/UCF101/train_256_list.txt"
# frame_data_txt: "/path/to/datasets/UCF101/train_256_list.txt"
frame_data_txt: "train_datasets/imgslist.txt"
pretrained_model_path: "pretrained_models"
# save and load
......
......@@ -14,3 +14,4 @@ omegaconf
imageio==2.27.0
imageio-ffmpeg==0.4.9
pillow
sentencepiece
\ No newline at end of file
dataset: ucf101_img
data_path: train_datasets/UCF-101_tiny
frame_data_txt: /path/to/datasets/UCF101/train_256_list.txt
pretrained_model_path: pretrained_models
results_dir: ./results_img
pretrained: null
model: LatteIMG-XL/2
num_frames: 16
image_size: 256
num_sampling_steps: 250
frame_interval: 3
fixed_spatial: false
attention_bias: true
learn_sigma: true
extras: 2
save_ceph: true
use_image_num: 8
learning_rate: 0.0001
ckpt_every: 10000
clip_max_norm: 0.1
start_clip_iter: 100000
local_batch_size: 4
max_train_steps: 1000000
global_seed: 3407
num_workers: 8
log_every: 50
lr_warmup_steps: 0
resume_from_checkpoint: null
gradient_accumulation_steps: 1
num_classes: 101
use_compile: false
mixed_precision: false
enable_xformers_memory_efficient_attention: false
gradient_checkpointing: false
[2024-03-01 16:45:10] Experiment directory created at ./results_img/000-LatteIMG-XL-2-F16S3-ucf101_img
dataset: ucf101_img
data_path: train_datasets/UCF-101_tiny
frame_data_txt: /path/to/datasets/UCF101/train_256_list.txt
pretrained_model_path: pretrained_models
results_dir: ./results_img
pretrained: null
model: LatteIMG-XL/2
num_frames: 16
image_size: 256
num_sampling_steps: 250
frame_interval: 3
fixed_spatial: false
attention_bias: true
learn_sigma: true
extras: 2
save_ceph: true
use_image_num: 8
learning_rate: 0.0001
ckpt_every: 10000
clip_max_norm: 0.1
start_clip_iter: 100000
local_batch_size: 4
max_train_steps: 1000000
global_seed: 3407
num_workers: 8
log_every: 50
lr_warmup_steps: 0
resume_from_checkpoint: null
gradient_accumulation_steps: 1
num_classes: 101
use_compile: false
mixed_precision: false
enable_xformers_memory_efficient_attention: false
gradient_checkpointing: false
[2024-03-01 16:49:29] Experiment directory created at ./results_img/001-LatteIMG-XL-2-F16S3-ucf101_img
dataset: ucf101_img
data_path: train_datasets/UCF-101_tiny
frame_data_txt: /path/to/datasets/UCF101/train_256_list.txt
pretrained_model_path: pretrained_models
results_dir: ./results_img
pretrained: null
model: LatteIMG-XL/2
num_frames: 16
image_size: 256
num_sampling_steps: 250
frame_interval: 3
fixed_spatial: false
attention_bias: true
learn_sigma: true
extras: 2
save_ceph: true
use_image_num: 8
learning_rate: 0.0001
ckpt_every: 10000
clip_max_norm: 0.1
start_clip_iter: 100000
local_batch_size: 4
max_train_steps: 1000000
global_seed: 3407
num_workers: 8
log_every: 50
lr_warmup_steps: 0
resume_from_checkpoint: null
gradient_accumulation_steps: 1
num_classes: 101
use_compile: false
mixed_precision: false
enable_xformers_memory_efficient_attention: false
gradient_checkpointing: false
[2024-03-01 17:08:24] Experiment directory created at ./results_img/002-LatteIMG-XL-2-F16S3-ucf101_img
dataset: ucf101_img
data_path: train_datasets/UCF-101_tiny
frame_data_txt: /path/to/datasets/UCF101/train_256_list.txt
pretrained_model_path: pretrained_models
results_dir: ./results_img
pretrained: null
model: LatteIMG-XL/2
num_frames: 16
image_size: 256
num_sampling_steps: 250
frame_interval: 3
fixed_spatial: false
attention_bias: true
learn_sigma: true
extras: 2
save_ceph: true
use_image_num: 8
learning_rate: 0.0001
ckpt_every: 10000
clip_max_norm: 0.1
start_clip_iter: 100000
local_batch_size: 4
max_train_steps: 1000000
global_seed: 3407
num_workers: 8
log_every: 50
lr_warmup_steps: 0
resume_from_checkpoint: null
gradient_accumulation_steps: 1
num_classes: 101
use_compile: false
mixed_precision: false
enable_xformers_memory_efficient_attention: false
gradient_checkpointing: false
[2024-03-01 17:10:41] Experiment directory created at ./results_img/003-LatteIMG-XL-2-F16S3-ucf101_img
[2024-03-01 17:10:58] Model Parameters: 674,112,416
dataset: ucf101_img
data_path: train_datasets/UCF-101_tiny
frame_data_txt: train_datasets/imageslist.txt
pretrained_model_path: pretrained_models
results_dir: ./results_img
pretrained: null
model: LatteIMG-XL/2
num_frames: 16
image_size: 256
num_sampling_steps: 250
frame_interval: 3
fixed_spatial: false
attention_bias: true
learn_sigma: true
extras: 2
save_ceph: true
use_image_num: 8
learning_rate: 0.0001
ckpt_every: 10000
clip_max_norm: 0.1
start_clip_iter: 100000
local_batch_size: 4
max_train_steps: 1000000
global_seed: 3407
num_workers: 8
log_every: 50
lr_warmup_steps: 0
resume_from_checkpoint: null
gradient_accumulation_steps: 1
num_classes: 101
use_compile: false
mixed_precision: false
enable_xformers_memory_efficient_attention: false
gradient_checkpointing: false
[2024-03-01 17:18:35] Experiment directory created at ./results_img/004-LatteIMG-XL-2-F16S3-ucf101_img
[2024-03-01 17:18:51] Model Parameters: 674,112,416
dataset: ucf101_img
data_path: train_datasets/UCF-101_tiny
frame_data_txt: train_datasets/imgslist.txt
pretrained_model_path: pretrained_models
results_dir: ./results_img
pretrained: null
model: LatteIMG-XL/2
num_frames: 16
image_size: 256
num_sampling_steps: 250
frame_interval: 3
fixed_spatial: false
attention_bias: true
learn_sigma: true
extras: 2
save_ceph: true
use_image_num: 8
learning_rate: 0.0001
ckpt_every: 10000
clip_max_norm: 0.1
start_clip_iter: 100000
local_batch_size: 4
max_train_steps: 1000000
global_seed: 3407
num_workers: 8
log_every: 50
lr_warmup_steps: 0
resume_from_checkpoint: null
gradient_accumulation_steps: 1
num_classes: 101
use_compile: false
mixed_precision: false
enable_xformers_memory_efficient_attention: false
gradient_checkpointing: false
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment