Commit 30af93f2 authored by chenpangpang

feat: initial GPU commit

parent 68e98ab8
Pipeline #2159 canceled with stages
.idea
chenyh
FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.3.1-py3.10-cuda11.8-ubuntu22.04-devel as base
ARG IMAGE=nvcomposer
ARG IMAGE_UPPER=NVComposer
ARG BRANCH=gpu
RUN cd /root && git clone -b $BRANCH http://developer.hpccube.com/codes/chenpangpang/$IMAGE.git
WORKDIR /root/$IMAGE/$IMAGE_UPPER
RUN pip install -r requirements.txt
#########
# Prod #
#########
FROM image.sourcefind.cn:5000/gpu/admin/base/jupyterlab-pytorch:2.3.1-py3.10-cuda11.8-ubuntu22.04-devel
ARG IMAGE=nvcomposer
ARG IMAGE_UPPER=NVComposer
COPY chenyh/$IMAGE/frpc_linux_amd64_* /opt/conda/lib/python3.10/site-packages/gradio/
RUN chmod +x /opt/conda/lib/python3.10/site-packages/gradio/frpc_linux_amd64_*
COPY chenyh/nvcomposer/NVComposer-V0.1.ckpt /root/$IMAGE_UPPER/NVComposer-V0.1.ckpt
COPY --from=base /opt/conda/lib/python3.10/site-packages /opt/conda/lib/python3.10/site-packages
COPY --from=base /root/$IMAGE/$IMAGE_UPPER /root/$IMAGE_UPPER
COPY --from=base /root/$IMAGE/启动器.ipynb /root/$IMAGE/start.sh /root/
COPY --from=base /root/$IMAGE/assets/ /root/assets/
\ No newline at end of file
.idea
__pycache__
.git
*.pyc
.DS_Store
._*
cache
\ No newline at end of file
---
title: NVComposer
emoji: 📸
colorFrom: indigo
colorTo: gray
sdk: gradio
sdk_version: 4.38.1
app_file: app.py
pinned: false
python_version: 3.10
---
\ No newline at end of file
num_frames: &num_frames 16
resolution: &resolution [576, 1024]

model:
  base_learning_rate: 1.0e-5
  scale_lr: false
  target: core.models.diffusion.DualStreamMultiViewDiffusionModel
  params:
    use_task_embedding: false
    ray_as_image: false
    apply_condition_mask_in_training_loss: true
    separate_noise_and_condition: true
    condition_padding_with_anchor: false
    use_ray_decoder_loss_high_frequency_isolation: false
    train_with_multi_view_feature_alignment: true
    use_text_cross_attention_condition: false
    linear_start: 0.00085
    linear_end: 0.012
    num_time_steps_cond: 1
    log_every_t: 200
    time_steps: 1000
    data_key_images: combined_images
    data_key_rays: combined_rays
    data_key_text_condition: caption
    cond_stage_trainable: false
    image_size: [72, 128]
    channels: 10
    monitor: global_step
    scale_by_std: false
    scale_factor: 0.18215
    use_dynamic_rescale: true
    base_scale: 0.3
    use_ema: false
    uncond_prob: 0.05
    uncond_type: 'empty_seq'
    use_camera_pose_query_transformer: false
    random_cond: false
    cond_concat: true
    frame_mask: false
    padding: true
    per_frame_auto_encoding: true
    parameterization: "v"
    rescale_betas_zero_snr: true
    use_noise_offset: false
    scheduler_config:
      target: utils.lr_scheduler.LambdaLRScheduler
      interval: 'step'
      frequency: 100
      params:
        start_step: 0
        final_decay_ratio: 0.1
        decay_steps: 100
    bd_noise: false
    unet_config:
      target: core.modules.networks.unet_modules.UNetModel
      params:
        in_channels: 20
        out_channels: 10
        model_channels: 320
        attention_resolutions:
          - 4
          - 2
          - 1
        num_res_blocks: 2
        channel_mult:
          - 1
          - 2
          - 4
          - 4
        dropout: 0.1
        num_head_channels: 64
        transformer_depth: 1
        context_dim: 1024
        use_linear: true
        use_checkpoint: true
        temporal_conv: true
        temporal_attention: true
        temporal_selfatt_only: true
        use_relative_position: false
        use_causal_attention: false
        temporal_length: *num_frames
        addition_attention: true
        image_cross_attention: true
        image_cross_attention_scale_learnable: true
        default_fs: 3
        fs_condition: false
        use_spatial_temporal_attention: true
        use_addition_ray_output_head: true
        ray_channels: 6
        use_lora_for_rays_in_output_blocks: false
        use_task_embedding: false
        use_ray_decoder: true
        use_ray_decoder_residual: true
        full_spatial_temporal_attention: true
        enhance_multi_view_correspondence: false
        camera_pose_condition: true
        use_feature_alignment: true
    first_stage_config:
      target: core.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_img_config:
      target: core.modules.encoders.condition.FrozenOpenCLIPImageEmbedderV2
      params:
        freeze: true
    image_proj_model_config:
      target: core.modules.encoders.resampler.Resampler
      params:
        dim: 1024
        depth: 4
        dim_head: 64
        heads: 12
        num_queries: 16
        embedding_dim: 1280
        output_dim: 1024
        ff_mult: 4
        video_length: *num_frames
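As a rough, illustrative sketch (not part of this commit): an LDM-style config in this layout is usually loaded with OmegaConf and turned into a model through the repo's instantiate_from_config helper; the config path and checkpoint handling below are assumptions, not code from this repository.

# Hypothetical loading sketch; the config path is an assumption.
from omegaconf import OmegaConf
from utils.utils import instantiate_from_config

config = OmegaConf.load("configs/nvcomposer.yaml")  # assumed path
model = instantiate_from_config(config.model)       # builds DualStreamMultiViewDiffusionModel from target/params
model.eval()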
import torch.nn as nn

from utils.utils import instantiate_from_config


def disabled_train(self, mode=True):
    """Overwrite model.train with this function to make sure train/eval mode
    does not change anymore."""
    return self
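# Illustrative usage (hypothetical names, not part of this module): pin a frozen
# sub-model in eval mode so later .train() calls on the parent become no-ops:
#   frozen_encoder.eval()
#   frozen_encoder.train = disabled_train
#   for p in frozen_encoder.parameters():
#       p.requires_grad = False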
def zero_module(module):
    """
    Zero out the parameters of a module and return it.
    """
    for p in module.parameters():
        p.detach().zero_()
    return module


def scale_module(module, scale):
    """
    Scale the parameters of a module and return it.
    """
    for p in module.parameters():
        p.detach().mul_(scale)
    return module


def conv_nd(dims, *args, **kwargs):
    """
    Create a 1D, 2D, or 3D convolution module.
    """
    if dims == 1:
        return nn.Conv1d(*args, **kwargs)
    elif dims == 2:
        return nn.Conv2d(*args, **kwargs)
    elif dims == 3:
        return nn.Conv3d(*args, **kwargs)
    raise ValueError(f"unsupported dimensions: {dims}")


def linear(*args, **kwargs):
    """
    Create a linear module.
    """
    return nn.Linear(*args, **kwargs)


def avg_pool_nd(dims, *args, **kwargs):
    """
    Create a 1D, 2D, or 3D average pooling module.
    """
    if dims == 1:
        return nn.AvgPool1d(*args, **kwargs)
    elif dims == 2:
        return nn.AvgPool2d(*args, **kwargs)
    elif dims == 3:
        return nn.AvgPool3d(*args, **kwargs)
    raise ValueError(f"unsupported dimensions: {dims}")
def nonlinearity(type="silu"):
    if type == "silu":
        return nn.SiLU()
    elif type == "leaky_relu":
        return nn.LeakyReLU()
    raise ValueError(f"unsupported nonlinearity type: {type}")
class GroupNormSpecific(nn.GroupNorm):
    def forward(self, x):
        return super().forward(x.float()).type(x.dtype)


def normalization(channels, num_groups=32):
    """
    Make a standard normalization layer.
    :param channels: number of input channels.
    :param num_groups: number of groups.
    :return: an nn.Module for normalization.
    """
    return GroupNormSpecific(num_groups, channels)
class HybridConditioner(nn.Module):
    def __init__(self, c_concat_config, c_crossattn_config):
        super().__init__()
        self.concat_conditioner = instantiate_from_config(c_concat_config)
        self.crossattn_conditioner = instantiate_from_config(c_crossattn_config)

    def forward(self, c_concat, c_crossattn):
        c_concat = self.concat_conditioner(c_concat)
        c_crossattn = self.crossattn_conditioner(c_crossattn)
        return {"c_concat": [c_concat], "c_crossattn": [c_crossattn]}
import math
from inspect import isfunction

import torch
import torch.distributed as dist
import torch.utils.checkpoint
from torch import nn
def gather_data(data, return_np=True):
    """gather data from multiple processes to one list"""
    data_list = [torch.zeros_like(data) for _ in range(dist.get_world_size())]
    dist.all_gather(data_list, data)  # gather not supported with NCCL
    if return_np:
        data_list = [data.cpu().numpy() for data in data_list]
    return data_list
def autocast(f):
    def do_autocast(*args, **kwargs):
        with torch.cuda.amp.autocast(
            enabled=True,
            dtype=torch.get_autocast_gpu_dtype(),
            cache_enabled=torch.is_autocast_cache_enabled(),
        ):
            return f(*args, **kwargs)

    return do_autocast
def extract_into_tensor(a, t, x_shape):
    """Gather per-timestep values from `a` at indices `t` and reshape them so
    they broadcast over a tensor of shape `x_shape`."""
    b, *_ = t.shape
    out = a.gather(-1, t)
    return out.reshape(b, *((1,) * (len(x_shape) - 1)))
def noise_like(shape, device, repeat=False):
    def repeat_noise():
        return torch.randn((1, *shape[1:]), device=device).repeat(
            shape[0], *((1,) * (len(shape) - 1))
        )

    def noise():
        return torch.randn(shape, device=device)

    return repeat_noise() if repeat else noise()


def default(val, d):
    if exists(val):
        return val
    return d() if isfunction(d) else d


def exists(val):
    return val is not None


def identity(*args, **kwargs):
    return nn.Identity()


def uniq(arr):
    return {el: True for el in arr}.keys()
def mean_flat(tensor):
    """
    Take the mean over all non-batch dimensions.
    """
    return tensor.mean(dim=list(range(1, len(tensor.shape))))


def ismap(x):
    if not isinstance(x, torch.Tensor):
        return False
    return (len(x.shape) == 4) and (x.shape[1] > 3)


def isimage(x):
    if not isinstance(x, torch.Tensor):
        return False
    return (len(x.shape) == 4) and (x.shape[1] == 3 or x.shape[1] == 1)


def max_neg_value(t):
    return -torch.finfo(t.dtype).max


def shape_to_str(x):
    shape_str = "x".join([str(x) for x in x.shape])
    return shape_str


def init_(tensor):
    dim = tensor.shape[-1]
    std = 1 / math.sqrt(dim)
    tensor.uniform_(-std, std)
    return tensor
# USE_DEEP_SPEED_CHECKPOINTING = False
# if USE_DEEP_SPEED_CHECKPOINTING:
#     import deepspeed
#
#     _gradient_checkpoint_function = deepspeed.checkpointing.checkpoint
# else:
_gradient_checkpoint_function = torch.utils.checkpoint.checkpoint


def gradient_checkpoint(func, inputs, params, flag):
    """
    Evaluate a function without caching intermediate activations, allowing for
    reduced memory at the expense of extra compute in the backward pass.
    :param func: the function to evaluate.
    :param inputs: the argument sequence to pass to `func`.
    :param params: a sequence of parameters `func` depends on but does not
                   explicitly take as arguments.
    :param flag: if False, disable gradient checkpointing.
    """
    if flag:
        # args = tuple(inputs) + tuple(params)
        # return CheckpointFunction.apply(func, len(inputs), *args)
        if isinstance(inputs, tuple):
            return _gradient_checkpoint_function(func, *inputs, use_reentrant=False)
        else:
            return _gradient_checkpoint_function(func, inputs, use_reentrant=False)
    else:
        return func(*inputs)
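# Illustrative usage (hypothetical block/tensor names): re-run the wrapped
# function during backward instead of caching activations when `flag` is true.
#   def _forward(x, context):
#       return block(x, context)
#   out = gradient_checkpoint(_forward, (x, context), block.parameters(), use_checkpoint)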
class CheckpointFunction(torch.autograd.Function):
    @staticmethod
    @torch.cuda.amp.custom_fwd
    def forward(ctx, run_function, length, *args):
        ctx.run_function = run_function
        ctx.input_tensors = list(args[:length])
        ctx.input_params = list(args[length:])
        with torch.no_grad():
            output_tensors = ctx.run_function(*ctx.input_tensors)
        return output_tensors

    @staticmethod
    @torch.cuda.amp.custom_bwd
    def backward(ctx, *output_grads):
        ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
        with torch.enable_grad():
            # Fixes a bug where the first op in run_function modifies the
            # Tensor storage in place, which is not allowed for detach()'d
            # Tensors.
            shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
            output_tensors = ctx.run_function(*shallow_copies)
        input_grads = torch.autograd.grad(
            output_tensors,
            ctx.input_tensors + ctx.input_params,
            output_grads,
            allow_unused=True,
        )
        del ctx.input_tensors
        del ctx.input_params
        del output_tensors
        return (None, None) + input_grads