"docs/vscode:/vscode.git/clone" did not exist on "7ebd359446c2cb31bfbbbd98046cd916de8bdc7b"
Unverified commit 92fe689f, authored by Aryan and committed by GitHub
Browse files

Change Framepack transformer layer initialization order (#11535)

update
parent 0ba1f76d
...@@ -152,9 +152,19 @@ class HunyuanVideoFramepackTransformer3DModel( ...@@ -152,9 +152,19 @@ class HunyuanVideoFramepackTransformer3DModel(
# 1. Latent and condition embedders # 1. Latent and condition embedders
self.x_embedder = HunyuanVideoPatchEmbed((patch_size_t, patch_size, patch_size), in_channels, inner_dim) self.x_embedder = HunyuanVideoPatchEmbed((patch_size_t, patch_size, patch_size), in_channels, inner_dim)
# Framepack history projection embedder
self.clean_x_embedder = None
if has_clean_x_embedder:
self.clean_x_embedder = HunyuanVideoHistoryPatchEmbed(in_channels, inner_dim)
self.context_embedder = HunyuanVideoTokenRefiner( self.context_embedder = HunyuanVideoTokenRefiner(
text_embed_dim, num_attention_heads, attention_head_dim, num_layers=num_refiner_layers text_embed_dim, num_attention_heads, attention_head_dim, num_layers=num_refiner_layers
) )
# Framepack image-conditioning embedder
self.image_projection = FramepackClipVisionProjection(image_proj_dim, inner_dim) if has_image_proj else None
self.time_text_embed = HunyuanVideoConditionEmbedding( self.time_text_embed = HunyuanVideoConditionEmbedding(
inner_dim, pooled_projection_dim, guidance_embeds, image_condition_type inner_dim, pooled_projection_dim, guidance_embeds, image_condition_type
) )
...@@ -186,13 +196,6 @@ class HunyuanVideoFramepackTransformer3DModel( ...@@ -186,13 +196,6 @@ class HunyuanVideoFramepackTransformer3DModel(
self.norm_out = AdaLayerNormContinuous(inner_dim, inner_dim, elementwise_affine=False, eps=1e-6) self.norm_out = AdaLayerNormContinuous(inner_dim, inner_dim, elementwise_affine=False, eps=1e-6)
self.proj_out = nn.Linear(inner_dim, patch_size_t * patch_size * patch_size * out_channels) self.proj_out = nn.Linear(inner_dim, patch_size_t * patch_size * patch_size * out_channels)
# Framepack specific modules
self.image_projection = FramepackClipVisionProjection(image_proj_dim, inner_dim) if has_image_proj else None
self.clean_x_embedder = None
if has_clean_x_embedder:
self.clean_x_embedder = HunyuanVideoHistoryPatchEmbed(in_channels, inner_dim)
self.use_gradient_checkpointing = False self.use_gradient_checkpointing = False
def forward( def forward(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment