首次提交

f05e915f · weishb · 297bf637 · f05e915f · f05e915f · f05e915f
Commit f05e915f authored May 27, 2026 by weishb
20 changed files
--- a/TRELLIS.2_DCU/assets/hdri/night.exr
+++ b/TRELLIS.2_DCU/assets/hdri/night.exr
--- a/TRELLIS.2_DCU/assets/hdri/studio.exr
+++ b/TRELLIS.2_DCU/assets/hdri/studio.exr
--- a/TRELLIS.2_DCU/assets/hdri/sunrise.exr
+++ b/TRELLIS.2_DCU/assets/hdri/sunrise.exr
--- a/TRELLIS.2_DCU/assets/hdri/sunset.exr
+++ b/TRELLIS.2_DCU/assets/hdri/sunset.exr
--- a/TRELLIS.2_DCU/assets/teaser.webp
+++ b/TRELLIS.2_DCU/assets/teaser.webp
--- a/TRELLIS.2_DCU/configs/gen/slat_flow_img2shape_dit_1_3B_512_bf16.json
+++ b/TRELLIS.2_DCU/configs/gen/slat_flow_img2shape_dit_1_3B_512_bf16.json
+{
+    "models": {
+        "denoiser": {
+            "name": "ElasticSLatFlowModel",
+            "args": {
+                "resolution": 32,
+                "in_channels": 32,
+                "out_channels": 32,
+                "model_channels": 1536,
+                "cond_channels": 1024,
+                "num_blocks": 30,
+                "num_heads": 12,
+                "mlp_ratio": 5.3334,
+                "pe_mode": "rope",
+                "share_mod": true,
+                "initialization": "scaled",
+                "qk_rms_norm": true,
+                "qk_rms_norm_cross": true
+            }
+        }
+    },
+    "dataset": {
+        "name": "ImageConditionedSLatShape",
+        "args": {
+            "resolution": 512,
+            "image_size": 512,
+            "min_aesthetic_score": 4.5,
+            "max_tokens": 8192,
+            "normalization": {
+                "mean": [
+                    0.781296, 0.018091, -0.495192, -0.558457, 1.060530, 0.093252, 1.518149, -0.933218,
+                    -0.732996, 2.604095, -0.118341, -2.143904, 0.495076, -2.179512, -2.130751, -0.996944,
+                    0.261421, -2.217463, 1.260067, -0.150213, 3.790713, 1.481266, -1.046058, -1.523667,
+                    -0.059621, 2.220780, 1.621212, 0.877230, 0.567247, -3.175944, -3.186688, 1.578665
+                ],
+                "std": [
+                    5.972266, 4.706852, 5.445010, 5.209927, 5.320220, 4.547237, 5.020802, 5.444004,
+                    5.226681, 5.683095, 4.831436, 5.286469, 5.652043, 5.367606, 5.525084, 4.730578,
+                    4.805265, 5.124013, 5.530808, 5.619001, 5.103930, 5.417670, 5.269677, 5.547194,
+                    5.634698, 5.235274, 6.110351, 5.511298, 6.237273, 4.879207, 5.347008, 5.405691
+                ]
+            },
+            "pretrained_slat_dec": "microsoft/TRELLIS.2-4B/ckpts/shape_dec_next_dc_f16c32_fp16"
+        }
+    },
+    "trainer": {
+        "name": "ImageConditionedSparseFlowMatchingCFGTrainer",
+        "args": {
+            "max_steps": 1000000,
+            "batch_size_per_gpu": 8,
+            "batch_split": 2,
+            "optimizer": {
+                "name": "AdamW",
+                "args": {
+                    "lr": 1e-4,
+                    "weight_decay": 0.01,
+                    "betas": [0.9, 0.95],
+                    "eps": 1e-8
+                }
+            },
+            "ema_rate": [
+                0.9999
+            ],
+            "mix_precision_mode": "amp",
+            "mix_precision_dtype": "bfloat16",
+            "elastic": {
+                "name": "LinearMemoryController",
+                "args": {
+                    "target_ratio": 0.75,
+                    "max_mem_ratio_start": 0.5
+                }
+            },
+            "grad_clip": {
+                "name": "AdaptiveGradClipper",
+                "args": {
+                    "max_norm": 1.0,
+                    "clip_percentile": 95
+                }
+            },
+            "i_log": 500,
+            "i_sample": 10000,
+            "i_save": 10000,
+            "p_uncond": 0.1,
+            "t_schedule": {
+                "name": "uniform",
+                "args": {}
+            },
+            "sigma_min": 1e-5,
+            "image_cond_model": {
+                "name": "DinoV3FeatureExtractor",
+                "args": {
+                    "model_name": "facebook/dinov3-vitl16-pretrain-lvd1689m",
+                    "image_size": 512
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
--- a/TRELLIS.2_DCU/configs/gen/slat_flow_img2shape_dit_1_3B_512_bf16_ft1024.json
+++ b/TRELLIS.2_DCU/configs/gen/slat_flow_img2shape_dit_1_3B_512_bf16_ft1024.json
+{
+    "models": {
+        "denoiser": {
+            "name": "ElasticSLatFlowModel",
+            "args": {
+                "resolution": 64,
+                "in_channels": 32,
+                "out_channels": 32,
+                "model_channels": 1536,
+                "cond_channels": 1024,
+                "num_blocks": 30,
+                "num_heads": 12,
+                "mlp_ratio": 5.3334,
+                "pe_mode": "rope",
+                "share_mod": true,
+                "initialization": "scaled",
+                "qk_rms_norm": true,
+                "qk_rms_norm_cross": true
+            }
+        }
+    },
+    "dataset": {
+        "name": "ImageConditionedSLatShape",
+        "args": {
+            "resolution": 1024,
+            "image_size": 1024,
+            "min_aesthetic_score": 4.5,
+            "max_tokens": 32768,
+            "normalization": {
+                "mean": [
+                    0.781296, 0.018091, -0.495192, -0.558457, 1.060530, 0.093252, 1.518149, -0.933218,
+                    -0.732996, 2.604095, -0.118341, -2.143904, 0.495076, -2.179512, -2.130751, -0.996944,
+                    0.261421, -2.217463, 1.260067, -0.150213, 3.790713, 1.481266, -1.046058, -1.523667,
+                    -0.059621, 2.220780, 1.621212, 0.877230, 0.567247, -3.175944, -3.186688, 1.578665
+                ],
+                "std": [
+                    5.972266, 4.706852, 5.445010, 5.209927, 5.320220, 4.547237, 5.020802, 5.444004,
+                    5.226681, 5.683095, 4.831436, 5.286469, 5.652043, 5.367606, 5.525084, 4.730578,
+                    4.805265, 5.124013, 5.530808, 5.619001, 5.103930, 5.417670, 5.269677, 5.547194,
+                    5.634698, 5.235274, 6.110351, 5.511298, 6.237273, 4.879207, 5.347008, 5.405691
+                ]
+            },
+            "pretrained_slat_dec": "microsoft/TRELLIS.2-4B/ckpts/shape_dec_next_dc_f16c32_fp16"
+        }
+    },
+    "trainer": {
+        "name": "ImageConditionedSparseFlowMatchingCFGTrainer",
+        "args": {
+            "max_steps": 1000000,
+            "batch_size_per_gpu": 2,
+            "batch_split": 1,
+            "optimizer": {
+                "name": "AdamW",
+                "args": {
+                    "lr": 2e-5,
+                    "weight_decay": 0.01,
+                    "betas": [0.9, 0.95],
+                    "eps": 1e-8
+                }
+            },
+            "ema_rate": [
+                0.9999
+            ],
+            "mix_precision_mode": "amp",
+            "mix_precision_dtype": "bfloat16",
+            "elastic": {
+                "name": "LinearMemoryController",
+                "args": {
+                    "target_ratio": 0.75,
+                    "max_mem_ratio_start": 0.25
+                }
+            },
+            "grad_clip": {
+                "name": "AdaptiveGradClipper",
+                "args": {
+                    "max_norm": 1.0,
+                    "clip_percentile": 95
+                }
+            },
+            "finetune_ckpt": {
+                "denoiser": "PATH_TO_512_CKPT"
+            },
+            "i_log": 500,
+            "i_sample": 1000,
+            "i_save": 1000,
+            "p_uncond": 0.1,
+            "t_schedule": {
+                "name": "uniform",
+                "args": {}
+            },
+            "sigma_min": 1e-5,
+            "image_cond_model": {
+                "name": "DinoV3FeatureExtractor",
+                "args": {
+                    "model_name": "facebook/dinov3-vitl16-pretrain-lvd1689m",
+                    "image_size": 1024
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
--- a/TRELLIS.2_DCU/configs/gen/slat_flow_imgshape2tex_dit_1_3B_512_bf16.json
+++ b/TRELLIS.2_DCU/configs/gen/slat_flow_imgshape2tex_dit_1_3B_512_bf16.json
+{
+    "models": {
+        "denoiser": {
+            "name": "ElasticSLatFlowModel",
+            "args": {
+                "resolution": 32,
+                "in_channels": 64,
+                "out_channels": 32,
+                "model_channels": 1536,
+                "cond_channels": 1024,
+                "num_blocks": 30,
+                "num_heads": 12,
+                "mlp_ratio": 5.3334,
+                "pe_mode": "rope",
+                "share_mod": true,
+                "initialization": "scaled",
+                "qk_rms_norm": true,
+                "qk_rms_norm_cross": true
+            }
+        }
+    },
+    "dataset": {
+        "name": "ImageConditionedSLatPbr",
+        "args": {
+            "resolution": 512,
+            "image_size": 512,
+            "min_aesthetic_score": 4.5,
+            "max_tokens": 8192,
+            "pbr_slat_normalization": {
+                "mean": [
+                    3.501659, 2.212398, 2.226094, 0.251093, -0.026248, -0.687364, 0.439898, -0.928075,
+                    0.029398, -0.339596, -0.869527, 1.038479, -0.972385, 0.126042, -1.129303, 0.455149,
+                    -1.209521, 2.069067, 0.544735, 2.569128, -0.323407, 2.293000, -1.925608, -1.217717,
+                    1.213905, 0.971588, -0.023631, 0.106750, 2.021786, 0.250524, -0.662387, -0.768862
+                ],
+                "std": [
+                    2.665652, 2.743913, 2.765121, 2.595319, 3.037293, 2.291316, 2.144656, 2.911822,
+                    2.969419, 2.501689, 2.154811, 3.163343, 2.621215, 2.381943, 3.186697, 3.021588,
+                    2.295916, 3.234985, 3.233086, 2.260140, 2.874801, 2.810596, 3.292720, 2.674999,
+                    2.680878, 2.372054, 2.451546, 2.353556, 2.995195, 2.379849, 2.786195, 2.775190
+                ]
+            },
+            "shape_slat_normalization": {
+                "mean": [
+                    0.781296, 0.018091, -0.495192, -0.558457, 1.060530, 0.093252, 1.518149, -0.933218,
+                    -0.732996, 2.604095, -0.118341, -2.143904, 0.495076, -2.179512, -2.130751, -0.996944,
+                    0.261421, -2.217463, 1.260067, -0.150213, 3.790713, 1.481266, -1.046058, -1.523667,
+                    -0.059621, 2.220780, 1.621212, 0.877230, 0.567247, -3.175944, -3.186688, 1.578665
+                ],
+                "std": [
+                    5.972266, 4.706852, 5.445010, 5.209927, 5.320220, 4.547237, 5.020802, 5.444004,
+                    5.226681, 5.683095, 4.831436, 5.286469, 5.652043, 5.367606, 5.525084, 4.730578,
+                    4.805265, 5.124013, 5.530808, 5.619001, 5.103930, 5.417670, 5.269677, 5.547194,
+                    5.634698, 5.235274, 6.110351, 5.511298, 6.237273, 4.879207, 5.347008, 5.405691
+                ]
+            },
+            "attrs": [
+                "base_color",
+                "metallic",
+                "roughness",
+                "alpha"
+            ],
+            "pretrained_pbr_slat_dec": "microsoft/TRELLIS.2-4B/ckpts/tex_dec_next_dc_f16c32_fp16",
+            "pretrained_shape_slat_dec": "microsoft/TRELLIS.2-4B/ckpts/shape_dec_next_dc_f16c32_fp16"
+        }
+    },
+    "trainer": {
+        "name": "ImageConditionedSparseFlowMatchingCFGTrainer",
+        "args": {
+            "max_steps": 1000000,
+            "batch_size_per_gpu": 8,
+            "batch_split": 2,
+            "optimizer": {
+                "name": "AdamW",
+                "args": {
+                    "lr": 1e-4,
+                    "weight_decay": 0.01,
+                    "betas": [0.9, 0.95],
+                    "eps": 1e-8
+                }
+            },
+            "ema_rate": [
+                0.9999
+            ],
+            "mix_precision_mode": "amp",
+            "mix_precision_dtype": "bfloat16",
+            "elastic": {
+                "name": "LinearMemoryController",
+                "args": {
+                    "target_ratio": 0.75,
+                    "max_mem_ratio_start": 0.5
+                }
+            },
+            "grad_clip": {
+                "name": "AdaptiveGradClipper",
+                "args": {
+                    "max_norm": 1.0,
+                    "clip_percentile": 95
+                }
+            },
+            "i_log": 500,
+            "i_sample": 10000,
+            "i_save": 10000,
+            "p_uncond": 0.1,
+            "t_schedule": {
+                "name": "uniform",
+                "args": {}
+            },
+            "sigma_min": 1e-5,
+            "image_cond_model": {
+                "name": "DinoV3FeatureExtractor",
+                "args": {
+                    "model_name": "facebook/dinov3-vitl16-pretrain-lvd1689m",
+                    "image_size": 512
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
--- a/TRELLIS.2_DCU/configs/gen/slat_flow_imgshape2tex_dit_1_3B_512_bf16_ft1024.json
+++ b/TRELLIS.2_DCU/configs/gen/slat_flow_imgshape2tex_dit_1_3B_512_bf16_ft1024.json
+{
+    "models": {
+        "denoiser": {
+            "name": "ElasticSLatFlowModel",
+            "args": {
+                "resolution": 32,
+                "in_channels": 64,
+                "out_channels": 32,
+                "model_channels": 1536,
+                "cond_channels": 1024,
+                "num_blocks": 30,
+                "num_heads": 12,
+                "mlp_ratio": 5.3334,
+                "pe_mode": "rope",
+                "share_mod": true,
+                "initialization": "scaled",
+                "qk_rms_norm": true,
+                "qk_rms_norm_cross": true
+            }
+        }
+    },
+    "dataset": {
+        "name": "ImageConditionedSLatPbr",
+        "args": {
+            "resolution": 1024,
+            "image_size": 1024,
+            "min_aesthetic_score": 4.5,
+            "max_tokens": 32768,
+            "full_pbr": true,
+            "pbr_slat_normalization": {
+                "mean": [
+                    3.501659, 2.212398, 2.226094, 0.251093, -0.026248, -0.687364, 0.439898, -0.928075,
+                    0.029398, -0.339596, -0.869527, 1.038479, -0.972385, 0.126042, -1.129303, 0.455149,
+                    -1.209521, 2.069067, 0.544735, 2.569128, -0.323407, 2.293000, -1.925608, -1.217717,
+                    1.213905, 0.971588, -0.023631, 0.106750, 2.021786, 0.250524, -0.662387, -0.768862
+                ],
+                "std": [
+                    2.665652, 2.743913, 2.765121, 2.595319, 3.037293, 2.291316, 2.144656, 2.911822,
+                    2.969419, 2.501689, 2.154811, 3.163343, 2.621215, 2.381943, 3.186697, 3.021588,
+                    2.295916, 3.234985, 3.233086, 2.260140, 2.874801, 2.810596, 3.292720, 2.674999,
+                    2.680878, 2.372054, 2.451546, 2.353556, 2.995195, 2.379849, 2.786195, 2.775190
+                ]
+            },
+            "shape_slat_normalization": {
+                "mean": [
+                    0.781296, 0.018091, -0.495192, -0.558457, 1.060530, 0.093252, 1.518149, -0.933218,
+                    -0.732996, 2.604095, -0.118341, -2.143904, 0.495076, -2.179512, -2.130751, -0.996944,
+                    0.261421, -2.217463, 1.260067, -0.150213, 3.790713, 1.481266, -1.046058, -1.523667,
+                    -0.059621, 2.220780, 1.621212, 0.877230, 0.567247, -3.175944, -3.186688, 1.578665
+                ],
+                "std": [
+                    5.972266, 4.706852, 5.445010, 5.209927, 5.320220, 4.547237, 5.020802, 5.444004,
+                    5.226681, 5.683095, 4.831436, 5.286469, 5.652043, 5.367606, 5.525084, 4.730578,
+                    4.805265, 5.124013, 5.530808, 5.619001, 5.103930, 5.417670, 5.269677, 5.547194,
+                    5.634698, 5.235274, 6.110351, 5.511298, 6.237273, 4.879207, 5.347008, 5.405691
+                ]
+            },
+            "attrs": [
+                "base_color",
+                "metallic",
+                "roughness",
+                "alpha"
+            ],
+            "pretrained_pbr_slat_dec": "microsoft/TRELLIS.2-4B/ckpts/tex_dec_next_dc_f16c32_fp16",
+            "pretrained_shape_slat_dec": "microsoft/TRELLIS.2-4B/ckpts/shape_dec_next_dc_f16c32_fp16"
+        }
+    },
+    "trainer": {
+        "name": "ImageConditionedSparseFlowMatchingCFGTrainer",
+        "args": {
+            "max_steps": 1000000,
+            "batch_size_per_gpu": 2,
+            "batch_split": 1,
+            "optimizer": {
+                "name": "AdamW",
+                "args": {
+                    "lr": 2e-5,
+                    "weight_decay": 0.01,
+                    "betas": [0.9, 0.95],
+                    "eps": 1e-8
+                }
+            },
+            "ema_rate": [
+                0.9999
+            ],
+            "mix_precision_mode": "amp",
+            "mix_precision_dtype": "bfloat16",
+            "elastic": {
+                "name": "LinearMemoryController",
+                "args": {
+                    "target_ratio": 0.75,
+                    "max_mem_ratio_start": 0.25
+                }
+            },
+            "grad_clip": {
+                "name": "AdaptiveGradClipper",
+                "args": {
+                    "max_norm": 1.0,
+                    "clip_percentile": 95
+                }
+            },
+            "i_log": 500,
+            "i_sample": 1000,
+            "i_save": 1000,
+            "p_uncond": 0.1,
+            "t_schedule": {
+                "name": "uniform",
+                "args": {}
+            },
+            "sigma_min": 1e-5,
+            "image_cond_model": {
+                "name": "DinoV3FeatureExtractor",
+                "args": {
+                    "model_name": "facebook/dinov3-vitl16-pretrain-lvd1689m",
+                    "image_size": 1024
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
--- a/TRELLIS.2_DCU/configs/gen/ss_flow_img_dit_1_3B_64_bf16.json
+++ b/TRELLIS.2_DCU/configs/gen/ss_flow_img_dit_1_3B_64_bf16.json
+{
+    "models": {
+        "denoiser": {
+            "name": "SparseStructureFlowModel",
+            "args": {
+                "resolution": 16,
+                "in_channels": 8,
+                "out_channels": 8,
+                "model_channels": 1536,
+                "cond_channels": 1024,
+                "num_blocks": 30,
+                "num_heads": 12,
+                "mlp_ratio": 5.3334,
+                "pe_mode": "rope",
+                "share_mod": true,
+                "initialization": "scaled",
+                "qk_rms_norm": true,
+                "qk_rms_norm_cross": true
+            }
+        }
+    },
+    "dataset": {
+        "name": "ImageConditionedSparseStructureLatent",
+        "args": {
+            "min_aesthetic_score": 4.5,
+            "image_size": 512,
+            "pretrained_ss_dec": "microsoft/TRELLIS-image-large/ckpts/ss_dec_conv3d_16l8_fp16"
+        }
+    },
+    "trainer": {
+        "name": "ImageConditionedFlowMatchingCFGTrainer",
+        "args": {
+            "max_steps": 1000000,
+            "batch_size_per_gpu": 8,
+            "batch_split": 4,
+            "optimizer": {
+                "name": "AdamW",
+                "args": {
+                    "lr": 1e-4,
+                    "weight_decay": 0.01,
+                    "betas": [0.9, 0.95],
+                    "eps": 1e-8
+                }
+            },
+            "ema_rate": [
+                0.9999
+            ],
+            "mix_precision_mode": "amp",
+            "mix_precision_dtype": "bfloat16",
+            "grad_clip": {
+                "name": "AdaptiveGradClipper",
+                "args": {
+                    "max_norm": 1.0,
+                    "clip_percentile": 95
+                }
+            },
+            "i_log": 500,
+            "i_sample": 10000,
+            "i_save": 10000,
+            "p_uncond": 0.1,
+            "t_schedule": {
+                "name": "logitNormal",
+                "args": {
+                    "mean": 1.0,
+                    "std": 1.0
+                }
+            },
+            "sigma_min": 1e-5,
+            "image_cond_model": {
+                "name": "DinoV3FeatureExtractor",
+                "args": {
+                    "model_name": "facebook/dinov3-vitl16-pretrain-lvd1689m",
+                    "image_size": 512
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
--- a/TRELLIS.2_DCU/configs/scvae/shape_vae_next_dc_f16c32_fp16.json
+++ b/TRELLIS.2_DCU/configs/scvae/shape_vae_next_dc_f16c32_fp16.json
+{
+    "models": {
+        "encoder": {
+            "name": "FlexiDualGridVaeEncoder",
+            "args": {
+                "model_channels": [64, 128, 256, 512, 1024],
+                "latent_channels": 32,
+                "num_blocks": [0, 4, 8, 16, 4],
+                "block_type": [
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d"                    
+                ],
+                "down_block_type": [
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d"                    
+                ],
+                "block_args": [
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": false
+                    },
+                    {
+                        "use_checkpoint": false
+                    },
+                    {
+                        "use_checkpoint": false
+                    }
+                ],
+                "use_fp16": true
+            }
+        },
+        "decoder": {
+            "name": "FlexiDualGridVaeDecoder",
+            "args": {
+                "resolution": 256,
+                "model_channels": [1024, 512, 256, 128, 64],
+                "latent_channels": 32,
+                "num_blocks": [4, 16, 8, 4, 0],
+                "block_type": [
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d"                    
+                ],
+                "up_block_type": [
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d"
+                ],
+                "block_args": [
+                    {
+                        "use_checkpoint": false
+                    },
+                    {
+                        "use_checkpoint": false
+                    },
+                    {
+                        "use_checkpoint": false
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    }
+                ],
+                "use_fp16": true
+            }
+        }
+    },
+    "dataset": {
+        "name": "FlexiDualGridDataset",
+        "args": {
+            "resolution": 256,
+            "max_active_voxels": 1000000,
+            "max_num_faces": 1000000,
+            "min_aesthetic_score": 4.5
+        }
+    },
+    "trainer": {
+        "name": "ShapeVaeTrainer",
+        "args": {
+            "max_steps": 1000000,
+            "batch_size_per_gpu": 8,
+            "batch_split": 2,
+            "optimizer": {
+                "name": "AdamW",
+                "args": {
+                    "lr": 1e-4,
+                    "weight_decay": 0.0
+                }
+            },
+            "ema_rate": [
+                0.9999
+            ],
+            "fp16_mode": "inflat_all",
+            "fp16_scale_growth": 0.001,
+            "grad_clip": {
+                "name": "AdaptiveGradClipper",
+                "args": {
+                    "max_norm": 1.0,
+                    "clip_percentile": 95
+                }
+            },
+            "i_log": 500,
+            "i_sample": 10000,
+            "i_save": 10000,
+            "lambda_subdiv": 0.1,
+            "lambda_intersected": 0.1,
+            "lambda_vertice": 1e-2,
+            "lambda_mask": 1,
+            "lambda_depth": 10,
+            "lambda_normal": 1,
+            "lambda_kl": 1e-6,
+            "lambda_ssim": 0.2,
+            "lambda_lpips": 0.2,
+            "camera_randomization_config": {
+                "radius_range": [2, 100]
+            }
+        }
+    }
+}
\ No newline at end of file
--- a/TRELLIS.2_DCU/configs/scvae/shape_vae_next_dc_f16c32_fp16_ft_512.json
+++ b/TRELLIS.2_DCU/configs/scvae/shape_vae_next_dc_f16c32_fp16_ft_512.json
+{
+    "models": {
+        "encoder": {
+            "name": "FlexiDualGridVaeEncoder",
+            "args": {
+                "model_channels": [64, 128, 256, 512, 1024],
+                "latent_channels": 32,
+                "num_blocks": [0, 4, 8, 16, 4],
+                "block_type": [
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d"                    
+                ],
+                "down_block_type": [
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d"                    
+                ],
+                "block_args": [
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    }
+                ],
+                "use_fp16": true
+            }
+        },
+        "decoder": {
+            "name": "FlexiDualGridVaeDecoder",
+            "args": {
+                "resolution": 512,
+                "model_channels": [1024, 512, 256, 128, 64],
+                "latent_channels": 32,
+                "num_blocks": [4, 16, 8, 4, 0],
+                "block_type": [
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d"                    
+                ],
+                "up_block_type": [
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d"
+                ],
+                "block_args": [
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    }
+                ],
+                "use_fp16": true
+            }
+        }
+    },
+    "dataset": {
+        "name": "FlexiDualGridDataset",
+        "args": {
+            "resolution": 512,
+            "max_active_voxels": 1000000,
+            "max_num_faces": 1000000,
+            "min_aesthetic_score": 4.5
+        }
+    },
+    "trainer": {
+        "name": "ShapeVaeTrainer",
+        "args": {
+            "max_steps": 1000000,
+            "batch_size_per_gpu": 4,
+            "batch_split": 2,
+            "optimizer": {
+                "name": "AdamW",
+                "args": {
+                    "lr": 1e-5,
+                    "weight_decay": 0.0
+                }
+            },
+            "ema_rate": [
+                0.9999
+            ],
+            "fp16_mode": "inflat_all",
+            "fp16_scale_growth": 0.001,
+            "grad_clip": {
+                "name": "AdaptiveGradClipper",
+                "args": {
+                    "max_norm": 1.0,
+                    "clip_percentile": 95
+                }
+            },
+            "finetune_ckpt": {
+                "encoder": "PATH_TO_ENCODER_CKPT",
+                "decoder": "PATH_TO_DECODER_CKPT"
+            },
+            "snapshot_batch_size": 1,
+            "i_log": 500,
+            "i_sample": 10000,
+            "i_save": 10000,
+            "lambda_subdiv": 0.1,
+            "lambda_intersected": 0.1,
+            "lambda_vertice": 1e-2,
+            "lambda_mask": 1,
+            "lambda_depth": 10,
+            "lambda_normal": 1,
+            "lambda_kl": 1e-6,
+            "lambda_ssim": 0.2,
+            "lambda_lpips": 0.2,
+            "render_resolution": 1024,
+            "camera_randomization_config": {
+                "radius_range": [2, 100]
+            }
+        }
+    }
+}
\ No newline at end of file
--- a/TRELLIS.2_DCU/configs/scvae/tex_vae_next_dc_f16c32_fp16.json
+++ b/TRELLIS.2_DCU/configs/scvae/tex_vae_next_dc_f16c32_fp16.json
+{
+    "models": {
+        "encoder": {
+            "name": "SparseUnetVaeEncoder",
+            "args": {
+                "in_channels": 6,
+                "model_channels": [64, 128, 256, 512, 1024],
+                "latent_channels": 32,
+                "num_blocks": [0, 4, 8, 16, 4],
+                "block_type": [
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d"                    
+                ],
+                "down_block_type": [
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d"                    
+                ],
+                "block_args": [
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": false
+                    },
+                    {
+                        "use_checkpoint": false
+                    },
+                    {
+                        "use_checkpoint": false
+                    }
+                ],
+                "use_fp16": true
+            }
+        },
+        "decoder": {
+            "name": "SparseUnetVaeDecoder",
+            "args": {
+                "out_channels": 6,
+                "model_channels": [1024, 512, 256, 128, 64],
+                "latent_channels": 32,
+                "num_blocks": [4, 16, 8, 4, 0],
+                "block_type": [
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d"                    
+                ],
+                "up_block_type": [
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d"
+                ],
+                "block_args": [
+                    {
+                        "use_checkpoint": false
+                    },
+                    {
+                        "use_checkpoint": false
+                    },
+                    {
+                        "use_checkpoint": false
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    }
+                ],
+                "use_fp16": true,
+                "pred_subdiv": false
+            }
+        }
+    },
+    "dataset": {
+        "name": "SparseVoxelPbrDataset",
+        "args": {
+            "resolution": 256,
+            "min_aesthetic_score": 4.5,
+            "max_active_voxels": 1000000,
+            "max_num_faces": 1000000,
+            "with_mesh": false,
+            "attrs": [
+                "base_color",
+                "metallic",
+                "roughness",
+                "alpha"
+            ]
+        }
+    },
+    "trainer": {
+        "name": "PbrVaeTrainer",
+        "args": {
+            "max_steps": 1000000,
+            "batch_size_per_gpu": 8,
+            "batch_split": 1,
+            "optimizer": {
+                "name": "AdamW",
+                "args": {
+                    "lr": 1e-4,
+                    "weight_decay": 0.0
+                }
+            },
+            "ema_rate": [
+                0.9999
+            ],
+            "fp16_mode": "inflat_all",
+            "fp16_scale_growth": 0.001,
+            "grad_clip": {
+                "name": "AdaptiveGradClipper",
+                "args": {
+                    "max_norm": 1.0,
+                    "clip_percentile": 95
+                }
+            },
+            "i_log": 500,
+            "i_sample": 10000,
+            "i_save": 10000,
+            "lambda_kl": 1e-6,
+            "loss_type": "l1",
+            "lambda_render": 0.0
+        }
+    }
+}
\ No newline at end of file
--- a/TRELLIS.2_DCU/configs/scvae/tex_vae_next_dc_f16c32_fp16_ft_512.json
+++ b/TRELLIS.2_DCU/configs/scvae/tex_vae_next_dc_f16c32_fp16_ft_512.json
+{
+    "models": {
+        "encoder": {
+            "name": "SparseUnetVaeEncoder",
+            "args": {
+                "in_channels": 6,
+                "model_channels": [64, 128, 256, 512, 1024],
+                "latent_channels": 32,
+                "num_blocks": [0, 4, 8, 16, 4],
+                "block_type": [
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d"                    
+                ],
+                "down_block_type": [
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d",
+                    "SparseResBlockS2C3d"                    
+                ],
+                "block_args": [
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    }
+                ],
+                "use_fp16": true
+            }
+        },
+        "decoder": {
+            "name": "SparseUnetVaeDecoder",
+            "args": {
+                "out_channels": 6,
+                "model_channels": [1024, 512, 256, 128, 64],
+                "latent_channels": 32,
+                "num_blocks": [4, 16, 8, 4, 0],
+                "block_type": [
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d",
+                    "SparseConvNeXtBlock3d"                    
+                ],
+                "up_block_type": [
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d",
+                    "SparseResBlockC2S3d"
+                ],
+                "block_args": [
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    },
+                    {
+                        "use_checkpoint": true
+                    }
+                ],
+                "use_fp16": true,
+                "pred_subdiv": false
+            }
+        }
+    },
+    "dataset": {
+        "name": "SparseVoxelPbrDataset",
+        "args": {
+            "resolution": 512,
+            "min_aesthetic_score": 4.5,
+            "max_active_voxels": 1000000,
+            "max_num_faces": 1000000,
+            "attrs": [
+                "base_color",
+                "metallic",
+                "roughness",
+                "alpha"
+            ]
+        }
+    },
+    "trainer": {
+        "name": "PbrVaeTrainer",
+        "args": {
+            "max_steps": 1000000,
+            "batch_size_per_gpu": 4,
+            "batch_split": 2,
+            "optimizer": {
+                "name": "AdamW",
+                "args": {
+                    "lr": 1e-5,
+                    "weight_decay": 0.0
+                }
+            },
+            "ema_rate": [
+                0.9999
+            ],
+            "fp16_mode": "inflat_all",
+            "fp16_scale_growth": 0.001,
+            "grad_clip": {
+                "name": "AdaptiveGradClipper",
+                "args": {
+                    "max_norm": 1.0,
+                    "clip_percentile": 95
+                }
+            },
+            "finetune_ckpt": {
+                "encoder": "PATH_TO_ENCODER_CKPT",
+                "decoder": "PATH_TO_DECODER_CKPT"
+            },
+            "snapshot_batch_size": 1,
+            "render_resolution": 512,
+            "i_log": 500,
+            "i_sample": 10000,
+            "i_save": 10000,
+            "lambda_kl": 1e-6,
+            "lambda_render": 1.0,
+            "loss_type": "l1",
+            "lambda_ssim": 0.2,
+            "lambda_lpips": 0.2,
+            "camera_randomization_config": {
+                "radius_range": [2, 100]
+            }
+        }
+    }
+}
\ No newline at end of file
--- a/TRELLIS.2_DCU/data_toolkit/README.md
+++ b/TRELLIS.2_DCU/data_toolkit/README.md
+# Dataset Preparation Toolkit
+
+This toolkit provides a comprehensive pipeline for preparing 3D datasets, including downloading, processing, voxelizing, and latent encoding for SC-VAE and Flow Model training.
+
+### Step 1: Install Dependencies
+
+Initialize the environment and install necessary dependencies:
+
+```bash
+. ./data_toolkit/setup.sh
+```
+
+### Step 2: Initialize Metadata
+
+Before processing, load the dataset metadata.
+
+```bash
+python data_toolkit/build_metadata.py <SUBSET> --root <ROOT> [--source <SOURCE>]
+```
+
+**Arguments:**
+- `SUBSET`: Target dataset subset. Options: `ObjaverseXL`, `ABO`, `HSSD`, `TexVerse` (Training sets); `SketchfabPicked`, `Toys4k` (Test sets).
+- `ROOT`: Root directory to save the data.
+- `SOURCE`: Data source (Required if `SUBSET` is `ObjaverseXL`). Options: `sketchfab`, `github`.
+
+**Example:**
+Load metadata for `ObjaverseXL` (sketchfab) and save to `datasets/ObjaverseXL_sketchfab`:
+```bash
+python data_toolkit/build_metadata.py ObjaverseXL --source sketchfab --root datasets/ObjaverseXL_sketchfab
+```
+
+### Step 3: Download Data
+
+Download the 3D assets to the local storage.
+
+```bash
+python data_toolkit/download.py <SUBSET> --root <ROOT> [--rank <RANK> --world_size <WORLD_SIZE>]
+```
+
+**Arguments:**
+- `RANK` / `WORLD_SIZE`: Parameters for multi-node distributed downloading.
+
+**Example:**
+To download the `ObjaverseXL` subset:
+
+> **Note:** The example below sets a large `WORLD_SIZE` (160,000) for demonstration purposes, meaning only a tiny fraction of the dataset will be downloaded by this single process.
+
+```bash
+python data_toolkit/download.py ObjaverseXL --root datasets/ObjaverseXL_sketchfab --world_size 160000
+```
+
+*Attention: Some datasets may require an interactive Hugging Face login or manual steps. Please follow any on-screen instructions.*
+
+**Update Metadata:**
+After downloading, update the metadata registry:
+```bash
+python data_toolkit/build_metadata.py ObjaverseXL --root datasets/ObjaverseXL_sketchfab
+```
+
+### Step 4: Process Mesh and PBR Textures
+
+Standardize 3D assets by dumping mesh and PBR textures.
+*Note: This process utilizes the CPU.*
+
+```bash
+# Dump Meshes
+python data_toolkit/dump_mesh.py <SUBSET> --root <ROOT> [--rank <RANK> --world_size <WORLD_SIZE>]
+
+# Dump PBR Textures
+python data_toolkit/dump_pbr.py <SUBSET> --root <ROOT> [--rank <RANK> --world_size <WORLD_SIZE>]
+
+# Get statistics of the assets
+python data_toolkit/asset_stats.py --root <ROOT> [--rank <RANK> --world_size <WORLD_SIZE>]
+```
+
+**Example:**
+```bash
+python data_toolkit/dump_mesh.py ObjaverseXL --root datasets/ObjaverseXL_sketchfab
+python data_toolkit/dump_pbr.py ObjaverseXL --root datasets/ObjaverseXL_sketchfab
+python data_toolkit/asset_stats.py --root datasets/ObjaverseXL_sketchfab
+```
+
+**Update Metadata:**
+```bash
+python data_toolkit/build_metadata.py ObjaverseXL --root datasets/ObjaverseXL_sketchfab
+```
+
+### Step 5: Convert to O-Voxels
+
+Convert the processed meshes and textures into O-Voxels format.
+*Note: This process utilizes the CPU.*
+
+```bash
+python data_toolkit/dual_grid.py <SUBSET> --root <ROOT> [--rank <RANK> --world_size <WORLD_SIZE>] [--resolution <RESOLUTION>]
+
+python data_toolkit/voxelize_pbr.py <SUBSET> --root <ROOT> [--rank <RANK> --world_size <WORLD_SIZE>] [--resolution <RESOLUTION>]
+```
+
+**Arguments:**
+- `RESOLUTION`: Target resolutions for O-Voxels, comma-separated (e.g., `256,512,1024`). Default is `256`.
+
+**Example:**
+Convert `ObjaverseXL` to resolutions 256, 512, and 1024:
+```bash
+python data_toolkit/dual_grid.py ObjaverseXL --root datasets/ObjaverseXL_sketchfab --resolution 256,512,1024
+python data_toolkit/voxelize_pbr.py ObjaverseXL --root datasets/ObjaverseXL_sketchfab --resolution 256,512,1024
+```
+
+
+### At this point, the dataset is ready for SC-VAE Training
+
+### Step 6: Encode Latents
+
+Encode sparse structures into latents to train the first-stage generator.
+
+```bash
+# 1. Encode Shape Latents
+python data_toolkit/encode_shape_latent.py --root <ROOT> [--rank <RANK> --world_size <WORLD_SIZE>] [--resolution <RESOLUTION>]
+
+# 2. Encode PBR Latents
+python data_toolkit/encode_pbr_latent.py --root <ROOT> [--rank <RANK> --world_size <WORLD_SIZE>] [--resolution <RESOLUTION>]
+
+# 3. Update Metadata (Required before next step)
+python data_toolkit/build_metadata.py <SUBSET> --root <ROOT>
+
+# 4. Encode Sparse Structure (SS) Latents
+python data_toolkit/encode_ss_latent.py --root <ROOT> --shape_latent_name <SHAPE_LATENT_NAME> [--rank <RANK> --world_size <WORLD_SIZE>] [--resolution <SS_RESOLUTION>] 
+```
+
+**Arguments:**
+- `RESOLUTION`: Input O-Voxel resolution. Default is `1024`.
+- `SS_RESOLUTION`: Resolution for sparse structures. Default is `64`.
+- `SHAPE_LATENT_NAME`: The specific version name of the shape latent.
+
+**Example:**
+```bash
+python data_toolkit/encode_shape_latent.py --root datasets/ObjaverseXL_sketchfab --resolution 512
+python data_toolkit/encode_pbr_latent.py --root datasets/ObjaverseXL_sketchfab --resolution 512
+python data_toolkit/encode_shape_latent.py --root datasets/ObjaverseXL_sketchfab --resolution 1024
+python data_toolkit/encode_pbr_latent.py --root datasets/ObjaverseXL_sketchfab --resolution 1024
+
+# Update metadata
+python data_toolkit/build_metadata.py ObjaverseXL --root datasets/ObjaverseXL_sketchfab
+
+# Encode SS Latents
+python data_toolkit/encode_ss_latent.py --root datasets/ObjaverseXL_sketchfab --shape_latent_name shape_enc_next_dc_f16c32_fp16_1024 --resolution 64
+
+# Final Metadata Update
+python data_toolkit/build_metadata.py ObjaverseXL --root datasets/ObjaverseXL_sketchfab
+```
+
+### Step 7: Render Image Conditions
+
+Render multi-view images to train the image-conditioned generator.
+*Note: This process may utilize the CPU.*
+
+```bash
+python data_toolkit/render_cond.py <SUBSET> --root <ROOT> [--num_views <NUM_VIEWS>] [--rank <RANK> --world_size <WORLD_SIZE>]
+```
+
+**Arguments:**
+- `NUM_VIEWS`: Number of views to render per asset. Default is `16`.
+
+**Example:**
+```bash
+python data_toolkit/render_cond.py ObjaverseXL --root datasets/ObjaverseXL_sketchfab
+```
+
+**Final Metadata Update:**
+```bash
+python data_toolkit/build_metadata.py ObjaverseXL --root datasets/ObjaverseXL_sketchfab
+```
\ No newline at end of file
--- a/TRELLIS.2_DCU/data_toolkit/asset_stats.py
+++ b/TRELLIS.2_DCU/data_toolkit/asset_stats.py
+import os
+import argparse
+import pickle
+from tqdm import tqdm
+import pandas as pd
+from easydict import EasyDict as edict
+from concurrent.futures import ThreadPoolExecutor
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--root', type=str, required=True,
+                        help='Directory to save the metadata')
+    parser.add_argument('--mesh_dump_root', type=str, default=None,
+                        help='Directory to save the mesh dumps')
+    parser.add_argument('--pbr_dump_root', type=str, default=None,
+                        help='Directory to save the pbr dumps')
+    parser.add_argument('--instances', type=str, default=None,
+                        help='Instances to process')
+    parser.add_argument('--rank', type=int, default=0)
+    parser.add_argument('--world_size', type=int, default=1)
+    parser.add_argument('--max_workers', type=int, default=0)
+    opt = parser.parse_args()
+    opt = edict(vars(opt))
+    opt.mesh_dump_root = opt.mesh_dump_root or opt.root
+    opt.pbr_dump_root = opt.pbr_dump_root or opt.root
+
+    os.makedirs(os.path.join(opt.root, 'asset_stats', 'new_records'), exist_ok=True)
+
+    # get file list
+    if not os.path.exists(os.path.join(opt.root, 'metadata.csv')):
+        raise ValueError('metadata.csv not found')
+    metadata = pd.read_csv(os.path.join(opt.root, 'metadata.csv')).set_index('sha256')
+    if os.path.exists(os.path.join(opt.root, 'asset_stats', 'metadata.csv')):
+        metadata = metadata.combine_first(pd.read_csv(os.path.join(opt.root, 'asset_stats','metadata.csv')).set_index('sha256'))
+    if os.path.exists(os.path.join(opt.mesh_dump_root, 'mesh_dumps', 'metadata.csv')):
+        metadata = metadata.combine_first(pd.read_csv(os.path.join(opt.mesh_dump_root, 'mesh_dumps','metadata.csv')).set_index('sha256'))
+    if os.path.exists(os.path.join(opt.pbr_dump_root, 'pbr_dumps', 'metadata.csv')):
+        metadata = metadata.combine_first(pd.read_csv(os.path.join(opt.pbr_dump_root, 'pbr_dumps', 'metadata.csv')).set_index('sha256'))
+    metadata = metadata.reset_index()
+    if opt.instances is None:
+        if 'num_faces' in metadata.columns:
+            metadata = metadata[metadata['num_faces'].isnull()]
+        metadata = metadata[(metadata['mesh_dumped'] == True) | (metadata['pbr_dumped'] == True)]
+    else:
+        if os.path.exists(opt.instances):
+            with open(opt.instances, 'r') as f:
+                instances = f.read().splitlines()
+        else:
+            instances = opt.instances.split(',')
+        metadata = metadata[metadata['sha256'].isin(instances)]
+
+    start = len(metadata) * opt.rank // opt.world_size
+    end = len(metadata) * (opt.rank + 1) // opt.world_size
+    metadata = metadata[start:end]
+       
+    print(f'Processing {len(metadata)} objects...')
+
+    # process objects
+    records = []
+    with ThreadPoolExecutor(max_workers=opt.max_workers or os.cpu_count()) as executor, \
+         tqdm(total=len(metadata), desc='Processing objects') as pbar:
+        def worker(metadatum):
+            try:
+                sha256 = metadatum['sha256']
+                if metadatum['pbr_dumped'] == True:
+                    with open(os.path.join(opt.pbr_dump_root, 'pbr_dumps', f'{sha256}.pickle'), 'rb') as f:
+                        dump = pickle.load(f)
+
+                        num_faces = 0
+                        num_vertices = 0
+                        for obj in dump['objects']:
+                            if obj['vertices'].size == 0 or obj['faces'].size == 0:
+                                continue
+                            num_faces += obj['faces'].shape[0]
+                            num_vertices += obj['vertices'].shape[0]
+
+                        num_basecolor_tex = 0
+                        num_metallic_tex = 0
+                        num_roughness_tex = 0
+                        num_alpha_tex = 0
+                        for mat in dump['materials']:
+                            if mat['baseColorTexture'] is not None:
+                                num_basecolor_tex += 1
+                            if mat['metallicTexture'] is not None:
+                                num_metallic_tex += 1
+                            if mat['roughnessTexture'] is not None:
+                                num_roughness_tex += 1
+                            if mat['alphaTexture'] is not None:
+                                num_alpha_tex += 1
+
+                        record = {
+                            'sha256': sha256,
+                            'num_faces': num_faces,
+                            'num_vertices': num_vertices,
+                            'num_basecolor_tex': num_basecolor_tex,
+                            'num_metallic_tex': num_metallic_tex,
+                            'num_roughness_tex': num_roughness_tex,
+                            'num_alpha_tex': num_alpha_tex,
+                        }
+                        records.append(record)
+                else:
+                    with open(os.path.join(opt.mesh_dump_root,'mesh_dumps', f'{sha256}.pickle'), 'rb') as f:
+                        dump = pickle.load(f)
+
+                        num_faces = 0
+                        num_vertices = 0
+                        for obj in dump['objects']:
+                            if obj['vertices'].size == 0 or obj['faces'].size == 0:
+                                continue
+                            num_faces += obj['faces'].shape[0]
+                            num_vertices += obj['vertices'].shape[0]
+
+                        record = {
+                            'sha256': sha256,
+                            'num_faces': num_faces,
+                            'num_vertices': num_vertices,
+                        }
+                        records.append(record)
+                pbar.update()
+            except Exception as e:
+                print(f'Error processing {sha256}: {e}')
+                pbar.update()                
+
+        for metadatum in metadata.to_dict('records'):
+            executor.submit(worker, metadatum)
+
+        executor.shutdown(wait=True)
+
+    # save records
+    records = pd.DataFrame.from_records(records)
+    records.to_csv(os.path.join(opt.root, 'asset_stats', 'new_records', f'part_{opt.rank}.csv'), index=False)
--- a/TRELLIS.2_DCU/data_toolkit/blender_script/dump_mesh.py
+++ b/TRELLIS.2_DCU/data_toolkit/blender_script/dump_mesh.py
+import argparse, sys, os, math, io
+from typing import *
+import bpy
+import bmesh
+from mathutils import Vector, Matrix
+import numpy as np
+import pickle
+
+
+"""=============== BLENDER ==============="""
+
+IMPORT_FUNCTIONS: Dict[str, Callable] = {
+    "obj": bpy.ops.import_scene.obj if bpy.app.version[0] < 4 else bpy.ops.wm.obj_import,
+    "glb": bpy.ops.import_scene.gltf,
+    "gltf": bpy.ops.import_scene.gltf,
+    "usd": bpy.ops.import_scene.usd,
+    "fbx": bpy.ops.import_scene.fbx,
+    "stl": bpy.ops.import_mesh.stl if bpy.app.version[0] < 4 else bpy.ops.wm.stl_import,
+    "usda": bpy.ops.import_scene.usda,
+    "dae": bpy.ops.wm.collada_import,
+    "ply": bpy.ops.import_mesh.ply if bpy.app.version[0] < 4 else bpy.ops.wm.ply_import,
+    "abc": bpy.ops.wm.alembic_import,
+    "blend": bpy.ops.wm.append,
+}
+
+
+def init_scene() -> None:
+    """Resets the scene to a clean state.
+
+    Returns:
+        None
+    """
+    # delete everything
+    for obj in bpy.data.objects:
+        bpy.data.objects.remove(obj, do_unlink=True)
+
+    # delete all the materials
+    for material in bpy.data.materials:
+        bpy.data.materials.remove(material, do_unlink=True)
+
+    # delete all the textures
+    for texture in bpy.data.textures:
+        bpy.data.textures.remove(texture, do_unlink=True)
+
+    # delete all the images
+    for image in bpy.data.images:
+        bpy.data.images.remove(image, do_unlink=True)
+
+
+def load_object(object_path: str) -> None:
+    """Loads a model with a supported file extension into the scene.
+
+    Args:
+        object_path (str): Path to the model file.
+
+    Raises:
+        ValueError: If the file extension is not supported.
+
+    Returns:
+        None
+    """
+    file_extension = object_path.split(".")[-1].lower()
+    if file_extension is None:
+        raise ValueError(f"Unsupported file type: {object_path}")
+
+    if file_extension == "usdz":
+        # install usdz io package
+        dirname = os.path.dirname(os.path.realpath(__file__))
+        usdz_package = os.path.join(dirname, "io_scene_usdz.zip")
+        bpy.ops.preferences.addon_install(filepath=usdz_package)
+        # enable it
+        addon_name = "io_scene_usdz"
+        bpy.ops.preferences.addon_enable(module=addon_name)
+        # import the usdz
+        from io_scene_usdz.import_usdz import import_usdz
+
+        import_usdz(context, filepath=object_path, materials=True, animations=True)
+        return None
+
+    # load from existing import functions
+    import_function = IMPORT_FUNCTIONS[file_extension]
+
+    print(f"Loading object from {object_path}")
+    if file_extension == "blend":
+        import_function(directory=object_path, link=False)
+    elif file_extension in {"glb", "gltf"}:
+        import_function(filepath=object_path, merge_vertices=True, import_shading='NORMALS', bone_heuristic='TEMPERANCE')
+    else:
+        import_function(filepath=object_path)
+        
+        
+def delete_invisible_objects() -> None:
+    """Deletes all invisible objects in the scene.
+
+    Returns:
+        None
+    """
+    to_remove = []
+    for obj in bpy.context.scene.objects:
+        if obj.hide_viewport or obj.hide_render:
+            obj.hide_viewport = False
+            obj.hide_render = False
+            obj.hide_select = False
+            to_remove.append(obj)
+    for obj in to_remove:
+        bpy.data.objects.remove(obj, do_unlink=True)
+
+    # Delete invisible collections
+    invisible_collections = [col for col in bpy.data.collections if col.hide_viewport]
+    for col in invisible_collections:
+        bpy.data.collections.remove(col)
+      
+
+def scene_bbox() -> Tuple[Vector, Vector]:
+    """Returns the bounding box of the scene.
+
+    Taken from Shap-E rendering script
+    (https://github.com/openai/shap-e/blob/main/shap_e/rendering/blender/blender_script.py#L68-L82)
+
+    Returns:
+        Tuple[Vector, Vector]: The minimum and maximum coordinates of the bounding box.
+    """
+    bbox_min = (math.inf,) * 3
+    bbox_max = (-math.inf,) * 3
+    found = False
+    scene_meshes = [obj for obj in bpy.context.scene.objects.values() if isinstance(obj.data, bpy.types.Mesh)]
+    for obj in scene_meshes:
+        found = True
+        for coord in obj.bound_box:
+            coord = Vector(coord)
+            coord = obj.matrix_world @ coord
+            bbox_min = tuple(min(x, y) for x, y in zip(bbox_min, coord))
+            bbox_max = tuple(max(x, y) for x, y in zip(bbox_max, coord))
+    if not found:
+        raise RuntimeError("no objects in scene to compute bounding box for")
+    return Vector(bbox_min), Vector(bbox_max)
+
+
+def normalize_scene() -> Tuple[float, Vector]:
+    """Normalizes the scene by scaling and translating it to fit in a unit cube centered
+    at the origin.
+
+    Mostly taken from the Point-E / Shap-E rendering script
+    (https://github.com/openai/point-e/blob/main/point_e/evals/scripts/blender_script.py#L97-L112),
+    but fix for multiple root objects: (see bug report here:
+    https://github.com/openai/shap-e/pull/60).
+
+    Returns:
+        Tuple[float, Vector]: The scale factor and the offset applied to the scene.
+    """
+    scene_root_objects = [obj for obj in bpy.context.scene.objects.values() if not obj.parent]
+    if len(scene_root_objects) > 1:
+        # create an empty object to be used as a parent for all root objects
+        scene = bpy.data.objects.new("ParentEmpty", None)
+        bpy.context.scene.collection.objects.link(scene)
+
+        # parent all root objects to the empty object
+        for obj in scene_root_objects:
+            obj.parent = scene
+    else:
+        scene = scene_root_objects[0]
+
+    bbox_min, bbox_max = scene_bbox()
+    scale = 1 / max(bbox_max - bbox_min)
+    scene.scale = scene.scale * scale
+
+    # Apply scale to matrix_world.
+    bpy.context.view_layer.update()
+    bbox_min, bbox_max = scene_bbox()
+    offset = -(bbox_min + bbox_max) / 2
+    scene.matrix_world.translation += offset
+    
+    return scale, offset
+
+
+def main(arg):    
+    # Initialize context
+    if arg.object.endswith(".blend"):
+        delete_invisible_objects()
+    else:
+        init_scene()
+        load_object(arg.object)
+    print('[INFO] Scene initialized.')
+    
+    # Normalize scene
+    scale, offset = normalize_scene()
+    print('[INFO] Scene normalized.')
+    
+    # Start dumping
+    depsgraph = bpy.context.evaluated_depsgraph_get()
+    scene = bpy.context.scene
+    output = {
+        'objects': [],
+    }
+
+    # Dumping meshes
+    for obj in scene.objects:
+        if obj.type != 'MESH':
+            continue
+        
+        pack = {
+            "vertices": None,
+            "faces": None,
+        }
+        
+        eval_obj = obj.evaluated_get(depsgraph)
+        eval_mesh = eval_obj.to_mesh()
+        
+        bm = bmesh.new()
+        bm.from_mesh(eval_mesh)
+        bm.transform(obj.matrix_world)
+        bmesh.ops.triangulate(bm, faces=bm.faces)
+        bm.to_mesh(eval_mesh)
+        bm.free()
+                
+        pack["vertices"] = np.array([
+            v.co[:] for v in eval_mesh.vertices
+        ], dtype=np.float32)   # (N, 3)
+        
+        pack["faces"] = np.array([
+            [eval_mesh.loops[i].vertex_index for i in poly.loop_indices]
+            for poly in eval_mesh.polygons
+        ], dtype=np.int32)   # (F, 3)
+
+        output['objects'].append(pack)
+
+    # Save output
+    os.makedirs(os.path.dirname(arg.output_path), exist_ok=True)
+    with open(arg.output_path, 'wb') as f:
+        pickle.dump(output, f)
+    print('[INFO] Output saved to {}.'.format(arg.output_path))
+
+        
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Renders given obj file by rotation a camera around it.')
+    parser.add_argument('--object', type=str, help='Path to the 3D model file to be rendered.')
+    parser.add_argument('--output_path', type=str, default='/tmp', help='The path the output will be dumped to.')
+    argv = sys.argv[sys.argv.index("--") + 1:]
+    args = parser.parse_args(argv)
+
+    main(args)
+    
\ No newline at end of file
--- a/TRELLIS.2_DCU/data_toolkit/blender_script/dump_pbr.py
+++ b/TRELLIS.2_DCU/data_toolkit/blender_script/dump_pbr.py
+import argparse, sys, os, math, io
+from typing import *
+import bpy
+import bmesh
+from mathutils import Vector, Matrix
+import numpy as np
+from PIL import Image
+import pickle
+
+
+"""=============== BLENDER ==============="""
+
+IMPORT_FUNCTIONS: Dict[str, Callable] = {
+    "obj": bpy.ops.import_scene.obj if bpy.app.version[0] < 4 else bpy.ops.wm.obj_import,
+    "glb": bpy.ops.import_scene.gltf,
+    "gltf": bpy.ops.import_scene.gltf,
+    "usd": bpy.ops.import_scene.usd,
+    "fbx": bpy.ops.import_scene.fbx,
+    "stl": bpy.ops.import_mesh.stl if bpy.app.version[0] < 4 else bpy.ops.wm.stl_import,
+    "usda": bpy.ops.import_scene.usda,
+    "dae": bpy.ops.wm.collada_import,
+    "ply": bpy.ops.import_mesh.ply if bpy.app.version[0] < 4 else bpy.ops.wm.ply_import,
+    "abc": bpy.ops.wm.alembic_import,
+    "blend": bpy.ops.wm.append,
+}
+
+
+def init_scene() -> None:
+    """Resets the scene to a clean state.
+
+    Returns:
+        None
+    """
+    # delete everything
+    for obj in bpy.data.objects:
+        bpy.data.objects.remove(obj, do_unlink=True)
+
+    # delete all the materials
+    for material in bpy.data.materials:
+        bpy.data.materials.remove(material, do_unlink=True)
+
+    # delete all the textures
+    for texture in bpy.data.textures:
+        bpy.data.textures.remove(texture, do_unlink=True)
+
+    # delete all the images
+    for image in bpy.data.images:
+        bpy.data.images.remove(image, do_unlink=True)
+
+
+def load_object(object_path: str) -> None:
+    """Loads a model with a supported file extension into the scene.
+
+    Args:
+        object_path (str): Path to the model file.
+
+    Raises:
+        ValueError: If the file extension is not supported.
+
+    Returns:
+        None
+    """
+    file_extension = object_path.split(".")[-1].lower()
+    if file_extension is None:
+        raise ValueError(f"Unsupported file type: {object_path}")
+
+    if file_extension == "usdz":
+        # install usdz io package
+        dirname = os.path.dirname(os.path.realpath(__file__))
+        usdz_package = os.path.join(dirname, "io_scene_usdz.zip")
+        bpy.ops.preferences.addon_install(filepath=usdz_package)
+        # enable it
+        addon_name = "io_scene_usdz"
+        bpy.ops.preferences.addon_enable(module=addon_name)
+        # import the usdz
+        from io_scene_usdz.import_usdz import import_usdz
+
+        import_usdz(context, filepath=object_path, materials=True, animations=True)
+        return None
+
+    # load from existing import functions
+    import_function = IMPORT_FUNCTIONS[file_extension]
+
+    print(f"Loading object from {object_path}")
+    if file_extension == "blend":
+        import_function(directory=object_path, link=False)
+    elif file_extension in {"glb", "gltf"}:
+        import_function(filepath=object_path, merge_vertices=True, import_shading='NORMALS', bone_heuristic='TEMPERANCE')
+    else:
+        import_function(filepath=object_path)
+        
+        
+def delete_invisible_objects() -> None:
+    """Deletes all invisible objects in the scene.
+
+    Returns:
+        None
+    """
+    to_remove = []
+    for obj in bpy.context.scene.objects:
+        if obj.hide_viewport or obj.hide_render:
+            obj.hide_viewport = False
+            obj.hide_render = False
+            obj.hide_select = False
+            to_remove.append(obj)
+    for obj in to_remove:
+        bpy.data.objects.remove(obj, do_unlink=True)
+
+    # Delete invisible collections
+    invisible_collections = [col for col in bpy.data.collections if col.hide_viewport]
+    for col in invisible_collections:
+        bpy.data.collections.remove(col)
+      
+
+def scene_bbox() -> Tuple[Vector, Vector]:
+    """Returns the bounding box of the scene.
+
+    Taken from Shap-E rendering script
+    (https://github.com/openai/shap-e/blob/main/shap_e/rendering/blender/blender_script.py#L68-L82)
+
+    Returns:
+        Tuple[Vector, Vector]: The minimum and maximum coordinates of the bounding box.
+    """
+    bbox_min = (math.inf,) * 3
+    bbox_max = (-math.inf,) * 3
+    found = False
+    scene_meshes = [obj for obj in bpy.context.scene.objects.values() if isinstance(obj.data, bpy.types.Mesh)]
+    for obj in scene_meshes:
+        found = True
+        for coord in obj.bound_box:
+            coord = Vector(coord)
+            coord = obj.matrix_world @ coord
+            bbox_min = tuple(min(x, y) for x, y in zip(bbox_min, coord))
+            bbox_max = tuple(max(x, y) for x, y in zip(bbox_max, coord))
+    if not found:
+        raise RuntimeError("no objects in scene to compute bounding box for")
+    return Vector(bbox_min), Vector(bbox_max)
+
+
+def normalize_scene() -> Tuple[float, Vector]:
+    """Normalizes the scene by scaling and translating it to fit in a unit cube centered
+    at the origin.
+
+    Mostly taken from the Point-E / Shap-E rendering script
+    (https://github.com/openai/point-e/blob/main/point_e/evals/scripts/blender_script.py#L97-L112),
+    but fix for multiple root objects: (see bug report here:
+    https://github.com/openai/shap-e/pull/60).
+
+    Returns:
+        Tuple[float, Vector]: The scale factor and the offset applied to the scene.
+    """
+    scene_root_objects = [obj for obj in bpy.context.scene.objects.values() if not obj.parent]
+    if len(scene_root_objects) > 1:
+        # create an empty object to be used as a parent for all root objects
+        scene = bpy.data.objects.new("ParentEmpty", None)
+        bpy.context.scene.collection.objects.link(scene)
+
+        # parent all root objects to the empty object
+        for obj in scene_root_objects:
+            obj.parent = scene
+    else:
+        scene = scene_root_objects[0]
+
+    bbox_min, bbox_max = scene_bbox()
+    scale = 1 / max(bbox_max - bbox_min)
+    scene.scale = scene.scale * scale
+
+    # Apply scale to matrix_world.
+    bpy.context.view_layer.update()
+    bbox_min, bbox_max = scene_bbox()
+    offset = -(bbox_min + bbox_max) / 2
+    scene.matrix_world.translation += offset
+    
+    return scale, offset
+
+
+# =============== NODE TREE PARSING ===============
+
+def extract_image(tex_node, channels):
+        image = tex_node.image
+        pixels = np.array(image.pixels[:])
+        data = pixels.reshape(image.size[1], image.size[0], -1)
+        data = data[..., channels]
+
+        if data.dtype != np.uint8:
+            data = np.clip(data, 0.0, 1.0)
+            data = (data * 255).astype(np.uint8)
+
+        if len(data.shape) == 2:  # Single channel
+            pil_image = Image.fromarray(data, mode='L')
+        elif data.shape[2] == 3:
+            pil_image = Image.fromarray(data, mode='RGB')
+        elif data.shape[2] == 4:
+            pil_image = Image.fromarray(data, mode='RGBA')
+        else:
+            raise ValueError("Unsupported channel shape for image")
+
+        buffer = io.BytesIO()
+        pil_image.save(buffer, format='PNG')
+        png_bytes = buffer.getvalue()
+
+        return {
+            'image': png_bytes,
+            'interpolation': tex_node.interpolation,
+            'extension': tex_node.extension,
+        }
+
+
+def try_extract_image(link, expected_channel='RGB'):
+    """
+    Tries to extract an image from a texture node link.
+    Supported sub tree modes:
+      - RGB:
+        TEX_IMAGE ->
+      - R, G, B:
+        TEX_IMAGE -> SEPARATE_COLOR ->
+      - A:
+        TEX_IMAGE ->
+    """
+    assert expected_channel in ['RGB', 'R', 'G', 'B', 'A'], "Unsupported channel"
+
+    if expected_channel == 'RGB':
+        assert link.from_node.type == 'TEX_IMAGE', "Material is not supported"
+        assert link.from_socket.name == 'Color', "Material is not supported"
+        tex_node = link.from_node
+        return extract_image(tex_node, [0, 1, 2])
+
+    if expected_channel in ['R', 'G', 'B']:
+        socket_name = {
+            'R': 'Red',
+            'G': 'Green',
+            'B': 'Blue',
+        }[expected_channel]
+        assert link.from_node.type == 'SEPARATE_COLOR' and link.from_node.mode == 'RGB', \
+            f"Material is not supported, {link.from_node.type}, {link.from_node.mode}"
+        assert link.from_socket.name == socket_name, "Material is not supported"
+        sep_node = link.from_node
+        assert sep_node.inputs[0].is_linked and sep_node.inputs[0].links[0].from_node.type == 'TEX_IMAGE', \
+            "Material is not supported"
+        assert sep_node.inputs[0].links[0].from_socket.name == 'Color', "Material is not supported"
+        tex_node = sep_node.inputs[0].links[0].from_node
+        channel_index = {
+            'R': 0,
+            'G': 1,
+            'B': 2,
+        }[expected_channel]
+        return extract_image(tex_node, channel_index)
+
+    if expected_channel == 'A':
+        assert link.from_node.type == 'TEX_IMAGE', "Material is not supported"
+        assert link.from_socket.name == 'Alpha', "Material is not supported"
+        tex_node = link.from_node
+        return extract_image(tex_node, 3)
+
+
+def try_extract_factor(link, mode='color'):
+    """
+    Tries to extract a factor from a math node link.
+    Supported sub tree modes:
+      - color:
+       ANY -> MIX(MULTIPLY) ->
+      - scalar:
+       ANY -> MATH(MULTIPLY) ->
+    """
+    assert mode in ['color','scalar'], "Unsupported mode"
+
+    if mode == 'color':
+        if link.from_node.type == 'MIX':
+            mix_node = link.from_node
+            assert mix_node.data_type == 'RGBA' and mix_node.blend_type == 'MULTIPLY', f"Material is not supported, {mix_node.data_type}, {mix_node.blend_type}"
+            assert not mix_node.inputs['Factor'].is_linked and mix_node.inputs['Factor'].default_value == 1.0, \
+                "Material is not supported"
+            if mix_node.inputs['A'].is_linked:
+                assert not mix_node.inputs['B'].is_linked, "Material is not supported"
+                return (list(mix_node.inputs['B'].default_value)[:3], mix_node.inputs['A'].links[0])
+            else:
+                assert not mix_node.inputs['A'].is_linked, "Material is not supported"
+                assert mix_node.inputs['B'].is_linked, "Material is not supported"
+                return (list(mix_node.inputs['A'].default_value)[:3], mix_node.inputs['B'].links[0])
+        return ([1.0, 1.0, 1.0], link)
+
+    if mode =='scalar':
+        if link.from_node.type == 'MATH':
+            math_node = link.from_node
+            assert math_node.operation == 'MULTIPLY', "Material is not supported"
+            assert math_node.inputs[0].is_linked, "Material is not supported"
+            assert not math_node.inputs[1].is_linked, "Material is not supported"
+            return (math_node.inputs[1].default_value, math_node.inputs[0].links[0])
+        return (1.0, link)
+
+
+def try_extract_image_with_factor(link, expected_channel='RGB'):
+    """
+    Tries to extract an image and a factor from a texture node link.
+    """
+    factor, link = try_extract_factor(link, 'color' if expected_channel in ['RGB'] else 'scalar')
+    image = try_extract_image(link, expected_channel)
+    return (factor, image)
+
+
+def main(arg):    
+    # Initialize context
+    if arg.object.endswith(".blend"):
+        delete_invisible_objects()
+    else:
+        init_scene()
+        load_object(arg.object)
+    print('[INFO] Scene initialized.')
+    
+    # Normalize scene
+    scale, offset = normalize_scene()
+    print('[INFO] Scene normalized.')
+    
+    # Start dumping
+    depsgraph = bpy.context.evaluated_depsgraph_get()
+    scene = bpy.context.scene
+    output = {
+        'materials': [],
+        'objects': [],
+    }
+
+    # Dumping materials
+    for mat in bpy.data.materials:
+        assert mat.use_nodes == True, "Material is not supported"
+
+        pack = {
+            "baseColorFactor": [1.0, 1.0, 1.0],
+            "alphaFactor": 1.0,
+            "metallicFactor": 1.0,
+            "roughnessFactor": 1.0,
+            "alphaMode": "OPAQUE",
+            "alphaCutoff": 0.5,
+            "baseColorTexture": None,
+            "alphaTexture": None,
+            "metallicTexture": None,
+            "roughnessTexture": None,
+        }
+
+        try:
+            principled_node = mat.node_tree.nodes.get('Principled BSDF')
+            assert principled_node is not None, "Material is not supported"
+
+            # Handle base color
+            if not principled_node.inputs['Base Color'].is_linked:
+                pack["baseColorFactor"] = list(principled_node.inputs['Base Color'].default_value)
+            else:
+                link = principled_node.inputs['Base Color'].links[0]
+                if link.from_node.type == 'RGB':
+                    pack["baseColorFactor"] = list(link.from_node.outputs[0].default_value)
+                else:
+                    factor, image = try_extract_image_with_factor(link, 'RGB')
+                    pack["baseColorFactor"] = factor
+                    pack["baseColorTexture"] = image
+
+            # Handle alpha
+            if not principled_node.inputs['Alpha'].is_linked:
+                pack["alphaFactor"] = principled_node.inputs['Alpha'].default_value
+                if pack["alphaFactor"] < 1.0:
+                    pack["alphaMode"] = "BLEND"
+            else:
+                link = principled_node.inputs['Alpha'].links[0]
+                node = link.from_node
+                if node.type == 'VALUE':
+                    pack["alphaFactor"] = node.outputs[0].default_value
+                    if pack["alphaFactor"] < 1.0:
+                        pack["alphaMode"] = "BLEND"
+                else:
+                    pack["alphaMode"] = "BLEND"
+                    if node.type == 'MATH':
+                        if node.operation == 'ROUND':
+                            assert node.inputs[0].is_linked, "Material is not supported"
+                            pack["alphaMode"] = "MASK"
+                            link = node.inputs[0].links[0]
+                        elif node.operation == 'SUBTRACT':
+                            assert node.inputs[0].default_value == 1.0 and \
+                                node.inputs[1].is_linked and \
+                                node.inputs[1].links[0].from_node.type == 'MATH' and \
+                                node.inputs[1].links[0].from_node.operation == 'LESS_THAN', \
+                                "Material is not supported"
+                            assert node.inputs[1].links[0].from_node.inputs[0].is_linked, "Material is not supported"
+                            pack["alphaMode"] = "MASK"
+                            pack["alphaCutoff"] = node.inputs[1].links[0].from_node.inputs[1].default_value
+                            link = node.inputs[1].links[0].from_node.inputs[0].links[0]
+                    factor, image = try_extract_image_with_factor(link, 'A')
+                    pack["alphaFactor"] = factor
+                    pack["alphaTexture"] = image
+
+            # Handle metallic
+            if not principled_node.inputs['Metallic'].is_linked:
+                pack["metallicFactor"] = principled_node.inputs['Metallic'].default_value
+            else:
+                link = principled_node.inputs['Metallic'].links[0]
+                node = link.from_node
+                if node.type == 'VALUE':
+                    pack["metallicFactor"] = node.outputs[0].default_value
+                else:
+                    factor, image = try_extract_image_with_factor(link, 'B')
+                    pack["metallicFactor"] = factor
+                    pack["metallicTexture"] = image
+
+            # Handle roughness
+            if not principled_node.inputs['Roughness'].is_linked:
+                pack["roughnessFactor"] = principled_node.inputs['Roughness'].default_value
+            else:
+                link = principled_node.inputs['Roughness'].links[0]
+                node = link.from_node
+                if node.type == 'VALUE':
+                    pack["roughnessFactor"] = node.outputs[0].default_value
+                else:
+                    factor, image = try_extract_image_with_factor(link, 'G')
+                    pack["roughnessFactor"] = factor
+                    pack["roughnessTexture"] = image
+
+            output['materials'].append(pack)
+        except:
+            with open(arg.output_path + '_error.txt', 'w') as f:
+                f.write(str([[n.name] for n in mat.node_tree.nodes]))
+            raise RuntimeError("Material is not supported")
+
+    # Dumping meshes
+    for obj in scene.objects:
+        if obj.type != 'MESH':
+            continue
+        
+        pack = {
+            "vertices": None,
+            "faces": None,
+            "uvs": None,
+            "matIDs": None,
+        }
+        
+        eval_obj = obj.evaluated_get(depsgraph)
+        eval_mesh = eval_obj.to_mesh()
+        
+        bm = bmesh.new()
+        bm.from_mesh(eval_mesh)
+        bm.transform(obj.matrix_world)
+        bmesh.ops.triangulate(bm, faces=bm.faces)
+        bm.to_mesh(eval_mesh)
+        bm.free()
+                
+        pack["vertices"] = np.array([
+            v.co[:] for v in eval_mesh.vertices
+        ], dtype=np.float32)   # (N, 3)
+        
+        pack["faces"] = np.array([
+            [eval_mesh.loops[i].vertex_index for i in poly.loop_indices]
+            for poly in eval_mesh.polygons
+        ], dtype=np.int32)   # (F, 3)
+        
+        pack["normals"] = np.array([
+            [eval_mesh.loops[i].normal for i in poly.loop_indices]
+            for poly in eval_mesh.polygons
+        ], dtype=np.float32)  # (F, 3, 3)
+        
+        if eval_mesh.uv_layers.active is not None:
+            pack["uvs"] = np.array([
+                [eval_mesh.uv_layers.active.data[i].uv for i in poly.loop_indices]
+                for poly in eval_mesh.polygons
+            ], dtype=np.float32)  # (F, 3, 2)
+
+        pack["mat_ids"] = np.array([
+            bpy.data.materials.find(obj.material_slots[poly.material_index].name)
+            if len(obj.material_slots) > 0 and obj.material_slots[poly.material_index].material is not None else -1
+            for poly in eval_mesh.polygons
+        ], dtype=np.int32)
+
+        output['objects'].append(pack)
+
+    # Save output
+    os.makedirs(os.path.dirname(arg.output_path), exist_ok=True)
+    with open(arg.output_path, 'wb') as f:
+        pickle.dump(output, f)
+    print('[INFO] Output saved to {}.'.format(arg.output_path))
+
+        
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Renders given obj file by rotation a camera around it.')
+    parser.add_argument('--object', type=str, help='Path to the 3D model file to be rendered.')
+    parser.add_argument('--output_path', type=str, default='/tmp', help='The path the output will be dumped to.')
+    argv = sys.argv[sys.argv.index("--") + 1:]
+    args = parser.parse_args(argv)
+
+    main(args)
+    
\ No newline at end of file
--- a/TRELLIS.2_DCU/data_toolkit/blender_script/install_pillow.py
+++ b/TRELLIS.2_DCU/data_toolkit/blender_script/install_pillow.py
+import subprocess
+import sys
+import ensurepip
+
+ensurepip.bootstrap()
+subprocess.check_call([sys.executable, "-m", "pip", "install", "Pillow"])
\ No newline at end of file
--- a/TRELLIS.2_DCU/data_toolkit/blender_script/io_scene_usdz.zip
+++ b/TRELLIS.2_DCU/data_toolkit/blender_script/io_scene_usdz.zip