[Fix] Fix bug when moving the MoE model to CPU (#328)

492501d7 · gushiqiao · GitHub · 409e5cec · 492501d7 · 492501d7
Commit 492501d7, authored Sep 23, 2025 by gushiqiao; committed by GitHub on Sep 23, 2025
4 changed files
--- a/lightx2v/models/networks/wan/audio_model.py
+++ b/lightx2v/models/networks/wan/audio_model.py
@@ -89,7 +89,7 @@ class WanAudioModel(WanModel):
        self.enable_compile_mode("_infer_cond_uncond")
        if self.cpu_offload:
-            if self.offload_granularity == "model" and self.scheduler.step_index == 0:
+            if self.offload_granularity == "model" and self.scheduler.step_index == 0 and "wan2.2_moe" not in self.config.model_cls:
                self.to_cuda()
            elif self.offload_granularity != "model":
                self.pre_weight.to_cuda()
@@ -99,7 +99,7 @@ class WanAudioModel(WanModel):
            self.start_compile(shape)
        if self.cpu_offload:
-            if self.offload_granularity == "model" and self.scheduler.step_index == self.scheduler.infer_steps - 1:
+            if self.offload_granularity == "model" and self.scheduler.step_index == self.scheduler.infer_steps - 1 and "wan2.2_moe" not in self.config.model_cls:
                self.to_cpu()
            elif self.offload_granularity != "model":
                self.pre_weight.to_cpu()

--- a/lightx2v/models/networks/wan/model.py
+++ b/lightx2v/models/networks/wan/model.py
@@ -344,7 +344,7 @@ class WanModel(CompiledMethodsMixin):
    @torch.no_grad()
    def infer(self, inputs):
        if self.cpu_offload:
-            if self.offload_granularity == "model" and self.scheduler.step_index == 0:
+            if self.offload_granularity == "model" and self.scheduler.step_index == 0 and "wan2.2_moe" not in self.config.model_cls:
                self.to_cuda()
            elif self.offload_granularity != "model":
                self.pre_weight.to_cuda()
@@ -377,7 +377,7 @@ class WanModel(CompiledMethodsMixin):
            self.scheduler.noise_pred = self._infer_cond_uncond(inputs, infer_condition=True)
        if self.cpu_offload:
-            if self.offload_granularity == "model" and self.scheduler.step_index == self.scheduler.infer_steps - 1:
+            if self.offload_granularity == "model" and self.scheduler.step_index == self.scheduler.infer_steps - 1 and "wan2.2_moe" not in self.config.model_cls:
                self.to_cpu()
            elif self.offload_granularity != "model":
                self.pre_weight.to_cpu()

--- a/lightx2v/models/runners/wan/wan_distill_runner.py
+++ b/lightx2v/models/runners/wan/wan_distill_runner.py
@@ -7,6 +7,7 @@ from lightx2v.models.networks.wan.lora_adapter import WanLoraWrapper
 from lightx2v.models.networks.wan.model import WanModel
 from lightx2v.models.runners.wan.wan_runner import MultiModelStruct, WanRunner
 from lightx2v.models.schedulers.wan.step_distill.scheduler import Wan22StepDistillScheduler, WanStepDistillScheduler
+from lightx2v.utils.profiler import *
 from lightx2v.utils.registry_factory import RUNNER_REGISTER
@@ -49,6 +50,7 @@ class MultiDistillModelStruct(MultiModelStruct):
        self.cur_model_index = -1
        logger.info(f"boundary step index: {self.boundary_step_index}")
+    @ProfilingContext4DebugL2("Swtich models in infer_main costs")
    def get_current_model_index(self):
        if self.scheduler.step_index < self.boundary_step_index:
            logger.info(f"using - HIGH - noise model at step_index {self.scheduler.step_index + 1}")

--- a/lightx2v/models/runners/wan/wan_runner.py
+++ b/lightx2v/models/runners/wan/wan_runner.py
@@ -25,6 +25,7 @@ from lightx2v.models.video_encoders.hf.wan.vae import WanVAE
 from lightx2v.models.video_encoders.hf.wan.vae_2_2 import Wan2_2_VAE
 from lightx2v.models.video_encoders.hf.wan.vae_tiny import Wan2_2_VAE_tiny, WanVAE_tiny
 from lightx2v.utils.envs import *
+from lightx2v.utils.profiler import *
 from lightx2v.utils.registry_factory import RUNNER_REGISTER
 from lightx2v.utils.utils import *
 from lightx2v.utils.utils import best_output_size, cache_video
@@ -395,6 +396,7 @@ class MultiModelStruct:
        self.get_current_model_index()
        self.model[self.cur_model_index].infer(inputs)
+    @ProfilingContext4DebugL2("Swtich models in infer_main costs")
    def get_current_model_index(self):
        if self.scheduler.timesteps[self.scheduler.step_index] >= self.boundary_timestep:
            logger.info(f"using - HIGH - noise model at step_index {self.scheduler.step_index + 1}")