Unverified Commit 2620398e authored by Gu Shiqiao, committed by GitHub

update disk offload (#589)

parent 22484a22
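
This commit touches the Gradio demos, the lazy-load weight loaders, and the audio pipeline's optional BSON import. The most repeated change, across all the weight modules below, is a path check: the per-block shard path is only constructed when the lazy-load target is a directory; if it already points at a single safetensors file, that file is used as-is. A minimal sketch of the pattern (the helper name is hypothetical; the diff inlines the check at each call site):

```python
import os
from pathlib import Path


def resolve_lazy_load_path(lazy_load_file, block_index):
    # The lazy-load target may be a single .safetensors file or a directory
    # of per-block shards named block_{i}.safetensors.
    if Path(lazy_load_file).is_file():
        return lazy_load_file
    return os.path.join(lazy_load_file, f"block_{block_index}.safetensors")
```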
@@ -755,7 +755,7 @@ def auto_configure(resolution):
if is_ada_architecture_gpu():
quant_op_priority = ["q8f", "vllm", "sgl"]
else:
quant_op_priority = ["sgl", "vllm", "q8f"]
quant_op_priority = ["vllm", "sgl", "q8f"]
for op in attn_priority:
if dict(available_attn_ops).get(op):
@@ -890,10 +890,7 @@ def auto_configure(resolution):
)
def main():
with gr.Blocks(
title="Lightx2v (Lightweight Video Inference and Generation Engine)",
css="""
css = """
.main-content { max-width: 1600px; margin: auto; padding: 20px; }
.warning { color: #ff6b6b; font-weight: bold; }
@@ -961,10 +958,13 @@ def main():
border-radius: 10px;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
}
""",
) as demo:
gr.Markdown(f"# 🎬 LightX2V Video Generator")
"""
def main():
with gr.Blocks(title="Lightx2v (Lightweight Video Inference and Generation Engine)") as demo:
gr.Markdown(f"# 🎬 LightX2V Video Generator")
gr.HTML(f"<style>{css}</style>")
# Main layout: left and right columns
with gr.Row():
# Left: configuration and input area
......
@@ -755,7 +755,7 @@ def auto_configure(resolution):
if is_ada_architecture_gpu():
quant_op_priority = ["q8f", "vllm", "sgl"]
else:
quant_op_priority = ["sgl", "vllm", "q8f"]
quant_op_priority = ["vllm", "sgl", "q8f"]
for op in attn_priority:
if dict(available_attn_ops).get(op):
@@ -890,10 +890,7 @@ def auto_configure(resolution):
)
def main():
with gr.Blocks(
title="Lightx2v (轻量级视频推理和生成引擎)",
css="""
css = """
.main-content { max-width: 1600px; margin: auto; padding: 20px; }
.warning { color: #ff6b6b; font-weight: bold; }
@@ -961,10 +958,13 @@ def main():
border-radius: 10px;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
}
""",
) as demo:
gr.Markdown(f"# 🎬 LightX2V 视频生成器")
"""
def main():
with gr.Blocks(title="Lightx2v (轻量级视频推理和生成引擎)") as demo:
gr.Markdown(f"# 🎬 LightX2V 视频生成器")
gr.HTML(f"<style>{css}</style>")
# 主布局:左右分栏
with gr.Row():
# 左侧:配置和输入区域
......
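
Both demo files above (English and Chinese) get the same refactor: the stylesheet moves out of the `gr.Blocks(css=...)` argument into a module-level `css` string that is injected with `gr.HTML`. A minimal sketch of the resulting structure, with the CSS abridged and the layout elided:

```python
import gradio as gr

css = """
.main-content { max-width: 1600px; margin: auto; padding: 20px; }
.warning { color: #ff6b6b; font-weight: bold; }
"""


def main():
    with gr.Blocks(title="Lightx2v (Lightweight Video Inference and Generation Engine)") as demo:
        gr.Markdown("# 🎬 LightX2V Video Generator")
        gr.HTML(f"<style>{css}</style>")  # apply the module-level stylesheet
        with gr.Row():
            ...  # left: configuration and inputs; right: output area
    demo.launch()


if __name__ == "__main__":
    main()
```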
@@ -14,11 +14,11 @@
# Lightx2v project root directory path
# Example: /home/user/lightx2v or /data/video_gen/lightx2v
lightx2v_path=/path/to/LightX2V
lightx2v_path=/data/video_gen/lightx2v_debug/LightX2V
# Model path configuration
# Example: /path/to/Wan2.1-I2V-14B-720P-Lightx2v
model_path=/path/to/models
model_path=/models/
# Server configuration
server_name="0.0.0.0"
......
import time
from concurrent.futures import ThreadPoolExecutor
import torch
@@ -116,12 +115,12 @@ class WeightAsyncStreamManager(object):
self.prefetch_futures.append(future)
def swap_cpu_buffers(self):
wait_start = time.time()
already_done = all(f.done() for f in self.prefetch_futures)
# wait_start = time.time()
# already_done = all(f.done() for f in self.prefetch_futures)
for f in self.prefetch_futures:
f.result()
wait_time = time.time() - wait_start
logger.debug(f"[Prefetch] block {self.prefetch_block_idx}: wait={wait_time:.3f}s, already_done={already_done}")
# wait_time = time.time() - wait_start
# logger.debug(f"[Prefetch] block {self.prefetch_block_idx}: wait={wait_time:.3f}s, already_done={already_done}")
self.cpu_buffers = [self.cpu_buffers[1], self.cpu_buffers[0]]
def __del__(self):
......
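
The hunk above removes the timing instrumentation from `swap_cpu_buffers` but keeps its behavior: block until every prefetch future has completed, then swap the two CPU staging buffers. A condensed sketch of that double-buffering scheme, with the prefetch details simplified and the class name hypothetical:

```python
from concurrent.futures import ThreadPoolExecutor


class PingPongPrefetcher:
    """Double-buffered CPU staging: one buffer is consumed while the other is filled."""

    def __init__(self):
        self.executor = ThreadPoolExecutor(max_workers=2)
        self.cpu_buffers = [{}, {}]   # front buffer (index 0) and back buffer (index 1)
        self.prefetch_futures = []

    def prefetch(self, load_fn, *args):
        # Schedule a load into the back buffer without blocking the compute stream.
        self.prefetch_futures.append(self.executor.submit(load_fn, self.cpu_buffers[1], *args))

    def swap_cpu_buffers(self):
        # Wait for all outstanding loads, then promote the freshly filled buffer to the front.
        for f in self.prefetch_futures:
            f.result()
        self.prefetch_futures.clear()
        self.cpu_buffers = [self.cpu_buffers[1], self.cpu_buffers[0]]
```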
import os
import re
from abc import ABCMeta, abstractmethod
from pathlib import Path
import torch
from safetensors import safe_open
@@ -130,7 +131,10 @@ class MMWeight(MMWeightTemplate):
def _get_source_tensor(self, source_name, weight_dict=None):
if self.lazy_load:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{source_name.split('.')[1]}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{source_name.split('.')[1]}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as lazy_load_file:
return lazy_load_file.get_tensor(source_name)
return weight_dict[source_name]
@@ -150,7 +154,10 @@ class MMWeight(MMWeightTemplate):
def _load_cpu_pin_buffers(self):
if self.lazy_load:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.weight_name.split('.')[1]}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.weight_name.split('.')[1]}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as lazy_load_file:
weight_tensor = lazy_load_file.get_tensor(self.weight_name)
self.pin_weight = self._create_pin_tensor(weight_tensor, transpose=True)
@@ -210,8 +217,10 @@ class MMWeight(MMWeightTemplate):
self.bias_name = re.sub(r"\.\d+", lambda m: f".{adapter_block_index}", self.bias_name, count=1)
else:
self.bias_name = re.sub(r"\.\d+", lambda m: f".{block_index}", self.bias_name, count=1)
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as lazy_load_file:
weight_tensor = lazy_load_file.get_tensor(self.weight_name).t()
self.pin_weight = self.pin_weight.copy_(weight_tensor)
@@ -294,7 +303,10 @@ class MMWeightQuantTemplate(MMWeightTemplate):
def _load_cuda_buffers(self, weight_dict):
if self.lazy_load:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.weight_name.split('.')[1]}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.weight_name.split('.')[1]}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as source:
self.weight_cuda_buffer, self.weight_scale_cuda_buffer = self._get_cuda_tensor_pair(source, self.lazy_load)
self.bias_cuda_buffer = self._get_cuda_bias_tensor(source, self.lazy_load)
@@ -334,7 +346,10 @@ class MMWeightQuantTemplate(MMWeightTemplate):
def _get_cpu_pin_tensor_pair(self, source, is_lazy):
if is_lazy:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.weight_name.split('.')[1]}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.weight_name.split('.')[1]}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as source:
weight_tensor = source.get_tensor(self.weight_name)
scale_tensor = source.get_tensor(self.weight_scale_name)
@@ -353,7 +368,10 @@ class MMWeightQuantTemplate(MMWeightTemplate):
if self.bias_name is None:
return None
if is_lazy:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.weight_name.split('.')[1]}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.weight_name.split('.')[1]}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as source:
bias_tensor = source.get_tensor(self.bias_name)
if not self.bias_force_fp32:
@@ -673,8 +691,10 @@ class MMWeightQuantTemplate(MMWeightTemplate):
self.bias_name = re.sub(r"\.\d+", lambda m: f".{adapter_block_index}", self.bias_name, count=1)
else:
self.bias_name = re.sub(r"\.\d+", lambda m: f".{block_index}", self.bias_name, count=1)
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as lazy_load_file:
if self.weight_need_transpose:
weight_tensor = lazy_load_file.get_tensor(self.weight_name).t()
......
import os
import re
from abc import ABCMeta, abstractmethod
from pathlib import Path
import torch
from safetensors import safe_open
@@ -55,7 +56,10 @@ class LNWeightTemplate(metaclass=ABCMeta):
if name is None:
return None
if self.lazy_load:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{name.split('.')[1]}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{name.split('.')[1]}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as lazy_load_file:
tensor = lazy_load_file.get_tensor(name)
if use_infer_dtype:
@@ -155,7 +159,10 @@ class LNWeightTemplate(metaclass=ABCMeta):
def load_state_dict_from_disk(self, block_index, adapter_block_index=None):
if self.weight_name is not None:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
if self.is_post_adapter:
self.weight_name = re.sub(r"\.\d+", lambda m: f".{adapter_block_index}", self.weight_name, count=1)
else:
@@ -167,7 +174,10 @@ class LNWeightTemplate(metaclass=ABCMeta):
del weight_tensor
if self.bias_name is not None:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
if self.is_post_adapter:
assert adapter_block_index is not None
self.bias_name = re.sub(r"\.\d+", lambda m: f".{adapter_block_index}", self.bias_name, count=1)
......
import os
import re
from abc import ABCMeta, abstractmethod
from pathlib import Path
import torch
from safetensors import safe_open
@@ -48,7 +49,10 @@ class RMSWeightTemplate(metaclass=ABCMeta):
def _get_weight_tensor(self, weight_dict=None, use_infer_dtype=False):
if self.lazy_load:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.weight_name.split('.')[1]}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.weight_name.split('.')[1]}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as lazy_load_file:
tensor = lazy_load_file.get_tensor(self.weight_name)
if use_infer_dtype:
@@ -111,7 +115,10 @@ class RMSWeightTemplate(metaclass=ABCMeta):
self.weight_name = re.sub(r"\.\d+", lambda m: f".{adapter_block_index}", self.weight_name, count=1)
else:
self.weight_name = re.sub(r"\.\d+", lambda m: f".{block_index}", self.weight_name, count=1)
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as lazy_load_file:
weight_tensor = lazy_load_file.get_tensor(self.weight_name).to(self.infer_dtype)
self.pin_weight = self.pin_weight.copy_(weight_tensor)
......
import os
import re
from pathlib import Path
import torch
from safetensors import safe_open
@@ -41,7 +42,10 @@ class DefaultTensor:
def _get_tensor(self, weight_dict=None, use_infer_dtype=False):
if self.lazy_load:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.tensor_name.split('.')[1]}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{self.tensor_name.split('.')[1]}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as lazy_load_file:
tensor = lazy_load_file.get_tensor(self.tensor_name)
if use_infer_dtype:
@@ -96,7 +100,10 @@ class DefaultTensor:
self.tensor_name = re.sub(r"\.\d+", lambda m: f".{adapter_block_index}", self.tensor_name, count=1)
else:
self.tensor_name = re.sub(r"\.\d+", lambda m: f".{block_index}", self.tensor_name, count=1)
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
if Path(self.lazy_load_file).is_file():
lazy_load_file_path = self.lazy_load_file
else:
lazy_load_file_path = os.path.join(self.lazy_load_file, f"block_{block_index}.safetensors")
with safe_open(lazy_load_file_path, framework="pt", device="cpu") as lazy_load_file:
tensor = lazy_load_file.get_tensor(self.tensor_name).to(self.infer_dtype)
self.pin_tensor = self.pin_tensor.copy_(tensor)
......
@@ -14,8 +14,13 @@ import numpy as np
import torch
import torch.distributed as dist
import zmq
from bson import BSON
from loguru import logger
try:
from bson import BSON
except ImportError:
BSON = None
logger.warning("BSON is not installed")
from scipy.signal import resample
......
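
With the guarded import above, `BSON` is left as `None` when the package is missing, so any code path that actually serializes with it presumably needs a runtime check. A hedged sketch reusing the diff's own guard (the function and payload are hypothetical):

```python
from loguru import logger

try:
    from bson import BSON
except ImportError:
    BSON = None
    logger.warning("BSON is not installed")


def encode_payload(payload: dict) -> bytes:
    # Fail with an actionable message instead of calling encode() on None.
    if BSON is None:
        raise RuntimeError("bson is not installed; install pymongo to enable BSON serialization")
    return BSON.encode(payload)
```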
@@ -168,18 +168,20 @@ class WanModel(CompiledMethodsMixin):
safetensors_path = self.model_path
if os.path.isdir(safetensors_path):
safetensors_files = glob.glob(os.path.join(safetensors_path, "*.safetensors"))
if self.lazy_load:
self.lazy_load_path = safetensors_path
non_block_file = os.path.join(safetensors_path, "non_block.safetensors")
if os.path.exists(non_block_file):
safetensors_files = [non_block_file]
else:
raise ValueError(f"Non-block file not found in {safetensors_path}. Please check the model path.")
else:
safetensors_files = glob.glob(os.path.join(safetensors_path, "*.safetensors"))
else:
if self.lazy_load:
self.lazy_load_path = safetensors_path
safetensors_files = [safetensors_path]
if self.lazy_load:
self.lazy_load_path = safetensors_path
non_block_file = os.path.join(safetensors_path, "non_block.safetensors")
if os.path.exists(non_block_file):
safetensors_files = [non_block_file]
else:
raise ValueError(f"Non-block file not found in {safetensors_path}. Please check the model path. Lazy load mode only supports loading chunked model weights.")
weight_dict = {}
for file_path in safetensors_files:
if self.config.get("adapter_model_path", None) is not None:
@@ -210,19 +212,21 @@ class WanModel(CompiledMethodsMixin):
return weight_dict
if os.path.isdir(safetensors_path):
safetensors_files = glob.glob(os.path.join(safetensors_path, "*.safetensors"))
if self.lazy_load:
self.lazy_load_path = safetensors_path
non_block_file = os.path.join(safetensors_path, "non_block.safetensors")
if os.path.exists(non_block_file):
safetensors_files = [non_block_file]
else:
raise ValueError(f"Non-block file not found in {safetensors_path}. Please check the model path.")
else:
safetensors_files = glob.glob(os.path.join(safetensors_path, "*.safetensors"))
else:
if self.lazy_load:
self.lazy_load_path = safetensors_path
safetensors_files = [safetensors_path]
safetensors_path = os.path.dirname(safetensors_path)
if self.lazy_load:
self.lazy_load_path = safetensors_path
non_block_file = os.path.join(safetensors_path, "non_block.safetensors")
if os.path.exists(non_block_file):
safetensors_files = [non_block_file]
else:
raise ValueError(f"Non-block file not found in {safetensors_path}. Please check the model path. Lazy load mode only supports loading chunked model weights.")
weight_dict = {}
for safetensor_path in safetensors_files:
if self.config.get("adapter_model_path", None) is not None:
......
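
Both WanModel hunks above converge on the same discovery rule: in lazy-load mode only chunked weights are supported, i.e. per-block shards that sit next to a `non_block.safetensors` file, and only that file is loaded eagerly. A condensed sketch of the rule, following the second hunk (the helper name is hypothetical):

```python
import glob
import os


def collect_safetensors_files(safetensors_path, lazy_load):
    # Returns (files to load eagerly, directory used for lazy per-block loading or None).
    if os.path.isfile(safetensors_path):
        if not lazy_load:
            return [safetensors_path], None
        # Lazy load works on the shard directory, not the single file that was passed in.
        safetensors_path = os.path.dirname(safetensors_path)
    if not lazy_load:
        return glob.glob(os.path.join(safetensors_path, "*.safetensors")), None
    non_block_file = os.path.join(safetensors_path, "non_block.safetensors")
    if not os.path.exists(non_block_file):
        raise ValueError(f"Non-block file not found in {safetensors_path}. Lazy load mode only supports loading chunked model weights.")
    return [non_block_file], safetensors_path
```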