Commit 27c5575f authored by Yang Yong (雍洋), committed by GitHub

Support Multi Levels Profile Log (#290)

parent dd870f3f
@@ -46,7 +46,7 @@ gpu_id=0
export CUDA_VISIBLE_DEVICES=$gpu_id
export CUDA_LAUNCH_BLOCKING=1
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
-export ENABLE_PROFILING_DEBUG=true
+export PROFILING_DEBUG_LEVEL=2
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
# ==================== Parameter Parsing ====================
......
@@ -45,7 +45,7 @@ set gpu_id=0
REM ==================== Environment Variables Setup ====================
set CUDA_VISIBLE_DEVICES=%gpu_id%
set PYTHONPATH=%lightx2v_path%;%PYTHONPATH%
-set ENABLE_PROFILING_DEBUG=true
+set PROFILING_DEBUG_LEVEL=2
set PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
REM ==================== Parameter Parsing ====================
......
@@ -21,7 +21,7 @@ from lightx2v.deploy.server.auth import AuthManager
from lightx2v.deploy.server.metrics import MetricMonitor
from lightx2v.deploy.server.monitor import ServerMonitor, WorkerStatus
from lightx2v.deploy.task_manager import LocalTaskManager, PostgresSQLTaskManager, TaskStatus
-from lightx2v.utils.profiler import ProfilingContext
+from lightx2v.utils.profiler import *
from lightx2v.utils.service_utils import ProcessManager
# =========================
@@ -679,7 +679,7 @@ if __name__ == "__main__":
args = parser.parse_args()
logger.info(f"args: {args}")
-with ProfilingContext("Init Server Cost"):
+with ProfilingContext4DebugL1("Init Server Cost"):
model_pipelines = Pipeline(args.pipeline_json)
auth_manager = AuthManager()
if args.task_url.startswith("/"):
......
@@ -16,14 +16,14 @@ from lightx2v.deploy.common.utils import class_try_catch_async
from lightx2v.infer import init_runner # noqa
from lightx2v.models.runners.graph_runner import GraphRunner
from lightx2v.utils.envs import CHECK_ENABLE_GRAPH_MODE
-from lightx2v.utils.profiler import ProfilingContext
+from lightx2v.utils.profiler import *
from lightx2v.utils.registry_factory import RUNNER_REGISTER
from lightx2v.utils.set_config import set_config, set_parallel_config
from lightx2v.utils.utils import seed_all
class BaseWorker:
-@ProfilingContext("Init Worker Worker Cost:")
+@ProfilingContext4DebugL1("Init Worker Worker Cost:")
def __init__(self, args):
config = set_config(args)
config["mode"] = ""
......
@@ -15,7 +15,7 @@ from lightx2v.models.runners.wan.wan_runner import Wan22MoeRunner, WanRunner #
from lightx2v.models.runners.wan.wan_skyreels_v2_df_runner import WanSkyreelsV2DFRunner # noqa: F401
from lightx2v.models.runners.wan.wan_vace_runner import WanVaceRunner # noqa: F401
from lightx2v.utils.envs import *
-from lightx2v.utils.profiler import ProfilingContext
+from lightx2v.utils.profiler import *
from lightx2v.utils.registry_factory import RUNNER_REGISTER
from lightx2v.utils.set_config import print_config, set_config, set_parallel_config
from lightx2v.utils.utils import seed_all
@@ -103,7 +103,7 @@ def main():
print_config(config)
-with ProfilingContext("Total Cost"):
+with ProfilingContext4DebugL1("Total Cost"):
runner = init_runner(config)
runner.run_pipeline()
......
@@ -10,7 +10,7 @@ from requests.exceptions import RequestException
from lightx2v.utils.envs import *
from lightx2v.utils.generate_task_id import generate_task_id
-from lightx2v.utils.profiler import ProfilingContext, ProfilingContext4Debug
+from lightx2v.utils.profiler import *
from lightx2v.utils.utils import save_to_video, vae_to_comfyui_image
from .base_runner import BaseRunner
@@ -60,7 +60,7 @@ class DefaultRunner(BaseRunner):
else:
raise ValueError(f"Unsupported VFI model: {self.config['video_frame_interpolation']['algo']}")
-@ProfilingContext("Load models")
+@ProfilingContext4DebugL2("Load models")
def load_model(self):
self.model = self.load_transformer()
self.text_encoders = self.load_text_encoder()
@@ -116,13 +116,13 @@ class DefaultRunner(BaseRunner):
self.check_stop()
logger.info(f"==> step_index: {step_index + 1} / {total_steps}")
-with ProfilingContext4Debug("step_pre"):
+with ProfilingContext4DebugL1("step_pre"):
self.model.scheduler.step_pre(step_index=step_index)
-with ProfilingContext4Debug("🚀 infer_main"):
+with ProfilingContext4DebugL1("🚀 infer_main"):
self.model.infer(self.inputs)
-with ProfilingContext4Debug("step_post"):
+with ProfilingContext4DebugL1("step_post"):
self.model.scheduler.step_post()
if self.progress_callback:
@@ -155,7 +155,7 @@ class DefaultRunner(BaseRunner):
img = TF.to_tensor(img_ori).sub_(0.5).div_(0.5).unsqueeze(0).cuda()
return img, img_ori
-@ProfilingContext("Run Encoders")
+@ProfilingContext4DebugL2("Run Encoders")
def _run_input_encoder_local_i2v(self):
prompt = self.config["prompt_enhanced"] if self.config["use_prompt_enhancer"] else self.config["prompt"]
img, img_ori = self.read_image_input(self.config["image_path"])
@@ -166,7 +166,7 @@ class DefaultRunner(BaseRunner):
gc.collect()
return self.get_encoder_output_i2v(clip_encoder_out, vae_encode_out, text_encoder_output, img)
-@ProfilingContext("Run Encoders")
+@ProfilingContext4DebugL2("Run Encoders")
def _run_input_encoder_local_t2v(self):
prompt = self.config["prompt_enhanced"] if self.config["use_prompt_enhancer"] else self.config["prompt"]
text_encoder_output = self.run_text_encoder(prompt, None)
@@ -177,7 +177,7 @@ class DefaultRunner(BaseRunner):
"image_encoder_output": None,
}
-@ProfilingContext("Run Encoders")
+@ProfilingContext4DebugL2("Run Encoders")
def _run_input_encoder_local_flf2v(self):
prompt = self.config["prompt_enhanced"] if self.config["use_prompt_enhancer"] else self.config["prompt"]
first_frame, _ = self.read_image_input(self.config["image_path"])
@@ -189,7 +189,7 @@ class DefaultRunner(BaseRunner):
gc.collect()
return self.get_encoder_output_i2v(clip_encoder_out, vae_encode_out, text_encoder_output)
-@ProfilingContext("Run Encoders")
+@ProfilingContext4DebugL2("Run Encoders")
def _run_input_encoder_local_vace(self):
prompt = self.config["prompt_enhanced"] if self.config["use_prompt_enhancer"] else self.config["prompt"]
src_video = self.config.get("src_video", None)
@@ -219,12 +219,12 @@ class DefaultRunner(BaseRunner):
if self.config.get("model_cls") == "wan2.2" and self.config["task"] == "i2v":
self.inputs["image_encoder_output"]["vae_encoder_out"] = None
-@ProfilingContext("Run DiT")
+@ProfilingContext4DebugL2("Run DiT")
def run_main(self, total_steps=None):
self.init_run()
for segment_idx in range(self.video_segment_num):
-logger.info(f"🔄 segment_idx: {segment_idx + 1}/{self.video_segment_num}")
-with ProfilingContext(f"segment end2end {segment_idx + 1}/{self.video_segment_num}"):
+logger.info(f"🔄 start segment {segment_idx + 1}/{self.video_segment_num}")
+with ProfilingContext4DebugL1(f"segment end2end {segment_idx + 1}/{self.video_segment_num}"):
self.check_stop()
# 1. default do nothing
self.init_run_segment(segment_idx)
@@ -236,7 +236,7 @@ class DefaultRunner(BaseRunner):
self.end_run_segment()
self.end_run()
-@ProfilingContext("Run VAE Decoder")
+@ProfilingContext4DebugL1("Run VAE Decoder")
def run_vae_decoder(self, latents):
if self.config.get("lazy_load", False) or self.config.get("unload_modules", False):
self.vae_decoder = self.load_vae_decoder()
......
from loguru import logger
-from lightx2v.utils.profiler import ProfilingContext4Debug
+from lightx2v.utils.profiler import *
class GraphRunner:
@@ -13,7 +13,7 @@ class GraphRunner:
logger.info("🚀 Starting Model Compilation - Please wait, this may take a while... 🚀")
logger.info("=" * 60)
-with ProfilingContext4Debug("compile"):
+with ProfilingContext4DebugL2("compile"):
self.runner.run_step()
logger.info("=" * 60)
......
@@ -10,7 +10,7 @@ from lightx2v.models.networks.qwen_image.model import QwenImageTransformerModel
from lightx2v.models.runners.default_runner import DefaultRunner
from lightx2v.models.schedulers.qwen_image.scheduler import QwenImageScheduler
from lightx2v.models.video_encoders.hf.qwen_image.vae import AutoencoderKLQwenImageVAE
-from lightx2v.utils.profiler import ProfilingContext
+from lightx2v.utils.profiler import *
from lightx2v.utils.registry_factory import RUNNER_REGISTER
@@ -32,7 +32,7 @@ class QwenImageRunner(DefaultRunner):
def __init__(self, config):
super().__init__(config)
-@ProfilingContext("Load models")
+@ProfilingContext4DebugL2("Load models")
def load_model(self):
self.model = self.load_transformer()
self.text_encoders = self.load_text_encoder()
@@ -69,7 +69,7 @@ class QwenImageRunner(DefaultRunner):
else:
assert NotImplementedError
-@ProfilingContext("Run DiT")
+@ProfilingContext4DebugL2("Run DiT")
def _run_dit_local(self, total_steps=None):
if self.config.get("lazy_load", False) or self.config.get("unload_modules", False):
self.model = self.load_transformer()
@@ -81,7 +81,7 @@ class QwenImageRunner(DefaultRunner):
self.end_run()
return latents, generator
-@ProfilingContext("Run Encoders")
+@ProfilingContext4DebugL2("Run Encoders")
def _run_input_encoder_local_t2i(self):
prompt = self.config["prompt_enhanced"] if self.config["use_prompt_enhancer"] else self.config["prompt"]
text_encoder_output = self.run_text_encoder(prompt)
@@ -92,7 +92,7 @@ class QwenImageRunner(DefaultRunner):
"image_encoder_output": None,
}
-@ProfilingContext("Run Encoders")
+@ProfilingContext4DebugL2("Run Encoders")
def _run_input_encoder_local_i2i(self):
image = Image.open(self.config["image_path"])
prompt = self.config["prompt_enhanced"] if self.config["use_prompt_enhancer"] else self.config["prompt"]
@@ -125,20 +125,18 @@ class QwenImageRunner(DefaultRunner):
return {"image_latents": image_latents}
def run(self, total_steps=None):
-from lightx2v.utils.profiler import ProfilingContext4Debug
if total_steps is None:
total_steps = self.model.scheduler.infer_steps
for step_index in range(total_steps):
logger.info(f"==> step_index: {step_index + 1} / {total_steps}")
-with ProfilingContext4Debug("step_pre"):
+with ProfilingContext4DebugL1("step_pre"):
self.model.scheduler.step_pre(step_index=step_index)
-with ProfilingContext4Debug("🚀 infer_main"):
+with ProfilingContext4DebugL1("🚀 infer_main"):
self.model.infer(self.inputs)
-with ProfilingContext4Debug("step_post"):
+with ProfilingContext4DebugL1("step_post"):
self.model.scheduler.step_post()
if self.progress_callback:
@@ -181,7 +179,7 @@ class QwenImageRunner(DefaultRunner):
def run_image_encoder(self):
pass
-@ProfilingContext("Load models")
+@ProfilingContext4DebugL2("Load models")
def load_model(self):
self.model = self.load_transformer()
self.text_encoders = self.load_text_encoder()
@@ -189,7 +187,7 @@ class QwenImageRunner(DefaultRunner):
self.vae = self.load_vae()
self.vfi_model = self.load_vfi_model() if "video_frame_interpolation" in self.config else None
-@ProfilingContext("Run VAE Decoder")
+@ProfilingContext4DebugL1("Run VAE Decoder")
def _run_vae_decoder_local(self, latents, generator):
if self.config.get("lazy_load", False) or self.config.get("unload_modules", False):
self.vae_decoder = self.load_vae()
......
@@ -25,7 +25,7 @@ from lightx2v.models.runners.wan.wan_runner import WanRunner
from lightx2v.models.schedulers.wan.audio.scheduler import EulerScheduler
from lightx2v.models.video_encoders.hf.wan.vae_2_2 import Wan2_2_VAE
from lightx2v.utils.envs import *
-from lightx2v.utils.profiler import ProfilingContext, ProfilingContext4Debug
+from lightx2v.utils.profiler import *
from lightx2v.utils.registry_factory import RUNNER_REGISTER
from lightx2v.utils.utils import find_torch_model_path, load_weights, save_to_video, vae_to_comfyui_image
@@ -368,7 +368,7 @@ class WanAudioRunner(WanRunner): # type:ignore
gc.collect()
return vae_encoder_out
-@ProfilingContext("Run Encoders")
+@ProfilingContext4DebugL2("Run Encoders")
def _run_input_encoder_local_r2v_audio(self):
prompt = self.config["prompt_enhanced"] if self.config["use_prompt_enhancer"] else self.config["prompt"]
img = self.read_image_input(self.config["image_path"])
@@ -410,7 +410,7 @@ class WanAudioRunner(WanRunner): # type:ignore
self.vae_encoder = self.load_vae_encoder()
_, nframe, height, width = self.model.scheduler.latents.shape
-with ProfilingContext4Debug("vae_encoder in init run segment"):
+with ProfilingContext4DebugL1("vae_encoder in init run segment"):
if self.config.model_cls == "wan2.2_audio":
if prev_video is not None:
prev_latents = self.vae_encoder.encode(prev_frames.to(dtype))
@@ -460,7 +460,7 @@ class WanAudioRunner(WanRunner): # type:ignore
self.cut_audio_list = []
self.prev_video = None
-@ProfilingContext4Debug("Init run segment")
+@ProfilingContext4DebugL1("Init run segment")
def init_run_segment(self, segment_idx, audio_array=None):
self.segment_idx = segment_idx
if audio_array is not None:
@@ -485,7 +485,7 @@ class WanAudioRunner(WanRunner): # type:ignore
if segment_idx > 0:
self.model.scheduler.reset(self.inputs["previmg_encoder_output"])
-@ProfilingContext4Debug("End run segment")
+@ProfilingContext4DebugL1("End run segment")
def end_run_segment(self):
self.gen_video = torch.clamp(self.gen_video, -1, 1).to(torch.float)
useful_length = self.segment.end_frame - self.segment.start_frame
@@ -575,7 +575,7 @@ class WanAudioRunner(WanRunner): # type:ignore
max_fail_count = 10
while True:
-with ProfilingContext4Debug(f"stream segment get audio segment {segment_idx}"):
+with ProfilingContext4DebugL1(f"stream segment get audio segment {segment_idx}"):
self.check_stop()
audio_array = self.va_reader.get_audio_segment(timeout=fetch_timeout)
if audio_array is None:
@@ -585,7 +585,7 @@ class WanAudioRunner(WanRunner): # type:ignore
raise Exception(f"Failed to get audio chunk {fail_count} times, stop reader")
continue
-with ProfilingContext4Debug(f"stream segment end2end {segment_idx}"):
+with ProfilingContext4DebugL1(f"stream segment end2end {segment_idx}"):
fail_count = 0
self.init_run_segment(segment_idx, audio_array)
latents = self.run_segment(total_steps=None)
@@ -603,7 +603,7 @@ class WanAudioRunner(WanRunner): # type:ignore
self.va_recorder.stop(wait=False)
self.va_recorder = None
-@ProfilingContext4Debug("Process after vae decoder")
+@ProfilingContext4DebugL1("Process after vae decoder")
def process_images_after_vae_decoder(self, save_video=True):
# Merge results
gen_lvideo = torch.cat(self.gen_video_list, dim=2).float()
@@ -728,12 +728,12 @@ class WanAudioRunner(WanRunner): # type:ignore
audio_adapter.load_state_dict(weights_dict, strict=False)
return audio_adapter.to(dtype=GET_DTYPE())
-@ProfilingContext("Load models")
def load_model(self):
super().load_model()
-self.audio_encoder = self.load_audio_encoder()
-self.audio_adapter = self.load_audio_adapter()
-self.model.set_audio_adapter(self.audio_adapter)
+with ProfilingContext4DebugL2("Load audio encoder and adapter"):
+    self.audio_encoder = self.load_audio_encoder()
+    self.audio_adapter = self.load_audio_adapter()
+    self.model.set_audio_adapter(self.audio_adapter)
def set_target_shape(self):
"""Set target shape for generation"""
......
@@ -9,7 +9,7 @@ from lightx2v.models.networks.wan.model import WanModel
from lightx2v.models.runners.wan.wan_runner import WanRunner
from lightx2v.models.schedulers.wan.step_distill.scheduler import WanStepDistillScheduler
from lightx2v.utils.envs import *
-from lightx2v.utils.profiler import ProfilingContext4Debug
+from lightx2v.utils.profiler import *
from lightx2v.utils.registry_factory import RUNNER_REGISTER
@@ -85,11 +85,11 @@ class WanCausVidRunner(WanRunner):
if fragment_idx > 0:
logger.info("recompute the kv_cache ...")
-with ProfilingContext4Debug("step_pre"):
+with ProfilingContext4DebugL1("step_pre"):
self.model.scheduler.latents = self.model.scheduler.last_sample
self.model.scheduler.step_pre(step_index=self.model.scheduler.infer_steps - 1)
-with ProfilingContext4Debug("🚀 infer_main"):
+with ProfilingContext4DebugL1("🚀 infer_main"):
self.model.infer(self.inputs, kv_start, kv_end)
kv_start += self.num_frame_per_block * self.frame_seq_length
@@ -105,13 +105,13 @@ class WanCausVidRunner(WanRunner):
for step_index in range(self.model.scheduler.infer_steps):
logger.info(f"==> step_index: {step_index + 1} / {self.model.scheduler.infer_steps}")
-with ProfilingContext4Debug("step_pre"):
+with ProfilingContext4DebugL1("step_pre"):
self.model.scheduler.step_pre(step_index=step_index)
-with ProfilingContext4Debug("🚀 infer_main"):
+with ProfilingContext4DebugL1("🚀 infer_main"):
self.model.infer(self.inputs, kv_start, kv_end)
-with ProfilingContext4Debug("step_post"):
+with ProfilingContext4DebugL1("step_post"):
self.model.scheduler.step_post()
kv_start += self.num_frame_per_block * self.frame_seq_length
......
@@ -10,7 +10,7 @@ from loguru import logger
from lightx2v.models.runners.wan.wan_runner import WanRunner
from lightx2v.models.schedulers.wan.df.skyreels_v2_df_scheduler import WanSkyreelsV2DFScheduler
from lightx2v.utils.envs import *
-from lightx2v.utils.profiler import ProfilingContext, ProfilingContext4Debug
+from lightx2v.utils.profiler import *
from lightx2v.utils.registry_factory import RUNNER_REGISTER
@@ -55,9 +55,9 @@ class WanSkyreelsV2DFRunner(WanRunner): # Diffustion foring for SkyReelsV2 DF I
def run_input_encoder(self):
image_encoder_output = None
if os.path.isfile(self.config.image_path):
-with ProfilingContext("Run Img Encoder"):
+with ProfilingContext4DebugL2("Run Img Encoder"):
image_encoder_output = self.run_image_encoder(self.config, self.image_encoder, self.vae_model)
-with ProfilingContext("Run Text Encoder"):
+with ProfilingContext4DebugL2("Run Text Encoder"):
text_encoder_output = self.run_text_encoder(self.config["prompt"], self.text_encoders, self.config, image_encoder_output)
self.set_target_shape()
self.inputs = {"text_encoder_output": text_encoder_output, "image_encoder_output": image_encoder_output}
@@ -107,13 +107,13 @@ class WanSkyreelsV2DFRunner(WanRunner): # Diffustion foring for SkyReelsV2 DF I
for step_index in range(self.model.scheduler.infer_steps):
logger.info(f"==> step_index: {step_index + 1} / {self.model.scheduler.infer_steps}")
-with ProfilingContext4Debug("step_pre"):
+with ProfilingContext4DebugL1("step_pre"):
self.model.scheduler.step_pre(step_index=step_index)
-with ProfilingContext4Debug("🚀 infer_main"):
+with ProfilingContext4DebugL1("🚀 infer_main"):
self.model.infer(self.inputs)
-with ProfilingContext4Debug("step_post"):
+with ProfilingContext4DebugL1("step_post"):
self.model.scheduler.step_post()
videos = self.run_vae(self.model.scheduler.latents, self.model.scheduler.generator)
......
@@ -9,7 +9,7 @@ from lightx2v.models.input_encoders.hf.vace.vace_processor import VaceVideoProce
from lightx2v.models.networks.wan.vace_model import WanVaceModel
from lightx2v.models.runners.wan.wan_runner import WanRunner
from lightx2v.utils.envs import *
-from lightx2v.utils.profiler import ProfilingContext
+from lightx2v.utils.profiler import *
from lightx2v.utils.registry_factory import RUNNER_REGISTER
@@ -159,7 +159,7 @@ class WanVaceRunner(WanRunner):
target_shape[0] = int(target_shape[0] / 2)
self.config.target_shape = target_shape
-@ProfilingContext("Run VAE Decoder")
+@ProfilingContext4DebugL1("Run VAE Decoder")
def run_vae_decoder(self, latents):
if self.config.get("lazy_load", False) or self.config.get("unload_modules", False):
self.vae_decoder = self.load_vae_decoder()
......
@@ -4,7 +4,7 @@ from typing import List, Optional, Tuple
import torch
from torch.nn import functional as F
-from lightx2v.utils.profiler import ProfilingContext
+from lightx2v.utils.profiler import *
class RIFEWrapper:
@@ -25,12 +25,12 @@ class RIFEWrapper:
from .train_log.RIFE_HDv3 import Model
self.model = Model()
-with ProfilingContext("Load RIFE model"):
+with ProfilingContext4DebugL2("Load RIFE model"):
self.model.load_model(model_path, -1)
self.model.eval()
self.model.device()
-@ProfilingContext("Interpolate frames")
+@ProfilingContext4DebugL2("Interpolate frames")
def interpolate_frames(
self,
images: torch.Tensor,
......
@@ -17,9 +17,9 @@ DTYPE_MAP = {
@lru_cache(maxsize=None)
-def CHECK_ENABLE_PROFILING_DEBUG():
-    ENABLE_PROFILING_DEBUG = os.getenv("ENABLE_PROFILING_DEBUG", "false").lower() == "true"
-    return ENABLE_PROFILING_DEBUG
+def CHECK_PROFILING_DEBUG_LEVEL(target_level):
+    current_level = int(os.getenv("PROFILING_DEBUG_LEVEL", "0"))
+    return current_level >= target_level
@lru_cache(maxsize=None)
......
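For orientation, the new check in lightx2v.utils.envs is a plain threshold comparison against the PROFILING_DEBUG_LEVEL environment variable. A minimal sketch of its behaviour (illustrative only, not part of the diff):

import os
from lightx2v.utils.envs import CHECK_PROFILING_DEBUG_LEVEL

os.environ["PROFILING_DEBUG_LEVEL"] = "1"  # normally exported by the launch scripts
CHECK_PROFILING_DEBUG_LEVEL(1)  # True: 1 >= 1, so level-1 profiling is enabled
CHECK_PROFILING_DEBUG_LEVEL(2)  # False: 1 < 2, so level-2 profiling stays off
# With the variable unset, the default "0" disables both levels.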
@@ -12,7 +12,6 @@ from lightx2v.utils.envs import *
class _ProfilingContext:
def __init__(self, name):
self.name = name
-self.rank_info = ""
if dist.is_initialized():
self.rank_info = f"Rank {dist.get_rank()}"
else:
@@ -80,5 +79,24 @@ class _NullContext:
return func
ProfilingContext = _ProfilingContext
-ProfilingContext4Debug = _ProfilingContext if CHECK_ENABLE_PROFILING_DEBUG() else _NullContext
+class _ProfilingContextL1(_ProfilingContext):
+    """Level 1 profiling context with Level1_Log prefix."""
+
+    def __init__(self, name):
+        super().__init__(f"Level1_Log {name}")
+
+class _ProfilingContextL2(_ProfilingContext):
+    """Level 2 profiling context with Level2_Log prefix."""
+
+    def __init__(self, name):
+        super().__init__(f"Level2_Log {name}")
+
+"""
+PROFILING_DEBUG_LEVEL=0: [Default] disable all profiling
+PROFILING_DEBUG_LEVEL=1: enable ProfilingContext4DebugL1
+PROFILING_DEBUG_LEVEL=2: enable ProfilingContext4DebugL1 and ProfilingContext4DebugL2
+"""
+ProfilingContext4DebugL1 = _ProfilingContextL1 if CHECK_PROFILING_DEBUG_LEVEL(1) else _NullContext # if user >= 1, enable profiling
+ProfilingContext4DebugL2 = _ProfilingContextL2 if CHECK_PROFILING_DEBUG_LEVEL(2) else _NullContext # if user >= 2, enable profiling
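Taken together, here is a minimal usage sketch of the two level-gated contexts defined above (illustrative only; the import path and class names come from this diff, and the timing output is whatever _ProfilingContext already logs). Because ProfilingContext4DebugL1/L2 are bound to either the real context or _NullContext at import time, PROFILING_DEBUG_LEVEL must be set in the environment before lightx2v.utils.profiler is imported, which is what the export/set lines in the run scripts do:

import os
os.environ["PROFILING_DEBUG_LEVEL"] = "2"  # normally exported by the launch script

from lightx2v.utils.profiler import ProfilingContext4DebugL1, ProfilingContext4DebugL2

# Decorator form, as used for stage timings such as "Load models" (emitted only when level >= 2).
@ProfilingContext4DebugL2("Load models")
def load_model():
    ...

# Context-manager form, as used for the per-step hooks in the denoising loop (emitted when level >= 1).
with ProfilingContext4DebugL1("step_pre"):
    pass

# At level 0 (the default) both names resolve to _NullContext, so the hooks become no-ops.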
@@ -4,7 +4,7 @@ import torch
from loguru import logger
from transformers import AutoModelForCausalLM, AutoTokenizer
-from lightx2v.utils.profiler import ProfilingContext
+from lightx2v.utils.profiler import *
sys_prompt = """
Transform the short prompt into a detailed video-generation caption using this structure:
@@ -40,7 +40,7 @@ class PromptEnhancer:
def to_device(self, device):
self.model = self.model.to(device)
-@ProfilingContext("Run prompt enhancer")
+@ProfilingContext4DebugL1("Run prompt enhancer")
@torch.no_grad()
def __call__(self, prompt):
prompt = prompt.strip()
......
@@ -32,12 +32,12 @@ export DTYPE=BF16
# Note: If set to FP32, it will be slower, so we recommend set ENABLE_GRAPH_MODE to true.
export SENSITIVE_LAYER_DTYPE=FP32
-# Performance Profiling Debug Mode (Debug Only)
+# Performance Profiling Debug Level (Debug Only)
# Enables detailed performance analysis output, such as time cost and memory usage
-# Available options: [true, false]
-# If not set, default value: false
-# Note: This option can be set to false for production.
-export ENABLE_PROFILING_DEBUG=true
+# Available options: [0, 1, 2]
+# If not set, default value: 0
+# Note: This option can be set to 0 for production.
+export PROFILING_DEBUG_LEVEL=2
# Graph Mode Optimization (Performance Enhancement)
# Enables torch.compile for graph optimization, can improve inference performance
@@ -56,6 +56,6 @@ echo "model_path: ${model_path}"
echo "-------------------------------------------------------------------------------"
echo "Model Inference Data Type: ${DTYPE}"
echo "Sensitive Layer Data Type: ${SENSITIVE_LAYER_DTYPE}"
-echo "Performance Profiling Debug Mode: ${ENABLE_PROFILING_DEBUG}"
+echo "Performance Profiling Debug Level: ${PROFILING_DEBUG_LEVEL}"
echo "Graph Mode Optimization: ${ENABLE_GRAPH_MODE}"
echo "==============================================================================="
@@ -27,7 +27,7 @@ export TOKENIZERS_PARALLELISM=false
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
export DTYPE=BF16
export SENSITIVE_LAYER_DTYPE=FP32
-export ENABLE_PROFILING_DEBUG=true
+export PROFILING_DEBUG_LEVEL=2
export ENABLE_GRAPH_MODE=false
python -m lightx2v.infer \
......
@@ -26,7 +26,7 @@ export TOKENIZERS_PARALLELISM=false
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
-export ENABLE_PROFILING_DEBUG=true
+export PROFILING_DEBUG_LEVEL=2
export ENABLE_GRAPH_MODE=false
export DTYPE=BF16
......
@@ -26,7 +26,7 @@ export TOKENIZERS_PARALLELISM=false
export PYTHONPATH=${lightx2v_path}:$PYTHONPATH
-export ENABLE_PROFILING_DEBUG=true
+export PROFILING_DEBUG_LEVEL=2
export ENABLE_GRAPH_MODE=false
export DTYPE=BF16
......