Unverified commit 43ce8608, authored by Alec and committed by GitHub
Browse files

fix: prefix omni/diffusion CLI flags with --omni- to avoid vLLM collisions (#6476)


Signed-off-by: alec-flowers <aflowers@nvidia.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent e66f3267
...@@ -148,17 +148,20 @@ class DynamoVllmArgGroup(ArgGroup): ...@@ -148,17 +148,20 @@ class DynamoVllmArgGroup(ArgGroup):
help="Default frames per second for generated videos.", help="Default frames per second for generated videos.",
) )
# Diffusion engine-level args (passed to AsyncOmni constructor) # Diffusion engine-level args (passed to AsyncOmni constructor).
# All flags use the --omni- prefix to avoid collisions with vLLM's
# native engine flags (e.g. --enforce-eager), which are parsed by a
# separate argparse pass and would otherwise be silently consumed here.
add_negatable_bool_argument( add_negatable_bool_argument(
g, g,
flag_name="--enable-layerwise-offload", flag_name="--omni-enable-layerwise-offload",
env_var="DYN_VLLM_ENABLE_LAYERWISE_OFFLOAD", env_var="DYN_VLLM_ENABLE_LAYERWISE_OFFLOAD",
default=False, default=False,
help="Enable layerwise (blockwise) offloading on DiT modules to reduce GPU memory.", help="Enable layerwise (blockwise) offloading on DiT modules to reduce GPU memory.",
) )
add_argument( add_argument(
g, g,
flag_name="--layerwise-num-gpu-layers", flag_name="--omni-layerwise-num-gpu-layers",
env_var="DYN_VLLM_LAYERWISE_NUM_GPU_LAYERS", env_var="DYN_VLLM_LAYERWISE_NUM_GPU_LAYERS",
default=1, default=1,
arg_type=int, arg_type=int,
...@@ -166,21 +169,21 @@ class DynamoVllmArgGroup(ArgGroup): ...@@ -166,21 +169,21 @@ class DynamoVllmArgGroup(ArgGroup):
) )
add_negatable_bool_argument( add_negatable_bool_argument(
g, g,
flag_name="--vae-use-slicing", flag_name="--omni-vae-use-slicing",
env_var="DYN_VLLM_VAE_USE_SLICING", env_var="DYN_VLLM_VAE_USE_SLICING",
default=False, default=False,
help="Enable VAE slicing for memory optimization in diffusion models.", help="Enable VAE slicing for memory optimization in diffusion models.",
) )
add_negatable_bool_argument( add_negatable_bool_argument(
g, g,
flag_name="--vae-use-tiling", flag_name="--omni-vae-use-tiling",
env_var="DYN_VLLM_VAE_USE_TILING", env_var="DYN_VLLM_VAE_USE_TILING",
default=False, default=False,
help="Enable VAE tiling for memory optimization in diffusion models.", help="Enable VAE tiling for memory optimization in diffusion models.",
) )
add_argument( add_argument(
g, g,
flag_name="--boundary-ratio", flag_name="--omni-boundary-ratio",
env_var="DYN_VLLM_BOUNDARY_RATIO", env_var="DYN_VLLM_BOUNDARY_RATIO",
default=0.875, default=0.875,
arg_type=float, arg_type=float,
...@@ -193,7 +196,7 @@ class DynamoVllmArgGroup(ArgGroup): ...@@ -193,7 +196,7 @@ class DynamoVllmArgGroup(ArgGroup):
) )
add_argument( add_argument(
g, g,
flag_name="--flow-shift", flag_name="--omni-flow-shift",
env_var="DYN_VLLM_FLOW_SHIFT", env_var="DYN_VLLM_FLOW_SHIFT",
default=None, default=None,
arg_type=float, arg_type=float,
...@@ -201,7 +204,7 @@ class DynamoVllmArgGroup(ArgGroup): ...@@ -201,7 +204,7 @@ class DynamoVllmArgGroup(ArgGroup):
) )
add_argument( add_argument(
g, g,
flag_name="--diffusion-cache-backend", flag_name="--omni-diffusion-cache-backend",
env_var="DYN_VLLM_DIFFUSION_CACHE_BACKEND", env_var="DYN_VLLM_DIFFUSION_CACHE_BACKEND",
default=None, default=None,
choices=["cache_dit", "tea_cache"], choices=["cache_dit", "tea_cache"],
...@@ -213,28 +216,28 @@ class DynamoVllmArgGroup(ArgGroup): ...@@ -213,28 +216,28 @@ class DynamoVllmArgGroup(ArgGroup):
) )
add_argument( add_argument(
g, g,
flag_name="--diffusion-cache-config", flag_name="--omni-diffusion-cache-config",
env_var="DYN_VLLM_DIFFUSION_CACHE_CONFIG", env_var="DYN_VLLM_DIFFUSION_CACHE_CONFIG",
default=None, default=None,
help="Cache configuration as JSON string (overrides defaults). Only used with --omni.", help="Cache configuration as JSON string (overrides defaults). Only used with --omni.",
) )
add_negatable_bool_argument( add_negatable_bool_argument(
g, g,
flag_name="--enable-cache-dit-summary", flag_name="--omni-enable-cache-dit-summary",
env_var="DYN_VLLM_ENABLE_CACHE_DIT_SUMMARY", env_var="DYN_VLLM_ENABLE_CACHE_DIT_SUMMARY",
default=False, default=False,
help="Enable cache-dit summary logging after diffusion forward passes.", help="Enable cache-dit summary logging after diffusion forward passes.",
) )
add_negatable_bool_argument( add_negatable_bool_argument(
g, g,
flag_name="--enable-cpu-offload", flag_name="--omni-enable-cpu-offload",
env_var="DYN_VLLM_ENABLE_CPU_OFFLOAD", env_var="DYN_VLLM_ENABLE_CPU_OFFLOAD",
default=False, default=False,
help="Enable CPU offloading for diffusion models to reduce GPU memory usage.", help="Enable CPU offloading for diffusion models to reduce GPU memory usage.",
) )
add_negatable_bool_argument( add_negatable_bool_argument(
g, g,
flag_name="--enforce-eager", flag_name="--omni-enforce-eager",
env_var="DYN_VLLM_ENFORCE_EAGER", env_var="DYN_VLLM_ENFORCE_EAGER",
default=False, default=False,
help="Disable torch.compile and force eager execution for diffusion models.", help="Disable torch.compile and force eager execution for diffusion models.",
...@@ -242,7 +245,7 @@ class DynamoVllmArgGroup(ArgGroup): ...@@ -242,7 +245,7 @@ class DynamoVllmArgGroup(ArgGroup):
# Diffusion parallel configuration # Diffusion parallel configuration
add_argument( add_argument(
g, g,
flag_name="--ulysses-degree", flag_name="--omni-ulysses-degree",
env_var="DYN_VLLM_ULYSSES_DEGREE", env_var="DYN_VLLM_ULYSSES_DEGREE",
default=1, default=1,
arg_type=int, arg_type=int,
...@@ -250,7 +253,7 @@ class DynamoVllmArgGroup(ArgGroup): ...@@ -250,7 +253,7 @@ class DynamoVllmArgGroup(ArgGroup):
) )
add_argument( add_argument(
g, g,
flag_name="--ring-degree", flag_name="--omni-ring-degree",
env_var="DYN_VLLM_RING_DEGREE", env_var="DYN_VLLM_RING_DEGREE",
default=1, default=1,
arg_type=int, arg_type=int,
...@@ -258,7 +261,7 @@ class DynamoVllmArgGroup(ArgGroup): ...@@ -258,7 +261,7 @@ class DynamoVllmArgGroup(ArgGroup):
) )
add_argument( add_argument(
g, g,
flag_name="--cfg-parallel-size", flag_name="--omni-cfg-parallel-size",
env_var="DYN_VLLM_CFG_PARALLEL_SIZE", env_var="DYN_VLLM_CFG_PARALLEL_SIZE",
default=1, default=1,
arg_type=int, arg_type=int,
...@@ -313,22 +316,25 @@ class DynamoVllmConfig(ConfigBase): ...@@ -313,22 +316,25 @@ class DynamoVllmConfig(ConfigBase):
# Video encoding # Video encoding
default_video_fps: int = 16 default_video_fps: int = 16
# Diffusion engine-level parameters (passed to AsyncOmni constructor) # Diffusion engine-level parameters (passed to AsyncOmni constructor).
enable_layerwise_offload: bool = False # Field names use omni_ prefix to match the --omni-* CLI flags and avoid
layerwise_num_gpu_layers: int = 1 # collisions with vLLM's native engine args (e.g. enforce_eager).
vae_use_slicing: bool = False omni_enable_layerwise_offload: bool = False
vae_use_tiling: bool = False omni_layerwise_num_gpu_layers: int = 1
boundary_ratio: float = 0.875 omni_vae_use_slicing: bool = False
flow_shift: Optional[float] = None omni_vae_use_tiling: bool = False
diffusion_cache_backend: Optional[str] = None omni_boundary_ratio: float = 0.875
diffusion_cache_config: Optional[str] = None omni_flow_shift: Optional[float] = None
enable_cache_dit_summary: bool = False omni_diffusion_cache_backend: Optional[str] = None
enable_cpu_offload: bool = False omni_diffusion_cache_config: Optional[str] = None
omni_enable_cache_dit_summary: bool = False
omni_enable_cpu_offload: bool = False
omni_enforce_eager: bool = False
# Diffusion parallel configuration # Diffusion parallel configuration
ulysses_degree: int = 1 omni_ulysses_degree: int = 1
ring_degree: int = 1 omni_ring_degree: int = 1
cfg_parallel_size: int = 1 omni_cfg_parallel_size: int = 1
# Headless mode for multi-node TP/PP # Headless mode for multi-node TP/PP
headless: bool = False headless: bool = False
......
...@@ -85,37 +85,36 @@ class BaseOmniHandler(BaseWorkerHandler): ...@@ -85,37 +85,36 @@ class BaseOmniHandler(BaseWorkerHandler):
if config.stage_configs_path: if config.stage_configs_path:
omni_kwargs["stage_configs_path"] = config.stage_configs_path omni_kwargs["stage_configs_path"] = config.stage_configs_path
# Add diffusion engine-level params if present on config # Add diffusion engine-level params if present on config.
diffusion_params = [ # Config fields use the omni_ prefix; map them to AsyncOmni kwarg names.
"enable_layerwise_offload", diffusion_params = {
"layerwise_num_gpu_layers", # config attr → AsyncOmni kwarg
"vae_use_slicing", "omni_enable_layerwise_offload": "enable_layerwise_offload",
"vae_use_tiling", "omni_layerwise_num_gpu_layers": "layerwise_num_gpu_layers",
"boundary_ratio", "omni_vae_use_slicing": "vae_use_slicing",
"flow_shift", "omni_vae_use_tiling": "vae_use_tiling",
"diffusion_cache_backend", "omni_boundary_ratio": "boundary_ratio",
"diffusion_cache_config", "omni_flow_shift": "flow_shift",
"enable_cache_dit_summary", "omni_diffusion_cache_backend": "cache_backend",
"enable_cpu_offload", "omni_diffusion_cache_config": "cache_config",
] "omni_enable_cache_dit_summary": "enable_cache_dit_summary",
for param in diffusion_params: "omni_enable_cpu_offload": "enable_cpu_offload",
if hasattr(config, param): "omni_enforce_eager": "enforce_eager",
value = getattr(config, param) }
for config_attr, kwarg_name in diffusion_params.items():
if hasattr(config, config_attr):
value = getattr(config, config_attr)
if value is not None: if value is not None:
# Map config attribute names to AsyncOmni kwarg names
kwarg_name = param
if param == "diffusion_cache_backend":
kwarg_name = "cache_backend"
elif param == "diffusion_cache_config":
kwarg_name = "cache_config"
omni_kwargs[kwarg_name] = value omni_kwargs[kwarg_name] = value
# Build DiffusionParallelConfig if parallel params are present # Build DiffusionParallelConfig if parallel params are present
if DiffusionParallelConfig is not None and hasattr(config, "ulysses_degree"): if DiffusionParallelConfig is not None and hasattr(
config, "omni_ulysses_degree"
):
parallel_config = DiffusionParallelConfig( parallel_config = DiffusionParallelConfig(
ulysses_degree=getattr(config, "ulysses_degree", 1), ulysses_degree=getattr(config, "omni_ulysses_degree", 1),
ring_degree=getattr(config, "ring_degree", 1), ring_degree=getattr(config, "omni_ring_degree", 1),
cfg_parallel_size=getattr(config, "cfg_parallel_size", 1), cfg_parallel_size=getattr(config, "omni_cfg_parallel_size", 1),
) )
omni_kwargs["parallel_config"] = parallel_config omni_kwargs["parallel_config"] = parallel_config
elif DiffusionParallelConfig is None: elif DiffusionParallelConfig is None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment