[Chore] Remove debug code in model implementation (#35883)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>

[Chore] Remove debug code in model implementation (#35883)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
6e9f21e8 · Isotr0py · GitHub · c1d96340 · 6e9f21e8 · 6e9f21e8
Unverified commit 6e9f21e8, authored Mar 04, 2026 by Isotr0py; committed by GitHub on Mar 03, 2026.
Showing 2 changed files with 0 additions and 178 deletions

vllm/model_executor/models/funaudiochat.py +0 -80

vllm/model_executor/models/nano_nemotron_vl.py +0 -98

No files found.
--- a/vllm/model_executor/models/funaudiochat.py
+++ b/vllm/model_executor/models/funaudiochat.py
@@ -13,7 +13,6 @@ positions via `inputs_embeds`, while `position_ids` (RoPE) remains standard 1D.

 from __future__ import annotations

-import os
 from collections.abc import Iterable, Mapping, Sequence
 from functools import cached_property
 from typing import Any
@@ -924,53 +923,6 @@ class FunAudioChatForConditionalGeneration(nn.Module, SupportsMultiModal, Suppor
                    f"sequence of Tensors (got {type(speech_attention_mask)})"
                )

-        debug = os.getenv("VLLM_FUN_AUDIOCHAT_DEBUG", "") == "1"
-        if debug:
-            print(
-                f"[FunAudioChat] embed_multimodal speech_ids={tuple(speech_ids.shape)} "
-                f"speech_attention_mask={tuple(speech_attention_mask.shape)}",
-                flush=True,
-            )
-            attn_impl = getattr(
-                self.continuous_audio_tower.config, "_attn_implementation", None
-            )
-            print(
-                f"[FunAudioChat] audio_attn_impl={attn_impl}",
-                flush=True,
-            )
-            if hasattr(self.continuous_audio_tower, "conv1"):
-                conv1_w = self.continuous_audio_tower.conv1.weight
-                print(
-                    f"[FunAudioChat] conv1_w_norm={float(conv1_w.norm().item()):.6g}",
-                    flush=True,
-                )
-            try:
-                attn0 = self.continuous_audio_tower.layers[0].self_attn
-                q_norm = float(attn0.q_proj.weight.norm().item())
-                k_norm = float(attn0.k_proj.weight.norm().item())
-                v_norm = float(attn0.v_proj.weight.norm().item())
-                o_norm = float(attn0.out_proj.weight.norm().item())
-                print(
-                    f"[FunAudioChat] attn0_q_norm={q_norm:.6g} "
-                    f"k_norm={k_norm:.6g} "
-                    f"v_norm={v_norm:.6g} "
-                    f"o_norm={o_norm:.6g}",
-                    flush=True,
-                )
-            except Exception:
-                pass
-            if isinstance(input_features, torch.Tensor):
-                print(
-                    f"[FunAudioChat] input_features={tuple(input_features.shape)}",
-                    flush=True,
-                )
-            if isinstance(feature_attention_mask, torch.Tensor):
-                print(
-                    "[FunAudioChat] feature_attention_mask="
-                    f"{tuple(feature_attention_mask.shape)}",
-                    flush=True,
-                )
-
        group_size = int(self.audio_tower.group_size)
        speech_maxlen = int(speech_ids.shape[-1])

@@ -1019,38 +971,6 @@ class FunAudioChatForConditionalGeneration(nn.Module, SupportsMultiModal, Suppor
        embeds = tuple(
            audio_features[i, : int(length)] for i, length in enumerate(lengths)
        )
-        if debug:
-            embed_lens = [int(t.shape[0]) for t in embeds]
-            print(f"[FunAudioChat] embed_multimodal out_lens={embed_lens}", flush=True)
-            if embeds:
-                t0 = embeds[0]
-                print(
-                    f"[FunAudioChat] embed0 dtype={t0.dtype} device={t0.device} "
-                    f"nan={bool(torch.isnan(t0).any())} "
-                    f"norm={float(t0.norm().item()):.6g}",
-                    flush=True,
-                )
-            dump_path = os.getenv("VLLM_FUN_AUDIOCHAT_DUMP_PATH", "")
-            if (
-                dump_path
-                and speech_ids.shape[0] == 1
-                and len(embeds) == 1
-                and embed_lens[0] > 10
-            ):
-                if not os.path.exists(dump_path):
-                    np.save(dump_path, embeds[0].detach().float().cpu().numpy())
-                    print(f"[FunAudioChat] dumped embeds to {dump_path}", flush=True)
-                cont_path = dump_path.replace(".npy", "_cont.npy")
-                if continuous_audio_features is not None and not os.path.exists(
-                    cont_path
-                ):
-                    np.save(
-                        cont_path,
-                        continuous_audio_features.detach().float().cpu().numpy(),
-                    )
-                    print(
-                        f"[FunAudioChat] dumped continuous to {cont_path}", flush=True
-                    )
        return embeds

    def forward(

--- a/vllm/model_executor/models/nano_nemotron_vl.py
+++ b/vllm/model_executor/models/nano_nemotron_vl.py
@@ -2225,104 +2225,6 @@ class NemotronH_Nano_VL_V2(
            assert len(sound_weights) > 0
            self.sound_encoder.load_weights(sound_weights)

-    def print_architecture(self, detailed: bool = True, save_to_file: str = None):
-        """
-        Print model architecture with parameter names, shapes, and sizes.
-
-        Args:
-            detailed: If True, show detailed parameter breakdown
-            save_to_file: If provided, save output to this file path
-        """
-        import sys
-        from io import StringIO
-
-        # Capture output if saving to file
-        original_stdout = sys.stdout
-        if save_to_file:
-            sys.stdout = StringIO()
-
-        try:
-            print("=" * 100)
-            print("NemotronH_Nano_VL_V2 Model Architecture")
-            print("=" * 100)
-
-            total_params = 0
-            param_groups = {
-                "language_model": [],
-                "vision_model": [],
-                "mlp1": [],
-                "other": [],
-            }
-
-            for name, param in self.named_parameters():
-                param_size = param.numel()
-                total_params += param_size
-
-                # Group parameters by main component
-                if name.startswith("language_model"):
-                    param_groups["language_model"].append(
-                        (name, param.shape, param_size, param.dtype)
-                    )
-                elif name.startswith("vision_model"):
-                    param_groups["vision_model"].append(
-                        (name, param.shape, param_size, param.dtype)
-                    )
-                elif name.startswith("mlp1"):
-                    param_groups["mlp1"].append(
-                        (name, param.shape, param_size, param.dtype)
-                    )
-                else:
-                    param_groups["other"].append(
-                        (name, param.shape, param_size, param.dtype)
-                    )
-
-                if detailed:
-                    print(
-                        f"{name:<70} | Shape: {str(param.shape):<25} | "
-                        f"Size: {param_size:>12,} | Dtype: {param.dtype}"
-                    )
-
-            print("=" * 100)
-            print("Summary by Component:")
-            print("-" * 60)
-
-            for component, params in param_groups.items():
-                if params:  # Only show components that have parameters
-                    component_total = sum(size for _, _, size, _ in params)
-                    percentage = (
-                        (component_total / total_params) * 100
-                        if total_params > 0
-                        else 0
-                    )
-                    print(
-                        f"{component:<20} | Parameters: {len(params):>4} | "
-                        f"Total Size: {component_total:>15,} | "
-                        f"{percentage:>6.2f}%"
-                    )
-
-            print("-" * 60)
-            print(f"{'Total Parameters':<20} | {total_params:>15,}")
-
-            # Estimate memory usage (assuming bfloat16 = 2 bytes per parameter)
-            memory_mb = total_params * 2 / (1024**2)
-            memory_gb = memory_mb / 1024
-            print(f"{'Est. Memory (MB)':<20} | {memory_mb:>15.2f}")
-            print(f"{'Est. Memory (GB)':<20} | {memory_gb:>15.2f}")
-            print("=" * 100)
-
-            # Save to file if requested
-            if save_to_file:
-                output = sys.stdout.getvalue()
-                sys.stdout = original_stdout
-                with open(save_to_file, "w") as f:
-                    f.write(output)
-                print(f"Architecture saved to: {save_to_file}")
-                print(output)  # Also print to console
-
-        finally:
-            if save_to_file and sys.stdout != original_stdout:
-                sys.stdout = original_stdout
-
    def get_vit_model_from_radio_config(self, hf_config):
        hf_config_vision = hf_config.vision_config
        model_name = hf_config_vision.args.get("model")