Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6e9f21e8
Unverified
Commit
6e9f21e8
authored
Mar 04, 2026
by
Isotr0py
Committed by
GitHub
Mar 03, 2026
Browse files
[Chore] Remove debug code in model implementation (#35883)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
c1d96340
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
0 additions
and
178 deletions
+0
-178
vllm/model_executor/models/funaudiochat.py
vllm/model_executor/models/funaudiochat.py
+0
-80
vllm/model_executor/models/nano_nemotron_vl.py
vllm/model_executor/models/nano_nemotron_vl.py
+0
-98
No files found.
vllm/model_executor/models/funaudiochat.py
View file @
6e9f21e8
...
...
@@ -13,7 +13,6 @@ positions via `inputs_embeds`, while `position_ids` (RoPE) remains standard 1D.
from
__future__
import
annotations
import
os
from
collections.abc
import
Iterable
,
Mapping
,
Sequence
from
functools
import
cached_property
from
typing
import
Any
...
...
@@ -924,53 +923,6 @@ class FunAudioChatForConditionalGeneration(nn.Module, SupportsMultiModal, Suppor
f
"sequence of Tensors (got
{
type
(
speech_attention_mask
)
}
)"
)
debug
=
os
.
getenv
(
"VLLM_FUN_AUDIOCHAT_DEBUG"
,
""
)
==
"1"
if
debug
:
print
(
f
"[FunAudioChat] embed_multimodal speech_ids=
{
tuple
(
speech_ids
.
shape
)
}
"
f
"speech_attention_mask=
{
tuple
(
speech_attention_mask
.
shape
)
}
"
,
flush
=
True
,
)
attn_impl
=
getattr
(
self
.
continuous_audio_tower
.
config
,
"_attn_implementation"
,
None
)
print
(
f
"[FunAudioChat] audio_attn_impl=
{
attn_impl
}
"
,
flush
=
True
,
)
if
hasattr
(
self
.
continuous_audio_tower
,
"conv1"
):
conv1_w
=
self
.
continuous_audio_tower
.
conv1
.
weight
print
(
f
"[FunAudioChat] conv1_w_norm=
{
float
(
conv1_w
.
norm
().
item
()):.
6
g
}
"
,
flush
=
True
,
)
try
:
attn0
=
self
.
continuous_audio_tower
.
layers
[
0
].
self_attn
q_norm
=
float
(
attn0
.
q_proj
.
weight
.
norm
().
item
())
k_norm
=
float
(
attn0
.
k_proj
.
weight
.
norm
().
item
())
v_norm
=
float
(
attn0
.
v_proj
.
weight
.
norm
().
item
())
o_norm
=
float
(
attn0
.
out_proj
.
weight
.
norm
().
item
())
print
(
f
"[FunAudioChat] attn0_q_norm=
{
q_norm
:.
6
g
}
"
f
"k_norm=
{
k_norm
:.
6
g
}
"
f
"v_norm=
{
v_norm
:.
6
g
}
"
f
"o_norm=
{
o_norm
:.
6
g
}
"
,
flush
=
True
,
)
except
Exception
:
pass
if
isinstance
(
input_features
,
torch
.
Tensor
):
print
(
f
"[FunAudioChat] input_features=
{
tuple
(
input_features
.
shape
)
}
"
,
flush
=
True
,
)
if
isinstance
(
feature_attention_mask
,
torch
.
Tensor
):
print
(
"[FunAudioChat] feature_attention_mask="
f
"
{
tuple
(
feature_attention_mask
.
shape
)
}
"
,
flush
=
True
,
)
group_size
=
int
(
self
.
audio_tower
.
group_size
)
speech_maxlen
=
int
(
speech_ids
.
shape
[
-
1
])
...
...
@@ -1019,38 +971,6 @@ class FunAudioChatForConditionalGeneration(nn.Module, SupportsMultiModal, Suppor
embeds
=
tuple
(
audio_features
[
i
,
:
int
(
length
)]
for
i
,
length
in
enumerate
(
lengths
)
)
if
debug
:
embed_lens
=
[
int
(
t
.
shape
[
0
])
for
t
in
embeds
]
print
(
f
"[FunAudioChat] embed_multimodal out_lens=
{
embed_lens
}
"
,
flush
=
True
)
if
embeds
:
t0
=
embeds
[
0
]
print
(
f
"[FunAudioChat] embed0 dtype=
{
t0
.
dtype
}
device=
{
t0
.
device
}
"
f
"nan=
{
bool
(
torch
.
isnan
(
t0
).
any
())
}
"
f
"norm=
{
float
(
t0
.
norm
().
item
()):.
6
g
}
"
,
flush
=
True
,
)
dump_path
=
os
.
getenv
(
"VLLM_FUN_AUDIOCHAT_DUMP_PATH"
,
""
)
if
(
dump_path
and
speech_ids
.
shape
[
0
]
==
1
and
len
(
embeds
)
==
1
and
embed_lens
[
0
]
>
10
):
if
not
os
.
path
.
exists
(
dump_path
):
np
.
save
(
dump_path
,
embeds
[
0
].
detach
().
float
().
cpu
().
numpy
())
print
(
f
"[FunAudioChat] dumped embeds to
{
dump_path
}
"
,
flush
=
True
)
cont_path
=
dump_path
.
replace
(
".npy"
,
"_cont.npy"
)
if
continuous_audio_features
is
not
None
and
not
os
.
path
.
exists
(
cont_path
):
np
.
save
(
cont_path
,
continuous_audio_features
.
detach
().
float
().
cpu
().
numpy
(),
)
print
(
f
"[FunAudioChat] dumped continuous to
{
cont_path
}
"
,
flush
=
True
)
return
embeds
def
forward
(
...
...
vllm/model_executor/models/nano_nemotron_vl.py
View file @
6e9f21e8
...
...
@@ -2225,104 +2225,6 @@ class NemotronH_Nano_VL_V2(
assert
len
(
sound_weights
)
>
0
self
.
sound_encoder
.
load_weights
(
sound_weights
)
def
print_architecture
(
self
,
detailed
:
bool
=
True
,
save_to_file
:
str
=
None
):
"""
Print model architecture with parameter names, shapes, and sizes.
Args:
detailed: If True, show detailed parameter breakdown
save_to_file: If provided, save output to this file path
"""
import
sys
from
io
import
StringIO
# Capture output if saving to file
original_stdout
=
sys
.
stdout
if
save_to_file
:
sys
.
stdout
=
StringIO
()
try
:
print
(
"="
*
100
)
print
(
"NemotronH_Nano_VL_V2 Model Architecture"
)
print
(
"="
*
100
)
total_params
=
0
param_groups
=
{
"language_model"
:
[],
"vision_model"
:
[],
"mlp1"
:
[],
"other"
:
[],
}
for
name
,
param
in
self
.
named_parameters
():
param_size
=
param
.
numel
()
total_params
+=
param_size
# Group parameters by main component
if
name
.
startswith
(
"language_model"
):
param_groups
[
"language_model"
].
append
(
(
name
,
param
.
shape
,
param_size
,
param
.
dtype
)
)
elif
name
.
startswith
(
"vision_model"
):
param_groups
[
"vision_model"
].
append
(
(
name
,
param
.
shape
,
param_size
,
param
.
dtype
)
)
elif
name
.
startswith
(
"mlp1"
):
param_groups
[
"mlp1"
].
append
(
(
name
,
param
.
shape
,
param_size
,
param
.
dtype
)
)
else
:
param_groups
[
"other"
].
append
(
(
name
,
param
.
shape
,
param_size
,
param
.
dtype
)
)
if
detailed
:
print
(
f
"
{
name
:
<
70
}
| Shape:
{
str
(
param
.
shape
):
<
25
}
| "
f
"Size:
{
param_size
:
>
12
,
}
| Dtype:
{
param
.
dtype
}
"
)
print
(
"="
*
100
)
print
(
"Summary by Component:"
)
print
(
"-"
*
60
)
for
component
,
params
in
param_groups
.
items
():
if
params
:
# Only show components that have parameters
component_total
=
sum
(
size
for
_
,
_
,
size
,
_
in
params
)
percentage
=
(
(
component_total
/
total_params
)
*
100
if
total_params
>
0
else
0
)
print
(
f
"
{
component
:
<
20
}
| Parameters:
{
len
(
params
):
>
4
}
| "
f
"Total Size:
{
component_total
:
>
15
,
}
| "
f
"
{
percentage
:
>
6.2
f
}
%"
)
print
(
"-"
*
60
)
print
(
f
"
{
'Total Parameters'
:
<
20
}
|
{
total_params
:
>
15
,
}
"
)
# Estimate memory usage (assuming bfloat16 = 2 bytes per parameter)
memory_mb
=
total_params
*
2
/
(
1024
**
2
)
memory_gb
=
memory_mb
/
1024
print
(
f
"
{
'Est. Memory (MB)'
:
<
20
}
|
{
memory_mb
:
>
15.2
f
}
"
)
print
(
f
"
{
'Est. Memory (GB)'
:
<
20
}
|
{
memory_gb
:
>
15.2
f
}
"
)
print
(
"="
*
100
)
# Save to file if requested
if
save_to_file
:
output
=
sys
.
stdout
.
getvalue
()
sys
.
stdout
=
original_stdout
with
open
(
save_to_file
,
"w"
)
as
f
:
f
.
write
(
output
)
print
(
f
"Architecture saved to:
{
save_to_file
}
"
)
print
(
output
)
# Also print to console
finally
:
if
save_to_file
and
sys
.
stdout
!=
original_stdout
:
sys
.
stdout
=
original_stdout
def
get_vit_model_from_radio_config
(
self
,
hf_config
):
hf_config_vision
=
hf_config
.
vision_config
model_name
=
hf_config_vision
.
args
.
get
(
"model"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment