Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3abb7560
Unverified
Commit
3abb7560
authored
Apr 16, 2026
by
Roger Wang
Committed by
GitHub
Apr 16, 2026
Browse files
[Bugfix] Fix audioflamingo test (#40052)
Signed-off-by:
Roger Wang
<
hey@rogerw.io
>
parent
617d1c2f
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
85 deletions
+0
-85
tests/models/multimodal/processing/test_audioflamingo3.py
tests/models/multimodal/processing/test_audioflamingo3.py
+0
-85
No files found.
tests/models/multimodal/processing/test_audioflamingo3.py
View file @
3abb7560
...
...
@@ -140,88 +140,3 @@ def test_audio_token_count_matches_hf_processor_math():
_count_audio_tokens_from_mask
(
feature_attention_mask
,
chunk_counts
,
0
)
==
1499
)
assert
_count_audio_tokens_from_mask
(
feature_attention_mask
,
chunk_counts
,
1
)
==
375
def test_audio_feature_pipeline_matches_hf_small_config():
    """Compare vLLM's audio feature pipeline against the HF reference model.

    A small AudioFlamingo3 config is used to instantiate the HF
    ``AudioFlamingo3ForConditionalGeneration`` model. Its ``audio_tower`` and
    ``multi_modal_projector`` weights are loaded into the vLLM encoder and
    projector, both pipelines are run on the same random input features and
    feature attention mask, and the flattened vLLM embeddings are asserted to
    be close to the HF ``pooler_output``.
    """
    from transformers.models.audioflamingo3 import (
        modeling_audioflamingo3 as hf_audioflamingo3_modeling,
    )
    from transformers.models.audioflamingo3.configuration_audioflamingo3 import (
        AudioFlamingo3Config,
    )

    from vllm.model_executor.models.audioflamingo3 import (
        AudioFlamingo3Encoder,
        AudioFlamingo3MultiModalProjector,
        _build_audio_encoder_attention_mask,
        _flatten_valid_audio_embeddings,
    )

    # Language-model half of the composite config.
    lm_settings = dict(
        model_type="qwen2",
        intermediate_size=64,
        initializer_range=0.02,
        hidden_size=32,
        max_position_embeddings=1024,
        num_hidden_layers=2,
        num_attention_heads=4,
        num_key_value_heads=2,
        vocab_size=128,
        pad_token_id=1,
        use_mrope=False,
    )
    # Audio-encoder half; every dropout-style knob is set to zero.
    encoder_settings = dict(
        hidden_size=16,
        num_attention_heads=4,
        intermediate_size=32,
        num_hidden_layers=2,
        num_mel_bins=80,
        max_source_positions=1500,
        dropout=0.0,
        attention_dropout=0.0,
        activation_dropout=0.0,
        encoder_layerdrop=0.0,
    )

    # Seed before any module is constructed; the statements below are kept in
    # their original order so the random stream is consumed identically.
    torch.manual_seed(0)
    config = AudioFlamingo3Config(
        text_config=lm_settings,
        audio_config=encoder_settings,
        audio_token_id=0,
    )

    hf_model = hf_audioflamingo3_modeling.AudioFlamingo3ForConditionalGeneration(
        config
    )
    hf_model = hf_model.eval()

    # vLLM modules take their weights from the HF reference model.
    vllm_encoder = AudioFlamingo3Encoder(config.audio_config).eval()
    vllm_encoder.load_state_dict(hf_model.audio_tower.state_dict())
    vllm_projector = AudioFlamingo3MultiModalProjector(config).eval()
    vllm_projector.load_state_dict(hf_model.multi_modal_projector.state_dict())

    input_features = torch.randn(3, 80, 3000)
    feature_attention_mask = torch.zeros(3, 3000, dtype=torch.bool)
    # Row i has its first `valid_len` positions marked True.
    for row, valid_len in enumerate((3000, 2500, 1500)):
        feature_attention_mask[row, :valid_len] = True

    # Reference result from the HF implementation.
    hf_result = hf_model.get_audio_features(
        input_features,
        feature_attention_mask,
        return_dict=True,
    )
    hf_output = hf_result.pooler_output

    # vLLM path: build the encoder mask, encode, project, then flatten.
    conv1_weight = vllm_encoder.conv1.weight
    vllm_attention_mask = _build_audio_encoder_attention_mask(
        feature_attention_mask,
        dtype=conv1_weight.dtype,
        device=conv1_weight.device,
    )
    vllm_hidden_states = vllm_encoder(
        input_features,
        attention_mask=vllm_attention_mask,
    )
    projected = vllm_projector(vllm_hidden_states)
    vllm_output, _ = _flatten_valid_audio_embeddings(
        projected,
        feature_attention_mask,
    )

    torch.testing.assert_close(vllm_output, hf_output)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment