Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
de7eb10c
Unverified
Commit
de7eb10c
authored
Apr 27, 2025
by
Kero Liang
Committed by
GitHub
Apr 26, 2025
Browse files
[Bugfix] Fix Qwen2.5-Omni M-RoPE position ids generation (#16878)
Signed-off-by:
imkero
<
kerorek@outlook.com
>
parent
fd11a325
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
15 deletions
+13
-15
vllm/model_executor/layers/rotary_embedding.py
vllm/model_executor/layers/rotary_embedding.py
+13
-15
No files found.
vllm/model_executor/layers/rotary_embedding.py
View file @
de7eb10c
...
...
@@ -1209,6 +1209,7 @@ class MRotaryEmbedding(RotaryEmbedding):
video_token_id
=
thinker_config
.
video_token_index
audio_start_token_id
=
thinker_config
.
audio_start_token_id
audio_end_token_id
=
thinker_config
.
audio_end_token_id
vision_start_token_id
=
thinker_config
.
vision_start_token_id
vision_end_token_id
=
thinker_config
.
vision_end_token_id
seconds_per_chunk
=
thinker_config
.
seconds_per_chunk
spatial_merge_size
=
thinker_config
.
vision_config
.
spatial_merge_size
...
...
@@ -1238,7 +1239,14 @@ class MRotaryEmbedding(RotaryEmbedding):
if
src_item
[
idx
]
not
in
[
audio_token_id
,
video_token_id
,
image_token_id
]:
if
src_item
[
idx
]
==
vision_end_token_id
and
use_audio_in_video
:
if
use_audio_in_video
and
idx
>
0
:
if
src_item
[
idx
]
==
vision_end_token_id
and
\
src_item
[
idx
-
1
]
==
audio_end_token_id
:
# processing the <|audio_eos|> before <|vision_eos|>
start_idx
-=
1
elif
src_item
[
idx
]
==
audio_start_token_id
and
\
src_item
[
idx
-
1
]
==
vision_start_token_id
:
# processing the <|audio_bos|> after <|vision_eos|>
start_idx
-=
1
new_src_item
.
append
(
src_item
[
idx
])
llm_pos_ids
=
torch
.
tensor
([
start_idx
],
...
...
@@ -1297,11 +1305,6 @@ class MRotaryEmbedding(RotaryEmbedding):
tokens_per_second
).
long
()
t_index_split_chunk
=
cls
.
_split_list_into_ranges
(
t_index
,
t_ntoken_per_chunk
)
new_src_item
.
extend
([
audio_start_token_id
])
start_idx
-=
1
llm_pos_ids_list
.
extend
([
torch
.
tensor
([
start_idx
],
dtype
=
torch
.
long
).
expand
(
3
,
-
1
)
]
*
1
)
place_num
=
(((
audio_seqlen
-
1
)
//
2
+
1
-
2
)
//
2
+
1
)
+
2
pure_audio_len
=
place_num
-
2
added_audio_len
=
0
...
...
@@ -1312,7 +1315,7 @@ class MRotaryEmbedding(RotaryEmbedding):
new_src_item
.
extend
([
video_token_id
]
*
vision_ntoken_per_chunk
)
vision_llm_pos_ids_list
=
cls
.
_get_llm_pos_ids_for_vision
(
start_idx
+
1
,
video_idx
,
spatial_merge_size
,
t_chunk
,
start_idx
,
video_idx
,
spatial_merge_size
,
t_chunk
,
grid_hs
,
grid_ws
).
split
(
1
,
dim
=
1
)
llm_pos_ids_list
.
extend
(
vision_llm_pos_ids_list
)
new_src_item
.
extend
(
...
...
@@ -1320,13 +1323,13 @@ class MRotaryEmbedding(RotaryEmbedding):
added_audio_len
)
*
[
audio_token_id
])
audio_start_idx
=
start_idx
if
len
(
audio_llm_pos_ids_list
)
==
0
else
audio_llm_pos_ids_list
[
-
1
][
0
].
item
()
)
==
0
else
audio_llm_pos_ids_list
[
-
1
][
0
].
item
()
+
1
if
min
(
t_ntoken_per_chunk
,
pure_audio_len
-
added_audio_len
)
>
0
:
audio_llm_pos_ids_list
=
(
torch
.
arange
(
min
(
t_ntoken_per_chunk
,
pure_audio_len
-
added_audio_len
)).
expand
(
3
,
-
1
)
+
audio_start_idx
+
1
).
split
(
audio_start_idx
).
split
(
1
,
dim
=
1
)
else
:
audio_llm_pos_ids_list
=
[]
...
...
@@ -1341,11 +1344,6 @@ class MRotaryEmbedding(RotaryEmbedding):
3
,
-
1
)
+
llm_pos_ids_list
[
-
1
].
max
()
+
1
).
split
(
1
,
dim
=
1
)
llm_pos_ids_list
.
extend
(
audio_llm_pos_ids_list
)
llm_pos_ids_list
.
extend
([
torch
.
tensor
(
[
llm_pos_ids_list
[
-
1
].
max
()
+
1
]
*
3
).
unsqueeze
(
1
)
]
*
1
)
new_src_item
.
extend
([
audio_end_token_id
])
audio_idx
+=
1
video_idx
+=
1
# move to the next token
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment