Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
b5044fbf
Unverified
Commit
b5044fbf
authored
Oct 10, 2025
by
Yuan Luo
Committed by
GitHub
Oct 10, 2025
Browse files
Replace pad with cat for better performance (#11388)
Co-authored-by:
luoyuan.luo
<
luoyuan.luo@antgroup.com
>
parent
70fbb3ad
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
5 additions
and
5 deletions
+5
-5
python/sglang/srt/models/dots_vlm_vit.py
python/sglang/srt/models/dots_vlm_vit.py
+1
-1
python/sglang/srt/models/glm4v.py
python/sglang/srt/models/glm4v.py
+1
-1
python/sglang/srt/models/qwen2_5_vl.py
python/sglang/srt/models/qwen2_5_vl.py
+1
-1
python/sglang/srt/models/qwen2_vl.py
python/sglang/srt/models/qwen2_vl.py
+1
-1
python/sglang/srt/models/qwen3_vl.py
python/sglang/srt/models/qwen3_vl.py
+1
-1
No files found.
python/sglang/srt/models/dots_vlm_vit.py
View file @
b5044fbf
...
@@ -323,7 +323,7 @@ class DotsVisionTransformer(PreTrainedModel):
...
@@ -323,7 +323,7 @@ class DotsVisionTransformer(PreTrainedModel):
dim
=
0
,
dim
=
0
,
dtype
=
grid_thw
.
dtype
if
torch
.
jit
.
is_tracing
()
else
torch
.
int32
,
dtype
=
grid_thw
.
dtype
if
torch
.
jit
.
is_tracing
()
else
torch
.
int32
,
)
)
cu_seqlens
=
F
.
pad
(
cu_seqlens
,
(
1
,
0
),
value
=
0
)
cu_seqlens
=
torch
.
cat
([
cu_seqlens
.
new_zeros
(
1
),
cu_seqlens
]
)
for
blk
in
self
.
blocks
:
for
blk
in
self
.
blocks
:
hidden_states
=
blk
(
hidden_states
=
blk
(
...
...
python/sglang/srt/models/glm4v.py
View file @
b5044fbf
...
@@ -434,7 +434,7 @@ class Glm4vVisionModel(nn.Module):
...
@@ -434,7 +434,7 @@ class Glm4vVisionModel(nn.Module):
cu_seqlens
=
torch
.
repeat_interleave
(
cu_seqlens
=
torch
.
repeat_interleave
(
grid_thw
[:,
1
]
*
grid_thw
[:,
2
],
grid_thw
[:,
0
]
grid_thw
[:,
1
]
*
grid_thw
[:,
2
],
grid_thw
[:,
0
]
).
cumsum
(
dim
=
0
,
dtype
=
torch
.
int32
)
).
cumsum
(
dim
=
0
,
dtype
=
torch
.
int32
)
cu_seqlens
=
F
.
pad
(
cu_seqlens
,
(
1
,
0
),
"constant"
,
0
)
cu_seqlens
=
torch
.
cat
([
cu_seqlens
.
new_zeros
(
1
),
cu_seqlens
]
)
seqlens
=
(
cu_seqlens
[
1
:]
-
cu_seqlens
[:
-
1
]).
tolist
()
seqlens
=
(
cu_seqlens
[
1
:]
-
cu_seqlens
[:
-
1
]).
tolist
()
x
=
self
.
embeddings
(
x
=
self
.
embeddings
(
...
...
python/sglang/srt/models/qwen2_5_vl.py
View file @
b5044fbf
...
@@ -436,7 +436,7 @@ class Qwen2_5_VisionTransformer(nn.Module):
...
@@ -436,7 +436,7 @@ class Qwen2_5_VisionTransformer(nn.Module):
.
to
(
device
=
x
.
device
,
dtype
=
torch
.
int32
),
.
to
(
device
=
x
.
device
,
dtype
=
torch
.
int32
),
]
]
)
)
cu_seqlens
=
F
.
pad
(
cu_seqlens
,
(
1
,
0
),
"constant"
,
0
)
cu_seqlens
=
torch
.
cat
([
cu_seqlens
.
new_zeros
(
1
),
cu_seqlens
]
)
# transformers
# transformers
x
=
x
.
unsqueeze
(
1
)
x
=
x
.
unsqueeze
(
1
)
...
...
python/sglang/srt/models/qwen2_vl.py
View file @
b5044fbf
...
@@ -407,7 +407,7 @@ class Qwen2VisionTransformer(nn.Module):
...
@@ -407,7 +407,7 @@ class Qwen2VisionTransformer(nn.Module):
cu_seqlens
=
torch
.
repeat_interleave
(
cu_seqlens
=
torch
.
repeat_interleave
(
grid_thw
[:,
1
]
*
grid_thw
[:,
2
],
grid_thw
[:,
0
]
grid_thw
[:,
1
]
*
grid_thw
[:,
2
],
grid_thw
[:,
0
]
).
cumsum
(
dim
=
0
,
dtype
=
torch
.
int32
)
).
cumsum
(
dim
=
0
,
dtype
=
torch
.
int32
)
cu_seqlens
=
F
.
pad
(
cu_seqlens
,
(
1
,
0
),
"constant"
,
0
)
cu_seqlens
=
torch
.
cat
([
cu_seqlens
.
new_zeros
(
1
),
cu_seqlens
]
)
# transformers
# transformers
x
=
x
.
unsqueeze
(
1
)
x
=
x
.
unsqueeze
(
1
)
...
...
python/sglang/srt/models/qwen3_vl.py
View file @
b5044fbf
...
@@ -458,7 +458,7 @@ class Qwen3_VisionTransformer(nn.Module):
...
@@ -458,7 +458,7 @@ class Qwen3_VisionTransformer(nn.Module):
(
grid_thw
[:,
0
]
*
grid_thw
[:,
1
]
*
grid_thw
[:,
2
]).
cumsum
(
dim
=
0
),
(
grid_thw
[:,
0
]
*
grid_thw
[:,
1
]
*
grid_thw
[:,
2
]).
cumsum
(
dim
=
0
),
]
]
)
)
cu_seqlens
=
F
.
pad
(
cu_seqlens
,
(
1
,
0
),
"constant"
,
0
)
cu_seqlens
=
torch
.
cat
([
cu_seqlens
.
new_zeros
(
1
),
cu_seqlens
]
)
# max_seqlen, seqlens = self.compute_attn_mask_seqlen(cu_seqlens)
# max_seqlen, seqlens = self.compute_attn_mask_seqlen(cu_seqlens)
x
=
x
.
unsqueeze
(
1
)
x
=
x
.
unsqueeze
(
1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment