sglang · Commit 804d250a
"docs/vscode:/vscode.git/clone" did not exist on "777063e1bfda024e7dfc3a9ba2acb20552aec6bc"
Unverified commit · authored Mar 18, 2025 by Xiaoyu Zhang, committed by GitHub on Mar 17, 2025
remove useless backend forward in rotary_embedding (#4500)
Parent: dd865bef
Showing 1 changed file with 0 additions and 70 deletions.
python/sglang/srt/layers/rotary_embedding.py (+0, −70)
@@ -169,76 +169,6 @@ class RotaryEmbedding(CustomOp):
         )
         return query, key
 
-    def forward_xpu(
-        self,
-        positions: torch.Tensor,
-        query: torch.Tensor,
-        key: torch.Tensor,
-        offsets: Optional[torch.Tensor] = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
-        from vllm._ipex_ops import ipex_ops as ops
-
-        self.cos_sin_cache = self.cos_sin_cache.to(positions.device, dtype=query.dtype)
-        ops.rotary_embedding(
-            positions,
-            query,
-            key,
-            self.head_size,
-            self.cos_sin_cache,
-            self.is_neox_style,
-        )
-        return query, key
-
-    def forward_hpu(
-        self,
-        positions: torch.Tensor,
-        query: torch.Tensor,
-        key: torch.Tensor,
-        offsets: Optional[torch.Tensor] = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
-        from habana_frameworks.torch.hpex.kernels import (
-            RotaryPosEmbeddingMode,
-            apply_rotary_pos_emb,
-        )
-
-        positions = positions.flatten()
-        if offsets is not None:
-            positions = positions + offsets
-        num_tokens = positions.shape[0]
-        cos_sin = self.cos_sin_cache.index_select(0, positions).view(num_tokens, 1, -1)
-        cos, sin = cos_sin.chunk(2, dim=-1)
-        # HPU RoPE kernel requires hidden dimension for cos and sin to be equal
-        # to query hidden dimension, so the original tensors need to be
-        # expanded
-        # GPT-NeoX kernel requires position_ids = None, offset, mode = BLOCKWISE
-        # and expansion of cos/sin tensors via concatenation
-        # GPT-J kernel requires position_ids = None, offset = 0, mode = PAIRWISE
-        # and expansion of cos/sin tensors via repeat_interleave
-        rope_mode: RotaryPosEmbeddingMode
-        if self.is_neox_style:
-            rope_mode = RotaryPosEmbeddingMode.BLOCKWISE
-            cos = torch.cat((cos, cos), dim=-1)
-            sin = torch.cat((sin, sin), dim=-1)
-        else:
-            rope_mode = RotaryPosEmbeddingMode.PAIRWISE
-            sin = torch.repeat_interleave(sin, 2, dim=-1, output_size=cos_sin.shape[-1])
-            cos = torch.repeat_interleave(cos, 2, dim=-1, output_size=cos_sin.shape[-1])
-
-        query_shape = query.shape
-        query = query.view(num_tokens, -1, self.head_size)
-        query_rot = query[..., : self.rotary_dim]
-        query_pass = query[..., self.rotary_dim :]
-        query_rot = apply_rotary_pos_emb(query_rot, cos, sin, None, 0, rope_mode)
-        query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
-
-        key_shape = key.shape
-        key = key.view(num_tokens, -1, self.head_size)
-        key_rot = key[..., : self.rotary_dim]
-        key_pass = key[..., self.rotary_dim :]
-        key_rot = apply_rotary_pos_emb(key_rot, cos, sin, None, 0, rope_mode)
-        key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
-        return query, key
-
     def extra_repr(self) -> str:
         s = f"head_size={self.head_size}, rotary_dim={self.rotary_dim}"
         s += f", max_position_embeddings={self.max_position_embeddings}"
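Why is dropping these overrides harmless on other hardware? A CustomOp-style module typically binds one backend-specific forward at construction time, so platforms other than XPU/HPU never reach the removed paths. The sketch below illustrates that dispatch pattern only; the class name, platform probe, and method set are assumptions for illustration, not the actual sglang CustomOp implementation.

    import torch
    from torch import nn

    class CustomOpSketch(nn.Module):
        # Hypothetical stand-in for a CustomOp base class: pick the
        # backend forward once, then route every call through it.
        def __init__(self):
            super().__init__()
            if torch.cuda.is_available():  # assumed platform probe
                self._forward_method = self.forward_cuda
            else:
                self._forward_method = self.forward_native

        def forward(self, *args, **kwargs):
            return self._forward_method(*args, **kwargs)

        def forward_native(self, *args, **kwargs):
            raise NotImplementedError  # subclasses provide the reference path

        def forward_cuda(self, *args, **kwargs):
            # Default to the native path unless a subclass overrides it.
            return self.forward_native(*args, **kwargs)

Under this pattern, deleting an unused forward_xpu/forward_hpu override leaves the remaining dispatch targets intact.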