Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2329e1dd
Unverified
Commit
2329e1dd
authored
Jan 07, 2025
by
Xu-Chen
Committed by
GitHub
Jan 06, 2025
Browse files
Support llamafy/Qwen-Qwen2.5-7B-Instruct-llamafied (#2748)
Co-authored-by: chenxu02 <chenxu02@zhihu.com>
parent
0f3eb1d2
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
2 deletions
+9
-2
python/sglang/srt/models/llama.py
python/sglang/srt/models/llama.py
+9
-2
No files found.
python/sglang/srt/models/llama.py
View file @
2329e1dd
...
@@ -100,6 +100,7 @@ class LlamaAttention(nn.Module):
...
@@ -100,6 +100,7 @@ class LlamaAttention(nn.Module):
max_position_embeddings
:
int
=
8192
,
max_position_embeddings
:
int
=
8192
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
bias
:
bool
=
False
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
self
.
hidden_size
=
hidden_size
self
.
hidden_size
=
hidden_size
...
@@ -132,14 +133,14 @@ class LlamaAttention(nn.Module):
...
@@ -132,14 +133,14 @@ class LlamaAttention(nn.Module):
self
.
head_dim
,
self
.
head_dim
,
self
.
total_num_heads
,
self
.
total_num_heads
,
self
.
total_num_kv_heads
,
self
.
total_num_kv_heads
,
bias
=
False
,
bias
=
bias
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
prefix
=
f
"
{
prefix
}
.qkv_proj"
,
prefix
=
f
"
{
prefix
}
.qkv_proj"
,
)
)
self
.
o_proj
=
RowParallelLinear
(
self
.
o_proj
=
RowParallelLinear
(
self
.
total_num_heads
*
self
.
head_dim
,
self
.
total_num_heads
*
self
.
head_dim
,
hidden_size
,
hidden_size
,
bias
=
False
,
bias
=
bias
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
prefix
=
f
"
{
prefix
}
.o_proj"
,
prefix
=
f
"
{
prefix
}
.o_proj"
,
)
)
...
@@ -194,6 +195,11 @@ class LlamaDecoderLayer(nn.Module):
...
@@ -194,6 +195,11 @@ class LlamaDecoderLayer(nn.Module):
)
)
rope_is_neox_style
=
getattr
(
config
,
"rope_is_neox_style"
,
True
)
rope_is_neox_style
=
getattr
(
config
,
"rope_is_neox_style"
,
True
)
max_position_embeddings
=
getattr
(
config
,
"max_position_embeddings"
,
8192
)
max_position_embeddings
=
getattr
(
config
,
"max_position_embeddings"
,
8192
)
# Support llamafy/Qwen-Qwen2.5-7B-Instruct-llamafied with attention_bias
# Support internlm/internlm-7b with bias
attention_bias = getattr(config, "attention_bias", False) or getattr(config, "bias", False)
self
.
self_attn
=
LlamaAttention
(
self
.
self_attn
=
LlamaAttention
(
config
=
config
,
config
=
config
,
hidden_size
=
self
.
hidden_size
,
hidden_size
=
self
.
hidden_size
,
...
@@ -206,6 +212,7 @@ class LlamaDecoderLayer(nn.Module):
...
@@ -206,6 +212,7 @@ class LlamaDecoderLayer(nn.Module):
max_position_embeddings
=
max_position_embeddings
,
max_position_embeddings
=
max_position_embeddings
,
quant_config
=
quant_config
,
quant_config
=
quant_config
,
prefix
=
f
"
{
prefix
}
.self_attn"
,
prefix
=
f
"
{
prefix
}
.self_attn"
,
bias=attention_bias,
)
)
self
.
mlp
=
LlamaMLP
(
self
.
mlp
=
LlamaMLP
(
hidden_size
=
self
.
hidden_size
,
hidden_size
=
self
.
hidden_size
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment