Commit e72275cf (unverified)
Authored Sep 10, 2024 by William; committed via GitHub on Sep 10, 2024.
Support MiniCPM3 (#1371)
Parent: fec2d122

Showing 5 changed files with 683 additions and 2 deletions (+683, -2):

- README.md (+1, -0)
- python/sglang/srt/layers/decode_attention.py (+4, -1)
- python/sglang/srt/layers/extend_attention.py (+4, -1)
- python/sglang/srt/model_config.py (+5, -0)
- python/sglang/srt/models/minicpm3.py (+669, -0)
README.md

@@ -259,6 +259,7 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
 - ChatGLM
 - InternLM 2
 - Exaone 3
+- MiniCPM / MiniCPM 3
 **Embedding Models**
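The README change just adds MiniCPM / MiniCPM 3 to the supported-model list, so the launch pattern shown in the hunk header applies as-is. A minimal sketch, assuming the Hugging Face repo id `openbmb/MiniCPM3-4B` and that the model's remote code requires `--trust-remote-code` (neither detail appears in this diff):

```python
import subprocess

# Sketch: start an sglang server for MiniCPM3 following the README's
# `python -m sglang.launch_server ...` pattern. The repo id, the
# --trust-remote-code flag, and the port are assumptions, not part of
# this commit. Runs in the foreground until the server exits.
subprocess.run(
    [
        "python", "-m", "sglang.launch_server",
        "--model-path", "openbmb/MiniCPM3-4B",
        "--trust-remote-code",
        "--port", "30000",
    ],
    check=True,
)
```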
python/sglang/srt/layers/decode_attention.py

@@ -483,11 +483,14 @@ def _decode_grouped_att_m_fwd(
     # shape constraints
     Lq, Lk = q.shape[-1], k_buffer.shape[-1]
     assert Lq == Lk
-    assert Lk in {16, 32, 64, 96, 128, 256, 576}
+    assert Lk in {16, 32, 64, 96, 128, 256, 576, 288}
     if Lk == 576:
         BLOCK_DMODEL = 512
         BLOCK_DPE = 64
+    elif Lk == 288:
+        BLOCK_DMODEL = 256
+        BLOCK_DPE = 32
     else:
         BLOCK_DMODEL = triton.next_power_of_2(Lk)
         BLOCK_DPE = 0
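The new 288 case exists because, under MLA, each cached KV entry packs a latent ("no-PE") slice next to a RoPE slice, and the attention dot product decomposes over the two slices; BLOCK_DMODEL and BLOCK_DPE are exactly those slice widths. A minimal NumPy sketch of the decomposition, assuming the MiniCPM3 sizes are kv_lora_rank=256 and qk_rope_head_dim=32 (the diff implies 256 + 32 = 288 but does not name the config fields here):

```python
import numpy as np

BLOCK_DMODEL, BLOCK_DPE = 256, 32   # assumed MiniCPM3 MLA slice widths
Lk = BLOCK_DMODEL + BLOCK_DPE       # 288, the newly admitted head dim

rng = np.random.default_rng(0)
q = rng.standard_normal(Lk)         # one query head
k = rng.standard_normal(Lk)         # one packed KV entry

# One 288-wide dot product...
score_full = q @ k

# ...equals a 256-wide "no-PE" dot plus a 32-wide RoPE dot, which is
# why the kernel can process the two slices as separate blocks.
score_split = (
    q[:BLOCK_DMODEL] @ k[:BLOCK_DMODEL] + q[BLOCK_DMODEL:] @ k[BLOCK_DMODEL:]
)

assert np.isclose(score_full, score_split)
```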
python/sglang/srt/layers/extend_attention.py

@@ -280,12 +280,15 @@ def extend_attention_fwd(
     assert Lq == Lk and Lv == Lo
     # TODO: is the assertion necessary?
-    assert Lq in {16, 32, 64, 96, 128, 256, 576}
+    assert Lq in {16, 32, 64, 96, 128, 256, 576, 288}
     assert Lv in {16, 32, 64, 96, 128, 256, 512}
     if Lq == 576:
         BLOCK_DMODEL = 512
         BLOCK_DPE = 64
+    elif Lq == 288:
+        BLOCK_DMODEL = 256
+        BLOCK_DPE = 32
     else:
         BLOCK_DMODEL = triton.next_power_of_2(Lq)
         BLOCK_DPE = 0
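Both kernels now apply the same dispatch: 576 maps to (512, 64) for the DeepSeek-V2-style MLA case, 288 maps to (256, 32) for MiniCPM3, and anything else falls back to a power-of-two BLOCK_DMODEL with BLOCK_DPE = 0. A standalone sketch of that selection, with a plain-Python stand-in for triton.next_power_of_2 so it runs without Triton:

```python
def next_power_of_2(n: int) -> int:
    # Stand-in for triton.next_power_of_2 so this sketch needs no GPU stack.
    return 1 << (n - 1).bit_length()

def select_blocks(head_dim: int) -> tuple[int, int]:
    """Mirror the (BLOCK_DMODEL, BLOCK_DPE) dispatch added in this commit."""
    if head_dim == 576:   # DeepSeek-V2-style MLA: 512 latent + 64 RoPE dims
        return 512, 64
    if head_dim == 288:   # MiniCPM3 MLA: 256 latent + 32 RoPE dims
        return 256, 32
    return next_power_of_2(head_dim), 0   # plain MHA head dims, no PE block

assert select_blocks(288) == (256, 32)
assert select_blocks(576) == (512, 64)
assert select_blocks(96) == (128, 0)      # non-power-of-two dims round up
```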
python/sglang/srt/model_config.py

@@ -64,6 +64,11 @@ class ModelConfig:
             self.attention_arch = AttentionArch.MLA
             self.kv_lora_rank = self.hf_config.kv_lora_rank
             self.qk_rope_head_dim = self.hf_config.qk_rope_head_dim
+        elif "MiniCPM3ForCausalLM" in self.hf_config.architectures:
+            self.head_dim = 128
+            self.attention_arch = AttentionArch.MLA
+            self.kv_lora_rank = self.hf_config.kv_lora_rank
+            self.qk_rope_head_dim = self.hf_config.qk_rope_head_dim
         else:
             self.attention_arch = AttentionArch.MHA
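This model_config hook is what makes the 288 head dim reach the kernels: with attention_arch = AttentionArch.MLA, the KV cache stores one packed entry of width kv_lora_rank + qk_rope_head_dim per token rather than full per-head K/V. A back-of-the-envelope check, assuming MiniCPM3's published config values (kv_lora_rank = 256, qk_rope_head_dim = 32; those numbers come from the model's Hugging Face config, not this diff):

```python
# Assumed values from MiniCPM3's HF config (not shown in this diff).
kv_lora_rank = 256
qk_rope_head_dim = 32

# Under MLA, the packed per-token KV width is the latent plus RoPE parts:
packed_kv_dim = kv_lora_rank + qk_rope_head_dim
assert packed_kv_dim == 288   # the new dim admitted by both kernel asserts

# For comparison, DeepSeek-V2-style sizes yield the pre-existing 576 case:
assert 512 + 64 == 576
```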
python/sglang/srt/models/minicpm3.py (new file, mode 100644)

This diff is collapsed (+669 lines).
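Since the new model file is collapsed here, the following is illustration only, not the contents of minicpm3.py: the core of an MLA-style attention block is a learned down-projection whose small latent is what gets cached, plus up-projections that rebuild per-head K/V at attention time. A hypothetical PyTorch sketch, with the sizes (hidden 2560, 40 heads, head_dim 128, kv_lora_rank 256) assumed from MiniCPM3's config:

```python
import torch
import torch.nn as nn

class TinyMLAKV(nn.Module):
    """Illustrative only: low-rank KV compression in the spirit of MLA.

    Hypothetical module with assumed sizes, not the code in minicpm3.py."""

    def __init__(self, hidden=2560, heads=40, head_dim=128, kv_lora_rank=256):
        super().__init__()
        self.down = nn.Linear(hidden, kv_lora_rank, bias=False)  # cached latent
        self.up_k = nn.Linear(kv_lora_rank, heads * head_dim, bias=False)
        self.up_v = nn.Linear(kv_lora_rank, heads * head_dim, bias=False)
        self.heads, self.head_dim = heads, head_dim

    def forward(self, x):                  # x: [batch, seq, hidden]
        latent = self.down(x)              # what the KV cache would store
        k = self.up_k(latent)              # expanded per-head keys
        v = self.up_v(latent)              # expanded per-head values
        b, s, _ = x.shape
        return (latent,
                k.view(b, s, self.heads, self.head_dim),
                v.view(b, s, self.heads, self.head_dim))

x = torch.randn(1, 4, 2560)
latent, k, v = TinyMLAKV()(x)
print(latent.shape, k.shape, v.shape)
# torch.Size([1, 4, 256]) torch.Size([1, 4, 40, 128]) torch.Size([1, 4, 40, 128])
```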