Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
04d0123f
Unverified
Commit
04d0123f
authored
Apr 26, 2025
by
ZXN
Committed by
GitHub
Apr 26, 2025
Browse files
[Fix]: support deepseek-vl2-tiny model (#5552)
Co-authored-by:
bppps
<
zouyu.zzx@alibaba-inc.com
>
parent
feda9b11
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
80 additions
and
6 deletions
+80
-6
python/sglang/srt/configs/deepseekvl2.py
python/sglang/srt/configs/deepseekvl2.py
+1
-1
python/sglang/srt/configs/model_config.py
python/sglang/srt/configs/model_config.py
+3
-1
python/sglang/srt/conversation.py
python/sglang/srt/conversation.py
+34
-1
python/sglang/srt/models/deepseek.py
python/sglang/srt/models/deepseek.py
+12
-2
python/sglang/srt/models/deepseek_vl2.py
python/sglang/srt/models/deepseek_vl2.py
+6
-1
test/srt/test_vision_openai_server.py
test/srt/test_vision_openai_server.py
+24
-0
No files found.
python/sglang/srt/configs/deepseekvl2.py
View file @
04d0123f
...
@@ -182,7 +182,7 @@ class DeepseekVLV2Processor(ProcessorMixin):
...
@@ -182,7 +182,7 @@ class DeepseekVLV2Processor(ProcessorMixin):
tokenized_str
,
images
,
seq_mask
,
spatial_crop
=
self
.
tokenize_with_images
(
tokenized_str
,
images
,
seq_mask
,
spatial_crop
=
self
.
tokenize_with_images
(
messages
,
messages
,
pil_images
[
image_index
:
image_index
+
image_token_cnt
],
pil_images
[
image_index
:
image_index
+
image_token_cnt
],
bos
=
False
,
bos
=
True
,
eos
=
True
,
eos
=
True
,
cropping
=
len
(
pil_images
)
<=
2
,
cropping
=
len
(
pil_images
)
<=
2
,
max_req_input_len
=
max_req_input_len
,
max_req_input_len
=
max_req_input_len
,
...
...
python/sglang/srt/configs/model_config.py
View file @
04d0123f
...
@@ -162,7 +162,9 @@ class ModelConfig:
...
@@ -162,7 +162,9 @@ class ModelConfig:
self
.
attention_arch
=
AttentionArch
.
MLA
self
.
attention_arch
=
AttentionArch
.
MLA
self
.
kv_lora_rank
=
self
.
hf_config
.
kv_lora_rank
self
.
kv_lora_rank
=
self
.
hf_config
.
kv_lora_rank
self
.
qk_rope_head_dim
=
self
.
hf_config
.
qk_rope_head_dim
self
.
qk_rope_head_dim
=
self
.
hf_config
.
qk_rope_head_dim
elif
"DeepseekVL2ForCausalLM"
in
self
.
hf_config
.
architectures
:
elif
"DeepseekVL2ForCausalLM"
in
self
.
hf_config
.
architectures
and
getattr
(
self
.
hf_text_config
,
"use_mla"
,
True
):
self
.
head_dim
=
256
self
.
head_dim
=
256
self
.
attention_arch
=
AttentionArch
.
MLA
self
.
attention_arch
=
AttentionArch
.
MLA
self
.
kv_lora_rank
=
self
.
hf_text_config
.
kv_lora_rank
self
.
kv_lora_rank
=
self
.
hf_text_config
.
kv_lora_rank
...
...
python/sglang/srt/conversation.py
View file @
04d0123f
...
@@ -463,6 +463,30 @@ def generate_embedding_convs(
...
@@ -463,6 +463,30 @@ def generate_embedding_convs(
return
convs
return
convs
# Conversation templates for which missing modality tokens are added
# automatically at the start of the prompt when the request carries more media
# inputs than modality tokens (e.g. 3 images but only 2 <image> tokens).
_MODELS_REQUIRING_MODALITY_SUPPLEMENT = {"deepseek-vl2"}


# adapted from https://github.com/vllm-project/vllm/blob/5124f5bf51b83e6f344c1bc6652e8c4d81313b34/vllm/entrypoints/chat_utils.py#L856
def _get_full_multimodal_text_prompt(
    modality_token: str, modality_count: int, text_prompt: str
) -> str:
    """Combine multimodal prompts for a multimodal language model.

    Placeholders already present in ``text_prompt`` are left untouched; only
    the missing ones are prepended, one per line, ahead of the prompt.

    Args:
        modality_token: Placeholder string marking one media item.
        modality_count: Number of media items supplied with the prompt.
        text_prompt: Prompt text, possibly already containing placeholders.

    Returns:
        ``text_prompt`` preceded by as many newline-separated placeholders as
        are needed to reach ``modality_count``.

    Raises:
        ValueError: If ``modality_token`` is empty, or if ``text_prompt``
            contains more placeholders than ``modality_count``.
    """
    # str.count("") returns len(text) + 1, which would either trigger the
    # misleading "more placeholders" error below or prepend blank lines.
    if not modality_token:
        raise ValueError("modality_token must be a non-empty string.")

    # For any existing placeholder in the text prompt, we leave it as is
    left: int = modality_count - text_prompt.count(modality_token)
    if left < 0:
        raise ValueError(
            f"Found more '{modality_token}' placeholders in input prompt than "
            "actual multimodal data items."
        )

    # NOTE: For now we always add missing modality_token at the front of
    # the prompt. This may change to be customizable in the future.
    return "\n".join([modality_token] * left + [text_prompt])
def
generate_chat_conv
(
def
generate_chat_conv
(
request
:
ChatCompletionRequest
,
template_name
:
str
request
:
ChatCompletionRequest
,
template_name
:
str
)
->
Conversation
:
)
->
Conversation
:
...
@@ -520,6 +544,12 @@ def generate_chat_conv(
...
@@ -520,6 +544,12 @@ def generate_chat_conv(
if
conv
.
name
!=
"qwen2-vl"
if
conv
.
name
!=
"qwen2-vl"
else
conv
.
image_token
else
conv
.
image_token
)
)
add_token_as_needed
:
bool
=
(
conv
.
name
in
_MODELS_REQUIRING_MODALITY_SUPPLEMENT
)
if
add_token_as_needed
:
image_token
=
""
audio_token
=
conv
.
audio_token
audio_token
=
conv
.
audio_token
for
content
in
message
.
content
:
for
content
in
message
.
content
:
if
content
.
type
==
"text"
:
if
content
.
type
==
"text"
:
...
@@ -533,7 +563,10 @@ def generate_chat_conv(
...
@@ -533,7 +563,10 @@ def generate_chat_conv(
elif
content
.
type
==
"audio_url"
:
elif
content
.
type
==
"audio_url"
:
real_content
+=
audio_token
real_content
+=
audio_token
conv
.
append_audio
(
content
.
audio_url
.
url
)
conv
.
append_audio
(
content
.
audio_url
.
url
)
if
add_token_as_needed
:
real_content
=
_get_full_multimodal_text_prompt
(
conv
.
image_token
,
num_image_url
,
real_content
)
conv
.
append_message
(
conv
.
roles
[
0
],
real_content
)
conv
.
append_message
(
conv
.
roles
[
0
],
real_content
)
elif
msg_role
==
"assistant"
:
elif
msg_role
==
"assistant"
:
parsed_content
=
""
parsed_content
=
""
...
...
python/sglang/srt/models/deepseek.py
View file @
04d0123f
...
@@ -382,8 +382,14 @@ class DeepseekModel(nn.Module):
...
@@ -382,8 +382,14 @@ class DeepseekModel(nn.Module):
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
forward_batch
:
ForwardBatch
,
forward_batch
:
ForwardBatch
,
input_embeds
:
torch
.
Tensor
=
None
,
)
->
torch
.
Tensor
:
)
->
torch
.
Tensor
:
hidden_states
=
self
.
embed_tokens
(
input_ids
)
if
input_embeds
is
None
:
hidden_states
=
self
.
embed_tokens
(
input_ids
)
else
:
hidden_states
=
input_embeds
residual
=
None
residual
=
None
for
i
in
range
(
len
(
self
.
layers
)):
for
i
in
range
(
len
(
self
.
layers
)):
layer
=
self
.
layers
[
i
]
layer
=
self
.
layers
[
i
]
...
@@ -416,14 +422,18 @@ class DeepseekForCausalLM(nn.Module):
...
@@ -416,14 +422,18 @@ class DeepseekForCausalLM(nn.Module):
)
)
self
.
logits_processor
=
LogitsProcessor
(
config
)
self
.
logits_processor
=
LogitsProcessor
(
config
)
def get_input_embeddings(self) -> nn.Embedding:
    """Return the ``embed_tokens`` module of the wrapped ``self.model``."""
    return self.model.embed_tokens
@
torch
.
no_grad
()
@
torch
.
no_grad
()
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
forward_batch
:
ForwardBatch
,
forward_batch
:
ForwardBatch
,
input_embeds
:
torch
.
Tensor
=
None
,
)
->
torch
.
Tensor
:
)
->
torch
.
Tensor
:
hidden_states
=
self
.
model
(
input_ids
,
positions
,
forward_batch
)
hidden_states
=
self
.
model
(
input_ids
,
positions
,
forward_batch
,
input_embeds
)
return
self
.
logits_processor
(
return
self
.
logits_processor
(
input_ids
,
hidden_states
,
self
.
lm_head
,
forward_batch
input_ids
,
hidden_states
,
self
.
lm_head
,
forward_batch
)
)
...
...
python/sglang/srt/models/deepseek_vl2.py
View file @
04d0123f
...
@@ -18,6 +18,7 @@ from sglang.srt.managers.mm_utils import (
...
@@ -18,6 +18,7 @@ from sglang.srt.managers.mm_utils import (
from
sglang.srt.managers.schedule_batch
import
MultimodalDataItem
,
MultimodalInputs
from
sglang.srt.managers.schedule_batch
import
MultimodalDataItem
,
MultimodalInputs
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.model_loader.weight_utils
import
default_weight_loader
from
sglang.srt.models.deepseek
import
DeepseekForCausalLM
from
sglang.srt.models.deepseek_v2
import
DeepseekV2ForCausalLM
from
sglang.srt.models.deepseek_v2
import
DeepseekV2ForCausalLM
...
@@ -189,7 +190,11 @@ class DeepseekVL2ForCausalLM(nn.Module):
...
@@ -189,7 +190,11 @@ class DeepseekVL2ForCausalLM(nn.Module):
# ----------- language model ------------
# ----------- language model ------------
language_config
=
config
.
language_config
language_config
=
config
.
language_config
self
.
language_model
=
DeepseekV2ForCausalLM
(
language_config
)
if
language_config
.
use_mla
:
self
.
language_model
=
DeepseekV2ForCausalLM
(
language_config
)
else
:
# deepseek-vl2-tiny forbids mla
self
.
language_model
=
DeepseekForCausalLM
(
language_config
)
def
_init_vision_module
(
def
_init_vision_module
(
self
,
vision_config
,
quant_config
:
Optional
[
QuantizationConfig
]
self
,
vision_config
,
quant_config
:
Optional
[
QuantizationConfig
]
...
...
test/srt/test_vision_openai_server.py
View file @
04d0123f
...
@@ -654,6 +654,30 @@ class TestDeepseekVL2Server(TestOpenAIVisionServer):
...
@@ -654,6 +654,30 @@ class TestDeepseekVL2Server(TestOpenAIVisionServer):
pass
pass
class TestDeepseekVL2TinyServer(TestOpenAIVisionServer):
    """Run the shared OpenAI vision server tests against deepseek-vl2-tiny."""

    @classmethod
    def setUpClass(cls):
        """Launch one server process for the whole test class."""
        cls.model = "deepseek-ai/deepseek-vl2-tiny"
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.api_key = "sk-123456"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=[
                "--trust-remote-code",
                "--chat-template",
                "deepseek-vl2",
                "--context-length",
                "4096",
            ],
        )
        # The server is launched on the bare URL; "/v1" is appended only
        # afterwards, for the requests made by the tests.
        cls.base_url += "/v1"

    def test_video_chat_completion(self):
        # Overrides the inherited video test with a no-op for this model.
        pass
class
TestJanusProServer
(
TestOpenAIVisionServer
):
class
TestJanusProServer
(
TestOpenAIVisionServer
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment