Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
aadb6565
Unverified
Commit
aadb6565
authored
Apr 18, 2025
by
Cyrus Leung
Committed by
GitHub
Apr 18, 2025
Browse files
[Misc] Clean up Kimi-VL (#16833)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
87e067de
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
44 deletions
+20
-44
examples/offline_inference/vision_language.py
examples/offline_inference/vision_language.py
+2
-2
examples/offline_inference/vision_language_multi_image.py
examples/offline_inference/vision_language_multi_image.py
+1
-2
vllm/model_executor/models/kimi_vl.py
vllm/model_executor/models/kimi_vl.py
+17
-40
No files found.
examples/offline_inference/vision_language.py
View file @
aadb6565
...
...
@@ -376,9 +376,9 @@ def run_kimi_vl(questions: list[str], modality: str) -> ModelRequestData:
engine_args
=
EngineArgs
(
model
=
"moonshotai/Kimi-VL-A3B-Instruct"
,
max_model_len
=
4096
,
disable_mm_preprocessor_cache
=
args
.
disable_mm_preprocessor_cache
,
trust_remote_code
=
True
,
max_model_len
=
4096
,
limit_mm_per_prompt
=
{
"image"
:
1
},
)
return
ModelRequestData
(
...
...
examples/offline_inference/vision_language_multi_image.py
View file @
aadb6565
...
...
@@ -331,11 +331,10 @@ def load_kimi_vl(question: str, image_urls: list[str]) -> ModelRequestData:
engine_args
=
EngineArgs
(
model
=
model_name
,
trust_remote_code
=
True
,
max_model_len
=
4096
,
max_num_seqs
=
4
,
tensor_parallel_size
=
1
,
limit_mm_per_prompt
=
{
"image"
:
len
(
image_urls
)},
trust_remote_code
=
True
,
)
placeholders
=
[{
"type"
:
"image"
,
"image"
:
url
}
for
url
in
image_urls
]
...
...
vllm/model_executor/models/kimi_vl.py
View file @
aadb6565
...
...
@@ -56,7 +56,6 @@ from transformers.activations import GELUActivation
from
vllm.config
import
VllmConfig
from
vllm.distributed
import
(
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
)
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.fused_moe
import
FusedMoE
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.sampler
import
SamplerOutput
,
get_sampler
...
...
@@ -70,22 +69,20 @@ from vllm.model_executor.models.moonvit import MoonVitPretrainedModel
from
vllm.model_executor.models.utils
import
merge_multimodal_embeddings
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
MultiModal
FieldConfig
,
MultiModal
Kwargs
,
NestedTensors
)
from
vllm.multimodal.inputs
import
(
MultiModal
DataDict
,
MultiModal
FieldConfig
,
MultiModalKwargs
,
NestedTensors
)
from
vllm.multimodal.parse
import
(
ImageEmbeddingItems
,
ImageProcessorItems
,
MultiModalDataItems
)
from
vllm.multimodal.processing
import
(
BaseMultiModalProcessor
,
BaseProcessingInfo
,
PromptReplacement
,
PromptUpdate
)
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
,
ProcessorInputs
from
vllm.multimodal.profiling
import
BaseDummyInputsBuilder
from
vllm.sequence
import
IntermediateTensors
from
vllm.transformers_utils.configs
import
KimiVLConfig
,
MoonViTConfig
from
vllm.transformers_utils.configs.deepseek_vl2
import
DeepseekV2Config
from
.utils
import
is_pp_missing_parameter
,
maybe_prefix
logger
=
init_logger
(
__name__
)
# For dummy input only
@
dataclass
...
...
@@ -143,6 +140,9 @@ class KimiVLProcessingInfo(BaseProcessingInfo):
def
get_hf_config
(
self
):
return
self
.
ctx
.
get_hf_config
(
KimiVLConfig
)
def
get_supported_mm_limits
(
self
)
->
Mapping
[
str
,
Optional
[
int
]]:
return
{
"image"
:
None
}
def
get_num_image_tokens
(
self
,
*
,
...
...
@@ -180,23 +180,6 @@ class KimiVLProcessingInfo(BaseProcessingInfo):
token_width
=
(
width
+
pad_width
)
//
(
kernel_size
[
1
]
*
patch_size
)
return
int
(
token_height
*
token_width
)
def
get_supported_mm_limits
(
self
)
->
Mapping
[
str
,
Optional
[
int
]]:
# None means unlimited
return
{
"image"
:
None
}
def
get_mm_max_tokens_per_item
(
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
)
->
Mapping
[
str
,
int
]:
return
{
"image"
:
self
.
get_num_image_tokens
(
image_width
=
MaxImageTokenMeta
.
width
,
image_height
=
MaxImageTokenMeta
.
height
,
),
}
@
property
def
image_token_id
(
self
)
->
int
:
return
self
.
get_hf_config
().
media_placeholder_token_id
...
...
@@ -204,34 +187,28 @@ class KimiVLProcessingInfo(BaseProcessingInfo):
class
KimiVLDummyInputsBuilder
(
BaseDummyInputsBuilder
[
KimiVLProcessingInfo
]):
def
__init__
(
self
,
info
:
KimiVLProcessingInfo
)
->
None
:
super
().
__init__
(
info
)
def
get_dummy_text
(
self
,
mm_counts
:
Mapping
[
str
,
int
])
->
str
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
processor
=
self
.
info
.
get_hf_processor
()
image_token
=
processor
.
image_token
self
.
image_token_id
=
self
.
info
.
image_token_id
self
.
image_token
=
self
.
info
.
get_tokenizer
().
decode
(
self
.
image_token_id
)
return
image_token
*
num_images
def
get_dummy_
processor_inputs
(
def
get_dummy_
mm_data
(
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
)
->
ProcessorInputs
:
)
->
MultiModalDataDict
:
num_images
=
mm_counts
.
get
(
"image"
,
0
)
width
=
MaxImageTokenMeta
.
width
height
=
MaxImageTokenMeta
.
height
mm_data
=
{
return
{
"image"
:
self
.
_get_dummy_images
(
width
=
width
,
height
=
height
,
self
.
_get_dummy_images
(
width
=
MaxImageTokenMeta
.
width
,
height
=
MaxImageTokenMeta
.
height
,
num_images
=
num_images
)
}
return
ProcessorInputs
(
prompt_text
=
self
.
image_token
*
num_images
,
mm_data
=
mm_data
,
)
class
KimiVLMultiModalProcessor
(
BaseMultiModalProcessor
[
KimiVLProcessingInfo
]):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment