Unverified commit f7967577, authored by Harry Mellor, committed by GitHub
Browse files

Remove requirement to use `--hf-overrides` for `DeepseekVLV2ForCausalLM` (#35203)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent af770b8e
...@@ -682,7 +682,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen ...@@ -682,7 +682,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
| `Blip2ForConditionalGeneration` | BLIP-2 | T + I<sup>E</sup> | `Salesforce/blip2-opt-2.7b`, `Salesforce/blip2-opt-6.7b`, etc. | ✅︎ | ✅︎ | | `Blip2ForConditionalGeneration` | BLIP-2 | T + I<sup>E</sup> | `Salesforce/blip2-opt-2.7b`, `Salesforce/blip2-opt-6.7b`, etc. | ✅︎ | ✅︎ |
| `ChameleonForConditionalGeneration` | Chameleon | T + I | `facebook/chameleon-7b`, etc. | | ✅︎ | | `ChameleonForConditionalGeneration` | Chameleon | T + I | `facebook/chameleon-7b`, etc. | | ✅︎ |
| `Cohere2VisionForConditionalGeneration` | Command A Vision | T + I<sup>+</sup> | `CohereLabs/command-a-vision-07-2025`, etc. | | ✅︎ | | `Cohere2VisionForConditionalGeneration` | Command A Vision | T + I<sup>+</sup> | `CohereLabs/command-a-vision-07-2025`, etc. | | ✅︎ |
| `DeepseekVLV2ForCausalLM`<sup>^</sup> | DeepSeek-VL2 | T + I<sup>+</sup> | `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2`, etc. | | ✅︎ | | `DeepseekVLV2ForCausalLM` | DeepSeek-VL2 | T + I<sup>+</sup> | `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2`, etc. | | ✅︎ |
| `DeepseekOCRForCausalLM` | DeepSeek-OCR | T + I<sup>+</sup> | `deepseek-ai/DeepSeek-OCR`, etc. | ✅︎ | ✅︎ | | `DeepseekOCRForCausalLM` | DeepSeek-OCR | T + I<sup>+</sup> | `deepseek-ai/DeepSeek-OCR`, etc. | ✅︎ | ✅︎ |
| `DeepseekOCR2ForCausalLM` | DeepSeek-OCR-2 | T + I<sup>+</sup> | `deepseek-ai/DeepSeek-OCR-2`, etc. | ✅︎ | ✅︎ | | `DeepseekOCR2ForCausalLM` | DeepSeek-OCR-2 | T + I<sup>+</sup> | `deepseek-ai/DeepSeek-OCR-2`, etc. | ✅︎ | ✅︎ |
| `Eagle2_5_VLForConditionalGeneration` | Eagle2.5-VL | T + I<sup>E+</sup> | `nvidia/Eagle2.5-8B`, etc. | ✅︎ | ✅︎ | | `Eagle2_5_VLForConditionalGeneration` | Eagle2.5-VL | T + I<sup>E+</sup> | `nvidia/Eagle2.5-8B`, etc. | ✅︎ | ✅︎ |
...@@ -762,10 +762,8 @@ Some models are supported only via the [Transformers modeling backend](#transfor ...@@ -762,10 +762,8 @@ Some models are supported only via the [Transformers modeling backend](#transfor
|--------------|--------|--------|-------------------|-----------------------------|-----------------------------------------| |--------------|--------|--------|-------------------|-----------------------------|-----------------------------------------|
| `Emu3ForConditionalGeneration` | Emu3 | T + I | `BAAI/Emu3-Chat-hf` | ✅︎ | ✅︎ | | `Emu3ForConditionalGeneration` | Emu3 | T + I | `BAAI/Emu3-Chat-hf` | ✅︎ | ✅︎ |
<sup>^</sup> You need to set the architecture name via `--hf-overrides` to match the one in vLLM. <sup>^</sup> You need to set the architecture name via `--hf-overrides` to match the one in vLLM.</br>
&nbsp;&nbsp;&nbsp;&nbsp;• For example, to use DeepSeek-VL2 series models: <sup>E</sup> Pre-computed embeddings can be inputted for this modality.</br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`--hf-overrides '{"architectures": ["DeepseekVLV2ForCausalLM"]}'`
<sup>E</sup> Pre-computed embeddings can be inputted for this modality.
<sup>+</sup> Multiple items can be inputted per text prompt for this modality. <sup>+</sup> Multiple items can be inputted per text prompt for this modality.
!!! note !!! note
......
...@@ -715,7 +715,6 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -715,7 +715,6 @@ _MULTIMODAL_EXAMPLE_MODELS = {
extras={"fork": "Isotr0py/deepseek-vl2-tiny"}, extras={"fork": "Isotr0py/deepseek-vl2-tiny"},
max_transformers_version="4.48", max_transformers_version="4.48",
transformers_version_reason={"hf": "HF model is not compatible."}, transformers_version_reason={"hf": "HF model is not compatible."},
hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
), ),
"DeepseekOCRForCausalLM": _HfExamplesInfo( "DeepseekOCRForCausalLM": _HfExamplesInfo(
"deepseek-ai/DeepSeek-OCR", "deepseek-ai/DeepSeek-OCR",
......
...@@ -95,18 +95,6 @@ cleanup_instances() { ...@@ -95,18 +95,6 @@ cleanup_instances() {
sleep 2 sleep 2
} }
# Print the extra server flags a given model needs (one line on stdout).
# Arguments:
#   $1 - model name to look up
# Output:
#   The flag string for a model that needs special handling, otherwise an
#   empty line.
get_model_args() {
  local requested_model=$1

  case "$requested_model" in
    "deepseek-ai/deepseek-vl2-tiny")
      # This model gets an explicit architecture override plus remote code.
      echo "--hf_overrides '{\"architectures\": [\"DeepseekVLV2ForCausalLM\"]}' --trust-remote-code"
      ;;
    *)
      # No model-specific flags: still emit an (empty) line for the caller.
      echo ""
      ;;
  esac
}
get_num_gpus() { get_num_gpus() {
if [[ "$SMI_BIN" == *"nvidia"* ]]; then if [[ "$SMI_BIN" == *"nvidia"* ]]; then
$SMI_BIN --query-gpu=name --format=csv,noheader | wc -l $SMI_BIN --query-gpu=name --format=csv,noheader | wc -l
...@@ -127,9 +115,6 @@ run_tests_for_model() { ...@@ -127,9 +115,6 @@ run_tests_for_model() {
echo "Testing model: $model_name" echo "Testing model: $model_name"
echo "================================" echo "================================"
# Get model-specific arguments
local model_args=$(get_model_args "$model_name")
# Arrays to store all hosts and ports # Arrays to store all hosts and ports
PREFILL_HOSTS=() PREFILL_HOSTS=()
PREFILL_PORTS=() PREFILL_PORTS=()
...@@ -172,11 +157,7 @@ run_tests_for_model() { ...@@ -172,11 +157,7 @@ run_tests_for_model() {
BASE_CMD="${BASE_CMD} --attention-backend=$ATTENTION_BACKEND" BASE_CMD="${BASE_CMD} --attention-backend=$ATTENTION_BACKEND"
fi fi
if [ -n "$model_args" ]; then
FULL_CMD="$BASE_CMD $model_args"
else
FULL_CMD="$BASE_CMD" FULL_CMD="$BASE_CMD"
fi
eval "$FULL_CMD &" eval "$FULL_CMD &"
...@@ -227,11 +208,7 @@ run_tests_for_model() { ...@@ -227,11 +208,7 @@ run_tests_for_model() {
--tensor-parallel-size 1 --enable-expert-parallel" --tensor-parallel-size 1 --enable-expert-parallel"
fi fi
if [ -n "$model_args" ]; then
FULL_CMD="$BASE_CMD $model_args"
else
FULL_CMD="$BASE_CMD" FULL_CMD="$BASE_CMD"
fi
eval "$FULL_CMD &" eval "$FULL_CMD &"
......
...@@ -55,19 +55,6 @@ cleanup_instances() { ...@@ -55,19 +55,6 @@ cleanup_instances() {
sleep 2 sleep 2
} }
# Print the extra server flags a given model needs (one line on stdout).
# Arguments:
#   $1 - model name to look up
# Output:
#   The flag string for a model that needs special handling, otherwise an
#   empty line.
get_model_args() {
  local requested_model=$1

  case "$requested_model" in
    "deepseek-ai/deepseek-vl2-tiny")
      # This model gets an explicit architecture override plus remote code.
      echo "--hf_overrides '{\"architectures\": [\"DeepseekVLV2ForCausalLM\"]}' --trust-remote-code"
      ;;
    *)
      # No model-specific flags: still emit an (empty) line for the caller.
      echo ""
      ;;
  esac
}
# Function to run tests for a specific model # Function to run tests for a specific model
run_tests_for_model() { run_tests_for_model() {
local model_name=$1 local model_name=$1
...@@ -75,9 +62,6 @@ run_tests_for_model() { ...@@ -75,9 +62,6 @@ run_tests_for_model() {
echo "Testing model: $model_name" echo "Testing model: $model_name"
echo "================================" echo "================================"
# Get model-specific arguments
local model_args=$(get_model_args "$model_name")
# Start prefill instance # Start prefill instance
PREFILL_PORT=8001 PREFILL_PORT=8001
...@@ -87,11 +71,7 @@ run_tests_for_model() { ...@@ -87,11 +71,7 @@ run_tests_for_model() {
--gpu-memory-utilization 0.2 \ --gpu-memory-utilization 0.2 \
--kv-transfer-config '$KV_CONFIG'" --kv-transfer-config '$KV_CONFIG'"
if [ -n "$model_args" ]; then
FULL_CMD="$BASE_CMD $model_args"
else
FULL_CMD="$BASE_CMD" FULL_CMD="$BASE_CMD"
fi
eval "$FULL_CMD &" eval "$FULL_CMD &"
...@@ -105,11 +85,7 @@ run_tests_for_model() { ...@@ -105,11 +85,7 @@ run_tests_for_model() {
--gpu-memory-utilization 0.2 \ --gpu-memory-utilization 0.2 \
--kv-transfer-config '$KV_CONFIG'" --kv-transfer-config '$KV_CONFIG'"
if [ -n "$model_args" ]; then
FULL_CMD="$BASE_CMD $model_args"
else
FULL_CMD="$BASE_CMD" FULL_CMD="$BASE_CMD"
fi
eval "$FULL_CMD &" eval "$FULL_CMD &"
......
...@@ -89,6 +89,7 @@ class MlpProjectorConfig(PretrainedConfig): ...@@ -89,6 +89,7 @@ class MlpProjectorConfig(PretrainedConfig):
class DeepseekVLV2Config(PretrainedConfig): class DeepseekVLV2Config(PretrainedConfig):
model_type = "deepseek_vl_v2" model_type = "deepseek_vl_v2"
architectures: list[str] | None = None
vision_config: VisionEncoderConfig vision_config: VisionEncoderConfig
projector_config: MlpProjectorConfig projector_config: MlpProjectorConfig
...@@ -105,6 +106,9 @@ class DeepseekVLV2Config(PretrainedConfig): ...@@ -105,6 +106,9 @@ class DeepseekVLV2Config(PretrainedConfig):
): ):
super().__init__(**kwargs) super().__init__(**kwargs)
if self.architectures is None:
self.architectures = ["DeepseekVLV2ForCausalLM"]
vision_config = kwargs.get("vision_config", {}) vision_config = kwargs.get("vision_config", {})
self.vision_config = VisionEncoderConfig(**vision_config) self.vision_config = VisionEncoderConfig(**vision_config)
...@@ -120,8 +124,7 @@ class DeepseekVLV2Config(PretrainedConfig): ...@@ -120,8 +124,7 @@ class DeepseekVLV2Config(PretrainedConfig):
self.vocab_size = self.text_config.vocab_size self.vocab_size = self.text_config.vocab_size
# update model_type for OCR models # update model_type for OCR models
architectures = self.architectures or kwargs.get("architectures", []) if "DeepseekOCRForCausalLM" in self.architectures:
if "DeepseekOCRForCausalLM" in architectures:
self.model_type = "deepseek_ocr" self.model_type = "deepseek_ocr"
elif "DeepseekOCR2ForCausalLM" in architectures: elif "DeepseekOCR2ForCausalLM" in self.architectures:
self.model_type = "deepseek_ocr2" self.model_type = "deepseek_ocr2"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment