Merge tag 'v0.7.2' into v0.7.2-dev

66b809cc · zhuwenwen · 37b63c24 · 0408efc6 · 66b809cc · 66b809cc
Commit 66b809cc authored Feb 08, 2025 by zhuwenwen
20 changed files
--- a/vllm/engine/output_processor/single_step.py
+++ b/vllm/engine/output_processor/single_step.py
+# SPDX-License-Identifier: Apache-2.0
 from typing import List
 from vllm.config import SchedulerConfig

--- a/vllm/engine/output_processor/stop_checker.py
+++ b/vllm/engine/output_processor/stop_checker.py
+# SPDX-License-Identifier: Apache-2.0
 from typing import Callable, List, Optional, Tuple
 from vllm.lora.request import LoRARequest

--- a/vllm/engine/output_processor/util.py
+++ b/vllm/engine/output_processor/util.py
+# SPDX-License-Identifier: Apache-2.0
 from typing import List
 from typing import Sequence as GenericSequence
 from typing import cast

--- a/vllm/engine/protocol.py
+++ b/vllm/engine/protocol.py
+# SPDX-License-Identifier: Apache-2.0
 import asyncio
 from abc import ABC, abstractmethod
 from typing import AsyncGenerator, List, Mapping, Optional

--- a/vllm/entrypoints/api_server.py
+++ b/vllm/entrypoints/api_server.py
+# SPDX-License-Identifier: Apache-2.0
 """
 NOTE: This API server is used only for demonstrating usage of AsyncEngine
 and simple performance benchmarks. It is not intended for production use.

--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
+# SPDX-License-Identifier: Apache-2.0
 import asyncio
 import codecs
 import json
@@ -408,7 +410,7 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
                return "<image>"
            if model_type == "mllama":
                return "<|image|>"
-            if model_type == "qwen2_vl":
+            if model_type in ("qwen2_vl", "qwen2_5_vl"):
                return "<|vision_start|><|image_pad|><|vision_end|>"
            if model_type == "molmo":
                return ""
@@ -428,7 +430,7 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
                return "(<audio>./</audio>)"
            raise TypeError(f"Unknown model type: {model_type}")
        elif modality == "video":
-            if model_type == "qwen2_vl":
+            if model_type in ("qwen2_vl", "qwen2_5_vl"):
                return "<|vision_start|><|video_pad|><|vision_end|>"
            if model_type in ("minicpmo", "minicpmv"):
                return "(<video>./</video>)"

--- a/vllm/entrypoints/launcher.py
+++ b/vllm/entrypoints/launcher.py
+# SPDX-License-Identifier: Apache-2.0
 import asyncio
 import signal
 from http import HTTPStatus

--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
+# SPDX-License-Identifier: Apache-2.0
 import itertools
 import warnings
 from contextlib import contextmanager

--- a/vllm/entrypoints/logger.py
+++ b/vllm/entrypoints/logger.py
+# SPDX-License-Identifier: Apache-2.0
 from typing import List, Optional, Union
 from vllm.logger import init_logger

--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
+# SPDX-License-Identifier: Apache-2.0
 import asyncio
 import atexit
 import gc

--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
+# SPDX-License-Identifier: Apache-2.0
 """
 This file contains the command line arguments for the vLLM's
 OpenAI-compatible server. It is kept in a separate file for documentation

--- a/vllm/entrypoints/openai/logits_processors.py
+++ b/vllm/entrypoints/openai/logits_processors.py
+# SPDX-License-Identifier: Apache-2.0
 from functools import lru_cache, partial
 from typing import Dict, FrozenSet, Iterable, List, Optional, Union

--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
+# SPDX-License-Identifier: Apache-2.0
 # Adapted from
 # https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py
 import re

--- a/vllm/entrypoints/openai/reasoning_parsers/__init__.py
+++ b/vllm/entrypoints/openai/reasoning_parsers/__init__.py
+# SPDX-License-Identifier: Apache-2.0
 from .abs_reasoning_parsers import ReasoningParser, ReasoningParserManager
 from .deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser

--- a/vllm/entrypoints/openai/reasoning_parsers/abs_reasoning_parsers.py
+++ b/vllm/entrypoints/openai/reasoning_parsers/abs_reasoning_parsers.py
+# SPDX-License-Identifier: Apache-2.0
 import os
 from functools import cached_property
 from typing import Callable, Dict, List, Optional, Sequence, Tuple, Type, Union

--- a/vllm/entrypoints/openai/reasoning_parsers/deepseek_r1_reasoning_parser.py
+++ b/vllm/entrypoints/openai/reasoning_parsers/deepseek_r1_reasoning_parser.py
+# SPDX-License-Identifier: Apache-2.0
 import re
 from typing import Optional, Sequence, Tuple, Union

--- a/vllm/entrypoints/openai/run_batch.py
+++ b/vllm/entrypoints/openai/run_batch.py
+# SPDX-License-Identifier: Apache-2.0
 import asyncio
 from http import HTTPStatus
 from io import StringIO

--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
+# SPDX-License-Identifier: Apache-2.0
 import asyncio
 import json
 import time

--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
+# SPDX-License-Identifier: Apache-2.0
 import asyncio
 import time
 from typing import AsyncGenerator, AsyncIterator, Dict, List, Optional

--- a/vllm/entrypoints/openai/serving_embedding.py
+++ b/vllm/entrypoints/openai/serving_embedding.py
+# SPDX-License-Identifier: Apache-2.0
 import asyncio
 import base64
 import time