Unverified Commit e09d1753 authored by Harry Mellor, committed by GitHub
Browse files

Remove Python 3.9 support ahead of PyTorch 2.9 in v0.11.1 (#26416)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 4ba88757
...@@ -55,11 +55,6 @@ repos: ...@@ -55,11 +55,6 @@ repos:
types_or: [python, pyi] types_or: [python, pyi]
require_serial: true require_serial: true
additional_dependencies: [mypy==1.11.1, regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic] additional_dependencies: [mypy==1.11.1, regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic]
- id: mypy-3.9 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
name: Run mypy for Python 3.9
entry: python tools/pre_commit/mypy.py 1 "3.9"
<<: *mypy_common
stages: [manual] # Only run in CI
- id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward - id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
name: Run mypy for Python 3.10 name: Run mypy for Python 3.10
entry: python tools/pre_commit/mypy.py 1 "3.10" entry: python tools/pre_commit/mypy.py 1 "3.10"
...@@ -75,6 +70,11 @@ repos: ...@@ -75,6 +70,11 @@ repos:
entry: python tools/pre_commit/mypy.py 1 "3.12" entry: python tools/pre_commit/mypy.py 1 "3.12"
<<: *mypy_common <<: *mypy_common
stages: [manual] # Only run in CI stages: [manual] # Only run in CI
- id: mypy-3.13 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward
name: Run mypy for Python 3.13
entry: python tools/pre_commit/mypy.py 1 "3.13"
<<: *mypy_common
stages: [manual] # Only run in CI
- id: shellcheck - id: shellcheck
name: Lint shell scripts name: Lint shell scripts
entry: tools/shellcheck.sh entry: tools/shellcheck.sh
......
...@@ -34,7 +34,7 @@ install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS) ...@@ -34,7 +34,7 @@ install(CODE "set(CMAKE_INSTALL_LOCAL_ONLY TRUE)" ALL_COMPONENTS)
# Supported python versions. These versions will be searched in order, the # Supported python versions. These versions will be searched in order, the
# first match will be selected. These should be kept in sync with setup.py. # first match will be selected. These should be kept in sync with setup.py.
# #
set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11" "3.12" "3.13") set(PYTHON_SUPPORTED_VERSIONS "3.10" "3.11" "3.12" "3.13")
# Supported AMD GPU architectures. # Supported AMD GPU architectures.
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151") set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151")
......
...@@ -13,7 +13,7 @@ from datetime import datetime ...@@ -13,7 +13,7 @@ from datetime import datetime
from enum import Enum from enum import Enum
from http import HTTPStatus from http import HTTPStatus
from statistics import mean from statistics import mean
from typing import NamedTuple, Optional, Union from typing import NamedTuple, Union
import aiohttp # type: ignore import aiohttp # type: ignore
import numpy as np # type: ignore import numpy as np # type: ignore
...@@ -46,9 +46,9 @@ class ConversationSampling(str, Enum): ...@@ -46,9 +46,9 @@ class ConversationSampling(str, Enum):
class ClientArgs(NamedTuple): class ClientArgs(NamedTuple):
seed: int seed: int
max_num_requests: Optional[int] max_num_requests: int | None
skip_first_turn: bool skip_first_turn: bool
max_turns: Optional[int] max_turns: int | None
max_active_conversations: int max_active_conversations: int
verbose: bool verbose: bool
print_content: bool print_content: bool
...@@ -109,9 +109,9 @@ class RequestStats(NamedTuple): ...@@ -109,9 +109,9 @@ class RequestStats(NamedTuple):
class MetricStats: class MetricStats:
def __init__(self) -> None: def __init__(self) -> None:
self.min: Optional[float] = None self.min: float | None = None
self.max: Optional[float] = None self.max: float | None = None
self.avg: Optional[float] = None self.avg: float | None = None
self.sum = 0.0 self.sum = 0.0
self.count = 0 self.count = 0
...@@ -143,7 +143,7 @@ class MovingAverage: ...@@ -143,7 +143,7 @@ class MovingAverage:
self.index = 0 self.index = 0
self.sum = 0.0 self.sum = 0.0
self.count = 0 self.count = 0
self.avg: Optional[float] = None self.avg: float | None = None
def update(self, new_value: float) -> None: def update(self, new_value: float) -> None:
if self.count < self.window_size: if self.count < self.window_size:
...@@ -198,14 +198,6 @@ class DebugStats: ...@@ -198,14 +198,6 @@ class DebugStats:
self.logger.info("-" * 50) self.logger.info("-" * 50)
# Must support Python 3.8, we can't use str.removeprefix(prefix)
# introduced in Python 3.9
def remove_prefix(text: str, prefix: str) -> str:
if text.startswith(prefix):
return text[len(prefix) :]
return text
def nanosec_to_millisec(value: float) -> float: def nanosec_to_millisec(value: float) -> float:
return value / 1000000.0 return value / 1000000.0
...@@ -220,8 +212,8 @@ async def send_request( ...@@ -220,8 +212,8 @@ async def send_request(
chat_url: str, chat_url: str,
model: str, model: str,
stream: bool = True, stream: bool = True,
min_tokens: Optional[int] = None, min_tokens: int | None = None,
max_tokens: Optional[int] = None, max_tokens: int | None = None,
) -> ServerResponse: ) -> ServerResponse:
payload = { payload = {
"model": model, "model": model,
...@@ -250,9 +242,9 @@ async def send_request( ...@@ -250,9 +242,9 @@ async def send_request(
timeout = aiohttp.ClientTimeout(total=timeout_sec) timeout = aiohttp.ClientTimeout(total=timeout_sec)
valid_response = True valid_response = True
ttft: Optional[float] = None ttft: float | None = None
chunk_delay: list[int] = [] chunk_delay: list[int] = []
latency: Optional[float] = None latency: float | None = None
first_chunk = "" first_chunk = ""
generated_text = "" generated_text = ""
...@@ -269,7 +261,7 @@ async def send_request( ...@@ -269,7 +261,7 @@ async def send_request(
if not chunk_bytes: if not chunk_bytes:
continue continue
chunk = remove_prefix(chunk_bytes.decode("utf-8"), "data: ") chunk = chunk_bytes.decode("utf-8").removeprefix("data: ")
if chunk == "[DONE]": if chunk == "[DONE]":
# End of stream # End of stream
latency = time.perf_counter_ns() - start_time latency = time.perf_counter_ns() - start_time
...@@ -364,7 +356,7 @@ async def send_turn( ...@@ -364,7 +356,7 @@ async def send_turn(
req_args: RequestArgs, req_args: RequestArgs,
verbose: bool, verbose: bool,
verify_output: bool, verify_output: bool,
) -> Optional[RequestStats]: ) -> RequestStats | None:
assert messages_to_use > 0 assert messages_to_use > 0
assert messages_to_use <= len(conversation_messages) assert messages_to_use <= len(conversation_messages)
...@@ -769,7 +761,7 @@ def get_client_config( ...@@ -769,7 +761,7 @@ def get_client_config(
"Number of conversations must be equal or larger than the number of clients" "Number of conversations must be equal or larger than the number of clients"
) )
max_req_per_client: Optional[int] = None max_req_per_client: int | None = None
if args.max_num_requests is not None: if args.max_num_requests is not None:
# Max number of requests per client # Max number of requests per client
req_per_client = args.max_num_requests // args.num_clients req_per_client = args.max_num_requests // args.num_clients
...@@ -1032,7 +1024,7 @@ def process_statistics( ...@@ -1032,7 +1024,7 @@ def process_statistics(
warmup_percentages: list[float], warmup_percentages: list[float],
test_params: dict, test_params: dict,
verbose: bool, verbose: bool,
gen_conv_args: Optional[GenConvArgs] = None, gen_conv_args: GenConvArgs | None = None,
excel_output: bool = False, excel_output: bool = False,
) -> None: ) -> None:
if len(client_metrics) == 0: if len(client_metrics) == 0:
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# vllm-dev: used for development # vllm-dev: used for development
# #
# Build arguments: # Build arguments:
# PYTHON_VERSION=3.12 (default)|3.11|3.10|3.9 # PYTHON_VERSION=3.13|3.12 (default)|3.11|3.10
# VLLM_CPU_DISABLE_AVX512=false (default)|true # VLLM_CPU_DISABLE_AVX512=false (default)|true
# VLLM_CPU_AVX512BF16=false (default)|true # VLLM_CPU_AVX512BF16=false (default)|true
# VLLM_CPU_AVX512VNNI=false (default)|true # VLLM_CPU_AVX512VNNI=false (default)|true
......
...@@ -54,7 +54,7 @@ For more details about installing from source and installing for other hardware, ...@@ -54,7 +54,7 @@ For more details about installing from source and installing for other hardware,
For an optimized workflow when iterating on C++/CUDA kernels, see the [Incremental Compilation Workflow](./incremental_build.md) for recommendations. For an optimized workflow when iterating on C++/CUDA kernels, see the [Incremental Compilation Workflow](./incremental_build.md) for recommendations.
!!! tip !!! tip
vLLM is compatible with Python versions 3.9 to 3.12. However, vLLM's default [Dockerfile](gh-file:docker/Dockerfile) ships with Python 3.12 and tests in CI (except `mypy`) are run with Python 3.12. vLLM is compatible with Python versions 3.10 to 3.13. However, vLLM's default [Dockerfile](gh-file:docker/Dockerfile) ships with Python 3.12 and tests in CI (except `mypy`) are run with Python 3.12.
Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment. Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment.
...@@ -83,7 +83,7 @@ vLLM's `pre-commit` hooks will now run automatically every time you commit. ...@@ -83,7 +83,7 @@ vLLM's `pre-commit` hooks will now run automatically every time you commit.
```bash ```bash
pre-commit run --hook-stage manual markdownlint pre-commit run --hook-stage manual markdownlint
pre-commit run --hook-stage manual mypy-3.9 pre-commit run --hook-stage manual mypy-3.10
``` ```
### Documentation ### Documentation
......
...@@ -20,7 +20,7 @@ vLLM is a Python library that supports the following CPU variants. Select your C ...@@ -20,7 +20,7 @@ vLLM is a Python library that supports the following CPU variants. Select your C
## Requirements ## Requirements
- Python: 3.9 -- 3.12 - Python: 3.10 -- 3.13
=== "Intel/AMD x86" === "Intel/AMD x86"
......
...@@ -17,7 +17,7 @@ vLLM is a Python library that supports the following GPU variants. Select your G ...@@ -17,7 +17,7 @@ vLLM is a Python library that supports the following GPU variants. Select your G
## Requirements ## Requirements
- OS: Linux - OS: Linux
- Python: 3.9 -- 3.12 - Python: 3.10 -- 3.13
!!! note !!! note
vLLM does not support Windows natively. To run vLLM on Windows, you can use the Windows Subsystem for Linux (WSL) with a compatible Linux distribution, or use some community-maintained forks, e.g. [https://github.com/SystemPanic/vllm-windows](https://github.com/SystemPanic/vllm-windows). vLLM does not support Windows natively. To run vLLM on Windows, you can use the Windows Subsystem for Linux (WSL) with a compatible Linux distribution, or use some community-maintained forks, e.g. [https://github.com/SystemPanic/vllm-windows](https://github.com/SystemPanic/vllm-windows).
......
...@@ -8,7 +8,7 @@ This guide will help you quickly get started with vLLM to perform: ...@@ -8,7 +8,7 @@ This guide will help you quickly get started with vLLM to perform:
## Prerequisites ## Prerequisites
- OS: Linux - OS: Linux
- Python: 3.9 -- 3.13 - Python: 3.10 -- 3.13
## Installation ## Installation
......
[project] [project]
name = "examples-online-structured-outputs" name = "examples-online-structured-outputs"
requires-python = ">=3.9, <3.13" requires-python = ">=3.10, <3.14"
dependencies = ["openai==1.78.1", "pydantic==2.11.4"] dependencies = ["openai==1.78.1", "pydantic==2.11.4"]
version = "0.0.0" version = "0.0.0"
......
...@@ -20,7 +20,6 @@ license-files = ["LICENSE"] ...@@ -20,7 +20,6 @@ license-files = ["LICENSE"]
readme = "README.md" readme = "README.md"
description = "A high-throughput and memory-efficient inference and serving engine for LLMs" description = "A high-throughput and memory-efficient inference and serving engine for LLMs"
classifiers = [ classifiers = [
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.12",
...@@ -31,7 +30,7 @@ classifiers = [ ...@@ -31,7 +30,7 @@ classifiers = [
"Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Information Analysis", "Topic :: Scientific/Engineering :: Information Analysis",
] ]
requires-python = ">=3.9,<3.14" requires-python = ">=3.10,<3.14"
dynamic = [ "version", "dependencies", "optional-dependencies"] dynamic = [ "version", "dependencies", "optional-dependencies"]
[project.urls] [project.urls]
...@@ -79,12 +78,18 @@ ignore = [ ...@@ -79,12 +78,18 @@ ignore = [
"F405", "F403", "F405", "F403",
# lambda expression assignment # lambda expression assignment
"E731", "E731",
# zip without `strict=`
"B905",
# Loop control variable not used within loop body # Loop control variable not used within loop body
"B007", "B007",
# f-string format # f-string format
"UP032", "UP032",
# Can remove once 3.10+ is the minimum Python version # Can remove once 3.10+ is the minimum Python version
"UP007", "UP007",
"UP027",
"UP035",
"UP038",
"UP045",
] ]
[tool.ruff.format] [tool.ruff.format]
......
# Common dependencies # Common dependencies
-r common.txt -r common.txt
numba == 0.60.0; python_version == '3.9' and platform_machine != "s390x" # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2; platform_machine != "s390x" # Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9' and platform_machine != "s390x"
# Dependencies for CPUs # Dependencies for CPUs
packaging>=24.2 packaging>=24.2
......
# Common dependencies # Common dependencies
-r common.txt -r common.txt
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
# Dependencies for NVIDIA GPUs # Dependencies for NVIDIA GPUs
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1. ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
......
...@@ -40,8 +40,7 @@ buildkite-test-collector==0.1.9 ...@@ -40,8 +40,7 @@ buildkite-test-collector==0.1.9
genai_perf==0.0.8 genai_perf==0.0.8
tritonclient==2.51.0 tritonclient==2.51.0
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
numpy numpy
runai-model-streamer[s3,gcs]==0.14.0 runai-model-streamer[s3,gcs]==0.14.0
fastsafetensors>=0.1.10 fastsafetensors>=0.1.10
......
# Common dependencies # Common dependencies
-r common.txt -r common.txt
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
# Dependencies for AMD GPUs # Dependencies for AMD GPUs
datasets datasets
......
...@@ -48,8 +48,7 @@ buildkite-test-collector==0.1.9 ...@@ -48,8 +48,7 @@ buildkite-test-collector==0.1.9
genai_perf==0.0.8 genai_perf==0.0.8
tritonclient==2.51.0 tritonclient==2.51.0
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
numba == 0.61.2; python_version > '3.9'
numpy numpy
runai-model-streamer[s3,gcs]==0.14.0 runai-model-streamer[s3,gcs]==0.14.0
fastsafetensors>=0.1.10 fastsafetensors>=0.1.10
......
...@@ -9,7 +9,7 @@ setuptools>=77.0.3,<80.0.0 ...@@ -9,7 +9,7 @@ setuptools>=77.0.3,<80.0.0
wheel wheel
jinja2>=3.1.6 jinja2>=3.1.6
datasets # for benchmark scripts datasets # for benchmark scripts
numba == 0.60.0 # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2 # Required for N-gram speculative decoding
nixl==0.3.0 # for PD disaggregation nixl==0.3.0 # for PD disaggregation
torch==2.8.0+xpu torch==2.8.0+xpu
torchaudio torchaudio
......
...@@ -8,6 +8,7 @@ and that each field has a docstring. ...@@ -8,6 +8,7 @@ and that each field has a docstring.
import ast import ast
import inspect import inspect
import sys import sys
from itertools import pairwise
import regex as re import regex as re
...@@ -20,19 +21,6 @@ def get_attr_docs(cls_node: ast.ClassDef) -> dict[str, str]: ...@@ -20,19 +21,6 @@ def get_attr_docs(cls_node: ast.ClassDef) -> dict[str, str]:
https://davidism.com/mit-license/ https://davidism.com/mit-license/
""" """
def pairwise(iterable):
"""
Manually implement https://docs.python.org/3/library/itertools.html#itertools.pairwise
Can be removed when Python 3.9 support is dropped.
"""
iterator = iter(iterable)
a = next(iterator, None)
for b in iterator:
yield a, b
a = b
out = {} out = {}
# Consider each pair of nodes. # Consider each pair of nodes.
......
...@@ -7,6 +7,7 @@ import inspect ...@@ -7,6 +7,7 @@ import inspect
import textwrap import textwrap
from collections.abc import Iterable from collections.abc import Iterable
from dataclasses import MISSING, Field, field, fields, is_dataclass, replace from dataclasses import MISSING, Field, field, fields, is_dataclass, replace
from itertools import pairwise
from typing import TYPE_CHECKING, Any, Protocol, TypeVar from typing import TYPE_CHECKING, Any, Protocol, TypeVar
import regex as re import regex as re
...@@ -102,19 +103,6 @@ def get_attr_docs(cls: type[Any]) -> dict[str, str]: ...@@ -102,19 +103,6 @@ def get_attr_docs(cls: type[Any]) -> dict[str, str]:
https://davidism.com/mit-license/ https://davidism.com/mit-license/
""" """
def pairwise(iterable):
"""
Manually implement https://docs.python.org/3/library/itertools.html#itertools.pairwise
Can be removed when Python 3.9 support is dropped.
"""
iterator = iter(iterable)
a = next(iterator, None)
for b in iterator:
yield a, b
a = b
try: try:
cls_node = ast.parse(textwrap.dedent(inspect.getsource(cls))).body[0] cls_node = ast.parse(textwrap.dedent(inspect.getsource(cls))).body[0]
except (OSError, KeyError, TypeError): except (OSError, KeyError, TypeError):
......
...@@ -15,12 +15,7 @@ plugins_loaded = False ...@@ -15,12 +15,7 @@ plugins_loaded = False
def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]: def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
import sys from importlib.metadata import entry_points
if sys.version_info < (3, 10):
from importlib_metadata import entry_points
else:
from importlib.metadata import entry_points
allowed_plugins = envs.VLLM_PLUGINS allowed_plugins = envs.VLLM_PLUGINS
......
...@@ -55,12 +55,7 @@ BUILTIN_LOGITS_PROCESSORS: list[type[LogitsProcessor]] = [ ...@@ -55,12 +55,7 @@ BUILTIN_LOGITS_PROCESSORS: list[type[LogitsProcessor]] = [
def _load_logitsprocs_plugins() -> list[type[LogitsProcessor]]: def _load_logitsprocs_plugins() -> list[type[LogitsProcessor]]:
"""Load all installed logit processor plugins""" """Load all installed logit processor plugins"""
import sys from importlib.metadata import entry_points
if sys.version_info < (3, 10):
from importlib_metadata import entry_points
else:
from importlib.metadata import entry_points
installed_logitsprocs_plugins = entry_points(group=LOGITSPROCS_GROUP) installed_logitsprocs_plugins = entry_points(group=LOGITSPROCS_GROUP)
if len(installed_logitsprocs_plugins) == 0: if len(installed_logitsprocs_plugins) == 0:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment