Unverified commit 21063c11, authored by Aaron Pham, committed by GitHub
Browse files

[CI/Build] drop support for Python 3.8 EOL (#8464)


Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
parent 4be3a451
...@@ -56,7 +56,7 @@ serving_column_mapping = { ...@@ -56,7 +56,7 @@ serving_column_mapping = {
def read_markdown(file): def read_markdown(file):
if os.path.exists(file): if os.path.exists(file):
with open(file, "r") as f: with open(file) as f:
return f.read() + "\n" return f.read() + "\n"
else: else:
return f"{file} not found.\n" return f"{file} not found.\n"
...@@ -75,14 +75,14 @@ if __name__ == "__main__": ...@@ -75,14 +75,14 @@ if __name__ == "__main__":
# collect results # collect results
for test_file in results_folder.glob("*.json"): for test_file in results_folder.glob("*.json"):
with open(test_file, "r") as f: with open(test_file) as f:
raw_result = json.loads(f.read()) raw_result = json.loads(f.read())
if "serving" in str(test_file): if "serving" in str(test_file):
# this result is generated via `benchmark_serving.py` # this result is generated via `benchmark_serving.py`
# attach the benchmarking command to raw_result # attach the benchmarking command to raw_result
with open(test_file.with_suffix(".commands"), "r") as f: with open(test_file.with_suffix(".commands")) as f:
command = json.loads(f.read()) command = json.loads(f.read())
raw_result.update(command) raw_result.update(command)
...@@ -97,7 +97,7 @@ if __name__ == "__main__": ...@@ -97,7 +97,7 @@ if __name__ == "__main__":
# this result is generated via `benchmark_latency.py` # this result is generated via `benchmark_latency.py`
# attach the benchmarking command to raw_result # attach the benchmarking command to raw_result
with open(test_file.with_suffix(".commands"), "r") as f: with open(test_file.with_suffix(".commands")) as f:
command = json.loads(f.read()) command = json.loads(f.read())
raw_result.update(command) raw_result.update(command)
...@@ -119,7 +119,7 @@ if __name__ == "__main__": ...@@ -119,7 +119,7 @@ if __name__ == "__main__":
# this result is generated via `benchmark_throughput.py` # this result is generated via `benchmark_throughput.py`
# attach the benchmarking command to raw_result # attach the benchmarking command to raw_result
with open(test_file.with_suffix(".commands"), "r") as f: with open(test_file.with_suffix(".commands")) as f:
command = json.loads(f.read()) command = json.loads(f.read())
raw_result.update(command) raw_result.update(command)
......
...@@ -72,7 +72,7 @@ def main(args): ...@@ -72,7 +72,7 @@ def main(args):
# collect results # collect results
for test_file in results_folder.glob("*_nightly_results.json"): for test_file in results_folder.glob("*_nightly_results.json"):
with open(test_file, "r") as f: with open(test_file) as f:
results = results + json.loads(f.read()) results = results + json.loads(f.read())
# generate markdown table # generate markdown table
...@@ -80,7 +80,7 @@ def main(args): ...@@ -80,7 +80,7 @@ def main(args):
md_table = tabulate(df, headers='keys', tablefmt='pipe', showindex=False) md_table = tabulate(df, headers='keys', tablefmt='pipe', showindex=False)
with open(args.description, "r") as f: with open(args.description) as f:
description = f.read() description = f.read()
description = description.format( description = description.format(
......
...@@ -36,11 +36,11 @@ if __name__ == "__main__": ...@@ -36,11 +36,11 @@ if __name__ == "__main__":
# collect results # collect results
for test_file in results_folder.glob("*.json"): for test_file in results_folder.glob("*.json"):
with open(test_file, "r") as f: with open(test_file) as f:
raw_result = json.loads(f.read()) raw_result = json.loads(f.read())
# attach the benchmarking command to raw_result # attach the benchmarking command to raw_result
with open(test_file.with_suffix(".commands"), "r") as f: with open(test_file.with_suffix(".commands")) as f:
command = json.loads(f.read()) command = json.loads(f.read())
raw_result.update(command) raw_result.update(command)
......
...@@ -25,7 +25,7 @@ jobs: ...@@ -25,7 +25,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
matrix: matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] python-version: ["3.9", "3.10", "3.11", "3.12"]
steps: steps:
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
......
...@@ -48,7 +48,7 @@ jobs: ...@@ -48,7 +48,7 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
os: ['ubuntu-20.04'] os: ['ubuntu-20.04']
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] python-version: ['3.9', '3.10', '3.11', '3.12']
pytorch-version: ['2.4.0'] # Must be the most recent version that meets requirements-cuda.txt. pytorch-version: ['2.4.0'] # Must be the most recent version that meets requirements-cuda.txt.
cuda-version: ['11.8', '12.1'] cuda-version: ['11.8', '12.1']
......
...@@ -6,7 +6,7 @@ version: 2 ...@@ -6,7 +6,7 @@ version: 2
build: build:
os: ubuntu-22.04 os: ubuntu-22.04
tools: tools:
python: "3.8" python: '3.9'
sphinx: sphinx:
configuration: docs/source/conf.py configuration: docs/source/conf.py
...@@ -19,4 +19,3 @@ formats: [] ...@@ -19,4 +19,3 @@ formats: []
python: python:
install: install:
- requirements: docs/requirements-docs.txt - requirements: docs/requirements-docs.txt
...@@ -79,7 +79,7 @@ async def async_request_tgi( ...@@ -79,7 +79,7 @@ async def async_request_tgi(
# any data, we should skip it. # any data, we should skip it.
if chunk_bytes.startswith(":"): if chunk_bytes.startswith(":"):
continue continue
chunk = remove_prefix(chunk_bytes, "data:") chunk = chunk_bytes.removeprefix("data:")
data = json.loads(chunk) data = json.loads(chunk)
timestamp = time.perf_counter() timestamp = time.perf_counter()
...@@ -144,7 +144,7 @@ async def async_request_trt_llm( ...@@ -144,7 +144,7 @@ async def async_request_trt_llm(
if not chunk_bytes: if not chunk_bytes:
continue continue
chunk = remove_prefix(chunk_bytes.decode("utf-8"), chunk = chunk_bytes.decode("utf-8").removeprefix(
"data:") "data:")
data = json.loads(chunk) data = json.loads(chunk)
...@@ -261,7 +261,7 @@ async def async_request_openai_completions( ...@@ -261,7 +261,7 @@ async def async_request_openai_completions(
if not chunk_bytes: if not chunk_bytes:
continue continue
chunk = remove_prefix(chunk_bytes.decode("utf-8"), chunk = chunk_bytes.decode("utf-8").removeprefix(
"data: ") "data: ")
if chunk == "[DONE]": if chunk == "[DONE]":
latency = time.perf_counter() - st latency = time.perf_counter() - st
...@@ -349,7 +349,7 @@ async def async_request_openai_chat_completions( ...@@ -349,7 +349,7 @@ async def async_request_openai_chat_completions(
if not chunk_bytes: if not chunk_bytes:
continue continue
chunk = remove_prefix(chunk_bytes.decode("utf-8"), chunk = chunk_bytes.decode("utf-8").removeprefix(
"data: ") "data: ")
if chunk == "[DONE]": if chunk == "[DONE]":
latency = time.perf_counter() - st latency = time.perf_counter() - st
...@@ -389,14 +389,6 @@ async def async_request_openai_chat_completions( ...@@ -389,14 +389,6 @@ async def async_request_openai_chat_completions(
return output return output
# Since vllm must support Python 3.8, we can't use str.removeprefix(prefix)
# introduced in Python 3.9
def remove_prefix(text: str, prefix: str) -> str:
if text.startswith(prefix):
return text[len(prefix):]
return text
def get_model(pretrained_model_name_or_path: str) -> str: def get_model(pretrained_model_name_or_path: str) -> str:
if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true': if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
from modelscope import snapshot_download from modelscope import snapshot_download
......
...@@ -269,10 +269,10 @@ def run_square_bench(args): ...@@ -269,10 +269,10 @@ def run_square_bench(args):
def run_range_bench(args): def run_range_bench(args):
m_start, k_start, n_start = [int(x) for x in args.dim_start.split(",")] m_start, k_start, n_start = (int(x) for x in args.dim_start.split(","))
m_end, k_end, n_end = [int(x) for x in args.dim_end.split(",")] m_end, k_end, n_end = (int(x) for x in args.dim_end.split(","))
m_increment, k_increment, n_increment = \ m_increment, k_increment, n_increment = \
[int(x) for x in args.dim_increment.split(",")] (int(x) for x in args.dim_increment.split(","))
Ms = list(range(m_start, m_end + 1, m_increment)) Ms = list(range(m_start, m_end + 1, m_increment))
Ks = list(range(k_start, k_end + 1, k_increment)) Ks = list(range(k_start, k_end + 1, k_increment))
Ns = list(range(n_start, n_end + 1, n_increment)) Ns = list(range(n_start, n_end + 1, n_increment))
......
...@@ -468,7 +468,7 @@ def generate(): ...@@ -468,7 +468,7 @@ def generate():
impl_configs = [] impl_configs = []
GPTQ_kernel_type_configs = list( GPTQ_kernel_type_configs = list(
(TypeConfig( TypeConfig(
element_a=element_a, element_a=element_a,
element_b=element_b, element_b=element_b,
element_b_scale=element_a, element_b_scale=element_a,
...@@ -476,7 +476,7 @@ def generate(): ...@@ -476,7 +476,7 @@ def generate():
element_d=element_a, element_d=element_a,
accumulator=DataType.f32, accumulator=DataType.f32,
) for element_b in (VLLMDataType.u4b8, VLLMDataType.u8b128) ) for element_b in (VLLMDataType.u4b8, VLLMDataType.u8b128)
for element_a in (DataType.f16, DataType.bf16))) for element_a in (DataType.f16, DataType.bf16))
GPTQ_kernel_specializations = [ GPTQ_kernel_specializations = [
Specialization(with_C=False, with_zeropoints=False, with_scales=True) Specialization(with_C=False, with_zeropoints=False, with_scales=True)
...@@ -490,7 +490,7 @@ def generate(): ...@@ -490,7 +490,7 @@ def generate():
] ]
AWQ_kernel_type_configs = list( AWQ_kernel_type_configs = list(
(TypeConfig( TypeConfig(
element_a=element_a, element_a=element_a,
element_b=element_b, element_b=element_b,
element_b_scale=element_a, element_b_scale=element_a,
...@@ -498,7 +498,7 @@ def generate(): ...@@ -498,7 +498,7 @@ def generate():
element_d=element_a, element_d=element_a,
accumulator=DataType.f32, accumulator=DataType.f32,
) for element_b in (DataType.u4, DataType.u8) ) for element_b in (DataType.u4, DataType.u8)
for element_a in (DataType.f16, DataType.bf16))) for element_a in (DataType.f16, DataType.bf16))
AWQ_kernel_specializations = [ AWQ_kernel_specializations = [
Specialization(with_C=False, with_zeropoints=True, with_scales=True) Specialization(with_C=False, with_zeropoints=True, with_scales=True)
......
...@@ -10,7 +10,7 @@ Requirements ...@@ -10,7 +10,7 @@ Requirements
============ ============
* OS: Linux * OS: Linux
* Python: 3.8 - 3.12 * Python: 3.9 -- 3.12
* GPU: compute capability 7.0 or higher (e.g., V100, T4, RTX20xx, A100, L4, H100, etc.) * GPU: compute capability 7.0 or higher (e.g., V100, T4, RTX20xx, A100, L4, H100, etc.)
Install released versions Install released versions
......
...@@ -34,7 +34,7 @@ select = [ ...@@ -34,7 +34,7 @@ select = [
# Pyflakes # Pyflakes
"F", "F",
# pyupgrade # pyupgrade
# "UP", "UP",
# flake8-bugbear # flake8-bugbear
"B", "B",
# flake8-simplify # flake8-simplify
...@@ -55,7 +55,7 @@ ignore = [ ...@@ -55,7 +55,7 @@ ignore = [
] ]
[tool.mypy] [tool.mypy]
python_version = "3.8" python_version = "3.9"
ignore_missing_imports = true ignore_missing_imports = true
check_untyped_defs = true check_untyped_defs = true
......
import importlib.util import importlib.util
import io
import logging import logging
import os import os
import re import re
...@@ -327,7 +326,7 @@ def get_neuronxcc_version(): ...@@ -327,7 +326,7 @@ def get_neuronxcc_version():
"__init__.py") "__init__.py")
# Check if the command was executed successfully # Check if the command was executed successfully
with open(version_file, "rt") as fp: with open(version_file) as fp:
content = fp.read() content = fp.read()
# Extract the version using a regular expression # Extract the version using a regular expression
...@@ -404,7 +403,8 @@ def read_readme() -> str: ...@@ -404,7 +403,8 @@ def read_readme() -> str:
"""Read the README file if present.""" """Read the README file if present."""
p = get_path("README.md") p = get_path("README.md")
if os.path.isfile(p): if os.path.isfile(p):
return io.open(get_path("README.md"), "r", encoding="utf-8").read() with open(get_path("README.md"), encoding="utf-8") as f:
return f.read()
else: else:
return "" return ""
...@@ -498,7 +498,6 @@ setup( ...@@ -498,7 +498,6 @@ setup(
"Documentation": "https://vllm.readthedocs.io/en/latest/", "Documentation": "https://vllm.readthedocs.io/en/latest/",
}, },
classifiers=[ classifiers=[
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.11",
...@@ -512,7 +511,7 @@ setup( ...@@ -512,7 +511,7 @@ setup(
], ],
packages=find_packages(exclude=("benchmarks", "csrc", "docs", "examples", packages=find_packages(exclude=("benchmarks", "csrc", "docs", "examples",
"tests*")), "tests*")),
python_requires=">=3.8", python_requires=">=3.9",
install_requires=get_requirements(), install_requires=get_requirements(),
ext_modules=ext_modules, ext_modules=ext_modules,
extras_require={ extras_require={
......
...@@ -429,8 +429,8 @@ def benchmark(): ...@@ -429,8 +429,8 @@ def benchmark():
# print in tabular format # print in tabular format
print("batch size\teager mode\tfull cudagraph\tpiecewise cudagraph") print("batch size\teager mode\tfull cudagraph\tpiecewise cudagraph")
for b in cudagraph_sizes: for b in cudagraph_sizes:
print((f"{b}\t{eager_time[b]:.3f}\t{full_cudagraph_time[b]:.3f}" print(f"{b}\t{eager_time[b]:.3f}\t{full_cudagraph_time[b]:.3f}"
f"\t{piecewise_cudagraph_time[b]:.3f}")) f"\t{piecewise_cudagraph_time[b]:.3f}")
if __name__ == "__main__": if __name__ == "__main__":
......
import json import json
import os import os
import sys
import tempfile import tempfile
from collections import UserList from collections import UserList
from enum import Enum from enum import Enum
...@@ -52,7 +51,7 @@ PromptVideoInput = _PromptMultiModalInput[np.ndarray] ...@@ -52,7 +51,7 @@ PromptVideoInput = _PromptMultiModalInput[np.ndarray]
def _read_prompts(filename: str) -> List[str]: def _read_prompts(filename: str) -> List[str]:
with open(filename, "r") as f: with open(filename) as f:
prompts = f.readlines() prompts = f.readlines()
return prompts return prompts
...@@ -62,13 +61,7 @@ class _ImageAssetPrompts(TypedDict): ...@@ -62,13 +61,7 @@ class _ImageAssetPrompts(TypedDict):
cherry_blossom: str cherry_blossom: str
if sys.version_info < (3, 9): class _ImageAssetsBase(UserList[ImageAsset]):
# UserList cannot be subscripted
class _ImageAssetsBase(UserList):
pass
else:
class _ImageAssetsBase(UserList[ImageAsset]):
pass pass
...@@ -94,13 +87,7 @@ class _VideoAssetPrompts(TypedDict): ...@@ -94,13 +87,7 @@ class _VideoAssetPrompts(TypedDict):
sample_demo_1: str sample_demo_1: str
if sys.version_info < (3, 9): class _VideoAssetsBase(UserList[VideoAsset]):
# UserList cannot be subscripted
class _VideoAssetsBase(UserList):
pass
else:
class _VideoAssetsBase(UserList[VideoAsset]):
pass pass
...@@ -958,7 +945,7 @@ def dummy_opt_path(): ...@@ -958,7 +945,7 @@ def dummy_opt_path():
"*.msgpack" "*.msgpack"
]) ])
assert os.path.exists(json_path) assert os.path.exists(json_path)
with open(json_path, "r") as f: with open(json_path) as f:
config = json.load(f) config = json.load(f)
config["architectures"] = ["MyOPTForCausalLM"] config["architectures"] = ["MyOPTForCausalLM"]
with open(json_path, "w") as f: with open(json_path, "w") as f:
...@@ -977,7 +964,7 @@ def dummy_llava_path(): ...@@ -977,7 +964,7 @@ def dummy_llava_path():
"*.msgpack" "*.msgpack"
]) ])
assert os.path.exists(json_path) assert os.path.exists(json_path)
with open(json_path, "r") as f: with open(json_path) as f:
config = json.load(f) config = json.load(f)
config["architectures"] = ["MyLlava"] config["architectures"] = ["MyLlava"]
with open(json_path, "w") as f: with open(json_path, "w") as f:
...@@ -996,7 +983,7 @@ def dummy_gemma2_embedding_path(): ...@@ -996,7 +983,7 @@ def dummy_gemma2_embedding_path():
"*.msgpack" "*.msgpack"
]) ])
assert os.path.exists(json_path) assert os.path.exists(json_path)
with open(json_path, "r") as f: with open(json_path) as f:
config = json.load(f) config = json.load(f)
config["architectures"] = ["MyGemma2Embedding"] config["architectures"] = ["MyGemma2Embedding"]
with open(json_path, "w") as f: with open(json_path, "w") as f:
......
...@@ -99,13 +99,11 @@ class TestPrefixCachingBlock: ...@@ -99,13 +99,11 @@ class TestPrefixCachingBlock:
token_ids = [random.randint(0, 50_000) for _ in range(num_tokens)] token_ids = [random.randint(0, 50_000) for _ in range(num_tokens)]
first_chain, second_chain = [ first_chain, second_chain = (TestPrefixCachingBlock.create_chain(
TestPrefixCachingBlock.create_chain(
block_size=block_size, block_size=block_size,
token_ids=token_ids, token_ids=token_ids,
num_empty_trailing_blocks=num_empty_trailing_blocks) num_empty_trailing_blocks=num_empty_trailing_blocks)
for _ in range(2) for _ in range(2))
]
for first_chain_block, second_chain_block in zip( for first_chain_block, second_chain_block in zip(
first_chain, second_chain): first_chain, second_chain):
......
...@@ -510,7 +510,7 @@ def test_selective_scan_varlen(with_padding, is_variable_B, is_variable_C, ...@@ -510,7 +510,7 @@ def test_selective_scan_varlen(with_padding, is_variable_B, is_variable_C,
for var in (u_ref, delta_ref, B_ref, C_ref, z_ref) for var in (u_ref, delta_ref, B_ref, C_ref, z_ref)
] ]
for i in range(len(seqlens[0])): for i in range(len(seqlens[0])):
u_s, delta_s, B_s, C_s, z_s = [v[i].unsqueeze(0) for v in splits] u_s, delta_s, B_s, C_s, z_s = (v[i].unsqueeze(0) for v in splits)
if padded_state_indices[i] == PAD_SLOT_ID: if padded_state_indices[i] == PAD_SLOT_ID:
continue continue
out_ref_s, _ = selective_scan_ref( out_ref_s, _ = selective_scan_ref(
......
...@@ -104,7 +104,7 @@ def test_input_mapper_valid_mm_data(input_mapper_for_qwen, ...@@ -104,7 +104,7 @@ def test_input_mapper_valid_mm_data(input_mapper_for_qwen,
# Sad path tests for the multimodal input processor and mapper, respectively # Sad path tests for the multimodal input processor and mapper, respectively
@pytest.mark.parametrize("mm_data", [ @pytest.mark.parametrize("mm_data", [
{ {
"image": torch.rand((5)) "image": torch.rand(5)
}, },
{ {
"image": torch.rand((5, 5, 5, 5, 5)) "image": torch.rand((5, 5, 5, 5, 5))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment