Unverified commit 21063c11, authored by Aaron Pham and committed by GitHub
Browse files

[CI/Build] drop support for Python 3.8 EOL (#8464)


Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
parent 4be3a451
......@@ -56,7 +56,7 @@ serving_column_mapping = {
def read_markdown(file):
    """Return the contents of ``file`` with a trailing newline appended.

    Args:
        file: Path of the file to read.

    Returns:
        The file's text followed by ``"\\n"``, or the message
        ``"<file> not found.\\n"`` when the path does not exist.
    """
    # A missing file is reported in the returned text rather than raised.
    if not os.path.exists(file):
        return f"{file} not found.\n"
    # Read-only text mode is open()'s default, so no explicit "r" is needed.
    with open(file) as f:
        return f.read() + "\n"
......@@ -75,14 +75,14 @@ if __name__ == "__main__":
# collect results
for test_file in results_folder.glob("*.json"):
with open(test_file, "r") as f:
with open(test_file) as f:
raw_result = json.loads(f.read())
if "serving" in str(test_file):
# this result is generated via `benchmark_serving.py`
# attach the benchmarking command to raw_result
with open(test_file.with_suffix(".commands"), "r") as f:
with open(test_file.with_suffix(".commands")) as f:
command = json.loads(f.read())
raw_result.update(command)
......@@ -97,7 +97,7 @@ if __name__ == "__main__":
# this result is generated via `benchmark_latency.py`
# attach the benchmarking command to raw_result
with open(test_file.with_suffix(".commands"), "r") as f:
with open(test_file.with_suffix(".commands")) as f:
command = json.loads(f.read())
raw_result.update(command)
......@@ -119,7 +119,7 @@ if __name__ == "__main__":
# this result is generated via `benchmark_throughput.py`
# attach the benchmarking command to raw_result
with open(test_file.with_suffix(".commands"), "r") as f:
with open(test_file.with_suffix(".commands")) as f:
command = json.loads(f.read())
raw_result.update(command)
......
......@@ -72,7 +72,7 @@ def main(args):
# collect results
for test_file in results_folder.glob("*_nightly_results.json"):
with open(test_file, "r") as f:
with open(test_file) as f:
results = results + json.loads(f.read())
# generate markdown table
......@@ -80,7 +80,7 @@ def main(args):
md_table = tabulate(df, headers='keys', tablefmt='pipe', showindex=False)
with open(args.description, "r") as f:
with open(args.description) as f:
description = f.read()
description = description.format(
......
......@@ -36,11 +36,11 @@ if __name__ == "__main__":
# collect results
for test_file in results_folder.glob("*.json"):
with open(test_file, "r") as f:
with open(test_file) as f:
raw_result = json.loads(f.read())
# attach the benchmarking command to raw_result
with open(test_file.with_suffix(".commands"), "r") as f:
with open(test_file.with_suffix(".commands")) as f:
command = json.loads(f.read())
raw_result.update(command)
......
......@@ -25,7 +25,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
- name: Set up Python ${{ matrix.python-version }}
......
......@@ -48,7 +48,7 @@ jobs:
fail-fast: false
matrix:
os: ['ubuntu-20.04']
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
python-version: ['3.9', '3.10', '3.11', '3.12']
pytorch-version: ['2.4.0'] # Must be the most recent version that meets requirements-cuda.txt.
cuda-version: ['11.8', '12.1']
......
......@@ -6,7 +6,7 @@ version: 2
build:
os: ubuntu-22.04
tools:
python: "3.8"
python: '3.9'
sphinx:
configuration: docs/source/conf.py
......@@ -19,4 +19,3 @@ formats: []
python:
install:
- requirements: docs/requirements-docs.txt
......@@ -79,7 +79,7 @@ async def async_request_tgi(
# any data, we should skip it.
if chunk_bytes.startswith(":"):
continue
chunk = remove_prefix(chunk_bytes, "data:")
chunk = chunk_bytes.removeprefix("data:")
data = json.loads(chunk)
timestamp = time.perf_counter()
......@@ -144,7 +144,7 @@ async def async_request_trt_llm(
if not chunk_bytes:
continue
chunk = remove_prefix(chunk_bytes.decode("utf-8"),
chunk = chunk_bytes.decode("utf-8").removeprefix(
"data:")
data = json.loads(chunk)
......@@ -261,7 +261,7 @@ async def async_request_openai_completions(
if not chunk_bytes:
continue
chunk = remove_prefix(chunk_bytes.decode("utf-8"),
chunk = chunk_bytes.decode("utf-8").removeprefix(
"data: ")
if chunk == "[DONE]":
latency = time.perf_counter() - st
......@@ -349,7 +349,7 @@ async def async_request_openai_chat_completions(
if not chunk_bytes:
continue
chunk = remove_prefix(chunk_bytes.decode("utf-8"),
chunk = chunk_bytes.decode("utf-8").removeprefix(
"data: ")
if chunk == "[DONE]":
latency = time.perf_counter() - st
......@@ -389,14 +389,6 @@ async def async_request_openai_chat_completions(
return output
# Since vllm must support Python 3.8, we can't use str.removeprefix(prefix)
# introduced in Python 3.9
def remove_prefix(text: str, prefix: str) -> str:
    """Return ``text`` with a leading ``prefix`` stripped.

    Args:
        text: The string to strip.
        prefix: The prefix to remove from the start of ``text``.

    Returns:
        ``text`` without its leading ``prefix``. ``text`` is returned
        unchanged when it does not start with ``prefix``. Only one leading
        occurrence is removed.
    """
    if text.startswith(prefix):
        return text[len(prefix):]
    return text
def get_model(pretrained_model_name_or_path: str) -> str:
if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
from modelscope import snapshot_download
......
......@@ -269,10 +269,10 @@ def run_square_bench(args):
def run_range_bench(args):
m_start, k_start, n_start = [int(x) for x in args.dim_start.split(",")]
m_end, k_end, n_end = [int(x) for x in args.dim_end.split(",")]
m_start, k_start, n_start = (int(x) for x in args.dim_start.split(","))
m_end, k_end, n_end = (int(x) for x in args.dim_end.split(","))
m_increment, k_increment, n_increment = \
[int(x) for x in args.dim_increment.split(",")]
(int(x) for x in args.dim_increment.split(","))
Ms = list(range(m_start, m_end + 1, m_increment))
Ks = list(range(k_start, k_end + 1, k_increment))
Ns = list(range(n_start, n_end + 1, n_increment))
......
......@@ -468,7 +468,7 @@ def generate():
impl_configs = []
GPTQ_kernel_type_configs = list(
(TypeConfig(
TypeConfig(
element_a=element_a,
element_b=element_b,
element_b_scale=element_a,
......@@ -476,7 +476,7 @@ def generate():
element_d=element_a,
accumulator=DataType.f32,
) for element_b in (VLLMDataType.u4b8, VLLMDataType.u8b128)
for element_a in (DataType.f16, DataType.bf16)))
for element_a in (DataType.f16, DataType.bf16))
GPTQ_kernel_specializations = [
Specialization(with_C=False, with_zeropoints=False, with_scales=True)
......@@ -490,7 +490,7 @@ def generate():
]
AWQ_kernel_type_configs = list(
(TypeConfig(
TypeConfig(
element_a=element_a,
element_b=element_b,
element_b_scale=element_a,
......@@ -498,7 +498,7 @@ def generate():
element_d=element_a,
accumulator=DataType.f32,
) for element_b in (DataType.u4, DataType.u8)
for element_a in (DataType.f16, DataType.bf16)))
for element_a in (DataType.f16, DataType.bf16))
AWQ_kernel_specializations = [
Specialization(with_C=False, with_zeropoints=True, with_scales=True)
......
......@@ -10,7 +10,7 @@ Requirements
============
* OS: Linux
* Python: 3.8 - 3.12
* Python: 3.9 -- 3.12
* GPU: compute capability 7.0 or higher (e.g., V100, T4, RTX20xx, A100, L4, H100, etc.)
Install released versions
......
......@@ -34,7 +34,7 @@ select = [
# Pyflakes
"F",
# pyupgrade
# "UP",
"UP",
# flake8-bugbear
"B",
# flake8-simplify
......@@ -55,7 +55,7 @@ ignore = [
]
[tool.mypy]
python_version = "3.8"
python_version = "3.9"
ignore_missing_imports = true
check_untyped_defs = true
......
import importlib.util
import io
import logging
import os
import re
......@@ -327,7 +326,7 @@ def get_neuronxcc_version():
"__init__.py")
# Check if the command was executed successfully
with open(version_file, "rt") as fp:
with open(version_file) as fp:
content = fp.read()
# Extract the version using a regular expression
......@@ -404,7 +403,8 @@ def read_readme() -> str:
"""Read the README file if present."""
p = get_path("README.md")
if os.path.isfile(p):
return io.open(get_path("README.md"), "r", encoding="utf-8").read()
with open(get_path("README.md"), encoding="utf-8") as f:
return f.read()
else:
return ""
......@@ -498,7 +498,6 @@ setup(
"Documentation": "https://vllm.readthedocs.io/en/latest/",
},
classifiers=[
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
......@@ -512,7 +511,7 @@ setup(
],
packages=find_packages(exclude=("benchmarks", "csrc", "docs", "examples",
"tests*")),
python_requires=">=3.8",
python_requires=">=3.9",
install_requires=get_requirements(),
ext_modules=ext_modules,
extras_require={
......
......@@ -429,8 +429,8 @@ def benchmark():
# print in tabular format
print("batch size\teager mode\tfull cudagraph\tpiecewise cudagraph")
for b in cudagraph_sizes:
print((f"{b}\t{eager_time[b]:.3f}\t{full_cudagraph_time[b]:.3f}"
f"\t{piecewise_cudagraph_time[b]:.3f}"))
print(f"{b}\t{eager_time[b]:.3f}\t{full_cudagraph_time[b]:.3f}"
f"\t{piecewise_cudagraph_time[b]:.3f}")
if __name__ == "__main__":
......
import json
import os
import sys
import tempfile
from collections import UserList
from enum import Enum
......@@ -52,7 +51,7 @@ PromptVideoInput = _PromptMultiModalInput[np.ndarray]
def _read_prompts(filename: str) -> List[str]:
    """Read ``filename`` and return its lines as a list.

    The lines come straight from ``readlines()``, so each one keeps its
    trailing newline when the file has one.
    """
    # Read-only text mode is open()'s default, so no explicit "r" is needed.
    with open(filename) as f:
        return f.readlines()
......@@ -62,13 +61,7 @@ class _ImageAssetPrompts(TypedDict):
cherry_blossom: str
if sys.version_info < (3, 9):
# UserList cannot be subscripted
class _ImageAssetsBase(UserList):
pass
else:
class _ImageAssetsBase(UserList[ImageAsset]):
class _ImageAssetsBase(UserList[ImageAsset]):
pass
......@@ -94,13 +87,7 @@ class _VideoAssetPrompts(TypedDict):
sample_demo_1: str
if sys.version_info < (3, 9):
# UserList cannot be subscripted
class _VideoAssetsBase(UserList):
pass
else:
class _VideoAssetsBase(UserList[VideoAsset]):
class _VideoAssetsBase(UserList[VideoAsset]):
pass
......@@ -958,7 +945,7 @@ def dummy_opt_path():
"*.msgpack"
])
assert os.path.exists(json_path)
with open(json_path, "r") as f:
with open(json_path) as f:
config = json.load(f)
config["architectures"] = ["MyOPTForCausalLM"]
with open(json_path, "w") as f:
......@@ -977,7 +964,7 @@ def dummy_llava_path():
"*.msgpack"
])
assert os.path.exists(json_path)
with open(json_path, "r") as f:
with open(json_path) as f:
config = json.load(f)
config["architectures"] = ["MyLlava"]
with open(json_path, "w") as f:
......@@ -996,7 +983,7 @@ def dummy_gemma2_embedding_path():
"*.msgpack"
])
assert os.path.exists(json_path)
with open(json_path, "r") as f:
with open(json_path) as f:
config = json.load(f)
config["architectures"] = ["MyGemma2Embedding"]
with open(json_path, "w") as f:
......
......@@ -99,13 +99,11 @@ class TestPrefixCachingBlock:
token_ids = [random.randint(0, 50_000) for _ in range(num_tokens)]
first_chain, second_chain = [
TestPrefixCachingBlock.create_chain(
first_chain, second_chain = (TestPrefixCachingBlock.create_chain(
block_size=block_size,
token_ids=token_ids,
num_empty_trailing_blocks=num_empty_trailing_blocks)
for _ in range(2)
]
for _ in range(2))
for first_chain_block, second_chain_block in zip(
first_chain, second_chain):
......
......@@ -510,7 +510,7 @@ def test_selective_scan_varlen(with_padding, is_variable_B, is_variable_C,
for var in (u_ref, delta_ref, B_ref, C_ref, z_ref)
]
for i in range(len(seqlens[0])):
u_s, delta_s, B_s, C_s, z_s = [v[i].unsqueeze(0) for v in splits]
u_s, delta_s, B_s, C_s, z_s = (v[i].unsqueeze(0) for v in splits)
if padded_state_indices[i] == PAD_SLOT_ID:
continue
out_ref_s, _ = selective_scan_ref(
......
......@@ -104,7 +104,7 @@ def test_input_mapper_valid_mm_data(input_mapper_for_qwen,
# Sad path tests for the multimodal input processor and mapper, respectively
@pytest.mark.parametrize("mm_data", [
{
"image": torch.rand((5))
"image": torch.rand(5)
},
{
"image": torch.rand((5, 5, 5, 5, 5))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment