Unverified commit cf069aa8, authored by Harry Mellor and committed by GitHub
Browse files

Update deprecated Python 3.8 typing (#13971)

parent bf33700e
......@@ -4,7 +4,6 @@ import math
import pickle
import re
from collections import defaultdict
from typing import List
import matplotlib.pyplot as plt
import pandas as pd
......@@ -23,7 +22,7 @@ if __name__ == "__main__":
with open(args.filename, 'rb') as f:
data = pickle.load(f)
raw_results: List[TMeasurement] = data["results"]
raw_results: list[TMeasurement] = data["results"]
results = defaultdict(lambda: list())
for v in raw_results:
......
# SPDX-License-Identifier: Apache-2.0
import dataclasses
from typing import Any, Callable, Iterable, Optional
from collections.abc import Iterable
from typing import Any, Callable, Optional
import torch
import torch.utils.benchmark as TBenchmark
......
# SPDX-License-Identifier: Apache-2.0
import enum
from typing import Dict, Union
from typing import Union
from cutlass_library import *
......@@ -21,7 +21,7 @@ class MixedInputKernelScheduleType(enum.Enum):
TmaWarpSpecializedCooperative = enum_auto()
VLLMDataTypeNames: Dict[Union[VLLMDataType, DataType], str] = {
VLLMDataTypeNames: dict[Union[VLLMDataType, DataType], str] = {
**DataTypeNames, # type: ignore
**{
VLLMDataType.u4b8: "u4b8",
......@@ -29,7 +29,7 @@ VLLMDataTypeNames: Dict[Union[VLLMDataType, DataType], str] = {
}
}
VLLMDataTypeTag: Dict[Union[VLLMDataType, DataType], str] = {
VLLMDataTypeTag: dict[Union[VLLMDataType, DataType], str] = {
**DataTypeTag, # type: ignore
**{
VLLMDataType.u4b8: "cutlass::vllm_uint4b8_t",
......@@ -37,7 +37,7 @@ VLLMDataTypeTag: Dict[Union[VLLMDataType, DataType], str] = {
}
}
VLLMDataTypeSize: Dict[Union[VLLMDataType, DataType], int] = {
VLLMDataTypeSize: dict[Union[VLLMDataType, DataType], int] = {
**DataTypeSize, # type: ignore
**{
VLLMDataType.u4b8: 4,
......@@ -45,7 +45,7 @@ VLLMDataTypeSize: Dict[Union[VLLMDataType, DataType], int] = {
}
}
VLLMDataTypeVLLMScalarTypeTag: Dict[Union[VLLMDataType, DataType], str] = {
VLLMDataTypeVLLMScalarTypeTag: dict[Union[VLLMDataType, DataType], str] = {
VLLMDataType.u4b8: "vllm::kU4B8",
VLLMDataType.u8b128: "vllm::kU8B128",
DataType.u4: "vllm::kU4",
......@@ -56,7 +56,7 @@ VLLMDataTypeVLLMScalarTypeTag: Dict[Union[VLLMDataType, DataType], str] = {
DataType.bf16: "vllm::kBfloat16",
}
VLLMDataTypeTorchDataTypeTag: Dict[Union[VLLMDataType, DataType], str] = {
VLLMDataTypeTorchDataTypeTag: dict[Union[VLLMDataType, DataType], str] = {
DataType.u8: "at::ScalarType::Byte",
DataType.s8: "at::ScalarType::Char",
DataType.e4m3: "at::ScalarType::Float8_e4m3fn",
......@@ -66,7 +66,7 @@ VLLMDataTypeTorchDataTypeTag: Dict[Union[VLLMDataType, DataType], str] = {
DataType.f32: "at::ScalarType::Float",
}
VLLMKernelScheduleTag: Dict[Union[
VLLMKernelScheduleTag: dict[Union[
MixedInputKernelScheduleType, KernelScheduleType], str] = {
**KernelScheduleTag, # type: ignore
**{
......
......@@ -8,7 +8,7 @@ from collections.abc import Iterable
from copy import deepcopy
from dataclasses import dataclass, fields
from functools import reduce
from typing import Dict, List, Optional, Tuple, Union
from typing import Optional, Union
import jinja2
# yapf conflicts with isort for this block
......@@ -247,8 +247,8 @@ TmaCoop = EpilogueScheduleType.TmaWarpSpecializedCooperative
@dataclass(frozen=True)
class ScheduleConfig:
tile_shape_mn: Tuple[int, int]
cluster_shape_mnk: Tuple[int, int, int]
tile_shape_mn: tuple[int, int]
cluster_shape_mnk: tuple[int, int, int]
kernel_schedule: MixedInputKernelScheduleType
epilogue_schedule: EpilogueScheduleType
tile_scheduler: TileSchedulerType
......@@ -277,8 +277,8 @@ class PrepackTypeConfig:
@dataclass
class ImplConfig:
types: TypeConfig
schedules: List[ScheduleConfig]
heuristic: List[Tuple[Optional[str], ScheduleConfig]]
schedules: list[ScheduleConfig]
heuristic: list[tuple[Optional[str], ScheduleConfig]]
def generate_sch_sig(schedule_config: ScheduleConfig) -> str:
......@@ -333,7 +333,7 @@ def is_power_of_two(n):
return (n != 0) and (n & (n - 1) == 0)
def to_cute_constant(value: List[int]):
def to_cute_constant(value: list[int]):
def _to_cute_constant(value: int):
if is_power_of_two(value):
......@@ -347,7 +347,7 @@ def to_cute_constant(value: List[int]):
return _to_cute_constant(value)
def unique_schedules(impl_configs: List[ImplConfig]):
def unique_schedules(impl_configs: list[ImplConfig]):
return list(
set(sch for impl_config in impl_configs
for sch in impl_config.schedules))
......@@ -391,7 +391,7 @@ mm_impl_template = create_template(IMPL_TEMPLATE)
prepack_dispatch_template = create_template(PREPACK_TEMPLATE)
def create_sources(impl_configs: List[ImplConfig], num_impl_files=8):
def create_sources(impl_configs: list[ImplConfig], num_impl_files=8):
sources = []
sources.append((
......@@ -435,7 +435,7 @@ def create_sources(impl_configs: List[ImplConfig], num_impl_files=8):
num_impls = reduce(lambda x, y: x + len(y.schedules), impl_configs, 0)
num_impls_per_file = math.ceil(num_impls / num_impl_files)
files_impls: List[List[ImplConfig]] = [[]]
files_impls: list[list[ImplConfig]] = [[]]
curr_num_impls_assigned = 0
curr_impl_in_file = 0
......@@ -515,7 +515,7 @@ def generate():
for cond, tile_config in default_tile_heuristic_config.items()
]
def get_unique_schedules(heuristic: Dict[str, ScheduleConfig]):
def get_unique_schedules(heuristic: dict[str, ScheduleConfig]):
# Do not use schedules = list(set(...)) because we need to make sure
# the output list is deterministic; otherwise the generated kernel file
# will be non-deterministic and causes ccache miss.
......
......@@ -17,7 +17,6 @@ import inspect
import logging
import os
import sys
from typing import List
import requests
from sphinx.ext import autodoc
......@@ -58,7 +57,7 @@ templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns: List[str] = ["**/*.template.md", "**/*.inc.md"]
exclude_patterns: list[str] = ["**/*.template.md", "**/*.inc.md"]
# Exclude the prompt "$" when copying code
copybutton_prompt_text = r"\$ "
......
......@@ -123,7 +123,7 @@ class ExampleParser(ReasoningParser):
def extract_reasoning_content(
self, model_output: str, request: ChatCompletionRequest
) -> Tuple[Optional[str], Optional[str]]:
) -> tuple[Optional[str], Optional[str]]:
"""
Extract reasoning content from a complete model-generated string.
......@@ -138,7 +138,7 @@ class ExampleParser(ReasoningParser):
The request object that was used to generate the model_output.
Returns:
Tuple[Optional[str], Optional[str]]
tuple[Optional[str], Optional[str]]
A tuple containing the reasoning content and the content.
"""
```
......
......@@ -193,7 +193,7 @@ class Step(BaseModel):
class MathResponse(BaseModel):
steps: List[Step]
steps: list[Step]
final_answer: str
......
......@@ -74,7 +74,7 @@ class Example:
path (Path): The path to the main directory or file.
category (str): The category of the document.
main_file (Path): The main file in the directory.
other_files (list[Path]): List of other files in the directory.
other_files (list[Path]): List of other files in the directory.
title (str): The title of the document.
Methods:
......
......@@ -6,7 +6,7 @@ distributively on a multi-nodes cluster.
Learn more about Ray Data in https://docs.ray.io/en/latest/data/data.html
"""
from typing import Any, Dict, List
from typing import Any
import numpy as np
import ray
......@@ -36,13 +36,13 @@ class LLMPredictor:
self.llm = LLM(model="meta-llama/Llama-2-7b-chat-hf",
tensor_parallel_size=tensor_parallel_size)
def __call__(self, batch: Dict[str, np.ndarray]) -> Dict[str, list]:
def __call__(self, batch: dict[str, np.ndarray]) -> dict[str, list]:
# Generate texts from the prompts.
# The output is a list of RequestOutput objects that contain the prompt,
# generated text, and other information.
outputs = self.llm.generate(batch["text"], sampling_params)
prompt: List[str] = []
generated_text: List[str] = []
prompt: list[str] = []
generated_text: list[str] = []
for output in outputs:
prompt.append(output.prompt)
generated_text.append(' '.join([o.text for o in output.outputs]))
......@@ -72,7 +72,7 @@ def scheduling_strategy_fn():
pg, placement_group_capture_child_tasks=True))
resources_kwarg: Dict[str, Any] = {}
resources_kwarg: dict[str, Any] = {}
if tensor_parallel_size == 1:
# For tensor_parallel_size == 1, we simply set num_gpus=1.
resources_kwarg["num_gpus"] = 1
......
# SPDX-License-Identifier: Apache-2.0
import argparse
from typing import List, Tuple
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
from vllm.utils import FlexibleArgumentParser
def create_test_prompts() -> List[Tuple[str, SamplingParams]]:
def create_test_prompts() -> list[tuple[str, SamplingParams]]:
"""Create a list of test prompts with their sampling parameters."""
return [
("A robot may not injure a human being",
......@@ -24,7 +23,7 @@ def create_test_prompts() -> List[Tuple[str, SamplingParams]]:
def process_requests(engine: LLMEngine,
test_prompts: List[Tuple[str, SamplingParams]]):
test_prompts: list[tuple[str, SamplingParams]]):
"""Continuously process a list of prompts and handle the outputs."""
request_id = 0
......@@ -34,7 +33,7 @@ def process_requests(engine: LLMEngine,
engine.add_request(str(request_id), prompt, sampling_params)
request_id += 1
request_outputs: List[RequestOutput] = engine.step()
request_outputs: list[RequestOutput] = engine.step()
for request_output in request_outputs:
if request_output.finished:
......
......@@ -7,7 +7,7 @@ Requires HuggingFace credentials for access.
"""
import gc
from typing import List, Optional, Tuple
from typing import Optional
import torch
from huggingface_hub import snapshot_download
......@@ -18,7 +18,7 @@ from vllm.lora.request import LoRARequest
def create_test_prompts(
lora_path: str
) -> List[Tuple[str, SamplingParams, Optional[LoRARequest]]]:
) -> list[tuple[str, SamplingParams, Optional[LoRARequest]]]:
return [
# this is an example of using quantization without LoRA
("My name is",
......@@ -49,7 +49,7 @@ def create_test_prompts(
def process_requests(engine: LLMEngine,
test_prompts: List[Tuple[str, SamplingParams,
test_prompts: list[tuple[str, SamplingParams,
Optional[LoRARequest]]]):
"""Continuously process a list of prompts and handle the outputs."""
request_id = 0
......@@ -63,7 +63,7 @@ def process_requests(engine: LLMEngine,
lora_request=lora_request)
request_id += 1
request_outputs: List[RequestOutput] = engine.step()
request_outputs: list[RequestOutput] = engine.step()
for request_output in request_outputs:
if request_output.finished:
print("----------------------------------------------------")
......
......@@ -2,12 +2,11 @@
import gc
import time
from typing import List
from vllm import LLM, SamplingParams
def time_generation(llm: LLM, prompts: List[str],
def time_generation(llm: LLM, prompts: list[str],
sampling_params: SamplingParams):
# Generate texts from the prompts. The output is a list of RequestOutput
# objects that contain the prompt, generated text, and other information.
......
......@@ -6,7 +6,7 @@ for offline inference.
Requires HuggingFace credentials for access to Llama2.
"""
from typing import List, Optional, Tuple
from typing import Optional
from huggingface_hub import snapshot_download
......@@ -16,7 +16,7 @@ from vllm.lora.request import LoRARequest
def create_test_prompts(
lora_path: str
) -> List[Tuple[str, SamplingParams, Optional[LoRARequest]]]:
) -> list[tuple[str, SamplingParams, Optional[LoRARequest]]]:
"""Create a list of test prompts with their sampling parameters.
2 requests for base model, 4 requests for the LoRA. We define 2
......@@ -56,7 +56,7 @@ def create_test_prompts(
def process_requests(engine: LLMEngine,
test_prompts: List[Tuple[str, SamplingParams,
test_prompts: list[tuple[str, SamplingParams,
Optional[LoRARequest]]]):
"""Continuously process a list of prompts and handle the outputs."""
request_id = 0
......@@ -70,7 +70,7 @@ def process_requests(engine: LLMEngine,
lora_request=lora_request)
request_id += 1
request_outputs: List[RequestOutput] = engine.step()
request_outputs: list[RequestOutput] = engine.step()
for request_output in request_outputs:
if request_output.finished:
......
......@@ -21,7 +21,7 @@ import argparse
import datetime
import os
import re
from typing import List, Union
from typing import Union
import albumentations
import numpy as np
......@@ -260,9 +260,9 @@ def _convert_np_uint8(float_image: torch.Tensor):
def load_example(
file_paths: List[str],
mean: List[float] = None,
std: List[float] = None,
file_paths: list[str],
mean: list[float] = None,
std: list[float] = None,
indices: Union[list[int], None] = None,
):
"""Build an input example by loading images in *file_paths*.
......
......@@ -5,8 +5,9 @@ import json
import os
import sys
from argparse import RawTextHelpFormatter
from collections.abc import Generator
from dataclasses import asdict, dataclass
from typing import Any, Dict, Generator, List, Optional, TypeAlias
from typing import Any, Optional, TypeAlias
import torch
import tqdm
......@@ -42,8 +43,8 @@ def get_dtype(dtype: str):
return dtype
OutputLen_NumReqs_Map: TypeAlias = Dict[int, int]
def compute_request_output_lengths(batch_size: int, step_requests: List[int]) \
OutputLen_NumReqs_Map: TypeAlias = dict[int, int]
def compute_request_output_lengths(batch_size: int, step_requests: list[int]) \
-> OutputLen_NumReqs_Map:
"""
Given the number of requests, batch_size, and the number of requests
......@@ -63,7 +64,7 @@ def compute_request_output_lengths(batch_size: int, step_requests: List[int]) \
Args:
batch_size (int): Number of requests submitted for profile. This is
args.batch_size.
step_requests (List[int]): step_requests[i] is the number of requests
step_requests (list[int]): step_requests[i] is the number of requests
that the ith engine step should process.
Returns:
......@@ -114,7 +115,7 @@ def compute_request_output_lengths(batch_size: int, step_requests: List[int]) \
return ol_nr
def determine_requests_per_step(context: ProfileContext) -> List[int]:
def determine_requests_per_step(context: ProfileContext) -> list[int]:
"""
Determine number of requests each engine step should process.
If context.num_steps is set, then all engine steps process the
......@@ -130,7 +131,7 @@ def determine_requests_per_step(context: ProfileContext) -> List[int]:
context: ProfileContext object.
Returns:
List[int]: Number of requests to process for all engine-steps.
list[int]: Number of requests to process for all engine-steps.
output[i], contains the number of requests that the ith step
should process.
"""
......@@ -170,7 +171,7 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],
for key, value in asdict(context).items():
print(f" {key} = {value}")
requests_per_step: List[int] = determine_requests_per_step(context)
requests_per_step: list[int] = determine_requests_per_step(context)
ol_nr: OutputLen_NumReqs_Map = compute_request_output_lengths(
context.batch_size, requests_per_step)
......
......@@ -4,7 +4,6 @@ import argparse
import dataclasses
import os
import time
from typing import List
import numpy as np
import torch_xla.debug.profiler as xp
......@@ -35,7 +34,7 @@ def main(args: argparse.Namespace):
dummy_prompt_token_ids = np.random.randint(10000,
size=(args.batch_size,
args.input_len))
dummy_prompts: List[PromptType] = [{
dummy_prompts: list[PromptType] = [{
"prompt_token_ids": batch
} for batch in dummy_prompt_token_ids.tolist()]
......
......@@ -5,7 +5,7 @@ multi-image input on vision language models for text generation,
using the chat template defined by the model.
"""
from argparse import Namespace
from typing import List, NamedTuple, Optional
from typing import NamedTuple, Optional
from PIL.Image import Image
from transformers import AutoProcessor, AutoTokenizer
......@@ -24,8 +24,8 @@ IMAGE_URLS = [
class ModelRequestData(NamedTuple):
llm: LLM
prompt: str
stop_token_ids: Optional[List[int]]
image_data: List[Image]
stop_token_ids: Optional[list[int]]
image_data: list[Image]
chat_template: Optional[str]
......@@ -34,7 +34,7 @@ class ModelRequestData(NamedTuple):
# Unless specified, these settings have been tested to work on a single L4.
def load_aria(question, image_urls: List[str]) -> ModelRequestData:
def load_aria(question, image_urls: list[str]) -> ModelRequestData:
model_name = "rhymes-ai/Aria"
llm = LLM(model=model_name,
tokenizer_mode="slow",
......@@ -55,7 +55,7 @@ def load_aria(question, image_urls: List[str]) -> ModelRequestData:
)
def load_deepseek_vl2(question: str, image_urls: List[str]):
def load_deepseek_vl2(question: str, image_urls: list[str]):
model_name = "deepseek-ai/deepseek-vl2-tiny"
llm = LLM(model=model_name,
......@@ -77,7 +77,7 @@ def load_deepseek_vl2(question: str, image_urls: List[str]):
)
def load_h2ovl(question: str, image_urls: List[str]) -> ModelRequestData:
def load_h2ovl(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "h2oai/h2ovl-mississippi-800m"
llm = LLM(
......@@ -111,7 +111,7 @@ def load_h2ovl(question: str, image_urls: List[str]) -> ModelRequestData:
)
def load_idefics3(question, image_urls: List[str]) -> ModelRequestData:
def load_idefics3(question, image_urls: list[str]) -> ModelRequestData:
model_name = "HuggingFaceM4/Idefics3-8B-Llama3"
# The configuration below has been confirmed to launch on a single L40 GPU.
......@@ -142,7 +142,7 @@ def load_idefics3(question, image_urls: List[str]) -> ModelRequestData:
)
def load_internvl(question: str, image_urls: List[str]) -> ModelRequestData:
def load_internvl(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "OpenGVLab/InternVL2-2B"
llm = LLM(
......@@ -179,7 +179,7 @@ def load_internvl(question: str, image_urls: List[str]) -> ModelRequestData:
)
def load_mllama(question, image_urls: List[str]) -> ModelRequestData:
def load_mllama(question, image_urls: list[str]) -> ModelRequestData:
model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
# The configuration below has been confirmed to launch on a single L40 GPU.
......@@ -201,7 +201,7 @@ def load_mllama(question, image_urls: List[str]) -> ModelRequestData:
)
def load_nvlm_d(question: str, image_urls: List[str]):
def load_nvlm_d(question: str, image_urls: list[str]):
model_name = "nvidia/NVLM-D-72B"
# Adjust this as necessary to fit in GPU
......@@ -234,7 +234,7 @@ def load_nvlm_d(question: str, image_urls: List[str]):
)
def load_pixtral_hf(question: str, image_urls: List[str]) -> ModelRequestData:
def load_pixtral_hf(question: str, image_urls: list[str]) -> ModelRequestData:
model_name = "mistral-community/pixtral-12b"
# Adjust this as necessary to fit in GPU
......@@ -259,7 +259,7 @@ def load_pixtral_hf(question: str, image_urls: List[str]) -> ModelRequestData:
)
def load_phi3v(question: str, image_urls: List[str]) -> ModelRequestData:
def load_phi3v(question: str, image_urls: list[str]) -> ModelRequestData:
# num_crops is an override kwarg to the multimodal image processor;
# For some models, e.g., Phi-3.5-vision-instruct, it is recommended
# to use 16 for single frame scenarios, and 4 for multi-frame.
......@@ -295,7 +295,7 @@ def load_phi3v(question: str, image_urls: List[str]) -> ModelRequestData:
def load_qwen_vl_chat(question: str,
image_urls: List[str]) -> ModelRequestData:
image_urls: list[str]) -> ModelRequestData:
model_name = "Qwen/Qwen-VL-Chat"
llm = LLM(
model=model_name,
......@@ -336,7 +336,7 @@ def load_qwen_vl_chat(question: str,
)
def load_qwen2_vl(question, image_urls: List[str]) -> ModelRequestData:
def load_qwen2_vl(question, image_urls: list[str]) -> ModelRequestData:
try:
from qwen_vl_utils import process_vision_info
except ModuleNotFoundError:
......@@ -393,7 +393,7 @@ def load_qwen2_vl(question, image_urls: List[str]) -> ModelRequestData:
)
def load_qwen2_5_vl(question, image_urls: List[str]) -> ModelRequestData:
def load_qwen2_5_vl(question, image_urls: list[str]) -> ModelRequestData:
try:
from qwen_vl_utils import process_vision_info
except ModuleNotFoundError:
......@@ -466,7 +466,7 @@ model_example_map = {
}
def run_generate(model, question: str, image_urls: List[str]):
def run_generate(model, question: str, image_urls: list[str]):
req_data = model_example_map[model](question, image_urls)
sampling_params = SamplingParams(temperature=0.0,
......@@ -487,7 +487,7 @@ def run_generate(model, question: str, image_urls: List[str]):
print(generated_text)
def run_chat(model: str, question: str, image_urls: List[str]):
def run_chat(model: str, question: str, image_urls: list[str]):
req_data = model_example_map[model](question, image_urls)
sampling_params = SamplingParams(temperature=0.0,
......
......@@ -7,7 +7,7 @@ For production use, we recommend `vllm serve` and the OpenAI client API.
import argparse
import json
from typing import Iterable, List
from collections.abc import Iterable
import requests
......@@ -39,7 +39,7 @@ def post_http_request(prompt: str,
return response
def get_streaming_response(response: requests.Response) -> Iterable[List[str]]:
def get_streaming_response(response: requests.Response) -> Iterable[list[str]]:
for chunk in response.iter_lines(chunk_size=8192,
decode_unicode=False,
delimiter=b"\0"):
......@@ -49,7 +49,7 @@ def get_streaming_response(response: requests.Response) -> Iterable[List[str]]:
yield output
def get_response(response: requests.Response) -> List[str]:
def get_response(response: requests.Response) -> list[str]:
data = json.loads(response.content)
output = data["text"]
return output
......
......@@ -24,4 +24,4 @@ responses = client.embeddings.create(
)
for data in responses.data:
print(data.embedding) # list of float of len 4096
print(data.embedding) # List of float of len 4096
......@@ -65,6 +65,32 @@ exclude = [
[tool.ruff.lint.per-file-ignores]
"vllm/version.py" = ["F401"]
"vllm/_version.py" = ["ALL"]
# Python 3.8 typing. TODO: Remove these excludes after v1.0.0
"vllm/adapter_commons/**/*.py" = ["UP006", "UP035"]
"vllm/attention/**/*.py" = ["UP006", "UP035"]
"vllm/compilation/**/*.py" = ["UP006", "UP035"]
"vllm/core/**/*.py" = ["UP006", "UP035"]
"vllm/device_allocator/**/*.py" = ["UP006", "UP035"]
"vllm/distributed/**/*.py" = ["UP006", "UP035"]
"vllm/engine/**/*.py" = ["UP006", "UP035"]
"vllm/executor/**/*.py" = ["UP006", "UP035"]
"vllm/inputs/**/*.py" = ["UP006", "UP035"]
"vllm/logging_utils/**/*.py" = ["UP006", "UP035"]
"vllm/lora/**/*.py" = ["UP006", "UP035"]
"vllm/model_executor/**/*.py" = ["UP006", "UP035"]
"vllm/multimodal/**/*.py" = ["UP006", "UP035"]
"vllm/platforms/**/*.py" = ["UP006", "UP035"]
"vllm/plugins/**/*.py" = ["UP006", "UP035"]
"vllm/profiler/**/*.py" = ["UP006", "UP035"]
"vllm/prompt_adapter/**/*.py" = ["UP006", "UP035"]
"vllm/spec_decode/**/*.py" = ["UP006", "UP035"]
"vllm/third_party/**/*.py" = ["UP006", "UP035"]
"vllm/transformers_utils/**/*.py" = ["UP006", "UP035"]
"vllm/triton_utils/**/*.py" = ["UP006", "UP035"]
"vllm/usage/**/*.py" = ["UP006", "UP035"]
"vllm/vllm_flash_attn/**/*.py" = ["UP006", "UP035"]
"vllm/assets/**/*.py" = ["UP006", "UP035"]
"vllm/worker/**/*.py" = ["UP006", "UP035"]
[tool.ruff.lint]
select = [
......@@ -91,8 +117,6 @@ ignore = [
"B007",
# f-string format
"UP032",
# Python 3.8 typing
"UP006", "UP035",
# Can remove once 3.10+ is the minimum Python version
"UP007",
]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment