"vllm/vscode:/vscode.git/clone" did not exist on "9b17c57460bb5f6595f27b43e43caba144a8ec3c"
Unverified commit 953d9c82, authored by Cyrus Leung, committed by GitHub
Browse files

[mypy] Pass type checking for `vllm/utils` and `vllm/v1/pool` (#29666)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 33b06a6f
...@@ -36,8 +36,10 @@ FILES = [ ...@@ -36,8 +36,10 @@ FILES = [
"vllm/transformers_utils", "vllm/transformers_utils",
"vllm/triton_utils", "vllm/triton_utils",
"vllm/usage", "vllm/usage",
"vllm/utils",
"vllm/v1/core", "vllm/v1/core",
"vllm/v1/engine", "vllm/v1/engine",
"vllm/v1/pool",
"vllm/v1/worker", "vllm/v1/worker",
] ]
...@@ -59,7 +61,6 @@ SEPARATE_GROUPS = [ ...@@ -59,7 +61,6 @@ SEPARATE_GROUPS = [
"vllm/v1/executor", "vllm/v1/executor",
"vllm/v1/kv_offload", "vllm/v1/kv_offload",
"vllm/v1/metrics", "vllm/v1/metrics",
"vllm/v1/pool",
"vllm/v1/sample", "vllm/v1/sample",
"vllm/v1/spec_decode", "vllm/v1/spec_decode",
"vllm/v1/structured_output", "vllm/v1/structured_output",
......
...@@ -12,7 +12,7 @@ from asyncio import FIRST_COMPLETED, AbstractEventLoop, Future, Task ...@@ -12,7 +12,7 @@ from asyncio import FIRST_COMPLETED, AbstractEventLoop, Future, Task
from collections.abc import AsyncGenerator, Awaitable, Callable from collections.abc import AsyncGenerator, Awaitable, Callable
from concurrent.futures import Executor, ThreadPoolExecutor from concurrent.futures import Executor, ThreadPoolExecutor
from functools import partial from functools import partial
from typing import TypeVar from typing import TYPE_CHECKING, TypeVar
from transformers.tokenization_utils_base import BatchEncoding from transformers.tokenization_utils_base import BatchEncoding
from typing_extensions import ParamSpec from typing_extensions import ParamSpec
...@@ -257,6 +257,13 @@ def in_loop(event_loop: AbstractEventLoop) -> bool: ...@@ -257,6 +257,13 @@ def in_loop(event_loop: AbstractEventLoop) -> bool:
return False return False
# A hack to pass mypy
if TYPE_CHECKING:
def anext(it: AsyncGenerator[T, None]):
return it.__anext__()
async def merge_async_iterators( async def merge_async_iterators(
*iterators: AsyncGenerator[T, None], *iterators: AsyncGenerator[T, None],
) -> AsyncGenerator[tuple[int, T], None]: ) -> AsyncGenerator[tuple[int, T], None]:
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
from collections.abc import Callable, Iterable from collections.abc import Callable, Iterable
from functools import reduce from functools import reduce
from typing import TYPE_CHECKING, TypeAlias, TypeVar, cast, overload from typing import TYPE_CHECKING, Any, TypeAlias, TypeVar, overload
if TYPE_CHECKING: if TYPE_CHECKING:
import torch import torch
...@@ -82,16 +82,13 @@ def json_map_leaves( ...@@ -82,16 +82,13 @@ def json_map_leaves(
def json_map_leaves( def json_map_leaves(
func: Callable[[_T], _U], func: Callable[[_T], _U],
value: "BatchedTensorInputs" | _JSONTree[_T], value: Any,
) -> "BatchedTensorInputs" | _JSONTree[_U]: ) -> "BatchedTensorInputs" | _JSONTree[_U]:
"""Apply a function to each leaf in a nested JSON structure.""" """Apply a function to each leaf in a nested JSON structure."""
if isinstance(value, dict): if isinstance(value, dict):
return { return {k: json_map_leaves(func, v) for k, v in value.items()} # type: ignore
k: json_map_leaves(func, v) # type: ignore[arg-type]
for k, v in value.items()
}
elif isinstance(value, list): elif isinstance(value, list):
return [json_map_leaves(func, v) for v in value] return [json_map_leaves(func, v) for v in value] # type: ignore
elif isinstance(value, tuple): elif isinstance(value, tuple):
return tuple(json_map_leaves(func, v) for v in value) return tuple(json_map_leaves(func, v) for v in value)
else: else:
...@@ -140,9 +137,9 @@ def json_reduce_leaves( ...@@ -140,9 +137,9 @@ def json_reduce_leaves(
def json_reduce_leaves( def json_reduce_leaves(
func: Callable[..., _T | _U], func: Callable[[_T, _T], _T] | Callable[[_U, _T], _U],
value: _JSONTree[_T], value: _JSONTree[_T],
initial: _U = cast(_U, ...), # noqa: B008 initial: _U = ..., # type: ignore[assignment]
/, /,
) -> _T | _U: ) -> _T | _U:
""" """
...@@ -151,13 +148,9 @@ def json_reduce_leaves( ...@@ -151,13 +148,9 @@ def json_reduce_leaves(
sequence to a single value. sequence to a single value.
""" """
if initial is ...: if initial is ...:
return reduce(func, json_iter_leaves(value)) # type: ignore[arg-type] return reduce(func, json_iter_leaves(value)) # type: ignore
return reduce( return reduce(func, json_iter_leaves(value), initial) # type: ignore
func, # type: ignore[arg-type]
json_iter_leaves(value),
initial,
)
def json_count_leaves(value: JSONTree[_T]) -> int: def json_count_leaves(value: JSONTree[_T]) -> int:
......
...@@ -68,11 +68,11 @@ class MemorySnapshot: ...@@ -68,11 +68,11 @@ class MemorySnapshot:
timestamp: float = 0.0 timestamp: float = 0.0
auto_measure: bool = True auto_measure: bool = True
def __post_init__(self): def __post_init__(self) -> None:
if self.auto_measure: if self.auto_measure:
self.measure() self.measure()
def measure(self): def measure(self) -> None:
from vllm.platforms import current_platform from vllm.platforms import current_platform
# we measure the torch peak memory usage via allocated_bytes, # we measure the torch peak memory usage via allocated_bytes,
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
from __future__ import annotations from __future__ import annotations
import importlib import importlib.util
import os import os
import torch import torch
...@@ -47,8 +47,8 @@ def find_nccl_include_paths() -> list[str] | None: ...@@ -47,8 +47,8 @@ def find_nccl_include_paths() -> list[str] | None:
try: try:
spec = importlib.util.find_spec("nvidia.nccl") spec = importlib.util.find_spec("nvidia.nccl")
if spec and getattr(spec, "submodule_search_locations", None): if spec and (locs := getattr(spec, "submodule_search_locations", None)):
for loc in spec.submodule_search_locations: for loc in locs:
inc_dir = os.path.join(loc, "include") inc_dir = os.path.join(loc, "include")
if os.path.exists(os.path.join(inc_dir, "nccl.h")): if os.path.exists(os.path.join(inc_dir, "nccl.h")):
paths.append(inc_dir) paths.append(inc_dir)
......
...@@ -72,7 +72,7 @@ def get_ip() -> str: ...@@ -72,7 +72,7 @@ def get_ip() -> str:
return "0.0.0.0" return "0.0.0.0"
def test_loopback_bind(address, family): def test_loopback_bind(address: str, family: int) -> bool:
try: try:
s = socket.socket(family, socket.SOCK_DGRAM) s = socket.socket(family, socket.SOCK_DGRAM)
s.bind((address, 0)) # Port 0 = auto assign s.bind((address, 0)) # Port 0 = auto assign
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any from typing import Any, TypeVar
_T = TypeVar("_T", bound=type)
class ExtensionManager: class ExtensionManager:
...@@ -34,7 +36,7 @@ class ExtensionManager: ...@@ -34,7 +36,7 @@ class ExtensionManager:
Decorator to register a class with the given name. Decorator to register a class with the given name.
""" """
def wrap(cls_to_register): def wrap(cls_to_register: _T) -> _T:
self.name2class[name] = cls_to_register self.name2class[name] = cls_to_register
return cls_to_register return cls_to_register
......
...@@ -13,7 +13,7 @@ import numpy.typing as npt ...@@ -13,7 +13,7 @@ import numpy.typing as npt
import torch import torch
from packaging import version from packaging import version
from packaging.version import Version from packaging.version import Version
from torch.library import Library from torch.library import Library, infer_schema
import vllm.envs as envs import vllm.envs as envs
...@@ -78,7 +78,6 @@ def guard_cuda_initialization(): ...@@ -78,7 +78,6 @@ def guard_cuda_initialization():
yield yield
return return
had_key = "CUDA_VISIBLE_DEVICES" in os.environ
old_value = os.environ.get("CUDA_VISIBLE_DEVICES") old_value = os.environ.get("CUDA_VISIBLE_DEVICES")
os.environ["CUDA_VISIBLE_DEVICES"] = "" os.environ["CUDA_VISIBLE_DEVICES"] = ""
try: try:
...@@ -90,10 +89,10 @@ def guard_cuda_initialization(): ...@@ -90,10 +89,10 @@ def guard_cuda_initialization():
err_msg = str(e) err_msg = str(e)
raise RuntimeError(err_msg) from e raise RuntimeError(err_msg) from e
finally: finally:
if had_key: if old_value is None:
os.environ["CUDA_VISIBLE_DEVICES"] = old_value del os.environ["CUDA_VISIBLE_DEVICES"]
else: else:
os.environ.pop("CUDA_VISIBLE_DEVICES") os.environ["CUDA_VISIBLE_DEVICES"] = old_value
def get_dtype_size(dtype: torch.dtype) -> int: def get_dtype_size(dtype: torch.dtype) -> int:
...@@ -525,8 +524,7 @@ def get_cuda_view_from_cpu_tensor(cpu_tensor: torch.Tensor) -> torch.Tensor: ...@@ -525,8 +524,7 @@ def get_cuda_view_from_cpu_tensor(cpu_tensor: torch.Tensor) -> torch.Tensor:
# Helper function used in testing. # Helper function used in testing.
def _is_torch_equal_or_newer(torch_version: str, target: str) -> bool: def _is_torch_equal_or_newer(torch_version: str, target: str) -> bool:
torch_version = version.parse(torch_version) return version.parse(torch_version) >= version.parse(target)
return torch_version >= version.parse(target)
def is_torch_equal_or_newer(target: str) -> bool: def is_torch_equal_or_newer(target: str) -> bool:
...@@ -640,15 +638,8 @@ def direct_register_custom_op( ...@@ -640,15 +638,8 @@ def direct_register_custom_op(
dispatch_key = current_platform.dispatch_key dispatch_key = current_platform.dispatch_key
import torch.library schema_str = infer_schema(op_func, mutates_args=mutates_args)
if hasattr(torch.library, "infer_schema"):
schema_str = torch.library.infer_schema(op_func, mutates_args=mutates_args)
else:
# for pytorch 2.4
import torch._custom_op.impl
schema_str = torch._custom_op.impl.infer_schema(op_func, mutates_args)
my_lib = target_lib or vllm_lib my_lib = target_lib or vllm_lib
my_lib.define(op_name + schema_str, tags=tags) my_lib.define(op_name + schema_str, tags=tags)
my_lib.impl(op_name, op_func, dispatch_key=dispatch_key) my_lib.impl(op_name, op_func, dispatch_key=dispatch_key)
......
...@@ -67,16 +67,16 @@ def build_pooling_cursor( ...@@ -67,16 +67,16 @@ def build_pooling_cursor(
n_seq = len(num_scheduled_tokens) n_seq = len(num_scheduled_tokens)
index = list(range(n_seq)) index = list(range(n_seq))
num_scheduled_tokens = torch.tensor(num_scheduled_tokens, device="cpu") num_scheduled_tokens_cpu = torch.tensor(num_scheduled_tokens, device="cpu")
cumsum = torch.zeros( cumsum = torch.zeros(
n_seq + 1, dtype=torch.int64, pin_memory=pin_memory, device="cpu" n_seq + 1, dtype=torch.int64, pin_memory=pin_memory, device="cpu"
) )
torch.cumsum(num_scheduled_tokens, dim=0, out=cumsum[1:]) torch.cumsum(num_scheduled_tokens_cpu, dim=0, out=cumsum[1:])
cumsum = cumsum.to(device, non_blocking=True) cumsum = cumsum.to(device, non_blocking=True)
return PoolingCursor( return PoolingCursor(
index=index, index=index,
first_token_indices_gpu=cumsum[:n_seq], first_token_indices_gpu=cumsum[:n_seq],
last_token_indices_gpu=cumsum[1:] - 1, last_token_indices_gpu=cumsum[1:] - 1,
prompt_lens_cpu=prompt_lens, prompt_lens_cpu=prompt_lens,
num_scheduled_tokens_cpu=num_scheduled_tokens, num_scheduled_tokens_cpu=num_scheduled_tokens_cpu,
) )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment