Unverified commit 7560d674, authored by Martin Hickey, committed by GitHub
Browse files

[CI] Fix mypy for vllm/device allocator (#35518)


Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent d9c77308
...@@ -41,7 +41,6 @@ EXCLUDE = [ ...@@ -41,7 +41,6 @@ EXCLUDE = [
# TODO: Remove these entries after fixing mypy errors. # TODO: Remove these entries after fixing mypy errors.
"vllm/benchmarks", "vllm/benchmarks",
"vllm/config", "vllm/config",
"vllm/device_allocator",
"vllm/reasoning", "vllm/reasoning",
"vllm/tool_parser", "vllm/tool_parser",
] ]
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
import dataclasses import dataclasses
import gc import gc
import os import os
from collections.abc import Callable from collections.abc import Callable, Iterator
from contextlib import contextmanager from contextlib import contextmanager
from typing import Any from typing import Any
...@@ -25,6 +25,7 @@ logger = init_logger(__name__) ...@@ -25,6 +25,7 @@ logger = init_logger(__name__)
cumem_available = False cumem_available = False
libcudart: Any = None
try: try:
from vllm.cumem_allocator import ( from vllm.cumem_allocator import (
init_module, init_module,
...@@ -41,9 +42,7 @@ except ModuleNotFoundError: ...@@ -41,9 +42,7 @@ except ModuleNotFoundError:
init_module = None init_module = None
python_create_and_map = None python_create_and_map = None
python_unmap_and_release = None python_unmap_and_release = None
CudaRTLibrary = None
lib_name = None lib_name = None
libcudart = None
# py_device, py_alignedSize, py_d_mem, py_p_memHandle # py_device, py_alignedSize, py_d_mem, py_p_memHandle
HandleType = tuple[int, int, int, int] HandleType = tuple[int, int, int, int]
...@@ -65,7 +64,8 @@ def unmap_and_release(allocation_handle: HandleType) -> None: ...@@ -65,7 +64,8 @@ def unmap_and_release(allocation_handle: HandleType) -> None:
def get_pluggable_allocator( def get_pluggable_allocator(
python_malloc_fn: Callable[[int], int], python_free_func: Callable[[int, int], None] python_malloc_fn: Callable[[HandleType], None],
python_free_func: Callable[[int], HandleType],
) -> torch.cuda.memory.CUDAPluggableAllocator: ) -> torch.cuda.memory.CUDAPluggableAllocator:
init_module(python_malloc_fn, python_free_func) init_module(python_malloc_fn, python_free_func)
new_alloc = torch.cuda.memory.CUDAPluggableAllocator( new_alloc = torch.cuda.memory.CUDAPluggableAllocator(
...@@ -76,8 +76,11 @@ def get_pluggable_allocator( ...@@ -76,8 +76,11 @@ def get_pluggable_allocator(
@contextmanager @contextmanager
def use_memory_pool_with_allocator( def use_memory_pool_with_allocator(
python_malloc_fn: Callable[[int], int], python_free_func: Callable[[int, int], None] python_malloc_fn: Callable[[HandleType], None],
) -> None: python_free_func: Callable[[int], HandleType],
) -> Iterator[
tuple[torch.cuda.memory.MemPool, torch.cuda.memory.CUDAPluggableAllocator]
]:
new_alloc = get_pluggable_allocator(python_malloc_fn, python_free_func) new_alloc = get_pluggable_allocator(python_malloc_fn, python_free_func)
mem_pool = torch.cuda.memory.MemPool(new_alloc._allocator) mem_pool = torch.cuda.memory.MemPool(new_alloc._allocator)
with torch.cuda.memory.use_mem_pool(mem_pool): with torch.cuda.memory.use_mem_pool(mem_pool):
...@@ -109,7 +112,7 @@ class CuMemAllocator: ...@@ -109,7 +112,7 @@ class CuMemAllocator:
not work as expected. not work as expected.
""" """
instance: "CuMemAllocator" = None instance: "CuMemAllocator | None" = None
default_tag: str = "default" default_tag: str = "default"
@staticmethod @staticmethod
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment