"vllm/vscode:/vscode.git/clone" did not exist on "f451b4558b2bb42dafcdd19f7b5c0fc58194af5a"
Unverified commit 82c73fd5, authored by Gene Der Su and committed by GitHub
Browse files

[Bugfix] cuda error running llama 3.2 (#11047)

parent bfd61043
@@ -4,7 +4,8 @@ pynvml. However, it should not initialize cuda context.
 import os
 from functools import lru_cache, wraps
-from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar
+from typing import (TYPE_CHECKING, Callable, List, Optional, Tuple, TypeVar,
+                    Union)
 import pynvml
 import torch
@@ -78,7 +79,9 @@ class CudaPlatformBase(Platform):
     dispatch_key: str = "CUDA"
     @classmethod
-    def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:
+    def get_device_capability(cls,
+                              device_id: int = 0
+                              ) -> Optional[DeviceCapability]:
         raise NotImplementedError
     @classmethod
@@ -144,11 +147,29 @@ class NvmlCudaPlatform(CudaPlatformBase):
     @classmethod
     @lru_cache(maxsize=8)
     @with_nvml_context
-    def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:
-        physical_device_id = device_id_to_physical_device_id(device_id)
-        handle = pynvml.nvmlDeviceGetHandleByIndex(physical_device_id)
-        major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
-        return DeviceCapability(major=major, minor=minor)
+    def get_device_capability(cls,
+                              device_id: int = 0
+                              ) -> Optional[DeviceCapability]:
+        try:
+            physical_device_id = device_id_to_physical_device_id(device_id)
+            handle = pynvml.nvmlDeviceGetHandleByIndex(physical_device_id)
+            major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
+            return DeviceCapability(major=major, minor=minor)
+        except RuntimeError:
+            return None
+    @classmethod
+    @lru_cache(maxsize=8)
+    @with_nvml_context
+    def has_device_capability(
+        cls,
+        capability: Union[Tuple[int, int], int],
+        device_id: int = 0,
+    ) -> bool:
+        try:
+            return super().has_device_capability(capability, device_id)
+        except RuntimeError:
+            return False
     @classmethod
     @lru_cache(maxsize=8)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment