Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
82c73fd5
Unverified
Commit
82c73fd5
authored
Dec 09, 2024
by
Gene Der Su
Committed by
GitHub
Dec 10, 2024
Browse files
[Bugfix] cuda error running llama 3.2 (#11047)
parent
bfd61043
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
7 deletions
+28
-7
vllm/platforms/cuda.py
vllm/platforms/cuda.py
+28
-7
No files found.
vllm/platforms/cuda.py
View file @
82c73fd5
...
...
@@ -4,7 +4,8 @@ pynvml. However, it should not initialize cuda context.
import
os
from
functools
import
lru_cache
,
wraps
from
typing
import
TYPE_CHECKING
,
Callable
,
List
,
Optional
,
TypeVar
from
typing
import
(
TYPE_CHECKING
,
Callable
,
List
,
Optional
,
Tuple
,
TypeVar
,
Union
)
import
pynvml
import
torch
...
...
@@ -78,7 +79,9 @@ class CudaPlatformBase(Platform):
dispatch_key
:
str
=
"CUDA"
@
classmethod
def
get_device_capability
(
cls
,
device_id
:
int
=
0
)
->
DeviceCapability
:
def
get_device_capability
(
cls
,
device_id
:
int
=
0
)
->
Optional
[
DeviceCapability
]:
raise
NotImplementedError
@
classmethod
...
...
@@ -144,11 +147,29 @@ class NvmlCudaPlatform(CudaPlatformBase):
@
classmethod
@
lru_cache
(
maxsize
=
8
)
@
with_nvml_context
def
get_device_capability
(
cls
,
device_id
:
int
=
0
)
->
DeviceCapability
:
physical_device_id
=
device_id_to_physical_device_id
(
device_id
)
handle
=
pynvml
.
nvmlDeviceGetHandleByIndex
(
physical_device_id
)
major
,
minor
=
pynvml
.
nvmlDeviceGetCudaComputeCapability
(
handle
)
return
DeviceCapability
(
major
=
major
,
minor
=
minor
)
def
get_device_capability
(
cls
,
device_id
:
int
=
0
)
->
Optional
[
DeviceCapability
]:
try
:
physical_device_id
=
device_id_to_physical_device_id
(
device_id
)
handle
=
pynvml
.
nvmlDeviceGetHandleByIndex
(
physical_device_id
)
major
,
minor
=
pynvml
.
nvmlDeviceGetCudaComputeCapability
(
handle
)
return
DeviceCapability
(
major
=
major
,
minor
=
minor
)
except
RuntimeError
:
return
None
@
classmethod
@
lru_cache
(
maxsize
=
8
)
@
with_nvml_context
def
has_device_capability
(
cls
,
capability
:
Union
[
Tuple
[
int
,
int
],
int
],
device_id
:
int
=
0
,
)
->
bool
:
try
:
return
super
().
has_device_capability
(
capability
,
device_id
)
except
RuntimeError
:
return
False
@
classmethod
@
lru_cache
(
maxsize
=
8
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment