Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0650e593
Unverified
Commit
0650e593
authored
May 06, 2024
by
zhaoyang-star
Committed by
GitHub
May 05, 2024
Browse files
Disable cuda version check in vllm-openai image (#4530)
parent
c7f2cf2b
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
2 additions
and
33 deletions
+2
-33
vllm/config.py
vllm/config.py
+1
-10
vllm/utils.py
vllm/utils.py
+1
-23
No files found.
vllm/config.py
View file @
0650e593
...
@@ -4,15 +4,13 @@ from dataclasses import dataclass, field, fields
...
@@ -4,15 +4,13 @@ from dataclasses import dataclass, field, fields
from
typing
import
TYPE_CHECKING
,
ClassVar
,
List
,
Optional
,
Union
from
typing
import
TYPE_CHECKING
,
ClassVar
,
List
,
Optional
,
Union
import
torch
import
torch
from
packaging.version
import
Version
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.quantization
import
(
QUANTIZATION_METHODS
,
from
vllm.model_executor.layers.quantization
import
(
QUANTIZATION_METHODS
,
get_quantization_config
)
get_quantization_config
)
from
vllm.transformers_utils.config
import
get_config
,
get_hf_text_config
from
vllm.transformers_utils.config
import
get_config
,
get_hf_text_config
from
vllm.utils
import
(
get_cpu_memory
,
get_nvcc_cuda_version
,
is_cpu
,
is_hip
,
from
vllm.utils
import
get_cpu_memory
,
is_cpu
,
is_hip
,
is_neuron
is_neuron
)
GPTQMarlinConfig
=
get_quantization_config
(
"gptq_marlin"
)
GPTQMarlinConfig
=
get_quantization_config
(
"gptq_marlin"
)
...
@@ -369,13 +367,6 @@ class CacheConfig:
...
@@ -369,13 +367,6 @@ class CacheConfig:
if
self
.
cache_dtype
==
"auto"
:
if
self
.
cache_dtype
==
"auto"
:
pass
pass
elif
self
.
cache_dtype
==
"fp8"
:
elif
self
.
cache_dtype
==
"fp8"
:
if
not
is_hip
():
nvcc_cuda_version
=
get_nvcc_cuda_version
()
if
nvcc_cuda_version
is
not
None
\
and
nvcc_cuda_version
<
Version
(
"11.8"
):
raise
ValueError
(
"FP8 is not supported when cuda version is"
"lower than 11.8."
)
logger
.
info
(
logger
.
info
(
"Using fp8 data type to store kv cache. It reduces the GPU "
"Using fp8 data type to store kv cache. It reduces the GPU "
"memory footprint and boosts the performance. "
"memory footprint and boosts the performance. "
...
...
vllm/utils.py
View file @
0650e593
...
@@ -19,7 +19,6 @@ from typing import (Any, AsyncIterator, Awaitable, Callable, Dict, Generic,
...
@@ -19,7 +19,6 @@ from typing import (Any, AsyncIterator, Awaitable, Callable, Dict, Generic,
import
psutil
import
psutil
import
torch
import
torch
from
packaging.version
import
Version
,
parse
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.logger
import
enable_trace_function_call
,
init_logger
from
vllm.logger
import
enable_trace_function_call
,
init_logger
...
@@ -314,27 +313,6 @@ def cdiv(a: int, b: int) -> int:
...
@@ -314,27 +313,6 @@ def cdiv(a: int, b: int) -> int:
return
-
(
a
//
-
b
)
return
-
(
a
//
-
b
)
@lru_cache(maxsize=None)
def get_nvcc_cuda_version() -> Optional[Version]:
    """Return the CUDA version reported by ``nvcc -V``, or None if unknown.

    The toolkit location is read from ``envs.CUDA_HOME`` and falls back to
    ``/usr/local/cuda`` when that value is empty. The result is memoized by
    ``lru_cache``, so nvcc is invoked at most once per process.

    Returns:
        The version parsed from the token following ``release`` in the nvcc
        output, or None when nvcc is missing, cannot be executed, or prints
        output that cannot be parsed. Callers treat None as "skip the CUDA
        version check".
    """
    cuda_home = envs.CUDA_HOME
    using_default = not cuda_home
    if using_default:
        cuda_home = '/usr/local/cuda'

    nvcc_path = cuda_home + '/bin/nvcc'
    # Check for nvcc regardless of where cuda_home came from: an explicitly
    # configured CUDA_HOME without nvcc (e.g. a runtime-only image) should
    # skip the check rather than crash inside subprocess.
    if not os.path.isfile(nvcc_path):
        logger.warning('Not found nvcc in %s. Skip cuda version check!',
                       cuda_home)
        return None
    if using_default:
        logger.info(
            'CUDA_HOME is not found in the environment. '
            'Using %s as CUDA_HOME.', cuda_home)

    try:
        nvcc_output = subprocess.check_output([nvcc_path, "-V"],
                                              universal_newlines=True)
    except (OSError, subprocess.CalledProcessError) as exc:
        logger.warning('Failed to run %s (%s). Skip cuda version check!',
                       nvcc_path, exc)
        return None

    # Expected output contains "... release 11.8, V11.8.89"; the version is
    # the token right after "release", with the trailing comma removed.
    output = nvcc_output.split()
    try:
        release_idx = output.index("release") + 1
        return parse(output[release_idx].split(",")[0])
    except (ValueError, IndexError):
        # ValueError: no "release" token or an unparsable version string;
        # IndexError: "release" was the last token.
        logger.warning(
            'Unable to parse the output of %s. Skip cuda version check!',
            nvcc_path)
        return None
def
_generate_random_fp8
(
def
_generate_random_fp8
(
tensor
:
torch
.
tensor
,
tensor
:
torch
.
tensor
,
low
:
float
,
low
:
float
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment