Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c320ca8e
Unverified
Commit
c320ca8e
authored
Feb 11, 2025
by
Russell Bryant
Committed by
GitHub
Feb 11, 2025
Browse files
[Core] Don't do platform detection at import time (#12933)
Signed-off-by: Russell Bryant <rbryant@redhat.com>
parent
58047c6f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing 3 changed files with 8 additions and 8 deletions
+8
-8
vllm/executor/executor_base.py
vllm/executor/executor_base.py
+3
-3
vllm/executor/ray_utils.py
vllm/executor/ray_utils.py
+3
-3
vllm/platforms/cuda.py
vllm/platforms/cuda.py
+2
-2
No files found.
vllm/executor/executor_base.py
View file @
c320ca8e
...
...
@@ -8,11 +8,11 @@ from typing import (Any, Awaitable, Callable, Dict, List, Optional, Set, Tuple,
import
torch.nn
as
nn
from
typing_extensions
import
TypeVar
import
vllm.platforms
from
vllm.config
import
VllmConfig
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.platforms
import
current_platform
from
vllm.prompt_adapter.request
import
PromptAdapterRequest
from
vllm.sequence
import
ExecuteModelRequest
,
PoolerOutput
from
vllm.utils
import
make_async
...
...
@@ -108,8 +108,8 @@ class ExecutorBase(ABC):
"""
# NOTE: This is logged in the executor because there can be >1 workers.
logger
.
info
(
"# %s blocks: %d, # CPU blocks: %d"
,
current_platform
.
dispatch_key
,
num_gpu_blocks
,
num_cpu_blocks
)
vllm
.
platforms
.
current_platform
.
dispatch_key
,
num_gpu_blocks
,
num_cpu_blocks
)
max_concurrency
=
(
num_gpu_blocks
*
self
.
cache_config
.
block_size
/
self
.
model_config
.
max_model_len
)
logger
.
info
(
"Maximum concurrency for %s tokens per request: %.2fx"
,
...
...
vllm/executor/ray_utils.py
View file @
c320ca8e
...
...
@@ -7,10 +7,10 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
import
msgspec
import
vllm.platforms
from
vllm.config
import
ParallelConfig
from
vllm.executor.msgspec_utils
import
decode_hook
,
encode_hook
from
vllm.logger
import
init_logger
from
vllm.platforms
import
current_platform
from
vllm.sequence
import
ExecuteModelRequest
,
IntermediateTensors
from
vllm.utils
import
get_ip
from
vllm.worker.worker_base
import
WorkerWrapperBase
...
...
@@ -54,10 +54,10 @@ try:
def
get_node_and_gpu_ids
(
self
)
->
Tuple
[
str
,
List
[
int
]]:
node_id
=
ray
.
get_runtime_context
().
get_node_id
()
device_key
=
current_platform
.
ray_device_key
device_key
=
vllm
.
platforms
.
current_platform
.
ray_device_key
if
not
device_key
:
raise
RuntimeError
(
"current platform %s does not support ray."
,
current_platform
.
device_name
)
vllm
.
platforms
.
current_platform
.
device_name
)
gpu_ids
=
ray
.
get_runtime_context
().
get_accelerator_ids
(
)[
device_key
]
return
node_id
,
gpu_ids
...
...
vllm/platforms/cuda.py
View file @
c320ca8e
...
...
@@ -334,10 +334,10 @@ class NvmlCudaPlatform(CudaPlatformBase):
if
(
len
(
set
(
device_names
))
>
1
and
os
.
environ
.
get
(
"CUDA_DEVICE_ORDER"
)
!=
"PCI_BUS_ID"
):
logger
.
warning
(
"Detected different devices in the system:
\n
%s
\n
Please"
"Detected different devices in the system:
%s.
Please"
" make sure to set `CUDA_DEVICE_ORDER=PCI_BUS_ID` to "
"avoid unexpected behavior."
,
"
\n
"
.
join
(
device_names
),
"
,
"
.
join
(
device_names
),
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment