Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4716a32d
Unverified
Commit
4716a32d
authored
Mar 28, 2024
by
Simon Mo
Committed by
GitHub
Mar 28, 2024
Browse files
fix logging msg for block manager (#3701)
parent
c0935c96
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing 3 changed files with 5 additions and 4 deletions
+5
-4
vllm/attention/selector.py
vllm/attention/selector.py
+3
-1
vllm/core/block_manager_v1.py
vllm/core/block_manager_v1.py
+1
-2
vllm/model_executor/parallel_utils/pynccl_utils.py
vllm/model_executor/parallel_utils/pynccl_utils.py
+1
-1
No files found.
vllm/attention/selector.py
View file @
4716a32d
...
...
@@ -41,6 +41,8 @@ def _can_use_flash_attn(dtype: torch.dtype) -> bool:
try
:
import
flash_attn
# noqa: F401
except
ImportError
:
logger
.
info
(
"flash_attn is not found."
)
logger
.
info
(
"Cannot use FlashAttention because the package is not found. "
"Please install it for better performance."
)
return
False
return
True
vllm/core/block_manager_v1.py
View file @
4716a32d
...
...
@@ -230,13 +230,12 @@ class BlockSpaceManagerV1(BlockSpaceManager):
self
.
watermark_blocks
=
int
(
watermark
*
num_gpu_blocks
)
if
self
.
enable_caching
:
logger
.
info
(
"
enable automatic prefix caching"
)
logger
.
info
(
"
Automatic prefix caching is enabled.
"
)
self
.
gpu_allocator
=
CachedBlockAllocator
(
Device
.
GPU
,
block_size
,
num_gpu_blocks
)
self
.
cpu_allocator
=
CachedBlockAllocator
(
Device
.
CPU
,
block_size
,
num_cpu_blocks
)
else
:
logger
.
info
(
"disable automatic prefix caching"
)
self
.
gpu_allocator
=
UncachedBlockAllocator
(
Device
.
GPU
,
block_size
,
num_gpu_blocks
)
self
.
cpu_allocator
=
UncachedBlockAllocator
(
...
...
vllm/model_executor/parallel_utils/pynccl_utils.py
View file @
4716a32d
...
...
@@ -10,7 +10,6 @@ logger = logging.getLogger(__name__)
try
:
from
vllm.model_executor.parallel_utils.pynccl
import
(
NCCLCommunicator
,
ncclGetVersion
)
logger
.
info
(
f
"vLLM is using nccl==
{
ncclGetVersion
()
}
"
)
except
Exception
as
e
:
# in non-NVIDIA environments, we can't import the nccl module
# e.g. when running on machines with AMD GPUs
...
...
@@ -40,6 +39,7 @@ def init_process_group(world_size: int, local_rank: int, rank: int,
init_method
:
str
)
->
None
:
assert
not
is_initialized
()
global
comm
logger
.
info
(
f
"vLLM is using nccl==
{
ncclGetVersion
()
}
"
)
comm
=
NCCLCommunicator
(
init_method
=
init_method
,
world_size
=
world_size
,
local_rank
=
local_rank
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment