Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e67c295b
"docs/source/api/multimodal/index.md" did not exist on "aa39a8e17537f9127b3da65dba6b33067bfd2f78"
Unverified
Commit
e67c295b
authored
Mar 25, 2024
by
TianYu GUO
Committed by
GitHub
Mar 25, 2024
Browse files
[Bugfix] fix automatic prefix args and add log info (#3608)
parent
925f3332
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
1 deletion
+7
-1
vllm/core/block_manager.py
vllm/core/block_manager.py
+5
-0
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+2
-1
No files found.
vllm/core/block_manager.py
View file @
e67c295b
...
...
@@ -9,6 +9,9 @@ from vllm.block import BlockTable, PhysicalTokenBlock
from
vllm.sequence
import
Sequence
,
SequenceGroup
,
SequenceStatus
from
vllm.utils
import
Device
from
vllm.core.evictor
import
Evictor
,
EvictionPolicy
,
make_evictor
from
vllm.logger
import
init_logger
logger
=
init_logger
(
__name__
)
class
BlockAllocatorBase
(
ABC
):
...
...
@@ -241,11 +244,13 @@ class BlockSpaceManager:
self
.
watermark_blocks
=
int
(
watermark
*
num_gpu_blocks
)
if
self
.
enable_caching
:
logger
.
info
(
"enable automatic prefix caching"
)
self
.
gpu_allocator
=
CachedBlockAllocator
(
Device
.
GPU
,
block_size
,
num_gpu_blocks
)
self
.
cpu_allocator
=
CachedBlockAllocator
(
Device
.
CPU
,
block_size
,
num_cpu_blocks
)
else
:
logger
.
info
(
"disable automatic prefix caching"
)
self
.
gpu_allocator
=
UncachedBlockAllocator
(
Device
.
GPU
,
block_size
,
num_gpu_blocks
)
self
.
cpu_allocator
=
UncachedBlockAllocator
(
...
...
vllm/engine/arg_utils.py
View file @
e67c295b
...
...
@@ -337,7 +337,8 @@ class EngineArgs:
cache_config
=
CacheConfig
(
self
.
block_size
,
self
.
gpu_memory_utilization
,
self
.
swap_space
,
self
.
kv_cache_dtype
,
model_config
.
get_sliding_window
())
model_config
.
get_sliding_window
(),
self
.
enable_prefix_caching
)
parallel_config
=
ParallelConfig
(
self
.
pipeline_parallel_size
,
self
.
tensor_parallel_size
,
self
.
worker_use_ray
,
self
.
max_parallel_loading_workers
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment