Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66b809cc
Commit
66b809cc
authored
Feb 08, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.7.2' into v0.7.2-dev
parents
37b63c24
0408efc6
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
36 additions
and
0 deletions
+36
-0
vllm/core/block/utils.py
vllm/core/block/utils.py
+1
-0
vllm/core/block_manager.py
vllm/core/block_manager.py
+1
-0
vllm/core/evictor.py
vllm/core/evictor.py
+2
-0
vllm/core/interfaces.py
vllm/core/interfaces.py
+2
-0
vllm/core/placeholder_block_space_manager.py
vllm/core/placeholder_block_space_manager.py
+2
-0
vllm/core/scheduler.py
vllm/core/scheduler.py
+2
-0
vllm/device_allocator/cumem.py
vllm/device_allocator/cumem.py
+2
-0
vllm/distributed/__init__.py
vllm/distributed/__init__.py
+2
-0
vllm/distributed/communication_op.py
vllm/distributed/communication_op.py
+2
-0
vllm/distributed/device_communicators/cuda_wrapper.py
vllm/distributed/device_communicators/cuda_wrapper.py
+1
-0
vllm/distributed/device_communicators/custom_all_reduce.py
vllm/distributed/device_communicators/custom_all_reduce.py
+2
-0
vllm/distributed/device_communicators/custom_all_reduce_utils.py
...stributed/device_communicators/custom_all_reduce_utils.py
+2
-0
vllm/distributed/device_communicators/hpu_communicator.py
vllm/distributed/device_communicators/hpu_communicator.py
+2
-0
vllm/distributed/device_communicators/pynccl.py
vllm/distributed/device_communicators/pynccl.py
+2
-0
vllm/distributed/device_communicators/pynccl_wrapper.py
vllm/distributed/device_communicators/pynccl_wrapper.py
+2
-0
vllm/distributed/device_communicators/shm_broadcast.py
vllm/distributed/device_communicators/shm_broadcast.py
+2
-0
vllm/distributed/device_communicators/tpu_communicator.py
vllm/distributed/device_communicators/tpu_communicator.py
+2
-0
vllm/distributed/device_communicators/xpu_communicator.py
vllm/distributed/device_communicators/xpu_communicator.py
+2
-0
vllm/distributed/kv_transfer/kv_connector/base.py
vllm/distributed/kv_transfer/kv_connector/base.py
+1
-0
vllm/distributed/kv_transfer/kv_connector/factory.py
vllm/distributed/kv_transfer/kv_connector/factory.py
+2
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
vllm/core/block/utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Block manager utils."""
"""Block manager utils."""
from
vllm.sequence
import
SequenceGroup
from
vllm.sequence
import
SequenceGroup
from
vllm.utils
import
(
STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE
,
from
vllm.utils
import
(
STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE
,
...
...
vllm/core/block_manager.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""A block manager that manages token blocks."""
"""A block manager that manages token blocks."""
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Sequence
as
GenericSequence
from
typing
import
Sequence
as
GenericSequence
...
...
vllm/core/evictor.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
enum
import
enum
import
heapq
import
heapq
from
abc
import
ABC
,
abstractmethod
from
abc
import
ABC
,
abstractmethod
...
...
vllm/core/interfaces.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
enum
import
enum
from
abc
import
ABC
,
abstractmethod
from
abc
import
ABC
,
abstractmethod
from
typing
import
List
from
typing
import
List
...
...
vllm/core/placeholder_block_space_manager.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Tuple
from
typing
import
List
,
Tuple
from
vllm.core.interfaces
import
AllocStatus
,
BlockSpaceManager
from
vllm.core.interfaces
import
AllocStatus
,
BlockSpaceManager
...
...
vllm/core/scheduler.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
enum
import
enum
import
os
import
os
import
random
import
random
...
...
vllm/device_allocator/cumem.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# cumem-based pytorch pluggable allocator to implement sleep mode.
# cumem-based pytorch pluggable allocator to implement sleep mode.
# other approaches tried but failed:
# other approaches tried but failed:
# - cuda-python package binding
# - cuda-python package binding
...
...
vllm/distributed/__init__.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
.communication_op
import
*
from
.communication_op
import
*
from
.parallel_state
import
*
from
.parallel_state
import
*
from
.utils
import
*
from
.utils
import
*
vllm/distributed/communication_op.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Any
,
Dict
,
Optional
,
Union
from
typing
import
Any
,
Dict
,
Optional
,
Union
import
torch
import
torch
...
...
vllm/distributed/device_communicators/cuda_wrapper.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""This file is a pure Python wrapper for the cudart library.
"""This file is a pure Python wrapper for the cudart library.
It avoids the need to compile a separate shared library, and is
It avoids the need to compile a separate shared library, and is
convenient for use when we just need to call a few functions.
convenient for use when we just need to call a few functions.
...
...
vllm/distributed/device_communicators/custom_all_reduce.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
ctypes
import
ctypes
from
contextlib
import
contextmanager
from
contextlib
import
contextmanager
from
typing
import
List
,
Optional
,
Union
from
typing
import
List
,
Optional
,
Union
...
...
vllm/distributed/device_communicators/custom_all_reduce_utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
ctypes
import
ctypes
import
json
import
json
import
os
import
os
...
...
vllm/distributed/device_communicators/hpu_communicator.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
torch
import
torch
import
torch.distributed
as
dist
import
torch.distributed
as
dist
from
torch.distributed
import
ProcessGroup
from
torch.distributed
import
ProcessGroup
...
...
vllm/distributed/device_communicators/pynccl.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Optional
,
Union
from
typing
import
Optional
,
Union
# ===================== import region =====================
# ===================== import region =====================
...
...
vllm/distributed/device_communicators/pynccl_wrapper.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# This file is a pure Python wrapper for the NCCL library.
# This file is a pure Python wrapper for the NCCL library.
# The main purpose is to use NCCL combined with CUDA graph.
# The main purpose is to use NCCL combined with CUDA graph.
# Before writing this script, we tried the following approach:
# Before writing this script, we tried the following approach:
...
...
vllm/distributed/device_communicators/shm_broadcast.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
os
import
pickle
import
pickle
import
sys
import
sys
...
...
vllm/distributed/device_communicators/tpu_communicator.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
os
import
torch
import
torch
...
...
vllm/distributed/device_communicators/xpu_communicator.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
torch
import
torch
import
torch.distributed
as
dist
import
torch.distributed
as
dist
from
torch.distributed
import
ProcessGroup
from
torch.distributed
import
ProcessGroup
...
...
vllm/distributed/kv_transfer/kv_connector/base.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
"""
KVConnectorBase Class for Distributed KV Cache & Hidden State communication
KVConnectorBase Class for Distributed KV Cache & Hidden State communication
...
...
vllm/distributed/kv_transfer/kv_connector/factory.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
importlib
import
importlib
from
typing
import
TYPE_CHECKING
,
Callable
,
Dict
,
Type
from
typing
import
TYPE_CHECKING
,
Callable
,
Dict
,
Type
...
...
Prev
1
…
25
26
27
28
29
30
31
32
33
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment