Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66b809cc
Commit
66b809cc
authored
Feb 08, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.7.2' into v0.7.2-dev
parents
37b63c24
0408efc6
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
62 additions
and
7 deletions
+62
-7
vllm/distributed/kv_transfer/kv_connector/simple_connector.py
.../distributed/kv_transfer/kv_connector/simple_connector.py
+1
-0
vllm/distributed/kv_transfer/kv_lookup_buffer/base.py
vllm/distributed/kv_transfer/kv_lookup_buffer/base.py
+1
-0
vllm/distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py
...distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py
+1
-0
vllm/distributed/kv_transfer/kv_pipe/base.py
vllm/distributed/kv_transfer/kv_pipe/base.py
+1
-0
vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py
vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py
+2
-0
vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py
vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py
+1
-0
vllm/distributed/kv_transfer/kv_transfer_agent.py
vllm/distributed/kv_transfer/kv_transfer_agent.py
+1
-0
vllm/distributed/parallel_state.py
vllm/distributed/parallel_state.py
+13
-3
vllm/distributed/utils.py
vllm/distributed/utils.py
+2
-0
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+20
-4
vllm/engine/async_llm_engine.py
vllm/engine/async_llm_engine.py
+2
-0
vllm/engine/async_timeout.py
vllm/engine/async_timeout.py
+2
-0
vllm/engine/llm_engine.py
vllm/engine/llm_engine.py
+2
-0
vllm/engine/metrics.py
vllm/engine/metrics.py
+2
-0
vllm/engine/metrics_types.py
vllm/engine/metrics_types.py
+1
-0
vllm/engine/multiprocessing/__init__.py
vllm/engine/multiprocessing/__init__.py
+2
-0
vllm/engine/multiprocessing/client.py
vllm/engine/multiprocessing/client.py
+2
-0
vllm/engine/multiprocessing/engine.py
vllm/engine/multiprocessing/engine.py
+2
-0
vllm/engine/output_processor/interfaces.py
vllm/engine/output_processor/interfaces.py
+2
-0
vllm/engine/output_processor/multi_step.py
vllm/engine/output_processor/multi_step.py
+2
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
vllm/distributed/kv_transfer/kv_connector/simple_connector.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
Simple KV Cache Connector for Distributed Machine Learning Inference
...
...
vllm/distributed/kv_transfer/kv_lookup_buffer/base.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
This file contains a new class `KVLookupBufferBase` that allows developers to
think of KV cache operations as inserting new KV cache entries (`insert`)
...
...
vllm/distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
Implements a distributed key-value (KV) cache transfer mechanism.
...
...
vllm/distributed/kv_transfer/kv_pipe/base.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
This file defines an interface `KVPipeBase`
that provides an abstraction for sending and receiving tensors, or None, via
...
...
vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
json
import
os
import
pickle
...
...
vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
This module implements a PyNccl pipe for sending and receiving
Optional[torch.Tensor] between distributed ranks with advanced
...
...
vllm/distributed/kv_transfer/kv_transfer_agent.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""A centralized entrypoint to perform distributed KV cache transfer.
This implementation is a shim wrapper on two APIs exposed by `kv_connector`:
...
...
vllm/distributed/parallel_state.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# Copyright 2023 The vLLM team.
# Adapted from
# https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/parallel_state.py
...
...
@@ -327,9 +329,17 @@ class GroupCoordinator:
return
input_
if
input_
.
is_cpu
:
import
intel_extension_for_pytorch
as
ipex
ipex
.
distributed
.
all_reduce
(
input_
,
group
=
self
.
device_group
)
return
input_
try
:
import
intel_extension_for_pytorch
as
ipex
ipex
.
distributed
.
all_reduce
(
input_
,
group
=
self
.
device_group
)
return
input_
except
ImportError
:
"""
Intel IPEX not found. Falling back to PyTorch native
all_reduce for CPU
"""
torch
.
distributed
.
all_reduce
(
input_
,
group
=
self
.
device_group
)
return
input_
if
self
.
tpu_communicator
is
not
None
and
\
not
self
.
tpu_communicator
.
disabled
:
...
...
vllm/distributed/utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# Copyright 2023 The vLLM team.
# Adapted from
# https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/tensor_parallel/utils.py
...
...
vllm/engine/arg_utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
dataclasses
import
json
...
...
@@ -11,10 +13,10 @@ import vllm.envs as envs
from
vllm.config
import
(
CacheConfig
,
CompilationConfig
,
ConfigFormat
,
DecodingConfig
,
DeviceConfig
,
HfOverrides
,
KVTransferConfig
,
LoadConfig
,
LoadFormat
,
LoRAConfig
,
ModelConfig
,
ObservabilityConfig
,
ParallelConfig
,
PoolerConfig
,
PromptAdapterConfig
,
SchedulerConfig
,
SpeculativeConfig
,
TaskOption
,
TokenizerPoolConfig
,
VllmConfig
)
ModelConfig
,
ModelImpl
,
ObservabilityConfig
,
ParallelConfig
,
PoolerConfig
,
PromptAdapterConfig
,
SchedulerConfig
,
SpeculativeConfig
,
TaskOption
,
TokenizerPoolConfig
,
VllmConfig
)
from
vllm.executor.executor_base
import
ExecutorBase
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.quantization
import
QUANTIZATION_METHODS
...
...
@@ -200,6 +202,7 @@ class EngineArgs:
generation_config
:
Optional
[
str
]
=
None
override_generation_config
:
Optional
[
Dict
[
str
,
Any
]]
=
None
enable_sleep_mode
:
bool
=
False
model_impl
:
str
=
"auto"
calculate_kv_scales
:
Optional
[
bool
]
=
None
...
...
@@ -379,6 +382,18 @@ class EngineArgs:
'qualified names that can be passed with the `logits_processors` '
'extra completion argument. Defaults to None, which allows no '
'processors.'
)
parser
.
add_argument
(
'--model-impl'
,
type
=
str
,
default
=
EngineArgs
.
model_impl
,
choices
=
[
f
.
value
for
f
in
ModelImpl
],
help
=
'Which implementation of the model to use.
\n\n
'
'* "auto" will try to use the vLLM implementation if it exists '
'and fall back to the Transformers implementation if no vLLM '
'implementation is available.
\n
'
'* "vllm" will use the vLLM model implementation.
\n
'
'* "transformers" will use the Transformers model '
'implementation.
\n
'
)
# Parallel arguments
parser
.
add_argument
(
'--distributed-executor-backend'
,
...
...
@@ -1032,6 +1047,7 @@ class EngineArgs:
generation_config
=
self
.
generation_config
,
override_generation_config
=
self
.
override_generation_config
,
enable_sleep_mode
=
self
.
enable_sleep_mode
,
model_impl
=
self
.
model_impl
,
)
def
create_load_config
(
self
)
->
LoadConfig
:
...
...
vllm/engine/async_llm_engine.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
asyncio
import
copy
import
time
...
...
vllm/engine/async_timeout.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# Workaround for https://github.com/python/cpython/issues/86296
#
# From https://github.com/aio-libs/async-timeout/blob/master/async_timeout/__init__.py
...
...
vllm/engine/llm_engine.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
copy
import
time
...
...
vllm/engine/metrics.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
time
from
typing
import
TYPE_CHECKING
from
typing
import
Counter
as
CollectionsCounter
...
...
vllm/engine/metrics_types.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
These types are defined in this file to avoid importing vllm.engine.metrics
and therefore importing prometheus_client.
...
...
vllm/engine/multiprocessing/__init__.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
uuid
from
dataclasses
import
dataclass
,
field
from
enum
import
Enum
...
...
vllm/engine/multiprocessing/client.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
asyncio
import
copy
import
pickle
...
...
vllm/engine/multiprocessing/engine.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pickle
import
signal
from
contextlib
import
contextmanager
...
...
vllm/engine/output_processor/interfaces.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
abc
import
ABC
,
abstractmethod
from
typing
import
Callable
,
List
...
...
vllm/engine/output_processor/multi_step.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
functools
from
typing
import
Callable
,
List
,
cast
...
...
Prev
1
…
26
27
28
29
30
31
32
33
34
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment