Unverified commit 35bdca54, authored by Wentao Ye, committed by GitHub
Browse files

[Refactor] Remove dead code in KV connector (#36424)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent 8a248427
......@@ -50,7 +50,6 @@ from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
from vllm.distributed.parallel_state import (
get_tensor_model_parallel_rank,
get_tensor_model_parallel_world_size,
get_tp_group,
)
from vllm.forward_context import ForwardContext
from vllm.logger import init_logger
......@@ -564,7 +563,6 @@ class NixlConnectorScheduler:
# Background thread for handling new handshake requests.
self._nixl_handshake_listener_t: threading.Thread | None = None
self._encoded_xfer_handshake_metadata: dict[int, Any] = {}
self._stop_event = threading.Event()
# Requests that need to start recv/send.
......@@ -650,7 +648,6 @@ class NixlConnectorScheduler:
tp_rank,
str(len(encoded_data[tp_rank])),
)
self._encoded_xfer_handshake_metadata = encoded_data
# Only start the listener when we have metadata to serve.
if self._nixl_handshake_listener_t is None:
......@@ -995,7 +992,7 @@ class NixlConnectorWorker:
self.engine_id: EngineId = engine_id
self.tp_rank = get_tensor_model_parallel_rank()
self.world_size = get_tensor_model_parallel_world_size()
self.tp_group = get_tp_group()
self.num_blocks = kv_cache_config.num_blocks
self.enable_permute_local_kv = False
......@@ -1064,7 +1061,6 @@ class NixlConnectorWorker:
# Number of NIXL regions. Currently one region per cache
# (so 1 per layer for MLA, otherwise 2 per layer)
self.num_regions = 0
self.num_layers = 0
# nixl_prepped_dlist_handle.
self.src_xfer_handles_by_block_size: dict[int, int] = {}
......@@ -1108,7 +1104,6 @@ class NixlConnectorWorker:
self.block_size = vllm_config.cache_config.block_size
self.model_config = vllm_config.model_config
self.cache_config = vllm_config.cache_config
self.use_mla = self.model_config.use_mla
......@@ -1540,7 +1535,6 @@ class NixlConnectorWorker:
self.kv_caches_base_addr[self.engine_id][self.tp_rank] = seen_base_addresses
self.num_regions = len(caches_data)
self.num_layers = len(xfer_buffers.keys())
descs = self.nixl_wrapper.get_reg_descs(caches_data, self.nixl_memory_type)
logger.debug("Registering descs: %s", caches_data)
......
......@@ -184,13 +184,11 @@ class Scheduler(SchedulerInterface):
# Encoder-related.
# Calculate encoder cache size if applicable
self.supports_mm_inputs = mm_registry.supports_multimodal_inputs(
supports_mm_inputs = mm_registry.supports_multimodal_inputs(
vllm_config.model_config
)
self.mm_budget = mm_budget = (
MultiModalBudget(vllm_config, mm_registry)
if self.supports_mm_inputs
else None
mm_budget = (
MultiModalBudget(vllm_config, mm_registry) if supports_mm_inputs else None
)
# NOTE: Text-only encoder-decoder models are implemented as
......
......@@ -148,7 +148,7 @@ class EngineCore:
if self.scheduler.connector is not None: # type: ignore
self.model_executor.init_kv_output_aggregator(self.scheduler.connector) # type: ignore
self.mm_registry = mm_registry = MULTIMODAL_REGISTRY
mm_registry = MULTIMODAL_REGISTRY
self.mm_receiver_cache = mm_registry.engine_receiver_cache_from_config(
vllm_config
)
......@@ -800,8 +800,6 @@ class EngineCoreProc(EngineCore):
vllm_config,
client_handshake_address,
) as addresses:
self.client_count = len(addresses.outputs)
# Set up data parallel environment.
self.has_coordinator = addresses.coordinator_output is not None
self.frontend_stats_publish_address = (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment