[Refactor] Remove dead code in pooling model (#37572)

Signed-off-by: yewentao256 <zhyanwentao@126.com>

[Refactor] Remove dead code in pooling model (#37572)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
2be1a0f7 · Wentao Ye · GitHub · 4120a05f · 2be1a0f7 · 2be1a0f7
Unverified commit 2be1a0f7, authored Mar 19, 2026 by Wentao Ye; committed by GitHub on Mar 20, 2026.
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 0 additions and 12 deletions

vllm/entrypoints/pooling/utils.py +0 -8

vllm/v1/pool/metadata.py +0 -4

No files found.
--- a/vllm/entrypoints/pooling/utils.py
+++ b/vllm/entrypoints/pooling/utils.py
@@ -60,14 +60,6 @@ def encode_pooling_output_float(output: PoolingRequestOutput) -> list[float]:
    return output.outputs.data.tolist()
-def encode_pooling_output_binary(
-    output: PoolingRequestOutput,
-    embed_dtype: EmbedDType,
-    endianness: Endianness,
-) -> bytes:
-    return tensor2binary(output.outputs.data, embed_dtype, endianness)
 def encode_pooling_output_base64(
    output: PoolingRequestOutput,
    embed_dtype: EmbedDType,

--- a/vllm/v1/pool/metadata.py
+++ b/vllm/v1/pool/metadata.py
@@ -14,7 +14,6 @@ pin_memory = is_pin_memory_available()
 @dataclass
 class PoolingCursor:
-    index: list[int]
    first_token_indices_gpu: torch.Tensor
    last_token_indices_gpu: torch.Tensor
    prompt_lens_cpu: torch.Tensor
@@ -23,7 +22,6 @@ class PoolingCursor:
    def __getitem__(self, indices: slice):
        return PoolingCursor(
-            index=self.index[indices],
            first_token_indices_gpu=self.first_token_indices_gpu[indices],
            last_token_indices_gpu=self.last_token_indices_gpu[indices],
            prompt_lens_cpu=self.prompt_lens_cpu[indices],
@@ -108,7 +106,6 @@ class PoolingMetadata:
        assert len(prompt_lens) == n_seq
-        index = list(range(n_seq))
        num_scheduled_tokens_cpu = torch.from_numpy(num_scheduled_tokens_np)
        if query_start_loc_gpu is None:
            cumsum = torch.zeros(
@@ -130,7 +127,6 @@ class PoolingMetadata:
                )
            cumsum = query_start_loc_gpu
        self.pooling_cursor = PoolingCursor(
-            index=index,
            first_token_indices_gpu=cumsum[:n_seq],
            last_token_indices_gpu=cumsum[1:] - 1,
            prompt_lens_cpu=prompt_lens,