Unverified commit 2be1a0f7, authored by Wentao Ye and committed by GitHub
Browse files

[Refactor] Remove dead code in pooling model (#37572)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent 4120a05f
...@@ -60,14 +60,6 @@ def encode_pooling_output_float(output: PoolingRequestOutput) -> list[float]: ...@@ -60,14 +60,6 @@ def encode_pooling_output_float(output: PoolingRequestOutput) -> list[float]:
return output.outputs.data.tolist() return output.outputs.data.tolist()
def encode_pooling_output_binary(
output: PoolingRequestOutput,
embed_dtype: EmbedDType,
endianness: Endianness,
) -> bytes:
return tensor2binary(output.outputs.data, embed_dtype, endianness)
def encode_pooling_output_base64( def encode_pooling_output_base64(
output: PoolingRequestOutput, output: PoolingRequestOutput,
embed_dtype: EmbedDType, embed_dtype: EmbedDType,
......
...@@ -14,7 +14,6 @@ pin_memory = is_pin_memory_available() ...@@ -14,7 +14,6 @@ pin_memory = is_pin_memory_available()
@dataclass @dataclass
class PoolingCursor: class PoolingCursor:
index: list[int]
first_token_indices_gpu: torch.Tensor first_token_indices_gpu: torch.Tensor
last_token_indices_gpu: torch.Tensor last_token_indices_gpu: torch.Tensor
prompt_lens_cpu: torch.Tensor prompt_lens_cpu: torch.Tensor
...@@ -23,7 +22,6 @@ class PoolingCursor: ...@@ -23,7 +22,6 @@ class PoolingCursor:
def __getitem__(self, indices: slice): def __getitem__(self, indices: slice):
return PoolingCursor( return PoolingCursor(
index=self.index[indices],
first_token_indices_gpu=self.first_token_indices_gpu[indices], first_token_indices_gpu=self.first_token_indices_gpu[indices],
last_token_indices_gpu=self.last_token_indices_gpu[indices], last_token_indices_gpu=self.last_token_indices_gpu[indices],
prompt_lens_cpu=self.prompt_lens_cpu[indices], prompt_lens_cpu=self.prompt_lens_cpu[indices],
...@@ -108,7 +106,6 @@ class PoolingMetadata: ...@@ -108,7 +106,6 @@ class PoolingMetadata:
assert len(prompt_lens) == n_seq assert len(prompt_lens) == n_seq
index = list(range(n_seq))
num_scheduled_tokens_cpu = torch.from_numpy(num_scheduled_tokens_np) num_scheduled_tokens_cpu = torch.from_numpy(num_scheduled_tokens_np)
if query_start_loc_gpu is None: if query_start_loc_gpu is None:
cumsum = torch.zeros( cumsum = torch.zeros(
...@@ -130,7 +127,6 @@ class PoolingMetadata: ...@@ -130,7 +127,6 @@ class PoolingMetadata:
) )
cumsum = query_start_loc_gpu cumsum = query_start_loc_gpu
self.pooling_cursor = PoolingCursor( self.pooling_cursor = PoolingCursor(
index=index,
first_token_indices_gpu=cumsum[:n_seq], first_token_indices_gpu=cumsum[:n_seq],
last_token_indices_gpu=cumsum[1:] - 1, last_token_indices_gpu=cumsum[1:] - 1,
prompt_lens_cpu=prompt_lens, prompt_lens_cpu=prompt_lens,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment