Commit e3cacb3b authored by zhuwenwen
Browse files

update num_rejected_tokens and num_speculative_tokens

parent 7bb28d54
......@@ -45,21 +45,24 @@ class CommonAttentionMetadata:
seq_lens_cpu: torch.Tensor
"""(batch_size,), the length of each request including both computed tokens
and newly scheduled tokens"""
num_computed_tokens_cpu: torch.Tensor
"""(batch_size,), the number of computed tokens for each request"""
num_reqs: int
"""Number of requests"""
num_actual_tokens: int
"""Total number of tokens in batch"""
max_query_len: int
"""Longest query in batch"""
num_rejected_tokens: list[int]
"""(batch_size,), record the rejected tokens number in cpu and gpu"""
num_speculative_tokens: int
"""Number of speculative tokens"""
block_table_tensor: torch.Tensor
slot_mapping: torch.Tensor
num_rejected_tokens: list[int] = None
"""(batch_size,), record the rejected tokens number in cpu and gpu"""
num_speculative_tokens: int = 0
"""Number of speculative tokens"""
M = TypeVar("M")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment