Unverified commit e585b583, authored by Chen Zhang and committed by GitHub
Browse files

[Bugfix] Support testing prefill throughput with benchmark_serving.py --hf-output-len 1 (#8891)

parent 090e945e
...@@ -89,8 +89,6 @@ def sample_sharegpt_requests( ...@@ -89,8 +89,6 @@ def sample_sharegpt_requests(
tokenizer: PreTrainedTokenizerBase, tokenizer: PreTrainedTokenizerBase,
fixed_output_len: Optional[int] = None, fixed_output_len: Optional[int] = None,
) -> List[Tuple[str, int, int, None]]: ) -> List[Tuple[str, int, int, None]]:
if fixed_output_len is not None and fixed_output_len < 4:
raise ValueError("output_len too small")
# Load the dataset. # Load the dataset.
with open(dataset_path) as f: with open(dataset_path) as f:
dataset = json.load(f) dataset = json.load(f)
...@@ -117,7 +115,7 @@ def sample_sharegpt_requests( ...@@ -117,7 +115,7 @@ def sample_sharegpt_requests(
prompt_len = len(prompt_token_ids) prompt_len = len(prompt_token_ids)
output_len = len(completion_token_ids output_len = len(completion_token_ids
) if fixed_output_len is None else fixed_output_len ) if fixed_output_len is None else fixed_output_len
if prompt_len < 4 or output_len < 4: if prompt_len < 4 or (fixed_output_len is None and output_len < 4):
# Prune too short sequences. # Prune too short sequences.
continue continue
if prompt_len > 1024 or prompt_len + output_len > 2048: if prompt_len > 1024 or prompt_len + output_len > 2048:
...@@ -228,10 +226,11 @@ def sample_hf_requests( ...@@ -228,10 +226,11 @@ def sample_hf_requests(
prompt_len = len(prompt_token_ids) prompt_len = len(prompt_token_ids)
output_len = len(completion_token_ids output_len = len(completion_token_ids
) if fixed_output_len is None else fixed_output_len ) if fixed_output_len is None else fixed_output_len
if prompt_len < 4 or output_len < 4: if fixed_output_len is None and (prompt_len < 4 or output_len < 4):
# Prune too short sequences. # Prune too short sequences.
continue continue
if prompt_len > 1024 or prompt_len + output_len > 2048: if fixed_output_len is None and \
(prompt_len > 1024 or prompt_len + output_len > 2048):
# Prune too long sequences. # Prune too long sequences.
continue continue
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment