Unverified Commit 3e397a94 authored by Alexey Belyakov, committed by GitHub
Browse files

check input length of sonnet samples (#16423)


Signed-off-by: alexey-belyakov <alexey.belyakov@intel.com>
parent 268c3250
...@@ -489,7 +489,7 @@ class SonnetDataset(BenchmarkDataset): ...@@ -489,7 +489,7 @@ class SonnetDataset(BenchmarkDataset):
prefix_lines = self.data[:num_prefix_lines] prefix_lines = self.data[:num_prefix_lines]
samples = [] samples = []
for _ in range(num_requests): while len(samples) < num_requests:
extra_lines = random.choices(self.data, extra_lines = random.choices(self.data,
k=num_input_lines - num_prefix_lines) k=num_input_lines - num_prefix_lines)
prompt = f"{base_prompt}{''.join(prefix_lines + extra_lines)}" prompt = f"{base_prompt}{''.join(prefix_lines + extra_lines)}"
...@@ -497,13 +497,14 @@ class SonnetDataset(BenchmarkDataset): ...@@ -497,13 +497,14 @@ class SonnetDataset(BenchmarkDataset):
prompt_formatted = tokenizer.apply_chat_template( prompt_formatted = tokenizer.apply_chat_template(
msg, add_generation_prompt=True, tokenize=False) msg, add_generation_prompt=True, tokenize=False)
prompt_len = len(tokenizer(prompt_formatted).input_ids) prompt_len = len(tokenizer(prompt_formatted).input_ids)
samples.append( if prompt_len <= input_len:
SampleRequest( samples.append(
prompt=prompt_formatted SampleRequest(
if return_prompt_formatted else prompt, prompt=prompt_formatted
prompt_len=prompt_len, if return_prompt_formatted else prompt,
expected_output_len=output_len, prompt_len=prompt_len,
)) expected_output_len=output_len,
))
return samples return samples
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment