Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3e397a94
Unverified
Commit
3e397a94
authored
Apr 11, 2025
by
Alexey Belyakov
Committed by
GitHub
Apr 11, 2025
Browse files
check input length of sonnet samples (#16423)
Signed-off-by:
alexey-belyakov
<
alexey.belyakov@intel.com
>
parent
268c3250
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
8 deletions
+9
-8
benchmarks/benchmark_dataset.py
benchmarks/benchmark_dataset.py
+9
-8
No files found.
benchmarks/benchmark_dataset.py
View file @
3e397a94
...
@@ -489,7 +489,7 @@ class SonnetDataset(BenchmarkDataset):
...
@@ -489,7 +489,7 @@ class SonnetDataset(BenchmarkDataset):
prefix_lines
=
self
.
data
[:
num_prefix_lines
]
prefix_lines
=
self
.
data
[:
num_prefix_lines
]
samples
=
[]
samples
=
[]
for
_
in
range
(
num_requests
)
:
while
len
(
samples
)
<
num_requests
:
extra_lines
=
random
.
choices
(
self
.
data
,
extra_lines
=
random
.
choices
(
self
.
data
,
k
=
num_input_lines
-
num_prefix_lines
)
k
=
num_input_lines
-
num_prefix_lines
)
prompt
=
f
"
{
base_prompt
}{
''
.
join
(
prefix_lines
+
extra_lines
)
}
"
prompt
=
f
"
{
base_prompt
}{
''
.
join
(
prefix_lines
+
extra_lines
)
}
"
...
@@ -497,13 +497,14 @@ class SonnetDataset(BenchmarkDataset):
...
@@ -497,13 +497,14 @@ class SonnetDataset(BenchmarkDataset):
prompt_formatted
=
tokenizer
.
apply_chat_template
(
prompt_formatted
=
tokenizer
.
apply_chat_template
(
msg
,
add_generation_prompt
=
True
,
tokenize
=
False
)
msg
,
add_generation_prompt
=
True
,
tokenize
=
False
)
prompt_len
=
len
(
tokenizer
(
prompt_formatted
).
input_ids
)
prompt_len
=
len
(
tokenizer
(
prompt_formatted
).
input_ids
)
samples
.
append
(
if
prompt_len
<=
input_len
:
SampleRequest
(
samples
.
append
(
prompt
=
prompt_formatted
SampleRequest
(
if
return_prompt_formatted
else
prompt
,
prompt
=
prompt_formatted
prompt_len
=
prompt_len
,
if
return_prompt_formatted
else
prompt
,
expected_output_len
=
output_len
,
prompt_len
=
prompt_len
,
))
expected_output_len
=
output_len
,
))
return
samples
return
samples
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment