Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
4dbd5ec9
Unverified
Commit
4dbd5ec9
authored
May 15, 2025
by
Tingchen Fu
Committed by
GitHub
May 15, 2025
Browse files
feat: add question suffix (#2876)
parent
2bde99e4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
19 additions
and
3 deletions
+19
-3
lm_eval/__main__.py
lm_eval/__main__.py
+6
-0
lm_eval/api/task.py
lm_eval/api/task.py
+11
-3
lm_eval/evaluator.py
lm_eval/evaluator.py
+2
-0
No files found.
lm_eval/__main__.py
View file @
4dbd5ec9
...
...
@@ -261,6 +261,12 @@ def setup_parser() -> argparse.ArgumentParser:
default
=
""
,
help
=
"Comma separated string arguments passed to Hugging Face Hub's log function, e.g. `hub_results_org=EleutherAI,hub_repo_name=lm-eval-results`"
,
)
parser
.
add_argument
(
"--question_suffix"
,
type
=
str
,
default
=
None
,
help
=
"Suffix to append to the target question before the <|assistant|>, e.g., Think for maximum 128 tokens"
,
)
parser
.
add_argument
(
"--predict_only"
,
"-x"
,
...
...
lm_eval/api/task.py
View file @
4dbd5ec9
...
...
@@ -401,6 +401,7 @@ class Task(abc.ABC):
fewshot_as_multiturn
:
bool
=
False
,
chat_template
:
Optional
[
Callable
]
=
None
,
tokenizer_name
:
str
=
""
,
question_suffix
:
str
=
""
)
->
None
:
"""Build a set of Instances for a task, and store them in task.instances"""
...
...
@@ -464,6 +465,7 @@ class Task(abc.ABC):
fewshot_as_multiturn
,
chat_template
,
gen_prefix
=
self
.
doc_to_prefix
(
doc
),
question_suffix
=
question_suffix
,
)
# TODO: we should override self.config.repeats if doing greedy gen so users don't waste time+compute
...
...
@@ -1066,6 +1068,7 @@ class ConfigurableTask(Task):
question
:
str
,
fewshot_as_multiturn
:
bool
=
False
,
gen_prefix
:
Optional
[
str
]
=
None
,
question_suffix
:
Optional
[
str
]
=
None
,
)
->
None
:
"""Adds a target question to the labeled examples list.
If fewshot_as_multiturn is True, or labeled_examples is empty, or the last entry is a system turn, appends the question as a new user entry.
...
...
@@ -1074,13 +1077,13 @@ class ConfigurableTask(Task):
if
not
fewshot_as_multiturn
:
# if no messages or last message is system, append as new user entry
if
len
(
labeled_examples
)
==
0
or
labeled_examples
[
-
1
][
"role"
]
==
"system"
:
labeled_examples
.
append
({
"role"
:
"user"
,
"content"
:
question
}
)
labeled_examples
.
append
({
"role"
:
"user"
,
"content"
:
question
+
question_suffix
}
if
question_suffix
else
{
"role"
:
"user"
,
"content"
:
question
}
)
# if last message is user, append to it to avoid two user messages in a row
else
:
labeled_examples
[
-
1
][
"content"
]
+=
question
labeled_examples
[
-
1
][
"content"
]
+=
question
+
question_suffix
if
question_suffix
else
question
else
:
# if fewshot_as_multiturn is True, append as next user entry (last is always assistant)
labeled_examples
.
append
({
"role"
:
"user"
,
"content"
:
question
}
)
labeled_examples
.
append
({
"role"
:
"user"
,
"content"
:
question
+
question_suffix
}
if
question_suffix
else
{
"role"
:
"user"
,
"content"
:
question
}
)
if
gen_prefix
:
labeled_examples
.
append
({
"role"
:
"assistant"
,
"content"
:
gen_prefix
})
...
...
@@ -1094,6 +1097,7 @@ class ConfigurableTask(Task):
fewshot_as_multiturn
:
bool
=
False
,
chat_template
:
Optional
[
Callable
]
=
None
,
gen_prefix
:
Optional
[
str
]
=
None
,
question_suffix
:
Optional
[
str
]
=
None
,
)
->
Union
[
str
,
List
[
str
]]:
"""Returns a fewshot context string that is made up of a prepended description
(if provided), the `num_fewshot` number of examples, and an appended prompt example.
...
...
@@ -1171,6 +1175,7 @@ class ConfigurableTask(Task):
example
,
fewshot_as_multiturn
,
gen_prefix
=
gen_prefix
,
question_suffix
=
question_suffix
,
)
# for loglikelihood create a list of questions with appended choices
elif
isinstance
(
example
,
list
):
...
...
@@ -1183,6 +1188,7 @@ class ConfigurableTask(Task):
ex
,
fewshot_as_multiturn
,
gen_prefix
=
gen_prefix
,
question_suffix
=
question_suffix
,
)
# TODO: append prefill?
labeled_examples_list
.
append
(
...
...
@@ -1201,6 +1207,7 @@ class ConfigurableTask(Task):
choices
[
example
],
fewshot_as_multiturn
,
gen_prefix
=
gen_prefix
,
question_suffix
=
question_suffix
,
)
else
:
self
.
append_target_question
(
...
...
@@ -1208,6 +1215,7 @@ class ConfigurableTask(Task):
str
(
example
),
fewshot_as_multiturn
,
gen_prefix
=
gen_prefix
,
question_suffix
=
question_suffix
,
)
# return lm.apply_chat_template(labeled_examples)
return
chat_template
(
...
...
lm_eval/evaluator.py
View file @
4dbd5ec9
...
...
@@ -413,6 +413,7 @@ def evaluate(
fewshot_as_multiturn
:
bool
=
False
,
verbosity
:
str
=
"INFO"
,
confirm_run_unsafe_code
:
bool
=
False
,
question_suffix
:
Optional
[
str
]
=
None
,
):
"""Instantiate and evaluate a model on a list of tasks.
...
...
@@ -526,6 +527,7 @@ def evaluate(
tokenizer_name
=
getattr
(
lm
,
"tokenizer_name"
,
""
)
if
apply_chat_template
else
""
,
question_suffix
=
question_suffix
,
)
eval_logger
.
debug
(
f
"Task:
{
task_output
.
task_name
}
; number of requests on this rank:
{
len
(
task
.
instances
)
}
"
...
...
gaoqiong
@gaoqiong
mentioned in commit
29ea6832
·
Oct 16, 2025
mentioned in commit
29ea6832
mentioned in commit 29ea6832cd913b055ec1d6962180c773e8a7ac88
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment