Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
29ea6832
Unverified
Commit
29ea6832
authored
May 21, 2025
by
Baber Abbasi
Committed by
GitHub
May 21, 2025
Browse files
Revert "feat: add question suffix (#2876)" (#3007)
This reverts commit
4dbd5ec9
parent
143a7fe0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
3 additions
and
29 deletions
+3
-29
lm_eval/__main__.py
lm_eval/__main__.py
+0
-6
lm_eval/api/task.py
lm_eval/api/task.py
+3
-21
lm_eval/evaluator.py
lm_eval/evaluator.py
+0
-2
No files found.
lm_eval/__main__.py
View file @
29ea6832
...
...
@@ -261,12 +261,6 @@ def setup_parser() -> argparse.ArgumentParser:
default
=
""
,
help
=
"Comma separated string arguments passed to Hugging Face Hub's log function, e.g. `hub_results_org=EleutherAI,hub_repo_name=lm-eval-results`"
,
)
parser
.
add_argument
(
"--question_suffix"
,
type
=
str
,
default
=
None
,
help
=
"Suffix to append to the target question before the <|assistant|>, e.g., Think for maximum 128 tokens"
,
)
parser
.
add_argument
(
"--predict_only"
,
"-x"
,
...
...
lm_eval/api/task.py
View file @
29ea6832
...
...
@@ -401,7 +401,6 @@ class Task(abc.ABC):
fewshot_as_multiturn
:
bool
=
False
,
chat_template
:
Optional
[
Callable
]
=
None
,
tokenizer_name
:
str
=
""
,
question_suffix
:
str
=
""
,
)
->
None
:
"""Build a set of Instances for a task, and store them in task.instances"""
...
...
@@ -465,7 +464,6 @@ class Task(abc.ABC):
fewshot_as_multiturn
,
chat_template
,
gen_prefix
=
self
.
doc_to_prefix
(
doc
),
question_suffix
=
question_suffix
,
)
# TODO: we should override self.config.repeats if doing greedy gen so users don't waste time+compute
...
...
@@ -1068,7 +1066,6 @@ class ConfigurableTask(Task):
question
:
str
,
fewshot_as_multiturn
:
bool
=
False
,
gen_prefix
:
Optional
[
str
]
=
None
,
question_suffix
:
Optional
[
str
]
=
None
,
)
->
None
:
"""Adds a target question to the labeled examples list.
If fewshot_as_multiturn is True, or labeled_examples is empty, or the last entry is a system turn, appends the question as a new user entry.
...
...
@@ -1077,23 +1074,13 @@ class ConfigurableTask(Task):
if
not
fewshot_as_multiturn
:
# if no messages or last message is system, append as new user entry
if
len
(
labeled_examples
)
==
0
or
labeled_examples
[
-
1
][
"role"
]
==
"system"
:
labeled_examples
.
append
(
{
"role"
:
"user"
,
"content"
:
question
+
question_suffix
}
if
question_suffix
else
{
"role"
:
"user"
,
"content"
:
question
}
)
labeled_examples
.
append
({
"role"
:
"user"
,
"content"
:
question
})
# if last message is user, append to it to avoid two user messages in a row
else
:
labeled_examples
[
-
1
][
"content"
]
+=
(
question
+
question_suffix
if
question_suffix
else
question
)
labeled_examples
[
-
1
][
"content"
]
+=
question
else
:
# if fewshot_as_multiturn is True, append as next user entry (last is always assistant)
labeled_examples
.
append
(
{
"role"
:
"user"
,
"content"
:
question
+
question_suffix
}
if
question_suffix
else
{
"role"
:
"user"
,
"content"
:
question
}
)
labeled_examples
.
append
({
"role"
:
"user"
,
"content"
:
question
})
if
gen_prefix
:
labeled_examples
.
append
({
"role"
:
"assistant"
,
"content"
:
gen_prefix
})
...
...
@@ -1107,7 +1094,6 @@ class ConfigurableTask(Task):
fewshot_as_multiturn
:
bool
=
False
,
chat_template
:
Optional
[
Callable
]
=
None
,
gen_prefix
:
Optional
[
str
]
=
None
,
question_suffix
:
Optional
[
str
]
=
None
,
)
->
Union
[
str
,
List
[
str
]]:
"""Returns a fewshot context string that is made up of a prepended description
(if provided), the `num_fewshot` number of examples, and an appended prompt example.
...
...
@@ -1185,7 +1171,6 @@ class ConfigurableTask(Task):
example
,
fewshot_as_multiturn
,
gen_prefix
=
gen_prefix
,
question_suffix
=
question_suffix
,
)
# for loglikelihood create a list of questions with appended choices
elif
isinstance
(
example
,
list
):
...
...
@@ -1198,7 +1183,6 @@ class ConfigurableTask(Task):
ex
,
fewshot_as_multiturn
,
gen_prefix
=
gen_prefix
,
question_suffix
=
question_suffix
,
)
# TODO: append prefill?
labeled_examples_list
.
append
(
...
...
@@ -1217,7 +1201,6 @@ class ConfigurableTask(Task):
choices
[
example
],
fewshot_as_multiturn
,
gen_prefix
=
gen_prefix
,
question_suffix
=
question_suffix
,
)
else
:
self
.
append_target_question
(
...
...
@@ -1225,7 +1208,6 @@ class ConfigurableTask(Task):
str
(
example
),
fewshot_as_multiturn
,
gen_prefix
=
gen_prefix
,
question_suffix
=
question_suffix
,
)
# return lm.apply_chat_template(labeled_examples)
return
chat_template
(
...
...
lm_eval/evaluator.py
View file @
29ea6832
...
...
@@ -413,7 +413,6 @@ def evaluate(
fewshot_as_multiturn
:
bool
=
False
,
verbosity
:
str
=
"INFO"
,
confirm_run_unsafe_code
:
bool
=
False
,
question_suffix
:
Optional
[
str
]
=
None
,
):
"""Instantiate and evaluate a model on a list of tasks.
...
...
@@ -527,7 +526,6 @@ def evaluate(
tokenizer_name
=
getattr
(
lm
,
"tokenizer_name"
,
""
)
if
apply_chat_template
else
""
,
question_suffix
=
question_suffix
,
)
eval_logger
.
debug
(
f
"Task:
{
task_output
.
task_name
}
; number of requests on this rank:
{
len
(
task
.
instances
)
}
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment