Commit 5c914988 authored by Baber
Browse files

Handle the case where doc_to_text is callable

parent 3f671a44
...@@ -9,6 +9,7 @@ from lm_eval import utils ...@@ -9,6 +9,7 @@ from lm_eval import utils
from lm_eval.api.group import ConfigurableGroup, GroupConfig from lm_eval.api.group import ConfigurableGroup, GroupConfig
from lm_eval.api.task import ConfigurableTask, Generate_MultipleChoice, Task from lm_eval.api.task import ConfigurableTask, Generate_MultipleChoice, Task
from lm_eval.evaluator_utils import get_subtask_list from lm_eval.evaluator_utils import get_subtask_list
from lm_eval.tasks.mmlu_pro.utils import doc_to_text
GROUP_ONLY_KEYS = list(GroupConfig().to_dict().keys()) GROUP_ONLY_KEYS = list(GroupConfig().to_dict().keys())
...@@ -20,13 +21,23 @@ def convert_mcq_to_generative(cfg: dict): ...@@ -20,13 +21,23 @@ def convert_mcq_to_generative(cfg: dict):
return cfg return cfg
else: else:
cfg["output_type"] = "generate_until" cfg["output_type"] = "generate_until"
cfg["doc_to_text"] = ( doc_to_text = cfg.get("doc_to_text", "")
prompt if isinstance(doc_to_text, str):
+ "\n" cfg["doc_to_text"] = (
+ cfg.get("doc_to_text", "") prompt
+ "\n" + "\n"
+ 'Your response should end with "The best answer is [the_answer_letter]" where the [the_answer_letter] is one of choice letters, A, B, C etc.' + cfg.get("doc_to_text", "")
) + "\n"
+ 'Your response should be formatted as "The best answer is [the_answer_letter]" where the [the_answer_letter] is one of choice letters, A, B, C etc.'
)
elif callable(doc_to_text):
cfg["doc_to_text"] = (
lambda doc: prompt
+ "\n"
+ doc_to_text(doc)
+ "\n"
+ 'Your response should be formatted as "The best answer is [the_answer_letter]" where the [the_answer_letter] is one of choice letters, A, B, C etc.'
)
cfg["target_delimiter"] = "\n\n" cfg["target_delimiter"] = "\n\n"
cfg["gen_prefix"] = "The best answer is" cfg["gen_prefix"] = "The best answer is"
cfg["generation_kwargs"] = {"until": ["."], "max_gen_toks": 10} cfg["generation_kwargs"] = {"until": ["."], "max_gen_toks": 10}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment