Merge pull request #752 from EleutherAI/babi-refactor

[Refactor] Port Babi task

Merge pull request #752 from EleutherAI/babi-refactor
[Refactor] Port Babi task
5d55b685 · Lintang Sutawika · GitHub · 1710b42d · f33d3758 · 5d55b685
Unverified commit 5d55b685, authored Aug 09, 2023 by Lintang Sutawika; committed by GitHub on Aug 09, 2023
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 21 additions and 1 deletion

lm_eval/evaluator.py +1 -1

lm_eval/tasks/babi/babi.yaml +20 -0

No files found.
--- a/lm_eval/evaluator.py
+++ b/lm_eval/evaluator.py
@@ -253,7 +253,7 @@ def evaluate(
                    eval_logger.info(
                        f"Task: {task_name}; document {inst.doc_id}; context prompt (starting on next line):\n{inst.args[0]}\n(end of prompt on previous line)"
                    )
-                    eval_logger.info("Request:", inst)
+                    eval_logger.info(f"Request: {str(inst)}")
        # aggregate Instances by LM method requested to get output.
        reqtype = (

--- a/lm_eval/tasks/babi/babi.yaml
+++ b/lm_eval/tasks/babi/babi.yaml
+group:
+  - greedy_until
+task: babi
+dataset_path: Muennighoff/babi
+dataset_name: null
+output_type: greedy_until
+training_split: train
+validation_split: valid
+test_split: test
+doc_to_text: "Passage: {{passage}}Question: {{question}}\nAnswer:"
+doc_to_target: " {{answer}}"
+target_delimiter: ""
+generation_kwargs:
+  until:
+    - "\n"
+    - "Passage:"
+metric_list:
+  - metric: exact_match
+    aggregation: mean
+    higher_is_better: true