fixed arc

31c52f55 · lintangsutawika · 155160a1 · 31c52f55 · 31c52f55
Commit 31c52f55 authored Jul 11, 2023 by lintangsutawika
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 5 additions and 24 deletions

lm_eval/tasks/arc/arc_challenge.yaml +1 -18

lm_eval/tasks/arc/arc_easy.yaml +4 -6

No files found.
--- a/lm_eval/tasks/arc/arc_challenge.yaml
+++ b/lm_eval/tasks/arc/arc_challenge.yaml
@@ -1,24 +1,7 @@
+include: arc_easy.yaml
 group:
  - ai2_arc
  - multiple_choice
 task: arc_challenge
 dataset_path: ai2_arc
 dataset_name: ARC-Challenge
-output_type: multiple_choice
-training_split: train
-validation_split: validation
-test_split: test
-template_aliases: "{% set answer_choices = choices['text'] %}{% set gold = choices.label.index(answerKey) %}" # set the list of possible answer choices, and set what this doc's gold answer is (set what ds column used, and what)
-doc_to_text: "Question: {{question}}\nAnswer:"
-doc_to_target: "{{answer_choices[gold]}}"
-gold_alias: "{{gold}}" # this will be cast to an int.
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
-  - metric: acc_norm
-    aggregation: mean
-    higher_is_better: true
-  # - metric: acc_mutual_info
-  #   aggregation: mean
-  #   higher_is_better: true
--- a/lm_eval/tasks/arc/arc_easy.yaml
+++ b/lm_eval/tasks/arc/arc_easy.yaml
@@ -8,10 +8,11 @@ output_type: multiple_choice
 training_split: train
 validation_split: validation
 test_split: test
-template_aliases: "{% set answer_choices = choices['text'] %}{% set gold = choices.label.index(answerKey) %}" # set the list of possible answer choices, and set what this doc's gold answer is (set what ds column used, and what)
 doc_to_text: "Question: {{question}}\nAnswer:"
-doc_to_target: "{{answer_choices[gold]}}"
+doc_to_target: "{{choices['text'][choices.label.index(answerKey)]}}"
-gold_alias: "{{gold}}" # this will be cast to an int.
+doc_to_choice: "{{choices.text}}"
+should_decontaminate: true
+doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
 metric_list:
  - metric: acc
    aggregation: mean
@@ -19,6 +20,3 @@ metric_list:
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
-  - metric: acc_mutual_info
-    aggregation: mean
-    higher_is_better: true