edit anli

0456d543 · lintangsutawika · ae55476a · 0456d543 · 0456d543 · 0456d543
Commit 0456d543 authored Aug 14, 2023 by lintangsutawika
4 changed files
--- a/lm_eval/tasks/anli/README.md
+++ b/lm_eval/tasks/anli/README.md
@@ -30,19 +30,17 @@ Homepage: https://github.com/facebookresearch/anli
 }
 ```
-### Subtasks
+### Groups and Tasks
-List or describe tasks defined in this folder, and their names here:
+#### Groups
+* `anli`: Evaluates `anli_r1`, `anli_r2`, and `anli_r3`.
+#### Tasks
 * `anli_r1`: The data collected adversarially in the first round.
 * `anli_r2`: The data collected adversarially in the second round, after training on the previous round's data.
 * `anli_r3`: The data collected adversarially in the third round, after training on the previous multiple rounds of data.
-### Groups
-  - `multiple_choice`
-  - `natural_language_inference`
-  - `nli`
-  - `adverserial`
 ### Checklist

--- a/lm_eval/tasks/anli/anli_r1.yaml
+++ b/lm_eval/tasks/anli/anli_r1.yaml
 group:
-  - multiple_choice
+  - anli
-  - natural_language_inference
-  - nli
-  - adverserial
 task: anli_r1
 dataset_path: anli
 dataset_name: null

--- a/lm_eval/tasks/anli/anli_r2.yaml
+++ b/lm_eval/tasks/anli/anli_r2.yaml
-group:
+include: anli_r1.yaml
-  - multiple_choice
-  - natural_language_inference
-  - nli
-  - adverserial
 task: anli_r2
-dataset_path: anli
-dataset_name: null
-output_type: multiple_choice
 training_split: train_r2
 validation_split: dev_r2
 test_split: test_r2
-doc_to_text: "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither?\nAnswer:"
-# True = entailment
-# False = contradiction
-# Neither = neutral
-doc_to_target: "{{['True', 'Neither', 'False'][label]}}"
-doc_to_choice:
-  - "True"
-  - "Neither"
-  - "False"
-should_decontaminate: true
-doc_to_decontamination_query: premise
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true
--- a/lm_eval/tasks/anli/anli_r3.yaml
+++ b/lm_eval/tasks/anli/anli_r3.yaml
-group:
+include: anli_r1.yaml
-  - multiple_choice
-  - natural_language_inference
-  - nli
-  - adverserial
 task: anli_r3
-dataset_path: anli
-dataset_name: null
-output_type: multiple_choice
 training_split: train_r3
 validation_split: dev_r3
 test_split: test_r3
-doc_to_text: "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither?\nAnswer:"
-# True = entailment
-# False = contradiction
-# Neither = neutral
-doc_to_target: "{{['True', 'Neither', 'False'][label]}}"
-doc_to_choice:
-  - "True"
-  - "Neither"
-  - "False"
-should_decontaminate: true
-doc_to_decontamination_query: premise
-metric_list:
-  - metric: acc
-    aggregation: mean
-    higher_is_better: true