Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into xstorycloze

a5a9017b · lintangsutawika · a04a600a · 7634a6ec · a5a9017b · a5a9017b
Commit a5a9017b authored Aug 10, 2023 by lintangsutawika
20 changed files
--- a/lm_eval/tasks/blimp/npi_present_2.yaml
+++ b/lm_eval/tasks/blimp/npi_present_2.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: npi_present_2
+include: template_yaml
+task: blimp_npi_present_2
--- a/lm_eval/tasks/blimp/only_npi_licensor_present.yaml
+++ b/lm_eval/tasks/blimp/only_npi_licensor_present.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: only_npi_licensor_present
+include: template_yaml
+task: blimp_only_npi_licensor_present
--- a/lm_eval/tasks/blimp/only_npi_scope.yaml
+++ b/lm_eval/tasks/blimp/only_npi_scope.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: only_npi_scope
+include: template_yaml
+task: blimp_only_npi_scope
--- a/lm_eval/tasks/blimp/passive_1.yaml
+++ b/lm_eval/tasks/blimp/passive_1.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: passive_1
+include: template_yaml
+task: blimp_passive_1
--- a/lm_eval/tasks/blimp/passive_2.yaml
+++ b/lm_eval/tasks/blimp/passive_2.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: passive_2
+include: template_yaml
+task: blimp_passive_2
--- a/lm_eval/tasks/blimp/principle_A_c_command.yaml
+++ b/lm_eval/tasks/blimp/principle_A_c_command.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: principle_A_c_command
+include: template_yaml
+task: blimp_principle_A_c_command
--- a/lm_eval/tasks/blimp/principle_A_case_1.yaml
+++ b/lm_eval/tasks/blimp/principle_A_case_1.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: principle_A_case_1
+include: template_yaml
+task: blimp_principle_A_case_1
--- a/lm_eval/tasks/blimp/principle_A_case_2.yaml
+++ b/lm_eval/tasks/blimp/principle_A_case_2.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: principle_A_case_2
+include: template_yaml
+task: blimp_principle_A_case_2
--- a/lm_eval/tasks/blimp/principle_A_domain_1.yaml
+++ b/lm_eval/tasks/blimp/principle_A_domain_1.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: principle_A_domain_1
+include: template_yaml
+task: blimp_principle_A_domain_1
--- a/lm_eval/tasks/blimp/principle_A_domain_2.yaml
+++ b/lm_eval/tasks/blimp/principle_A_domain_2.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: principle_A_domain_2
+include: template_yaml
+task: blimp_principle_A_domain_2
--- a/lm_eval/tasks/blimp/principle_A_domain_3.yaml
+++ b/lm_eval/tasks/blimp/principle_A_domain_3.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: principle_A_domain_3
+include: template_yaml
+task: blimp_principle_A_domain_3
--- a/lm_eval/tasks/blimp/principle_A_reconstruction.yaml
+++ b/lm_eval/tasks/blimp/principle_A_reconstruction.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: principle_A_reconstruction
+include: template_yaml
+task: blimp_principle_A_reconstruction
--- a/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_1.yaml
+++ b/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_1.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: regular_plural_subject_verb_agreement_1
+include: template_yaml
+task: blimp_regular_plural_subject_verb_agreement_1
--- a/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_2.yaml
+++ b/lm_eval/tasks/blimp/regular_plural_subject_verb_agreement_2.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: regular_plural_subject_verb_agreement_2
+include: template_yaml
+task: blimp_regular_plural_subject_verb_agreement_2
--- a/lm_eval/tasks/blimp/sentential_negation_npi_licensor_present.yaml
+++ b/lm_eval/tasks/blimp/sentential_negation_npi_licensor_present.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: sentential_negation_npi_licensor_present
+include: template_yaml
+task: blimp_sentential_negation_npi_licensor_present
--- a/lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml
+++ b/lm_eval/tasks/blimp/sentential_negation_npi_scope.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: sentential_negation_npi_scope
+include: template_yaml
+task: blimp_sentential_negation_npi_scope
--- a/lm_eval/tasks/blimp/sentential_subject_island.yaml
+++ b/lm_eval/tasks/blimp/sentential_subject_island.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: sentential_subject_island
+include: template_yaml
+task: blimp_sentential_subject_island
--- a/lm_eval/tasks/blimp/superlative_quantifiers_1.yaml
+++ b/lm_eval/tasks/blimp/superlative_quantifiers_1.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: superlative_quantifiers_1
+include: template_yaml
+task: blimp_superlative_quantifiers_1
--- a/lm_eval/tasks/blimp/superlative_quantifiers_2.yaml
+++ b/lm_eval/tasks/blimp/superlative_quantifiers_2.yaml
+# Generated by utils.py
+# Sets only dataset_name and task for this entry; the remaining settings are
+# pulled in through `include: template_yaml`.
+dataset_name: superlative_quantifiers_2
+include: template_yaml
+task: blimp_superlative_quantifiers_2
--- a/lm_eval/tasks/blimp/template_yaml
+++ b/lm_eval/tasks/blimp/template_yaml
+# Shared settings for the BLiMP task configs in this directory. Each per-task
+# YAML names this file in its `include` key and adds only dataset_name and task.
+group: blimp
+dataset_path: blimp
+output_type: multiple_choice
+# The Hugging Face `blimp` dataset publishes only a `train` split for each
+# config (there is no `validation` split), so evaluation docs are read from
+# `train`.
+validation_split: train
+# Empty prompt: the two choices below are complete sentences.
+doc_to_text: ""
+# Index into doc_to_choice; 0 selects sentence_good as the correct answer.
+doc_to_target: 0
+doc_to_choice: "{{[sentence_good, sentence_bad]}}"
+should_decontaminate: true
+doc_to_decontamination_query: "{{sentence_good}} {{sentence_bad}}"
+metric_list:
+  - metric: acc