"...lm-evaluation-harness.git" did not exist on "6348b94724338059a64844d1d67be0bb214c208d"
Commit f7f298ee authored by lintangsutawika's avatar lintangsutawika
Browse files

removed unused files for now

parents c0d5a660 12bc8fce
# MMLU "virology" subject, generative variant (task mmlu_virology_generative).
# Everything not set here comes from the included _default_template_yaml.
include: _default_template_yaml
group: mmlu_other_generative
group_alias: other
task: mmlu_virology_generative
task_alias: virology
dataset_name: virology
# Double quotes are required so the trailing "\n\n" is read as two newlines.
description: "The following are multiple choice questions (with answers) about virology.\n\n"
# MMLU "world_religions" subject, generative variant
# (task mmlu_world_religions_generative).
# Everything not set here comes from the included _default_template_yaml.
include: _default_template_yaml
group: mmlu_humanities_generative
group_alias: humanities
task: mmlu_world_religions_generative
task_alias: world_religions
dataset_name: world_religions
# Double quotes are required so the trailing "\n\n" is read as two newlines.
description: "The following are multiple choice questions (with answers) about world religions.\n\n"
...@@ -27,6 +27,6 @@ metric_list: ...@@ -27,6 +27,6 @@ metric_list:
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
regexes_to_ignore: regexes_to_ignore:
- "\ban|a|the\b" - "\\b(?:The |the |An |A |The |a |an )"
metadata: metadata:
version: 2.0 version: 3.0
...@@ -2,7 +2,7 @@ group: ...@@ -2,7 +2,7 @@ group:
- polemo2 - polemo2
task: polemo2_in task: polemo2_in
dataset_path: allegro/klej-polemo2-in dataset_path: allegro/klej-polemo2-in
dataset_name: klej-polemo2-in dataset_name: null
output_type: generate_until output_type: generate_until
training_split: train training_split: train
validation_split: validation validation_split: validation
......
Investigate effect of letter options
- (A)
- A)
- A.
- A\t
- (a)
- a)
- a.
- a\t
Answer types:
- letters only
- original option
- just letter
- letters + continuation
- original option
- just letter
- continuation
# Base BoolQ (SuperGLUE) multiple-choice task configuration.
# NOTE(review): presumably this is the shared `_boolq_alt_ov_yaml` file that the
# per-variant configs pull in via `include` -- confirm against the repository.
dataset_path: super_glue
dataset_name: boolq
output_type: multiple_choice
training_split: train
validation_split: validation
# Prompt: the passage, then the question (a "?" is appended), then "Answer:".
doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:"
doc_to_target: label
# Quoted on purpose: bare no/yes would be parsed as booleans by YAML 1.1 loaders.
doc_to_choice: ["no", "yes"]
should_decontaminate: true
doc_to_decontamination_query: passage
# The metric entries must be nested under a `metric_list` key; as a bare
# top-level sequence following mapping keys they are not parseable YAML.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: brier_score
    aggregation: brier_score
    higher_is_better: false
# Kept as a one-element list, exactly as in the original.
metadata:
  - version: 1.0
# Top-level group collecting the eight boolq_alt_ov_NN sub-groups.
group: boolq_alt_ov
task:
  - boolq_alt_ov_01
  - boolq_alt_ov_02
  - boolq_alt_ov_03
  - boolq_alt_ov_04
  - boolq_alt_ov_05
  - boolq_alt_ov_06
  - boolq_alt_ov_07
  - boolq_alt_ov_08
# Variant 01a: prompt built by styles.template_01, choices by styles.choice_01a.
group: boolq_alt_ov_01
task: boolq_alt_ov_01a
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_01
doc_to_decontamination_query: !function ../styles.template_01
doc_to_choice: !function ../styles.choice_01a
# Variant 01b: prompt built by styles.template_01, choices by styles.choice_01b.
group: boolq_alt_ov_01
task: boolq_alt_ov_01b
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_01
doc_to_decontamination_query: !function ../styles.template_01
doc_to_choice: !function ../styles.choice_01b
# Variant 01c: prompt built by styles.template_01, choices by styles.choice_01c.
group: boolq_alt_ov_01
task: boolq_alt_ov_01c
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_01
doc_to_decontamination_query: !function ../styles.template_01
doc_to_choice: !function ../styles.choice_01c
# Variant 02a: prompt built by styles.template_02, choices by styles.choice_02a.
group: boolq_alt_ov_02
task: boolq_alt_ov_02a
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_02
doc_to_decontamination_query: !function ../styles.template_02
doc_to_choice: !function ../styles.choice_02a
# Variant 02b: prompt built by styles.template_02, choices by styles.choice_02b.
group: boolq_alt_ov_02
task: boolq_alt_ov_02b
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_02
doc_to_decontamination_query: !function ../styles.template_02
doc_to_choice: !function ../styles.choice_02b
# Variant 02c: prompt built by styles.template_02, choices by styles.choice_02c.
group: boolq_alt_ov_02
task: boolq_alt_ov_02c
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_02
doc_to_decontamination_query: !function ../styles.template_02
doc_to_choice: !function ../styles.choice_02c
# Variant 03a: prompt built by styles.template_03, choices by styles.choice_03a.
group: boolq_alt_ov_03
task: boolq_alt_ov_03a
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_03
doc_to_decontamination_query: !function ../styles.template_03
doc_to_choice: !function ../styles.choice_03a
# Variant 03b: prompt built by styles.template_03, choices by styles.choice_03b.
group: boolq_alt_ov_03
task: boolq_alt_ov_03b
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_03
doc_to_decontamination_query: !function ../styles.template_03
doc_to_choice: !function ../styles.choice_03b
# Variant 03c: prompt built by styles.template_03, choices by styles.choice_03c.
group: boolq_alt_ov_03
task: boolq_alt_ov_03c
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_03
doc_to_decontamination_query: !function ../styles.template_03
doc_to_choice: !function ../styles.choice_03c
# Variant 04a: prompt built by styles.template_04, choices by styles.choice_04a.
group: boolq_alt_ov_04
task: boolq_alt_ov_04a
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_04
doc_to_decontamination_query: !function ../styles.template_04
doc_to_choice: !function ../styles.choice_04a
# Variant 04b: prompt built by styles.template_04, choices by styles.choice_04b.
group: boolq_alt_ov_04
task: boolq_alt_ov_04b
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_04
doc_to_decontamination_query: !function ../styles.template_04
doc_to_choice: !function ../styles.choice_04b
# Variant 04c: prompt built by styles.template_04, choices by styles.choice_04c.
group: boolq_alt_ov_04
task: boolq_alt_ov_04c
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_04
doc_to_decontamination_query: !function ../styles.template_04
doc_to_choice: !function ../styles.choice_04c
# Variant 05a: prompt built by styles.template_05, choices by styles.choice_05a.
group: boolq_alt_ov_05
task: boolq_alt_ov_05a
# Base file pulled in via `include`.
include: ../_boolq_alt_ov_yaml
# The same template function produces both the prompt and the decontamination query.
doc_to_text: !function ../styles.template_05
doc_to_decontamination_query: !function ../styles.template_05
doc_to_choice: !function ../styles.choice_05a
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment