Commit 4277840b authored by lintangsutawika's avatar lintangsutawika
Browse files

add piqa

parent f38c7469
Investigate the effect of letter options
- (A)
- A)
- A.
- A\t
- (a)
- a)
- a.
- a\t
Answer types:
- letters only
  - original option
  - just letter
- letters + continuation
  - original option
  - just letter
- continuation
# Shared multiple-choice task settings: dataset, splits, prompt templates,
# decontamination query and metrics.
#
# NOTE(review): `task` is named piqa, yet `group`, `dataset_path` and
# `dataset_name` all point at ai2_arc / ARC-Easy, and the templates read
# `question`, `choices` and `answerKey`. This looks copied from an ARC
# config - confirm which dataset is intended before trusting results
# reported under the piqa name.
group:
  - ai2_arc
task: piqa
dataset_path: ai2_arc
dataset_name: ARC-Easy
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test
# Double-quoted so that \n becomes a real newline before the answer cue.
doc_to_text: "Question: {{question}}\nAnswer:"
# Target is the position of answerKey inside choices.label.
doc_to_target: "{{choices.label.index(answerKey)}}"
doc_to_choice: "{{choices.text}}"
should_decontaminate: true
# Same string as doc_to_text.
doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
# Each metric is one list item; aggregation and higher_is_better must be
# nested under their `- metric:` entry to belong to it.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: brier_score
    aggregation: brier_score
    higher_is_better: false
# Group piqa_alt_ov: lists the eight piqa_alt_ov_NN entries it is made of.
group: piqa_alt_ov
task:
  - piqa_alt_ov_01
  - piqa_alt_ov_02
  - piqa_alt_ov_03
  - piqa_alt_ov_04
  - piqa_alt_ov_05
  - piqa_alt_ov_06
  - piqa_alt_ov_07
  - piqa_alt_ov_08
# piqa_alt_ov_01a (group piqa_alt_ov_01).
# Prompt and decontamination query are both set to styles.template_01;
# answer choices are set to styles.choice_01a.
task: piqa_alt_ov_01a
group: piqa_alt_ov_01
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_01a
doc_to_text: !function ../styles.template_01
doc_to_decontamination_query: !function ../styles.template_01
# piqa_alt_ov_01b (group piqa_alt_ov_01).
# Prompt and decontamination query are both set to styles.template_01;
# answer choices are set to styles.choice_01b.
task: piqa_alt_ov_01b
group: piqa_alt_ov_01
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_01b
doc_to_text: !function ../styles.template_01
doc_to_decontamination_query: !function ../styles.template_01
# piqa_alt_ov_01c (group piqa_alt_ov_01).
# Prompt and decontamination query are both set to styles.template_01;
# answer choices are set to styles.choice_01c.
task: piqa_alt_ov_01c
group: piqa_alt_ov_01
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_01c
doc_to_text: !function ../styles.template_01
doc_to_decontamination_query: !function ../styles.template_01
# piqa_alt_ov_02a (group piqa_alt_ov_02).
# Prompt and decontamination query are both set to styles.template_02;
# answer choices are set to styles.choice_02a.
task: piqa_alt_ov_02a
group: piqa_alt_ov_02
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_02a
doc_to_text: !function ../styles.template_02
doc_to_decontamination_query: !function ../styles.template_02
# piqa_alt_ov_02b (group piqa_alt_ov_02).
# Prompt and decontamination query are both set to styles.template_02;
# answer choices are set to styles.choice_02b.
task: piqa_alt_ov_02b
group: piqa_alt_ov_02
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_02b
doc_to_text: !function ../styles.template_02
doc_to_decontamination_query: !function ../styles.template_02
# piqa_alt_ov_02c (group piqa_alt_ov_02).
# Prompt and decontamination query are both set to styles.template_02;
# answer choices are set to styles.choice_02c.
task: piqa_alt_ov_02c
group: piqa_alt_ov_02
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_02c
doc_to_text: !function ../styles.template_02
doc_to_decontamination_query: !function ../styles.template_02
# piqa_alt_ov_03a (group piqa_alt_ov_03).
# Prompt and decontamination query are both set to styles.template_03;
# answer choices are set to styles.choice_03a.
task: piqa_alt_ov_03a
group: piqa_alt_ov_03
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_03a
doc_to_text: !function ../styles.template_03
doc_to_decontamination_query: !function ../styles.template_03
# piqa_alt_ov_03b (group piqa_alt_ov_03).
# Prompt and decontamination query are both set to styles.template_03;
# answer choices are set to styles.choice_03b.
task: piqa_alt_ov_03b
group: piqa_alt_ov_03
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_03b
doc_to_text: !function ../styles.template_03
doc_to_decontamination_query: !function ../styles.template_03
# piqa_alt_ov_03c (group piqa_alt_ov_03).
# Prompt and decontamination query are both set to styles.template_03;
# answer choices are set to styles.choice_03c.
task: piqa_alt_ov_03c
group: piqa_alt_ov_03
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_03c
doc_to_text: !function ../styles.template_03
doc_to_decontamination_query: !function ../styles.template_03
# piqa_alt_ov_04a (group piqa_alt_ov_04).
# Prompt and decontamination query are both set to styles.template_04;
# answer choices are set to styles.choice_04a.
task: piqa_alt_ov_04a
group: piqa_alt_ov_04
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_04a
doc_to_text: !function ../styles.template_04
doc_to_decontamination_query: !function ../styles.template_04
# piqa_alt_ov_04b (group piqa_alt_ov_04).
# Prompt and decontamination query are both set to styles.template_04;
# answer choices are set to styles.choice_04b.
task: piqa_alt_ov_04b
group: piqa_alt_ov_04
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_04b
doc_to_text: !function ../styles.template_04
doc_to_decontamination_query: !function ../styles.template_04
# piqa_alt_ov_04c (group piqa_alt_ov_04).
# Prompt and decontamination query are both set to styles.template_04;
# answer choices are set to styles.choice_04c.
task: piqa_alt_ov_04c
group: piqa_alt_ov_04
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_04c
doc_to_text: !function ../styles.template_04
doc_to_decontamination_query: !function ../styles.template_04
# piqa_alt_ov_05a (group piqa_alt_ov_05).
# Prompt and decontamination query are both set to styles.template_05;
# answer choices are set to styles.choice_05a.
task: piqa_alt_ov_05a
group: piqa_alt_ov_05
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_05a
doc_to_text: !function ../styles.template_05
doc_to_decontamination_query: !function ../styles.template_05
# piqa_alt_ov_05b (group piqa_alt_ov_05).
# Prompt and decontamination query are both set to styles.template_05;
# answer choices are set to styles.choice_05b.
task: piqa_alt_ov_05b
group: piqa_alt_ov_05
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_05b
doc_to_text: !function ../styles.template_05
doc_to_decontamination_query: !function ../styles.template_05
# piqa_alt_ov_05c (group piqa_alt_ov_05).
# Prompt and decontamination query are both set to styles.template_05;
# answer choices are set to styles.choice_05c.
task: piqa_alt_ov_05c
group: piqa_alt_ov_05
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_05c
doc_to_text: !function ../styles.template_05
doc_to_decontamination_query: !function ../styles.template_05
# piqa_alt_ov_06a (group piqa_alt_ov_06).
# Prompt and decontamination query are both set to styles.template_06;
# answer choices are set to styles.choice_06a.
task: piqa_alt_ov_06a
group: piqa_alt_ov_06
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_06a
doc_to_text: !function ../styles.template_06
doc_to_decontamination_query: !function ../styles.template_06
# piqa_alt_ov_06b (group piqa_alt_ov_06).
# Prompt and decontamination query are both set to styles.template_06;
# answer choices are set to styles.choice_06b.
task: piqa_alt_ov_06b
group: piqa_alt_ov_06
include: ../_piqa_alt_ov_yaml
doc_to_choice: !function ../styles.choice_06b
doc_to_text: !function ../styles.template_06
doc_to_decontamination_query: !function ../styles.template_06
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment