Commit f7b81bd4 authored by lintangsutawika
Browse files

modifications for current evals on t5v2

parent 032e879b
......@@ -399,7 +399,7 @@ def evaluate(
if type(items[0]) == tuple:
numitem = len(items[0])
if isinstance(items[0], (str, list)):
if isinstance(items[0], (str, list, tuple)):
# handle the string case
gathered_items = [None] * lm.accelerator.num_processes
torch.distributed.all_gather_object(gathered_items, items)
......@@ -492,6 +492,8 @@ def evaluate(
]:
stderr = "_stderr,".join(metric.split(","))
stderr_score = results[task][stderr]
if isinstance(stderr_score, str):
stderr_score = 0
var_score = stderr_score**2
metric_score = results[task][metric]
......
......@@ -17,6 +17,7 @@ generation_kwargs:
- "</s>"
- "Q"
- "\n\n"
- "<0x0A>"
do_sample: false
temperature: 0.0
filter_list:
......
......@@ -14,6 +14,7 @@ generation_kwargs:
- "</s>"
- "Q"
- "\n\n"
- "<0x0A>"
do_sample: false
temperature: 0.0
filter_list:
......
......@@ -14,6 +14,7 @@ generation_kwargs:
- "</s>"
- "Q"
- "\n\n"
- "<0x0A>"
do_sample: false
temperature: 0.0
num_fewshot: 0
......
......@@ -14,6 +14,7 @@ generation_kwargs:
- "</s>"
- "Q:"
- "\n\n"
- "<0x0A>"
do_sample: false
temperature: 0.0
num_fewshot: 0
......
group: flan_anli
task:
- include: yaml_templates/held_in_template_yaml
task: anli_r1
task: r1
dataset_path: anli
use_prompt: prompt_templates/anli.yaml:*
validation_split: dev_r1
- include: yaml_templates/held_in_template_yaml
task: anli_r2
task: r2
dataset_path: anli
use_prompt: prompt_templates/anli.yaml:*
validation_split: dev_r2
- include: yaml_templates/held_in_template_yaml
task: anli_r3
task: r3
dataset_path: anli
use_prompt: prompt_templates/anli.yaml:*
validation_split: dev_r3
group: flan_arc
task:
- include: yaml_templates/held_in_template_yaml
task: arc_easy
dataset_path: ai2_arc
dataset_name: ARC-Easy
use_prompt: prompt_templates/arc.yaml:*
validation_split: validation
- include: yaml_templates/held_in_template_yaml
task: arc_challenge
dataset_path: ai2_arc
dataset_name: ARC-Challenge
use_prompt: prompt_templates/arc.yaml:*
......
group: flan_held_in
task:
- flan_boolq
- flan_rte
- flan_anli
- flan_arc
- include: yaml_templates/held_in_template_yaml
task: r1
dataset_path: anli
use_prompt: prompt_templates/anli.yaml:*
validation_split: dev_r1
- include: yaml_templates/held_in_template_yaml
task: r2
dataset_path: anli
use_prompt: prompt_templates/anli.yaml:*
validation_split: dev_r2
- include: yaml_templates/held_in_template_yaml
task: r3
dataset_path: anli
use_prompt: prompt_templates/anli.yaml:*
validation_split: dev_r3
- include: yaml_templates/held_in_template_yaml
dataset_path: ai2_arc
dataset_name: ARC-Easy
use_prompt: prompt_templates/arc.yaml:*
validation_split: validation
- include: yaml_templates/held_in_template_yaml
dataset_path: ai2_arc
dataset_name: ARC-Challenge
use_prompt: prompt_templates/arc.yaml:*
validation_split: validation
- include: yaml_templates/held_in_template_yaml
dataset_path: super_glue
dataset_name: boolq
use_prompt: prompt_templates/boolq.yaml:*
validation_split: validation
- include: yaml_templates/held_in_template_yaml
dataset_path: super_glue
dataset_name: rte
use_prompt: prompt_templates/rte.yaml:*
validation_split: validation
group: flan_held_in
task:
- include: flan/yaml_templates/held_in_template_yaml
dataset_path: super_glue
dataset_name: boolq
use_prompt: flan/prompt_templates/boolq.yaml:*
validation_split: validation
- include: flan/yaml_templates/held_in_template_yaml
dataset_path: super_glue
dataset_name: rte
use_prompt: flan/prompt_templates/rte.yaml:*
validation_split: validation
- include: flan/yaml_templates/held_in_template_yaml
task: anli_r1
dataset_path: anli
use_prompt: flan/prompt_templates/anli.yaml:*
validation_split: dev_r1
- include: flan/yaml_templates/held_in_template_yaml
task: anli_r2
dataset_path: anli
use_prompt: flan/prompt_templates/anli.yaml:*
validation_split: dev_r2
- include: flan/yaml_templates/held_in_template_yaml
task: anli_r3
dataset_path: anli
use_prompt: flan/prompt_templates/anli.yaml:*
validation_split: dev_r3
- include: flan/yaml_templates/held_in_template_yaml
task: arc_easy
dataset_path: ai2_arc
dataset_name: ARC-Easy
use_prompt: flan/prompt_templates/arc.yaml:*
validation_split: validation
- include: flan/yaml_templates/held_in_template_yaml
task: arc_challenge
dataset_path: ai2_arc
dataset_name: ARC-Challenge
use_prompt: flan/prompt_templates/arc.yaml:*
validation_split: validation
group: flan_held_out
task:
# BBH
- bbh_flan_zeroshot
- bbh_flan_fewshot
- bbh_flan_cot_fewshot
- bbh_flan_cot_zeroshot
- bbh_zeroshot
- bbh_fewshot
- bbh_cot_fewshot
- bbh_cot_zeroshot
# MMLU
- mmlu
- mmlu_flan_n_shot_generative
......
......@@ -8,6 +8,7 @@ doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
generation_kwargs:
until:
- "</s>"
- "<0x0A>"
metric_list:
- metric: exact_match
aggregation: mean
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment