Unverified Commit 0375b792 authored by Lintang Sutawika, committed by GitHub

Merge pull request #520 from EleutherAI/update-config

Update config
parents c5ed8cdc eb42b01b

@@ -17,10 +17,13 @@ PROMPT_REGISTRY = {
 def get_prompt(prompt_id: str, dataset_name=None, subset_name=None):
     # unpack prompt name
     category_name, prompt_name = prompt_id.split(":")
-    eval_logger.info(f"Loading prompt from {category_name}")
+    if subset_name is None:
+        dataset_full_name = dataset_name
+    else:
+        dataset_full_name = f"{dataset_name}-{subset_name}"
+    eval_logger.info(f"Loading prompt from {category_name} for {dataset_full_name}")
     if category_name == "promptsource":
         try:
-            # prompts = DatasetTemplates(dataset_name, dataset_path)
             if subset_name is None:
                 prompts = DatasetTemplates(dataset_name=dataset_name)
             else:
...
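
The new branch folds the subset into the logged dataset name. A minimal standalone sketch of just that naming rule (illustrative, not the harness's actual module):

```python
from typing import Optional

# Sketch of the naming rule added above: a subset-qualified dataset is
# reported as "<dataset>-<subset>", a bare dataset as "<dataset>".
def full_dataset_name(dataset_name: str, subset_name: Optional[str] = None) -> str:
    if subset_name is None:
        return dataset_name
    return f"{dataset_name}-{subset_name}"

assert full_dataset_name("super_glue", "boolq") == "super_glue-boolq"
assert full_dataset_name("squad") == "squad"
```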

@@ -55,7 +55,7 @@ def get_task(task_name, config):
         return TASK_REGISTRY[task_name](config=config)
     except KeyError:
         eval_logger.info("Available tasks:")
-        eval_logger.info(TASK_REGISTRY)
+        eval_logger.info(ALL_TASKS)
         raise KeyError(f"Missing task {task_name}")
...
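
The error path now logs ALL_TASKS instead of the raw TASK_REGISTRY dict. This diff does not show how ALL_TASKS is defined; a plausible definition (an assumption, not taken from this commit) is the sorted list of registered task names, which prints far more readably than a dict of task classes:

```python
# Hypothetical definition of ALL_TASKS (not shown in this diff): the sorted
# task names, which log cleanly, unlike a dict mapping names to classes.
TASK_REGISTRY = {"boolq": object, "cb": object}  # placeholder registry
ALL_TASKS = sorted(TASK_REGISTRY.keys())
print(ALL_TASKS)  # ['boolq', 'cb']
```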

 group:
-  - t0-eval
-task: "does the pronoun refer to"
+  - super-glue-promptsource
+task: "GPT-3 Style"
 dataset_path: super_glue
-dataset_name: wsc.fixed
+dataset_name: boolq
 training_split: train
 validation_split: validation
-use_prompt: "promptsource:does the pronoun refer to"
+use_prompt: "promptsource:GPT-3 Style"
 metric_list:
   - metric: exact_match
     aggregation: mean
...
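
For context, a hedged sketch of how a task config like the new one above can be read and routed into get_prompt; yaml.safe_load is standard PyYAML, but the file name and the field handling here are assumptions based only on the keys visible in the diff:

```python
import yaml

# Assumed consumption of a task config like the one above; the harness's
# real loader may differ, and the file name is hypothetical.
with open("promptsource-00.yaml") as f:
    cfg = yaml.safe_load(f)

# use_prompt carries "<category>:<prompt name>", which get_prompt splits.
category_name, prompt_name = cfg["use_prompt"].split(":")
assert category_name == "promptsource"
```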

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "based on the previous passage"
use_prompt: "promptsource:based on the previous passage"

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "based on the following passage"
use_prompt: "promptsource:based on the following passage"

group:
  - super-glue-cb
include: based_on_previous_passage.yaml
task: can we infer
reference: Webson & Pavlick 2021
doc_to_text: "Suppose {{premise}} Can we infer that \"{{hypothesis}}\"? Yes, no, or maybe?"
doc_to_target: "{% set answer_choices = ['Yes', 'No', 'Maybe'] %}{{answer_choices[label]}}"

group:
  - super-glue-cb
include: based_on_previous_passage.yaml
task: claim true/false/inconclusive
reference: Sanh et al. 2021
doc_to_text: "{{premise}} Based on that information, is the claim: \"{{hypothesis}}\" \"true\", \"false\", or \"inconclusive\"?"
doc_to_target: "{% set answer_choices = ['True', 'False', 'Inconclusive'] %}{{answer_choices[label]}}"
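
The cb configs above carry their prompts inline as Jinja templates in doc_to_text and doc_to_target, where the replacement files delegate to promptsource instead. Rendering the "can we infer" templates by hand with jinja2 shows what they produce (the example document is made up; premise, hypothesis, and label follow the SuperGLUE CB schema):

```python
from jinja2 import Template

# A made-up CB-style example; label 2 maps to "Maybe" below.
doc = {"premise": "It is raining.", "hypothesis": "The ground is wet.", "label": 2}

text = Template(
    'Suppose {{premise}} Can we infer that "{{hypothesis}}"? Yes, no, or maybe?'
).render(**doc)
target = Template(
    "{% set answer_choices = ['Yes', 'No', 'Maybe'] %}{{answer_choices[label]}}"
).render(**doc)

print(text)    # Suppose It is raining. Can we infer that "The ground is wet."? Yes, no, or maybe?
print(target)  # Maybe
```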

 group:
-  - t0-eval
-task: "by p they mean"
+  - super-glue-promptsource
+task: "GPT-3 style"
 dataset_path: super_glue
-dataset_name: wsc.fixed
+dataset_name: cb
 training_split: train
 validation_split: validation
-use_prompt: "promptsource:by p they mean"
+use_prompt: "promptsource:GPT-3 style"
 metric_list:
   - metric: exact_match
     aggregation: mean
...

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "MNLI crowdsource"
use_prompt: "promptsource:MNLI crowdsource"

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "based on the previous passage"
use_prompt: "promptsource:based on the previous passage"

 group:
-  - super-glue-cb
-task: based on the previous passage
-reference: "Adapted from the BoolQ prompts in Schick & Sch\xFCtze 2021."
+  - super-glue-promptsource
+task: "C1 or C2? premise, so/because…"
 dataset_path: super_glue
-dataset_name: cb
+dataset_name: copa
 training_split: train
 validation_split: validation
-doc_to_text: "{{premise}} Based on the previous passage, is it true that \"{{hypothesis}}\"? Yes, no, or maybe?"
-doc_to_target: "{% set answer_choices = ['Yes', 'No', 'Maybe'] %}{{answer_choices[label]}}"
+use_prompt: "promptsource:C1 or C2? premise, so/because…"
 metric_list:
   - metric: exact_match
     aggregation: mean
...

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "best_option"
use_prompt: "promptsource:best_option"

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "cause_effect"
use_prompt: "promptsource:cause_effect"

group:
  - super-glue-promptsource
task: "I was going to say…"
dataset_path: super_glue
dataset_name: multirc
training_split: train
validation_split: validation
use_prompt: "promptsource:I was going to say…"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
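
The metric block above configures exact_match with case and punctuation folding, aggregated as a mean over examples. A hedged sketch of what those options imply (the harness presumably delegates to a metrics implementation; this is illustrative only):

```python
import string

def exact_match(pred: str, gold: str,
                ignore_case: bool = True,
                ignore_punctuation: bool = True) -> float:
    # Illustrative reading of the options above, not the harness's code.
    if ignore_case:
        pred, gold = pred.lower(), gold.lower()
    if ignore_punctuation:
        strip = str.maketrans("", "", string.punctuation)
        pred, gold = pred.translate(strip), gold.translate(strip)
    return float(pred == gold)

scores = [exact_match("Yes.", "yes"), exact_match("No", "Maybe")]
print(sum(scores) / len(scores))  # aggregation: mean -> 0.5
```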

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "Would it be good to answer…"
use_prompt: "promptsource:Would it be good to answer…"

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "confirm"
use_prompt: "promptsource:confirm"

group:
  - super-glue-promptsource
task: "Add sentence after (continuation choices)"
dataset_path: super_glue
dataset_name: record
training_split: train
validation_split: validation
use_prompt: "promptsource:Add sentence after (continuation choices)"
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "Add sentence after after (continuation choices)"
use_prompt: "promptsource:Add sentence after after (continuation choices)"

include: promptsource-00.yaml
group:
  - super-glue-promptsource
task: "Can you figure out…"
use_prompt: "promptsource:Can you figure out…"