"vscode:/vscode.git/clone" did not exist on "32efba1705b53d5da4baf13878e11b5c24fe3f3a"
Unverified Commit 79b972d6 authored by Hailey Schoelkopf, committed by GitHub
Browse files

[Refactor] [WIP] New YAML advanced docs (#567)



* add wip gsm8k yaml

* cleanup tasks dir

* push gsm8k yaml changes

* rename gpt2.py

* add updated gsm8k, triviaqa baseline

* add new cot yaml

* allow for multiple filter pipelines, new filter types

* updated gsm8k + sampling gen configs

* cleanup self-consistency yaml

* push outline for advanced docs

* push docs checklist

* switch to inheritance for many tasks

* acc_norm and acc_mutual_info fixed

* fix missing newline in error msg

* remove many .py tasks

* updated GSM8k

* added more doc

* Update advanced_task_guide.md

Added list of parameters

* Update advanced_task_guide.md

* Added details on listing metrics

* Update advanced_task_guide.md

* Added more explanation

* modify current default filter name

* add new tags to tasks

* remove a lingering print()

* add rest of param docs, cleanup deprecated fields

* push docs update

* move ALL_TASKS definition location

* confirm write_out.py works if no description dict passed

---------
Co-authored-by: lintangsutawika <lintang@sutawika.com>
parent 761f0087
group: include: pile_arxiv.yaml
- pile
task: pile_bookcorpus2 task: pile_bookcorpus2
dataset_path: EleutherAI/the_pile
dataset_name: pile_bookcorpus2 dataset_name: pile_bookcorpus2
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_books3 task: pile_books3
dataset_path: EleutherAI/the_pile
dataset_name: pile_books3 dataset_name: pile_books3
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_dm-mathematics task: pile_dm-mathematics
dataset_path: EleutherAI/the_pile
dataset_name: pile_dm-mathematics dataset_name: pile_dm-mathematics
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_enron task: pile_enron
dataset_path: EleutherAI/the_pile dataset_name: pile_enron
dataset_name: enron_emails
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_europarl task: pile_europarl
dataset_path: EleutherAI/the_pile
dataset_name: pile_europarl dataset_name: pile_europarl
\ No newline at end of file
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_freelaw task: pile_freelaw
dataset_path: EleutherAI/the_pile
dataset_name: pile_freelaw dataset_name: pile_freelaw
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_github task: pile_github
dataset_path: EleutherAI/the_pile
dataset_name: pile_github dataset_name: pile_github
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_gutenberg task: pile_gutenberg
dataset_path: EleutherAI/the_pile
dataset_name: pile_gutenberg dataset_name: pile_gutenberg
\ No newline at end of file
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_hackernews task: pile_hackernews
dataset_path: EleutherAI/the_pile
dataset_name: pile_hackernews dataset_name: pile_hackernews
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_nih-exporter task: pile_nih-exporter
dataset_path: EleutherAI/the_pile
dataset_name: pile_nih-exporter dataset_name: pile_nih-exporter
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_opensubtitles task: pile_opensubtitles
dataset_path: EleutherAI/the_pile
dataset_name: pile_opensubtitles dataset_name: pile_opensubtitles
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_openwebtext2 task: pile_openwebtext2
dataset_path: EleutherAI/the_pile
dataset_name: pile_openwebtext2 dataset_name: pile_openwebtext2
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_philpapers task: pile_philpapers
dataset_path: EleutherAI/the_pile
dataset_name: pile_philpapers dataset_name: pile_philpapers
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_pile-cc task: pile_pile-cc
dataset_path: EleutherAI/the_pile
dataset_name: pile_pile-cc dataset_name: pile_pile-cc
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_pubmed-abstracts task: pile_pubmed-abstracts
dataset_path: EleutherAI/the_pile
dataset_name: pile_pubmed-abstracts dataset_name: pile_pubmed-abstracts
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_pubmed-central task: pile_pubmed-central
dataset_path: EleutherAI/the_pile
dataset_name: pile_pubmed-central dataset_name: pile_pubmed-central
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_stackexchange task: pile_stackexchange
dataset_path: EleutherAI/the_pile
dataset_name: pile_stackexchange dataset_name: pile_stackexchange
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_ubuntu-irc task: pile_ubuntu-irc
dataset_path: EleutherAI/the_pile
dataset_name: pile_ubuntu-irc dataset_name: pile_ubuntu-irc
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_uspto task: pile_uspto
dataset_path: EleutherAI/the_pile
dataset_name: pile_uspto dataset_name: pile_uspto
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
group: include: pile_arxiv.yaml
- pile
task: pile_wikipedia task: pile_wikipedia
dataset_path: EleutherAI/the_pile
dataset_name: pile_wikipedia dataset_name: pile_wikipedia
output_type: loglikelihood_rolling
test_split: train
template_aliases: ""
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
metric_list:
- metric: word_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: byte_perplexity
aggregation: weighted_perplexity
higher_is_better: false
- metric: bits_per_byte
aggregation: bits_per_byte
higher_is_better: false
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment