Merge branch 'big-refactor' into seq2seq-refactor

a6c640d3 · Lintang Sutawika · GitHub · 55eccc29 · 24e3e3fa · a6c640d3
Unverified commit a6c640d3, authored Jun 16, 2023 by Lintang Sutawika; committed by GitHub on Jun 16, 2023.
20 changed files
--- a/lm_eval/tasks/pile/pile_enron.yaml
+++ b/lm_eval/tasks/pile/pile_enron.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_enron
-dataset_path: EleutherAI/the_pile
-dataset_name: enron_emails
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
+dataset_name: pile_enron
--- a/lm_eval/tasks/pile/pile_europarl.yaml
+++ b/lm_eval/tasks/pile/pile_europarl.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_europarl
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_europarl
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_freelaw.yaml
+++ b/lm_eval/tasks/pile/pile_freelaw.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_freelaw
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_freelaw
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_github.yaml
+++ b/lm_eval/tasks/pile/pile_github.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_github
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_github
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_gutenberg.yaml
+++ b/lm_eval/tasks/pile/pile_gutenberg.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_gutenberg
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_gutenberg
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_hackernews.yaml
+++ b/lm_eval/tasks/pile/pile_hackernews.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_hackernews
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_hackernews
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_nih-exporter.yaml
+++ b/lm_eval/tasks/pile/pile_nih-exporter.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_nih-exporter
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_nih-exporter
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_opensubtitles.yaml
+++ b/lm_eval/tasks/pile/pile_opensubtitles.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_opensubtitles
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_opensubtitles
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_openwebtext2.yaml
+++ b/lm_eval/tasks/pile/pile_openwebtext2.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_openwebtext2
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_openwebtext2
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_philpapers.yaml
+++ b/lm_eval/tasks/pile/pile_philpapers.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_philpapers
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_philpapers
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_pile-cc.yaml
+++ b/lm_eval/tasks/pile/pile_pile-cc.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_pile-cc
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_pile-cc
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_pubmed-abstracts.yaml
+++ b/lm_eval/tasks/pile/pile_pubmed-abstracts.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_pubmed-abstracts
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_pubmed-abstracts
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_pubmed-central.yaml
+++ b/lm_eval/tasks/pile/pile_pubmed-central.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_pubmed-central
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_pubmed-central
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_stackexchange.yaml
+++ b/lm_eval/tasks/pile/pile_stackexchange.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_stackexchange
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_stackexchange
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_ubuntu-irc.yaml
+++ b/lm_eval/tasks/pile/pile_ubuntu-irc.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_ubuntu-irc
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_ubuntu-irc
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_uspto.yaml
+++ b/lm_eval/tasks/pile/pile_uspto.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_uspto
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_uspto
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_wikipedia.yaml
+++ b/lm_eval/tasks/pile/pile_wikipedia.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_wikipedia
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_wikipedia
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/pile/pile_youtubesubtitles.yaml
+++ b/lm_eval/tasks/pile/pile_youtubesubtitles.yaml
-group:
-  - pile
+include: pile_arxiv.yaml
 task: pile_youtubesubtitles
-dataset_path: EleutherAI/the_pile
 dataset_name: pile_youtubesubtitles
-output_type: loglikelihood_rolling
-test_split: train
-template_aliases: ""
-doc_to_text: ""
-doc_to_target: "{{text}}"
-should_decontaminate: true
-doc_to_decontamination_query: "{{text}}"
-metric_list:
-  - metric: word_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: byte_perplexity
-    aggregation: weighted_perplexity
-    higher_is_better: false
-  - metric: bits_per_byte
-    aggregation: bits_per_byte
-    higher_is_better: false
--- a/lm_eval/tasks/piqa/piqa.yaml
+++ b/lm_eval/tasks/piqa/piqa.yaml
 group:
-  - piqa_yaml_grp
-task: piqa_yaml
+  - multiple_choice
+task: piqa
 dataset_path: piqa
 dataset_name: null
 output_type: multiple_choice

--- a/lm_eval/tasks/sciq/sciq.yaml
+++ b/lm_eval/tasks/sciq/sciq.yaml
 group:
-  - sciq_yaml_grp
-task: sciq_yaml
+  - multiple_choice
+task: sciq
 dataset_path: sciq
 dataset_name: null
 output_type: multiple_choice