".github/git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "13d596c93e98cd34fb2d9238a11316c6bec4f4e1"
Commit 44eec73b authored by haileyschoelkopf's avatar haileyschoelkopf
Browse files

push many yaml seed tasks

parent 82401c96
# Rolling log-likelihood evaluation over the `pile_pile-cc` configuration
# of EleutherAI/the_pile.
group:
  - pile
task: pile_pile-cc
dataset_path: EleutherAI/the_pile
dataset_name: pile_pile-cc
output_type: loglikelihood_rolling
# NOTE(review): the test split points at `train` — confirm this is intended.
test_split: train
template_aliases: ""
# Empty prompt; the target is the document's `text` field.
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
# Each entry is a single mapping: `aggregation` and `higher_is_better`
# belong to the `metric` above them, not to the top level.
metric_list:
  - metric: word_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: byte_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: bits_per_byte
    aggregation: bits_per_byte
    higher_is_better: false
\ No newline at end of file
# Rolling log-likelihood evaluation over the `pile_pubmed-abstracts`
# configuration of EleutherAI/the_pile.
group:
  - pile
task: pile_pubmed-abstracts
dataset_path: EleutherAI/the_pile
dataset_name: pile_pubmed-abstracts
output_type: loglikelihood_rolling
# NOTE(review): the test split points at `train` — confirm this is intended.
test_split: train
template_aliases: ""
# Empty prompt; the target is the document's `text` field.
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
# Each entry is a single mapping: `aggregation` and `higher_is_better`
# belong to the `metric` above them, not to the top level.
metric_list:
  - metric: word_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: byte_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: bits_per_byte
    aggregation: bits_per_byte
    higher_is_better: false
\ No newline at end of file
# Rolling log-likelihood evaluation over the `pile_pubmed-central`
# configuration of EleutherAI/the_pile.
group:
  - pile
task: pile_pubmed-central
dataset_path: EleutherAI/the_pile
dataset_name: pile_pubmed-central
output_type: loglikelihood_rolling
# NOTE(review): the test split points at `train` — confirm this is intended.
test_split: train
template_aliases: ""
# Empty prompt; the target is the document's `text` field.
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
# Each entry is a single mapping: `aggregation` and `higher_is_better`
# belong to the `metric` above them, not to the top level.
metric_list:
  - metric: word_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: byte_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: bits_per_byte
    aggregation: bits_per_byte
    higher_is_better: false
\ No newline at end of file
# Rolling log-likelihood evaluation over the `pile_stackexchange`
# configuration of EleutherAI/the_pile.
group:
  - pile
task: pile_stackexchange
dataset_path: EleutherAI/the_pile
dataset_name: pile_stackexchange
output_type: loglikelihood_rolling
# NOTE(review): the test split points at `train` — confirm this is intended.
test_split: train
template_aliases: ""
# Empty prompt; the target is the document's `text` field.
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
# Each entry is a single mapping: `aggregation` and `higher_is_better`
# belong to the `metric` above them, not to the top level.
metric_list:
  - metric: word_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: byte_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: bits_per_byte
    aggregation: bits_per_byte
    higher_is_better: false
\ No newline at end of file
# Rolling log-likelihood evaluation over the `pile_ubuntu-irc`
# configuration of EleutherAI/the_pile.
group:
  - pile
task: pile_ubuntu-irc
dataset_path: EleutherAI/the_pile
dataset_name: pile_ubuntu-irc
output_type: loglikelihood_rolling
# NOTE(review): the test split points at `train` — confirm this is intended.
test_split: train
template_aliases: ""
# Empty prompt; the target is the document's `text` field.
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
# Each entry is a single mapping: `aggregation` and `higher_is_better`
# belong to the `metric` above them, not to the top level.
metric_list:
  - metric: word_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: byte_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: bits_per_byte
    aggregation: bits_per_byte
    higher_is_better: false
\ No newline at end of file
# Rolling log-likelihood evaluation over the `pile_uspto` configuration
# of EleutherAI/the_pile.
group:
  - pile
task: pile_uspto
dataset_path: EleutherAI/the_pile
dataset_name: pile_uspto
output_type: loglikelihood_rolling
# NOTE(review): the test split points at `train` — confirm this is intended.
test_split: train
template_aliases: ""
# Empty prompt; the target is the document's `text` field.
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
# Each entry is a single mapping: `aggregation` and `higher_is_better`
# belong to the `metric` above them, not to the top level.
metric_list:
  - metric: word_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: byte_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: bits_per_byte
    aggregation: bits_per_byte
    higher_is_better: false
\ No newline at end of file
# Rolling log-likelihood evaluation over the `pile_wikipedia` configuration
# of EleutherAI/the_pile.
group:
  - pile
task: pile_wikipedia
dataset_path: EleutherAI/the_pile
dataset_name: pile_wikipedia
output_type: loglikelihood_rolling
# NOTE(review): the test split points at `train` — confirm this is intended.
test_split: train
template_aliases: ""
# Empty prompt; the target is the document's `text` field.
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
# Each entry is a single mapping: `aggregation` and `higher_is_better`
# belong to the `metric` above them, not to the top level.
metric_list:
  - metric: word_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: byte_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: bits_per_byte
    aggregation: bits_per_byte
    higher_is_better: false
\ No newline at end of file
# Rolling log-likelihood evaluation over the `pile_youtubesubtitles`
# configuration of EleutherAI/the_pile.
group:
  - pile
task: pile_youtubesubtitles
dataset_path: EleutherAI/the_pile
dataset_name: pile_youtubesubtitles
output_type: loglikelihood_rolling
# NOTE(review): the test split points at `train` — confirm this is intended.
test_split: train
template_aliases: ""
# Empty prompt; the target is the document's `text` field.
doc_to_text: ""
doc_to_target: "{{text}}"
should_decontaminate: true
doc_to_decontamination_query: "{{text}}"
# Each entry is a single mapping: `aggregation` and `higher_is_better`
# belong to the `metric` above them, not to the top level.
metric_list:
  - metric: word_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: byte_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: bits_per_byte
    aggregation: bits_per_byte
    higher_is_better: false
\ No newline at end of file
# Multiple-choice evaluation on the `piqa` dataset.
group:
  - piqa_yaml_grp
task: piqa_yaml
dataset_path: piqa
dataset_name: null
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: null
# Set the list of possible answer choices, and set what this doc's gold
# label idx is.
template_aliases: "{% set question = goal %}{% set answer_choices = [sol1, sol2] %}{% set gold = label %}"
doc_to_text: "Question: {{question}}\nAnswer:"
doc_to_target: "{{gold}}"  # this will be cast to an int.
# Each entry is a single mapping: `aggregation` and `higher_is_better`
# belong to the `metric` above them, not to the top level.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: acc_mutual_info
    aggregation: mean
    higher_is_better: true
# Multiple-choice evaluation on the `sciq` dataset.
group:
  - sciq_yaml_grp
task: sciq_yaml
dataset_path: sciq
dataset_name: null
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test
# TODO: we should see how shuffling answer choices affects perf.
# Set the list of possible answer choices, and set what this doc's gold
# label idx is. The correct answer is always listed last, hence gold = 3.
template_aliases: "{% set answer_choices = [distractor1, distractor2, distractor3, correct_answer] %}{% set gold = 3 %}"
doc_to_text: "{{support.lstrip()}}\nQuestion: {{question}}\nAnswer:"
doc_to_target: "{{gold}}"  # this will be cast to an int.
# Each entry is a single mapping: `aggregation` and `higher_is_better`
# belong to the `metric` above them, not to the top level.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
  - metric: acc_mutual_info
    aggregation: mean
    higher_is_better: true
\ No newline at end of file
# Wikitext
### Paper
Pointer Sentinel Mixture Models
https://arxiv.org/pdf/1609.07843.pdf
The WikiText language modeling dataset is a collection of over 100 million tokens
extracted from the set of verified Good and Featured articles on Wikipedia.
NOTE: This `Task` is based on WikiText-2.
Homepage: https://www.salesforce.com/products/einstein/ai-research/the-wikitext-dependency-language-modeling-dataset/
### Citation
```
@misc{merity2016pointer,
title={Pointer Sentinel Mixture Models},
author={Stephen Merity and Caiming Xiong and James Bradbury and Richard Socher},
year={2016},
eprint={1609.07843},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
```
### Subtasks
### Checklist
- [x] Is in Eval-harness v1.0?
- [x] Has been checked for regression from v1.0?
- [ ] Has been checked for equivalence with original paper methodology?
- [ ] "Main" checked variant clearly denoted?
\ No newline at end of file
import re
def wikitext_detokenizer(doc):
    """Undo WikiText tokenization artifacts in ``doc["page"]``.

    The rewrite rules are applied strictly in the order listed below; later
    rules see the output of earlier ones, so the order must not be changed.

    Args:
        doc: Mapping with a ``"page"`` key holding the tokenized text.

    Returns:
        The detokenized text as a ``str``.
    """
    text = doc["page"]

    # contractions
    text = text.replace("s '", "s'")
    # NOTE(review): this pattern is a JavaScript-style regex literal carried
    # over verbatim. In Python the slashes are literal characters, so it only
    # matches text such as "/' 5/" and leaves inputs like "' 90s" untouched.
    # It is kept as-is on purpose: changing it would alter the scored text.
    text = re.sub(r"/' [0-9]/", r"/'[0-9]/", text)

    # number separators, then punctuation
    separator_and_punctuation_rules = (
        (" @-@ ", "-"),
        (" @,@ ", ","),
        (" @.@ ", "."),
        (" : ", ": "),
        (" ; ", "; "),
        (" . ", ". "),
        (" ! ", "! "),
        (" ? ", "? "),
        (" , ", ", "),
    )
    for old, new in separator_and_punctuation_rules:
        text = text.replace(old, new)

    # double brackets: trim whitespace just inside paired delimiters
    paired_delimiter_rules = (
        (r"\(\s*([^\)]*?)\s*\)", r"(\1)"),
        (r"\[\s*([^\]]*?)\s*\]", r"[\1]"),
        (r"{\s*([^}]*?)\s*}", r"{\1}"),
        (r"\"\s*([^\"]*?)\s*\"", r'"\1"'),
        (r"'\s*([^']*?)\s*'", r"'\1'"),
    )
    for pattern, replacement in paired_delimiter_rules:
        text = re.sub(pattern, replacement, text)

    # miscellaneous
    degree_sign = chr(176)
    miscellaneous_rules = (
        # Longest heading marker first so shorter ones do not split it.
        ("= = = =", "===="),
        ("= = =", "==="),
        ("= =", "=="),
        (" " + degree_sign + " ", degree_sign),
        (" \n", "\n"),
        ("\n ", "\n"),
        (" N ", " 1 "),
        (" 's", "'s"),
    )
    for old, new in miscellaneous_rules:
        text = text.replace(old, new)

    return text
# Rolling log-likelihood evaluation over the `wikitext-2-raw-v1`
# configuration of EleutherAI/wikitext_document_level.
group:
  - wikitext_group
task: wikitext_yaml
dataset_path: EleutherAI/wikitext_document_level
dataset_name: wikitext-2-raw-v1
output_type: loglikelihood_rolling
training_split: train
validation_split: validation
test_split: test
template_aliases: ""
# Empty prompt; the target is produced by the referenced Python function
# rather than by a template string.
doc_to_text: ""
doc_to_target: !function preprocess_wikitext.wikitext_detokenizer
should_decontaminate: true
doc_to_decontamination_query: "{{page}}"
# Each entry is a single mapping: `aggregation` and `higher_is_better`
# belong to the `metric` above them, not to the top level.
metric_list:
  - metric: word_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: byte_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: bits_per_byte
    aggregation: bits_per_byte
    higher_is_better: false
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment