Commit 4bf44695 authored by lintangsutawika
Browse files

update on names

parent 81ec804e
# Shared settings for the Paloma tasks: dataset source, splits, prompt/target
# mapping, decontamination, and perplexity metrics.
# NOTE(review): this appears to be the file the per-dataset task configs pull
# in via `include` — confirm the file name against the repository.
group:
  - paloma
dataset_path: allenai/paloma
output_type: loglikelihood_rolling
validation_split: val
test_split: test
# Empty prompt: only the target text is supplied, via doc_to_target.
doc_to_text: ""
# `!function` is a consumer-defined tag; it needs a loader that registers it.
doc_to_target: !function paloma_utils.doc_to_target
should_decontaminate: true
doc_to_decontamination_query: !function paloma_utils.doc_to_target
# Each entry is one mapping: metric name, its aggregation, and direction.
metric_list:
  - metric: word_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: byte_perplexity
    aggregation: weighted_perplexity
    higher_is_better: false
  - metric: bits_per_byte
    aggregation: bits_per_byte
    higher_is_better: false
metadata:
  version: 1
# Paloma task for the `4chan_meta_sep` dataset config; other settings come
# from the included `_paloma_template`.
include: _paloma_template
task: paloma_4chan_meta_sep
task_alias: 4chan
dataset_name: 4chan_meta_sep
# Paloma task for the `c4_100_domains` dataset config; other settings come
# from the included `_paloma_template`.
include: _paloma_template
task: paloma_c4_100_domains
task_alias: C4 100 Domains
dataset_name: c4_100_domains
# Paloma task for the `c4_en` dataset config; other settings come from the
# included `_paloma_template`.
include: _paloma_template
task: paloma_c4_en
task_alias: C4
dataset_name: c4_en
# Paloma task for the `dolma-v1_5` dataset config; other settings come from
# the included `_paloma_template`.
include: _paloma_template
task: paloma_dolma-v1_5
task_alias: Dolma V1.5
dataset_name: dolma-v1_5
# Paloma task for the `dolma_100_subreddits` dataset config; other settings
# come from the included `_paloma_template`.
include: _paloma_template
task: paloma_dolma_100_subreddits
task_alias: 100 Subreddits
dataset_name: dolma_100_subreddits
# Paloma task for the `falcon-refinedweb` dataset config; other settings come
# from the included `_paloma_template`.
include: _paloma_template
task: paloma_falcon-refinedweb
task_alias: Falcon
dataset_name: falcon-refinedweb
# Paloma task for the `gab` dataset config; other settings come from the
# included `_paloma_template`.
include: _paloma_template
task: paloma_gab
task_alias: Gab
dataset_name: gab
# Paloma task for the `m2d2_s2orc_unsplit` dataset config; other settings come
# from the included `_paloma_template`.
include: _paloma_template
task: paloma_m2d2_s2orc_unsplit
task_alias: M2D2 S2ORC
dataset_name: m2d2_s2orc_unsplit
# Paloma task for the `m2d2_wikipedia_unsplit` dataset config; other settings
# come from the included `_paloma_template`.
include: _paloma_template
task: paloma_m2d2_wikipedia_unsplit
task_alias: M2D2 Wikipedia
dataset_name: m2d2_wikipedia_unsplit
# Paloma task for the `manosphere_meta_sep` dataset config; other settings
# come from the included `_paloma_template`.
include: _paloma_template
task: paloma_manosphere_meta_sep
task_alias: Manosphere
dataset_name: manosphere_meta_sep
# Paloma task for the `mc4` dataset config; other settings come from the
# included `_paloma_template`.
include: _paloma_template
task: paloma_mc4
task_alias: mC4
dataset_name: mc4
# Paloma task for the `ptb` dataset config; other settings come from the
# included `_paloma_template`.
include: _paloma_template
task: paloma_ptb
task_alias: PTB
dataset_name: ptb
# Paloma task for the `redpajama` dataset config; other settings come from the
# included `_paloma_template`.
include: _paloma_template
task: paloma_redpajama
task_alias: RedPajama
dataset_name: redpajama
# Paloma task for the `twitterAAE_HELM_fixed` dataset config; other settings
# come from the included `_paloma_template`.
include: _paloma_template
task: paloma_twitterAAE_HELM_fixed
task_alias: Twitter AAE
dataset_name: twitterAAE_HELM_fixed
# Paloma task for the `wikitext_103` dataset config; other settings come from
# the included `_paloma_template`.
include: _paloma_template
task: paloma_wikitext_103
task_alias: Wikitext-103
dataset_name: wikitext_103
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment