Commit 527a4352 authored by Baber
Browse files

Merge branch 'main' into longcxt

# Conflicts:
#	lm_eval/tasks/README.md
parents 6042f622 52df63b7
"dataset_name": "Primary General Knowledge"
"tag": "arabicmmlu_other_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_general_knowledge"
"task_alias": "Primary General Knowledge"
"dataset_name": "Primary Geography"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_geography"
"task_alias": "Primary Geography"
"dataset_name": "Primary History"
"tag": "arabicmmlu_humanities_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_history"
"task_alias": "Primary History"
"dataset_name": "Primary Islamic Studies"
"tag": "arabicmmlu_humanities_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_islamic_studies"
"task_alias": "Primary Islamic Studies"
"dataset_name": "Primary Math"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_math"
"task_alias": "Primary Math"
"dataset_name": "Primary Natural Science"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_natural_science"
"task_alias": "Primary Natural Science"
"dataset_name": "Primary Social Science"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_social_science"
"task_alias": "Primary Social Science"
"dataset_name": "Prof Law"
"tag": "arabicmmlu_humanities_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_prof_law"
"task_alias": "Prof Law"
"dataset_name": "Social Science (Middle School)"
"include": "_default_arabicmmlu_template_yaml"
"tag": "arabicmmlu_social_science_tasks"
"task": "arabicmmlu_social_science_middle_school"
"task_alias": "Social Science (Middle School)"
"dataset_name": "Social Science (Primary School)"
"include": "_default_arabicmmlu_template_yaml"
"tag": "arabicmmlu_social_science_tasks"
"task": "arabicmmlu_social_science_primary_school"
"task_alias": "Social Science (Primary School)"
"dataset_name": "Univ Accounting"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_accounting"
"task_alias": "Univ Accounting"
"dataset_name": "Univ Computer Science"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_computer_science"
"task_alias": "Univ Computer Science"
"dataset_name": "Univ Economics"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_economics"
"task_alias": "Univ Economics"
"dataset_name": "Univ Management"
"tag": "arabicmmlu_other_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_management"
"task_alias": "Univ Management"
"dataset_name": "Univ Political Science"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_political_science"
"task_alias": "Univ Political Science"
......@@ -23,7 +23,7 @@ def doc_to_text(doc):
question = (
doc["Question"]
if doc["Context"] == ""
if not doc["Context"]
else f"{doc['Context']}\n\n{doc['Question']}"
)
......
......@@ -58,3 +58,6 @@ If other tasks on this dataset are already supported:
* [ ] Is the "Main" variant of this task clearly denoted?
* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates?
* [ ] Have you noted which, if any, published evaluation setups are matched by this variant?
### Changelog
* version 2.0 (2025-Feb-14): set `target_delimiter` to `""`, as the targets already start with a space.
......@@ -8,11 +8,12 @@ validation_split: validation
test_split: null
doc_to_text: "{{context}}"
doc_to_target: "{{completion}}"
target_delimiter: ""
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
version: 1.0
version: 2.0
dataset_kwargs:
trust_remote_code: true
......@@ -30,6 +30,12 @@ Homepage: https://github.com/google/BIG-bench
* `group_name`: `Short description`
#### Tags
* `bigbench_generate_until`
* `bigbench_multiple_choice_a`
* `bigbench_multiple_choice_b`
#### Tasks
* `task_name`: `1-sentence description of what this particular task does`
......
group: bigbench_generate_until
tag: bigbench_generate_until
dataset_path: hails/bigbench
output_type: generate_until
dataset_kwargs:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment