Commit 527a4352 authored by Baber
Browse files

Merge branch 'main' into longcxt

# Conflicts:
#	lm_eval/tasks/README.md
parents 6042f622 52df63b7
"dataset_name": "Primary General Knowledge"
"tag": "arabicmmlu_other_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_general_knowledge"
"task_alias": "Primary General Knowledge"
"dataset_name": "Primary Geography"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_geography"
"task_alias": "Primary Geography"
"dataset_name": "Primary History"
"tag": "arabicmmlu_humanities_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_history"
"task_alias": "Primary History"
"dataset_name": "Primary Islamic Studies"
"tag": "arabicmmlu_humanities_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_islamic_studies"
"task_alias": "Primary Islamic Studies"
"dataset_name": "Primary Math"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_math"
"task_alias": "Primary Math"
"dataset_name": "Primary Natural Science"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_natural_science"
"task_alias": "Primary Natural Science"
"dataset_name": "Primary Social Science"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_primary_social_science"
"task_alias": "Primary Social Science"
"dataset_name": "Prof Law"
"tag": "arabicmmlu_humanities_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_prof_law"
"task_alias": "Prof Law"
"dataset_name": "Social Science (Middle School)"
"include": "_default_arabicmmlu_template_yaml"
"tag": "arabicmmlu_social_science_tasks"
"task": "arabicmmlu_social_science_middle_school"
"task_alias": "Social Science (Middle School)"
"dataset_name": "Social Science (Primary School)"
"include": "_default_arabicmmlu_template_yaml"
"tag": "arabicmmlu_social_science_tasks"
"task": "arabicmmlu_social_science_primary_school"
"task_alias": "Social Science (Primary School)"
"dataset_name": "Univ Accounting"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_accounting"
"task_alias": "Univ Accounting"
"dataset_name": "Univ Computer Science"
"tag": "arabicmmlu_stem_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_computer_science"
"task_alias": "Univ Computer Science"
"dataset_name": "Univ Economics"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_economics"
"task_alias": "Univ Economics"
"dataset_name": "Univ Management"
"tag": "arabicmmlu_other_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_management"
"task_alias": "Univ Management"
"dataset_name": "Univ Political Science"
"tag": "arabicmmlu_social_science_tasks"
"include": "_default_arabicmmlu_template_yaml"
"task": "arabicmmlu_univ_political_science"
"task_alias": "Univ Political Science"
...@@ -23,7 +23,7 @@ def doc_to_text(doc): ...@@ -23,7 +23,7 @@ def doc_to_text(doc):
question = ( question = (
doc["Question"] doc["Question"]
if doc["Context"] == "" if not doc["Context"]
else f"{doc['Context']}\n\n{doc['Question']}" else f"{doc['Context']}\n\n{doc['Question']}"
) )
......
...@@ -58,3 +58,6 @@ If other tasks on this dataset are already supported: ...@@ -58,3 +58,6 @@ If other tasks on this dataset are already supported:
* [ ] Is the "Main" variant of this task clearly denoted? * [ ] Is the "Main" variant of this task clearly denoted?
* [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates? * [ ] Have you provided a short sentence in a README on what each new variant adds / evaluates?
* [ ] Have you noted which, if any, published evaluation setups are matched by this variant? * [ ] Have you noted which, if any, published evaluation setups are matched by this variant?
### Changelog
version 2.0: (2025-Feb-14) set target delimiter to "" as the targets already start with a space.
...@@ -8,11 +8,12 @@ validation_split: validation ...@@ -8,11 +8,12 @@ validation_split: validation
test_split: null test_split: null
doc_to_text: "{{context}}" doc_to_text: "{{context}}"
doc_to_target: "{{completion}}" doc_to_target: "{{completion}}"
target_delimiter: ""
metric_list: metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
version: 1.0 version: 2.0
dataset_kwargs: dataset_kwargs:
trust_remote_code: true trust_remote_code: true
...@@ -30,6 +30,12 @@ Homepage: https://github.com/google/BIG-bench ...@@ -30,6 +30,12 @@ Homepage: https://github.com/google/BIG-bench
* `group_name`: `Short description` * `group_name`: `Short description`
#### Tags
* `bigbench_generate_until`
* `bigbench_multiple_choice_a`
* `bigbench_multiple_choice_b`
#### Tasks #### Tasks
* `task_name`: `1-sentence description of what this particular task does` * `task_name`: `1-sentence description of what this particular task does`
......
group: bigbench_generate_until tag: bigbench_generate_until
dataset_path: hails/bigbench dataset_path: hails/bigbench
output_type: generate_until output_type: generate_until
dataset_kwargs: dataset_kwargs:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment