Commit 4eecbabb authored by Baber
Browse files

Merge branch 'main' into prefill

parents dac8b534 fb963f0f
# Evaluation task config: the `high_school_geography` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_high_school_geography_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: high_school_geography
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `high_school_government_and_politics` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_high_school_government_and_politics_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: high_school_government_and_politics
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `high_school_macroeconomics` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_high_school_macroeconomics_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: high_school_macroeconomics
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `high_school_mathematics` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_high_school_mathematics_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: high_school_mathematics
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `high_school_microeconomics` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_high_school_microeconomics_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: high_school_microeconomics
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `high_school_physics` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_high_school_physics_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: high_school_physics
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `high_school_psychology` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_high_school_psychology_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: high_school_psychology
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `high_school_statistics` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_high_school_statistics_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: high_school_statistics
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `high_school_us_history` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_high_school_us_history_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: high_school_us_history
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `high_school_world_history` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_high_school_world_history_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: high_school_world_history
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `human_aging` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_human_aging_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: human_aging
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `human_sexuality` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_human_sexuality_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: human_sexuality
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `international_law` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_international_law_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: international_law
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `jurisprudence` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_jurisprudence_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: jurisprudence
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Group config: bundles the per-subject Arabic MMLU "light" tasks listed
# under `task` into one group named `arabic_leaderboard_arabic_mmlu_light`.
group: arabic_leaderboard_arabic_mmlu_light
# Each entry is the `task` name of a per-subject config.
task:
  - arabic_leaderboard_arabic_mmlu_abstract_algebra_light
  - arabic_leaderboard_arabic_mmlu_anatomy_light
  - arabic_leaderboard_arabic_mmlu_astronomy_light
  - arabic_leaderboard_arabic_mmlu_business_ethics_light
  - arabic_leaderboard_arabic_mmlu_clinical_knowledge_light
  - arabic_leaderboard_arabic_mmlu_college_biology_light
  - arabic_leaderboard_arabic_mmlu_college_chemistry_light
  - arabic_leaderboard_arabic_mmlu_college_computer_science_light
  - arabic_leaderboard_arabic_mmlu_college_mathematics_light
  - arabic_leaderboard_arabic_mmlu_college_medicine_light
  - arabic_leaderboard_arabic_mmlu_college_physics_light
  - arabic_leaderboard_arabic_mmlu_computer_security_light
  - arabic_leaderboard_arabic_mmlu_conceptual_physics_light
  - arabic_leaderboard_arabic_mmlu_econometrics_light
  - arabic_leaderboard_arabic_mmlu_electrical_engineering_light
  - arabic_leaderboard_arabic_mmlu_elementary_mathematics_light
  - arabic_leaderboard_arabic_mmlu_formal_logic_light
  - arabic_leaderboard_arabic_mmlu_global_facts_light
  - arabic_leaderboard_arabic_mmlu_high_school_biology_light
  - arabic_leaderboard_arabic_mmlu_high_school_chemistry_light
  - arabic_leaderboard_arabic_mmlu_high_school_computer_science_light
  - arabic_leaderboard_arabic_mmlu_high_school_european_history_light
  - arabic_leaderboard_arabic_mmlu_high_school_geography_light
  - arabic_leaderboard_arabic_mmlu_high_school_government_and_politics_light
  - arabic_leaderboard_arabic_mmlu_high_school_macroeconomics_light
  - arabic_leaderboard_arabic_mmlu_high_school_mathematics_light
  - arabic_leaderboard_arabic_mmlu_high_school_microeconomics_light
  - arabic_leaderboard_arabic_mmlu_high_school_physics_light
  - arabic_leaderboard_arabic_mmlu_high_school_psychology_light
  - arabic_leaderboard_arabic_mmlu_high_school_statistics_light
  - arabic_leaderboard_arabic_mmlu_high_school_us_history_light
  - arabic_leaderboard_arabic_mmlu_high_school_world_history_light
  - arabic_leaderboard_arabic_mmlu_human_aging_light
  - arabic_leaderboard_arabic_mmlu_human_sexuality_light
  - arabic_leaderboard_arabic_mmlu_international_law_light
  - arabic_leaderboard_arabic_mmlu_jurisprudence_light
  - arabic_leaderboard_arabic_mmlu_logical_fallacies_light
  - arabic_leaderboard_arabic_mmlu_machine_learning_light
  - arabic_leaderboard_arabic_mmlu_management_light
  - arabic_leaderboard_arabic_mmlu_marketing_light
  - arabic_leaderboard_arabic_mmlu_medical_genetics_light
  - arabic_leaderboard_arabic_mmlu_miscellaneous_light
  - arabic_leaderboard_arabic_mmlu_moral_disputes_light
  - arabic_leaderboard_arabic_mmlu_moral_scenarios_light
  - arabic_leaderboard_arabic_mmlu_nutrition_light
  - arabic_leaderboard_arabic_mmlu_philosophy_light
  - arabic_leaderboard_arabic_mmlu_prehistory_light
  - arabic_leaderboard_arabic_mmlu_professional_accounting_light
  - arabic_leaderboard_arabic_mmlu_professional_law_light
  - arabic_leaderboard_arabic_mmlu_professional_medicine_light
  - arabic_leaderboard_arabic_mmlu_professional_psychology_light
  - arabic_leaderboard_arabic_mmlu_public_relations_light
  - arabic_leaderboard_arabic_mmlu_security_studies_light
  - arabic_leaderboard_arabic_mmlu_sociology_light
  - arabic_leaderboard_arabic_mmlu_us_foreign_policy_light
  - arabic_leaderboard_arabic_mmlu_virology_light
  - arabic_leaderboard_arabic_mmlu_world_religions_light
# Group-level `acc` and `acc_norm` are means with `weight_by_size` enabled.
# NOTE(review): presumably this weights each subtask by its number of
# documents -- confirm against the harness documentation.
aggregate_metric_list:
  - metric: acc
    aggregation: mean
    weight_by_size: true
  - metric: acc_norm
    aggregation: mean
    weight_by_size: true
metadata:
  version: 1.0
# Evaluation task config: the `logical_fallacies` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_logical_fallacies_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: logical_fallacies
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `machine_learning` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_machine_learning_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: machine_learning
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `management` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_management_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: management
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `marketing` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_marketing_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: marketing
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
# Evaluation task config: the `medical_genetics` subset of
# arcee-globe/Arabic_MMLU-10percent, scored as multiple choice.
task: arabic_leaderboard_arabic_mmlu_medical_genetics_light
dataset_path: arcee-globe/Arabic_MMLU-10percent
dataset_name: medical_genetics
output_type: multiple_choice
# No training split is configured; `dev` is the validation split.
training_split: null
validation_split: dev
test_split: test
# `utils.process_docs` is defined outside this file; presumably it produces
# the `query`, `gold` and `choices` fields used below -- TODO confirm.
process_docs: !function utils.process_docs
doc_to_text: "{{query}}"
doc_to_target: "{{gold}}"
doc_to_choice: "choices"
# Few-shot examples come from the `dev` split via the `first_n` sampler.
fewshot_split: dev
fewshot_config:
  sampler: first_n
# Both metrics are aggregated with a mean; higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment