Commit ba73d131 authored by lintangsutawika
Browse files

Merge branch 'group-agg-rework' of...

Merge branch 'group-agg-rework' of https://github.com/EleutherAI/lm-evaluation-harness into group-agg-rework
parents 6e2dbe76 269b66e9
group: leaderboard_bbh
task:
- leaderboard_bbh_boolean_expressions
- leaderboard_bbh_causal_judgement
- leaderboard_bbh_date_understanding
- leaderboard_bbh_disambiguation_qa
- leaderboard_bbh_formal_fallacies
- leaderboard_bbh_geometric_shapes
- leaderboard_bbh_hyperbaton
- leaderboard_bbh_logical_deduction_five_objects
- leaderboard_bbh_logical_deduction_seven_objects
- leaderboard_bbh_logical_deduction_three_objects
- leaderboard_bbh_movie_recommendation
- leaderboard_bbh_navigate
- leaderboard_bbh_object_counting
- leaderboard_bbh_penguins_in_a_table
- leaderboard_bbh_reasoning_about_colored_objects
- leaderboard_bbh_ruin_names
- leaderboard_bbh_salient_translation_error_detection
- leaderboard_bbh_snarks
- leaderboard_bbh_sports_understanding
- leaderboard_bbh_temporal_sequences
- leaderboard_bbh_tracking_shuffled_objects_five_objects
- leaderboard_bbh_tracking_shuffled_objects_seven_objects
- leaderboard_bbh_tracking_shuffled_objects_three_objects
- leaderboard_bbh_web_of_lies
group: leaderboard_gpqa
task:
- leaderboard_gpqa_diamond
- leaderboard_gpqa_extended
- leaderboard_gpqa_main
dataset_path: Idavidrein/gpqa
group: leaderboard_gpqa
output_type: multiple_choice
process_docs: !function utils.process_docs
training_split: train
......
group: leaderboard_instruction_following
task:
- leaderboard_ifeval
task: leaderboard_ifeval
group: leaderboard_instruction_following
dataset_path: wis-k/instruction-following-eval
dataset_name: null
output_type: generate_until
......
group: leaderboard_math_hard
task:
- leaderboard_math_algebra_hard
- leaderboard_math_counting_and_prob_hard
- leaderboard_math_geometry_hard
- leaderboard_math_intermediate_algebra_hard
- leaderboard_math_num_theory_hard
- leaderboard_math_prealgebra_hard
- leaderboard_math_precalculus_hard
group:
- leaderboard_math_hard
dataset_path: lighteval/MATH-Hard
process_docs: !function utils.process_docs
output_type: generate_until
......
group: leaderboard_musr
task:
- leaderboard_musr_murder_mysteries
- leaderboard_musr_object_placements
- leaderboard_musr_team_allocation
group:
- leaderboard_musr
dataset_path: TAUR-Lab/MuSR
output_type: multiple_choice
doc_to_text: !function utils.doc_to_text
......
group: tag:
- paloma
dataset_path: allenai/paloma
output_type: loglikelihood_rolling
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment