Commit 06d3406e authored by lintangsutawika
Browse files

update

parent f23ae748
# Generated by _generate_configs.py
dataset_name: multistep_arithmetic_two
include: _template_yaml
task: bbh_multistep_arithmetic_two
# Generated by _generate_configs.py
dataset_name: navigate
include: _template_yaml
task: bbh_navigate
# Generated by _generate_configs.py
dataset_name: object_counting
include: _template_yaml
task: bbh_object_counting
# Generated by _generate_configs.py
dataset_name: penguins_in_a_table
include: _template_yaml
task: bbh_penguins_in_a_table
# Generated by _generate_configs.py
dataset_name: reasoning_about_colored_objects
include: _template_yaml
task: bbh_reasoning_about_colored_objects
# Generated by _generate_configs.py
dataset_name: ruin_names
include: _template_yaml
task: bbh_ruin_names
# Generated by _generate_configs.py
dataset_name: salient_translation_error_detection
include: _template_yaml
task: bbh_salient_translation_error_detection
# Generated by _generate_configs.py
dataset_name: snarks
include: _template_yaml
task: bbh_snarks
# Generated by _generate_configs.py
dataset_name: sports_understanding
include: _template_yaml
task: bbh_sports_understanding
# Generated by _generate_configs.py
dataset_name: temporal_sequences
include: _template_yaml
task: bbh_temporal_sequences
# Generated by _generate_configs.py
dataset_name: tracking_shuffled_objects_five_objects
include: _template_yaml
task: bbh_tracking_shuffled_objects_five_objects
# Generated by _generate_configs.py
dataset_name: tracking_shuffled_objects_seven_objects
include: _template_yaml
task: bbh_tracking_shuffled_objects_seven_objects
# Generated by _generate_configs.py
dataset_name: tracking_shuffled_objects_three_objects
include: _template_yaml
task: bbh_tracking_shuffled_objects_three_objects
# Generated by _generate_configs.py
dataset_name: web_of_lies
include: _template_yaml
task: bbh_web_of_lies
# Generated by _generate_configs.py
dataset_name: word_sorting
include: _template_yaml
task: bbh_word_sorting
......@@ -115,4 +115,4 @@ if __name__ == "__main__":
file_save_path = args.save_prefix_path + f"_{subject}.yaml"
eval_logger.info(f"Saving yaml for subset {subject} to {file_save_path}")
with open(file_save_path, "w") as yaml_file:
yaml.dump(yaml_dict, yaml_file)
yaml.dump(yaml_dict, yaml_file, width=float("inf"))
group: mmlu_flan
dataset_path: cais/mmlu
validation_split: validation
# validation_split: validation
test_split: test
fewshot_split: dev
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA:"
# doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA: "
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
output_type: greedy_until
doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
# doc_to_target: "{{['(A)', '(B)', '(C)', '(D)'][answer]}}"
doc_to_target: "{{['A', 'B', 'C', 'D'][answer]}}"
metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
# ignore_case: true
# ignore_punctuation: true
generation_kwargs:
until:
- "</s>"
do_sample: false
temperature: 0.0
\ No newline at end of file
# do_sample: false
# temperature: 0.0
\ No newline at end of file
group: mmlu_flan_loglikelihood
dataset_path: cais/mmlu
validation_split: validation
# validation_split: validation
test_split: test
fewshot_split: dev
doc_to_text: "Q: {{question.strip()}}\n(A) {{choices[0]}} (B) {{choices[1]}} (C) {{choices[2]}} (D) {{choices[3]}}\nA:"
output_type: multiple_choice
doc_to_choice: ['(A)', '(B)', '(C)', '(D)']
doc_to_text: "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_target: answer
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
\ No newline at end of file
dataset_name: abstract_algebra
description: 'The following are multiple choice questions (with answers) about abstract
algebra.
description: 'The following are multiple choice questions (with answers) about abstract algebra.
'
......
dataset_name: business_ethics
description: 'The following are multiple choice questions (with answers) about business
ethics.
description: 'The following are multiple choice questions (with answers) about business ethics.
'
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment