Commit 66421b57 authored by lintangsutawika's avatar lintangsutawika
Browse files

add prompt variation

parent 55eff889
| Task | Prompt Variation | Output Variation | Option in Sample |
| :-----------------:| :---------------: | :---------------: |:---------------: |
| boolean_expressions | Yes | Yes | No |
| causal_judgement | Yes | Yes | Yes |
| date_understanding | Yes | Yes | Yes |
| disambiguation_qa | Yes | Yes | Yes |
| dyck_languages | Yes | No | No |
| formal_fallacies | Yes | Yes | Yes |
| geometric_shapes | Yes | Yes | Yes |
| hyperbaton | Yes | Yes | Yes |
| logical_deduction_five_objects| Yes | Yes | Yes |
| logical_deduction_seven_objects| Yes | Yes | Yes |
| logical_deduction_three_objects| Yes | Yes | Yes |
| movie_recommendation| Yes | Yes | Yes |
| multistep_arithmetic_two| Yes | No | No |
| navigate | Yes | Yes | Yes |
| object_counting | Yes | No | No |
| penguins_in_a_table| Yes | Yes | Yes |
| reasoning_about_colored_objects| Yes | Yes | Yes |
| ruin_names | Yes | Yes | Yes |
| salient_translation_error_detection| Yes| Yes | Yes |
| snarks | Yes | Yes | Yes |
| sports_understanding| Yes | Yes | No |
| temporal_sequences | Yes | Yes | Yes |
| tracking_shuffled_objects_five_objects| Yes| Yes | Yes |
| tracking_shuffled_objects_seven_objects| Yes| Yes | Yes |
| tracking_shuffled_objects_three_objects| Yes| Yes | Yes |
| web_of_lies | Yes | Yes | No |
| word_sorting | Yes | No | No |
Notes:
- `web_of_lies` already starts with `Question: `
- Tasks that include options in the sample format them as `Options: (A) ...` (multiple choice) or `Options: - ...` (binary choice)
\ No newline at end of file
# Top-level group `bbh_alt_pv_zeroshot`: lists the three numbered
# prompt-variation groups (01-03) as its member tasks.
# NOTE(review): only the 01 variation's configs are visible in this view;
# 02 and 03 are presumably defined elsewhere — confirm.
group: bbh_alt_pv_zeroshot
task:
- bbh_alt_pv_01_zeroshot
- bbh_alt_pv_02_zeroshot
- bbh_alt_pv_03_zeroshot
# Shared zero-shot settings for prompt variation 01. The per-task configs name
# `_zeroshot_template_yaml` in their `include` key — presumably this file.
group: bbh_alt_pv_01_zeroshot
dataset_path: lukaemon/bbh
output_type: multiple_choice
test_split: test
# Prompt text, gold target, and choice list all come from functions in the
# `styles` module two directories up. `styles_01` is presumably the prompt
# format specific to this variation — confirm against styles.py.
doc_to_text: !function ../../styles.styles_01
doc_to_target: !function ../../styles.doc_to_target
doc_to_choice: !function ../../styles.doc_to_choice
# Zero-shot: no few-shot examples.
num_fewshot: 0
# Metrics reported for each task using this template.
metric_list:
- metric: acc
- metric: acc_norm
- metric: brier_score
\ No newline at end of file
# --- boolean_expressions ---
# References `_zeroshot_template_yaml` via `include`, then sets `doc_to_target`
# to the `target` field and gives the two answer choices explicitly instead of
# using the template's `!function` values.
"dataset_name": "boolean_expressions"
"description": "Evaluate the result of a random Boolean expression.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_boolean_expressions"
"doc_to_target": target
"doc_to_choice": ["True", "False"]
# --- causal_judgement ---
# Sets only dataset name, description, and task name; everything else comes
# from the included template.
"dataset_name": "causal_judgement"
"description": "Answer questions about causal attribution.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_causal_judgement"
# --- date_understanding ---
"dataset_name": "date_understanding"
"description": "Infer the date from context.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_date_understanding"
# --- disambiguation_qa ---
"dataset_name": "disambiguation_qa"
"description": "Clarify the meaning of sentences with ambiguous pronouns.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_disambiguation_qa"
# --- formal_fallacies ---
# Like boolean_expressions, sets the target field and the two answer choices
# ("valid" / "invalid") explicitly.
"dataset_name": "formal_fallacies"
"description": "Distinguish deductively valid arguments from formal fallacies.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_formal_fallacies"
"doc_to_target": target
"doc_to_choice": ["valid", "invalid"]
\ No newline at end of file
# --- geometric_shapes ---
# Sets only dataset name, description, and task name; everything else comes
# from the included `_zeroshot_template_yaml`.
"dataset_name": "geometric_shapes"
"description": "Name geometric shapes from their SVG paths.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_geometric_shapes"
# --- hyperbaton ---
"dataset_name": "hyperbaton"
"description": "Order adjectives correctly in English sentences.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_hyperbaton"
# --- logical_deduction_{five,seven,three}_objects ---
# The three logical-deduction configs share the same description and differ
# only in dataset name and task name.
"dataset_name": "logical_deduction_five_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_logical_deduction_five_objects"
"dataset_name": "logical_deduction_seven_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_logical_deduction_seven_objects"
"dataset_name": "logical_deduction_three_objects"
"description": "A logical deduction task which requires deducing the order of a sequence of objects.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_logical_deduction_three_objects"
# --- movie_recommendation ---
# Adds a `process_docs` hook pointing at `fix_movie_recommendation` in the
# `utils` module two directories up. What the hook changes is not visible
# here — see utils.py.
"dataset_name": "movie_recommendation"
"description": "Recommend movies similar to the given list of movies.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_movie_recommendation"
"process_docs": !function ../../utils.fix_movie_recommendation
\ No newline at end of file
# --- navigate ---
# Sets only dataset name, description, and task name; everything else comes
# from the included `_zeroshot_template_yaml`.
"dataset_name": "navigate"
"description": "Given a series of navigation instructions, determine whether one would end up back at the starting point.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_navigate"
# --- penguins_in_a_table ---
"dataset_name": "penguins_in_a_table"
"description": "Answer questions about a table of penguins and their attributes.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_penguins_in_a_table"
# --- reasoning_about_colored_objects ---
"dataset_name": "reasoning_about_colored_objects"
"description": "Answer extremely simple questions about the colors of objects on a surface.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_reasoning_about_colored_objects"
# --- ruin_names ---
# Adds a `process_docs` hook pointing at `fix_ruin_names` in the `utils`
# module two directories up. What the hook changes is not visible here —
# see utils.py.
"dataset_name": "ruin_names"
"description": "Select the humorous edit that 'ruins' the input movie or musical artist name.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_ruin_names"
"process_docs": !function ../../utils.fix_ruin_names
\ No newline at end of file
# --- salient_translation_error_detection ---
# Sets only dataset name, description, and task name; everything else comes
# from the included `_zeroshot_template_yaml`.
"dataset_name": "salient_translation_error_detection"
"description": "Detect the type of error in an English translation of a German source sentence.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_salient_translation_error_detection"
# --- snarks ---
# The description is multi-paragraph: a one-line task statement followed by a
# quoted dictionary definition of sarcasm. The embedded `\"` escapes and `\n\n`
# separators are part of the string and must be kept as-is.
"dataset_name": "snarks"
"description": "Determine which of two sentences is sarcastic.\n\nAccording to Cambridge University Dictionary, sarcasm is \"the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way.\" Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks.\n\n"
"include": "_zeroshot_template_yaml"
"task": "bbh_alt_pv_01_zeroshot_snarks"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment