"docs/source/compression/index_zh.rst" did not exist on "2e84b445125aa2365eb5e79c94287d869db3366d"
Unverified commit 8ad598df, authored by Jungwhan Kim, committed by GitHub
Browse files

keep new line for task description (#2116)



* add keep trailing newline

* apply ruff-format

* add prompt unit test

* increment the version of tasks whose descriptions contain trailing whitespace

* remove whitespace from the leaderboard bbh task descriptions

* update MMLU expected versions in output

* CI run does display the expected version=1 for MMLU subtasks; fix expected test output again

---------
Co-authored-by: haileyschoelkopf <hailey@eleuther.ai>
parent 0571eeb1
...@@ -13,4 +13,4 @@ num_fewshot: 3 ...@@ -13,4 +13,4 @@ num_fewshot: 3
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
metadata: metadata:
version: 0.0 version: 1.0
dataset_name: boolean_expressions dataset_name: boolean_expressions
description: 'Evaluate the result of a random Boolean expression. description: 'Evaluate the result of a random Boolean expression.'
'
doc_to_choice: ["False", "True"] doc_to_choice: ["False", "True"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: causal_judgement dataset_name: causal_judgement
description: 'Answer questions about causal attribution. description: 'Answer questions about causal attribution.'
'
doc_to_choice: ["Yes", "No"] doc_to_choice: ["Yes", "No"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: date_understanding dataset_name: date_understanding
description: 'Infer the date from context. description: 'Infer the date from context.'
'
doc_to_choice: ["(A)", "(B)", "(C)", "(D)", "(E)", "(F)"] doc_to_choice: ["(A)", "(B)", "(C)", "(D)", "(E)", "(F)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: disambiguation_qa dataset_name: disambiguation_qa
description: 'Clarify the meaning of sentences with ambiguous pronouns. description: 'Clarify the meaning of sentences with ambiguous pronouns.'
'
doc_to_choice: ["(A)", "(B)", "(C)"] doc_to_choice: ["(A)", "(B)", "(C)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: formal_fallacies dataset_name: formal_fallacies
description: 'Distinguish deductively valid arguments from formal fallacies. description: 'Distinguish deductively valid arguments from formal fallacies.'
'
doc_to_choice: ["valid", "invalid"] doc_to_choice: ["valid", "invalid"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: geometric_shapes dataset_name: geometric_shapes
description: 'Name geometric shapes from their SVG paths. description: 'Name geometric shapes from their SVG paths.'
'
doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)","(G)","(H)","(I)","(J)","(K)"] doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)","(G)","(H)","(I)","(J)","(K)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: hyperbaton dataset_name: hyperbaton
description: 'Order adjectives correctly in English sentences. description: 'Order adjectives correctly in English sentences.'
'
doc_to_choice: ["(A)", "(B)"] doc_to_choice: ["(A)", "(B)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: logical_deduction_five_objects dataset_name: logical_deduction_five_objects
description: 'A logical deduction task which requires deducing the order of a sequence description: 'A logical deduction task which requires deducing the order of a sequence
of objects. of objects.'
'
doc_to_choice: ["(A)","(B)","(C)","(D)","(E)"] doc_to_choice: ["(A)","(B)","(C)","(D)","(E)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: logical_deduction_seven_objects dataset_name: logical_deduction_seven_objects
description: 'A logical deduction task which requires deducing the order of a sequence description: 'A logical deduction task which requires deducing the order of a sequence
of objects. of objects.'
'
doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)","(G)"] doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)","(G)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: logical_deduction_three_objects dataset_name: logical_deduction_three_objects
description: 'A logical deduction task which requires deducing the order of a sequence description: 'A logical deduction task which requires deducing the order of a sequence
of objects. of objects.'
'
doc_to_choice: ["(A)","(B)","(C)"] doc_to_choice: ["(A)","(B)","(C)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: movie_recommendation dataset_name: movie_recommendation
description: 'Recommend movies similar to the given list of movies. description: 'Recommend movies similar to the given list of movies.'
'
doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)"] doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: navigate dataset_name: navigate
description: 'Given a series of navigation instructions, determine whether one would description: 'Given a series of navigation instructions, determine whether one would
end up back at the starting point. end up back at the starting point.'
'
doc_to_choice: ["Yes","No"] doc_to_choice: ["Yes","No"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: object_counting dataset_name: object_counting
description: 'Questions that involve enumerating objects and asking the model to count description: 'Questions that involve enumerating objects and asking the model to count
them. them.'
'
doc_to_choice: ["0","1","2","3","4","5","6","7","8","9","10", "11", "12", "13", "14", "15", "16", "17", "18"] doc_to_choice: ["0","1","2","3","4","5","6","7","8","9","10", "11", "12", "13", "14", "15", "16", "17", "18"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: penguins_in_a_table dataset_name: penguins_in_a_table
description: 'Answer questions about a table of penguins and their attributes. description: 'Answer questions about a table of penguins and their attributes.'
'
doc_to_choice: ["(A)","(B)","(C)","(D)","(E)"] doc_to_choice: ["(A)","(B)","(C)","(D)","(E)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: reasoning_about_colored_objects dataset_name: reasoning_about_colored_objects
description: 'Answer extremely simple questions about the colors of objects on a surface. description: 'Answer extremely simple questions about the colors of objects on a surface.'
'
doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)","(G)","(H)","(I)","(J)","(K)","(L)","(M)","(N)","(O)","(P)","(Q)","(R)"] doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)","(G)","(H)","(I)","(J)","(K)","(L)","(M)","(N)","(O)","(P)","(Q)","(R)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: ruin_names dataset_name: ruin_names
description: 'Select the humorous edit that ''ruins'' the input movie or musical artist description: 'Select the humorous edit that ''ruins'' the input movie or musical artist
name. name.'
'
doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)"] doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: salient_translation_error_detection dataset_name: salient_translation_error_detection
description: 'Detect the type of error in an English translation of a German source description: 'Detect the type of error in an English translation of a German source
sentence. sentence.'
'
doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)"] doc_to_choice: ["(A)","(B)","(C)","(D)","(E)","(F)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
...@@ -5,10 +5,7 @@ description: 'Determine which of two sentences is sarcastic. ...@@ -5,10 +5,7 @@ description: 'Determine which of two sentences is sarcastic.
According to Cambridge University Dictionary, sarcasm is "the use of remarks that According to Cambridge University Dictionary, sarcasm is "the use of remarks that
clearly mean the opposite of what they say, made in order to hurt someone''s feelings clearly mean the opposite of what they say, made in order to hurt someone''s feelings
or to criticize something in a humorous way." Sarcastic sentences often contain or to criticize something in a humorous way." Sarcastic sentences often contain
satirical or ironic utterances, hyperboles, ambivalent or witty remarks. satirical or ironic utterances, hyperboles, ambivalent or witty remarks.'
'
doc_to_choice: ["(A)","(B)"] doc_to_choice: ["(A)","(B)"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
dataset_name: sports_understanding dataset_name: sports_understanding
description: 'Determine whether an artificially constructed sentence relating to sports description: 'Determine whether an artificially constructed sentence relating to sports
is plausible or not. is plausible or not.'
'
doc_to_choice: ["yes","no"] doc_to_choice: ["yes","no"]
fewshot_config: fewshot_config:
sampler: first_n sampler: first_n
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment