Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
f2c396ab
Commit
f2c396ab
authored
Oct 05, 2023
by
lintangsutawika
Browse files
pre-commit reformat
parent
20a54b3a
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
180 additions
and
173 deletions
+180
-173
lm_eval/tasks/bigbench/README.md
lm_eval/tasks/bigbench/README.md
+1
-1
lm_eval/tasks/bigbench/generate_tasks.py
lm_eval/tasks/bigbench/generate_tasks.py
+179
-172
No files found.
lm_eval/tasks/bigbench/README.md
View file @
f2c396ab
lm_eval/tasks/bigbench/generate_tasks.py
View file @
f2c396ab
...
@@ -2,179 +2,182 @@ import os
...
@@ -2,179 +2,182 @@ import os
import
yaml
import
yaml
all_subtasks
=
[
all_subtasks
=
[
'
abstract_narrative_understanding
'
,
"
abstract_narrative_understanding
"
,
'
anachronisms
'
,
"
anachronisms
"
,
'
analogical_similarity
'
,
"
analogical_similarity
"
,
'
analytic_entailment
'
,
"
analytic_entailment
"
,
'
arithmetic
'
,
"
arithmetic
"
,
'
ascii_word_recognition
'
,
"
ascii_word_recognition
"
,
'
authorship_verification
'
,
"
authorship_verification
"
,
'
auto_categorization
'
,
"
auto_categorization
"
,
'
auto_debugging
'
,
"
auto_debugging
"
,
'
bbq_lite_json
'
,
"
bbq_lite_json
"
,
'
bridging_anaphora_resolution_barqa
'
,
"
bridging_anaphora_resolution_barqa
"
,
'
causal_judgment
'
,
"
causal_judgment
"
,
'
cause_and_effect
'
,
"
cause_and_effect
"
,
'
checkmate_in_one
'
,
"
checkmate_in_one
"
,
'
chess_state_tracking
'
,
"
chess_state_tracking
"
,
'
chinese_remainder_theorem
'
,
"
chinese_remainder_theorem
"
,
'
cifar10_classification
'
,
"
cifar10_classification
"
,
'
code_line_description
'
,
"
code_line_description
"
,
'
codenames
'
,
"
codenames
"
,
'
color
'
,
"
color
"
,
'
common_morpheme
'
,
"
common_morpheme
"
,
'
conceptual_combinations
'
,
"
conceptual_combinations
"
,
'
conlang_translation
'
,
"
conlang_translation
"
,
'
contextual_parametric_knowledge_conflicts
'
,
"
contextual_parametric_knowledge_conflicts
"
,
'
crash_blossom
'
,
"
crash_blossom
"
,
'
crass_ai
'
,
"
crass_ai
"
,
'
cryobiology_spanish
'
,
"
cryobiology_spanish
"
,
'
cryptonite
'
,
"
cryptonite
"
,
'
cs_algorithms
'
,
"
cs_algorithms
"
,
'
dark_humor_detection
'
,
"
dark_humor_detection
"
,
'
date_understanding
'
,
"
date_understanding
"
,
'
disambiguation_qa
'
,
"
disambiguation_qa
"
,
'
discourse_marker_prediction
'
,
"
discourse_marker_prediction
"
,
'
disfl_qa
'
,
"
disfl_qa
"
,
'
dyck_languages
'
,
"
dyck_languages
"
,
'
elementary_math_qa
'
,
"
elementary_math_qa
"
,
'
emoji_movie
'
,
"
emoji_movie
"
,
'
emojis_emotion_prediction
'
,
"
emojis_emotion_prediction
"
,
'
empirical_judgments
'
,
"
empirical_judgments
"
,
'
english_proverbs
'
,
"
english_proverbs
"
,
'
english_russian_proverbs
'
,
"
english_russian_proverbs
"
,
'
entailed_polarity
'
,
"
entailed_polarity
"
,
'
entailed_polarity_hindi
'
,
"
entailed_polarity_hindi
"
,
'
epistemic_reasoning
'
,
"
epistemic_reasoning
"
,
'
evaluating_information_essentiality
'
,
"
evaluating_information_essentiality
"
,
'
fact_checker
'
,
"
fact_checker
"
,
'
fantasy_reasoning
'
,
"
fantasy_reasoning
"
,
'
few_shot_nlg
'
,
"
few_shot_nlg
"
,
'
figure_of_speech_detection
'
,
"
figure_of_speech_detection
"
,
'
formal_fallacies_syllogisms_negation
'
,
"
formal_fallacies_syllogisms_negation
"
,
'
gem
'
,
"
gem
"
,
'
gender_inclusive_sentences_german
'
,
"
gender_inclusive_sentences_german
"
,
'
general_knowledge
'
,
"
general_knowledge
"
,
'
geometric_shapes
'
,
"
geometric_shapes
"
,
'
goal_step_wikihow
'
,
"
goal_step_wikihow
"
,
'
gre_reading_comprehension
'
,
"
gre_reading_comprehension
"
,
'
hhh_alignment
'
,
"
hhh_alignment
"
,
'
hindi_question_answering
'
,
"
hindi_question_answering
"
,
'
hindu_knowledge
'
,
"
hindu_knowledge
"
,
'
hinglish_toxicity
'
,
"
hinglish_toxicity
"
,
'
human_organs_senses
'
,
"
human_organs_senses
"
,
'
hyperbaton
'
,
"
hyperbaton
"
,
'
identify_math_theorems
'
,
"
identify_math_theorems
"
,
'
identify_odd_metaphor
'
,
"
identify_odd_metaphor
"
,
'
implicatures
'
,
"
implicatures
"
,
'
implicit_relations
'
,
"
implicit_relations
"
,
'
intent_recognition
'
,
"
intent_recognition
"
,
'
international_phonetic_alphabet_nli
'
,
"
international_phonetic_alphabet_nli
"
,
'
international_phonetic_alphabet_transliterate
'
,
"
international_phonetic_alphabet_transliterate
"
,
'
intersect_geometry
'
,
"
intersect_geometry
"
,
'
irony_identification
'
,
"
irony_identification
"
,
'
kanji_ascii
'
,
"
kanji_ascii
"
,
'
kannada
'
,
"
kannada
"
,
'
key_value_maps
'
,
"
key_value_maps
"
,
'
known_unknowns
'
,
"
known_unknowns
"
,
'
language_games
'
,
"
language_games
"
,
'
language_identification
'
,
"
language_identification
"
,
'
linguistic_mappings
'
,
"
linguistic_mappings
"
,
'
linguistics_puzzles
'
,
"
linguistics_puzzles
"
,
'
list_functions
'
,
"
list_functions
"
,
'
logic_grid_puzzle
'
,
"
logic_grid_puzzle
"
,
'
logical_args
'
,
"
logical_args
"
,
'
logical_deduction
'
,
"
logical_deduction
"
,
'
logical_fallacy_detection
'
,
"
logical_fallacy_detection
"
,
'
logical_sequence
'
,
"
logical_sequence
"
,
'
mathematical_induction
'
,
"
mathematical_induction
"
,
'
matrixshapes
'
,
"
matrixshapes
"
,
'
metaphor_boolean
'
,
"
metaphor_boolean
"
,
'
metaphor_understanding
'
,
"
metaphor_understanding
"
,
'
minute_mysteries_qa
'
,
"
minute_mysteries_qa
"
,
'
misconceptions
'
,
"
misconceptions
"
,
'
misconceptions_russian
'
,
"
misconceptions_russian
"
,
'
mnist_ascii
'
,
"
mnist_ascii
"
,
'
modified_arithmetic
'
,
"
modified_arithmetic
"
,
'
moral_permissibility
'
,
"
moral_permissibility
"
,
'
movie_dialog_same_or_different
'
,
"
movie_dialog_same_or_different
"
,
'
movie_recommendation
'
,
"
movie_recommendation
"
,
'
mult_data_wrangling
'
,
"
mult_data_wrangling
"
,
'
multiemo
'
,
"
multiemo
"
,
'
natural_instructions
'
,
"
natural_instructions
"
,
'
navigate
'
,
"
navigate
"
,
'
nonsense_words_grammar
'
,
"
nonsense_words_grammar
"
,
'
novel_concepts
'
,
"
novel_concepts
"
,
'
object_counting
'
,
"
object_counting
"
,
'
odd_one_out
'
,
"
odd_one_out
"
,
'
operators
'
,
"
operators
"
,
'
paragraph_segmentation
'
,
"
paragraph_segmentation
"
,
'
parsinlu_qa
'
,
"
parsinlu_qa
"
,
'
parsinlu_reading_comprehension
'
,
"
parsinlu_reading_comprehension
"
,
'
penguins_in_a_table
'
,
"
penguins_in_a_table
"
,
'
periodic_elements
'
,
"
periodic_elements
"
,
'
persian_idioms
'
,
"
persian_idioms
"
,
'
phrase_relatedness
'
,
"
phrase_relatedness
"
,
'
physical_intuition
'
,
"
physical_intuition
"
,
'
physics
'
,
"
physics
"
,
'
physics_questions
'
,
"
physics_questions
"
,
'
play_dialog_same_or_different
'
,
"
play_dialog_same_or_different
"
,
'
polish_sequence_labeling
'
,
"
polish_sequence_labeling
"
,
'
presuppositions_as_nli
'
,
"
presuppositions_as_nli
"
,
'
qa_wikidata
'
,
"
qa_wikidata
"
,
'
question_selection
'
,
"
question_selection
"
,
'
real_or_fake_text
'
,
"
real_or_fake_text
"
,
'
reasoning_about_colored_objects
'
,
"
reasoning_about_colored_objects
"
,
'
repeat_copy_logic
'
,
"
repeat_copy_logic
"
,
'
rephrase
'
,
"
rephrase
"
,
'
riddle_sense
'
,
"
riddle_sense
"
,
'
ruin_names
'
,
"
ruin_names
"
,
'
salient_translation_error_detection
'
,
"
salient_translation_error_detection
"
,
'
scientific_press_release
'
,
"
scientific_press_release
"
,
'
semantic_parsing_in_context_sparc
'
,
"
semantic_parsing_in_context_sparc
"
,
'
semantic_parsing_spider
'
,
"
semantic_parsing_spider
"
,
'
sentence_ambiguity
'
,
"
sentence_ambiguity
"
,
'
similarities_abstraction
'
,
"
similarities_abstraction
"
,
'
simp_turing_concept
'
,
"
simp_turing_concept
"
,
'
simple_arithmetic_json
'
,
"
simple_arithmetic_json
"
,
'
simple_arithmetic_json_multiple_choice
'
,
"
simple_arithmetic_json_multiple_choice
"
,
'
simple_arithmetic_json_subtasks
'
,
"
simple_arithmetic_json_subtasks
"
,
'
simple_arithmetic_multiple_targets_json
'
,
"
simple_arithmetic_multiple_targets_json
"
,
'
simple_ethical_questions
'
,
"
simple_ethical_questions
"
,
'
simple_text_editing
'
,
"
simple_text_editing
"
,
'
snarks
'
,
"
snarks
"
,
'
social_iqa
'
,
"
social_iqa
"
,
'
social_support
'
,
"
social_support
"
,
'
sports_understanding
'
,
"
sports_understanding
"
,
'
strange_stories
'
,
"
strange_stories
"
,
'
strategyqa
'
,
"
strategyqa
"
,
'
sufficient_information
'
,
"
sufficient_information
"
,
'
suicide_risk
'
,
"
suicide_risk
"
,
'
swahili_english_proverbs
'
,
"
swahili_english_proverbs
"
,
'
swedish_to_german_proverbs
'
,
"
swedish_to_german_proverbs
"
,
'
symbol_interpretation
'
,
"
symbol_interpretation
"
,
'
temporal_sequences
'
,
"
temporal_sequences
"
,
'
tense
'
,
"
tense
"
,
'
timedial
'
,
"
timedial
"
,
'
topical_chat
'
,
"
topical_chat
"
,
'
tracking_shuffled_objects
'
,
"
tracking_shuffled_objects
"
,
'
understanding_fables
'
,
"
understanding_fables
"
,
'
undo_permutation
'
,
"
undo_permutation
"
,
'
unit_conversion
'
,
"
unit_conversion
"
,
'
unit_interpretation
'
,
"
unit_interpretation
"
,
'
unnatural_in_context_learning
'
,
"
unnatural_in_context_learning
"
,
'
vitaminc_fact_verification
'
,
"
vitaminc_fact_verification
"
,
'
what_is_the_tao
'
,
"
what_is_the_tao
"
,
'
which_wiki_edit
'
,
"
which_wiki_edit
"
,
'
winowhy
'
,
"
winowhy
"
,
'
word_sorting
'
,
"
word_sorting
"
,
'
word_unscrambling
'
"
word_unscrambling
"
,
]
]
def
main
()
->
None
:
def
main
()
->
None
:
for
path
,
task_type
in
zip
([
"multiple_choice"
,
"greedy_until"
],
[
"multiple_choice_template_yaml"
,
"greedy_until_template_yaml"
]):
for
path
,
task_type
in
zip
(
[
"multiple_choice"
,
"greedy_until"
],
[
"multiple_choice_template_yaml"
,
"greedy_until_template_yaml"
],
):
os
.
makedirs
(
path
,
exist_ok
=
True
)
os
.
makedirs
(
path
,
exist_ok
=
True
)
for
task
in
all_subtasks
:
for
task
in
all_subtasks
:
file_name
=
f
"
{
task
}
.yaml"
file_name
=
f
"
{
task
}
.yaml"
...
@@ -184,11 +187,15 @@ def main() -> None:
...
@@ -184,11 +187,15 @@ def main() -> None:
yaml
.
dump
(
yaml
.
dump
(
{
{
"include"
:
f
"../
{
task_type
}
"
,
"include"
:
f
"../
{
task_type
}
"
,
"task"
:
"bigbench_"
+
task
+
"_{}"
.
format
(
task_type
.
split
(
"_template_yaml"
)[
0
]),
"task"
:
"bigbench_"
"dataset_name"
:
task
+
"_zero_shot"
,
# zero-shot version of the dataset
+
task
+
"_{}"
.
format
(
task_type
.
split
(
"_template_yaml"
)[
0
]),
"dataset_name"
:
task
+
"_zero_shot"
,
# zero-shot version of the dataset
},
},
f
,
f
,
width
=
float
(
"inf"
),
allow_unicode
=
True
width
=
float
(
"inf"
),
allow_unicode
=
True
,
)
)
except
FileExistsError
:
except
FileExistsError
:
pass
pass
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment