Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
835cc40e
"...lm-evaluation-harness.git" did not exist on "5a8ac19857ad1b06220e820c339fe2602e6d3442"
Commit
835cc40e
authored
Dec 06, 2023
by
lintangsutawika
Browse files
merged latest and added altworld files
parents
8da401e0
c9bbec6e
Changes
430
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
66 additions
and
0 deletions
+66
-0
lm_eval/tasks/super_glue/wic/t5-prompt.yaml
lm_eval/tasks/super_glue/wic/t5-prompt.yaml
+2
-0
lm_eval/tasks/super_glue/wsc/default.yaml
lm_eval/tasks/super_glue/wsc/default.yaml
+2
-0
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml
+2
-0
lm_eval/tasks/swag/swag.yaml
lm_eval/tasks/swag/swag.yaml
+2
-0
lm_eval/tasks/toxigen/toxigen.yaml
lm_eval/tasks/toxigen/toxigen.yaml
+2
-0
lm_eval/tasks/translation/wmt_common_yaml
lm_eval/tasks/translation/wmt_common_yaml
+2
-0
lm_eval/tasks/triviaqa/default.yaml
lm_eval/tasks/triviaqa/default.yaml
+2
-0
lm_eval/tasks/truthfulqa/alternative_worlds/truthfulqa_mc1.yaml
...l/tasks/truthfulqa/alternative_worlds/truthfulqa_mc1.yaml
+19
-0
lm_eval/tasks/truthfulqa/alternative_worlds/truthfulqa_mc2.yaml
...l/tasks/truthfulqa/alternative_worlds/truthfulqa_mc2.yaml
+11
-0
lm_eval/tasks/truthfulqa/truthfulqa_gen.yaml
lm_eval/tasks/truthfulqa/truthfulqa_gen.yaml
+2
-0
lm_eval/tasks/truthfulqa/truthfulqa_mc1.yaml
lm_eval/tasks/truthfulqa/truthfulqa_mc1.yaml
+2
-0
lm_eval/tasks/truthfulqa/truthfulqa_mc2.yaml
lm_eval/tasks/truthfulqa/truthfulqa_mc2.yaml
+2
-0
lm_eval/tasks/unscramble/anagrams1.yaml
lm_eval/tasks/unscramble/anagrams1.yaml
+2
-0
lm_eval/tasks/unscramble/anagrams2.yaml
lm_eval/tasks/unscramble/anagrams2.yaml
+2
-0
lm_eval/tasks/unscramble/cycle_letters.yaml
lm_eval/tasks/unscramble/cycle_letters.yaml
+2
-0
lm_eval/tasks/unscramble/random_insertion.yaml
lm_eval/tasks/unscramble/random_insertion.yaml
+2
-0
lm_eval/tasks/unscramble/reversed_words.yaml
lm_eval/tasks/unscramble/reversed_words.yaml
+2
-0
lm_eval/tasks/webqs/webqs.yaml
lm_eval/tasks/webqs/webqs.yaml
+2
-0
lm_eval/tasks/wikitext/wikitext.yaml
lm_eval/tasks/wikitext/wikitext.yaml
+2
-0
lm_eval/tasks/winogrande/default.yaml
lm_eval/tasks/winogrande/default.yaml
+2
-0
No files found.
lm_eval/tasks/super_glue/wic/t5-prompt.yaml
View file @
835cc40e
...
@@ -18,3 +18,5 @@ metric_list:
...
@@ -18,3 +18,5 @@ metric_list:
higher_is_better
:
true
higher_is_better
:
true
ignore_case
:
true
ignore_case
:
true
ignore_punctuation
:
true
ignore_punctuation
:
true
metadata
:
-
version
:
0.0
lm_eval/tasks/super_glue/wsc/default.yaml
View file @
835cc40e
...
@@ -11,3 +11,5 @@ doc_to_target: label
...
@@ -11,3 +11,5 @@ doc_to_target: label
doc_to_choice
:
[
'
no'
,
'
yes'
]
doc_to_choice
:
[
'
no'
,
'
yes'
]
metric_list
:
metric_list
:
-
metric
:
acc
-
metric
:
acc
metadata
:
-
version
:
1.0
lm_eval/tasks/super_glue/wsc/t5-prompt.yaml
View file @
835cc40e
...
@@ -19,3 +19,5 @@ filter_list:
...
@@ -19,3 +19,5 @@ filter_list:
-
name
:
"
wsc_postprocessor"
-
name
:
"
wsc_postprocessor"
filter
:
filter
:
-
function
:
!function
t5_utils.WSCPostprocess
-
function
:
!function
t5_utils.WSCPostprocess
metadata
:
-
version
:
0.0
lm_eval/tasks/swag/swag.yaml
View file @
835cc40e
...
@@ -15,3 +15,5 @@ metric_list:
...
@@ -15,3 +15,5 @@ metric_list:
-
metric
:
acc_norm
-
metric
:
acc_norm
aggregation
:
mean
aggregation
:
mean
higher_is_better
:
true
higher_is_better
:
true
metadata
:
-
version
:
1.0
lm_eval/tasks/toxigen/toxigen.yaml
View file @
835cc40e
...
@@ -14,3 +14,5 @@ metric_list:
...
@@ -14,3 +14,5 @@ metric_list:
-
metric
:
acc_norm
-
metric
:
acc_norm
aggregation
:
mean
aggregation
:
mean
higher_is_better
:
true
higher_is_better
:
true
metadata
:
-
version
:
1.0
lm_eval/tasks/translation/wmt_common_yaml
View file @
835cc40e
...
@@ -13,3 +13,5 @@ generation_kwargs:
...
@@ -13,3 +13,5 @@ generation_kwargs:
do_sample: false
do_sample: false
temperature: 0.0
temperature: 0.0
repeats: 1
repeats: 1
metadata:
- version: 0.0
lm_eval/tasks/triviaqa/default.yaml
View file @
835cc40e
...
@@ -27,3 +27,5 @@ metric_list:
...
@@ -27,3 +27,5 @@ metric_list:
higher_is_better
:
true
higher_is_better
:
true
ignore_case
:
true
ignore_case
:
true
ignore_punctuation
:
true
ignore_punctuation
:
true
metadata
:
-
version
:
2.0
lm_eval/tasks/truthfulqa/alternative_worlds/truthfulqa_mc1.yaml
0 → 100644
View file @
835cc40e
group
:
-
truthfulqa
task
:
truthfulqa_mc1
dataset_path
:
truthful_qa
dataset_name
:
multiple_choice
output_type
:
multiple_choice
training_split
:
null
validation_split
:
validation
test_split
:
null
num_fewshot
:
0
doc_to_text
:
"
{{question}}"
doc_to_target
:
0
doc_to_choice
:
"
{{mc1_targets.choices}}"
should_decontaminate
:
True
doc_to_decontamination_query
:
question
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/truthfulqa/alternative_worlds/truthfulqa_mc2.yaml
0 → 100644
View file @
835cc40e
include
:
truthfulqa_mc1.yaml
task
:
truthfulqa_mc2
doc_to_target
:
0
doc_to_choice
:
"
{{mc2_targets.choices}}"
process_results
:
!function
utils.process_results_mc2
should_decontaminate
:
True
doc_to_decontamination_query
:
question
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/truthfulqa/truthfulqa_gen.yaml
View file @
835cc40e
...
@@ -75,3 +75,5 @@ metric_list:
...
@@ -75,3 +75,5 @@ metric_list:
-
metric
:
rougeL_diff
-
metric
:
rougeL_diff
aggregation
:
mean
aggregation
:
mean
higher_is_better
:
true
higher_is_better
:
true
metadata
:
-
version
:
2.0
lm_eval/tasks/truthfulqa/truthfulqa_mc1.yaml
View file @
835cc40e
...
@@ -32,3 +32,5 @@ metric_list:
...
@@ -32,3 +32,5 @@ metric_list:
-
metric
:
acc
-
metric
:
acc
aggregation
:
mean
aggregation
:
mean
higher_is_better
:
true
higher_is_better
:
true
metadata
:
-
version
:
2.0
lm_eval/tasks/truthfulqa/truthfulqa_mc2.yaml
View file @
835cc40e
...
@@ -9,3 +9,5 @@ metric_list:
...
@@ -9,3 +9,5 @@ metric_list:
-
metric
:
acc
-
metric
:
acc
aggregation
:
mean
aggregation
:
mean
higher_is_better
:
true
higher_is_better
:
true
metadata
:
-
version
:
2.0
lm_eval/tasks/unscramble/anagrams1.yaml
View file @
835cc40e
...
@@ -16,3 +16,5 @@ metric_list:
...
@@ -16,3 +16,5 @@ metric_list:
higher_is_better
:
true
higher_is_better
:
true
ignore_case
:
false
ignore_case
:
false
ignore_punctuation
:
false
ignore_punctuation
:
false
metadata
:
-
version
:
1.0
lm_eval/tasks/unscramble/anagrams2.yaml
View file @
835cc40e
...
@@ -16,3 +16,5 @@ metric_list:
...
@@ -16,3 +16,5 @@ metric_list:
higher_is_better
:
true
higher_is_better
:
true
ignore_case
:
false
ignore_case
:
false
ignore_punctuation
:
false
ignore_punctuation
:
false
metadata
:
-
version
:
1.0
lm_eval/tasks/unscramble/cycle_letters.yaml
View file @
835cc40e
...
@@ -16,3 +16,5 @@ metric_list:
...
@@ -16,3 +16,5 @@ metric_list:
higher_is_better
:
true
higher_is_better
:
true
ignore_case
:
false
ignore_case
:
false
ignore_punctuation
:
false
ignore_punctuation
:
false
metadata
:
-
version
:
1.0
lm_eval/tasks/unscramble/random_insertion.yaml
View file @
835cc40e
...
@@ -16,3 +16,5 @@ metric_list:
...
@@ -16,3 +16,5 @@ metric_list:
higher_is_better
:
true
higher_is_better
:
true
ignore_case
:
false
ignore_case
:
false
ignore_punctuation
:
false
ignore_punctuation
:
false
metadata
:
-
version
:
1.0
lm_eval/tasks/unscramble/reversed_words.yaml
View file @
835cc40e
...
@@ -16,3 +16,5 @@ metric_list:
...
@@ -16,3 +16,5 @@ metric_list:
higher_is_better
:
true
higher_is_better
:
true
ignore_case
:
false
ignore_case
:
false
ignore_punctuation
:
false
ignore_punctuation
:
false
metadata
:
-
version
:
1.0
lm_eval/tasks/webqs/webqs.yaml
View file @
835cc40e
...
@@ -16,3 +16,5 @@ metric_list:
...
@@ -16,3 +16,5 @@ metric_list:
-
metric
:
exact_match
-
metric
:
exact_match
aggregation
:
mean
aggregation
:
mean
higher_is_better
:
true
higher_is_better
:
true
metadata
:
-
version
:
1.0
lm_eval/tasks/wikitext/wikitext.yaml
View file @
835cc40e
...
@@ -14,3 +14,5 @@ metric_list:
...
@@ -14,3 +14,5 @@ metric_list:
-
metric
:
word_perplexity
-
metric
:
word_perplexity
-
metric
:
byte_perplexity
-
metric
:
byte_perplexity
-
metric
:
bits_per_byte
-
metric
:
bits_per_byte
metadata
:
-
version
:
2.0
lm_eval/tasks/winogrande/default.yaml
View file @
835cc40e
...
@@ -13,3 +13,5 @@ metric_list:
...
@@ -13,3 +13,5 @@ metric_list:
-
metric
:
acc
-
metric
:
acc
aggregation
:
mean
aggregation
:
mean
higher_is_better
:
true
higher_is_better
:
true
metadata
:
-
version
:
1.0
Prev
1
…
17
18
19
20
21
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment