"...lm-evaluation-harness.git" did not exist on "5a8ac19857ad1b06220e820c339fe2602e6d3442"
Commit 835cc40e authored by lintangsutawika's avatar lintangsutawika
Browse files

merged latest and added altworld files

parents 8da401e0 c9bbec6e
...@@ -18,3 +18,5 @@ metric_list: ...@@ -18,3 +18,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
metadata:
- version: 0.0
...@@ -11,3 +11,5 @@ doc_to_target: label ...@@ -11,3 +11,5 @@ doc_to_target: label
doc_to_choice: ['no', 'yes'] doc_to_choice: ['no', 'yes']
metric_list: metric_list:
- metric: acc - metric: acc
metadata:
- version: 1.0
...@@ -19,3 +19,5 @@ filter_list: ...@@ -19,3 +19,5 @@ filter_list:
- name: "wsc_postprocessor" - name: "wsc_postprocessor"
filter: filter:
- function: !function t5_utils.WSCPostprocess - function: !function t5_utils.WSCPostprocess
metadata:
- version: 0.0
...@@ -15,3 +15,5 @@ metric_list: ...@@ -15,3 +15,5 @@ metric_list:
- metric: acc_norm - metric: acc_norm
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -14,3 +14,5 @@ metric_list: ...@@ -14,3 +14,5 @@ metric_list:
- metric: acc_norm - metric: acc_norm
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -13,3 +13,5 @@ generation_kwargs: ...@@ -13,3 +13,5 @@ generation_kwargs:
do_sample: false do_sample: false
temperature: 0.0 temperature: 0.0
repeats: 1 repeats: 1
metadata:
- version: 0.0
...@@ -27,3 +27,5 @@ metric_list: ...@@ -27,3 +27,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
metadata:
- version: 2.0
# TruthfulQA MC1: zero-shot multiple-choice task reporting mean accuracy.
group:
  - truthfulqa
task: truthfulqa_mc1
dataset_path: truthful_qa
dataset_name: multiple_choice
output_type: multiple_choice
# Only the validation split is configured; train and test are left unset.
training_split: null
validation_split: validation
test_split: null
num_fewshot: 0
# The prompt is the bare question text.
doc_to_text: "{{question}}"
# NOTE(review): index 0 presumably marks the first choice as the correct
# answer -- confirm against the dataset layout.
doc_to_target: 0
doc_to_choice: "{{mc1_targets.choices}}"
should_decontaminate: true
doc_to_decontamination_query: question
# Each metric entry carries its own aggregation and direction, so those keys
# must be nested under the list item rather than sit at the top level.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
# TruthfulQA MC2: pulls in truthfulqa_mc1.yaml through `include`.
# NOTE(review): the keys below presumably override the included values --
# confirm against the config loader.
include: truthfulqa_mc1.yaml
task: truthfulqa_mc2
doc_to_target: 0
doc_to_choice: "{{mc2_targets.choices}}"
# Result processing is delegated to a function referenced from `utils`.
process_results: !function utils.process_results_mc2
should_decontaminate: true
doc_to_decontamination_query: question
# Each metric entry carries its own aggregation and direction, so those keys
# must be nested under the list item rather than sit at the top level.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
...@@ -75,3 +75,5 @@ metric_list: ...@@ -75,3 +75,5 @@ metric_list:
- metric: rougeL_diff - metric: rougeL_diff
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 2.0
...@@ -32,3 +32,5 @@ metric_list: ...@@ -32,3 +32,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 2.0
...@@ -9,3 +9,5 @@ metric_list: ...@@ -9,3 +9,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 2.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: false ignore_case: false
ignore_punctuation: false ignore_punctuation: false
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: false ignore_case: false
ignore_punctuation: false ignore_punctuation: false
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: false ignore_case: false
ignore_punctuation: false ignore_punctuation: false
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: false ignore_case: false
ignore_punctuation: false ignore_punctuation: false
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
higher_is_better: true higher_is_better: true
ignore_case: false ignore_case: false
ignore_punctuation: false ignore_punctuation: false
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -14,3 +14,5 @@ metric_list: ...@@ -14,3 +14,5 @@ metric_list:
- metric: word_perplexity - metric: word_perplexity
- metric: byte_perplexity - metric: byte_perplexity
- metric: bits_per_byte - metric: bits_per_byte
metadata:
- version: 2.0
...@@ -13,3 +13,5 @@ metric_list: ...@@ -13,3 +13,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment