Commit 57f08e40 authored by lintangsutawika

removed json files

parent 34fc9e68
{"doc_id": 0, "doc": {"sentence": "Sarah was a much better surgeon than Maria so _ always got the easier cases.", "option1": "Sarah", "option2": "Maria", "answer": "2"}, "target": "always got the easier cases.", "arguments": ["Sarah was a much better surgeon than Maria so Sarah", " always got the easier cases."], "resps": [[[-27.389801025390625, false]], [[-26.199748992919922, false]]], "filtered_resps": [[-27.389801025390625, false], [-26.199748992919922, false]], "acc": 1.0}
{"doc_id": 1, "doc": {"sentence": "Sarah was a much better surgeon than Maria so _ always got the harder cases.", "option1": "Sarah", "option2": "Maria", "answer": "1"}, "target": "always got the harder cases.", "arguments": ["Sarah was a much better surgeon than Maria so Sarah", " always got the harder cases."], "resps": [[[-26.916120529174805, false]], [[-25.329540252685547, false]]], "filtered_resps": [[-26.916120529174805, false], [-25.329540252685547, false]], "acc": 0.0}
{
  "results": {
    "boolq": {
      "acc,none": 0.5859327217125382,
      "acc_stderr,none": 0.008614932353134956
    }
  },
  "configs": {
    "boolq": {
      "task": "boolq",
      "group": [
        "super-glue-lm-eval-v1"
      ],
      "dataset_path": "super_glue",
      "dataset_name": "boolq",
      "training_split": "train",
      "validation_split": "validation",
      "doc_to_text": "{{passage}}\nQuestion: {{question}}\nAnswer:",
      "doc_to_target": "label",
      "doc_to_choice": {
        "0": "no",
        "1": "yes"
      },
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "acc"
        }
      ],
      "output_type": "multiple_choice",
      "repeats": 1,
      "should_decontaminate": false
    }
  },
  "versions": {
    "boolq": "Yaml"
  },
  "config": {
    "model": "hf",
    "model_args": "pretrained=facebook/xglm-1.7B",
    "num_fewshot": 0,
    "batch_size": 4,
    "batch_sizes": [],
    "device": null,
    "use_cache": null,
    "limit": null,
    "bootstrap_iters": 100000
  },
  "git_hash": "c37ad6e"
}
\ No newline at end of file
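The acc_stderr,none value above is consistent with the sample standard error of the mean over per-example 0/1 scores. A quick check (n = 3270, the size of the BoolQ validation split, is an assumption; the file itself does not record n):

import math

acc, n = 0.5859327217125382, 3270
stderr = math.sqrt(acc * (1 - acc) / (n - 1))  # ddof=1 sample stderr of the mean
print(stderr)  # ~0.008614932..., matching acc_stderr,none above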
{
  "results": {
    "cb": {
      "acc,none": 0.5,
      "acc_stderr,none": 0.5,
      "f1,none": 0.2222222222222222
    }
  },
  "configs": {
    "cb": {
      "task": "cb",
      "group": [
        "super-glue-lm-eval-v1"
      ],
      "dataset_path": "super_glue",
      "dataset_name": "cb",
      "training_split": "train",
      "validation_split": "validation",
      "doc_to_text": "{{premise}}\nQuestion: {{hypothesis}}. True, False, or Neither?\nAnswer:",
      "doc_to_target": "label",
      "doc_to_choice": [
        "True",
        "False",
        "Neither"
      ],
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 4,
      "metric_list": [
        {
          "metric": "acc"
        },
        {
          "metric": "f1",
          "aggregation": "<function cb_multi_fi at 0x7f3212743d30>"
        }
      ],
      "output_type": "multiple_choice",
      "repeats": 1,
      "should_decontaminate": false
    }
  },
  "versions": {
    "cb": "Yaml"
  },
  "config": {
    "model": "hf",
    "model_args": "",
    "num_fewshot": 4,
    "batch_size": 1,
    "batch_sizes": [],
    "device": null,
    "use_cache": null,
    "limit": 2.0,
    "bootstrap_iters": 100000
  },
  "git_hash": "656c310"
}
\ No newline at end of file
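The f1 aggregation above was serialized as a bare function repr, so the exact aggregation is not recoverable from this file. Assuming it is macro-averaged F1 over the three CB labels, the reported 0.2222... is reproducible when one of the two scored docs (limit: 2.0, acc: 0.5) is predicted correctly; the concrete label pair below is hypothetical, chosen only to reproduce the number:

from sklearn.metrics import f1_score

golds = [0, 1]  # hypothetical gold labels: True, False
preds = [0, 0]  # hypothetical predictions: both True (acc = 0.5)
print(f1_score(golds, preds, labels=[0, 1, 2], average="macro"))  # 0.2222...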
{
  "results": {
    "boolq-seq2seq": {
      "exact_match,none": 0.0,
      "exact_match_stderr,none": 0.0
    }
  },
  "configs": {
    "boolq-seq2seq": {
      "task": "boolq-seq2seq",
      "group": [
        "super-glue-lm-eval-v1-seq2seq"
      ],
      "dataset_path": "super_glue",
      "dataset_name": "boolq",
      "training_split": "train",
      "validation_split": "validation",
      "doc_to_text": "{{passage}}\nQuestion: {{question}}\nAnswer:",
      "doc_to_target": "{{['no', 'yes'][label]}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 4,
      "metric_list": [
        {
          "metric": "exact_match",
          "aggregation": "mean",
          "higher_is_better": true,
          "ignore_case": true,
          "ignore_punctuation": true
        }
      ],
      "output_type": "greedy_until",
      "generation_kwargs": {
        "until": [
          "\n\n",
          "\n"
        ],
        "do_sample": false,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false
    }
  },
  "versions": {
    "boolq-seq2seq": "Yaml"
  },
  "config": {
    "model": "hf",
    "model_args": "",
    "num_fewshot": 4,
    "batch_size": 1,
    "batch_sizes": [],
    "device": "cuda",
    "use_cache": null,
    "limit": 16.0,
    "bootstrap_iters": 100000
  },
  "git_hash": "ae41f67"
}
\ No newline at end of file
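A sketch of the normalization implied by the ignore_case and ignore_punctuation flags on the exact_match metric above (an approximation, not the harness's exact implementation):

import string

_PUNCT = str.maketrans("", "", string.punctuation)

def _norm(s: str) -> str:
    return s.translate(_PUNCT).lower().strip()

def exact_match(pred: str, target: str) -> float:
    return float(_norm(pred) == _norm(target))

print(exact_match("Yes.", "yes"))  # 1.0 despite case and punctuation differences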
{"doc_id": 0, "doc": {"question": "Who was President when the first Peanuts cartoon was published?", "question_id": "tc_0", "question_source": "http://www.triviacountry.com/", "entity_pages": {"doc_source": [], "filename": [], "title": [], "wiki_context": []}, "search_results": {"description": [], "filename": [], "rank": [], "title": [], "url": [], "search_context": []}, "answer": {"aliases": ["33rd President of the United States", "H. S. Truman", "H. Truman", "H.S. Truman", "HST (president)", "Harold Truman", "Harry S Truman", "Harry S. Truman", "Harry S.Truman", "Harry Shipp Truman", "Harry Shippe Truman", "Harry Solomon Truman", "Harry Truman", "Harry Truman's", "Harry truman", "Hary truman", "Mary Jane Truman", "Mr. Citizen", "Presidency of Harry S. Truman", "Presidency of Harry Truman", "President Harry Truman", "President Truman", "S truman", "Truman Administration", "Truman administration"], "normalized_aliases": ["presidency of harry s truman", "33rd president of united states", "truman administration", "s truman", "mr citizen", "harry truman s", "harry truman", "hary truman", "harry shipp truman", "h truman", "harry shippe truman", "h s truman", "president truman", "president harry truman", "hst president", "presidency of harry truman", "mary jane truman", "harry solomon truman", "harold truman", "harry s truman"], "matched_wiki_entity_name": "", "normalized_matched_wiki_entity_name": "", "normalized_value": "harry truman", "type": "WikipediaEntity", "value": "Harry Truman"}}, "target": " Harry Truman", "arguments": ["Q:Who was President when the first Peanuts cartoon was published? A:", {"do_sample": false, "temperature": 0.0}], "resps": [[" President"]], "filtered_resps": [" President"], "acc": 0, "f1": "tc_0"}
{"doc_id": 1, "doc": {"question": "Which American-born Sinclair won the Nobel Prize for Literature in 1930?", "question_id": "tc_1", "question_source": "http://www.triviacountry.com/", "entity_pages": {"doc_source": [], "filename": [], "title": [], "wiki_context": []}, "search_results": {"description": [], "filename": [], "rank": [], "title": [], "url": [], "search_context": []}, "answer": {"aliases": ["(Harry) Sinclair Lewis", "Grace Hegger", "Harry Sinclair Lewis", "Lewis, (Harry) Sinclair", "Sinclair Lewis"], "normalized_aliases": ["grace hegger", "lewis harry sinclair", "sinclair lewis", "harry sinclair lewis"], "matched_wiki_entity_name": "", "normalized_matched_wiki_entity_name": "", "normalized_value": "sinclair lewis", "type": "WikipediaEntity", "value": "Sinclair Lewis"}}, "target": " Sinclair Lewis", "arguments": ["Q:Which American-born Sinclair won the Nobel Prize for Literature in 1930? A:", {"do_sample": false, "temperature": 0.0}], "resps": [[" Sinclair"]], "filtered_resps": [" Sinclair"], "acc": 0, "f1": "tc_1"}
{"doc_id": 0, "doc": {"sentence": "Sarah was a much better surgeon than Maria so _ always got the easier cases.", "option1": "Sarah", "option2": "Maria", "answer": "2"}, "target": "always got the easier cases.", "arguments": ["Sarah was a much better surgeon than Maria so Sarah", " always got the easier cases."], "resps": [[[-15.5546875, false]], [[-15.78125, false]]], "filtered_resps": [[-15.5546875, false], [-15.78125, false]], "acc": 0.0}
{"doc_id": 1, "doc": {"sentence": "Sarah was a much better surgeon than Maria so _ always got the harder cases.", "option1": "Sarah", "option2": "Maria", "answer": "1"}, "target": "always got the harder cases.", "arguments": ["Sarah was a much better surgeon than Maria so Sarah", " always got the harder cases."], "resps": [[[-17.328125, false]], [[-17.21875, false]]], "filtered_resps": [[-17.328125, false], [-17.21875, false]], "acc": 0.0}
{"doc_id": 2, "doc": {"sentence": "They were worried the wine would ruin the bed and the blanket, but the _ was't ruined.", "option1": "blanket", "option2": "bed", "answer": "2"}, "target": "was't ruined.", "arguments": ["They were worried the wine would ruin the bed and the blanket, but the blanket", " was't ruined."], "resps": [[[-17.5625, false]], [[-18.171875, false]]], "filtered_resps": [[-17.5625, false], [-18.171875, false]], "acc": 0.0}
{"doc_id": 3, "doc": {"sentence": "Terry tried to bake the eggplant in the toaster oven but the _ was too big.", "option1": "eggplant", "option2": "toaster", "answer": "1"}, "target": "was too big.", "arguments": ["Terry tried to bake the eggplant in the toaster oven but the eggplant", " was too big."], "resps": [[[-4.9296875, true]], [[-7.11328125, false]]], "filtered_resps": [[-4.9296875, true], [-7.11328125, false]], "acc": 1.0}
{
  "results": {
    "winogrande": {
      "acc,none": 0.5,
      "acc_stderr,none": 0.5
    }
  },
  "configs": {
    "winogrande": {
      "task": "winogrande",
      "dataset_path": "winogrande",
      "dataset_name": "winogrande_xl",
      "training_split": "train",
      "validation_split": "validation",
      "doc_to_text": "<function doc_to_text at 0x7efcf0c7b1f0>",
      "doc_to_target": "<function doc_to_target at 0x7efcf0c7be50>",
      "gold_alias": "<function gold_alias at 0x7efcf0aaf1f0>",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "acc",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "multiple_choice",
      "repeats": 1,
      "should_decontaminate": false
    }
  },
  "versions": {
    "winogrande": "Yaml"
  },
  "config": {
    "model": "hf",
    "model_args": "",
    "num_fewshot": 0,
    "batch_size": 1,
    "batch_sizes": [],
    "device": null,
    "use_cache": null,
    "limit": 2.0,
    "bootstrap_iters": 100000
  },
  "git_hash": "656c310"
}
\ No newline at end of file
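The winogrande config above serializes doc_to_text, doc_to_target, and gold_alias as function reprs, so the callables themselves are not recoverable from the file. A hypothetical reconstruction that is consistent with the arguments pairs in the per-doc winogrande records earlier in this commit (sentence prefix with each option substituted for the blank, suffix as the shared continuation):

def winogrande_contexts(doc: dict) -> list[str]:
    # One scored context per option: the text before "_" plus that option.
    prefix, _suffix = doc["sentence"].split("_")
    return [prefix + doc["option1"], prefix + doc["option2"]]

def winogrande_target(doc: dict) -> str:
    # The continuation after the blank, shared by both contexts.
    return doc["sentence"].split("_")[1]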