"vscode:/vscode.git/clone" did not exist on "6a77a3d396f096e8b44743a864a3e0e934d6cb12"
Commit 52f75f0e authored by lintangsutawika's avatar lintangsutawika
Browse files

Merge branch 'big-refactor' of https://github.com/EleutherAI/lm-evaluation-harness into versioning

parents 331d7c51 b072bb0d
# Generated by utils.py # Generated by utils.py
dataset_name: tough_vs_raising_1 dataset_name: tough_vs_raising_1
include: template_yaml include: _template_yaml
task: blimp_tough_vs_raising_1 task: blimp_tough_vs_raising_1
# Generated by utils.py # Generated by utils.py
dataset_name: tough_vs_raising_2 dataset_name: tough_vs_raising_2
include: template_yaml include: _template_yaml
task: blimp_tough_vs_raising_2 task: blimp_tough_vs_raising_2
# Generated by utils.py # Generated by utils.py
dataset_name: transitive dataset_name: transitive
include: template_yaml include: _template_yaml
task: blimp_transitive task: blimp_transitive
# Generated by utils.py # Generated by utils.py
dataset_name: wh_island dataset_name: wh_island
include: template_yaml include: _template_yaml
task: blimp_wh_island task: blimp_wh_island
# Generated by utils.py # Generated by utils.py
dataset_name: wh_questions_object_gap dataset_name: wh_questions_object_gap
include: template_yaml include: _template_yaml
task: blimp_wh_questions_object_gap task: blimp_wh_questions_object_gap
# Generated by utils.py # Generated by utils.py
dataset_name: wh_questions_subject_gap dataset_name: wh_questions_subject_gap
include: template_yaml include: _template_yaml
task: blimp_wh_questions_subject_gap task: blimp_wh_questions_subject_gap
# Generated by utils.py # Generated by utils.py
dataset_name: wh_questions_subject_gap_long_distance dataset_name: wh_questions_subject_gap_long_distance
include: template_yaml include: _template_yaml
task: blimp_wh_questions_subject_gap_long_distance task: blimp_wh_questions_subject_gap_long_distance
# Generated by utils.py # Generated by utils.py
dataset_name: wh_vs_that_no_gap dataset_name: wh_vs_that_no_gap
include: template_yaml include: _template_yaml
task: blimp_wh_vs_that_no_gap task: blimp_wh_vs_that_no_gap
# Generated by utils.py # Generated by utils.py
dataset_name: wh_vs_that_no_gap_long_distance dataset_name: wh_vs_that_no_gap_long_distance
include: template_yaml include: _template_yaml
task: blimp_wh_vs_that_no_gap_long_distance task: blimp_wh_vs_that_no_gap_long_distance
# Generated by utils.py # Generated by utils.py
dataset_name: wh_vs_that_with_gap dataset_name: wh_vs_that_with_gap
include: template_yaml include: _template_yaml
task: blimp_wh_vs_that_with_gap task: blimp_wh_vs_that_with_gap
# Generated by utils.py # Generated by utils.py
dataset_name: wh_vs_that_with_gap_long_distance dataset_name: wh_vs_that_with_gap_long_distance
include: template_yaml include: _template_yaml
task: blimp_wh_vs_that_with_gap_long_distance task: blimp_wh_vs_that_with_gap_long_distance
...@@ -339,31 +339,27 @@ def make_table(result_dict, column: str = "results"): ...@@ -339,31 +339,27 @@ def make_table(result_dict, column: str = "results"):
elif column == "groups": elif column == "groups":
column_name = "Groups" column_name = "Groups"
md_writer = MarkdownTableWriter() all_headers = [
latex_writer = LatexTableWriter()
md_writer.headers = [
column_name,
"Version",
"Filter",
"Metric",
"Value",
"",
"Stderr",
]
latex_writer.headers = [
column_name, column_name,
"Version", "Version",
"Filter", "Filter",
"n-shot",
"Metric", "Metric",
"Value", "Value",
"", "",
"Stderr", "Stderr",
] ]
md_writer = MarkdownTableWriter()
latex_writer = LatexTableWriter()
md_writer.headers = all_headers
latex_writer.headers = all_headers
values = [] values = []
for k, dic in result_dict[column].items(): for k, dic in result_dict[column].items():
version = result_dict["versions"][k] version = result_dict["versions"][k]
n = str(result_dict["n-shot"][k])
if "alias" in dic: if "alias" in dic:
k = dic.pop("alias") k = dic.pop("alias")
...@@ -375,9 +371,9 @@ def make_table(result_dict, column: str = "results"): ...@@ -375,9 +371,9 @@ def make_table(result_dict, column: str = "results"):
if m + "_stderr" + "," + f in dic: if m + "_stderr" + "," + f in dic:
se = dic[m + "_stderr" + "," + f] se = dic[m + "_stderr" + "," + f]
values.append([k, version, f, m, "%.4f" % v, "±", "%.4f" % se]) values.append([k, version, f, n, m, "%.4f" % v, "±", "%.4f" % se])
else: else:
values.append([k, version, f, m, "%.4f" % v, "", ""]) values.append([k, version, f, n, m, "%.4f" % v, "", ""])
k = "" k = ""
version = "" version = ""
md_writer.value_matrix = values md_writer.value_matrix = values
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment