# generate_excel.py
import json
from itertools import product
3
4
5
from pathlib import Path

import pandas as pd
6
7
8
9


def get_branch_name_from_hash(hash):
    """Return a symbolic name for a commit, falling back to a short hash.

    Runs ``git name-rev --name-only <hash>`` in the current working
    directory and returns what it prints.

    Args:
        hash: Revision string to resolve. The parameter shadows the
            ``hash`` builtin; the name is kept so existing keyword callers
            keep working.

    Returns:
        The name printed by git with surrounding whitespace removed, or the
        first ten characters of ``hash`` when git cannot be executed, exits
        with a non-zero status, writes anything to stderr, or prints
        nothing.
    """
    import subprocess

    short_hash = hash[:10]
    try:
        completed = subprocess.run(
            ["git", "name-rev", "--name-only", hash],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError:
        # git is missing or not executable: degrade to the short hash
        # instead of aborting the whole report.
        return short_hash

    # Any stderr output is treated as a failed lookup, even when the exit
    # status is zero.
    if completed.returncode != 0 or completed.stderr:
        return short_hash

    name = completed.stdout.decode("utf-8", errors="replace").strip()
    return name or short_hash
21
22
23
24
25
26
27
28
29
30
31
32
33
34


def _expand_benchmark_result(result):
    """Pair each parameter combination of one benchmark with its number."""
    combinations = [
        ", ".join(map(str, test_args))
        for test_args in product(*result["params"])
    ]
    numbers = result["result"]
    if numbers is None:
        # No measurements recorded: still list every combination.
        numbers = [None] * len(combinations)
    return [
        {"params": params, "result": number}
        for params, number in zip(combinations, numbers)
    ]


def main(results_dir="../results"):
    """Collect benchmark results per machine and per commit.

    Every sub-directory of ``results_dir`` is treated as one machine. Each
    ``*.json`` file inside it, except ``machine.json``, is loaded as the
    results of one commit and must contain the keys ``"commit_hash"`` and
    ``"results"``. Each entry of ``"results"`` must provide ``"params"``
    (a list of value lists) and ``"result"`` (a list of numbers or
    ``None``).

    Args:
        results_dir: Directory holding the per-machine result folders.
            Defaults to ``"../results"`` relative to the working directory.

    Returns:
        A nested dict ``{machine: {commit_hash: {test_name: [entry, ...]}}}``
        where each entry is ``{"params": str, "result": number or None}``.
        ``params`` is one combination from the Cartesian product of the
        benchmark's parameter lists, joined with ``", "``.

    Raises:
        FileNotFoundError: If ``results_dir`` is not an existing directory.
    """
    results_path = Path(results_dir)
    if not results_path.is_dir():
        raise FileNotFoundError(
            "results directory not found: {}".format(results_path)
        )

    output_results_dict = {}
    # Sorted so the output order does not depend on filesystem listing order.
    machines = sorted(p for p in results_path.glob("*") if p.is_dir())
    for machine in machines:
        per_machine_result = {}
        commit_results_json_paths = sorted(
            p
            for p in machine.glob("*.json")
            if p.is_file() and p.name != "machine.json"
        )
        for commit in commit_results_json_paths:
            with commit.open(encoding="utf-8") as f:
                commit_result = json.load(f)
            per_machine_result[commit_result["commit_hash"]] = {
                test_name: _expand_benchmark_result(result)
                for test_name, result in commit_result["results"].items()
            }
        output_results_dict[machine.name] = per_machine_result
    return output_results_dict


def dict_to_csv(
    output_results_dict, benchmarks_json_path="../results/benchmarks.json"
):
    """Flatten the nested results dict into a long-format DataFrame.

    Despite the name, nothing is written to disk here; the caller decides
    what to do with the returned frame.

    Args:
        output_results_dict: Nested dict shaped like
            ``{machine: {commit: {test_name: [{"params": ..., "result": ...}]}}}``.
        benchmarks_json_path: JSON file mapping benchmark names to a dict
            with a ``"unit"`` entry. Defaults to
            ``"../results/benchmarks.json"``.

    Returns:
        A ``pandas.DataFrame`` with one row per (machine, commit, test,
        parameter combination) and the columns ``test_name``, ``params``,
        ``unit``, ``number``, ``commit`` and ``machine``. The ``commit``
        column holds the name returned by ``get_branch_name_from_hash``.
        The columns are present even when there are no rows. ``unit`` is
        ``None`` for tests that the benchmarks file does not describe.
    """
    with open(benchmarks_json_path, encoding="utf-8") as f:
        benchmark_conf = json.load(f)
    # The "version" key is skipped: presumably file-format metadata rather
    # than a benchmark description.
    unit_dict = {
        name: conf.get("unit")
        for name, conf in benchmark_conf.items()
        if name != "version"
    }

    columns = ["test_name", "params", "unit", "number", "commit", "machine"]
    branch_names = {}  # commit -> resolved name, so git runs once per commit
    result_list = []
    for machine, per_machine_result in output_results_dict.items():
        for commit, test_cases in per_machine_result.items():
            if commit not in branch_names:
                branch_names[commit] = get_branch_name_from_hash(commit)
            branch_name = branch_names[commit]
            for test_case_name, results in test_cases.items():
                unit = unit_dict.get(test_case_name)
                for result in results:
                    result_list.append(
                        {
                            "test_name": test_case_name,
                            "params": result["params"],
                            "unit": unit,
                            "number": result["result"],
                            "commit": branch_name,
                            "machine": machine,
                        }
                    )
    return pd.DataFrame(result_list, columns=columns)


def side_by_side_view(df):
    """Pivot long-format results so each commit gets its own number column.

    Args:
        df: DataFrame with the columns ``test_name``, ``params``, ``unit``,
            ``number``, ``commit`` and ``machine``.

    Returns:
        A DataFrame keyed by ``test_name``, ``params``, ``machine`` and
        ``unit`` with one ``number_<commit>`` column per distinct value of
        ``commit``, in order of first appearance. The ``commit`` column is
        dropped. Combinations missing for a commit hold NaN because the
        frames are outer-merged. The column is named ``number_<commit>``
        even when there is only one commit. An empty input yields an empty
        frame containing only the key columns.
    """
    merge_keys = ["test_name", "params", "machine", "unit"]
    if df.empty:
        return pd.DataFrame(columns=["test_name", "params", "unit", "machine"])

    full_df = None
    for commit in df["commit"].unique().tolist():
        # Rename before merging. Merge suffixes only apply to overlapping
        # column names, so relying on them breaks from the third commit on.
        per_commit_df = (
            df.loc[df["commit"] == commit]
            .drop(columns="commit")
            .rename(columns={"number": "number_{}".format(commit)})
        )
        if full_df is None:
            full_df = per_commit_df
        else:
            full_df = full_df.merge(per_commit_df, on=merge_keys, how="outer")
    return full_df


if __name__ == "__main__":
    # Script entry point: collect the results, flatten them, pivot them to
    # one column per commit and write result.csv into the working directory.
    # Guarded so that importing this module does not read the results
    # directory or invoke git.
    output_results_dict = main()
    df = dict_to_csv(output_results_dict)
    sbs_df = side_by_side_view(df)
    sbs_df.to_csv("result.csv")