update_training_history.py 4.42 KB
Newer Older
zhanggzh's avatar
zhanggzh committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import json
import os
import subprocess
import sys

import tensorboard as tb
from absl import flags

# Command-line flags. Every flag is a string whose default is None; the script
# prompts interactively for any value that was not passed on the command line.
# Each entry is (flag name, help text).
_STRING_FLAGS = (
    ("model_name", "The name of the KerasCV.model that was trained"),
    ("tensorboard_logs_path", "Path to tensorboard logs to load"),
    ("training_script_path", "Path to the training script"),
    (
        "script_version",
        "commit hash of the latest commit in KerasCV/master for the training script",
    ),
    (
        "weights_version",
        "The version of the training script used to produce the latest weights. For example, v0",
    ),
    ("contributor", "The GitHub username of the contributor of these results"),
    ("accelerators", "The number of accelerators used for training."),
)
for _flag_name, _flag_help in _STRING_FLAGS:
    flags.DEFINE_string(_flag_name, None, _flag_help)

FLAGS = flags.FLAGS
# Parse the process arguments right away so flag values are readable below.
FLAGS(sys.argv)

# For each value, prefer the command-line flag and fall back to an interactive
# prompt when the flag is unset or empty.
model_name = FLAGS.model_name
if not model_name:
    model_name = input("Input the name of the KerasCV.model that was trained\n")

weights_version = FLAGS.weights_version
if not weights_version:
    weights_version = input("Input the weights version for your script\n")

training_script_path = FLAGS.training_script_path
if not training_script_path:
    training_script_path = input("Input the path to your training script\n")

# Work with an absolute path so the repository's training directory can be
# located inside it regardless of the current working directory.
full_training_script_path = os.path.abspath(training_script_path)

# Build an experiment name structured as task/training_script_name/model_name-version
_training_root = "keras-cv/examples/training/"
# Position just past the training root inside the absolute script path.
# str.index raises ValueError if the script is not located under that root.
_training_root_end = full_training_script_path.index(_training_root) + len(
    _training_root
)

# Path of the script relative to the training root, split into its components.
training_script_rooted_at_training = full_training_script_path[_training_root_end:]
training_script_dirs = training_script_rooted_at_training.split("/")

_task = training_script_dirs[0]
# The last three characters are dropped from the remaining path
# (presumably the script's ".py" extension).
_script_name = "/".join(training_script_dirs[1:])[:-3]
tensorboard_experiment_name = (
    f"{_task}/{_script_name}/{model_name}-{weights_version}"
)

# training_history.json is addressed through the first two path components
# below the training root.
_history_components = training_script_dirs[:2] + ["training_history.json"]
training_script_json_path = full_training_script_path[
    :_training_root_end
] + "/".join(_history_components)

# Flag value if given, otherwise ask the user.
script_version = FLAGS.script_version
if not script_version:
    script_version = input(
        "Input the commit hash of the latest commit in KerasCV/master for the training script used for training."
    )

tensorboard_logs_path = FLAGS.tensorboard_logs_path
if not tensorboard_logs_path:
    tensorboard_logs_path = input("Input the path to the TensorBoard logs\n")
# Upload the logs with the TensorBoard uploader and recover the experiment id
# from its output, which is expected to be the experiment URL: the id is the
# second-to-last "/"-separated component.
#
# The command is passed as an argument list (no shell involved) so that a logs
# path or experiment name containing spaces or shell metacharacters reaches the
# uploader verbatim instead of being re-parsed, or executed, by a shell.
upload_command = [
    "python3",
    "-m",
    "tensorboard.main",
    "dev",
    "upload",
    "--logdir",
    tensorboard_logs_path,
    "--name",
    tensorboard_experiment_name,
    "--one_shot",
    "--verbose",
    "0",
]
upload_process = subprocess.run(
    upload_command,
    stdout=subprocess.PIPE,
    universal_newlines=True,  # decode stdout to str rather than bytes
)
# Fail loudly here rather than carrying a bogus experiment id forward.
if upload_process.returncode != 0:
    raise RuntimeError(
        f"TensorBoard upload exited with status {upload_process.returncode}; "
        f"output was: {upload_process.stdout!r}"
    )
upload_output_parts = upload_process.stdout.split("/")
if len(upload_output_parts) < 2:
    raise RuntimeError(
        "Could not find an experiment URL in the TensorBoard upload output: "
        f"{upload_process.stdout!r}"
    )
tensorboard_experiment_id = upload_output_parts[-2]

# Download the scalar summaries of the uploaded experiment.
tensorboard_experiment = tb.data.experimental.ExperimentFromDev(
    tensorboard_experiment_id
)
tensorboard_results = tensorboard_experiment.get_scalars()

# Highest step recorded by the "train" run.
# NOTE(review): if steps are 0-indexed this is one less than the number of
# epochs actually run — confirm against the logging callback.
_train_rows = tensorboard_results[tensorboard_results.run == "train"]
training_epochs = max(_train_rows.step)

# Best categorical accuracy logged by the "validation" run, kept as a string
# with four decimal places.
_is_validation_run = tensorboard_results.run == "validation"
_is_accuracy_tag = tensorboard_results.tag == "epoch_categorical_accuracy"
_validation_accuracies = tensorboard_results[
    _is_validation_run & _is_accuracy_tag
].value
max_validation_accuracy = max(_validation_accuracies)
max_validation_accuracy = f"{max_validation_accuracy:.4f}"

# Flag value if given, otherwise ask the user.
contributor = FLAGS.contributor
if not contributor:
    contributor = input(
        "Input your GitHub username (or the username of the contributor, if it's not you)\n"
    )

accelerators = FLAGS.accelerators
if not accelerators:
    accelerators = input("Input the number of accelerators used during training.\n")

# Training arguments have no flag; they are always entered interactively as a
# single line of text.
_args_prompt = (
    "Input any training arguments used for the training script.\n"
    "Use comma-separate, colon-split key-value pairs. For example:\n"
    "arg1:value, arg2:value\n"
)
args = input(_args_prompt)

# Parse the "key:value, key:value" text in `args` into a dict of stripped
# strings. Empty segments (e.g. from a trailing comma or empty input) are
# skipped.
args_dict = {}
for arg in args.split(","):
    arg = arg.strip()
    if not arg:
        continue
    # Split on the FIRST colon only, so a value that itself contains colons
    # (a URL, a Windows path, a time) is kept whole rather than truncated.
    key, separator, value = arg.partition(":")
    if not separator:
        # A bare word would otherwise surface as an opaque IndexError.
        raise ValueError(
            f"Training argument {arg!r} is not a colon-split key-value pair "
            "(expected key:value)"
        )
    args_dict[key.strip()] = value.strip()

# Record describing this training run.
# The script name is whatever follows the first two path components below the
# training root.
_script_record = {
    "name": "/".join(training_script_dirs[2:]),
    "version": script_version,
}
_tensorboard_url = f"https://tensorboard.dev/experiment/{tensorboard_experiment_id}/"
# The accelerator count arrives as text (flag or prompt); int() raises
# ValueError if it is not a whole number.
_accelerator_count = int(accelerators)

new_results = {
    "script": _script_record,
    "validation_accuracy": max_validation_accuracy,
    "epochs_trained": training_epochs,
    "tensorboard_logs": _tensorboard_url,
    "contributor": contributor,
    "args": args_dict,
    "accelerators": _accelerator_count,
}

# Load the existing training history. A missing, empty or whitespace-only file
# is treated as "no history yet" and yields an empty dict, so the first run for
# a training script does not fail with FileNotFoundError.
try:
    # The context manager closes the file even if reading raises.
    with open(training_script_json_path, "r") as results_file:
        results_string = results_file.read()
except FileNotFoundError:
    results_string = ""
results = json.loads(results_string) if results_string.strip() != "" else {}

# Fetch this model's record, creating an empty one the first time the model is
# seen, then store this run under its weights version (overwriting any earlier
# entry for the same version).
model_results = results.setdefault(model_name, {})
model_results[weights_version] = new_results

# Save the updated results.
# Serialize BEFORE opening the file: opening in "w" mode truncates it, so a
# serialization error raised after the open would wipe the existing history.
serialized_results = json.dumps(results, indent=4, sort_keys=True)
# The context manager closes the file even if the write fails.
with open(training_script_json_path, "w") as results_file:
    results_file.write(serialized_results)