Unverified commit 6d62a69c, authored by Petr Baudis, committed by GitHub
Browse files

Fix Zeno visualizer on tasks like GSM8k (#2599)



* fix(zeno): Generate unique ids in case of multiple filters

* fix(zeno): Report even non-aggregable metrics, just not as metrics

* pre-commit

---------
Co-authored-by: Baber <baber@hey.com>
parent 16cfe464
...@@ -109,13 +109,14 @@ def main(): ...@@ -109,13 +109,14 @@ def main():
if model_index == 0: # Only need to assemble data for the first model if model_index == 0: # Only need to assemble data for the first model
metrics = [] metrics = []
for metric in config["metric_list"]: for metric in config["metric_list"]:
metrics.append( if metric.get("aggregation") == "mean":
ZenoMetric( metrics.append(
name=metric["metric"], ZenoMetric(
type="mean", name=metric["metric"],
columns=[metric["metric"]], type="mean",
columns=[metric["metric"]],
)
) )
)
project = client.create_project( project = client.create_project(
name=args.project_name + (f"_{task}" if len(tasks) > 1 else ""), name=args.project_name + (f"_{task}" if len(tasks) > 1 else ""),
view="text-classification", view="text-classification",
...@@ -168,7 +169,11 @@ def generate_dataset( ...@@ -168,7 +169,11 @@ def generate_dataset(
Returns: Returns:
pd.Dataframe: A dataframe that is ready to be uploaded to Zeno. pd.Dataframe: A dataframe that is ready to be uploaded to Zeno.
""" """
ids = [x["doc_id"] for x in data] ids = (
[x["doc_id"] for x in data]
if not config.get("filter_list")
else [f"{x['doc_id']}.{x['filter']}" for x in data]
)
labels = [x["target"] for x in data] labels = [x["target"] for x in data]
instance = [""] * len(ids) instance = [""] * len(ids)
...@@ -190,6 +195,7 @@ def generate_dataset( ...@@ -190,6 +195,7 @@ def generate_dataset(
return pd.DataFrame( return pd.DataFrame(
{ {
"id": ids, "id": ids,
"doc_id": [x["doc_id"] for x in data],
"data": instance, "data": instance,
"input_len": [len(x) for x in instance], "input_len": [len(x) for x in instance],
"labels": labels, "labels": labels,
...@@ -208,8 +214,15 @@ def generate_system_df(data, config): ...@@ -208,8 +214,15 @@ def generate_system_df(data, config):
Returns: Returns:
pd.Dataframe: A dataframe that is ready to be uploaded to Zeno as a system. pd.Dataframe: A dataframe that is ready to be uploaded to Zeno as a system.
""" """
ids = [x["doc_id"] for x in data] ids = (
[x["doc_id"] for x in data]
if not config.get("filter_list")
else [f"{x['doc_id']}.{x['filter']}" for x in data]
)
system_dict = {"id": ids} system_dict = {"id": ids}
system_dict["doc_id"] = [x["doc_id"] for x in data]
if config.get("filter_list"):
system_dict["filter"] = [x["filter"] for x in data]
system_dict["output"] = [""] * len(ids) system_dict["output"] = [""] * len(ids)
if config["output_type"] == "loglikelihood": if config["output_type"] == "loglikelihood":
...@@ -228,11 +241,10 @@ def generate_system_df(data, config): ...@@ -228,11 +241,10 @@ def generate_system_df(data, config):
system_dict["output"] = [str(x["filtered_resps"][0]) for x in data] system_dict["output"] = [str(x["filtered_resps"][0]) for x in data]
system_dict["output_length"] = [len(str(x["filtered_resps"][0])) for x in data] system_dict["output_length"] = [len(str(x["filtered_resps"][0])) for x in data]
metrics = {} metrics = {
for metric in config["metric_list"]: metric["metric"]: [x[metric["metric"]] for x in data]
if "aggregation" in metric and metric["aggregation"] == "mean": for metric in config["metric_list"]
metrics[metric["metric"]] = [x[metric["metric"]] for x in data] }
system_dict.update(metrics) system_dict.update(metrics)
system_df = pd.DataFrame(system_dict) system_df = pd.DataFrame(system_dict)
return system_df return system_df
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment