# classify_prs.py

# In[1]:


import pandas as pd


# In[2]:


# Label tables, from
# https://github.com/pytorch/audio/blob/main/.github/process_commit.py
#
# Each pair is (GitHub label, heading printed in the release notes).
# Order is significant: get_labels() keeps the first label of a table that a
# PR carries, and the report prints its sections in this same order.
primary_labels_mapping = dict(
    [
        ("BC-breaking", "Backward-incompatible changes"),
        ("deprecation", "Deprecations"),
        ("bug fix", "Bug Fixes"),
        ("new feature", "New Features"),
        ("improvement", "Improvements"),
        ("example", "Examples"),
        ("prototype", "Prototypes"),
        ("other", "Other"),
        # "None" is the placeholder get_labels() stores when nothing matches.
        ("None", "Missing"),
    ]
)

secondary_labels_mapping = dict(
    [
        ("module: I/O", "I/O"),
        ("module: ops", "Ops"),
        ("module: models", "Models"),
        ("module: pipelines", "Pipelines"),
        ("module: datasets", "Datasets"),
        ("module: docs", "Documentation"),
        ("module: tests", "Tests"),
        ("build", "Build"),
        ("style", "Style"),
        ("perf", "Performance"),
        ("other", "Other"),
        # "None" is the placeholder get_labels() stores when nothing matches.
        ("None", "Missing"),
    ]
)


# In[3]:


# Load the PR records from data.json and transpose the parsed table; the
# cells below read the "labels" and "title" columns of the result.
# NOTE(review): presumably data.json is keyed by PR, so each row is one PR
# after the transpose -- confirm against the file that produces it.
df = pd.read_json("data.json").transpose()
df.tail()  # notebook preview of the last rows; has no effect in a script


# In[4]:


def get_labels(col_name, labels, frame=None):
    """Store, for every row, the highest-priority label that row carries.

    For each row, the first entry of ``labels`` (in iteration order) that is
    found in the row's ``"labels"`` value is written to ``col_name``. Rows
    that match no entry get the string ``"None"``.

    The column is computed in one pass and assigned as a whole. Writing into
    the rows yielded by ``DataFrame.iterrows()`` is not guaranteed to reach
    the frame (pandas may hand out copies, and always does under
    Copy-on-Write), which would silently leave the column unpopulated.

    Args:
        col_name: Name of the column to create or overwrite.
        labels: Iterable of candidate label strings, highest priority first.
        frame: DataFrame to annotate in place. Defaults to the module-level
            ``df`` so existing two-argument calls keep working.

    Returns:
        None. The target frame is modified in place.
    """
    target = df if frame is None else frame
    # Materialise once: ``labels`` may be a one-shot iterator, and it is
    # scanned again for every row.
    candidates = list(labels)
    target[col_name] = [
        next((label for label in candidates if label in row_labels), "None")
        for row_labels in target["labels"]
    ]


# In[5]:


# Derive both label columns. Iterating a dict yields its keys in insertion
# order, so each mapping is handed over directly as the priority list.
get_labels("primary_label", primary_labels_mapping)
get_labels("secondary_label", secondary_labels_mapping)
df.tail(n=5)  # notebook preview of the last rows; has no effect in a script


# In[6]:


# Print the release notes as Markdown: one "##" section per primary label,
# one "###" subsection per secondary label, and one bullet per PR title.
# Sections and subsections without any PR are omitted.
for primary_label, primary_heading in primary_labels_mapping.items():
    primary_df = df[df["primary_label"] == primary_label]
    if primary_df.empty:
        continue
    print(f"## {primary_heading}")
    for secondary_label, secondary_heading in secondary_labels_mapping.items():
        in_subsection = primary_df["secondary_label"] == secondary_label
        secondary_df = primary_df[in_subsection]
        if secondary_df.empty:
            continue
        print(f"### {secondary_heading}")
        bullets = [f"- {row['title']}" for _, row in secondary_df.iterrows()]
        print("\n".join(bullets))
        print()  # blank line after each subsection
    print()  # blank line after each section