Unverified Commit 2ae6a6d5 authored by deekay42's avatar deekay42 Committed by GitHub
Browse files

Update classify_prs notebook (#8383)


Co-authored-by: default avatarNicolas Hug <contact@nicolas-hug.com>
parent 5181a854
## Torchvision maintainers guide # Torchvision maintainers guide
This document aims at documenting user-facing policies / principles used when This document aims at documenting user-facing policies / principles used when
developing and maintaining torchvision. Other maintainer info (e.g. release developing and maintaining torchvision. Other maintainer info (e.g. release
......
# In[1]: # In[1]:
# imports and set configuration
import pandas as pd import pandas as pd
from retrieve_prs_data import run
exclude_prototype = True
data_filename = "10.0_to_11.0-rc2.json"
previous_release = "v10.0"
current_release = "v11.0-rc2"
# In[2]: # In[2]:
data_filename = "data.json"
df = pd.read_json(data_filename).T df = pd.read_json(data_filename).T
df.tail() df.tail()
# In[3]: # In[3]:
all_labels = {lbl for labels in df["labels"] for lbl in labels} all_labels = {lbl for labels in df["labels"] for lbl in labels}
all_labels all_labels
# In[4]: # In[4]:
# Add one column per label # Add one column per label
for label in all_labels: for label in all_labels:
df[label] = df["labels"].apply(lambda labels_list: label in labels_list) df[label] = df["labels"].apply(lambda labels_list: label in labels_list)
df.head() df.head()
# In[5]: # In[5]:
# Add a clean "module" column. It contains tuples since PRs can have more than one module. # Add a clean "module" column. It contains tuples since PRs can have more than one module.
# Maybe we should include "topics" in that column as well? # Maybe we should include "topics" in that column as well?
...@@ -51,24 +33,15 @@ for i, row in df.iterrows(): ...@@ -51,24 +33,15 @@ for i, row in df.iterrows():
df["module"] = df.module.apply(tuple) df["module"] = df.module.apply(tuple)
df.head() df.head()
# In[6]: # In[6]:
mod_df = df.set_index("module").sort_index() mod_df = df.set_index("module").sort_index()
mod_df.tail() mod_df.tail()
# In[7]: # In[7]:
# All improvement PRs # All improvement PRs
mod_df[mod_df["enhancement"]].head() mod_df[mod_df["enhancement"]].head()
# In[8]: # In[8]:
# improvement for module # improvement for module
# note: don't filter module name on the index as the index contains tuples with non-exclusive values # note: don't filter module name on the index as the index contains tuples with non-exclusive values
# Use the boolean column instead # Use the boolean column instead
...@@ -76,12 +49,10 @@ mod_df[mod_df["enhancement"] & mod_df["module: transforms"]] ...@@ -76,12 +49,10 @@ mod_df[mod_df["enhancement"] & mod_df["module: transforms"]]
# In[9]: # In[9]:
def format_prs(mod_df, exclude_prototype=True):
def format_prs(mod_df):
out = [] out = []
for idx, row in mod_df.iterrows(): for idx, row in mod_df.iterrows():
if exclude_prototype and row["prototype"]: if exclude_prototype and "prototype" in row and row["prototype"]:
continue continue
modules = idx modules = idx
# Put "documentation" and "tests" first for sorting to be decent # Put "documentation" and "tests" first for sorting to be decent
...@@ -98,8 +69,6 @@ def format_prs(mod_df): ...@@ -98,8 +69,6 @@ def format_prs(mod_df):
# In[10]: # In[10]:
included_prs = pd.DataFrame() included_prs = pd.DataFrame()
# If labels are accurate, this should generate most of the release notes already # If labels are accurate, this should generate most of the release notes already
...@@ -112,6 +81,7 @@ for section_title, module_idx in ( ...@@ -112,6 +81,7 @@ for section_title, module_idx in (
("Bug Fixes", "bug"), ("Bug Fixes", "bug"),
("Code Quality", "code quality"), ("Code Quality", "code quality"),
): ):
if module_idx in mod_df:
print(f"## {section_title}") print(f"## {section_title}")
print() print()
tmp_df = mod_df[mod_df[module_idx]] tmp_df = mod_df[mod_df[module_idx]]
...@@ -121,18 +91,30 @@ for section_title, module_idx in ( ...@@ -121,18 +91,30 @@ for section_title, module_idx in (
# In[11]: # In[11]:
# Missing PRs are these ones... classify them manually # Missing PRs are these ones... classify them manually
missing_prs = pd.concat([mod_df, included_prs]).drop_duplicates(subset="pr_number", keep=False) missing_prs = pd.concat([mod_df, included_prs]).drop_duplicates(subset="pr_number", keep=False)
print(format_prs(missing_prs)) print(format_prs(missing_prs))
# In[12]: # In[12]:
# Generate list of contributors # Generate list of contributors
print() print()
print("## Contributors") print("## Contributors")
command_to_run = f"{{ git shortlog -s {previous_release}..{current_release} | cut -f2- & git log -s {previous_release}..{current_release} | grep Co-authored | cut -f2- -d: | cut -f1 -d\\< | sed 's/^ *//;s/ *$//' ; }} | sort --ignore-case | uniq | tr '\\n' ';' | sed 's/;/, /g;s/, $//' | fold -s" previous_release = "c35d3855ccbfa6a36e6ae6337a1f2c721c1f1e78"
rc, output, err = run(command_to_run) current_release = "5181a854d8b127cf465cd22a67c1b5aaf6ccae05"
print(output) print(
f"{{ git shortlog -s {previous_release}..{current_release} | cut -f2- & git log -s {previous_release}..{current_release} | grep Co-authored | cut -f2- -d: | cut -f1 -d\\< | sed 's/^ *//;s/ *//' ; }} | sort --ignore-case | uniq | tr '\\n' ';' | sed 's/;/, /g;s/,//' | fold -s"
)
# In[13]:
# Helper cell: pull just the "#NNNN" PR references out of a pasted chunk of
# release-note lines, e.g. to bundle every docs change into a single entry.
import re

# Paste the release-note lines to scan between the triple quotes.
s = """
[] Remove unnecessary dependency from macOS/Conda binaries (#8077)
[rocm] [ROCm] remove HCC references (#8070)
"""
# Emit every "#<digits>" token, in order of appearance, separated by ", ".
print(*re.findall("(#\\d+)", s), sep=", ")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment