Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
vision
Commits
2ae6a6d5
Unverified
Commit
2ae6a6d5
authored
Apr 18, 2024
by
deekay42
Committed by
GitHub
Apr 18, 2024
Browse files
Update classify_prs notebook (#8383)
Co-authored-by:
Nicolas Hug
<
contact@nicolas-hug.com
>
parent
5181a854
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
29 additions
and
47 deletions
+29
-47
maintainer_guide.md
maintainer_guide.md
+1
-1
scripts/release_notes/classify_prs.py
scripts/release_notes/classify_prs.py
+28
-46
No files found.
maintainer_guide.md
View file @
2ae6a6d5
#
# Torchvision maintainers guide
# Torchvision maintainers guide
This document aims at documenting user-facing policies / principles used when
This document aims at documenting user-facing policies / principles used when
developing and maintaining torchvision. Other maintainer info (e.g. release
developing and maintaining torchvision. Other maintainer info (e.g. release
...
...
scripts/release_notes/classify_prs.py
View file @
2ae6a6d5
# In[1]:
# In[1]:
# imports and set configuration
import
pandas
as
pd
import
pandas
as
pd
from
retrieve_prs_data
import
run
exclude_prototype
=
True
data_filename
=
"10.0_to_11.0-rc2.json"
previous_release
=
"v10.0"
current_release
=
"v11.0-rc2"
# In[2]:
# In[2]:
data_filename
=
"data.json"
df
=
pd
.
read_json
(
data_filename
).
T
df
=
pd
.
read_json
(
data_filename
).
T
df
.
tail
()
df
.
tail
()
# In[3]:
# In[3]:
all_labels
=
{
lbl
for
labels
in
df
[
"labels"
]
for
lbl
in
labels
}
all_labels
=
{
lbl
for
labels
in
df
[
"labels"
]
for
lbl
in
labels
}
all_labels
all_labels
# In[4]:
# In[4]:
# Add one column per label
# Add one column per label
for
label
in
all_labels
:
for
label
in
all_labels
:
df
[
label
]
=
df
[
"labels"
].
apply
(
lambda
labels_list
:
label
in
labels_list
)
df
[
label
]
=
df
[
"labels"
].
apply
(
lambda
labels_list
:
label
in
labels_list
)
df
.
head
()
df
.
head
()
# In[5]:
# In[5]:
# Add a clean "module" column. It contains tuples since PRs can have more than one module.
# Add a clean "module" column. It contains tuples since PRs can have more than one module.
# Maybe we should include "topics" in that column as well?
# Maybe we should include "topics" in that column as well?
...
@@ -51,24 +33,15 @@ for i, row in df.iterrows():
...
@@ -51,24 +33,15 @@ for i, row in df.iterrows():
df
[
"module"
]
=
df
.
module
.
apply
(
tuple
)
df
[
"module"
]
=
df
.
module
.
apply
(
tuple
)
df
.
head
()
df
.
head
()
# In[6]:
# In[6]:
mod_df
=
df
.
set_index
(
"module"
).
sort_index
()
mod_df
=
df
.
set_index
(
"module"
).
sort_index
()
mod_df
.
tail
()
mod_df
.
tail
()
# In[7]:
# In[7]:
# All improvement PRs
# All improvement PRs
mod_df
[
mod_df
[
"enhancement"
]].
head
()
mod_df
[
mod_df
[
"enhancement"
]].
head
()
# In[8]:
# In[8]:
# improvements for a given module
# improvements for a given module
# note: don't filter module name on the index as the index contains tuples with non-exclusive values
# note: don't filter module name on the index as the index contains tuples with non-exclusive values
# Use the boolean column instead
# Use the boolean column instead
...
@@ -76,12 +49,10 @@ mod_df[mod_df["enhancement"] & mod_df["module: transforms"]]
...
@@ -76,12 +49,10 @@ mod_df[mod_df["enhancement"] & mod_df["module: transforms"]]
# In[9]:
# In[9]:
def
format_prs
(
mod_df
,
exclude_prototype
=
True
):
def
format_prs
(
mod_df
):
out
=
[]
out
=
[]
for
idx
,
row
in
mod_df
.
iterrows
():
for
idx
,
row
in
mod_df
.
iterrows
():
if
exclude_prototype
and
row
[
"prototype"
]:
if
exclude_prototype
and
"prototype"
in
row
and
row
[
"prototype"
]:
continue
continue
modules
=
idx
modules
=
idx
# Put "documentation" and "tests" first for sorting to be dece
# Put "documentation" and "tests" first for sorting to be dece
...
@@ -98,8 +69,6 @@ def format_prs(mod_df):
...
@@ -98,8 +69,6 @@ def format_prs(mod_df):
# In[10]:
# In[10]:
included_prs
=
pd
.
DataFrame
()
included_prs
=
pd
.
DataFrame
()
# If labels are accurate, this should generate most of the release notes already
# If labels are accurate, this should generate most of the release notes already
...
@@ -112,6 +81,7 @@ for section_title, module_idx in (
...
@@ -112,6 +81,7 @@ for section_title, module_idx in (
(
"Bug Fixes"
,
"bug"
),
(
"Bug Fixes"
,
"bug"
),
(
"Code Quality"
,
"code quality"
),
(
"Code Quality"
,
"code quality"
),
):
):
if
module_idx
in
mod_df
:
print
(
f
"##
{
section_title
}
"
)
print
(
f
"##
{
section_title
}
"
)
print
()
print
()
tmp_df
=
mod_df
[
mod_df
[
module_idx
]]
tmp_df
=
mod_df
[
mod_df
[
module_idx
]]
...
@@ -121,18 +91,30 @@ for section_title, module_idx in (
...
@@ -121,18 +91,30 @@ for section_title, module_idx in (
# In[11]:
# In[11]:
# Missing PRs are these ones... classify them manually
# Missing PRs are these ones... classify them manually
missing_prs
=
pd
.
concat
([
mod_df
,
included_prs
]).
drop_duplicates
(
subset
=
"pr_number"
,
keep
=
False
)
missing_prs
=
pd
.
concat
([
mod_df
,
included_prs
]).
drop_duplicates
(
subset
=
"pr_number"
,
keep
=
False
)
print
(
format_prs
(
missing_prs
))
print
(
format_prs
(
missing_prs
))
# In[12]:
# In[12]:
# Generate list of contributors
# Generate list of contributors
print
()
print
()
print
(
"## Contributors"
)
print
(
"## Contributors"
)
command_to_run
=
f
"{{ git shortlog -s
{
previous_release
}
..
{
current_release
}
| cut -f2- & git log -s
{
previous_release
}
..
{
current_release
}
| grep Co-authored | cut -f2- -d: | cut -f1 -d
\\
< | sed 's/^ *//;s/ *$//' ; }} | sort --ignore-case | uniq | tr '
\\
n' ';' | sed 's/;/, /g;s/, $//' | fold -s"
previous_release
=
"c35d3855ccbfa6a36e6ae6337a1f2c721c1f1e78"
rc
,
output
,
err
=
run
(
command_to_run
)
current_release
=
"5181a854d8b127cf465cd22a67c1b5aaf6ccae05"
print
(
output
)
print
(
f
"{{ git shortlog -s
{
previous_release
}
..
{
current_release
}
| cut -f2- & git log -s
{
previous_release
}
..
{
current_release
}
| grep Co-authored | cut -f2- -d: | cut -f1 -d
\\
< | sed 's/^ *//;s/ *//' ; }} | sort --ignore-case | uniq | tr '
\\
n' ';' | sed 's/;/, /g;s/,//' | fold -s"
)
# In[13]:
# Utility to extract PR numbers only from multiple lines, useful to bundle all
# the docs changes for example:
import
re
s
=
"""
[] Remove unnecessary dependency from macOS/Conda binaries (#8077)
[rocm] [ROCm] remove HCC references (#8070)
"""
print
(
", "
.
join
(
re
.
findall
(
"(#
\\
d+)"
,
s
)))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment