"vscode:/vscode.git/clone" did not exist on "8e70064c3781cc6d040b2e3575733f4993762052"
retrieve_prs.py 3.76 KB
Newer Older
1
2
3
4
5
6
"""Collect the PRs between two specified tags or commits and
    output the commit titles, PR numbers, and labels in a json file.
Usage: python tools/release_notes/retrieve_prs.py tags/v0.10.0 \
    18685a517ae68353b05b9a0ede5343df31525c76 --file data.json
"""
import argparse
7
8
9
10
11
12
13
14
import json
import re
import subprocess
from collections import namedtuple
from os.path import expanduser

import requests

15

16
17
18
19
20
21
22
23
24
25
# Per-commit record written to the output JSON: the commit title, the PR
# number parsed from that title (or None), and the PR's label names.
_FEATURE_FIELDS = ["title", "pr_number", "labels"]
Features = namedtuple("Features", _FEATURE_FIELDS)


nateanl's avatar
nateanl committed
26
def _run_cmd(cmd):
    """Run a command and return its standard output as stripped text.

    Args:
        cmd: Argument list passed to ``subprocess.check_output``.

    Returns:
        str: The command's stdout decoded as UTF-8, with leading and
        trailing whitespace removed.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
    """
    return subprocess.check_output(cmd).decode("utf-8").strip()
28
29
30


def commit_title(commit_hash):
    """Return the subject line of a single commit.

    Args:
        commit_hash: Commit identifier handed to ``git log``.

    Returns:
        str: The output of ``git log -n 1 --pretty=format:%s`` for the commit.
    """
    cmd = ["git", "log", "-n", "1", "--pretty=format:%s", str(commit_hash)]
    return _run_cmd(cmd)
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61


def parse_pr_number(commit_hash, title):
    """Extract the PR number referenced in a commit title.

    Args:
        commit_hash: Commit identifier, used only in the diagnostic messages.
        title: Commit title to search for ``#<digits>`` references.

    Returns:
        The digits of the last ``#<digits>`` reference in ``title`` as a
        string, or ``None`` when the title contains no such reference.
    """
    found = re.findall(r"(#[0-9]+)", title)
    if not found:
        print(f"[{commit_hash}: {title}] Could not parse PR number, ignoring PR")
        return None
    if len(found) > 1:
        print(f"[{commit_hash}: {title}] Got two PR numbers, using the last one")
    # With a single match the last element is also the first; drop the "#".
    last_reference = found[-1]
    return last_reference[1:]


def get_ghstack_token():
    """Read the GitHub OAuth token from ``~/.ghstackrc``.

    Returns:
        str: The text following ``github_oauth = `` on the first matching
        line of the file, with surrounding whitespace removed.

    Raises:
        RuntimeError: If the file has no ``github_oauth = ...`` entry.
        OSError: If ``~/.ghstackrc`` cannot be opened for reading.
    """
    pattern = r"github_oauth = (.*)"
    # The file is only read, so open it read-only; "r+" also demands write
    # permission and fails on a read-only config file.
    with open(expanduser("~/.ghstackrc"), "r") as f:
        config = f.read()
    matches = re.findall(pattern, config)
    if not matches:
        raise RuntimeError("Can't find a github oauth token")
    # Trailing blanks after the token would otherwise end up in the header.
    return matches[0].strip()


# Module-level GitHub credentials: the token is read from ~/.ghstackrc as soon
# as this module is executed or imported, and `headers` is the header dict that
# run_query sends with each request.
token = get_ghstack_token()
headers = {"Authorization": f"token {token}"}


def run_query(query, timeout=30):
    """POST a GraphQL query to the GitHub API and return the decoded reply.

    Args:
        query: GraphQL query text.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which can hang the
            whole script on a stalled connection.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the response status indicates an error.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.post(
        "https://api.github.com/graphql",
        json={"query": query},
        headers=headers,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97


def gh_labels(pr_number):
    """Fetch the label names attached to a pytorch/audio pull request.

    The GraphQL query asks for the first 10 labels of the pull request.

    Args:
        pr_number: Pull request number, interpolated into the query.

    Returns:
        list: The ``name`` of every label node in the response.
    """
    graphql = f"""
    {{
      repository(owner: "pytorch", name: "audio") {{
        pullRequest(number: {pr_number}) {{
          labels(first: 10) {{
            edges {{
              node {{
                name
              }}
            }}
          }}
        }}
      }}
    }}
    """
    payload = run_query(graphql)
    pull_request = payload["data"]["repository"]["pullRequest"]
    return [item["node"]["name"] for item in pull_request["labels"]["edges"]]


def get_features(commit_hash):
    """Build the ``Features`` record for one commit.

    Args:
        commit_hash: Commit identifier to describe.

    Returns:
        Features: The commit title, the PR number parsed from it (or
        ``None``), and the PR's labels (an empty list when no PR number
        was found).
    """
    subject = commit_title(commit_hash)
    number = parse_pr_number(commit_hash, subject)
    # Only hit the GitHub API when the title actually references a PR.
    pr_labels = gh_labels(number) if number is not None else []
    return Features(subject, number, pr_labels)


def get_commits_between(base_version, new_version):
    """List the commits reachable from ``new_version`` but not from the merge base.

    Args:
        base_version: Starting tag or commit.
        new_version: Final tag or commit.

    Returns:
        tuple: ``(hashes, titles)``, two equally long tuples in the order
        produced by ``git log --reverse``. Both are empty when the range
        contains no commits.
    """
    cmd = ["git", "merge-base", str(base_version), str(new_version)]
    merge_base = _run_cmd(cmd)

    # Returns a list of items in the form
    # a7854f33 Add HuBERT model architectures (#1769)
    cmd = ["git", "log", "--reverse", "--oneline", f"{merge_base}..{new_version}"]
    commits = _run_cmd(cmd)

    # An empty range yields an empty string; splitting it would produce a
    # single bogus entry and break the two-way unpacking of the result.
    if not commits:
        return (), ()

    hashes = []
    titles = []
    for log_line in commits.split("\n"):
        # partition() always yields a title, even for a line without a space.
        commit_hash, _, title = log_line.partition(" ")
        hashes.append(commit_hash)
        titles.append(title)
    return tuple(hashes), tuple(titles)


111
112
113
114
115
def _parse_args(args=None):
    """Parse the command-line arguments of the script.

    Args:
        args: Optional list of argument strings; when ``None``, argparse
            reads the process's command line.

    Returns:
        argparse.Namespace: With ``base_version``, ``new_version`` and
        ``file`` (default ``"data.json"``).
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("base_version", type=str, help="starting tag or commit (exclusive)")
    parser.add_argument("new_version", type=str, help="final tag or commit (inclusive)")
    parser.add_argument("--file", type=str, default="data.json", help="output json file")
    return parser.parse_args(args)


def _main(args):
    """Collect the features of every commit in the range and dump them as JSON.

    Args:
        args: Namespace providing ``base_version``, ``new_version`` and
            ``file`` (the output path).
    """
    hashes, _ = get_commits_between(args.base_version, args.new_version)
    total = len(hashes)
    data = {}

    for idx, commit in enumerate(hashes):
        # Store plain dicts right away so the result is JSON-serializable
        # with field names as keys.
        data[commit] = get_features(commit)._asdict()
        if idx % 10 == 0:
            print(f"{idx} / {total}")

    with open(args.file, "w") as f:
        json.dump(data, f)


if __name__ == "__main__":
    # Run the collection only when this file is executed as a script.
    _main(_parse_args())