"examples/tutorial/vscode:/vscode.git/clone" did not exist on "d9bf83e084dd82daaa05cd68e58fe66a24fd5a23"
Unverified Commit b463651f authored by Frank Lee's avatar Frank Lee Committed by GitHub
Browse files

[workflow] cover all public repositories in weekly report (#4069)

parent 4a81faa5
import os import os
from dataclasses import dataclass
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Any, Dict, List from typing import Any, Dict, List
...@@ -10,8 +9,7 @@ import seaborn ...@@ -10,8 +9,7 @@ import seaborn
from requests_toolbelt import MultipartEncoder from requests_toolbelt import MultipartEncoder
@dataclass class Counter(dict):
class Contributor:
""" """
Dataclass for a github contributor. Dataclass for a github contributor.
...@@ -19,8 +17,40 @@ class Contributor: ...@@ -19,8 +17,40 @@ class Contributor:
name (str): name of the contributor name (str): name of the contributor
num_commits_this_week (int): number of commits made within one week num_commits_this_week (int): number of commits made within one week
""" """
name: str
num_commits_this_week: int def record(self, item: str):
if item in self:
self[item] += 1
else:
self[item] = 1
def to_sorted_list(self):
data = [(key, value) for key, value in self.items()]
data.sort(key=lambda x: x[1], reverse=True)
return data
def get_utc_time_one_week_ago():
    """
    Get the UTC time one week ago.

    Returns:
        datetime: a naive datetime (tzinfo is None) holding the UTC wall-clock
            time exactly 7 days before the moment of the call.
    """
    # local import: the file-level import only brings in datetime and timedelta
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12, so take an aware "now"
    # in UTC and drop the tzinfo. The value is kept naive on purpose: it is
    # compared against naive datetimes parsed with strptime elsewhere in this file,
    # and mixing naive and aware datetimes in a comparison raises TypeError.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    start_datetime = now - timedelta(days=7)
    return start_datetime
def datetime2str(dt):
    """
    Convert datetime to string in the format of YYYY-MM-DDTHH:MM:SSZ

    Args:
        dt (datetime): the datetime to format. A naive datetime is formatted as-is
            (it is taken to already hold UTC wall-clock time). A timezone-aware
            datetime is converted to UTC first.

    Returns:
        str: the formatted timestamp, always ending with a literal "Z".
    """
    # The trailing "Z" is a fixed character in the format, not derived from dt.
    # Without the conversion below, an aware datetime in another zone
    # (e.g. Asia/Shanghai) would be printed with its local wall-clock time and
    # still be labelled as UTC, i.e. the string would name the wrong instant.
    if dt.tzinfo is not None and dt.utcoffset() is not None:
        # local import: the file-level import only brings in datetime and timedelta
        from datetime import timezone
        dt = dt.astimezone(timezone.utc)
    return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
def str2datetime(string):
    """
    Parse a timestamp written as YYYY-MM-DDTHH:MM:SSZ into a datetime.

    Args:
        string (str): the timestamp text; it must match the format exactly,
            including the trailing literal "Z".

    Returns:
        datetime: the parsed value. It is naive (no tzinfo), because the "Z"
            is matched as a plain character rather than as a timezone.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    parsed = datetime.strptime(string, timestamp_format)
    return parsed
def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title: str, output_path: str) -> None: def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title: str, output_path: str) -> None:
...@@ -36,7 +66,28 @@ def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title: ...@@ -36,7 +66,28 @@ def plot_bar_chart(x: List[Any], y: List[Any], xlabel: str, ylabel: str, title:
plt.savefig(output_path, dpi=1200) plt.savefig(output_path, dpi=1200)
def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, int]: def get_organization_repositories(github_token, organization_name) -> List[str]:
"""
Retrieve the public repositories under the organization.
"""
url = f"https://api.github.com/orgs/{organization_name}/repos?type=public"
# prepare header
headers = {
'Authorization': f'Bearer {github_token}',
'Accept': 'application/vnd.github+json',
'X-GitHub-Api-Version': '2022-11-28'
}
res = requests.get(url, headers=headers).json()
repo_list = []
for item in res:
repo_list.append(item['name'])
return repo_list
def get_issue_pull_request_comments(github_token: str, org_name: str, repo_name: str, since: str) -> Dict[str, int]:
""" """
Retrieve the issue/PR comments made by our members in the last 7 days. Retrieve the issue/PR comments made by our members in the last 7 days.
...@@ -56,7 +107,7 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, ...@@ -56,7 +107,7 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str,
# do pagination to the API # do pagination to the API
page = 1 page = 1
while True: while True:
comment_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/comments?since={since}&page={page}' comment_api = f'https://api.github.com/repos/{org_name}/{repo_name}/issues/comments?since={since}&page={page}'
comment_response = requests.get(comment_api, headers=headers).json() comment_response = requests.get(comment_api, headers=headers).json()
if len(comment_response) == 0: if len(comment_response) == 0:
...@@ -70,7 +121,7 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, ...@@ -70,7 +121,7 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str,
continue continue
issue_id = item['issue_url'].split('/')[-1] issue_id = item['issue_url'].split('/')[-1]
issue_api = f'https://api.github.com/repos/hpcaitech/ColossalAI/issues/{issue_id}' issue_api = f'https://api.github.com/repos/{org_name}/{repo_name}/issues/{issue_id}'
issue_response = requests.get(issue_api, headers=headers).json() issue_response = requests.get(issue_api, headers=headers).json()
issue_author_relationship = issue_response['author_association'] issue_author_relationship = issue_response['author_association']
...@@ -87,7 +138,7 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str, ...@@ -87,7 +138,7 @@ def get_issue_pull_request_comments(github_token: str, since: str) -> Dict[str,
return user_engagement_count return user_engagement_count
def get_discussion_comments(github_token, since) -> Dict[str, int]: def get_discussion_comments(github_token: str, org_name: str, repo_name: str, since: str) -> Dict[str, int]:
""" """
Retrieve the discussion comments made by our members in the last 7 days. Retrieve the discussion comments made by our members in the last 7 days.
This is only available via the GitHub GraphQL API. This is only available via the GitHub GraphQL API.
...@@ -105,7 +156,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]: ...@@ -105,7 +156,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
offset_str = f", after: \"{cursor}\"" offset_str = f", after: \"{cursor}\""
query = f""" query = f"""
{{ {{
repository(owner: "hpcaitech", name: "ColossalAI"){{ repository(owner: "{org_name}", name: "{repo_name}"){{
discussions(first: {num} {offset_str}){{ discussions(first: {num} {offset_str}){{
edges {{ edges {{
cursor cursor
...@@ -134,7 +185,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]: ...@@ -134,7 +185,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
offset_str = f", before: \"{cursor}\"" offset_str = f", before: \"{cursor}\""
query = f""" query = f"""
{{ {{
repository(owner: "hpcaitech", name: "ColossalAI"){{ repository(owner: "{org_name}", name: "{repo_name}"){{
discussion(number: {discussion_number}){{ discussion(number: {discussion_number}){{
title title
comments(last: {num} {offset_str}){{ comments(last: {num} {offset_str}){{
...@@ -191,8 +242,8 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]: ...@@ -191,8 +242,8 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
for edge in edges: for edge in edges:
# print the discussion title # print the discussion title
discussion = edge['node'] discussion = edge['node']
discussion_updated_at = str2datetime(discussion['updatedAt'])
discussion_updated_at = datetime.strptime(discussion['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
# check if the updatedAt is within the last 7 days # check if the updatedAt is within the last 7 days
# if yes, add it to discussion_numbers # if yes, add it to discussion_numbers
if discussion_updated_at > since: if discussion_updated_at > since:
...@@ -250,6 +301,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]: ...@@ -250,6 +301,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
if reply['authorAssociation'] == 'MEMBER': if reply['authorAssociation'] == 'MEMBER':
# check if the updatedAt is within the last 7 days # check if the updatedAt is within the last 7 days
# if yes, add it to discussion_numbers # if yes, add it to discussion_numbers
reply_updated_at = datetime.strptime(reply['updatedAt'], "%Y-%m-%dT%H:%M:%SZ") reply_updated_at = datetime.strptime(reply['updatedAt'], "%Y-%m-%dT%H:%M:%SZ")
if reply_updated_at > since: if reply_updated_at > since:
member_name = reply['author']['login'] member_name = reply['author']['login']
...@@ -260,7 +312,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]: ...@@ -260,7 +312,7 @@ def get_discussion_comments(github_token, since) -> Dict[str, int]:
return user_engagement_count return user_engagement_count
def generate_user_engagement_leaderboard_image(github_token: str, output_path: str) -> bool: def generate_user_engagement_leaderboard_image(github_token: str, org_name: str, repo_list: List[str], output_path: str) -> bool:
""" """
Generate the user engagement leaderboard image for stats within the last 7 days Generate the user engagement leaderboard image for stats within the last 7 days
...@@ -270,23 +322,29 @@ def generate_user_engagement_leaderboard_image(github_token: str, output_path: s ...@@ -270,23 +322,29 @@ def generate_user_engagement_leaderboard_image(github_token: str, output_path: s
""" """
# request to the Github API to get the users who have replied the most in the last 7 days # request to the Github API to get the users who have replied the most in the last 7 days
now = datetime.utcnow() start_datetime = get_utc_time_one_week_ago()
start_datetime = now - timedelta(days=7) start_datetime_str = datetime2str(start_datetime)
start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")
# get the issue/PR comments and discussion comment count # get the issue/PR comments and discussion comment count
issue_pr_engagement_count = get_issue_pull_request_comments(github_token=github_token, since=start_datetime_str)
discussion_engagement_count = get_discussion_comments(github_token=github_token, since=start_datetime)
total_engagement_count = {} total_engagement_count = {}
# update the total engagement count def _update_count(counter):
total_engagement_count.update(issue_pr_engagement_count) for name, count in counter.items():
for name, count in discussion_engagement_count.items():
if name in total_engagement_count: if name in total_engagement_count:
total_engagement_count[name] += count total_engagement_count[name] += count
else: else:
total_engagement_count[name] = count total_engagement_count[name] = count
for repo_name in repo_list:
print(f"Fetching user engagement count for {repo_name}/{repo_name}")
issue_pr_engagement_count = get_issue_pull_request_comments(github_token=github_token, org_name=org_name, repo_name=repo_name, since=start_datetime_str)
discussion_engagement_count = get_discussion_comments(github_token=github_token, org_name=org_name, repo_name=repo_name, since=start_datetime)
# update the total engagement count
_update_count(issue_pr_engagement_count)
_update_count(discussion_engagement_count)
# prepare the data for plotting # prepare the data for plotting
x = [] x = []
y = [] y = []
...@@ -302,9 +360,6 @@ def generate_user_engagement_leaderboard_image(github_token: str, output_path: s ...@@ -302,9 +360,6 @@ def generate_user_engagement_leaderboard_image(github_token: str, output_path: s
x.append(count) x.append(count)
y.append(name) y.append(name)
# use Shanghai time to display on the image
start_datetime_str = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%dT%H:%M:%SZ")
# plot the leaderboard # plot the leaderboard
xlabel = f"Number of Comments made (since {start_datetime_str})" xlabel = f"Number of Comments made (since {start_datetime_str})"
ylabel = "Member" ylabel = "Member"
...@@ -315,7 +370,7 @@ def generate_user_engagement_leaderboard_image(github_token: str, output_path: s ...@@ -315,7 +370,7 @@ def generate_user_engagement_leaderboard_image(github_token: str, output_path: s
return False return False
def generate_contributor_leaderboard_image(github_token, output_path) -> bool: def generate_contributor_leaderboard_image(github_token, org_name, repo_list, output_path) -> bool:
""" """
Generate the contributor leaderboard image for stats within the last 7 days Generate the contributor leaderboard image for stats within the last 7 days
...@@ -324,54 +379,81 @@ def generate_contributor_leaderboard_image(github_token, output_path) -> bool: ...@@ -324,54 +379,81 @@ def generate_contributor_leaderboard_image(github_token, output_path) -> bool:
output_path (str): the path to save the image output_path (str): the path to save the image
""" """
# request to the Github API to get the users who have contributed in the last 7 days # request to the Github API to get the users who have contributed in the last 7 days
URL = 'https://api.github.com/repos/hpcaitech/ColossalAI/stats/contributors'
headers = { headers = {
'Authorization': f'Bearer {github_token}', 'Authorization': f'Bearer {github_token}',
'Accept': 'application/vnd.github+json', 'Accept': 'application/vnd.github+json',
'X-GitHub-Api-Version': '2022-11-28' 'X-GitHub-Api-Version': '2022-11-28'
} }
counter = Counter()
start_datetime = get_utc_time_one_week_ago()
def _get_url(org_name, repo_name, page):
return f'https://api.github.com/repos/{org_name}/{repo_name}/pulls?per_page=50&page={page}&state=closed'
def _iterate_by_page(org_name, repo_name):
page = 1
stop = False
while not stop:
print(f"Fetching pull request data for {org_name}/{repo_name} - page{page}")
url = _get_url(org_name, repo_name, page)
while True: while True:
response = requests.get(URL, headers=headers).json() response = requests.get(url, headers=headers).json()
if isinstance(response, list):
# sometimes the Github API returns nothing
# request again if the response is not a list
break
print("Empty response, request again...")
if len(response) != 0: if len(response) == 0:
# sometimes the Github API returns empty response for unknown reason # if the response is empty, stop
# request again if the response is empty stop = True
break break
contributor_list = [] # count the pull request and author from response
for pr_data in response:
merged_at = pr_data['merged_at']
author = pr_data['user']['login']
if merged_at is None:
continue
merge_datetime = str2datetime(merged_at)
if merge_datetime < start_datetime:
# if we found a pull request that is merged before the start_datetime
# we stop
stop = True
break
else:
# record the author
counter.record(author)
# get number of commits for each contributor # next page
start_timestamp = None page += 1
for item in response:
num_commits_this_week = item['weeks'][-1]['c']
name = item['author']['login']
contributor = Contributor(name=name, num_commits_this_week=num_commits_this_week)
contributor_list.append(contributor)
# update start_timestamp for repo_name in repo_list:
start_timestamp = item['weeks'][-1]['w'] _iterate_by_page(org_name, repo_name)
# convert unix timestamp to Beijing datetime # convert unix timestamp to Beijing datetime
start_datetime = datetime.fromtimestamp(start_timestamp, tz=pytz.timezone('Asia/Shanghai')) bj_start_datetime = datetime.fromtimestamp(start_datetime.timestamp(), tz=pytz.timezone('Asia/Shanghai'))
start_datetime_str = start_datetime.strftime("%Y-%m-%dT%H:%M:%SZ") bj_start_datetime_str = datetime2str(bj_start_datetime)
# sort by number of commits contribution_list = counter.to_sorted_list()
contributor_list.sort(key=lambda x: x.num_commits_this_week, reverse=True)
# remove contributors who has zero commits # remove contributors who has zero commits
contributor_list = [x for x in contributor_list if x.num_commits_this_week > 0] author_list = [x[0] for x in contribution_list]
num_commit_list = [x[1] for x in contribution_list]
# prepare the data for plotting
x = [x.num_commits_this_week for x in contributor_list]
y = [x.name for x in contributor_list]
# plot # plot
if len(x) > 0: if len(author_list) > 0:
xlabel = f"Number of Commits (since {start_datetime_str})" xlabel = f"Number of Pull Requests (since {bj_start_datetime_str})"
ylabel = "Contributor" ylabel = "Contributor"
title = 'Active Contributor Leaderboard' title = 'Active Contributor Leaderboard'
plot_bar_chart(x, y, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path) plot_bar_chart(num_commit_list, author_list, xlabel=xlabel, ylabel=ylabel, title=title, output_path=output_path)
return True return True
else: else:
return False return False
...@@ -438,10 +520,14 @@ if __name__ == '__main__': ...@@ -438,10 +520,14 @@ if __name__ == '__main__':
GITHUB_TOKEN = os.environ['GITHUB_TOKEN'] GITHUB_TOKEN = os.environ['GITHUB_TOKEN']
CONTRIBUTOR_IMAGE_PATH = 'contributor_leaderboard.png' CONTRIBUTOR_IMAGE_PATH = 'contributor_leaderboard.png'
USER_ENGAGEMENT_IMAGE_PATH = 'engagement_leaderboard.png' USER_ENGAGEMENT_IMAGE_PATH = 'engagement_leaderboard.png'
ORG_NAME = "hpcaitech"
# get all open source repositories
REPO_LIST = get_organization_repositories(GITHUB_TOKEN, ORG_NAME)
# generate images # generate images
contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, CONTRIBUTOR_IMAGE_PATH) contrib_success = generate_contributor_leaderboard_image(GITHUB_TOKEN, ORG_NAME, REPO_LIST, CONTRIBUTOR_IMAGE_PATH)
engagement_success = generate_user_engagement_leaderboard_image(GITHUB_TOKEN, USER_ENGAGEMENT_IMAGE_PATH) engagement_success = generate_user_engagement_leaderboard_image(GITHUB_TOKEN, ORG_NAME, REPO_LIST, USER_ENGAGEMENT_IMAGE_PATH)
# upload images # upload images
APP_ID = os.environ['LARK_APP_ID'] APP_ID = os.environ['LARK_APP_ID']
...@@ -457,8 +543,8 @@ if __name__ == '__main__': ...@@ -457,8 +543,8 @@ if __name__ == '__main__':
2. 用户互动榜单 2. 用户互动榜单
注: 注:
- 开发贡献者测评标准为:本周由公司成员提交的commit次数 - 开发贡献者测评标准为:本周由公司成员与社区在所有开源仓库提交的Pull Request次数
- 用户互动榜单测评标准为:本周由公司成员在非成员创建的issue/PR/discussion中回复的次数 - 用户互动榜单测评标准为:本周由公司成员在非成员在所有开源仓库创建的issue/PR/discussion中回复的次数
""" """
send_message_to_lark(message, LARK_WEBHOOK_URL) send_message_to_lark(message, LARK_WEBHOOK_URL)
...@@ -467,7 +553,7 @@ if __name__ == '__main__': ...@@ -467,7 +553,7 @@ if __name__ == '__main__':
if contrib_success: if contrib_success:
send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL) send_image_to_lark(contributor_image_key, LARK_WEBHOOK_URL)
else: else:
send_message_to_lark("本周没有成员贡献commit,无榜单图片生成。", LARK_WEBHOOK_URL) send_message_to_lark("本周没有成员贡献PR,无榜单图片生成。", LARK_WEBHOOK_URL)
# send user engagement image to lark # send user engagement image to lark
if engagement_success: if engagement_success:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment