Unverified commit 8e51049f, authored by Xiaoyu Zhang, committed by GitHub
Browse files

[CI Monitor] CI monitor only deals with the main branch by default (#11538)

parent cb8f3d90
...@@ -31,9 +31,10 @@ class SGLangCIAnalyzer: ...@@ -31,9 +31,10 @@ class SGLangCIAnalyzer:
self.session = requests.Session() self.session = requests.Session()
self.session.headers.update(self.headers) self.session.headers.update(self.headers)
def get_recent_runs(self, limit: int = 100) -> List[Dict]: def get_recent_runs(self, limit: int = 100, branch: str = None) -> List[Dict]:
"""Get recent CI run data""" """Get recent CI run data"""
print(f"Fetching {limit} recent CI runs...") branch_info = f" from branch '{branch}'" if branch else ""
print(f"Fetching {limit} recent CI runs{branch_info}...")
all_runs = [] all_runs = []
page = 1 page = 1
...@@ -42,6 +43,8 @@ class SGLangCIAnalyzer: ...@@ -42,6 +43,8 @@ class SGLangCIAnalyzer:
while len(all_runs) < limit: while len(all_runs) < limit:
url = f"{self.base_url}/repos/{self.repo}/actions/runs" url = f"{self.base_url}/repos/{self.repo}/actions/runs"
params = {"per_page": min(per_page, limit - len(all_runs)), "page": page} params = {"per_page": min(per_page, limit - len(all_runs)), "page": page}
if branch:
params["branch"] = branch
try: try:
response = self.session.get(url, params=params) response = self.session.get(url, params=params)
...@@ -407,6 +410,11 @@ def main(): ...@@ -407,6 +410,11 @@ def main():
default="ci_analysis.json", default="ci_analysis.json",
help="Output file (default: ci_analysis.json)", help="Output file (default: ci_analysis.json)",
) )
parser.add_argument(
"--branch",
default="main",
help="Filter runs by branch (default: 'main'). Set to empty string '' to analyze all branches.",
)
args = parser.parse_args() args = parser.parse_args()
...@@ -415,7 +423,9 @@ def main(): ...@@ -415,7 +423,9 @@ def main():
try: try:
# Get CI run data # Get CI run data
runs = analyzer.get_recent_runs(args.limit) # Use None for branch if empty string is provided (to scan all branches)
branch = args.branch if args.branch else None
runs = analyzer.get_recent_runs(args.limit, branch)
if not runs: if not runs:
print("No CI run data found") print("No CI run data found")
......
...@@ -128,24 +128,29 @@ class SGLangPerfAnalyzer: ...@@ -128,24 +128,29 @@ class SGLangPerfAnalyzer:
rcParams["grid.alpha"] = 0.3 rcParams["grid.alpha"] = 0.3
def get_recent_runs( def get_recent_runs(
self, limit: int = 100, start_date: str = None, end_date: str = None self,
limit: int = 100,
start_date: str = None,
end_date: str = None,
branch: str = None,
) -> List[Dict]: ) -> List[Dict]:
"""Get recent CI run data with multiple collection strategies""" """Get recent CI run data with multiple collection strategies"""
# If date range is specified, get all data in that range # If date range is specified, get all data in that range
if start_date or end_date: if start_date or end_date:
return self._get_date_range_runs(start_date, end_date) return self._get_date_range_runs(start_date, end_date, branch)
print(f"Getting PR Test runs (limit: {limit})...") branch_info = f" from branch '{branch}'" if branch else ""
print(f"Getting PR Test runs{branch_info} (limit: {limit})...")
# Use sampling strategy if limit >= 500, otherwise use sequential # Use sampling strategy if limit >= 500, otherwise use sequential
if limit >= 500: if limit >= 500:
print(f"Using uniform sampling for {limit} runs to cover ~30 days...") print(f"Using uniform sampling for {limit} runs to cover ~30 days...")
return self._get_sampled_runs(limit) return self._get_sampled_runs(limit, branch)
else: else:
return self._get_sequential_runs(limit) return self._get_sequential_runs(limit, branch)
def _get_sequential_runs(self, limit: int) -> List[Dict]: def _get_sequential_runs(self, limit: int, branch: str = None) -> List[Dict]:
"""Original sequential method for smaller limits""" """Original sequential method for smaller limits"""
print(f"Using sequential sampling for {limit} runs...") print(f"Using sequential sampling for {limit} runs...")
...@@ -156,6 +161,8 @@ class SGLangPerfAnalyzer: ...@@ -156,6 +161,8 @@ class SGLangPerfAnalyzer:
while len(pr_test_runs) < limit: while len(pr_test_runs) < limit:
url = f"{self.base_url}/repos/{self.repo}/actions/runs" url = f"{self.base_url}/repos/{self.repo}/actions/runs"
params = {"per_page": per_page, "page": page} params = {"per_page": per_page, "page": page}
if branch:
params["branch"] = branch
try: try:
response = self.session.get(url, params=params) response = self.session.get(url, params=params)
...@@ -192,12 +199,14 @@ class SGLangPerfAnalyzer: ...@@ -192,12 +199,14 @@ class SGLangPerfAnalyzer:
return pr_test_runs return pr_test_runs
def _get_sampled_runs(self, limit: int) -> List[Dict]: def _get_sampled_runs(self, limit: int, branch: str = None) -> List[Dict]:
"""Uniform sampling method for 30-day coverage""" """Uniform sampling method for 30-day coverage"""
from datetime import datetime, timedelta from datetime import datetime, timedelta
# Uniform sampling across 30 days # Uniform sampling across 30 days
sampled_runs = self._sample_time_period(limit, days_back=30, uniform=True) sampled_runs = self._sample_time_period(
limit, days_back=30, uniform=True, branch=branch
)
print( print(
f"Sampled {len(sampled_runs)} runs from 30-day period (requested: {limit})" f"Sampled {len(sampled_runs)} runs from 30-day period (requested: {limit})"
...@@ -210,6 +219,7 @@ class SGLangPerfAnalyzer: ...@@ -210,6 +219,7 @@ class SGLangPerfAnalyzer:
days_back: int, days_back: int,
skip_recent_days: int = 0, skip_recent_days: int = 0,
uniform: bool = False, uniform: bool = False,
branch: str = None,
) -> List[Dict]: ) -> List[Dict]:
"""Sample runs from a specific time period""" """Sample runs from a specific time period"""
from datetime import datetime, timedelta from datetime import datetime, timedelta
...@@ -231,6 +241,8 @@ class SGLangPerfAnalyzer: ...@@ -231,6 +241,8 @@ class SGLangPerfAnalyzer:
while True: while True:
url = f"{self.base_url}/repos/{self.repo}/actions/runs" url = f"{self.base_url}/repos/{self.repo}/actions/runs"
params = {"per_page": per_page, "page": page} params = {"per_page": per_page, "page": page}
if branch:
params["branch"] = branch
try: try:
response = self.session.get(url, params=params) response = self.session.get(url, params=params)
...@@ -358,7 +370,7 @@ class SGLangPerfAnalyzer: ...@@ -358,7 +370,7 @@ class SGLangPerfAnalyzer:
return sampled_runs return sampled_runs
def _get_date_range_runs( def _get_date_range_runs(
self, start_date: str = None, end_date: str = None self, start_date: str = None, end_date: str = None, branch: str = None
) -> List[Dict]: ) -> List[Dict]:
"""Get all CI runs within specified date range""" """Get all CI runs within specified date range"""
from datetime import datetime, timedelta from datetime import datetime, timedelta
...@@ -394,8 +406,9 @@ class SGLangPerfAnalyzer: ...@@ -394,8 +406,9 @@ class SGLangPerfAnalyzer:
f"start_date ({start_date}) must be before end_date ({end_date})" f"start_date ({start_date}) must be before end_date ({end_date})"
) )
branch_info = f" from branch '{branch}'" if branch else ""
print( print(
f"Getting ALL CI runs from {start_time.strftime('%Y-%m-%d')} to {end_time.strftime('%Y-%m-%d')}" f"Getting ALL CI runs{branch_info} from {start_time.strftime('%Y-%m-%d')} to {end_time.strftime('%Y-%m-%d')}"
) )
collected_runs = [] collected_runs = []
...@@ -406,6 +419,8 @@ class SGLangPerfAnalyzer: ...@@ -406,6 +419,8 @@ class SGLangPerfAnalyzer:
while True: while True:
url = f"{self.base_url}/repos/{self.repo}/actions/runs" url = f"{self.base_url}/repos/{self.repo}/actions/runs"
params = {"per_page": per_page, "page": page} params = {"per_page": per_page, "page": page}
if branch:
params["branch"] = branch
try: try:
response = self.session.get(url, params=params) response = self.session.get(url, params=params)
...@@ -1331,6 +1346,11 @@ def main(): ...@@ -1331,6 +1346,11 @@ def main():
type=str, type=str,
help="End date for date range query (YYYY-MM-DD format). When specified with --start-date, gets ALL runs in range.", help="End date for date range query (YYYY-MM-DD format). When specified with --start-date, gets ALL runs in range.",
) )
parser.add_argument(
"--branch",
default="main",
help="Filter runs by branch (default: 'main'). Set to empty string '' to analyze all branches.",
)
args = parser.parse_args() args = parser.parse_args()
...@@ -1339,7 +1359,11 @@ def main(): ...@@ -1339,7 +1359,11 @@ def main():
try: try:
# Get CI run data # Get CI run data
runs = analyzer.get_recent_runs(args.limit, args.start_date, args.end_date) # Use None for branch if empty string is provided (to scan all branches)
branch = args.branch if args.branch else None
runs = analyzer.get_recent_runs(
args.limit, args.start_date, args.end_date, branch
)
if not runs: if not runs:
print("No CI run data found") print("No CI run data found")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment