import re

import pandas as pd


class RcclLogParser:
    """Collect RCCL/NCCL log lines and print a summary report to stdout.

    Usage: feed raw log lines through :meth:`collect`, then call
    :meth:`report`.  Only lines of the form ``[<digits>] NCCL <LEVEL> <msg>``
    (``LEVEL`` being ``INFO``, ``WARN`` or ``ERROR``) contribute to the
    report; the ``<msg>`` part is what every section searches.

    Both the raw lines and the extracted messages are kept in sets, so
    identical entries are de-duplicated.  Because set iteration order for
    strings varies between interpreter runs, every report section iterates
    the messages in sorted order to keep the printed report reproducible.
    """

    # Captures the message that follows "[<digits>] NCCL <LEVEL> ".
    _LOG_LINE_RE = re.compile(r"\[\d+\]\s+NCCL\s+(?:INFO|WARN|ERROR)\s+(.*)")
    # Captures (variable name, value) from "<NAME> set by environment to <VALUE>".
    _USER_ENV_RE = re.compile(r"(\w+)\s+set by environment to\s+(.+)")

    def __init__(self):
        # Messages extracted from raw_lines by _preprocess_line().
        self.output = set()
        # Raw log lines exactly as passed to collect().
        self.raw_lines = set()

        # Regex -> fixed string to print, or None to print the message as-is.
        self.sys_patterns = {
            r"kernel version": None,
            r"ROCr version": None,
            r"RCCL version": None,
            r"Librccl path": None,
            r"iommu": None,
            r"Dmabuf feature disabled": "Dmabuf: disabled",
            r"Disabled GDRCopy": "GDRCopy: disabled",
        }

        # Regex for the key as it appears in the log -> report column name.
        self.graph_info_fields = {
            r"Pattern": "Pattern",
            r"crossNic": "crossNic",
            r"nChannels": "nChannels",
            r"bw": "bandwidth",
            r"type": "type",
            r"sameChannels": "sameChannels",
        }

        # Regex for the key as it appears in the log -> report column name.
        self.cl_transfer_fields = {
            r"protocol": "protocol",
            r"nbytes": "nbytes",
            r"algorithm": "algorithm",
            r"slicesteps": "slicesteps",
            r"nchannels": "nchannels",
            r"nloops": "nloops",
            r"nsteps": "nsteps",
            r"chunksize": "chunksize",
        }

        # Regex for the key as it appears in the log -> report column name.
        self.p2p_fields = {
            r"p2p : rank": "local",
            r"send rank": "send",
            r"recv rank": "recv",
            r"p2pnChannelsPerPeer": "p2pnChannelsPerPeer",
            r"p2pnChannels": "p2pnChannels",
            r"nChannelsMax": "nChannelsMax",
            r"protocol": "protocol",
        }

    def collect(self, line: str) -> None:
        """Remember one raw log line for the next call to :meth:`report`."""
        self.raw_lines.add(line)

    def report(self) -> None:
        """Print the full report (all sections) to stdout."""
        print(" RCCL Log Parser Report ".center(80, "="))
        print()

        for line in self.raw_lines:
            self._preprocess_line(line)

        self._report_sys()
        self._report_user_envs()
        self._report_graph_info()
        self._report_cl_transfers()
        self._report_p2p_transfers()

        print(" End of Report ".center(80, "="))

    def _preprocess_line(self, line: str) -> None:
        """Store the message part of *line* in ``self.output`` if it is a log line."""
        match = self._LOG_LINE_RE.search(line)
        if match:
            self.output.add(match.group(1))

    def _sorted_output(self) -> list:
        """Return the extracted messages in a reproducible (sorted) order."""
        return sorted(self.output)

    @staticmethod
    def _extract_fields(lines, fields, key_prefix="", value_pattern=r"\S+"):
        """Build a DataFrame with one column per entry of *fields*.

        Each column holds the first ``<key><whitespace><value>`` match found
        in the corresponding line (NaN when the key is absent).  Fields are
        extracted independently, so their order within a line is irrelevant.
        """
        df = pd.DataFrame(lines, columns=["raw_log"])
        for pattern, col_name in fields.items():
            df[col_name] = df["raw_log"].str.extract(
                rf"{key_prefix}{pattern}\s+({value_pattern})", expand=False
            )
        return df.drop(columns=["raw_log"])

    @staticmethod
    def _to_numeric(df, columns) -> None:
        """Convert the listed columns (when present) to numbers for correct sorting."""
        for col in columns:
            if col in df.columns:
                # Unparseable values become NaN rather than raising.
                df[col] = pd.to_numeric(df[col], errors="coerce")

    @staticmethod
    def _print_table(df) -> None:
        """Print *df* without its index, showing missing values as "-"."""
        print(df.fillna("-").to_string(index=False))
        print()

    def _report_sys(self) -> None:
        """Search patterns and print pre-defined strings if matched"""
        print("===> System Information:\n")

        # A dict is used as an ordered set: several messages may map to the
        # same fixed string, which should be reported only once.
        reported = {}
        for line in self._sorted_output():
            for pattern, output in self.sys_patterns.items():
                if re.search(pattern, line, re.IGNORECASE):
                    reported[output if output else line] = None
                    break  # first matching pattern wins

        for line in reported:
            print(line)
        print()

    def _report_user_envs(self) -> None:
        """Search environment variables set by user"""
        print("===> User-defined Environment Variables:\n")

        for line in self._sorted_output():
            m = self._USER_ENV_RE.search(line)
            if m:
                print(f"{m.group(1)}: {m.group(2)}")
        print()

    def _report_graph_info(self) -> None:
        """Extract graph information"""
        print("===> Graph Info:\n")

        # Filter lines by looking for 'Pattern' and 'crossNic'
        filtered_lines = [
            line
            for line in self._sorted_output()
            if "Pattern" in line and "crossNic" in line
        ]
        if not filtered_lines:
            print(" (No graph info found)\n")
            return

        # Values are comma-separated, so use [^,\s]+ to exclude trailing commas
        df = self._extract_fields(
            filtered_lines,
            self.graph_info_fields,
            key_prefix=r"\b",
            value_pattern=r"[^,\s]+",
        )
        self._to_numeric(df, ["Pattern"])

        df = df.drop_duplicates()
        if "Pattern" in df.columns:
            # Stable sort keeps rows with equal Pattern in their sorted input order.
            df = df.sort_values(by="Pattern", ascending=False, kind="stable")

        self._print_table(df)

    def _report_cl_transfers(self) -> None:
        """Extract non-P2P transfer arguments"""
        print("===> Unique Ring/Tree Transfers:\n")

        # Filter lines by looking for 'protocol' and 'nbytes'
        raw_lines = [
            line
            for line in self._sorted_output()
            if "protocol" in line and "nbytes" in line
        ]
        if not raw_lines:
            print(" (No transfer patterns found)\n")
            return

        df = self._extract_fields(raw_lines, self.cl_transfer_fields, key_prefix=r"\b")
        self._to_numeric(df, ["nbytes", "nchannels"])

        # Drop rows where mandatory fields are missing
        mandatory_cols = [c for c in ["protocol", "nbytes"] if c in df.columns]
        df = df.dropna(subset=mandatory_cols).drop_duplicates()
        if df.empty:
            # Every candidate line lacked a usable protocol/nbytes value.
            print(" (No transfer patterns found)\n")
            return

        sort_cols = [c for c in ["nbytes", "protocol", "nchannels"] if c in df.columns]
        if sort_cols:
            df = df.sort_values(by=sort_cols, kind="stable")

        self._print_table(df)

    def _report_p2p_transfers(self) -> None:
        """Extract P2P transfer details"""
        print("===> Unique P2P Transfers:\n")

        # Filter lines by looking for 'p2p :' and 'send rank'
        raw_lines = [
            line
            for line in self._sorted_output()
            if "p2p :" in line and "send rank" in line
        ]
        if not raw_lines:
            print(" (No P2P transfers found)\n")
            return

        df = self._extract_fields(raw_lines, self.p2p_fields)
        self._to_numeric(
            df,
            [
                "local",
                "send",
                "recv",
                "p2pnChannelsPerPeer",
                "p2pnChannels",
                "nChannelsMax",
            ],
        )

        df = df.drop_duplicates()
        sort_cols = [c for c in ["protocol", "local", "send", "recv"] if c in df.columns]
        if sort_cols:
            df = df.sort_values(by=sort_cols, kind="stable")

        # Move 'protocol' to the first column
        cols = df.columns.tolist()
        if "protocol" in cols:
            cols.remove("protocol")
            cols.insert(0, "protocol")
            df = df[cols]

        self._print_table(df)