Commit deed39a3 authored by one's avatar one
Browse files

[xcl-lens] Migrate to typer

parent 48e40f63
......@@ -10,7 +10,7 @@ readme = "README.md"
license = { text = "MIT" }
authors = [{ name = "alephpiece", email = "wangan.cs@gmail.com" }]
requires-python = ">=3.10"
dependencies = ["pandas>=2.0.0"]
dependencies = ["pandas>=2.0.0", "typer>=0.23"]
keywords = ["rccl", "nccl", "logging", "parser", "gpu"]
classifiers = [
"License :: OSI Approved :: MIT License",
......@@ -29,7 +29,7 @@ Repository = "https://github.com/alephpiece/hg-misc-tools"
Issues = "https://github.com/alephpiece/hg-misc-tools/issues"
[project.scripts]
xcl-lens = "xcl_lens.main:main"
xcl-lens = "xcl_lens.main:app"
[dependency-groups]
dev = ["pytest>=8", "ruff>=0.15"]
......
#!/usr/bin/env python3
import argparse
import sys
from typing import Annotated
import typer
from .runner import run_with_input
from .utils import get_mpi_rank
# Diff view (removed and added lines interleaved, indentation lost in
# extraction): the argparse-based create_parser() is removed and a
# module-level typer.Typer application named `app` takes its place.
# The removed parser defined --summary, -v/--verbose and a REMAINDER
# positional `command`; the "Usage modes" text is carried over as `help`.
# NOTE(review): the argparse version used RawDescriptionHelpFormatter to keep
# the line breaks of the usage list. Click/Typer may re-wrap single newlines
# in help text unless the paragraph is protected (e.g. with a \b marker) —
# verify the rendered `xcl-lens --help` output.
def create_parser():
parser = argparse.ArgumentParser(
description="RCCL Log Parser Wrapper\n\n"
app = typer.Typer(
help=(
"RCCL Log Parser Wrapper\n\n"
"Usage modes:\n"
" 1. Pipe input: cat log.txt | xcl-lens\n"
" 2. Read files: xcl-lens log1.txt log2.txt\n"
" 3. Wrap command: xcl-lens ./all_reduce_perf",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("--summary", action="store_true", help="Print summary report only")
parser.add_argument("-v", "--verbose", action="store_true", help="Print verbose reports")
parser.add_argument(
"command", nargs=argparse.REMAINDER, help="Executable to run, or log files to read"
)
return parser
" 3. Wrap command: xcl-lens ./all_reduce_perf"
),
add_completion=False,
)
# Diff view of the CLI entry point (removed and added lines interleaved,
# indentation lost in extraction). The argparse-driven main() becomes a Typer
# command registered on `app`:
#   * --summary, -v/--verbose, --hosts and --ranks are declared as annotated
#     parameters (--hosts/--ranks are new in this commit);
#   * the wrapped command or log-file list is read from ctx.args; the
#     allow_extra_args / ignore_unknown_options context settings are what let
#     those extra tokens through.
# create_parser() is deleted by this commit, so `parser = create_parser()`,
# `args = parser.parse_args()` and the positional run_with_input(args, rank)
# call belong to the removed side of the diff.
def main():
@app.command(
context_settings={
"allow_extra_args": True,
"ignore_unknown_options": True,
"help_option_names": ["-h", "--help"],
}
)
def main(
ctx: typer.Context,
summary: Annotated[bool, typer.Option(help="Print summary report only")] = False,
verbose: Annotated[bool, typer.Option("-v", "--verbose", help="Print verbose reports")] = False,
hosts: Annotated[str | None, typer.Option(help="Filter by hosts (comma-separated)")] = None,
ranks: Annotated[str | None, typer.Option(help="Filter by ranks (comma-separated)")] = None,
):
"""RCCL Log Parser Wrapper."""
rank = get_mpi_rank()
parser = create_parser()
args = parser.parse_args()
command: list[str] = ctx.args
try:
exit_code = run_with_input(args, rank)
# Split the comma-separated filter strings; None means "no filter given".
# NOTE(review): empty items (e.g. from a trailing comma) are kept, and
# RcclLogParser.__init__ converts every rank item with int(), so
# `--ranks 0,` would raise ValueError there — confirm whether that is intended.
hosts_list = [h.strip() for h in hosts.split(",")] if hosts else None
ranks_list = [r.strip() for r in ranks.split(",")] if ranks else None
exit_code = run_with_input(
command=command,
summary=summary,
verbose=verbose,
hosts=hosts_list,
ranks=ranks_list,
rank=rank,
)
# run_with_input returns None when there is nothing to process (no command
# and stdin is a tty); any other value is used as the process exit status.
if exit_code is not None:
sys.exit(exit_code)
except KeyboardInterrupt:
......@@ -39,9 +56,9 @@ def main():
# If we got here, no command was provided and stdin is a tty
if rank == 0:
# NOTE(review): the markerless diff does not show whether parser.print_help()
# survives the migration; if it does, `parser` is no longer defined in the
# new main(). Confirm against the actual file.
parser.print_help()
raise typer.Exit(code=1)
sys.exit(1)
if __name__ == "__main__":
main()
app()
......@@ -4,12 +4,16 @@ import pandas as pd
class RcclLogParser:
def __init__(self):
def __init__(self, verbose=False, hosts=None, ranks=None):
# (host, rank, content) -> None
self.log_entries = dict()
# Verbosity flag used by report sections
self._verbose = False
self._verbose = verbose
# Filters
self._hosts = hosts if hosts is not None else []
self._ranks = [int(r) for r in ranks] if ranks is not None else []
# Pattern -> output string or as-is
self.sys_patterns = {
......@@ -60,13 +64,10 @@ class RcclLogParser:
def collect(self, line):
self._preprocess_line(line)
def report(self, verbose=False):
def report(self):
print(" RCCL Log Parser Report ".center(80, "="))
print()
# Remember verbosity for sub-sections
self._verbose = verbose
self._report_sys()
self._report_user_envs()
self._report_gdr_info()
......@@ -88,6 +89,10 @@ class RcclLogParser:
)
if match:
host, rank, content = match.group(1), int(match.group(2)), match.group(3)
if self._hosts and host not in self._hosts:
return
if self._ranks and rank not in self._ranks:
return
self.log_entries[(host, rank, content)] = None
return
......@@ -95,6 +100,8 @@ class RcclLogParser:
match = re.search(r"\[(\d+)\]\s+NCCL\s+(?:INFO|WARN|ERROR)\s+(.*)", line)
if match:
rank, content = int(match.group(1)), match.group(2)
if self._ranks and rank not in self._ranks:
return
self.log_entries[("-", rank, content)] = None
def _report_sys(self):
......@@ -156,9 +163,6 @@ class RcclLogParser:
df_ib.drop_duplicates(inplace=True)
df_ib.sort_values(by=["host", "rank", "hca_no", "hca_id"], inplace=True)
df_ib = df_ib[["host", "rank", "hca_no", "hca_id", "gdr"]]
if not self._verbose:
df_ib = df_ib.drop(columns=["host", "rank"])
df_ib.drop_duplicates(inplace=True)
print(df_ib.to_string(index=False))
print()
else:
......@@ -203,9 +207,6 @@ class RcclLogParser:
inplace=True,
)
df_gpu = df_gpu[["host", "rank", "gpu", "hca_no", "distance", "max_distance", "r/w"]]
if not self._verbose:
df_gpu = df_gpu.drop(columns=["host", "rank"])
df_gpu.drop_duplicates(inplace=True)
print(df_gpu.to_string(index=False))
print()
else:
......
......@@ -5,56 +5,86 @@ import sys
from .parser import RcclLogParser
# Diff view of run_with_input (removed and added lines interleaved,
# indentation lost in extraction). The argparse namespace `args` is replaced
# by explicit keyword-only parameters.
# Dispatch order in the new version:
#   1. empty `command`  -> read stdin if it is not a tty, otherwise return None;
#   2. command[0] is an existing file -> treat every item as a log file;
#   3. anything else    -> run `command` as a subprocess.
# The return value is whatever the selected helper returns, or None in the
# "no command, stdin is a tty" case.
def run_with_input(args, rank):
def run_with_input(
*,
command: list[str],
summary: bool,
verbose: bool,
hosts: list[str] | None,
ranks: list[str] | None,
rank: int,
):
"""Handle all three input modes: stdin, files, or command execution"""
log_prefix = f"[Rank {rank}]"
cmd = args.command
# Case 1: No command provided - check for stdin
if not cmd:
if not command:
if not sys.stdin.isatty():
return _process_stdin(args, rank)
return _process_stdin(
summary=summary, verbose=verbose, hosts=hosts, ranks=ranks, rank=rank
)
else:
return None
# Case 2: Check if first argument is an existing file (treat as log file)
if os.path.isfile(cmd[0]):
return _process_files(args, rank, log_prefix, cmd)
if os.path.isfile(command[0]):
return _process_files(
summary=summary,
verbose=verbose,
hosts=hosts,
ranks=ranks,
rank=rank,
log_prefix=log_prefix,
filenames=command,
)
# Case 3: Execute as command
# NOTE(review): unlike the stdin and file paths, hosts/ranks are not forwarded
# here, so the --hosts/--ranks filters appear to have no effect when wrapping
# a command — confirm whether that is intended.
return _execute_command(args, rank, log_prefix, cmd)
return _execute_command(
summary=summary, verbose=verbose, rank=rank, log_prefix=log_prefix, cmd=command
)
# Diff view of _process_stdin (removed and added lines interleaved).
# New version: builds an RcclLogParser configured with verbose/hosts/ranks,
# feeds it every line of sys.stdin (echoing each line unless `summary` is
# set), prints the report on rank 0 only, and returns 0.
# Verbosity is now passed to the parser constructor rather than to report().
def _process_stdin(args, rank):
rccl_parser = RcclLogParser()
def _process_stdin(
*, summary: bool, verbose: bool, hosts: list[str] | None, ranks: list[str] | None, rank: int
):
rccl_parser = RcclLogParser(verbose=verbose, hosts=hosts, ranks=ranks)
for line in sys.stdin:
if not args.summary:
if not summary:
print(f"{line}", end="", flush=True)
rccl_parser.collect(line)
if rank == 0:
rccl_parser.report(verbose=args.verbose)
rccl_parser.report()
return 0
# Diff view of _process_files (removed and added lines interleaved).
# New version: builds one RcclLogParser configured with verbose/hosts/ranks
# and feeds it every line of every file in `filenames`, in order (echoing each
# line unless `summary` is set). Prints the report on rank 0 and returns 0.
# If any entry is not an existing file, an error prefixed with `log_prefix`
# is printed and 1 is returned immediately, without printing a report.
def _process_files(args, rank, log_prefix, filenames):
rccl_parser = RcclLogParser()
def _process_files(
*,
summary: bool,
verbose: bool,
hosts: list[str] | None,
ranks: list[str] | None,
rank: int,
log_prefix: str,
filenames: list[str],
):
rccl_parser = RcclLogParser(verbose=verbose, hosts=hosts, ranks=ranks)
for filename in filenames:
if not os.path.isfile(filename):
print(f"{log_prefix} Error: File not found: {filename}")
return 1
# Undecodable bytes are replaced rather than raising, so a partly binary
# log can still be scanned.
with open(filename, encoding="utf-8", errors="replace") as f:
for line in f:
if not args.summary:
if not summary:
print(f"{line}", end="", flush=True)
rccl_parser.collect(line)
if rank == 0:
rccl_parser.report(verbose=args.verbose)
rccl_parser.report()
return 0
# Diff view of _execute_command (removed and added lines interleaved; the
# middle of the function is hidden behind the hunk marker below).
# Visible behaviour of the new version: copies the environment and forces
# NCCL_DEBUG=INFO and NCCL_DEBUG_SUBSYS=ALL, streams process.stdout line by
# line into parser.collect() (echoing each line unless `summary` is set),
# waits for the process, prints the report on rank 0 and returns the
# process's return code. The stdout loop is newly guarded by
# `if process.stdout is not None:`.
def _execute_command(args, rank, log_prefix, cmd):
def _execute_command(*, summary: bool, verbose: bool, rank: int, log_prefix: str, cmd: list[str]):
env = os.environ.copy()
env["NCCL_DEBUG"] = "INFO"
env["NCCL_DEBUG_SUBSYS"] = "ALL"
......@@ -71,14 +101,15 @@ def _execute_command(args, rank, log_prefix, cmd):
bufsize=1,
)
if process.stdout is not None:
for line in process.stdout:
if not args.summary:
if not summary:
print(f"{line}", end="", flush=True)
parser.collect(line)
process.wait()
if rank == 0:
parser.report(verbose=args.verbose)
# NOTE(review): the migrated call below still passes verbose=..., but this
# commit changes RcclLogParser.report() to take no arguments (verbosity moved
# to the constructor), so this raises TypeError on rank 0. It should be
# parser.report(), with verbose supplied where `parser` is constructed
# (that construction is in the hidden lines — confirm).
parser.report(verbose=verbose)
return process.returncode
version = 1
revision = 3
revision = 2
requires-python = ">=3.10"
resolution-markers = [
"python_full_version >= '3.14' and sys_platform == 'win32'",
......@@ -11,6 +11,27 @@ resolution-markers = [
"python_full_version < '3.11'",
]
[[package]]
name = "annotated-doc"
version = "0.0.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" },
]
[[package]]
name = "click"
version = "8.3.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
]
[[package]]
name = "colorama"
version = "0.4.6"
......@@ -41,6 +62,27 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
]
[[package]]
name = "markdown-it-py"
version = "4.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mdurl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
]
[[package]]
name = "mdurl"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
]
[[package]]
name = "numpy"
version = "2.2.6"
......@@ -391,6 +433,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/10/99/781fe0c827be2742bcc775efefccb3b048a3a9c6ce9aec0cbf4a101677e5/pytz-2026.1.post1-py2.py3-none-any.whl", hash = "sha256:f2fd16142fda348286a75e1a524be810bb05d444e5a081f37f7affc635035f7a", size = 510489, upload-time = "2026-03-03T07:47:49.167Z" },
]
[[package]]
name = "rich"
version = "14.3.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "markdown-it-py" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" },
]
[[package]]
name = "ruff"
version = "0.15.4"
......@@ -416,6 +471,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3e/0a/9e1be9035b37448ce2e68c978f0591da94389ade5a5abafa4cf99985d1b2/ruff-0.15.4-py3-none-win_arm64.whl", hash = "sha256:60d5177e8cfc70e51b9c5fad936c634872a74209f934c1e79107d11787ad5453", size = 10966776, upload-time = "2026-02-26T20:03:56.908Z" },
]
[[package]]
name = "shellingham"
version = "1.5.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
]
[[package]]
name = "six"
version = "1.17.0"
......@@ -479,6 +543,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a", size = 14477, upload-time = "2026-01-11T11:22:37.446Z" },
]
[[package]]
name = "typer"
version = "0.24.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "annotated-doc" },
{ name = "click" },
{ name = "rich" },
{ name = "shellingham" },
]
sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" },
]
[[package]]
name = "typing-extensions"
version = "4.15.0"
......@@ -503,6 +582,7 @@ source = { editable = "." }
dependencies = [
{ name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
{ name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
{ name = "typer" },
]
[package.dev-dependencies]
......@@ -512,7 +592,10 @@ dev = [
]
[package.metadata]
requires-dist = [{ name = "pandas", specifier = ">=2.0.0" }]
requires-dist = [
{ name = "pandas", specifier = ">=2.0.0" },
{ name = "typer", specifier = ">=0.23" },
]
[package.metadata.requires-dev]
dev = [
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment