Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
hg-misc-tools
Commits
df9bba54
Commit
df9bba54
authored
Mar 05, 2026
by
one
Browse files
[xcl-lens] Add channel transport info
parent
9f243032
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
52 additions
and
8 deletions
+52
-8
projects/xcl-lens/src/xcl_lens/main.py
projects/xcl-lens/src/xcl_lens/main.py
+4
-4
projects/xcl-lens/src/xcl_lens/parser/rccl.py
projects/xcl-lens/src/xcl_lens/parser/rccl.py
+48
-4
No files found.
projects/xcl-lens/src/xcl_lens/main.py
View file @
df9bba54
...
@@ -34,15 +34,15 @@ def main():
...
@@ -34,15 +34,15 @@ def main():
# Parse command line arguments
# Parse command line arguments
parser
=
argparse
.
ArgumentParser
(
description
=
"RCCL Log Parser Wrapper"
)
parser
=
argparse
.
ArgumentParser
(
description
=
"RCCL Log Parser Wrapper"
)
parser
.
add_argument
(
parser
.
add_argument
(
"-
v"
,
"--verbose
"
,
action
=
"store_true"
,
help
=
"Print raw log lines in addition to the report"
"-
-raw
"
,
action
=
"store_true"
,
help
=
"Print raw log lines in addition to the report"
)
)
parser
.
add_argument
(
"-v"
,
"--verbose"
,
action
=
"store_true"
,
help
=
"Print verbose reports"
)
parser
.
add_argument
(
parser
.
add_argument
(
"command"
,
nargs
=
argparse
.
REMAINDER
,
help
=
"The executable and arguments to run"
"command"
,
nargs
=
argparse
.
REMAINDER
,
help
=
"The executable and arguments to run"
)
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
verbose
=
args
.
verbose
cmd
=
args
.
command
cmd
=
args
.
command
# Check if command is provided
# Check if command is provided
...
@@ -72,14 +72,14 @@ def main():
...
@@ -72,14 +72,14 @@ def main():
# Collect all output lines
# Collect all output lines
for
line
in
process
.
stdout
:
for
line
in
process
.
stdout
:
if
verbose
:
if
args
.
raw
:
print
(
f
"
{
line
}
"
,
end
=
""
,
flush
=
True
)
print
(
f
"
{
line
}
"
,
end
=
""
,
flush
=
True
)
parser
.
collect
(
line
)
parser
.
collect
(
line
)
process
.
wait
()
process
.
wait
()
if
rank
==
0
:
if
rank
==
0
:
parser
.
report
()
parser
.
report
(
verbose
=
args
.
verbose
)
sys
.
exit
(
process
.
returncode
)
sys
.
exit
(
process
.
returncode
)
except
KeyboardInterrupt
:
except
KeyboardInterrupt
:
...
...
projects/xcl-lens/src/xcl_lens/parser/rccl.py
View file @
df9bba54
...
@@ -17,6 +17,8 @@ class RcclLogParser:
...
@@ -17,6 +17,8 @@ class RcclLogParser:
r
"iommu"
:
None
,
r
"iommu"
:
None
,
r
"Dmabuf feature disabled"
:
"Dmabuf: disabled"
,
r
"Dmabuf feature disabled"
:
"Dmabuf: disabled"
,
r
"Disabled GDRCopy"
:
"GDRCopy: disabled"
,
r
"Disabled GDRCopy"
:
"GDRCopy: disabled"
,
r
"Using network IB"
:
"NET/IB: enabled"
,
r
"NET/Plugin: Could not find: librccl-net.so"
:
"NET/Plugin: internal"
,
}
}
# Pattern -> column with strict validation
# Pattern -> column with strict validation
...
@@ -55,13 +57,14 @@ class RcclLogParser:
...
@@ -55,13 +57,14 @@ class RcclLogParser:
def
collect
(
self
,
line
):
def
collect
(
self
,
line
):
self
.
_preprocess_line
(
line
)
self
.
_preprocess_line
(
line
)
def
report
(
self
):
def
report
(
self
,
verbose
=
False
):
print
(
" RCCL Log Parser Report "
.
center
(
80
,
"="
))
print
(
" RCCL Log Parser Report "
.
center
(
80
,
"="
))
print
()
print
()
self
.
_report_sys
()
self
.
_report_sys
()
self
.
_report_user_envs
()
self
.
_report_user_envs
()
self
.
_report_graph_info
()
self
.
_report_graph_info
()
self
.
_report_channel_transport_info
(
verbose
)
self
.
_report_cl_transfers
()
self
.
_report_cl_transfers
()
self
.
_report_p2p_transfers
()
self
.
_report_p2p_transfers
()
...
@@ -74,8 +77,7 @@ class RcclLogParser:
...
@@ -74,8 +77,7 @@ class RcclLogParser:
match
=
re
.
search
(
r
"\[(\d+)\]\s+NCCL\s+(?:INFO|WARN|ERROR)\s+(.*)"
,
line
)
match
=
re
.
search
(
r
"\[(\d+)\]\s+NCCL\s+(?:INFO|WARN|ERROR)\s+(.*)"
,
line
)
if
match
:
if
match
:
rank
,
content
=
int
(
match
.
group
(
1
)),
match
.
group
(
2
)
rank
,
content
=
int
(
match
.
group
(
1
)),
match
.
group
(
2
)
if
len
(
content
)
>=
20
:
self
.
log_entries
[(
rank
,
content
)]
=
None
self
.
log_entries
[(
rank
,
content
)]
=
None
def
_report_sys
(
self
):
def
_report_sys
(
self
):
"""Search patterns and print pre-defined strings if matched"""
"""Search patterns and print pre-defined strings if matched"""
...
@@ -94,7 +96,7 @@ class RcclLogParser:
...
@@ -94,7 +96,7 @@ class RcclLogParser:
"""Search environment variables set by user"""
"""Search environment variables set by user"""
print
(
"===> User-defined Environment Variables:
\n
"
)
print
(
"===> User-defined Environment Variables:
\n
"
)
env_vars
=
{}
env_vars
=
{}
pattern
=
re
.
compile
(
r
"(\w+)\s+set by environment to\s+(.+)"
)
pattern
=
re
.
compile
(
r
"(
(?:N|R)CCL_
\w+)\s+set
(?:
by environment
)?
to\s+(.+)"
)
for
(
_
,
content
),
_
in
self
.
log_entries
.
items
():
for
(
_
,
content
),
_
in
self
.
log_entries
.
items
():
m
=
pattern
.
search
(
content
)
m
=
pattern
.
search
(
content
)
if
m
:
if
m
:
...
@@ -220,6 +222,48 @@ class RcclLogParser:
...
@@ -220,6 +222,48 @@ class RcclLogParser:
sort_cols
=
[
"rank"
,
"Pattern"
],
sort_cols
=
[
"rank"
,
"Pattern"
],
)
)
def
_report_channel_transport_info
(
self
,
verbose
=
False
):
print
(
"===> Channel Transport Info:
\n
"
)
data
=
[]
# Match pattern: Channel 00/0 : 2[5d000] -> 1[56000] [send] via NET/IB/6/GDRDMA
# Group 1: channel (e.g., 00/0)
# Group 2: src (e.g., 2)
# Group 3: dst (e.g., 1)
# Group 4: type (e.g., send or receive, optional)
# Group 5: transport (e.g., P2P/IPC, NET/IB/6/GDRDMA)
pattern
=
re
.
compile
(
r
"Channel\s+(\d+/\d+)\s+:\s+(\d+)\[.*?\]\s+->\s+(\d+)\[.*?\]"
r
"(?: \[(\w+)\])?\s+via\s+([\w/]+)"
)
for
(
rank
,
content
),
_
in
self
.
log_entries
.
items
():
m
=
pattern
.
search
(
content
)
if
m
:
channel
,
src
,
dst
,
type_
,
transport
=
m
.
groups
()
data
.
append
(
{
"rank"
:
rank
,
"channel"
:
channel
,
"sender"
:
int
(
src
),
"receiver"
:
int
(
dst
),
"type"
:
type_
if
type_
else
"-"
,
"transport"
:
transport
,
}
)
if
not
data
:
print
(
" (No data found)
\n
"
)
return
df
=
pd
.
DataFrame
(
data
)
df
.
sort_values
(
by
=
[
"rank"
,
"channel"
,
"sender"
,
"receiver"
],
inplace
=
True
)
if
not
verbose
:
df
=
df
.
drop
(
columns
=
[
"channel"
,
"sender"
,
"receiver"
])
df
.
drop_duplicates
(
inplace
=
True
)
print
(
df
.
to_string
(
index
=
False
))
print
()
def
_report_cl_transfers
(
self
):
def
_report_cl_transfers
(
self
):
self
.
_extract_and_print
(
self
.
_extract_and_print
(
title
=
"Unique Ring/Tree Transfers"
,
title
=
"Unique Ring/Tree Transfers"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment