Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
hg-misc-tools
Commits
3b294cb7
Commit
3b294cb7
authored
Mar 24, 2026
by
one
Browse files
[xcl-lens] Improve sys and env reports
parent
d5bae7a2
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
49 additions
and
37 deletions
+49
-37
projects/xcl-lens/src/xcl_lens/parser/rccl.py
projects/xcl-lens/src/xcl_lens/parser/rccl.py
+49
-37
No files found.
projects/xcl-lens/src/xcl_lens/parser/rccl.py
View file @
3b294cb7
...
@@ -62,49 +62,61 @@ class RcclLogParser:
...
@@ -62,49 +62,61 @@ class RcclLogParser:
self
.
log_entries
.
add
((
"-"
,
rank
,
content
))
self
.
log_entries
.
add
((
"-"
,
rank
,
content
))
def
_report_sys
(
self
):
def
_report_sys
(
self
):
"""Search patterns and print pre-defined strings if matched"""
sys_specs
=
[
# Pattern -> output string or as-is
(
r
"kernel version\s*:?\s*(.+)"
,
"kernel version"
,
1
,
None
),
sys_patterns
=
{
(
r
"ROCr version\s*:?\s*(.+)"
,
"ROCr version"
,
1
,
None
),
r
"kernel version"
:
None
,
(
r
"RCCL version\s*:?\s*(.+)"
,
"RCCL version"
,
1
,
None
),
r
"ROCr version"
:
None
,
(
r
"Librccl path\s*:?\s*(.+)"
,
"Librccl path"
,
1
,
None
),
r
"RCCL version"
:
None
,
(
r
'(Missing "iommu=pt".*|iommu.*)'
,
"iommu"
,
1
,
None
),
r
"Librccl path"
:
None
,
(
r
"Dmabuf feature disabled"
,
"Dmabuf"
,
None
,
"disabled"
),
r
"iommu"
:
None
,
(
r
"Disabled GDRCopy"
,
"GDRCopy"
,
None
,
"disabled"
),
r
"Dmabuf feature disabled"
:
"Dmabuf: disabled"
,
(
r
"Using network IB"
,
"NET/IB"
,
None
,
"enabled"
),
r
"Disabled GDRCopy"
:
"GDRCopy: disabled"
,
(
r
"NET/Plugin: Could not find: librccl-net.so"
,
"NET/Plugin"
,
None
,
"internal"
),
r
"Using network IB"
:
"NET/IB: enabled"
,
(
r
"XDP is disabled"
,
"XDP"
,
None
,
"disabled"
),
r
"NET/Plugin: Could not find: librccl-net.so"
:
"NET/Plugin: internal"
,
]
r
"XDP is disabled"
:
"XDP: disabled"
,
}
print
(
"===> System Information:
\n
"
)
records
=
{}
reported
=
set
()
for
host
,
rank
,
content
in
self
.
log_entries
:
for
_
,
_
,
content
in
self
.
log_entries
:
for
pattern
,
field
,
group_idx
,
literal
in
sys_specs
:
for
pattern
,
out
in
sys_patterns
.
items
():
m
=
re
.
search
(
pattern
,
content
,
re
.
IGNORECASE
)
if
re
.
search
(
pattern
,
content
,
re
.
IGNORECASE
):
if
not
m
:
reported
.
add
(
out
if
out
is
not
None
else
content
)
continue
value
=
literal
if
group_idx
is
None
else
m
.
group
(
group_idx
).
strip
()
records
.
setdefault
(
field
,
{}).
setdefault
((
host
,
rank
),
set
()).
add
(
value
)
break
break
for
line
in
sorted
(
reported
):
print
(
line
)
self
.
_print_consistency_report
(
"System Information"
,
records
)
print
()
def
_report_user_envs
(
self
):
def
_report_user_envs
(
self
):
"""Search environment variables set by user"""
print
(
"===> User-defined Environment Variables:
\n
"
)
env_vars
=
{}
pattern
=
re
.
compile
(
r
"((?:N|R)CCL_\w+)\s+set(?: by environment)? to\s+(.+)"
)
pattern
=
re
.
compile
(
r
"((?:N|R)CCL_\w+)\s+set(?: by environment)? to\s+(.+)"
)
for
_
,
_
,
content
in
self
.
log_entries
:
records
=
{}
for
host
,
rank
,
content
in
self
.
log_entries
:
m
=
pattern
.
search
(
content
)
m
=
pattern
.
search
(
content
)
if
m
:
if
not
m
:
var_name
,
var_value
=
m
.
group
(
1
),
m
.
group
(
2
)
continue
env_vars
.
setdefault
(
var_name
,
set
()).
add
(
var_value
)
var_name
,
var_value
=
m
.
group
(
1
),
m
.
group
(
2
).
strip
()
for
key
,
values
in
sorted
(
env_vars
.
items
()):
records
.
setdefault
(
var_name
,
{}).
setdefault
((
host
,
rank
),
set
()).
add
(
var_value
)
self
.
_print_consistency_report
(
"User-defined Environment Variables"
,
records
)
def
_print_consistency_report
(
self
,
title
,
records
):
print
(
f
"===>
{
title
}
:
\n
"
)
if
not
records
:
print
(
" (No data found)
\n
"
)
return
for
field
in
sorted
(
records
):
entries
=
records
[
field
]
values
=
sorted
({
value
for
field_values
in
entries
.
values
()
for
value
in
field_values
})
if
len
(
values
)
==
1
:
if
len
(
values
)
==
1
:
print
(
f
"
{
key
}
:
{
next
(
iter
(
values
))
}
"
)
print
(
f
"
{
field
}
:
{
values
[
0
]
}
"
)
else
:
continue
print
(
f
"
{
key
}
:
{
', '
.
join
(
sorted
(
values
))
}
(WARNING: Different values across ranks)"
print
(
f
"
{
field
}
: (WARNING: Different values across ranks)"
)
)
for
(
host
,
rank
),
field_values
in
sorted
(
entries
.
items
()):
joined
=
" | "
.
join
(
sorted
(
field_values
))
print
(
f
"
{
host
}
rank
{
rank
}
:
{
joined
}
"
)
print
()
print
()
def
_report_net_ib_info
(
self
):
def
_report_net_ib_info
(
self
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment