Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
hg-misc-tools
Commits
3e48fc9b
Commit
3e48fc9b
authored
Mar 07, 2026
by
one
Browse files
[xcl-lens] Change log_entries to set
parent
a95f20e8
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
10 deletions
+10
-10
projects/xcl-lens/src/xcl_lens/parser/rccl.py
projects/xcl-lens/src/xcl_lens/parser/rccl.py
+10
-10
No files found.
projects/xcl-lens/src/xcl_lens/parser/rccl.py
View file @
3e48fc9b
...
@@ -5,8 +5,8 @@ import pandas as pd
...
@@ -5,8 +5,8 @@ import pandas as pd
class
RcclLogParser
:
class
RcclLogParser
:
def
__init__
(
self
,
verbose
=
False
,
hosts
=
None
,
ranks
=
None
):
def
__init__
(
self
,
verbose
=
False
,
hosts
=
None
,
ranks
=
None
):
# (host, rank, content)
-> None
#
Deduplicated set of
(host, rank, content)
tuples
self
.
log_entries
=
dic
t
()
self
.
log_entries
:
set
[
tuple
[
str
,
int
,
str
]]
=
se
t
()
# Verbosity flag used by report sections
# Verbosity flag used by report sections
self
.
_verbose
=
verbose
self
.
_verbose
=
verbose
...
@@ -94,7 +94,7 @@ class RcclLogParser:
...
@@ -94,7 +94,7 @@ class RcclLogParser:
return
return
if
self
.
_ranks
and
rank
not
in
self
.
_ranks
:
if
self
.
_ranks
and
rank
not
in
self
.
_ranks
:
return
return
self
.
log_entries
[
(
host
,
rank
,
content
)
]
=
None
self
.
log_entries
.
add
(
(
host
,
rank
,
content
)
)
return
return
# Backward-compatible fallback for logs without host/pid/tid prefix
# Backward-compatible fallback for logs without host/pid/tid prefix
...
@@ -103,13 +103,13 @@ class RcclLogParser:
...
@@ -103,13 +103,13 @@ class RcclLogParser:
rank
,
content
=
int
(
match
.
group
(
1
)),
match
.
group
(
2
)
rank
,
content
=
int
(
match
.
group
(
1
)),
match
.
group
(
2
)
if
self
.
_ranks
and
rank
not
in
self
.
_ranks
:
if
self
.
_ranks
and
rank
not
in
self
.
_ranks
:
return
return
self
.
log_entries
[
(
"-"
,
rank
,
content
)
]
=
None
self
.
log_entries
.
add
(
(
"-"
,
rank
,
content
)
)
def
_report_sys
(
self
):
def
_report_sys
(
self
):
"""Search patterns and print pre-defined strings if matched"""
"""Search patterns and print pre-defined strings if matched"""
print
(
"===> System Information:
\n
"
)
print
(
"===> System Information:
\n
"
)
reported
=
set
()
reported
=
set
()
for
(
_
,
_
,
content
),
_
in
self
.
log_entries
.
items
()
:
for
_
,
_
,
content
in
self
.
log_entries
:
for
pattern
,
out
in
self
.
sys_patterns
.
items
():
for
pattern
,
out
in
self
.
sys_patterns
.
items
():
if
re
.
search
(
pattern
,
content
,
re
.
IGNORECASE
):
if
re
.
search
(
pattern
,
content
,
re
.
IGNORECASE
):
reported
.
add
(
out
if
out
is
not
None
else
content
)
reported
.
add
(
out
if
out
is
not
None
else
content
)
...
@@ -123,7 +123,7 @@ class RcclLogParser:
...
@@ -123,7 +123,7 @@ class RcclLogParser:
print
(
"===> User-defined Environment Variables:
\n
"
)
print
(
"===> User-defined Environment Variables:
\n
"
)
env_vars
=
{}
env_vars
=
{}
pattern
=
re
.
compile
(
r
"((?:N|R)CCL_\w+)\s+set(?: by environment)? to\s+(.+)"
)
pattern
=
re
.
compile
(
r
"((?:N|R)CCL_\w+)\s+set(?: by environment)? to\s+(.+)"
)
for
(
_
,
_
,
content
),
_
in
self
.
log_entries
.
items
()
:
for
_
,
_
,
content
in
self
.
log_entries
:
m
=
pattern
.
search
(
content
)
m
=
pattern
.
search
(
content
)
if
m
:
if
m
:
var_name
,
var_value
=
m
.
group
(
1
),
m
.
group
(
2
)
var_name
,
var_value
=
m
.
group
(
1
),
m
.
group
(
2
)
...
@@ -143,7 +143,7 @@ class RcclLogParser:
...
@@ -143,7 +143,7 @@ class RcclLogParser:
ib_rows
=
[]
ib_rows
=
[]
pattern_ib
=
re
.
compile
(
r
"NET/IB\s+:\s+GPU Direct RDMA Enabled for HCA\s+(\d+)\s+'([^']+)'"
)
pattern_ib
=
re
.
compile
(
r
"NET/IB\s+:\s+GPU Direct RDMA Enabled for HCA\s+(\d+)\s+'([^']+)'"
)
for
(
host
,
rank
,
content
),
_
in
self
.
log_entries
.
items
()
:
for
host
,
rank
,
content
in
self
.
log_entries
:
m
=
pattern_ib
.
search
(
content
)
m
=
pattern_ib
.
search
(
content
)
if
m
:
if
m
:
hca_no
,
hca_id
=
m
.
groups
()
hca_no
,
hca_id
=
m
.
groups
()
...
@@ -176,7 +176,7 @@ class RcclLogParser:
...
@@ -176,7 +176,7 @@ class RcclLogParser:
r
"GPU Direct RDMA Enabled for GPU\s+(\S+)\s*/\s*"
r
"GPU Direct RDMA Enabled for GPU\s+(\S+)\s*/\s*"
r
"HCA\s+(\d+)\s*\(distance\s+([^)]*)\),\s*read\s+([01])"
r
"HCA\s+(\d+)\s*\(distance\s+([^)]*)\),\s*read\s+([01])"
)
)
for
(
host
,
rank
,
content
),
_
in
self
.
log_entries
.
items
()
:
for
host
,
rank
,
content
in
self
.
log_entries
:
m
=
pattern_gpu
.
search
(
content
)
m
=
pattern_gpu
.
search
(
content
)
if
m
:
if
m
:
gpu
,
hca_no
,
distance
,
read_flag
=
m
.
groups
()
gpu
,
hca_no
,
distance
,
read_flag
=
m
.
groups
()
...
@@ -245,7 +245,7 @@ class RcclLogParser:
...
@@ -245,7 +245,7 @@ class RcclLogParser:
print
(
f
"===>
{
title
}
:
\n
"
)
print
(
f
"===>
{
title
}
:
\n
"
)
# Filter relevant log lines using the provided filter function
# Filter relevant log lines using the provided filter function
data
=
[(
h
,
r
,
c
)
for
(
h
,
r
,
c
),
_
in
self
.
log_entries
.
items
()
if
filter_func
(
c
)]
data
=
[(
h
,
r
,
c
)
for
h
,
r
,
c
in
self
.
log_entries
if
filter_func
(
c
)]
if
not
data
:
if
not
data
:
print
(
" (No data found)
\n
"
)
print
(
" (No data found)
\n
"
)
return
return
...
@@ -370,7 +370,7 @@ class RcclLogParser:
...
@@ -370,7 +370,7 @@ class RcclLogParser:
r
"(?: \[(\w+)\])?\s+via\s+([\w/]+)"
r
"(?: \[(\w+)\])?\s+via\s+([\w/]+)"
)
)
for
(
host
,
rank
,
content
),
_
in
self
.
log_entries
.
items
()
:
for
host
,
rank
,
content
in
self
.
log_entries
:
m
=
pattern
.
search
(
content
)
m
=
pattern
.
search
(
content
)
if
m
:
if
m
:
channel
,
src
,
dst
,
type_
,
transport
=
m
.
groups
()
channel
,
src
,
dst
,
type_
,
transport
=
m
.
groups
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment