Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
hg-misc-tools
Commits
35b309ba
Commit
35b309ba
authored
Mar 05, 2026
by
one
Browse files
[xcl-lens] Refactor main.py
parent
6e26d46d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
119 additions
and
102 deletions
+119
-102
projects/xcl-lens/src/xcl_lens/main.py
projects/xcl-lens/src/xcl_lens/main.py
+16
-102
projects/xcl-lens/src/xcl_lens/runner.py
projects/xcl-lens/src/xcl_lens/runner.py
+84
-0
projects/xcl-lens/src/xcl_lens/utils.py
projects/xcl-lens/src/xcl_lens/utils.py
+19
-0
No files found.
projects/xcl-lens/src/xcl_lens/main.py
View file @
35b309ba
#!/usr/bin/env python3
import
argparse
import
os
import
subprocess
import
sys
from
.parser
import
RcclLogParser
from
.runner
import
run_with_input
from
.utils
import
get_mpi_rank
def get_mpi_rank() -> int:
    """Return this process's rank ID from common launcher environment variables.

    The candidate variables are checked in order and the first one that
    holds a valid integer wins. A variable that is set but does not parse
    as an integer (for example an empty string) is skipped rather than
    raising ValueError.

    Returns:
        The rank as an int, or 0 if no candidate variable yields one.
    """
    # Common MPI Rank environment variables
    rank_vars = (
        "OMPI_COMM_WORLD_RANK",  # OpenMPI
        "PMI_RANK",  # MPICH / MVAPICH
        "SLURM_PROCID",  # Slurm
        "RANK",  # General / Torch
    )
    for var in rank_vars:
        value = os.environ.get(var)
        if value is None:
            continue
        try:
            return int(value)
        except ValueError:
            # Malformed value: fall through to the next candidate variable.
            continue
    return 0
def
main
():
rank
=
get_mpi_rank
()
log_prefix
=
f
"[Rank
{
rank
}
]"
# Parse command line arguments
def
create_parser
():
parser
=
argparse
.
ArgumentParser
(
description
=
"RCCL Log Parser Wrapper
\n\n
"
"Usage modes:
\n
"
...
...
@@ -45,88 +21,26 @@ def main():
parser
.
add_argument
(
"command"
,
nargs
=
argparse
.
REMAINDER
,
help
=
"Executable to run, or log files to read"
)
return
parser
args
=
parser
.
parse_args
()
cmd
=
args
.
command
# Case 1: No command provided - check for stdin
if
not
cmd
:
if
not
sys
.
stdin
.
isatty
():
try
:
rccl_parser
=
RcclLogParser
()
for
line
in
sys
.
stdin
:
if
not
args
.
summary
:
print
(
f
"
{
line
}
"
,
end
=
""
,
flush
=
True
)
rccl_parser
.
collect
(
line
)
if
rank
==
0
:
rccl_parser
.
report
(
verbose
=
args
.
verbose
)
sys
.
exit
(
0
)
except
KeyboardInterrupt
:
sys
.
exit
(
130
)
else
:
if
rank
==
0
:
parser
.
print_help
()
sys
.
exit
(
1
)
# Case 2: Check if first argument is an existing file (treat as log file)
if
os
.
path
.
isfile
(
cmd
[
0
]):
try
:
rccl_parser
=
RcclLogParser
()
for
filename
in
cmd
:
if
not
os
.
path
.
isfile
(
filename
):
print
(
f
"
{
log_prefix
}
Error: File not found:
{
filename
}
"
)
sys
.
exit
(
1
)
with
open
(
filename
,
encoding
=
"utf-8"
,
errors
=
"replace"
)
as
f
:
for
line
in
f
:
if
not
args
.
summary
:
print
(
f
"
{
line
}
"
,
end
=
""
,
flush
=
True
)
rccl_parser
.
collect
(
line
)
if
rank
==
0
:
rccl_parser
.
report
(
verbose
=
args
.
verbose
)
sys
.
exit
(
0
)
except
KeyboardInterrupt
:
sys
.
exit
(
130
)
# Get the environment variables
env
=
os
.
environ
.
copy
()
# Inject RCCL environment variables
env
[
"NCCL_DEBUG"
]
=
"INFO"
env
[
"NCCL_DEBUG_SUBSYS"
]
=
"ALL"
def
main
():
rank
=
get_mpi_rank
()
print
(
f
"
{
log_prefix
}
[Wrapper] Running command:
{
' '
.
join
(
cmd
)
}
"
)
parser
=
create_parser
()
args
=
parser
.
parse_args
()
try
:
parser
=
RcclLogParser
()
process
=
subprocess
.
Popen
(
cmd
,
env
=
env
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
STDOUT
,
text
=
True
,
bufsize
=
1
,
)
# Collect all output lines
for
line
in
process
.
stdout
:
if
not
args
.
summary
:
print
(
f
"
{
line
}
"
,
end
=
""
,
flush
=
True
)
parser
.
collect
(
line
)
process
.
wait
()
if
rank
==
0
:
parser
.
report
(
verbose
=
args
.
verbose
)
sys
.
exit
(
process
.
returncode
)
exit_code
=
run_with_input
(
args
,
rank
)
if
exit_code
is
not
None
:
sys
.
exit
(
exit_code
)
except
KeyboardInterrupt
:
sys
.
exit
(
130
)
except
FileNotFoundError
:
print
(
f
"
{
log_prefix
}
Error: Command not found:
{
cmd
[
0
]
}
"
)
sys
.
exit
(
1
)
# If we got here, no command was provided and stdin is a tty
if
rank
==
0
:
parser
.
print_help
()
sys
.
exit
(
1
)
if
__name__
==
"__main__"
:
...
...
projects/xcl-lens/src/xcl_lens/runner.py
0 → 100644
View file @
35b309ba
import
os
import
subprocess
import
sys
from
.parser
import
RcclLogParser
def run_with_input(args, rank):
    """Dispatch to the handler for whichever input mode was requested.

    Three modes are supported, chosen from ``args.command``:

    * empty command and stdin is not a terminal -> parse stdin;
    * first command element is an existing file -> parse the listed files;
    * anything else -> run the command and parse its output.

    Returns the selected handler's exit code, or None when no command was
    given and stdin is a terminal (nothing to process).
    """
    prefix = f"[Rank {rank}]"
    command = args.command

    if command:
        # An existing file as the first argument means "these are log files";
        # otherwise the arguments are treated as an executable to launch.
        if os.path.isfile(command[0]):
            handler = _process_files
        else:
            handler = _execute_command
        return handler(args, rank, prefix, command)

    # No command: only piped/redirected stdin counts as input.
    if sys.stdin.isatty():
        return None
    return _process_stdin(args, rank)
def _process_stdin(args, rank):
    """Feed every line read from stdin into a fresh RcclLogParser.

    Each line is echoed to stdout (flushed immediately) unless
    ``args.summary`` is set. Once stdin is exhausted, rank 0 prints the
    parser report using ``args.verbose``. Always returns 0.
    """
    collector = RcclLogParser()

    for text in sys.stdin:
        if not args.summary:
            # Lines keep their own newline, so suppress print's terminator.
            print(text, end="", flush=True)
        collector.collect(text)

    if rank == 0:
        collector.report(verbose=args.verbose)
    return 0
def _process_files(args, rank, log_prefix, filenames):
    """Parse the given log files, in order, with a single RcclLogParser.

    Each file is checked just before it is read; the first path that is not
    a regular file prints an error prefixed with ``log_prefix`` and makes
    the function return 1 (files before it have already been processed).
    Lines are echoed to stdout unless ``args.summary`` is set. After all
    files are read, rank 0 prints the report using ``args.verbose`` and the
    function returns 0.
    """
    collector = RcclLogParser()

    for path in filenames:
        if not os.path.isfile(path):
            print(f"{log_prefix} Error: File not found: {path}")
            return 1

        # Undecodable bytes are replaced rather than aborting the parse.
        with open(path, encoding="utf-8", errors="replace") as handle:
            for text in handle:
                if not args.summary:
                    print(text, end="", flush=True)
                collector.collect(text)

    if rank == 0:
        collector.report(verbose=args.verbose)
    return 0
def _execute_command(args, rank, log_prefix, cmd):
    """Run ``cmd`` with RCCL debug logging enabled and parse its output.

    The child inherits a copy of the current environment with
    ``NCCL_DEBUG=INFO`` and ``NCCL_DEBUG_SUBSYS=ALL`` forced on. Its stderr
    is merged into stdout, and every output line is fed to an RcclLogParser
    (and echoed to stdout unless ``args.summary`` is set). When the child
    exits, rank 0 prints the parser report using ``args.verbose``.

    Args:
        args: Parsed arguments; ``summary`` and ``verbose`` are read.
        rank: Rank of this process; only rank 0 prints the report.
        log_prefix: Prefix for the wrapper's own status line.
        cmd: Argument list passed to ``subprocess.Popen``.

    Returns:
        The child process's return code.

    Raises:
        FileNotFoundError: Propagated from ``subprocess.Popen`` when the
            executable cannot be found.
    """
    env = os.environ.copy()
    env["NCCL_DEBUG"] = "INFO"
    env["NCCL_DEBUG_SUBSYS"] = "ALL"

    print(f"{log_prefix} [Wrapper] Running command: {' '.join(cmd)}")

    parser = RcclLogParser()
    # The context manager closes the stdout pipe and waits for the child on
    # every exit path, so an exception while reading no longer leaks the pipe
    # or leaves the child unreaped.
    with subprocess.Popen(
        cmd,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,  # line-buffered so output streams through as produced
    ) as process:
        for line in process.stdout:
            if not args.summary:
                print(line, end="", flush=True)
            parser.collect(line)

    # Leaving the ``with`` block has already waited for the child, so
    # ``returncode`` is populated here.
    if rank == 0:
        parser.report(verbose=args.verbose)
    return process.returncode
projects/xcl-lens/src/xcl_lens/utils.py
0 → 100644
View file @
35b309ba
import
os
def get_mpi_rank() -> int:
    """Return this process's rank ID from common launcher environment variables.

    The candidate variables are checked in order and the first one that
    holds a valid integer wins. A variable that is set but does not parse
    as an integer (for example an empty string) is skipped rather than
    raising ValueError.

    Returns:
        The rank as an int, or 0 if no candidate variable yields one.
    """
    rank_vars = (
        "OMPI_COMM_WORLD_RANK",
        "PMI_RANK",
        "SLURM_PROCID",
        "RANK",
    )
    for var in rank_vars:
        value = os.environ.get(var)
        if value is None:
            continue
        try:
            return int(value)
        except ValueError:
            # Malformed value: fall through to the next candidate variable.
            continue
    return 0
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment