Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
hg-misc-tools
Commits
15f150c7
Commit
15f150c7
authored
Feb 27, 2026
by
one
Browse files
[hytop] Add `--wait-idle-seconds`, move `--timeout` to the root command
parent
34675024
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
46 additions
and
18 deletions
+46
-18
projects/hytop/README.md
projects/hytop/README.md
+26
-6
projects/hytop/src/hytop/gpu/cli.py
projects/hytop/src/hytop/gpu/cli.py
+8
-11
projects/hytop/src/hytop/gpu/service.py
projects/hytop/src/hytop/gpu/service.py
+3
-1
projects/hytop/src/hytop/main.py
projects/hytop/src/hytop/main.py
+9
-0
No files found.
projects/hytop/README.md
View file @
15f150c7
...
...
@@ -4,6 +4,7 @@
```
bash
uv pip
install
-e
.
hytop
--help
hytop gpu
--help
```
...
...
@@ -11,7 +12,23 @@ hytop gpu --help
-
Python >= 3.10
-
Python packages:
`rich`
,
`typer`
-
Passwordless SSH for remote monitoring
-
Passwordless SSH for remote
## `hytop`
```
bash
# Show the version number
hytop
--version
# Specify a timeout for the subcommand
hytop
--timeout
300
[
COMMAND]
# 0.5-second interval and 5-second rolling window for the subcommand
hytop
-n
0.5
--window
5
[
COMMAND]
# Specify a list of nodes for the subcommand
hytop
-H
node01,node02
[
COMMAND]
```
## `hytop gpu`
...
...
@@ -22,15 +39,18 @@ A lightweight script for live `hy-smi` polling with rolling averages across mult
Simple examples:
```
bash
# Local node, all GPUs
, 5-second rolling window
hytop
-n
1
--window
5
gpu
# Local node, all GPUs
hytop gpu
# Two nodes,
monitor only GPU 0 and 1
hytop
-H
node01,node02
-n
1
gpu
--devices
0,1
# Two nodes,
0.5-second interval
hytop
-H
node01,node02
-n
0.5
gpu
# Exit with code 0 when all monitored GPUs are available
hytop gpu
--devices
0,1
--wait-idle
# Wait for GPUs to be idle for 30 seconds before exiting
hytop gpu
--devices
0,1
--wait-idle
--wait-idle-seconds
30
# Wait at most 300s for availability (exit 124 on timeout)
hytop gpu
--devices
0,1
--wait-idle
--timeout
300
...
...
@@ -42,7 +62,7 @@ hytop gpu --showpower --showtemp
Queue jobs in shared environments:
```
bash
if
hytop
-H
node01,node02 gpu
--
wait-idle
--timeout
300
;
then
if
hytop
-H
node01,node02 gpu
--
timeout
300
--wait-idle
;
then
echo
"GPUs available, starting workload..."
# YOUR COMMAND HERE (e.g., python train.py)
else
...
...
projects/hytop/src/hytop/gpu/cli.py
View file @
15f150c7
...
...
@@ -42,7 +42,12 @@ def gpu(
wait_idle
:
bool
=
typer
.
Option
(
False
,
"--wait-idle"
,
help
=
"Exit 0 when all monitored GPUs have zero VRAM/HCU avg in the configured window."
,
help
=
"Exit 0 when all monitored GPUs have zero VRAM/HCU avg."
,
),
wait_idle_seconds
:
float
=
typer
.
Option
(
10.0
,
"--wait-idle-seconds"
,
help
=
"How long GPUs must stay idle before exiting. Effective only with --wait-idle."
,
),
showtemp
:
bool
=
typer
.
Option
(
False
,
...
...
@@ -74,11 +79,6 @@ def gpu(
callback
=
remember_show_flag_callback
,
help
=
"Display GPU utilization."
,
),
timeout
:
Optional
[
float
]
=
typer
.
Option
(
None
,
"--timeout"
,
help
=
"Max runtime in seconds. Effective only with --wait-idle."
,
),
)
->
None
:
"""GPU monitoring commands."""
...
...
@@ -90,6 +90,7 @@ def gpu(
host_list
=
ctx
.
obj
[
"hosts"
]
interval
=
ctx
.
obj
[
"interval"
]
window_value
=
ctx
.
obj
[
"window"
]
timeout_value
=
ctx
.
obj
.
get
(
"timeout"
)
selected_show_flags
=
{
"showtemp"
:
showtemp
,
"showpower"
:
showpower
,
...
...
@@ -111,11 +112,6 @@ def gpu(
parsed_device_filter
:
Optional
[
Set
[
int
]]
=
None
if
device_filter
:
parsed_device_filter
=
set
(
parse_csv_ints
(
device_filter
,
"--devices"
))
timeout_value
=
(
float
(
timeout
)
if
timeout
is
not
None
else
None
)
except
ValueError
as
exc
:
typer
.
echo
(
f
"argument error:
{
exc
}
"
,
err
=
True
)
raise
typer
.
Exit
(
code
=
2
)
from
exc
...
...
@@ -127,6 +123,7 @@ def gpu(
window
=
window_value
,
interval
=
interval
,
wait_idle
=
wait_idle
,
wait_idle_duration
=
max
(
wait_idle_seconds
,
interval
),
timeout
=
timeout_value
,
)
raise
typer
.
Exit
(
code
=
code
)
...
...
projects/hytop/src/hytop/gpu/service.py
View file @
15f150c7
...
...
@@ -262,6 +262,7 @@ def run_monitor(
interval
:
float
,
wait_idle
:
bool
,
timeout
:
Optional
[
float
],
wait_idle_duration
:
float
=
10.0
,
)
->
int
:
"""Run the asynchronous collector + periodic renderer monitor loop.
...
...
@@ -271,6 +272,7 @@ def run_monitor(
window: Rolling window length in seconds.
interval: Sampling interval in seconds.
wait_idle: Whether to exit when all monitored GPUs become idle.
wait_idle_duration: How long GPUs must stay idle before exiting.
timeout: Optional timeout for wait-idle mode.
Returns:
...
...
@@ -334,7 +336,7 @@ def run_monitor(
refresh
=
True
,
)
elapsed_since_start
=
time
.
monotonic
()
-
started
warmup_done
=
elapsed_since_start
>=
state
.
max_window
warmup_done
=
elapsed_since_start
>=
wait_idle_duration
if
(
wait_idle
and
warmup_done
...
...
projects/hytop/src/hytop/main.py
View file @
15f150c7
from
__future__
import
annotations
from
typing
import
Optional
import
typer
from
hytop
import
__version__
...
...
@@ -50,11 +52,17 @@ def root(
"--window"
,
help
=
"Single rolling window in seconds. Default: 5.0"
,
),
timeout
:
Optional
[
float
]
=
typer
.
Option
(
None
,
"--timeout"
,
help
=
"Max runtime in seconds."
,
),
)
->
None
:
"""Root callback that parses global options and stores them in context."""
try
:
host_list
=
parse_csv_strings
(
hosts
,
"--hosts"
)
window_value
=
parse_positive_float
(
str
(
window
),
"--window"
)
timeout_value
=
float
(
timeout
)
if
timeout
is
not
None
else
None
except
ValueError
as
exc
:
typer
.
echo
(
f
"argument error:
{
exc
}
"
,
err
=
True
)
raise
typer
.
Exit
(
code
=
2
)
from
exc
...
...
@@ -63,6 +71,7 @@ def root(
"hosts"
:
host_list
,
"interval"
:
interval
,
"window"
:
window_value
,
"timeout"
:
timeout_value
,
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment