Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ycai
simbricks
Commits
1efbda52
Commit
1efbda52
authored
Sep 09, 2022
by
Jonas Kaufmann
Committed by
Antoine Kaufmann
Sep 17, 2022
Browse files
implement Ctrl+C handling for SlurmRuntime
parent
574d4ccb
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
4 deletions
+27
-4
experiments/simbricks/runtime/common.py
experiments/simbricks/runtime/common.py
+2
-0
experiments/simbricks/runtime/slurm.py
experiments/simbricks/runtime/slurm.py
+25
-4
No files found.
experiments/simbricks/runtime/common.py
View file @
1efbda52
...
...
@@ -51,6 +51,8 @@ class Run(object):
self
.
outpath
=
outpath
self
.
output
:
tp
.
Optional
[
ExpOutput
]
=
None
self
.
prereq
=
prereq
self
.
job_id
:
tp
.
Optional
[
int
]
=
None
"""Slurm job id."""
def name(self):
    """Return the run's name: the experiment name and run index, dot-separated."""
    experiment_name = self.experiment.name
    index_text = str(self.index)
    return experiment_name + '.' + index_text
...
...
experiments/simbricks/runtime/slurm.py
View file @
1efbda52
...
...
@@ -20,10 +20,12 @@
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import
asyncio
import
os
import
pathlib
import
pickle
import
re
import
typing
as
tp
from
simbricks.runtime.common
import
Run
,
Runtime
...
...
@@ -32,12 +34,14 @@ class SlurmRuntime(Runtime):
def __init__(self, slurmdir, args, verbose=False, cleanup=True):
    """Set up an empty Slurm runtime.

    Args:
        slurmdir: Directory used by this runtime; it is created on start
            if it does not exist yet.
        args: Stored as `self.args` (use is not visible in this part of
            the file -- TODO confirm against callers).
        verbose: Stored as `self.verbose`.
        cleanup: Stored as `self.cleanup`.
    """
    super().__init__()
    # runs registered through add_run()
    self.runnable: tp.List[Run] = []
    self.slurmdir = slurmdir
    self.args = args
    self.verbose = verbose
    self.cleanup = cleanup
    # Task wrapping _do_start(); assigned by start(). Initialized here so
    # the attribute always exists, even before start() has been called.
    self._start_task: tp.Optional[asyncio.Task] = None
def add_run(self, run: Run):
    """Register `run` with this runtime by appending it to `self.runnable`."""
    registered_runs = self.runnable
    registered_runs.append(run)
...
...
@@ -88,7 +92,7 @@ class SlurmRuntime(Runtime):
return
exp_script
async
def
start
(
self
):
async
def
_do_
start
(
self
):
pathlib
.
Path
(
self
.
slurmdir
).
mkdir
(
parents
=
True
,
exist_ok
=
True
)
jid_re
=
re
.
compile
(
r
'Submitted batch job ([0-9]+)'
)
...
...
@@ -111,6 +115,23 @@ class SlurmRuntime(Runtime):
m
=
jid_re
.
search
(
output
)
run
.
job_id
=
int
(
m
.
group
(
1
))
async def start(self):
    """Run `_do_start()` as a task and clean up if that task is cancelled.

    The task is kept in `self._start_task` so that it can be cancelled
    from outside this coroutine. When it is cancelled, every run in
    `self.runnable` that already has a Slurm job id is cancelled with
    `scancel`. The `CancelledError` is deliberately not propagated, so
    this coroutine returns normally after the cleanup.
    """
    self._start_task = asyncio.create_task(self._do_start())
    try:
        await self._start_task
    except asyncio.CancelledError:
        # stop all runs that have already been scheduled
        # (existing slurm job id)
        job_ids = [
            str(run.job_id)
            for run in self.runnable
            if run.job_id is not None
        ]
        if not job_ids:
            # no run has been submitted yet, so there is nothing to cancel
            return
        # job ids are passed as separate arguments; no shell is involved
        scancel_process = await asyncio.create_subprocess_exec(
            'scancel', *job_ids
        )
        await scancel_process.wait()
def interrupt(self):
    """Interrupt the runtime and cancel the pending start task, if any.

    Calls the base-class `interrupt()` first and returns whatever it
    returns, then cancels the task stored in `self._start_task`.
    """
    result = super().interrupt()
    # `_start_task` may not have been created yet if start() was never
    # called, so look it up defensively instead of assuming it exists.
    start_task = getattr(self, '_start_task', None)
    if start_task is not None:
        start_task.cancel()
    return result
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment