Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ycai
simbricks
Commits
f012bd04
Commit
f012bd04
authored
Nov 04, 2020
by
Antoine Kaufmann
Browse files
experiments: add slurm runtime
parent
24ee28c3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
75 additions
and
5 deletions
+75
-5
experiments/modes/runtime.py
experiments/modes/runtime.py
+57
-1
experiments/run.py
experiments/run.py
+18
-4
No files found.
experiments/modes/runtime.py
View file @
f012bd04
import
asyncio
import
asyncio
import
pickle
import
os
import
pathlib
import
modes.experiments
as
exp
import
modes.experiments
as
exp
...
@@ -11,7 +14,7 @@ class Run(object):
...
@@ -11,7 +14,7 @@ class Run(object):
self
.
output
=
None
self
.
output
=
None
def
name
(
self
):
def
name
(
self
):
return
self
.
experiment
.
name
+
'
[
'
+
str
(
self
.
index
)
+
']'
return
self
.
experiment
.
name
+
'
.
'
+
str
(
self
.
index
)
class
Runtime
(
object
):
class
Runtime
(
object
):
def
add_run
(
self
,
run
):
def
add_run
(
self
,
run
):
...
@@ -120,3 +123,56 @@ class LocalParallelRuntime(Runtime):
...
@@ -120,3 +123,56 @@ class LocalParallelRuntime(Runtime):
def
start
(
self
):
def
start
(
self
):
asyncio
.
run
(
self
.
do_start
())
asyncio
.
run
(
self
.
do_start
())
class SlurmRuntime(Runtime):
    """Runtime that submits every run as its own Slurm batch job.

    For each queued run, a pickled copy of the experiment and a small shell
    script are written into ``slurmdir``; the script is then handed to
    ``sbatch``. The script itself re-invokes ``run.py`` for exactly one run
    (``--firstrun=<index> --runs=1``).
    """

    def __init__(self, slurmdir, args, verbose=False, cleanup=True):
        """Set up an empty run queue.

        slurmdir -- directory receiving the per-run .exp/.log/.sh files
        args     -- parsed command line; ``repo``, ``workdir`` and ``outdir``
                    are read from it when generating the batch script
        verbose  -- stored on the instance (not used by this class itself)
        cleanup  -- if true, the batch script removes the run's work
                    directory after run.py has finished
        """
        self.runnable = []
        self.slurmdir = slurmdir
        self.args = args
        self.verbose = verbose
        self.cleanup = cleanup

    def add_run(self, run):
        """Queue ``run`` for submission by start()."""
        self.runnable.append(run)

    def prep_run(self, run):
        """Write the pickled experiment and the batch script for ``run``.

        Returns the path of the generated batch script.
        """
        # Imported locally so the block does not depend on a file-level
        # import that may not exist.
        import shlex

        # Named `experiment` (not `exp`) to avoid shadowing the module alias
        # `exp` imported at the top of the file.
        experiment = run.experiment
        prefix = '%s/%s-%d' % (self.slurmdir, experiment.name, run.index)
        exp_path = prefix + '.exp'
        exp_log = prefix + '.log'
        exp_script = prefix + '.sh'

        # write out pickled experiment
        with open(exp_path, 'wb') as f:
            pickle.dump(experiment, f)

        # Paths end up in a shell script: quote them so that whitespace or
        # shell metacharacters cannot split or alter the commands (this
        # matters most for the `rm -rf` below). Plain paths are unchanged by
        # shlex.quote, so the generated script is identical for them.
        q_workdir = shlex.quote(str(self.args.workdir))
        q_repo = shlex.quote(str(self.args.repo))
        q_outdir = shlex.quote(str(self.args.outdir))
        q_exp_path = shlex.quote(exp_path)

        # create slurm batch script
        lines = [
            '#!/bin/sh\n',
            # stdout and stderr of the job both go to the same log file
            '#SBATCH -o %s -e %s\n' % (exp_log, exp_log),
            '#SBATCH -c %d\n' % (experiment.resreq_cores(),),
            '#SBATCH --mem=%dM\n' % (experiment.resreq_mem(),),
            '#SBATCH --job-name="%s"\n' % (run.name(),),
        ]
        if experiment.timeout is not None:
            # timeout is in seconds; emit it as HH:MM:SS
            minutes, s = divmod(int(experiment.timeout), 60)
            h, m = divmod(minutes, 60)
            lines.append('#SBATCH --time=%02d:%02d:%02d\n' % (h, m, s))

        lines.append('mkdir -p %s\n' % (q_workdir,))
        lines.append(('python3 run.py --repo=%s --workdir=%s --outdir=%s '
                      '--firstrun=%d --runs=1 %s\n') % (
                          q_repo, q_workdir, q_outdir, run.index,
                          q_exp_path))
        # Remember run.py's exit status so the cleanup below does not mask it.
        lines.append('status=$?\n')
        if self.cleanup:
            lines.append('rm -rf %s\n'
                         % (shlex.quote(str(run.env.workdir)),))
        lines.append('exit $status\n')

        with open(exp_script, 'w') as f:
            f.writelines(lines)

        return exp_script

    def start(self):
        """Create ``slurmdir`` and submit one batch job per queued run.

        A failed submission is reported on stderr and does not stop the
        remaining runs from being submitted.
        """
        import subprocess
        import sys

        pathlib.Path(self.slurmdir).mkdir(parents=True, exist_ok=True)

        for run in self.runnable:
            script = self.prep_run(run)
            # Argument list instead of a shell string: the script path is
            # passed to sbatch verbatim, whatever characters it contains.
            try:
                result = subprocess.run(['sbatch', script])
            except OSError as err:
                print('sbatch could not be started for %s: %s'
                      % (run.name(), err), file=sys.stderr)
                continue
            if result.returncode != 0:
                print('sbatch failed for %s (exit status %d)'
                      % (run.name(), result.returncode), file=sys.stderr)
experiments/run.py
View file @
f012bd04
...
@@ -16,6 +16,8 @@ parser.add_argument('experiments', metavar='EXP', type=str, nargs='+',
...
@@ -16,6 +16,8 @@ parser.add_argument('experiments', metavar='EXP', type=str, nargs='+',
help
=
'An experiment file to run'
)
help
=
'An experiment file to run'
)
parser
.
add_argument
(
'--runs'
,
metavar
=
'N'
,
type
=
int
,
default
=
1
,
parser
.
add_argument
(
'--runs'
,
metavar
=
'N'
,
type
=
int
,
default
=
1
,
help
=
'Number of repetition for each experiment'
)
help
=
'Number of repetition for each experiment'
)
# Index assigned to the first run of each experiment; the run loop below
# iterates range(args.firstrun, args.firstrun + args.runs).
parser.add_argument('--firstrun', metavar='N', type=int, default=1,
        help='ID for first run')
parser
.
add_argument
(
'--verbose'
,
action
=
'store_const'
,
const
=
True
,
parser
.
add_argument
(
'--verbose'
,
action
=
'store_const'
,
const
=
True
,
default
=
False
,
default
=
False
,
help
=
'Verbose output'
)
help
=
'Verbose output'
)
...
@@ -38,6 +40,13 @@ g_par.add_argument('--cores', metavar='N', type=int,
...
@@ -38,6 +40,13 @@ g_par.add_argument('--cores', metavar='N', type=int,
g_par
.
add_argument
(
'--mem'
,
metavar
=
'N'
,
type
=
int
,
default
=
None
,
g_par
.
add_argument
(
'--mem'
,
metavar
=
'N'
,
type
=
int
,
default
=
None
,
help
=
'Memory limit for parallel runs (in MB)'
)
help
=
'Memory limit for parallel runs (in MB)'
)
# Command line options for the Slurm runtime.
g_slurm = parser.add_argument_group('Slurm Runtime')
# --slurm stores the constant 'slurm' in args.runtime.
# NOTE(review): other options may also write to args.runtime with their own
# default -- confirm which default applies when none of them is given.
g_slurm.add_argument('--slurm', dest='runtime', action='store_const',
        const='slurm', default='sequential',
        help='Use slurm instead of sequential runtime')
# Directory passed to runtime.SlurmRuntime as its slurmdir.
g_slurm.add_argument('--slurmdir', metavar='DIR', type=str,
        default='./slurm/', help='Slurm communication directory')
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
experiments
=
[]
experiments
=
[]
...
@@ -53,27 +62,32 @@ for path in args.experiments:
...
@@ -53,27 +62,32 @@ for path in args.experiments:
with
open
(
path
,
'rb'
)
as
f
:
with
open
(
path
,
'rb'
)
as
f
:
experiments
.
append
(
pickle
.
load
(
f
))
experiments
.
append
(
pickle
.
load
(
f
))
mkdir_if_not_exists
(
args
.
workdir
)
if
args
.
runtime
!=
'slurm'
:
mkdir_if_not_exists
(
args
.
workdir
)
mkdir_if_not_exists
(
args
.
outdir
)
mkdir_if_not_exists
(
args
.
outdir
)
if
args
.
runtime
==
'parallel'
:
if
args
.
runtime
==
'parallel'
:
rt
=
runtime
.
LocalParallelRuntime
(
cores
=
args
.
cores
,
mem
=
args
.
mem
,
rt
=
runtime
.
LocalParallelRuntime
(
cores
=
args
.
cores
,
mem
=
args
.
mem
,
verbose
=
args
.
verbose
)
verbose
=
args
.
verbose
)
elif
args
.
runtime
==
'slurm'
:
rt
=
runtime
.
SlurmRuntime
(
args
.
slurmdir
,
args
,
verbose
=
args
.
verbose
)
else
:
else
:
rt
=
runtime
.
LocalSimpleRuntime
(
verbose
=
args
.
verbose
)
rt
=
runtime
.
LocalSimpleRuntime
(
verbose
=
args
.
verbose
)
for
e
in
experiments
:
for
e
in
experiments
:
workdir_base
=
'%s/%s'
%
(
args
.
workdir
,
e
.
name
)
workdir_base
=
'%s/%s'
%
(
args
.
workdir
,
e
.
name
)
mkdir_if_not_exists
(
workdir_base
)
if
args
.
runtime
!=
'slurm'
:
mkdir_if_not_exists
(
workdir_base
)
for
run
in
range
(
0
,
args
.
runs
):
for
run
in
range
(
args
.
firstrun
,
args
.
firstrun
+
args
.
runs
):
outpath
=
'%s/%s-%d.json'
%
(
args
.
outdir
,
e
.
name
,
run
)
outpath
=
'%s/%s-%d.json'
%
(
args
.
outdir
,
e
.
name
,
run
)
if
os
.
path
.
exists
(
outpath
):
if
os
.
path
.
exists
(
outpath
):
print
(
'skip %s run %d'
%
(
e
.
name
,
run
))
print
(
'skip %s run %d'
%
(
e
.
name
,
run
))
continue
continue
workdir
=
'%s/%d'
%
(
workdir_base
,
run
)
workdir
=
'%s/%d'
%
(
workdir_base
,
run
)
mkdir_if_not_exists
(
workdir
)
if
args
.
runtime
!=
'slurm'
:
mkdir_if_not_exists
(
workdir
)
env
=
exp
.
ExpEnv
(
args
.
repo
,
workdir
)
env
=
exp
.
ExpEnv
(
args
.
repo
,
workdir
)
rt
.
add_run
(
runtime
.
Run
(
e
,
run
,
env
,
outpath
))
rt
.
add_run
(
runtime
.
Run
(
e
,
run
,
env
,
outpath
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment