Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ycai
simbricks
Commits
4b2fde4d
Commit
4b2fde4d
authored
Jul 07, 2021
by
Antoine Kaufmann
Browse files
experiments: add ExperimentRunner instead of logic in Experiment class
Again prepration for distributed experiments.
parent
a8fd4999
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
180 additions
and
109 deletions
+180
-109
experiments/simbricks/experiments.py
experiments/simbricks/experiments.py
+172
-101
experiments/simbricks/runtime/local.py
experiments/simbricks/runtime/local.py
+8
-8
No files found.
experiments/simbricks/experiments.py
View file @
4b2fde4d
...
...
@@ -59,133 +59,204 @@ class Experiment(object):
raise
Exception
(
'Duplicate net name'
)
self
.
networks
.
append
(
sim
)
async
def
prepare
(
self
,
env
,
verbose
=
False
,
exec
=
exectools
.
LocalExecutor
()):
def
resreq_mem
(
self
):
mem
=
0
for
h
in
self
.
hosts
:
mem
+=
h
.
resreq_mem
()
for
n
in
self
.
nics
:
mem
+=
n
.
resreq_mem
()
for
n
in
self
.
networks
:
mem
+=
n
.
resreq_mem
()
return
mem
def
resreq_cores
(
self
):
cores
=
0
for
h
in
self
.
hosts
:
cores
+=
h
.
resreq_cores
()
for
n
in
self
.
nics
:
cores
+=
n
.
resreq_cores
()
for
n
in
self
.
networks
:
cores
+=
n
.
resreq_cores
()
return
cores
class
ExperimentBaseRunner
(
object
):
def
__init__
(
self
,
exp
,
env
,
verbose
):
self
.
exp
=
exp
self
.
env
=
env
self
.
verbose
=
verbose
self
.
out
=
ExpOutput
(
exp
)
self
.
running
=
[]
self
.
sockets
=
[]
self
.
wait_hosts
=
[]
def
sim_executor
(
self
,
sim
):
raise
NotImplementedError
(
"Please implement this method"
)
async
def
before_nics
(
self
):
pass
async
def
before_nets
(
self
):
pass
async
def
before_hosts
(
self
):
pass
async
def
before_wait
(
self
):
pass
async
def
before_cleanup
(
self
):
pass
async
def
after_cleanup
(
self
):
pass
async
def
prepare
(
self
):
# generate config tars
for
host
in
self
.
hosts
:
path
=
env
.
cfgtar_path
(
host
)
if
verbose
:
for
host
in
self
.
exp
.
hosts
:
path
=
self
.
env
.
cfgtar_path
(
host
)
if
self
.
verbose
:
print
(
'preparing config tar:'
,
path
)
host
.
node_config
.
make_tar
(
path
)
await
exec
.
send_file
(
path
,
verbose
)
await
self
.
sim_executor
(
host
)
.
send_file
(
path
,
self
.
verbose
)
# prepare all simulators in parallel
sims
=
[]
for
sim
in
self
.
hosts
+
self
.
nics
+
self
.
networks
:
prep_cmds
=
[
pc
for
pc
in
sim
.
prep_cmds
(
env
)]
sims
.
append
(
exec
.
run_cmdlist
(
'prepare_'
+
self
.
name
,
prep_cmds
,
verbose
=
verbose
))
for
sim
in
self
.
exp
.
hosts
+
self
.
exp
.
nics
+
self
.
exp
.
networks
:
prep_cmds
=
[
pc
for
pc
in
sim
.
prep_cmds
(
self
.
env
)]
exec
=
self
.
sim_executor
(
sim
)
sims
.
append
(
exec
.
run_cmdlist
(
'prepare_'
+
self
.
exp
.
name
,
prep_cmds
,
verbose
=
self
.
verbose
))
await
asyncio
.
wait
(
sims
)
async
def
run
(
self
,
env
,
verbose
=
False
,
exec
=
exectools
.
LocalExecutor
()):
running
=
[]
sockets
=
[]
out
=
ExpOutput
(
self
)
async
def
run_nics
(
self
):
""" Start all NIC simulators. """
if
self
.
verbose
:
print
(
'%s: starting NICS'
%
self
.
exp
.
name
)
for
nic
in
self
.
exp
.
nics
:
if
self
.
verbose
:
print
(
'start NIC:'
,
nic
.
run_cmd
(
self
.
env
))
exec
=
self
.
sim_executor
(
nic
)
sc
=
exec
.
create_component
(
nic
.
full_name
(),
shlex
.
split
(
nic
.
run_cmd
(
self
.
env
)),
verbose
=
self
.
verbose
,
canfail
=
True
)
await
sc
.
start
()
self
.
running
.
append
((
nic
,
sc
))
self
.
sockets
.
append
((
exec
,
self
.
env
.
nic_pci_path
(
nic
)))
self
.
sockets
.
append
((
exec
,
self
.
env
.
nic_eth_path
(
nic
)))
self
.
sockets
.
append
((
exec
,
self
.
env
.
nic_shm_path
(
nic
)))
# Wait till all NIC sockets exist
if
self
.
verbose
:
print
(
'%s: waiting for sockets'
%
self
.
exp
.
name
)
for
(
exec
,
s
)
in
self
.
sockets
:
await
exec
.
await_file
(
s
,
verbose
=
self
.
verbose
)
# just a bit of a safety delay
await
asyncio
.
sleep
(
0.5
)
async
def
run_nets
(
self
):
""" Start all network simulators (typically one). """
if
self
.
verbose
:
print
(
'%s: starting networks'
%
self
.
exp
.
name
)
for
net
in
self
.
exp
.
networks
:
if
self
.
verbose
:
print
(
'start Net:'
,
net
.
run_cmd
(
self
.
env
))
exec
=
self
.
sim_executor
(
net
)
sc
=
exec
.
create_component
(
net
.
full_name
(),
shlex
.
split
(
net
.
run_cmd
(
self
.
env
)),
verbose
=
self
.
verbose
,
canfail
=
True
)
await
sc
.
start
()
self
.
running
.
append
((
net
,
sc
))
async
def
run_hosts
(
self
):
""" Start all host simulators. """
if
self
.
verbose
:
print
(
'%s: starting hosts'
%
self
.
exp
.
name
)
for
host
in
self
.
exp
.
hosts
:
if
self
.
verbose
:
print
(
'start Host:'
,
host
.
run_cmd
(
self
.
env
))
exec
=
self
.
sim_executor
(
host
)
sc
=
exec
.
create_component
(
host
.
full_name
(),
shlex
.
split
(
host
.
run_cmd
(
self
.
env
)),
verbose
=
self
.
verbose
,
canfail
=
True
)
await
sc
.
start
()
self
.
running
.
append
((
host
,
sc
))
if
host
.
wait
:
self
.
wait_hosts
.
append
(
sc
)
if
host
.
sleep
>
0
:
await
asyncio
.
sleep
(
host
.
sleep
)
async
def
wait_for_hosts
(
self
):
""" Wait for hosts to terminate (the ones marked to wait on). """
if
self
.
verbose
:
print
(
'%s: waiting for hosts to terminate'
%
self
.
exp
.
name
)
for
sc
in
self
.
wait_hosts
:
await
sc
.
wait
()
async
def
run
(
self
):
try
:
out
.
set_start
()
if
verbose
:
print
(
'%s: starting NICS'
%
self
.
name
)
for
nic
in
self
.
nics
:
if
verbose
:
print
(
'start NIC:'
,
nic
.
run_cmd
(
env
))
sc
=
exec
.
create_component
(
nic
.
full_name
(),
shlex
.
split
(
nic
.
run_cmd
(
env
)),
verbose
=
verbose
,
canfail
=
True
)
await
sc
.
start
()
running
.
append
((
nic
,
sc
))
sockets
.
append
(
env
.
nic_pci_path
(
nic
))
sockets
.
append
(
env
.
nic_eth_path
(
nic
))
sockets
.
append
(
env
.
nic_shm_path
(
nic
))
if
verbose
:
print
(
'%s: waiting for sockets'
%
self
.
name
)
for
s
in
sockets
:
await
exec
.
await_file
(
s
,
verbose
=
verbose
)
await
asyncio
.
sleep
(
0.5
)
# start networks
for
net
in
self
.
networks
:
if
verbose
:
print
(
'start Net:'
,
net
.
run_cmd
(
env
))
sc
=
exec
.
create_component
(
net
.
full_name
(),
shlex
.
split
(
net
.
run_cmd
(
env
)),
verbose
=
verbose
,
canfail
=
True
)
await
sc
.
start
()
running
.
append
((
net
,
sc
))
# start hosts
wait_hosts
=
[]
for
host
in
self
.
hosts
:
if
verbose
:
print
(
'start Host:'
,
host
.
run_cmd
(
env
))
sc
=
exec
.
create_component
(
host
.
full_name
(),
shlex
.
split
(
host
.
run_cmd
(
env
)),
verbose
=
verbose
,
canfail
=
True
)
await
sc
.
start
()
running
.
append
((
host
,
sc
))
if
host
.
wait
:
wait_hosts
.
append
(
sc
)
if
host
.
sleep
>
0
:
await
asyncio
.
sleep
(
host
.
sleep
)
if
verbose
:
print
(
'%s: waiting for hosts to terminate'
%
self
.
name
)
for
sc
in
wait_hosts
:
await
sc
.
wait
()
# wait for necessary hosts to terminate
self
.
out
.
set_start
()
await
self
.
before_nics
()
await
self
.
run_nics
()
await
self
.
before_nets
()
await
self
.
run_nets
()
await
self
.
before_hosts
()
await
self
.
run_hosts
()
await
self
.
before_wait
()
await
self
.
wait_for_hosts
()
except
:
out
.
set_failed
()
self
.
out
.
set_failed
()
traceback
.
print_exc
()
finally
:
out
.
set_end
()
self
.
out
.
set_end
()
# shut things back down
if
verbose
:
print
(
'%s: cleaning up'
%
self
.
name
)
if
self
.
verbose
:
print
(
'%s: cleaning up'
%
self
.
exp
.
name
)
await
self
.
before_cleanup
()
# "interrupt, terminate, kill" all processes
scs
=
[]
for
_
,
sc
in
running
:
for
_
,
sc
in
self
.
running
:
scs
.
append
(
sc
.
int_term_kill
())
await
asyncio
.
wait
(
scs
)
for
_
,
sc
in
running
:
# wait for all processes to terminate
for
_
,
sc
in
self
.
running
:
await
sc
.
wait
()
for
sock
in
sockets
:
# remove all sockets
for
(
exec
,
sock
)
in
self
.
sockets
:
await
exec
.
rmtree
(
sock
)
for
sim
,
sc
in
running
:
out
.
add_sim
(
sim
,
sc
)
return
out
# add all simulator components to the output
for
sim
,
sc
in
self
.
running
:
self
.
out
.
add_sim
(
sim
,
sc
)
await
self
.
after_cleanup
()
return
self
.
out
class
ExperimentSimpleRunner
(
ExperimentBaseRunner
):
""" Simple experiment runner with just one executor. """
def
__init__
(
self
,
exec
,
*
args
,
**
kwargs
):
self
.
exec
=
exec
super
().
__init__
(
*
args
,
**
kwargs
)
def
resreq_mem
(
self
):
mem
=
0
for
h
in
self
.
hosts
:
mem
+=
h
.
resreq_mem
()
for
n
in
self
.
nics
:
mem
+=
n
.
resreq_mem
()
for
n
in
self
.
networks
:
mem
+=
n
.
resreq_mem
()
return
mem
def
resreq_cores
(
self
):
cores
=
0
for
h
in
self
.
hosts
:
cores
+=
h
.
resreq_cores
()
for
n
in
self
.
nics
:
cores
+=
n
.
resreq_cores
()
for
n
in
self
.
networks
:
cores
+=
n
.
resreq_cores
()
return
cores
def
sim_executor
(
self
,
sim
):
return
self
.
exec
class
ExpEnv
(
object
):
def
__init__
(
self
,
repo_path
,
workdir
,
cpdir
):
...
...
experiments/simbricks/runtime/local.py
View file @
4b2fde4d
...
...
@@ -38,11 +38,11 @@ class LocalSimpleRuntime(Runtime):
self
.
runnable
.
append
(
run
)
async
def
do_run
(
self
,
run
):
runner
=
exp
.
ExperimentSimpleRunner
(
self
.
exec
,
run
.
experiment
,
run
.
env
,
self
.
verbose
)
await
run
.
prep_dirs
(
self
.
exec
)
await
run
.
experiment
.
prepare
(
run
.
env
,
verbose
=
self
.
verbose
,
exec
=
self
.
exec
)
run
.
output
=
await
run
.
experiment
.
run
(
run
.
env
,
verbose
=
self
.
verbose
,
exec
=
self
.
exec
)
await
runner
.
prepare
()
run
.
output
=
await
runner
.
run
()
self
.
complete
.
append
(
run
)
pathlib
.
Path
(
run
.
outpath
).
parent
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
...
...
@@ -79,12 +79,12 @@ class LocalParallelRuntime(Runtime):
async
def
do_run
(
self
,
run
):
''' actually starts a run '''
runner
=
exp
.
ExperimentSimpleRunner
(
self
.
exec
,
run
.
experiment
,
run
.
env
,
self
.
verbose
)
await
run
.
prep_dirs
(
exec
=
self
.
exec
)
await
run
.
experiment
.
prepare
(
run
.
env
,
verbose
=
self
.
verbose
,
exec
=
self
.
exec
)
await
runner
.
prepare
()
print
(
'starting run '
,
run
.
name
())
run
.
output
=
await
run
.
experiment
.
run
(
run
.
env
,
verbose
=
self
.
verbose
,
exec
=
self
.
exec
)
run
.
output
=
await
runner
.
run
()
pathlib
.
Path
(
run
.
outpath
).
parent
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
with
open
(
run
.
outpath
,
'w'
)
as
f
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment