Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ycai
simbricks
Commits
013f612a
Commit
013f612a
authored
Aug 06, 2021
by
Antoine Kaufmann
Browse files
experiments: more parallelism when starting experiments
Especially important for large-scale unsynchronized experiments.
parent
35fa9495
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
23 additions
and
5 deletions
+23
-5
experiments/simbricks/exectools.py
experiments/simbricks/exectools.py
+6
-0
experiments/simbricks/experiments.py
experiments/simbricks/experiments.py
+17
-5
No files found.
experiments/simbricks/exectools.py
View file @
013f612a
...
@@ -281,6 +281,12 @@ class Executor(object):
...
@@ -281,6 +281,12 @@ class Executor(object):
await
cmdC
.
start
()
await
cmdC
.
start
()
await
cmdC
.
wait
()
await
cmdC
.
wait
()
async def await_files(self, paths, delay=0.05, verbose=False):
    """Wait concurrently until `self.await_file` has completed for every path.

    Args:
        paths: Iterable of paths; each one is handed to `self.await_file`.
            May be empty, in which case this returns immediately.
        delay: Forwarded unchanged to `self.await_file` as `delay`.
        verbose: Forwarded unchanged to `self.await_file` as `verbose`.

    Returns:
        None.

    Raises:
        Whatever `self.await_file` raises; the first such exception is
        propagated to the caller instead of being left unretrieved.
    """
    waits = [
        self.await_file(p, delay=delay, verbose=verbose) for p in paths
    ]
    # asyncio.gather (rather than asyncio.wait) is used because:
    #  * asyncio.wait rejects bare coroutine objects on Python >= 3.11
    #    (passing them was deprecated in 3.8),
    #  * asyncio.wait raises ValueError when given an empty collection,
    #  * gather re-raises failures from the awaited coroutines.
    await asyncio.gather(*waits)
class
LocalExecutor
(
Executor
):
class
LocalExecutor
(
Executor
):
def
create_component
(
self
,
label
,
parts
,
**
kwargs
):
def
create_component
(
self
,
label
,
parts
,
**
kwargs
):
return
SimpleComponent
(
label
,
parts
,
**
kwargs
)
return
SimpleComponent
(
label
,
parts
,
**
kwargs
)
...
...
experiments/simbricks/experiments.py
View file @
013f612a
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
import
os
import
os
import
asyncio
import
asyncio
from
collections
import
defaultdict
import
simbricks.exectools
as
exectools
import
simbricks.exectools
as
exectools
import
shlex
import
shlex
import
time
import
time
...
@@ -147,12 +148,14 @@ class ExperimentBaseRunner(object):
...
@@ -147,12 +148,14 @@ class ExperimentBaseRunner(object):
async
def
prepare
(
self
):
async
def
prepare
(
self
):
# generate config tars
# generate config tars
copies
=
[]
for
host
in
self
.
exp
.
hosts
:
for
host
in
self
.
exp
.
hosts
:
path
=
self
.
env
.
cfgtar_path
(
host
)
path
=
self
.
env
.
cfgtar_path
(
host
)
if
self
.
verbose
:
if
self
.
verbose
:
print
(
'preparing config tar:'
,
path
)
print
(
'preparing config tar:'
,
path
)
host
.
node_config
.
make_tar
(
path
)
host
.
node_config
.
make_tar
(
path
)
await
self
.
sim_executor
(
host
).
send_file
(
path
,
self
.
verbose
)
copies
.
append
(
self
.
sim_executor
(
host
).
send_file
(
path
,
self
.
verbose
))
await
asyncio
.
wait
(
copies
)
# prepare all simulators in parallel
# prepare all simulators in parallel
sims
=
[]
sims
=
[]
...
@@ -167,6 +170,7 @@ class ExperimentBaseRunner(object):
...
@@ -167,6 +170,7 @@ class ExperimentBaseRunner(object):
""" Start all NIC simulators. """
""" Start all NIC simulators. """
if
self
.
verbose
:
if
self
.
verbose
:
print
(
'%s: starting NICS'
%
self
.
exp
.
name
)
print
(
'%s: starting NICS'
%
self
.
exp
.
name
)
starts
=
[]
for
nic
in
self
.
exp
.
nics
:
for
nic
in
self
.
exp
.
nics
:
if
self
.
verbose
:
if
self
.
verbose
:
print
(
'start NIC:'
,
nic
.
run_cmd
(
self
.
env
))
print
(
'start NIC:'
,
nic
.
run_cmd
(
self
.
env
))
...
@@ -174,18 +178,22 @@ class ExperimentBaseRunner(object):
...
@@ -174,18 +178,22 @@ class ExperimentBaseRunner(object):
sc
=
exec
.
create_component
(
nic
.
full_name
(),
sc
=
exec
.
create_component
(
nic
.
full_name
(),
shlex
.
split
(
nic
.
run_cmd
(
self
.
env
)),
verbose
=
self
.
verbose
,
shlex
.
split
(
nic
.
run_cmd
(
self
.
env
)),
verbose
=
self
.
verbose
,
canfail
=
True
)
canfail
=
True
)
await
sc
.
start
()
starts
.
append
(
sc
.
start
()
)
self
.
running
.
append
((
nic
,
sc
))
self
.
running
.
append
((
nic
,
sc
))
self
.
sockets
.
append
((
exec
,
self
.
env
.
nic_pci_path
(
nic
)))
self
.
sockets
.
append
((
exec
,
self
.
env
.
nic_pci_path
(
nic
)))
self
.
sockets
.
append
((
exec
,
self
.
env
.
nic_eth_path
(
nic
)))
self
.
sockets
.
append
((
exec
,
self
.
env
.
nic_eth_path
(
nic
)))
self
.
sockets
.
append
((
exec
,
self
.
env
.
nic_shm_path
(
nic
)))
self
.
sockets
.
append
((
exec
,
self
.
env
.
nic_shm_path
(
nic
)))
await
asyncio
.
wait
(
starts
)
# Wait till all NIC sockets exist
# Wait till all NIC sockets exist
if
self
.
verbose
:
if
self
.
verbose
:
print
(
'%s: waiting for sockets'
%
self
.
exp
.
name
)
print
(
'%s: waiting for sockets'
%
self
.
exp
.
name
)
byexec
=
defaultdict
(
lambda
:
[])
for
(
exec
,
s
)
in
self
.
sockets
:
for
(
exec
,
s
)
in
self
.
sockets
:
await
exec
.
await_file
(
s
,
verbose
=
self
.
verbose
)
byexec
[
exec
].
append
(
s
)
for
(
exec
,
ss
)
in
byexec
.
items
():
await
exec
.
await_files
(
ss
,
verbose
=
self
.
verbose
)
# just a bit of a safety delay
# just a bit of a safety delay
await
asyncio
.
sleep
(
0.5
)
await
asyncio
.
sleep
(
0.5
)
...
@@ -194,6 +202,7 @@ class ExperimentBaseRunner(object):
...
@@ -194,6 +202,7 @@ class ExperimentBaseRunner(object):
""" Start all network simulators (typically one). """
""" Start all network simulators (typically one). """
if
self
.
verbose
:
if
self
.
verbose
:
print
(
'%s: starting networks'
%
self
.
exp
.
name
)
print
(
'%s: starting networks'
%
self
.
exp
.
name
)
starts
=
[]
for
net
in
self
.
exp
.
networks
:
for
net
in
self
.
exp
.
networks
:
if
self
.
verbose
:
if
self
.
verbose
:
print
(
'start Net:'
,
net
.
run_cmd
(
self
.
env
))
print
(
'start Net:'
,
net
.
run_cmd
(
self
.
env
))
...
@@ -202,13 +211,15 @@ class ExperimentBaseRunner(object):
...
@@ -202,13 +211,15 @@ class ExperimentBaseRunner(object):
sc
=
exec
.
create_component
(
net
.
full_name
(),
sc
=
exec
.
create_component
(
net
.
full_name
(),
shlex
.
split
(
net
.
run_cmd
(
self
.
env
)),
verbose
=
self
.
verbose
,
shlex
.
split
(
net
.
run_cmd
(
self
.
env
)),
verbose
=
self
.
verbose
,
canfail
=
True
)
canfail
=
True
)
await
sc
.
start
()
starts
.
append
(
sc
.
start
()
)
self
.
running
.
append
((
net
,
sc
))
self
.
running
.
append
((
net
,
sc
))
await
asyncio
.
wait
(
starts
)
async
def
run_hosts
(
self
):
async
def
run_hosts
(
self
):
""" Start all host simulators. """
""" Start all host simulators. """
if
self
.
verbose
:
if
self
.
verbose
:
print
(
'%s: starting hosts'
%
self
.
exp
.
name
)
print
(
'%s: starting hosts'
%
self
.
exp
.
name
)
starts
=
[]
for
host
in
self
.
exp
.
hosts
:
for
host
in
self
.
exp
.
hosts
:
if
self
.
verbose
:
if
self
.
verbose
:
print
(
'start Host:'
,
host
.
run_cmd
(
self
.
env
))
print
(
'start Host:'
,
host
.
run_cmd
(
self
.
env
))
...
@@ -217,7 +228,7 @@ class ExperimentBaseRunner(object):
...
@@ -217,7 +228,7 @@ class ExperimentBaseRunner(object):
sc
=
exec
.
create_component
(
host
.
full_name
(),
sc
=
exec
.
create_component
(
host
.
full_name
(),
shlex
.
split
(
host
.
run_cmd
(
self
.
env
)),
verbose
=
self
.
verbose
,
shlex
.
split
(
host
.
run_cmd
(
self
.
env
)),
verbose
=
self
.
verbose
,
canfail
=
True
)
canfail
=
True
)
await
sc
.
start
()
starts
.
append
(
sc
.
start
()
)
self
.
running
.
append
((
host
,
sc
))
self
.
running
.
append
((
host
,
sc
))
if
host
.
wait
:
if
host
.
wait
:
...
@@ -225,6 +236,7 @@ class ExperimentBaseRunner(object):
...
@@ -225,6 +236,7 @@ class ExperimentBaseRunner(object):
if
host
.
sleep
>
0
:
if
host
.
sleep
>
0
:
await
asyncio
.
sleep
(
host
.
sleep
)
await
asyncio
.
sleep
(
host
.
sleep
)
await
asyncio
.
wait
(
starts
)
async
def
wait_for_hosts
(
self
):
async
def
wait_for_hosts
(
self
):
""" Wait for hosts to terminate (the ones marked to wait on). """
""" Wait for hosts to terminate (the ones marked to wait on). """
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment