# experiments.py
# Copyright 2021 Max Planck Institute for Software Systems, and
# National University of Singapore
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

23
24
25
26
27
28
import os
import asyncio
import modes.exectools as exectools
import shlex
import time
import json
29
import traceback
30
31
32
33
34
35
36
37
38
39
40

class Experiment(object):
    """A named set of host, NIC and network simulators run together.

    Simulators are registered with ``add_host``/``add_nic``/``add_network``.
    ``prepare`` builds the per-host config tars and runs each simulator's
    preparation commands; ``run`` starts NICs, then networks, then hosts,
    waits for the hosts flagged with ``wait`` and finally tears everything
    down again, returning an ``ExpOutput``.
    """

    # Class-level defaults; ``name`` is always overwritten per instance.
    name = None
    # Not read anywhere in this class (presumably consumed by callers).
    timeout = None
    # Not read anywhere in this class (presumably consumed by callers).
    checkpoint = False

    def __init__(self, name):
        """Create an empty experiment called ``name``."""
        self.name = name
        self.hosts = []
        self.nics = []
        self.networks = []
        # Free-form dictionary that ExpOutput copies a reference to.
        self.metadata = {}

    def add_host(self, sim):
        """Register a host simulator.

        Raises:
            Exception: if a host with the same ``name`` is already present.
        """
        if any(h.name == sim.name for h in self.hosts):
            raise Exception('Duplicate host name')
        self.hosts.append(sim)

    def add_nic(self, sim):
        """Register a NIC simulator.

        Raises:
            Exception: if a NIC with the same ``name`` is already present.
        """
        if any(n.name == sim.name for n in self.nics):
            raise Exception('Duplicate nic name')
        self.nics.append(sim)

    def add_network(self, sim):
        """Register a network simulator.

        Raises:
            Exception: if a network with the same ``name`` is already present.
        """
        if any(n.name == sim.name for n in self.networks):
            raise Exception('Duplicate net name')
        self.networks.append(sim)

    async def prepare(self, env, verbose=False):
        """Build config tars and run all simulators' preparation commands.

        Args:
            env: environment object providing ``cfgtar_path`` and passed on
                to each simulator's ``prep_cmds``.
            verbose: print progress and forward verbosity to ``exectools``.
        """
        # generate config tars
        for host in self.hosts:
            path = env.cfgtar_path(host)
            if verbose:
                print('preparing config tar:', path)
            host.node_config.make_tar(path)

        # prepare all simulators in parallel
        preps = [
            exectools.run_cmdlist('prepare_' + self.name,
                list(sim.prep_cmds(env)), verbose=verbose)
            for sim in self.hosts + self.nics + self.networks
        ]
        # gather() accepts bare coroutines (asyncio.wait() no longer does on
        # Python >= 3.11); the guard keeps an empty experiment a no-op.
        if preps:
            results = await asyncio.gather(*preps, return_exceptions=True)
            # As before, a failing preparation does not abort the others or
            # raise here, but it is reported instead of vanishing silently.
            for res in results:
                if isinstance(res, Exception):
                    traceback.print_exception(type(res), res,
                        res.__traceback__)

    async def _start_sim(self, label, sim, env, verbose):
        """Start one simulator as a ``SimpleComponent`` and return it."""
        cmd = sim.run_cmd(env)
        if verbose:
            print('start %s:' % label, cmd)
        sc = exectools.SimpleComponent(sim.full_name(), shlex.split(cmd),
            verbose=verbose, canfail=True)
        await sc.start()
        return sc

    async def run(self, env, verbose=False):
        """Run the experiment and return its ``ExpOutput``.

        NICs are started first and their sockets awaited, then networks,
        then hosts. Any error (including cancellation) marks the output as
        failed; all started simulators are always shut down and collected.

        Args:
            env: environment object providing the NIC socket paths and
                passed on to each simulator's ``run_cmd``.
            verbose: print progress and forward verbosity to ``exectools``.
        """
        running = []
        sockets = []
        out = ExpOutput(self)
        try:
            out.set_start()

            if verbose:
                print('%s: starting NICS' % self.name)
            for nic in self.nics:
                sc = await self._start_sim('NIC', nic, env, verbose)
                running.append((nic, sc))

                sockets.append(env.nic_pci_path(nic))
                sockets.append(env.nic_eth_path(nic))
                sockets.append(env.nic_shm_path(nic))

            if verbose:
                print('%s: waiting for sockets' % self.name)
            for s in sockets:
                await exectools.await_file(s, verbose=verbose)
            await asyncio.sleep(0.5)

            # start networks
            for net in self.networks:
                sc = await self._start_sim('Net', net, env, verbose)
                running.append((net, sc))

            # start hosts
            wait_hosts = []
            for host in self.hosts:
                sc = await self._start_sim('Host', host, env, verbose)
                running.append((host, sc))

                if host.wait:
                    wait_hosts.append(sc)

                if host.sleep > 0:
                    await asyncio.sleep(host.sleep)

            # wait for necessary hosts to terminate
            if verbose:
                print('%s: waiting for hosts to terminate' % self.name)
            for sc in wait_hosts:
                await sc.wait()
        except BaseException:
            # Deliberately catch-all (same as the former bare ``except:``):
            # interrupts and cancellation are recorded as a failed run so
            # that the partial output is still returned after cleanup.
            out.set_failed()
            traceback.print_exc()
        finally:
            out.set_end()

            # shut things back down
            if verbose:
                print('%s: cleaning up' % self.name)

            stops = [sc.int_term_kill() for _, sc in running]
            # Nothing may have been started if the failure happened early.
            if stops:
                await asyncio.gather(*stops, return_exceptions=True)

            for _, sc in running:
                await sc.wait()

            for sock in sockets:
                try:
                    os.remove(sock)
                except FileNotFoundError:
                    # The socket was never created (startup failed before
                    # the NIC brought it up); nothing to clean.
                    pass

            for sim, sc in running:
                out.add_sim(sim, sc)
        return out

    def resreq_mem(self):
        """Return the sum of ``resreq_mem()`` over all simulators."""
        return sum(sim.resreq_mem()
                   for sim in self.hosts + self.nics + self.networks)

    def resreq_cores(self):
        """Return the sum of ``resreq_cores()`` over all simulators."""
        return sum(sim.resreq_cores()
                   for sim in self.hosts + self.nics + self.networks)

class ExpEnv(object):
    """Filesystem layout used when running an experiment.

    Stores the absolute repository, working and ``cpdir`` directories and
    derives the paths of tool binaries, kernel images and per-simulator
    files from them.
    """

    def __init__(self, repo_path, workdir, cpdir):
        """Resolve the three base directories and the fixed tool paths.

        Args:
            repo_path: repository root; tool and image paths hang off it.
            workdir: directory for per-simulator working files.
            cpdir: base directory used by ``gem5_cpdir``.
        """
        repo = os.path.abspath(repo_path)
        self.repodir = repo
        self.workdir = os.path.abspath(workdir)
        self.cpdir = os.path.abspath(cpdir)

        # qemu binaries and kernel image
        self.qemu_img_path = repo + '/qemu/build/qemu-img'
        self.qemu_path = repo + '/qemu/build/x86_64-softmmu/qemu-system-x86_64'
        self.qemu_kernel_path = repo + '/images/bzImage'

        # gem5 binary, config script and kernel image
        self.gem5_path = repo + '/gem5/build/X86/gem5.opt'
        self.gem5_py_path = repo + '/gem5/configs/cosim/cosim.py'
        self.gem5_kernel_path = repo + '/images/vmlinux'

    def hdcopy_path(self, sim):
        """Return ``<workdir>/hdcopy.<sim.name>``."""
        fields = (self.workdir, sim.name)
        return '%s/hdcopy.%s' % fields

    def hd_path(self, hd_name):
        """Return the path of image ``hd_name`` below ``<repodir>/images``."""
        fields = (self.repodir, hd_name, hd_name)
        return '%s/images/output-%s/%s' % fields

    def hd_raw_path(self, hd_name):
        """Return the path of the ``.raw`` variant of image ``hd_name``."""
        fields = (self.repodir, hd_name, hd_name)
        return '%s/images/output-%s/%s.raw' % fields

    def cfgtar_path(self, sim):
        """Return ``<workdir>/cfg.<sim.name>.tar``."""
        fields = (self.workdir, sim.name)
        return '%s/cfg.%s.tar' % fields

    def nic_pci_path(self, sim):
        """Return ``<workdir>/nic.pci.<sim.name>``."""
        fields = (self.workdir, sim.name)
        return '%s/nic.pci.%s' % fields

    def nic_eth_path(self, sim):
        """Return ``<workdir>/nic.eth.<sim.name>``."""
        fields = (self.workdir, sim.name)
        return '%s/nic.eth.%s' % fields

    def nic_shm_path(self, sim):
        """Return ``<workdir>/nic.shm.<sim.name>``."""
        fields = (self.workdir, sim.name)
        return '%s/nic.shm.%s' % fields

    def gem5_outdir(self, sim):
        """Return ``<workdir>/gem5-out.<sim.name>``."""
        fields = (self.workdir, sim.name)
        return '%s/gem5-out.%s' % fields

    def gem5_cpdir(self, sim):
        """Return ``<cpdir>/gem5-cp.<sim.name>``."""
        fields = (self.cpdir, sim.name)
        return '%s/gem5-cp.%s' % fields

224
225
226
class ExpOutput(object):
    """Record of one experiment run: timing, success flag, per-sim output."""

    def __init__(self, exp):
        """Start an empty record for ``exp``.

        The attribute order below is also the key order produced by
        ``dumps``, which serialises ``__dict__`` directly.
        """
        self.exp_name = exp.name
        # Same dictionary object as the experiment's, not a copy.
        self.metadata = exp.metadata
        self.start_time = None
        self.end_time = None
        self.sims = {}
        self.success = True

    def set_start(self):
        """Store the current ``time.time()`` as the start time."""
        now = time.time()
        self.start_time = now

    def set_end(self):
        """Store the current ``time.time()`` as the end time."""
        now = time.time()
        self.end_time = now

    def set_failed(self):
        """Mark the run as unsuccessful."""
        self.success = False

    def add_sim(self, sim, comp):
        """Record command and captured output of ``comp`` for ``sim``.

        The entry is stored under ``sim.full_name()``; an existing entry
        with the same key is replaced.
        """
        self.sims[sim.full_name()] = {
            'class': sim.__class__.__name__,
            'cmd': comp.cmd_parts,
            'stdout': comp.stdout,
            'stderr': comp.stderr,
        }

    def dumps(self):
        """Return all attributes of this record as a JSON string."""
        state = self.__dict__
        return json.dumps(state)



256
257
258
def run_exp_local(exp, env, verbose=False):
    """Prepare and then run ``exp`` on this machine, returning its output.

    Each phase is driven to completion by its own ``asyncio.run`` call;
    the return value is whatever ``exp.run`` produces.
    """
    preparation = exp.prepare(env, verbose=verbose)
    asyncio.run(preparation)

    execution = exp.run(env, verbose=verbose)
    return asyncio.run(execution)