# run.py: command-line entry point that loads experiments and schedules their runs.
import argparse
import fnmatch
import importlib
import importlib.util
import os
import pickle
import sys

import modes.experiments as exp
import modes.runtime as runtime
9
10
11
12
13

def mkdir_if_not_exists(path):
    """Create the directory `path` unless something already exists there.

    Only the final path component is created; a missing parent directory
    still raises `FileNotFoundError`, as `os.mkdir` does.

    Args:
        path: Directory to create.
    """
    # Attempt the creation directly instead of checking first: a separate
    # existence check leaves a window in which another process can create
    # the path, making os.mkdir fail.
    try:
        os.mkdir(path)
    except FileExistsError:
        # Something is already at `path`; nothing to do.
        pass

14

15
16
17
parser = argparse.ArgumentParser()
parser.add_argument('experiments', metavar='EXP', type=str, nargs='+',
        help='An experiment file to run')
18
19
parser.add_argument('--filter', metavar='PATTERN', type=str, nargs='+',
        help='Pattern to match experiment names against')
20
21
22
parser.add_argument('--pickled', action='store_const', const=True,
        default=False,
        help='Read exp files as pickled runs instead of exp.py files')
23
parser.add_argument('--runs', metavar='N', type=int, default=1,
24
        help='Number of repetition for each experiment')
25
26
parser.add_argument('--firstrun', metavar='N', type=int, default=1,
        help='ID for first run')
27
28
parser.add_argument('--force', action='store_const', const=True, default=False,
        help='Run experiments even if output already exists')
29
30
31
parser.add_argument('--verbose', action='store_const', const=True,
        default=False,
        help='Verbose output')
32
33
34

g_env = parser.add_argument_group('Environment')
g_env.add_argument('--repo', metavar='DIR', type=str,
35
        default='..', help='Repo directory')
36
g_env.add_argument('--workdir', metavar='DIR', type=str,
37
        default='./out/', help='Work directory base')
38
g_env.add_argument('--outdir', metavar='DIR',  type=str,
39
        default='./out/', help='Output directory base')
40
41
g_env.add_argument('--cpdir', metavar='DIR',  type=str,
        default='./out/', help='Checkpoint directory base')
42

43
44
45
46
47
48
49
50
51
52
g_par = parser.add_argument_group('Parallel Runtime')
g_par.add_argument('--parallel', dest='runtime', action='store_const',
        const='parallel', default='sequential',
        help='Use parallel instead of sequential runtime')
g_par.add_argument('--cores', metavar='N', type=int,
        default=len(os.sched_getaffinity(0)),
        help='Number of cores to use for parallel runs')
g_par.add_argument('--mem', metavar='N', type=int, default=None,
        help='Memory limit for parallel runs (in MB)')

53
54
55
56
57
58
59
g_slurm = parser.add_argument_group('Slurm Runtime')
g_slurm.add_argument('--slurm', dest='runtime', action='store_const',
        const='slurm', default='sequential',
        help='Use slurm instead of sequential runtime')
g_slurm.add_argument('--slurmdir', metavar='DIR',  type=str,
        default='./slurm/', help='Slurm communication directory')

60

61
# Parse the command line; `args` is read as a module-level global by the
# code below.
args = parser.parse_args()

# initialize runtime
# args.runtime is 'parallel', 'slurm' or the default 'sequential'.
if args.runtime == 'parallel':
    rt = runtime.LocalParallelRuntime(cores=args.cores, mem=args.mem,
            verbose=args.verbose)
elif args.runtime == 'slurm':
    # The slurm runtime receives the complete parsed arguments in addition
    # to its communication directory.
    rt = runtime.SlurmRuntime(args.slurmdir, args, verbose=args.verbose)
else:
    rt = runtime.LocalSimpleRuntime(verbose=args.verbose)
71

72
73
def add_exp(e, run, prereq, create_cp, restore_cp):
    """Register one run of experiment `e` with the active runtime.

    Args:
        e: Experiment object; its `name` attribute is used to build the
            output, work and checkpoint paths.
        run: Integer run id, used in the output file name and in the work
            directory path.
        prereq: Passed through to `runtime.Run` as its final argument.
        create_cp: Stored on the experiment environment as `create_cp`.
        restore_cp: Stored on the experiment environment as `restore_cp`.

    Returns:
        The `runtime.Run` object that was added to the runtime, or None when
        the output file already exists and `--force` was not given.
    """
    outpath = '%s/%s-%d.json' % (args.outdir, e.name, run)
    if os.path.exists(outpath) and not args.force:
        print('skip %s run %d' % (e.name, run))
        return None

    workdir = '%s/%s/%d' % (args.workdir, e.name, run)
    # The checkpoint path always uses index 0, independent of `run`.
    cpdir = '%s/%s/%d' % (args.cpdir, e.name, 0)

    env = exp.ExpEnv(args.repo, workdir, cpdir)
    env.create_cp = create_cp
    env.restore_cp = restore_cp

    # Keep the integer run id and the Run object under separate names
    # instead of rebinding the `run` parameter.
    run_obj = runtime.Run(e, run, env, outpath, prereq)
    rt.add_run(run_obj)
    return run_obj

89
90
91
92
93
94
95
96
97
98
99
# load experiments
if not args.pickled:
    # default: load python modules with experiments
    experiments = []
    for path in args.experiments:
        modname, _ = os.path.splitext(os.path.basename(path))

        spec = importlib.util.spec_from_file_location(modname, path)
        if spec is None or spec.loader is None:
            # No loader could be determined for this path; report the file
            # instead of failing with an AttributeError on None.
            raise ImportError('cannot load experiment file %s' % path)
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        # Every experiment module must expose a list named `experiments`.
        experiments += mod.experiments

    for e in experiments:
        # apply filter if any specified: skip experiments whose name matches
        # none of the given patterns
        if args.filter and not any(
                fnmatch.fnmatch(e.name, pattern) for pattern in args.filter):
            continue

        # if this is an experiment with a checkpoint we might have to create it
        if e.checkpoint:
            # Run 0 creates the checkpoint; it becomes the prerequisite of
            # the regular runs (None if add_exp skipped it).
            prereq = add_exp(e, 0, None, True, False)
        else:
            prereq = None

        for run in range(args.firstrun, args.firstrun + args.runs):
            add_exp(e, run, prereq, False, e.checkpoint)
else:
    # otherwise load pickled run object
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only pass pickled runs that come from a trusted source.
    for path in args.experiments:
        with open(path, 'rb') as f:
            rt.add_run(pickle.load(f))

rt.start()