experiments: pickle runs instead of experiments for slurm

This simplifies both ends (the Slurm runtime that writes the pickle and the run.py invocation that loads it), especially with checkpoints.

experiments: pickle runs instead of experiments for slurm
This simplifies both ends (the Slurm runtime that writes the pickle and the run.py invocation that loads it), especially with checkpoints.
c6139684 · Antoine Kaufmann · 710f659b · c6139684 · c6139684
Commit c6139684 authored Nov 04, 2020 by Antoine Kaufmann
Show whitespace changes
Inline Side-by-side

Showing with 34 additions and 29 deletions

experiments/modes/runtime.py experiments/modes/runtime.py +4 -7

experiments/run.py experiments/run.py +30 -22

No files found.
--- a/experiments/modes/runtime.py
+++ b/experiments/modes/runtime.py
@@ -166,9 +166,10 @@ class SlurmRuntime(Runtime):
        exp_log = '%s/%s-%d.log' % (self.slurmdir, exp.name, run.index)
        exp_script = '%s/%s-%d.sh' % (self.slurmdir, exp.name, run.index)
-        # write out pickled experiment
+        # write out pickled run
        with open(exp_path, 'wb') as f:
-            pickle.dump(exp, f)
+            run.prereq = None # we don't want to pull in the prereq too
+            pickle.dump(run, f)
        # create slurm batch script
        with open(exp_script, 'w') as f:
@@ -183,11 +184,7 @@ class SlurmRuntime(Runtime):
                s = int(exp.timeout % 60)
                f.write('#SBATCH --time=%02d:%02d:%02d\n' % (h, m, s))
-            f.write('mkdir -p %s\n' % (self.args.workdir))
+            f.write('python3 run.py --pickled %s\n' % (exp_path))
-            f.write(('python3 run.py --repo=%s --workdir=%s --outdir=%s '
-                '--firstrun=%d --runs=1 %s\n') % (self.args.repo,
-                    self.args.workdir, self.args.outdir, run.index,
-                    exp_path))
            f.write('status=$?\n')
            if self.cleanup:
                f.write('rm -rf %s\n' % (run.env.workdir))

--- a/experiments/run.py
+++ b/experiments/run.py
@@ -14,6 +14,9 @@ def mkdir_if_not_exists(path):
 parser = argparse.ArgumentParser()
 parser.add_argument('experiments', metavar='EXP', type=str, nargs='+',
        help='An experiment file to run')
+parser.add_argument('--pickled', action='store_const', const=True,
+        default=False,
+        help='Read exp files as pickled runs instead of exp.py files')
 parser.add_argument('--runs', metavar='N', type=int, default=1,
        help='Number of repetition for each experiment')
 parser.add_argument('--firstrun', metavar='N', type=int, default=1,
@@ -47,21 +50,10 @@ g_slurm.add_argument('--slurm', dest='runtime', action='store_const',
 g_slurm.add_argument('--slurmdir', metavar='DIR',  type=str,
        default='./slurm/', help='Slurm communication directory')
-args = parser.parse_args()
-experiments = []
-for path in args.experiments:
-    modname, modext = os.path.splitext(os.path.basename(path))
-    if modext == '.py':
+args = parser.parse_args()
-        spec = importlib.util.spec_from_file_location(modname, path)
-        mod = importlib.util.module_from_spec(spec)
-        spec.loader.exec_module(mod)
-        experiments += mod.experiments
-    else:
-        with open(path, 'rb') as f:
-            experiments.append(pickle.load(f))
+# initialize runtime
 if args.runtime == 'parallel':
    rt = runtime.LocalParallelRuntime(cores=args.cores, mem=args.mem,
            verbose=args.verbose)
@@ -70,8 +62,19 @@ elif args.runtime == 'slurm':
 else:
    rt = runtime.LocalSimpleRuntime(verbose=args.verbose)
+# load experiments
+if not args.pickled:
+    # default: load python modules with experiments
+    experiments = []
+    for path in args.experiments:
+        modname, _ = os.path.splitext(os.path.basename(path))
+        spec = importlib.util.spec_from_file_location(modname, path)
+        mod = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(mod)
+        experiments += mod.experiments
-for e in experiments:
+    for e in experiments:
        for run in range(args.firstrun, args.firstrun + args.runs):
            outpath = '%s/%s-%d.json' % (args.outdir, e.name, run)
            if os.path.exists(outpath):
@@ -82,5 +85,10 @@ for e in experiments:
            env = exp.ExpEnv(args.repo, workdir)
            rt.add_run(runtime.Run(e, run, env, outpath))
+else:
+    # otherwise load pickled run object
+    for path in args.experiments:
+        with open(path, 'rb') as f:
+            rt.add_run(pickle.load(f))
 rt.start()