Unverified commit 168c9baa, authored by anj-s and committed by GitHub

[offload] Add golden data for offload benchmarks. (#578)

* add model

* add offload regression benchmarks

* add golden data

* remove mp pipe benchmark

* fix lint

* remove rank

* add check for model type

* lint errors
parent e41452e8
@@ -166,6 +166,12 @@ upload_coverage: &upload_coverage
       file: 'coverage.xml'
       token: $CODECOV_TOKEN

+run_offload_benchmark: &run_offload_benchmark
+  - run:
+      name: Run Offload Benchmark
+      command: |
+        python benchmarks/experimental/offload.py
+
 run_pipe_benchmark: &run_pipe_benchmark
   - run:
       name: Run Pipe Benchmark

@@ -544,6 +550,8 @@ jobs:
       - <<: *run_pipe_benchmark

+      - <<: *run_offload_benchmark
+
       - <<: *run_oss_amp
       - <<: *run_oss_for_each
...
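Note: the new CI step passes or fails purely on the benchmark script's exit status. A minimal Python sketch of the equivalent behavior (the script path mirrors the config above; the wrapper itself is illustrative, not part of this commit):

    import subprocess
    import sys

    # Run the offload benchmark the same way the new CircleCI step does; a
    # RuntimeError raised by the regression checks makes it exit nonzero.
    result = subprocess.run([sys.executable, "benchmarks/experimental/offload.py"])
    if result.returncode != 0:
        sys.exit("offload benchmark regression check failed")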
@@ -120,7 +120,7 @@ def train_seq(model_config, benchmark_config, model_specs, args):
     optimizer = model_config["optimizer"](model.parameters(), lr=benchmark_config["lr"])
     dataloader, _, _ = model_config["data"]

-    def train_epoch(args):
+    def train_epoch(args, num_iters):
         model.train()
         for batch_inputs, batch_outputs in dataloader:
             batch_inputs, batch_outputs = batch_inputs.to("cuda"), batch_outputs.to("cuda")

@@ -143,10 +143,13 @@ def train_seq(model_config, benchmark_config, model_specs, args):
                     loss.item(), benchmark_config["batch_size"] / (time.time_ns() - start) * 10 ** 9
                 )
             )
+            num_iters -= 1
+            if num_iters == 0:
+                break

     if args.use_profiler:
         prof.export_chrome_trace("/tmp/offload_prof")

-    train_epoch(args)
+    train_epoch(args, num_iters=5)
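The countdown-and-break above caps a benchmark epoch at a fixed number of iterations so the CI job finishes in bounded time. The same pattern in isolation (the toy dataloader and step function are illustrative):

    def train_epoch(dataloader, step_fn, num_iters=5):
        # Run at most num_iters steps, no matter how long the dataloader is.
        for batch in dataloader:
            step_fn(batch)
            num_iters -= 1
            if num_iters == 0:
                break

    # Five steps over a toy iterable standing in for the real dataloader.
    train_epoch(range(100), step_fn=lambda batch: None, num_iters=5)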
 def train(model_config, model, benchmark_config, model_specs, args):

@@ -179,6 +182,10 @@ def train(model_config, model, benchmark_config, model_specs, args):
         return data, target

     for i, batch in enumerate(lm_dataloader):
+        # TODO(anj): Make this a flag for both "lm" and "seq" models.
+        if i == 5:
+            break
         if i == 1:
             epoch_start_time = time.time()
@@ -226,24 +233,20 @@ def train(model_config, model, benchmark_config, model_specs, args):
     return wps, loss.item()

-def verify_peak_memory(rank, golden_config, std_dev):
-    print("Peak allocated bytes on cuda:0: {:1d}".format(torch.cuda.memory_stats(rank)["allocated_bytes.all.peak"]))
-    current_device_usage = torch.cuda.memory_stats(rank)["allocated_bytes.all.peak"]
-    golden_ref = golden_config["peak_mem_usage"][rank]
+def verify_peak_memory(golden_config, std_dev):
+    print("Peak allocated bytes on cuda:0: {:1d}".format(torch.cuda.memory_stats(0)["allocated_bytes.all.peak"]))
+    current_device_usage = torch.cuda.memory_stats(0)["allocated_bytes.all.peak"]
+    golden_ref = golden_config["peak_mem_usage"]
     if not current_device_usage < golden_ref * std_dev:
         raise RuntimeError(
-            "Peak memory usage for cuda device {:d} is {:d} which"
-            "is less than golden reference value of {:d}".format(rank, current_device_usage, golden_ref)
+            "Peak memory usage for cuda device {:d} is {:d} which "
+            "is greater than golden reference value of {:d}".format(0, current_device_usage, golden_ref)
         )

-def verify_lm_run(wps, golden_config, args):
+def verify_lm_throughput(wps, golden_config, args):
     """Verify that words per second for a given benchmark run matches the golden data."""
-    # Verify wps only on the last rank in multiprocess pipe
-    if not args.multiprocess or dist.get_rank() == dist.get_world_size() - 1:
-        # Assert that words per second is within 3 standard deviations of the average
-        # of five golden runs
-        print("Throughput(wps) is {:.2f}.".format(wps))
-        if not wps > (golden_config["avg_wps"] - (3 * golden_config["std_dev_wps"])):
-            raise RuntimeError(
+    print("Throughput(wps) is {:.2f}.".format(wps))
+    if not wps > (golden_config["avg_wps"] - (3 * golden_config["std_dev_wps"])):
+        raise RuntimeError(

@@ -253,12 +256,6 @@ def verify_lm_run(wps, golden_config, args):
             )
         )

-    if args.multiprocess:
-        verify_peak_memory(dist.get_rank(), golden_config, 1.5)
-    else:
-        for i in range(4):
-            verify_peak_memory(i, golden_config, 1.1)
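Taken together, the two helpers implement a one-sided 3-sigma check on throughput and a multiplicative tolerance on peak memory. A torch-free sketch of the same logic (names and thresholds follow the diff; these helpers are illustrative, not the committed code):

    def check_throughput(wps, golden):
        # One-sided check: fail only if throughput falls more than 3 standard
        # deviations below the average of the golden runs; faster is fine.
        floor = golden["avg_wps"] - 3 * golden["std_dev_wps"]
        if not wps > floor:
            raise RuntimeError(f"Throughput {wps:.2f} wps is below the golden floor {floor:.2f} wps")

    def check_peak_memory(peak_bytes, golden, tolerance=1.1):
        # Fail if peak CUDA memory exceeds the golden reference by more than
        # the tolerance factor (1.1, i.e. 10% headroom, as in the diff).
        limit = golden["peak_mem_usage"] * tolerance
        if not peak_bytes < limit:
            raise RuntimeError(f"Peak memory {peak_bytes} bytes exceeds the allowed {limit:.0f} bytes")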
 def benchmark_language_model(model_config, model, benchmark_config, model_specs, args):
     epoch = benchmark_config["epochs"]

@@ -271,9 +268,14 @@ def benchmark_language_model(model_config, model, benchmark_config, model_specs, args):
     print("-" * 110)
     print("| end of epoch {:1d} | time: {:5.2f}s | train loss {:5.2f} ".format(epoch, elapsed_time, loss))
     print("-" * 110)
-    print("Throughput(wps) is {:.2f}.".format(wps))
-    print("Peak allocated bytes on cuda:0: {:1d}".format(torch.cuda.memory_stats(0)["allocated_bytes.all.peak"]))
-    # TODO(anj-s): Enable golden config data verification.
+
+    if args.model_name == "seq":
+        raise RuntimeError(
+            f"Golden data verification is only supported for the Transformer(lm) model and not {args.model_name}"
+        )
+    golden_config = get_golden_config(args.model_name, args)
+    verify_lm_throughput(wps, golden_config, args)
+    verify_peak_memory(golden_config, 1.1)
 def get_synthetic_dataloaders(args, device, benchmark_config, model_specs):

@@ -357,7 +359,7 @@ def get_golden_config(model_name, args):
     """Return a dict with the golden data for throughput and memory usage."""
     if model_name == "lm":
-        return lm_wikitext2.get_golden_real_stats(False)
+        return lm_wikitext2.get_golden_real_stats()
     else:
         raise RuntimeError(f"Unrecognized args.model_name {args.model_name}")

@@ -403,7 +405,7 @@ def run_benchmark(args):
     parser = argparse.ArgumentParser(description="benchmark")
     parser.add_argument(
-        "--dry_run", default=True, action="store_true", help="Run a sample training run without regression testing."
+        "--dry_run", default=False, action="store_true", help="Run a sample training run without regression testing."
     )
     parser.add_argument(
         "--debug",
...
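The --dry_run default change is worth calling out: with action="store_true", a default of True makes the flag a no-op that is always true, so the regression checks it presumably gates could never be switched on. A quick illustration (--broken_flag is made up; --dry_run mirrors the diff):

    import argparse

    parser = argparse.ArgumentParser(description="benchmark")
    # With default=True, "store_true" can never produce False: the flag is a no-op.
    parser.add_argument("--broken_flag", default=True, action="store_true")
    # With default=False, the flag behaves as a real opt-in switch.
    parser.add_argument("--dry_run", default=False, action="store_true")

    print(parser.parse_args([]))             # Namespace(broken_flag=True, dry_run=False)
    print(parser.parse_args(["--dry_run"]))  # Namespace(broken_flag=True, dry_run=True)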
@@ -32,6 +32,13 @@ class Offload_Transformer:
         "slices": 3,
     }

+def get_golden_real_stats():
+    return {
+        "avg_wps": 192.105,
+        "std_dev_wps": 39.56,
+        "peak_mem_usage": 1180848128,
+    }
+
 class Offload_Sequential:
     def get_model_config():
...
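For reference, the acceptance window implied by this golden data, worked out numerically (the arithmetic mirrors the verification helpers; the printout is illustrative):

    golden = {"avg_wps": 192.105, "std_dev_wps": 39.56, "peak_mem_usage": 1180848128}

    # Throughput floor: 192.105 - 3 * 39.56 = 73.425 wps.
    wps_floor = golden["avg_wps"] - 3 * golden["std_dev_wps"]

    # Peak-memory ceiling at the 1.1 tolerance: ~1.18 GB * 1.1 = ~1.30 GB.
    mem_ceiling = golden["peak_mem_usage"] * 1.1

    print(f"a run passes if wps > {wps_floor:.3f} and peak bytes < {mem_ceiling:.0f}")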