"git@developer.sourcefind.cn:OpenDAS/vision.git" did not exist on "7c1ed419ff7e0e62d91960a6011db4156e5c302a"
Commit e59ad48f authored by guptapriya, committed by Toby Boyd
Browse files

Add NCF custom training loop benchmark (#6943)

* Add CTL benchmark

* Divide train loss by number of train steps

* Increase the number of epochs to 10

* Add benchmark for early stopping with CTL

* Remove whitespace
parent 25f13fa9
...@@ -81,7 +81,7 @@ class KerasNCFRealData(KerasNCFBenchmarkBase): ...@@ -81,7 +81,7 @@ class KerasNCFRealData(KerasNCFBenchmarkBase):
default_flags = {} default_flags = {}
default_flags['dataset'] = 'ml-20m' default_flags['dataset'] = 'ml-20m'
default_flags['num_gpus'] = 1 default_flags['num_gpus'] = 1
default_flags['train_epochs'] = 8 default_flags['train_epochs'] = 10
default_flags['clean'] = True default_flags['clean'] = True
default_flags['batch_size'] = 99000 default_flags['batch_size'] = 99000
default_flags['learning_rate'] = 0.00382059 default_flags['learning_rate'] = 0.00382059
...@@ -126,6 +126,19 @@ class KerasNCFRealData(KerasNCFBenchmarkBase): ...@@ -126,6 +126,19 @@ class KerasNCFRealData(KerasNCFBenchmarkBase):
FLAGS.early_stopping = True FLAGS.early_stopping = True
self._run_and_report_benchmark() self._run_and_report_benchmark()
# NCF with a custom training loop (CTL). Works only in TF 2.0.
def benchmark_1_gpu_ctl(self):
self._setup()
FLAGS.keras_use_ctl = True
self._run_and_report_benchmark()
# NCF with a custom training loop (CTL) and early stopping. Works only in TF 2.0.
def benchmark_1_gpu_ctl_early_stop(self):
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.early_stopping = True
self._run_and_report_benchmark()
def benchmark_2_gpus(self): def benchmark_2_gpus(self):
self._setup() self._setup()
FLAGS.num_gpus = 2 FLAGS.num_gpus = 2
...@@ -137,6 +150,21 @@ class KerasNCFRealData(KerasNCFBenchmarkBase): ...@@ -137,6 +150,21 @@ class KerasNCFRealData(KerasNCFBenchmarkBase):
FLAGS.num_gpus = 2 FLAGS.num_gpus = 2
self._run_and_report_benchmark() self._run_and_report_benchmark()
# NCF with a custom training loop (CTL) on 2 GPUs. Works only in TF 2.0.
def benchmark_2_gpus_ctl(self):
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.num_gpus = 2
self._run_and_report_benchmark()
# NCF with a custom training loop (CTL) and early stopping on 2 GPUs. Works only in TF 2.0.
def benchmark_2_gpus_ctl_early_stop(self):
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.early_stopping = True
FLAGS.num_gpus = 2
self._run_and_report_benchmark()
class KerasNCFSyntheticData(KerasNCFBenchmarkBase): class KerasNCFSyntheticData(KerasNCFBenchmarkBase):
"""Benchmark NCF model using synthetic data.""" """Benchmark NCF model using synthetic data."""
......
...@@ -362,8 +362,9 @@ def run_ncf(_): ...@@ -362,8 +362,9 @@ def run_ncf(_):
time_callback.on_batch_begin(step+epoch*num_train_steps) time_callback.on_batch_begin(step+epoch*num_train_steps)
train_loss += train_step() train_loss += train_step()
time_callback.on_batch_end(step+epoch*num_train_steps) time_callback.on_batch_end(step+epoch*num_train_steps)
train_loss /= num_train_steps
logging.info("Done training epoch %s, epoch loss=%s.", logging.info("Done training epoch %s, epoch loss=%s.",
epoch+1, train_loss/num_train_steps) epoch+1, train_loss)
eval_input_iterator.initialize() eval_input_iterator.initialize()
hr_sum = 0 hr_sum = 0
hr_count = 0 hr_count = 0
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment