Unverified commit 35620eaf authored by Toby Boyd, committed by GitHub

Record highest uncased bleu found. (#7196)

* Record highest uncased bleu found.

* change to bleu_best_score_iteration
parent fa28535d
```diff
@@ -83,10 +83,22 @@ class TransformerBenchmark(PerfZeroBenchmark):
     metrics = []
     if 'bleu_uncased' in stats:
-      metrics.append({'name': 'bleu_uncased',
-                      'value': stats['bleu_uncased'],
-                      'min_value': bleu_min,
-                      'max_value': bleu_max})
+      if 'bleu_uncased_history' in stats:
+        bleu_uncased_best = max(stats['bleu_uncased_history'],
+                                key=lambda x: x[1])
+        metrics.append({'name': 'bleu_uncased',
+                        'value': bleu_uncased_best[1],
+                        'min_value': bleu_min,
+                        'max_value': bleu_max})
+        metrics.append({'name': 'bleu_best_score_iteration',
+                        'value': bleu_uncased_best[0]})
+        metrics.append({'name': 'bleu_uncased_last',
+                        'value': stats['bleu_uncased']})
+      else:
+        metrics.append({'name': 'bleu_uncased',
+                        'value': stats['bleu_uncased'],
+                        'min_value': bleu_min,
+                        'max_value': bleu_max})
 
     if (warmup and 'step_timestamp_log' in stats and
         len(stats['step_timestamp_log']) > warmup):
```
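The selection logic above treats each history entry as an `[iteration, score]` pair and takes the max over the score. A minimal standalone sketch of that behavior, with made-up scores for illustration:

```python
# Sketch of the best-score selection used above; the history entries
# and BLEU values are made-up illustration data.
bleu_uncased_history = [[1, 20.1], [2, 25.7], [3, 24.9]]

# Each entry is [iteration, score]; max over the score (index 1)
# returns the whole [iteration, score] pair of the best evaluation.
best = max(bleu_uncased_history, key=lambda x: x[1])

print(best[0])  # 2    -> reported as 'bleu_best_score_iteration'
print(best[1])  # 25.7 -> reported as 'bleu_uncased'
print(bleu_uncased_history[-1][1])  # 24.9 -> 'bleu_uncased_last'
```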
```diff
@@ -142,9 +154,9 @@ class TransformerBaseKerasAccuracy(TransformerBenchmark):
     FLAGS['bleu_source'].value = self.bleu_source
     FLAGS['bleu_ref'].value = self.bleu_ref
     FLAGS.param_set = 'base'
-    FLAGS.batch_size = 4096
-    FLAGS.train_steps = 100000
-    FLAGS.steps_between_evals = 5000
+    FLAGS.batch_size = 2048
+    FLAGS.train_steps = 1000
+    FLAGS.steps_between_evals = 500
     FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
     # These bleu scores are based on test runs after at this limited
     # number of steps and batch size after verifying SOTA at 8xV100s.
```
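The accuracy benchmark drives the run by overriding absl flag values in place before starting the task. A rough sketch of that pattern, with stand-in flag definitions (the real ones live in the model's flag modules, not here):

```python
# Sketch of the flag-override pattern; these DEFINE_* lines are
# stand-ins for the real flag definitions in the models repo.
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_integer('train_steps', 100000, 'Total training steps.')
flags.DEFINE_integer('steps_between_evals', 5000, 'Steps between BLEU evals.')

FLAGS(['prog'])  # parse with defaults only

# Benchmarks overwrite values in place before invoking the task:
FLAGS.train_steps = 1000
FLAGS.steps_between_evals = 500

# train_steps // steps_between_evals gives the number of train/eval
# iterations the training loop below will run (here: 2).
print(FLAGS.train_steps // FLAGS.steps_between_evals)
```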
```diff
@@ -167,6 +167,7 @@ class TransformerTask(object):
     iterations = flags_obj.train_steps // flags_obj.steps_between_evals
     cased_score, uncased_score = None, None
+    cased_score_history, uncased_score_history = [], []
     for i in range(1, iterations + 1):
       print("Start train iteration:{}/{}".format(i, iterations))
       history = model.fit(
```
```diff
@@ -187,13 +188,15 @@ class TransformerTask(object):
       if (flags_obj.bleu_source and flags_obj.bleu_ref):
         uncased_score, cased_score = self.eval()
-        print("BLEU: uncased={}, cased={}".format(uncased_score, cased_score))
+        cased_score_history.append([i, cased_score])
+        uncased_score_history.append([i, uncased_score])
 
     stats = misc.build_stats(history, callbacks)
     if uncased_score and cased_score:
       stats["bleu_uncased"] = uncased_score
       stats["bleu_cased"] = cased_score
+      stats["bleu_uncased_history"] = uncased_score_history
+      stats["bleu_cased_history"] = cased_score_history
     return stats
 
   def eval(self):
```
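Together, the two training-loop changes append one `[iteration, score]` pair per evaluation and expose the full histories through the stats dict that the benchmark consumes. A condensed, self-contained sketch of that flow, with `fake_eval` as a hypothetical stand-in for `TransformerTask.eval()` and made-up scores:

```python
# Condensed sketch of the recording flow; fake_eval is a hypothetical
# stand-in for the real BLEU evaluation, returning (uncased, cased).
def fake_eval(i):
  return {1: (20.1, 19.5), 2: (25.7, 25.0), 3: (24.9, 24.2)}[i]

cased_score, uncased_score = None, None
cased_score_history, uncased_score_history = [], []
for i in range(1, 4):  # one entry per train/eval iteration
  uncased_score, cased_score = fake_eval(i)
  cased_score_history.append([i, cased_score])
  uncased_score_history.append([i, uncased_score])

stats = {}
if uncased_score and cased_score:
  stats["bleu_uncased"] = uncased_score  # last score seen
  stats["bleu_cased"] = cased_score
  stats["bleu_uncased_history"] = uncased_score_history
  stats["bleu_cased_history"] = cased_score_history

# The benchmark side then reports both the last and the best score:
best = max(stats["bleu_uncased_history"], key=lambda x: x[1])
print(best)  # [2, 25.7]
```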