Unverified commit 35620eaf, authored by Toby Boyd, committed by GitHub
Browse files

Record highest uncased bleu found. (#7196)

* Record highest uncased bleu found.

* change to bleu_best_score_iteration
parent fa28535d
......@@ -83,6 +83,18 @@ class TransformerBenchmark(PerfZeroBenchmark):
metrics = []
if 'bleu_uncased' in stats:
if 'bleu_uncased_history' in stats:
bleu_uncased_best = max(stats['bleu_uncased_history'],
key=lambda x: x[1])
metrics.append({'name': 'bleu_uncased',
'value': bleu_uncased_best[1],
'min_value': bleu_min,
'max_value': bleu_max})
metrics.append({'name': 'bleu_best_score_iteration',
'value': bleu_uncased_best[0]})
metrics.append({'name': 'bleu_uncased_last',
'value': stats['bleu_uncased']})
else:
metrics.append({'name': 'bleu_uncased',
'value': stats['bleu_uncased'],
'min_value': bleu_min,
......@@ -142,9 +154,9 @@ class TransformerBaseKerasAccuracy(TransformerBenchmark):
FLAGS['bleu_source'].value = self.bleu_source
FLAGS['bleu_ref'].value = self.bleu_ref
FLAGS.param_set = 'base'
FLAGS.batch_size = 4096
FLAGS.train_steps = 100000
FLAGS.steps_between_evals = 5000
FLAGS.batch_size = 2048
FLAGS.train_steps = 1000
FLAGS.steps_between_evals = 500
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
# These bleu scores are based on test runs at this limited
# number of steps and batch size, after verifying SOTA at 8xV100s.
......
......@@ -167,6 +167,7 @@ class TransformerTask(object):
iterations = flags_obj.train_steps // flags_obj.steps_between_evals
cased_score, uncased_score = None, None
cased_score_history, uncased_score_history = [], []
for i in range(1, iterations + 1):
print("Start train iteration:{}/{}".format(i, iterations))
history = model.fit(
......@@ -187,13 +188,15 @@ class TransformerTask(object):
if (flags_obj.bleu_source and flags_obj.bleu_ref):
uncased_score, cased_score = self.eval()
print("BLEU: uncased={}, cased={}".format(uncased_score, cased_score))
cased_score_history.append([i, cased_score])
uncased_score_history.append([i, uncased_score])
stats = misc.build_stats(history, callbacks)
if uncased_score and cased_score:
stats["bleu_uncased"] = uncased_score
stats["bleu_cased"] = cased_score
stats["bleu_uncased_history"] = uncased_score_history
stats["bleu_cased_history"] = cased_score_history
return stats
def eval(self):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment