"test/git@developer.sourcefind.cn:change/sglang.git" did not exist on "0f8eb15323ea8776a945d917517990ca7cbfbdcb"
Unverified Commit f42fddee authored by Hongjun Choi's avatar Hongjun Choi Committed by GitHub
Browse files

Merged commit includes the following changes: (#6931)

250779087  by A. Unique TensorFlower<gardener@tensorflow.org>:

    Reduce BERT Perfzero benchmark test training steps.

--

PiperOrigin-RevId: 250779087
parent f6c2d9f8
...@@ -76,6 +76,9 @@ class BertBenchmarkBase(tf.test.Benchmark): ...@@ -76,6 +76,9 @@ class BertBenchmarkBase(tf.test.Benchmark):
def __init__(self, output_dir=None): def __init__(self, output_dir=None):
self.num_gpus = 8 self.num_gpus = 8
self.num_epochs = None
self.num_steps_per_epoch = None
if not output_dir: if not output_dir:
output_dir = '/tmp' output_dir = '/tmp'
self.output_dir = output_dir self.output_dir = output_dir
...@@ -114,7 +117,7 @@ class BertBenchmarkBase(tf.test.Benchmark): ...@@ -114,7 +117,7 @@ class BertBenchmarkBase(tf.test.Benchmark):
'value': stats['train_loss'], 'value': stats['train_loss'],
}, { }, {
'name': 'name':
'examples_per_second', 'exp_per_second',
'value': 'value':
self.timer_callback.get_examples_per_sec(FLAGS.train_batch_size) self.timer_callback.get_examples_per_sec(FLAGS.train_batch_size)
}] }]
...@@ -139,10 +142,13 @@ class BertBenchmarkBase(tf.test.Benchmark): ...@@ -139,10 +142,13 @@ class BertBenchmarkBase(tf.test.Benchmark):
input_meta_data = json.loads(reader.read().decode('utf-8')) input_meta_data = json.loads(reader.read().decode('utf-8'))
bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
epochs = FLAGS.num_train_epochs epochs = self.num_epochs if self.num_epochs else FLAGS.num_train_epochs
train_data_size = input_meta_data['train_data_size'] if self.num_steps_per_epoch:
steps_per_epoch = int(train_data_size / FLAGS.train_batch_size) steps_per_epoch = self.num_steps_per_epoch
warmup_steps = int(epochs * train_data_size * 0.1 / FLAGS.train_batch_size) else:
train_data_size = input_meta_data['train_data_size']
steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
warmup_steps = int(epochs * steps_per_epoch * 0.1)
eval_steps = int( eval_steps = int(
math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size)) math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))
strategy = distribution_utils.get_distribution_strategy( strategy = distribution_utils.get_distribution_strategy(
...@@ -175,6 +181,11 @@ class BertClassifyBenchmark(BertBenchmarkBase): ...@@ -175,6 +181,11 @@ class BertClassifyBenchmark(BertBenchmarkBase):
self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
self.bert_config_file = MODEL_CONFIG_FILE_PATH self.bert_config_file = MODEL_CONFIG_FILE_PATH
self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
# Since we only care about performance metrics, we limit
# the number of training steps and epochs to prevent unnecessarily
# long tests.
self.num_steps_per_epoch = 110
self.num_epochs = 1
super(BertClassifyBenchmark, self).__init__(output_dir=output_dir) super(BertClassifyBenchmark, self).__init__(output_dir=output_dir)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment