Commit e286da17 authored by qianyj's avatar qianyj
Browse files

update Tensorflow test method

parent f270c43a
# 简介
该测试用例可用于ResNet50/Vgg16等网络的性能测试及精度验证。
该测试用例可用于ResNet50/Vgg16等网络的性能测试及精度验证。
# 单卡测试 (单精度)
......@@ -19,7 +19,14 @@
## 运行
mpirun -np 4 --hostfile hostfile --bind-to none scripts-run/single_process.sh
mpirun -np 4 --hostfile hostfile -mca btl self,tcp --bind-to none scripts-run/single_process.sh
# 多卡测试 (混合精度)
## 运行
将scripts-run/single_process.sh中的--use_fp16设置为True,然后执行:
mpirun -np 4 --hostfile hostfile -mca btl self,tcp --bind-to none scripts-run/single_process.sh
# 参考资料
[https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks](https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks)
......
......@@ -1555,7 +1555,7 @@ class BenchmarkCNN(object):
n_epochs = self.params.eval_during_training_every_n_epochs
self.eval_during_training_at_specified_steps = {
(int(e * num_train_examples_per_epoch + self.batch_size - 1) //
self.batch_size)
(self.batch_size * self.num_workers ))
for e in np.arange(n_epochs, self.num_epochs, n_epochs)}
if self.params.eval_during_training_at_specified_steps:
......@@ -1577,7 +1577,7 @@ class BenchmarkCNN(object):
mlperf.logger.log(key=mlperf.tags.EVAL_EPOCH_OFFSET, value=offset)
self.eval_during_training_at_specified_steps = {
(int(e * num_train_examples_per_epoch + self.batch_size - 1) //
self.batch_size)
(self.batch_size * self.num_workers ))
for e in n_epochs}
except ValueError:
raise ValueError('Param eval_during_training_at_specified_epochs value '
......
# 简介
该测试用例用于TensorFlow分类模型性能测试,使用的数据集是imagenet。
该测试用例用于TensorFlow分类模型性能测试,使用的数据集是imagenet。
* 该脚本支持horovod等分布式通信库方式
* 该脚本支持horovod等分布式通信库方式
# 运行
......@@ -16,7 +16,7 @@
## 分布式多卡
mpirun -np ${num_gpu} --hostfile hostfile --bind-to none scripts-run/single_process.sh
mpirun -np ${num_gpu} --hostfile hostfile -mca btl self,tcp --bind-to none scripts-run/single_process.sh
hostfile格式参考:
......
......@@ -1555,7 +1555,7 @@ class BenchmarkCNN(object):
n_epochs = self.params.eval_during_training_every_n_epochs
self.eval_during_training_at_specified_steps = {
(int(e * num_train_examples_per_epoch + self.batch_size - 1) //
self.batch_size)
(self.batch_size * self.num_workers ))
for e in np.arange(n_epochs, self.num_epochs, n_epochs)}
if self.params.eval_during_training_at_specified_steps:
......@@ -1577,7 +1577,7 @@ class BenchmarkCNN(object):
mlperf.logger.log(key=mlperf.tags.EVAL_EPOCH_OFFSET, value=offset)
self.eval_during_training_at_specified_steps = {
(int(e * num_train_examples_per_epoch + self.batch_size - 1) //
self.batch_size)
(self.batch_size * self.num_workers ))
for e in n_epochs}
except ValueError:
raise ValueError('Param eval_during_training_at_specified_epochs value '
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment