"git@developer.sourcefind.cn:jerrrrry/infinicore.git" did not exist on "3bdd832e78b756df6c90cfa3ce17f0d42299ea48"
Unverified Commit f38a9829 authored by guoshzhao's avatar guoshzhao Committed by GitHub
Browse files

ModelBenchmarks - Fix early stop logic due to num_steps. (#522)

**Description**
Model benchmarks can stop based on either the `num_steps` or the `duration`
config; each takes effect when its value is set greater than 0.
If both are set greater than 0, whichever stop condition is reached
first takes effect.
parent 664c59a1
...@@ -344,10 +344,10 @@ There have four common parameters for all benchmarks: ...@@ -344,10 +344,10 @@ There have four common parameters for all benchmarks:
For Model-Benchmark, there have some parameters that can control the elapsed time. For Model-Benchmark, there have some parameters that can control the elapsed time.
* duration: the elapsed time of benchmark in seconds. * duration: the elapsed time of benchmark in seconds.
* num_warmup: the number of warmup step. * num_warmup: the number of warmup step, should be positive integer.
* num_steps: the number of test step. * num_steps: the number of test step.
If `duration > 0` and `num_warmup + num_steps > 0`, then benchmark will take the least as the elapsed time. Otherwise only one of them will take effect. If `duration > 0` and `num_steps > 0`, then benchmark will take the least as the elapsed time. Otherwise only one of them will take effect.
## `Mode` Schema ## `Mode` Schema
......
...@@ -204,6 +204,11 @@ def _preprocess(self): ...@@ -204,6 +204,11 @@ def _preprocess(self):
) )
) )
if self._args.num_warmup < 0:
logger.error('num_warmup should be positive integer, while {} is set.'.format(self._args.num_warmup))
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
return False
if not self._init_distributed_setting(): if not self._init_distributed_setting():
self._result.set_return_code(ReturnCode.DISTRIBUTED_SETTING_INIT_FAILURE) self._result.set_return_code(ReturnCode.DISTRIBUTED_SETTING_INIT_FAILURE)
return False return False
...@@ -374,7 +379,7 @@ def _is_finished(self, curr_step, curr_time): ...@@ -374,7 +379,7 @@ def _is_finished(self, curr_step, curr_time):
if ( if (
(self._args.duration > 0 and (curr_time - self._sub_benchmark_start_time) >= self._args.duration) (self._args.duration > 0 and (curr_time - self._sub_benchmark_start_time) >= self._args.duration)
or (total_steps > 0 and curr_step >= total_steps) or (self._args.num_steps > 0 and curr_step >= total_steps)
): ):
return True return True
......
...@@ -20,6 +20,7 @@ def __init__(self, name, parameters=''): ...@@ -20,6 +20,7 @@ def __init__(self, name, parameters=''):
""" """
super().__init__(name, parameters) super().__init__(name, parameters)
self._supported_precision = [Precision.FLOAT32, Precision.FLOAT16] self._supported_precision = [Precision.FLOAT32, Precision.FLOAT16]
self._sub_benchmark_start_time = 0
def add_parser_arguments(self): def add_parser_arguments(self):
"""Add the specified arguments.""" """Add the specified arguments."""
...@@ -377,3 +378,37 @@ def test_check_result_format(): ...@@ -377,3 +378,37 @@ def test_check_result_format():
# Negative case for __check_raw_data() - invalid benchmark result. # Negative case for __check_raw_data() - invalid benchmark result.
assert (benchmark._Benchmark__check_result_format() is False) assert (benchmark._Benchmark__check_result_format() is False)
assert (benchmark.return_code == ReturnCode.INVALID_BENCHMARK_RESULT) assert (benchmark.return_code == ReturnCode.INVALID_BENCHMARK_RESULT)
def test_is_finished():
"""Test interface Benchmark._is_finished()."""
# Only step takes effect, benchmarking finish due to step.
benchmark = create_benchmark('--num_warmup 32 --num_steps 128 --duration 0')
benchmark._preprocess()
end_time = 2
curr_step = 50
assert (benchmark._is_finished(curr_step, end_time) is False)
curr_step = 160
assert (benchmark._is_finished(curr_step, end_time))
# Only duration takes effect, benchmarking finish due to duration.
benchmark = create_benchmark('--num_warmup 32 --num_steps 0 --duration 10')
benchmark._preprocess()
benchmark._sub_benchmark_start_time = 0
curr_step = 50
end_time = 1
assert (benchmark._is_finished(curr_step, end_time) is False)
end_time = 10
assert (benchmark._is_finished(curr_step, end_time))
# Both step and duration take effect.
benchmark = create_benchmark('--num_warmup 32 --num_steps 128 --duration 10')
benchmark._preprocess()
# Benchmarking finish due to step.
curr_step = 160
end_time = 2
assert (benchmark._is_finished(curr_step, end_time))
# Benchmarking finish due to duration.
curr_step = 50
end_time = 10
assert (benchmark._is_finished(curr_step, end_time))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment