test_result.py 3.91 KB
Newer Older
1
2
3
4
5
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Tests for BenchmarkResult module."""

6
from superbench.benchmarks import BenchmarkType, ReturnCode, ReduceType
7
8
9
10
11
from superbench.benchmarks.result import BenchmarkResult


def test_add_raw_data():
    """Test interface BenchmarkResult.add_raw_data().

    Covers two kinds of raw payload, string logs and lists of numbers, and
    checks that repeated calls for the same metric are readable back from
    ``raw_data[metric]`` in call order, without disturbing the benchmark type
    or return code given to the constructor.
    """
    # Raw data given as strings.
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.add_raw_data('metric1', 'raw log 1')
    result.add_raw_data('metric1', 'raw log 2')
    assert (result.raw_data['metric1'][0] == 'raw log 1')
    assert (result.raw_data['metric1'][1] == 'raw log 2')
    assert (result.type == BenchmarkType.MICRO)
    assert (result.return_code == ReturnCode.SUCCESS)

    # Raw data given as lists of numbers.
    result = BenchmarkResult('model', BenchmarkType.MODEL, ReturnCode.SUCCESS)
    result.add_raw_data('metric1', [1, 2, 3])
    result.add_raw_data('metric1', [4, 5, 6])

    assert (result.raw_data['metric1'][0] == [1, 2, 3])
    assert (result.raw_data['metric1'][1] == [4, 5, 6])
    assert (result.type == BenchmarkType.MODEL)
    assert (result.return_code == ReturnCode.SUCCESS)
28
29
30
31


def test_add_result():
    """Test interface BenchmarkResult.add_result().

    Adds two values for the same metric and checks that both are readable
    back from ``result[metric]`` in the order they were added.
    """
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.add_result('metric1', 300)
    result.add_result('metric1', 200)
    assert (result.result['metric1'][0] == 300)
    assert (result.result['metric1'][1] == 200)


def test_set_timestamp():
    """Test interface BenchmarkResult.set_timestamp().

    Checks that the start/end timestamp strings passed to set_timestamp()
    are exposed unchanged through ``start_time`` and ``end_time``.
    """
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    start_time = '2021-02-03 16:59:49'
    end_time = '2021-02-03 17:00:08'
    result.set_timestamp(start_time, end_time)
    assert (result.start_time == start_time)
    assert (result.end_time == end_time)


def test_set_benchmark_type():
    """Test interface BenchmarkResult.set_benchmark_type().

    The result is created with a different type than the one being set, so
    the final assertion can only pass if the setter actually changes ``type``.
    """
    result = BenchmarkResult('micro', BenchmarkType.MODEL, ReturnCode.SUCCESS)
    assert (result.type == BenchmarkType.MODEL)
    result.set_benchmark_type(BenchmarkType.MICRO)
    assert (result.type == BenchmarkType.MICRO)
54
55
56
57


def test_set_return_code():
    """Test interface BenchmarkResult.set_return_code().

    After construction and after each set_return_code() call, checks both the
    ``return_code`` attribute and the mirrored ``result['return_code']`` entry,
    which is expected to be a single-element list holding the enum's value.
    """
    # Initial state comes from the constructor argument.
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    assert (result.return_code == ReturnCode.SUCCESS)
    assert (result.result['return_code'] == [ReturnCode.SUCCESS.value])

    # Each update replaces the previous code rather than appending to it.
    result.set_return_code(ReturnCode.INVALID_ARGUMENT)
    assert (result.return_code == ReturnCode.INVALID_ARGUMENT)
    assert (result.result['return_code'] == [ReturnCode.INVALID_ARGUMENT.value])

    result.set_return_code(ReturnCode.INVALID_BENCHMARK_RESULT)
    assert (result.return_code == ReturnCode.INVALID_BENCHMARK_RESULT)
    assert (result.result['return_code'] == [ReturnCode.INVALID_BENCHMARK_RESULT.value])
67
68
69
70
71


def test_serialize_deserialize():
    """Test serialization of a BenchmarkResult and compare with the expected text.

    Builds a result with two metrics, raw data and timestamps, then checks
    that to_string() produces exactly the expected JSON text. Only the
    serialized form is compared here; no round-trip back into an object is
    performed in this test.
    """
    # Result with two metrics: metric1 (two values, MAX) and metric2 (one value, AVG).
    result = BenchmarkResult('pytorch-bert-base1', BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=2)
    result.add_result('metric1', 300, ReduceType.MAX)
    result.add_result('metric1', 200, ReduceType.MAX)
    result.add_result('metric2', 100, ReduceType.AVG)
    result.add_raw_data('metric1', [1, 2, 3])
    result.add_raw_data('metric1', [4, 5, 6])
    result.add_raw_data('metric1', [7, 8, 9])
    start_time = '2021-02-03 16:59:49'
    end_time = '2021-02-03 17:00:08'
    result.set_timestamp(start_time, end_time)
    result.set_benchmark_type(BenchmarkType.MICRO)

    # Adjacent string literals are concatenated into one line of JSON text;
    # key order and spacing must match to_string() exactly.
    expected = (
        '{"name": "pytorch-bert-base1", "type": "micro", "run_count": 2, "return_code": 0, '
        '"start_time": "2021-02-03 16:59:49", "end_time": "2021-02-03 17:00:08", '
        '"raw_data": {"metric1": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}, '
        '"result": {"return_code": [0], "metric1": [300, 200], "metric2": [100]}, '
        '"reduce_op": {"return_code": null, "metric1": "max", "metric2": "avg"}}'
    )
    assert (result.to_string() == expected)