# directx_gpu_encoding_latency.py
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Module of the DirectXGPUEncodingLatency benchmarks."""

import os

from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke


def create_nv12_file(file_name, num_frames, width, height):
    """Create a raw NV12 file filled with random data.

    A single random frame is generated and written ``num_frames`` times, so every
    frame in the file is identical. Each frame consists of a Y plane of
    ``height x width`` bytes followed by a UV plane of ``(height // 2) x width`` bytes.

    Args:
        file_name (str): path of the file to create; an existing file is overwritten.
        num_frames (int): number of frames to write.
        width (int): frame width; every plane row holds this many bytes.
        height (int): frame height; the UV plane has ``height // 2`` rows.
    """
    # Local import: numpy is only needed when a test file has to be generated.
    import numpy as np

    # Y plane: height x width random bytes in the range 0-255.
    y_plane = np.random.randint(0, 256, (height, width), dtype=np.uint8)
    # UV plane: (height // 2) x width random bytes in the range 0-255.
    uv_plane = np.random.randint(0, 256, (height // 2, width), dtype=np.uint8)

    # Every frame is the same, so serialize the planes once instead of once per frame.
    frame = y_plane.tobytes() + uv_plane.tobytes()

    with open(file_name, 'wb') as f:
        for _ in range(num_frames):
            f.write(frame)


class DirectXGPUEncodingLatency(MicroBenchmarkWithInvoke):
    """The DirectXGPUEncodingLatency benchmark class.

    Builds the command line for ``EncoderLatency.exe`` from the parsed arguments and
    extracts the fps and latency metrics from the text the command prints.
    """
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)
        self._bin_name = 'EncoderLatency.exe'
        # Input file generated on demand when --input_file is not given.
        self._test_file = 'test_directx_gpu_encoding_latency.nv12'

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()
        self._parser.add_argument(
            '--algo',
            type=str,
            choices=['ASAP', 'OneInOne'],
            default='ASAP',
            required=False,
            help='The algorithm to use for encoding'
        )
        self._parser.add_argument(
            '--codec',
            type=str,
            choices=['AVC', 'H264', 'HEVC', 'H265', 'AV1'],
            default='H265',
            required=False,
            help='The codec to use for encoding'
        )
        self._parser.add_argument(
            '--format',
            type=str,
            choices=['RGBA_F16', 'R10G10B10A2', 'NV12', 'P010'],
            default='NV12',
            required=False,
            help='The format to use for encoding'
        )
        self._parser.add_argument(
            '--frames', type=int, default=500, required=False, help='The number of frames to encode'
        )
        self._parser.add_argument(
            '--height', type=int, default=720, required=False, help='The height of the input video'
        )
        self._parser.add_argument(
            '--width', type=int, default=1080, required=False, help='The width of the input video'
        )
        self._parser.add_argument('--input_file', type=str, default=None, required=False, help='The input video file')
        self._parser.add_argument('--output_file', type=str, default=None, required=False, help='The output video file')
        self._parser.add_argument(
            '--output_height', type=int, default=720, required=False, help='The height of the output video'
        )
        self._parser.add_argument(
            '--output_width', type=int, default=1080, required=False, help='The width of the output video'
        )
        self._parser.add_argument(
            '--vcn', type=int, choices=[0, 1], default=0, required=False, help='The VCN instance to use for encoding'
        )

    def _generated_input_is_stale(self):
        """Check whether the generated test file has to be (re)created.

        Return:
            True if the test file cannot be stat'ed (e.g. it does not exist) or its size differs
            from what create_nv12_file() writes for the current --frames, --width and --height.
        """
        try:
            actual_size = os.path.getsize(self._test_file)
        except OSError:
            return True

        # Same layout as create_nv12_file(): a height x width plane plus a (height // 2) x width plane.
        frame_size = self._args.height * self._args.width + (self._args.height // 2) * self._args.width
        return actual_size != self._args.frames * frame_size

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Assembles the EncoderLatency.exe command line from the parsed arguments and appends it to
        self._commands. When --input_file is not given, the generated test file is used as input;
        it is created if missing and regenerated if it was produced for other frame settings.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        command = os.path.join(self._args.bin_dir, self._bin_name)
        command += f' -ALGORITHM {self._args.algo}'
        command += f' -CODEC {self._args.codec}'
        command += f' -FORMAT {self._args.format}'
        command += f' -FRAMES {self._args.frames}'
        command += f' -HEIGHT {self._args.height}'
        command += f' -WIDTH {self._args.width}'
        if self._args.input_file is not None:
            command += f' -INPUT {self._args.input_file}'
        else:
            # A file left over from a run with other --frames/--width/--height must not be reused.
            if self._generated_input_is_stale():
                create_nv12_file(self._test_file, self._args.frames, self._args.width, self._args.height)
            command += f' -INPUT {self._test_file}'
        if self._args.output_file is not None:
            command += f' -OUTPUT {self._args.output_file}'
        command += f' -OUTPUT_HEIGHT {self._args.output_height}'
        command += f' -OUTPUT_WIDTH {self._args.output_width}'
        command += f' -VCNINSTANCE {self._args.vcn}'
        self._commands.append(command)

        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to parse raw results and save the summarized results.

          self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        The output is scanned line by line:
          - a line containing 'Total' yields 'fps', taken from the text after its third '=';
          - a line containing 'Latency' and 'min' (case-insensitive) yields 'min_lat' and 'max_lat',
            taken from the second and third comma-separated fields after its first '=';
          - a line containing 'Latency' and 'average' (case-insensitive) yields 'avg_lat',
            taken from the text after its first '='.
        Lines matching none of these are ignored, and only the metrics found are saved.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data('raw_output', raw_output, self._args.log_raw_data)

        content = raw_output.splitlines()
        metrics = {}

        try:
            for line in content:
                if 'Total' in line:
                    # strip('frames') removes trailing unit characters; the first token is the number.
                    metrics['fps'] = float(line.split('=')[3].strip().strip('frames').split()[0])
                if 'Latency' in line and 'min' in line.lower():
                    metrics['min_lat'] = float(line.split('=')[1].split(',')[1].strip('ms').strip())
                    metrics['max_lat'] = float(line.split('=')[1].split(',')[2].strip('ms').strip())
                if 'Latency' in line and 'average' in line.lower():
                    metrics['avg_lat'] = float(line.split('=')[1].strip('ms').strip())
        except Exception as e:
            # Any parsing failure invalidates the whole output; nothing is added to the result.
            logger.error(
                'The result format is invalid - benchmark: {}, raw output: {}, error: {}'.format(
                    self._name, raw_output, str(e)
                )
            )
            return False

        for metric, value in metrics.items():
            self._result.add_result(metric, value)

        return True


# Register the benchmark class under its public name for the DirectX platform.
BenchmarkRegistry.register_benchmark(
    'directx-gpu-encoding-latency',
    DirectXGPUEncodingLatency,
    platform=Platform.DIRECTX,
)