"vscode:/vscode.git/clone" did not exist on "f5dbb0021b7c6b4681c55e9ebe9b601faa1fd4da"
benchmark.py 2.21 KB
Newer Older
mashun1's avatar
v1  
mashun1 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import time

import kp
import numpy as np
from imp1_naive import MatMulOp as MatMulOp1
from imp2_tiled import MatMulOp as MatMulOp2
from imp3_better_tiling import MatMulOp as MatMulOp3


def main() -> None:
    """Benchmark the three matmul implementations and print the results.

    For each ``(tensor_size, experiment_count)`` pair, two square
    upper-triangular matrices of ones are multiplied. The reference product
    is computed with NumPy for sizes up to 512 and with ``MatMulOp1`` for
    larger sizes. Each of ``MatMulOp1``, ``MatMulOp2`` and ``MatMulOp3`` is
    then called once untimed, timed over ``experiment_count`` calls, and its
    output is compared element-wise against the reference. Timing figures are
    printed on success, the wrong output tensor on failure.
    """
    mgr = kp.Manager()
    for tensor_size, experiment_count in [(512, 1000), (4096, 5)]:
        tensor_shape = [tensor_size, tensor_size]
        mat_1 = np.triu(np.ones(tensor_shape))
        mat_2 = np.triu(np.ones(tensor_shape))

        tensor_in_1 = mgr.tensor(mat_1)
        tensor_in_2 = mgr.tensor(mat_2)
        tensor_out = mgr.tensor(np.zeros(tensor_shape))
        if tensor_size <= 512:
            mat_result = mat_1 @ mat_2
        else:
            # CPU is too slow for big sizes, so the naive op provides the reference.
            MatMulOp1(mgr)(tensor_shape, tensor_in_1, tensor_in_2, tensor_out)
            # data() is written through further down (``data()[:] = 0``), i.e. it
            # exposes the tensor's own buffer. Without the copy the reference
            # would alias tensor_out and the check below would compare the
            # output with itself, always succeeding.
            mat_result = tensor_out.data().reshape(tensor_shape).copy()

        print(f'{tensor_shape} input tensors:\n'
              f'{mat_1}\n'
              f'{mat_2}\n')
        print(f'Output :\n{mat_result}')

        # Each output element needs n multiplications and n - 1 additions.
        op_count = tensor_shape[0] * tensor_shape[1] * ((tensor_shape[1] * 2) - 1)

        for MatMulOp in [MatMulOp1, MatMulOp2, MatMulOp3]:
            # Clear the previous result on the host and push the zeros to the
            # device so that a stale result cannot make a broken op pass.
            # record() alone only queues the operation; eval() runs it.
            tensor_out.data()[:] = 0
            mgr.sequence().record(kp.OpTensorSyncDevice([tensor_out])).eval()
            matmul_op = MatMulOp(mgr)
            # Untimed first call (presumably keeps one-time setup cost out of
            # the measurement).
            matmul_op(tensor_shape, tensor_in_1, tensor_in_2, tensor_out)

            # perf_counter() is monotonic and high-resolution, unlike time().
            start_time = time.perf_counter()
            for _ in range(experiment_count):
                matmul_op(tensor_shape, tensor_in_1, tensor_in_2, tensor_out)
            end_time = time.perf_counter()
            experiment_time = end_time - start_time

            result = tensor_out.data().reshape(tensor_shape)
            if (result == mat_result).all():
                print(f'From {MatMulOp.__module__} : {experiment_count} matmul time : '
                      f'{experiment_time * 1000:0.2f}ms => '
                      f'{experiment_count / experiment_time:0.2f}op/s or '
                      f'{experiment_count * op_count / (1e9 * experiment_time):0.2f} GFLOPS')
            else:
                print(f'Test failed => output tensor is wrong :\n{result}')


# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()