blas.h 3.19 KB
Newer Older
PanZezhongQY's avatar
PanZezhongQY committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#ifndef __BLAS_H__
#define __BLAS_H__

#include "../utils.h"
#include "infiniop/operator.h"
#include <algorithm>
#include <stdint.h>

/**
 * View of a (possibly batched) 2-D matrix taken from a tensor descriptor.
 *
 * Holds the matrix extents, the stride of each matrix dimension and the
 * stride between consecutive matrices of a batch.
 *
 * NOTE(review): strides are presumably expressed in elements rather than
 * bytes -- confirm against the definition of infiniopTensorDescriptor_t.
 */
typedef struct BlasMatrix {
    // All members are zero-initialized so that a default-constructed object,
    // or one whose construction failed, never exposes indeterminate values.
    size_t ndim = 0;        // rank of the source tensor (2 or 3)
    size_t batch = 0;       // number of matrices (1 for a rank-2 tensor)
    int64_t stride = 0;     // stride between batches (0 when batch == 1)
    size_t rows = 0;
    size_t cols = 0;
    int64_t row_stride = 0;
    int64_t col_stride = 0;

    BlasMatrix() = default;

    /**
     * Builds the view from a tensor descriptor.
     *
     * @param layout Descriptor of a rank-2 (rows, cols) or rank-3
     *               (batch, rows, cols) tensor.
     * @param status Receives INFINIOP_STATUS_SUCCESS on success,
     *               INFINIOP_STATUS_BAD_TENSOR_SHAPE for any other rank, or
     *               INFINIOP_STATUS_BAD_TENSOR_STRIDES when neither matrix
     *               dimension has a stride of 1.
     */
    BlasMatrix(infiniopTensorDescriptor_t layout, infiniopStatus_t *status) {
        if (layout->ndim == 2) {
            this->ndim = 2;
            this->batch = 1;
            this->stride = 0;
            this->rows = layout->shape[0];
            this->cols = layout->shape[1];
            this->row_stride = layout->strides[0];
            this->col_stride = layout->strides[1];
        } else if (layout->ndim == 3) {
            this->ndim = 3;
            this->batch = layout->shape[0];
            // A single-matrix batch never advances, so its batch stride is
            // normalized to 0 regardless of what the descriptor reports.
            this->stride = this->batch == 1 ? 0 : layout->strides[0];
            this->rows = layout->shape[1];
            this->cols = layout->shape[2];
            this->row_stride = layout->strides[1];
            this->col_stride = layout->strides[2];
        } else {
            *status = INFINIOP_STATUS_BAD_TENSOR_SHAPE;
            return;
        }

        // At least one of the two matrix dimensions must have unit stride.
        if (this->row_stride != 1 && this->col_stride != 1) {
            *status = INFINIOP_STATUS_BAD_TENSOR_STRIDES;
            return;
        }

        *status = INFINIOP_STATUS_SUCCESS;
    }

    /**
     * Returns true when this matrix can take part in an operation over
     * `batch` matrices: either its batch count equals `batch`, or it holds a
     * single matrix (batch count 1).
     *
     * The parameter is size_t so that a size_t batch count is compared
     * without narrowing or a signed/unsigned mismatch.
     */
    bool match_batch(size_t batch) const {
        return this->batch == batch || this->batch == 1;
    }

    /** Swaps the two matrix dimensions (extents and strides) in place. */
    void transpose() {
        std::swap(rows, cols);
        std::swap(row_stride, col_stride);
    }

    /**
     * Leading dimension: the stride of the dimension that is not
     * unit-strided. When row_stride is 1 this is col_stride, otherwise it is
     * row_stride.
     *
     * The 64-bit stride is narrowed to int explicitly, as the declared
     * return type requires.
     */
    int ld() const {
        return static_cast<int>(this->row_stride == 1 ? this->col_stride
                                                      : this->row_stride);
    }
} BlasMatrix;

/**
 * Validated description of a (batched) matrix product C = A * B.
 *
 * Construction checks the three descriptors and, if needed, rewrites the
 * problem in transposed form so that C has the unit-stride dimension the
 * caller asked for (see `col_major`).
 */
struct MatmulInfo {
    BlasMatrix a_matrix;
    BlasMatrix b_matrix;
    BlasMatrix c_matrix;

    // Zero-initialized so they hold defined values even when the constructor
    // returns early with an error status.
    size_t m = 0;
    size_t n = 0;
    size_t k = 0;
    size_t batch = 0;

    // True when the operands were transposed and A/B swapped (see below).
    bool is_transed = false;

    /**
     * @param c_desc    Descriptor of the output matrix C.
     * @param a_desc    Descriptor of the left operand A.
     * @param b_desc    Descriptor of the right operand B.
     * @param status    Receives the first failing status from building the
     *                  three BlasMatrix views, or
     *                  INFINIOP_STATUS_BAD_TENSOR_SHAPE when the extents or
     *                  batch counts are incompatible. When every check
     *                  passes it holds the success status written by the
     *                  last BlasMatrix construction.
     * @param col_major When true, C ends up with row_stride == 1; when
     *                  false, with col_stride == 1.
     */
    MatmulInfo(infiniopTensorDescriptor_t c_desc,
               infiniopTensorDescriptor_t a_desc,
               infiniopTensorDescriptor_t b_desc,
               infiniopStatus_t *status,
               bool col_major = true) {
        a_matrix = BlasMatrix(a_desc, status);
        if (*status != INFINIOP_STATUS_SUCCESS) {
            return;
        }
        b_matrix = BlasMatrix(b_desc, status);
        if (*status != INFINIOP_STATUS_SUCCESS) {
            return;
        }
        c_matrix = BlasMatrix(c_desc, status);
        if (*status != INFINIOP_STATUS_SUCCESS) {
            return;
        }

        // (rows_a x cols_a) * (rows_b x cols_b) -> (rows_a x cols_b),
        // which requires cols_a == rows_b.
        if (c_matrix.rows != a_matrix.rows || c_matrix.cols != b_matrix.cols || a_matrix.cols != b_matrix.rows) {
            *status = INFINIOP_STATUS_BAD_TENSOR_SHAPE;
            return;
        }

        // C dictates the batch count; A and B must match it or hold a single
        // matrix.
        batch = c_matrix.batch;
        if (!a_matrix.match_batch(batch) || !b_matrix.match_batch(batch)) {
            *status = INFINIOP_STATUS_BAD_TENSOR_SHAPE;
            return;
        }

        // If C's unit-stride dimension is not the one requested, solve the
        // transposed problem C^T = B^T * A^T instead: transpose all three
        // views and exchange the roles of A and B.
        if ((col_major && c_matrix.col_stride == 1) || (!col_major && c_matrix.row_stride == 1)) {
            c_matrix.transpose();
            b_matrix.transpose();
            a_matrix.transpose();
            std::swap(a_matrix, b_matrix);
            is_transed = true;
        }

        // Problem sizes are taken after the optional transposition.
        m = c_matrix.rows;
        n = c_matrix.cols;
        k = a_matrix.cols;
    }
};

#endif// __BLAS_H__