Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
e5bda616
Unverified
Commit
e5bda616
authored
Apr 02, 2025
by
PanZezhong1725
Committed by
GitHub
Apr 02, 2025
Browse files
Merge pull request #122 from YdrMaster/main
issue/121/feat: 添加 Result 类型
parents
beaf1e8c
fd5d90c9
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
448 additions
and
407 deletions
+448
-407
src/infiniop/ops/causal_softmax/causal_softmax.h
src/infiniop/ops/causal_softmax/causal_softmax.h
+37
-78
src/infiniop/ops/causal_softmax/cpu/causal_softmax_cpu.cc
src/infiniop/ops/causal_softmax/cpu/causal_softmax_cpu.cc
+9
-6
src/infiniop/ops/causal_softmax/info.h
src/infiniop/ops/causal_softmax/info.h
+60
-0
src/infiniop/ops/gemm/ascend/gemm_ascend.cc
src/infiniop/ops/gemm/ascend/gemm_ascend.cc
+4
-6
src/infiniop/ops/gemm/bang/gemm_bang.cc
src/infiniop/ops/gemm/bang/gemm_bang.cc
+3
-5
src/infiniop/ops/gemm/blas.h
src/infiniop/ops/gemm/blas.h
+0
-125
src/infiniop/ops/gemm/cpu/gemm_cpu.cc
src/infiniop/ops/gemm/cpu/gemm_cpu.cc
+3
-6
src/infiniop/ops/gemm/cuda/gemm_cuda.cu
src/infiniop/ops/gemm/cuda/gemm_cuda.cu
+3
-6
src/infiniop/ops/gemm/gemm.h
src/infiniop/ops/gemm/gemm.h
+45
-45
src/infiniop/ops/gemm/info.h
src/infiniop/ops/gemm/info.h
+135
-0
src/infiniop/ops/gemm/kunlun/gemm_kunlun.cc
src/infiniop/ops/gemm/kunlun/gemm_kunlun.cc
+3
-6
src/infiniop/ops/gemm/maca/gemm_maca.cc
src/infiniop/ops/gemm/maca/gemm_maca.cc
+3
-6
src/infiniop/ops/gemm/operator.cc
src/infiniop/ops/gemm/operator.cc
+3
-3
src/infiniop/ops/rearrange/cpu/rearrange_cpu.cc
src/infiniop/ops/rearrange/cpu/rearrange_cpu.cc
+3
-5
src/infiniop/ops/rms_norm/cpu/rms_norm_cpu.cc
src/infiniop/ops/rms_norm/cpu/rms_norm_cpu.cc
+8
-6
src/infiniop/ops/rms_norm/info.h
src/infiniop/ops/rms_norm/info.h
+78
-0
src/infiniop/ops/rms_norm/rms_norm.h
src/infiniop/ops/rms_norm/rms_norm.h
+43
-96
src/utils.h
src/utils.h
+0
-2
src/utils/rearrange.cc
src/utils/rearrange.cc
+5
-3
src/utils/rearrange.h
src/utils/rearrange.h
+3
-3
No files found.
src/infiniop/ops/causal_softmax/causal_softmax.h
View file @
e5bda616
...
...
@@ -2,84 +2,43 @@
#define CAUSAL_SOFTMAX_H
#include "../../operator.h"
#include "../../tensor.h"
#include <iostream>
#include <vector>
/**
 * Plain description of the tensor a causal-softmax descriptor operates on.
 *
 * Filled in by createCausalSoftmaxInfo() from a tensor descriptor.
 * The field order is part of the type's layout and must not change.
 */
struct CausalSoftmaxInfo {
    /// Element type of the tensor (F16 or F32 once validated).
    infiniDtype_t dtype;

    /// Extent of the leading dimension for 3-D tensors; 1 for 2-D tensors.
    size_t batch_size;
    /// Stride of the leading dimension for 3-D tensors; 0 for 2-D tensors.
    ptrdiff_t stride_b;

    /// Extent of the second-to-last dimension.
    size_t seq_len;
    /// Stride of the second-to-last dimension.
    ptrdiff_t stride_i;

    /// Extent of the last dimension (validated to be >= seq_len).
    size_t total_seq_len;
    /// Stride of the last dimension.
    ptrdiff_t stride_j;
};
/**
 * Validates `y_desc` and fills `*info` with its dtype, extents and strides.
 *
 * @param info   Output structure to populate.
 * @param y_desc Descriptor of the tensor to describe.
 * @return INFINI_STATUS_BAD_TENSOR_DTYPE if the dtype is neither F16 nor F32;
 *         INFINI_STATUS_BAD_TENSOR_SHAPE if the tensor is not 2-D/3-D or its
 *         last dimension is shorter than its second-to-last dimension;
 *         INFINI_STATUS_SUCCESS otherwise.
 *
 * Note: `info->dtype` is written as soon as the dtype check passes, so it is
 * already modified when a later shape check fails.
 */
inline infiniStatus_t createCausalSoftmaxInfo(CausalSoftmaxInfo *info,
                                              infiniopTensorDescriptor_t y_desc) {
    const auto elem_type = y_desc->dtype();
    const bool dtype_supported = elem_type == INFINI_DTYPE_F16 || elem_type == INFINI_DTYPE_F32;
    if (!dtype_supported) {
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
    info->dtype = elem_type;

    const auto rank = y_desc->ndim();
    if (!(rank == 2 || rank == 3)) {
        return INFINI_STATUS_BAD_TENSOR_SHAPE;
    }

    // The two innermost axes carry the (seq_len, total_seq_len) matrix.
    const auto row_axis = rank - 2;
    const auto col_axis = rank - 1;
    if (y_desc->shape()[col_axis] < y_desc->shape()[row_axis]) {
        return INFINI_STATUS_BAD_TENSOR_SHAPE;
    }

    const size_t rows = y_desc->shape()[row_axis];
    const ptrdiff_t row_step = y_desc->strides()[row_axis];
    const size_t cols = y_desc->shape()[col_axis];
    const ptrdiff_t col_step = y_desc->strides()[col_axis];

    // A 2-D tensor is treated as a single batch with a zero batch stride.
    size_t batch_count = 1;
    ptrdiff_t batch_step = 0;
    if (rank == 3) {
        batch_count = y_desc->shape()[0];
        batch_step = y_desc->strides()[0];
    }

    info->batch_size = batch_count;
    info->stride_b = batch_step;
    info->seq_len = rows;
    info->stride_i = row_step;
    info->total_seq_len = cols;
    info->stride_j = col_step;
    return INFINI_STATUS_SUCCESS;
}
#define DESCRIPTOR(NAMESPACE) \
namespace op::causal_softmax::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
struct Opaque; \
Opaque *_opaque; \
CausalSoftmaxInfo _info; \
size_t _workspace_size; \
\
Descriptor( \
Opaque *opaque, \
CausalSoftmaxInfo info, \
size_t workspace_size, \
infiniDevice_t device_type, \
int device_id) : InfiniopDescriptor{device_type, device_id}, \
_opaque(opaque), \
_info(info), \
_workspace_size(workspace_size) {} \
\
public: \
~Descriptor(); \
size_t workspaceSize() const { return _workspace_size; } \
static infiniStatus_t create( \
infiniopHandle_t handle, \
Descriptor **desc_ptr, \
infiniopTensorDescriptor_t y_desc); \
infiniStatus_t calculate(void *workspace, size_t workspace_size, \
void *data, void *stream); \
}; \
#include "info.h"
/**
 * DESCRIPTOR(NAMESPACE) declares `op::causal_softmax::NAMESPACE::Descriptor`,
 * a final class derived from `InfiniopDescriptor`.
 *
 * The generated class holds:
 *   - `_opaque`: pointer to a nested `Opaque` struct that is only
 *     forward-declared here;
 *   - `_info`: a `CausalSoftmaxInfo` stored by value;
 *   - `_workspace_size`: the value returned by `workspaceSize()`.
 *
 * Its constructor is private, so instances are obtained through the static
 * `create()` function. The destructor, `create()` and `calculate()` are only
 * declared by this macro; their definitions must be supplied elsewhere.
 */
#define DESCRIPTOR(NAMESPACE) \
\
namespace op::causal_softmax::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
struct Opaque; \
Opaque *_opaque; \
CausalSoftmaxInfo _info; \
size_t _workspace_size; \
\
Descriptor( \
Opaque *opaque, \
CausalSoftmaxInfo info, \
size_t workspace_size, \
infiniDevice_t device_type, \
int device_id) \
: InfiniopDescriptor{device_type, device_id}, \
_opaque(opaque), \
_info(info), \
_workspace_size(workspace_size) {} \
\
public: \
~Descriptor(); \
\
size_t workspaceSize() const { return _workspace_size; } \
\
static infiniStatus_t create( \
infiniopHandle_t handle, \
Descriptor **desc_ptr, \
infiniopTensorDescriptor_t y_desc); \
\
infiniStatus_t calculate( \
void *workspace, size_t workspace_size, \
void *data, \
void *stream) const; \
}; \
}
#endif // CAUSAL_SOFTMAX_H
src/infiniop/ops/causal_softmax/cpu/causal_softmax_cpu.cc
View file @
e5bda616
...
...
@@ -3,15 +3,16 @@
#include "../../../reduce/cpu/reduce.h"
namespace
op
::
causal_softmax
::
cpu
{
Descriptor
::~
Descriptor
()
{}
infiniStatus_t
Descriptor
::
create
(
infiniopHandle_t
handle
,
Descriptor
**
desc_ptr
,
infiniopTensorDescriptor_t
y_desc
)
{
CausalSoftmaxInfo
info
;
CHECK_
STATUS
(
createCausalSoftmaxInfo
(
&
info
,
y_desc
)
);
*
desc_ptr
=
new
Descriptor
(
nullptr
,
info
,
0
,
handle
->
device
,
handle
->
device_id
);
auto
result
=
CausalSoftmaxInfo
::
create
(
y_desc
)
;
CHECK_
RESULT
(
result
);
*
desc_ptr
=
new
Descriptor
(
nullptr
,
result
.
take
()
,
0
,
handle
->
device
,
handle
->
device_id
);
return
INFINI_STATUS_SUCCESS
;
}
...
...
@@ -53,9 +54,11 @@ infiniStatus_t causal_softmax(const CausalSoftmaxInfo *info, T *data) {
return
INFINI_STATUS_SUCCESS
;
}
infiniStatus_t
Descriptor
::
calculate
(
void
*
workspace
,
size_t
workspace_size
,
void
*
data
,
void
*
stream
)
{
infiniStatus_t
Descriptor
::
calculate
(
void
*
workspace
,
size_t
workspace_size
,
void
*
data
,
void
*
stream
)
const
{
if
(
_info
.
dtype
==
INFINI_DTYPE_F16
)
{
CHECK_STATUS
(
causal_softmax
<
fp16_t
>
(
&
_info
,
(
fp16_t
*
)
data
));
}
else
if
(
_info
.
dtype
==
INFINI_DTYPE_F32
)
{
...
...
src/infiniop/ops/causal_softmax/info.h
0 → 100644
View file @
e5bda616
#
ifndef
__CAUSAL_SOFTMAX_INFO_H__
#define __CAUSAL_SOFTMAX_INFO_H__
#include "../../../utils.h"
#include "../../tensor.h"
#include <vector>
namespace
op
::
causal_softmax
{
/**
 * Validated description of the tensor a causal-softmax descriptor works on.
 *
 * The default constructor is private; use create() to build an instance.
 * The data-member order is relied upon by the brace initialization in create().
 */
class CausalSoftmaxInfo {
    CausalSoftmaxInfo() = default;

public:
    /// Element type of the tensor (F16 or F32).
    infiniDtype_t dtype;
    /// Extent of the leading dimension for 3-D tensors; 1 for 2-D tensors.
    size_t batch_size;
    /// Stride of the leading dimension for 3-D tensors; 0 for 2-D tensors.
    ptrdiff_t stride_b;
    /// Extent of the second-to-last dimension.
    size_t seq_len;
    /// Stride of the second-to-last dimension.
    ptrdiff_t stride_i;
    /// Extent of the last dimension (never smaller than seq_len).
    size_t total_seq_len;
    /// Stride of the last dimension.
    ptrdiff_t stride_j;

    /**
     * Builds a CausalSoftmaxInfo from `y_desc`.
     *
     * @param y_desc Descriptor of the tensor to describe.
     * @return The populated info on success; otherwise
     *         INFINI_STATUS_BAD_TENSOR_DTYPE when the dtype is neither F16 nor
     *         F32, or INFINI_STATUS_BAD_TENSOR_SHAPE when the tensor is not
     *         2-D/3-D or its last dimension is shorter than the one before it.
     */
    static utils::Result<CausalSoftmaxInfo> create(infiniopTensorDescriptor_t y_desc) {
        const auto elem_type = y_desc->dtype();
        const bool dtype_supported = elem_type == INFINI_DTYPE_F16 || elem_type == INFINI_DTYPE_F32;
        if (!dtype_supported) {
            return INFINI_STATUS_BAD_TENSOR_DTYPE;
        }

        const auto rank = y_desc->ndim();
        if (!(rank == 2 || rank == 3)) {
            return INFINI_STATUS_BAD_TENSOR_SHAPE;
        }

        // The two innermost axes carry the (seq_len, total_seq_len) matrix.
        const auto row_axis = rank - 2;
        const auto col_axis = rank - 1;
        if (y_desc->shape()[col_axis] < y_desc->shape()[row_axis]) {
            return INFINI_STATUS_BAD_TENSOR_SHAPE;
        }

        const size_t rows = y_desc->shape()[row_axis];
        const ptrdiff_t row_step = y_desc->strides()[row_axis];
        const size_t cols = y_desc->shape()[col_axis];
        const ptrdiff_t col_step = y_desc->strides()[col_axis];

        // A 2-D tensor is treated as a single batch with a zero batch stride.
        size_t batch_count = 1;
        ptrdiff_t batch_step = 0;
        if (rank == 3) {
            batch_count = y_desc->shape()[0];
            batch_step = y_desc->strides()[0];
        }

        return utils::Result<CausalSoftmaxInfo>(CausalSoftmaxInfo{
            elem_type,
            batch_count,
            batch_step,
            rows,
            row_step,
            cols,
            col_step});
    }
};
}
// namespace op::causal_softmax
#endif // __CAUSAL_SOFTMAX_INFO_H__
src/infiniop/ops/gemm/ascend/gemm_ascend.cc
View file @
e5bda616
...
...
@@ -38,14 +38,12 @@ infiniStatus_t Descriptor::create(
return
INFINI_STATUS_BAD_TENSOR_DTYPE
;
}
infiniStatus_t
status
;
auto
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
ROW_MAJOR
);
if
(
status
!=
INFINI_STATUS_SUCCESS
)
{
return
status
;
}
auto
result
=
MatmulInfo
::
create
(
c_desc
,
a_desc
,
b_desc
,
MatrixLayout
::
ROW_MAJOR
);
CHECK_RESULT
(
result
);
auto
info
=
result
.
take
();
auto
c
=
new
aclnnTensorDescriptor
(
toAclDataType
(
c_desc
->
dtype
()),
{
static_cast
<
int64_t
>
(
info
.
c_matrix
.
rows
),
static_cast
<
int64_t
>
(
info
.
c_matrix
.
cols
)},
{
static_cast
<
int64_t
>
(
info
.
m
),
static_cast
<
int64_t
>
(
info
.
n
)},
{
info
.
c_matrix
.
row_stride
,
info
.
c_matrix
.
col_stride
});
auto
a
=
new
aclnnTensorDescriptor
(
toAclDataType
(
a_desc
->
dtype
()),
{
static_cast
<
int64_t
>
(
info
.
a_matrix
.
rows
),
static_cast
<
int64_t
>
(
info
.
a_matrix
.
cols
)},
...
...
src/infiniop/ops/gemm/bang/gemm_bang.cc
View file @
e5bda616
...
...
@@ -71,11 +71,9 @@ infiniStatus_t Descriptor::create(
return
INFINI_STATUS_BAD_TENSOR_DTYPE
;
}
infiniStatus_t
status
;
auto
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
ROW_MAJOR
);
if
(
status
!=
INFINI_STATUS_SUCCESS
)
{
return
status
;
}
auto
result
=
MatmulInfo
::
create
(
c_desc
,
a_desc
,
b_desc
,
MatrixLayout
::
ROW_MAJOR
);
CHECK_RESULT
(
result
);
auto
info
=
result
.
take
();
cnnlTensorDescriptor_t
a
,
b
,
c
;
CHECK_BANG
(
cnnlCreateTensorDescriptor
(
&
a
));
...
...
src/infiniop/ops/gemm/blas.h
deleted
100644 → 0
View file @
beaf1e8c
#ifndef __BLAS_H__
#define __BLAS_H__
#include "../../operator.h"
#include "../../tensor.h"
#include <algorithm>
namespace
op
::
gemm
{
/**
 * View of a 2-D or batched 3-D tensor as a (batch of) matrix.
 *
 * Stores extents and strides read from a tensor descriptor. Construction
 * reports its outcome through a status out-parameter.
 */
struct BlasMatrix {
    /// Number of dimensions of the source tensor (2 or 3).
    size_t ndim;
    /// Number of matrices; 1 for a 2-D tensor.
    size_t batch;
    /// Stride between consecutive matrices; 0 when batch == 1.
    ptrdiff_t stride;
    /// Row count.
    size_t rows;
    /// Column count.
    size_t cols;
    /// Stride between consecutive rows.
    ptrdiff_t row_stride;
    /// Stride between consecutive columns.
    ptrdiff_t col_stride;

    BlasMatrix() = default;

    /**
     * Reads the matrix layout from `layout`.
     *
     * @param layout Tensor descriptor; must be 2-D or 3-D.
     * @param status Receives INFINI_STATUS_BAD_TENSOR_SHAPE for any other rank
     *               (no field is written in that case),
     *               INFINI_STATUS_BAD_TENSOR_STRIDES when neither the row nor
     *               the column stride equals 1, and INFINI_STATUS_SUCCESS
     *               otherwise.
     */
    BlasMatrix(infiniopTensorDescriptor_t layout, infiniStatus_t *status) {
        switch (layout->ndim()) {
        case 2:
            ndim = 2;
            batch = 1;
            stride = 0;
            rows = layout->dim(0);
            cols = layout->dim(1);
            row_stride = layout->stride(0);
            col_stride = layout->stride(1);
            break;
        case 3:
            ndim = 3;
            batch = layout->dim(0);
            // A single-matrix batch is given a zero batch stride.
            if (batch == 1) {
                stride = 0;
            } else {
                stride = layout->stride(0);
            }
            rows = layout->dim(1);
            cols = layout->dim(2);
            row_stride = layout->stride(1);
            col_stride = layout->stride(2);
            break;
        default:
            *status = INFINI_STATUS_BAD_TENSOR_SHAPE;
            return;
        }

        // At least one of the two matrix axes must have unit stride.
        const bool has_unit_stride = row_stride == 1 || col_stride == 1;
        if (!has_unit_stride) {
            *status = INFINI_STATUS_BAD_TENSOR_STRIDES;
            return;
        }

        *status = INFINI_STATUS_SUCCESS;
    }

    /// True when this matrix has exactly `_batch` matrices or a single one.
    bool match_batch(size_t _batch) const {
        if (batch == _batch) {
            return true;
        }
        return batch == 1;
    }

    /// Swaps the row/column extents and the row/column strides in place.
    void transpose() {
        std::swap(rows, cols);
        std::swap(row_stride, col_stride);
    }

    /// Returns the stride of whichever axis is not the unit-stride row axis:
    /// col_stride when row_stride == 1, row_stride otherwise.
    ptrdiff_t ld() const {
        if (row_stride == 1) {
            return col_stride;
        }
        return row_stride;
    }
};
/// Matrix storage order requested by a backend; stored in a single `char`.
enum class MatrixLayout : char {
    COL_MAJOR = 0,
    ROW_MAJOR = 1,
};
/**
 * Layout information for a (batched) matrix multiplication C = A * B.
 *
 * The constructor validates the three operand descriptors and, when the
 * output's unit-stride axis matches the pattern checked for the requested
 * `layout`, transposes all operands and swaps A with B.
 */
struct MatmulInfo {
    BlasMatrix a_matrix;
    BlasMatrix b_matrix;
    BlasMatrix c_matrix;

    /// m = c_matrix.rows, n = c_matrix.cols, k = a_matrix.cols (after any transposition).
    size_t m;
    size_t n;
    size_t k;
    /// Batch count taken from the output matrix.
    size_t batch;

    /// True when the operands were transposed and A/B were swapped.
    bool is_transed = false;

    /**
     * @param c_desc Descriptor of the output matrix.
     * @param a_desc Descriptor of the left operand.
     * @param b_desc Descriptor of the right operand.
     * @param status Receives the first failure reported while building the
     *               three BlasMatrix views, INFINI_STATUS_BAD_TENSOR_SHAPE when
     *               the matrix extents or batch counts are incompatible, and
     *               INFINI_STATUS_SUCCESS otherwise. On failure the remaining
     *               fields are left unset.
     * @param layout Storage order requested by the caller.
     */
    MatmulInfo(infiniopTensorDescriptor_t c_desc,
               infiniopTensorDescriptor_t a_desc,
               infiniopTensorDescriptor_t b_desc,
               infiniStatus_t *status,
               MatrixLayout layout) {
        const auto failed = [status]() { return *status != INFINI_STATUS_SUCCESS; };

        a_matrix = BlasMatrix(a_desc, status);
        if (failed()) {
            return;
        }
        b_matrix = BlasMatrix(b_desc, status);
        if (failed()) {
            return;
        }
        c_matrix = BlasMatrix(c_desc, status);
        if (failed()) {
            return;
        }

        // (rows_c x cols_c) = (rows_a x cols_a) * (rows_b x cols_b)
        const bool extents_agree = c_matrix.rows == a_matrix.rows
                                && c_matrix.cols == b_matrix.cols
                                && a_matrix.cols == b_matrix.rows;
        if (!extents_agree) {
            *status = INFINI_STATUS_BAD_TENSOR_SHAPE;
            return;
        }

        batch = c_matrix.batch;
        const bool batches_agree = a_matrix.match_batch(batch) && b_matrix.match_batch(batch);
        if (!batches_agree) {
            *status = INFINI_STATUS_BAD_TENSOR_SHAPE;
            return;
        }

        // Decide whether to compute the transposed product instead.
        bool transpose_needed = false;
        switch (layout) {
        case MatrixLayout::COL_MAJOR:
            transpose_needed = c_matrix.col_stride == 1;
            break;
        case MatrixLayout::ROW_MAJOR:
            transpose_needed = c_matrix.row_stride == 1;
            break;
        default:
            break;
        }
        if (transpose_needed) {
            c_matrix.transpose();
            b_matrix.transpose();
            a_matrix.transpose();
            std::swap(a_matrix, b_matrix);
            is_transed = true;
        }

        m = c_matrix.rows;
        n = c_matrix.cols;
        k = a_matrix.cols;
    }
};
}
// namespace op::gemm
#endif // __BLAS_H__
src/infiniop/ops/gemm/cpu/gemm_cpu.cc
View file @
e5bda616
...
...
@@ -18,14 +18,11 @@ infiniStatus_t Descriptor::create(
return
INFINI_STATUS_BAD_TENSOR_DTYPE
;
}
infiniStatus_t
status
;
auto
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
COL_MAJOR
);
if
(
status
!=
INFINI_STATUS_SUCCESS
)
{
return
status
;
}
auto
result
=
MatmulInfo
::
create
(
c_desc
,
a_desc
,
b_desc
,
MatrixLayout
::
COL_MAJOR
);
CHECK_RESULT
(
result
);
*
desc_ptr
=
new
Descriptor
(
dtype
,
info
,
0
,
dtype
,
result
.
take
()
,
0
,
nullptr
,
handle
->
device
,
handle
->
device_id
);
return
INFINI_STATUS_SUCCESS
;
...
...
src/infiniop/ops/gemm/cuda/gemm_cuda.cu
View file @
e5bda616
...
...
@@ -24,14 +24,11 @@ infiniStatus_t Descriptor::create(
return
INFINI_STATUS_BAD_TENSOR_DTYPE
;
}
infiniStatus_t
status
;
auto
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
COL_MAJOR
);
if
(
status
!=
INFINI_STATUS_SUCCESS
)
{
return
status
;
}
auto
result
=
MatmulInfo
::
create
(
c_desc
,
a_desc
,
b_desc
,
MatrixLayout
::
COL_MAJOR
);
CHECK_RESULT
(
result
);
*
desc_ptr
=
new
Descriptor
(
dtype
,
info
,
0
,
dtype
,
result
.
take
()
,
0
,
new
Opaque
{
handle
->
internal
()},
handle
->
device
,
handle
->
device_id
);
return
INFINI_STATUS_SUCCESS
;
...
...
src/infiniop/ops/gemm/gemm.h
View file @
e5bda616
...
...
@@ -2,7 +2,7 @@
#define __GEMM_H__
#include "../../operator.h"
#include "
blas
.h"
#include "
info
.h"
/**
* # 关于 `DESCRIPTOR(NAMESPACE)` 和 `struct Opaque;` 的说明
...
...
@@ -44,50 +44,50 @@
* 这个宏仅适用于矩阵乘,但这种模式很容易复制到其他算子,以简化和规范算子的声明。
*/
#define DESCRIPTOR(NAMESPACE) \
\
namespace op::gemm::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
struct Opaque; \
Opaque *_opaque; \
infiniDtype_t _dtype; \
MatmulInfo _info; \
\
Descriptor(
\
infiniDtype_t dtype,
\
MatmulInfo info,
\
size_t workspace_size_,
\
Opaque *opaque,
\
infiniDevice_t device_type,
\
in
t device_id)
\
: InfiniopDescriptor{device_type, device_id},
\
_opaque(opaque),
\
_
dtype(dtype),
\
_
info(info),
\
workspace_size(workspace_size_) {}
\
\
public:
\
size_t workspace_size;
\
\
~Descriptor();
\
\
static infiniStatus_t create(
\
infini
opHandle_t handle,
\
Descriptor **desc_ptr,
\
infiniopTensorDescriptor_t c_desc,
\
infiniopTensorDescriptor_t
a
_desc, \
infiniopTensorDescriptor_t
b
_desc
);
\
\
infiniStatus_t calculate(
\
void *workspace,
\
size_t workspace_size,
\
void *c, \
float beta, \
const void *a, \
const void *b, \
float alpha, \
void *stream) const; \
}; \
#define DESCRIPTOR(NAMESPACE)
\
\
namespace op::gemm::NAMESPACE {
\
class Descriptor final : public InfiniopDescriptor {
\
struct Opaque;
\
Opaque *_opaque;
\
infiniDtype_t _dtype;
\
MatmulInfo _info;
\
size_t _workspace_size;
\
\
Descriptor(
\
infiniDtype_t dtype,
\
MatmulInfo info,
\
size_t workspace_size_,
\
Opaque *opaque,
\
in
finiDevice_t device_type,
\
int device_id)
\
: InfiniopDescriptor{device_type, device_id},
\
_
opaque(opaque),
\
_
dtype(dtype),
\
_info(info),
\
_workspace_size(workspace_size_) {}
\
\
public:
\
~Descriptor();
\
\
size_t workspaceSize() const { return _workspace_size; }
\
\
static
infini
Status_t create(
\
infiniopHandle_t handle,
\
Descriptor **desc_ptr,
\
infiniopTensorDescriptor_t
c
_desc,
\
infiniopTensorDescriptor_t
a
_desc
,
\
infiniopTensorDescriptor_t b_desc);
\
\
infiniStatus_t calculate(
\
void *workspace,
size_t workspace_size, \
void *c,
\
float beta,
\
const void *a,
\
const void *b,
\
float alpha,
\
void *stream) const;
\
};
\
}
#endif // __GEMM_H__
src/infiniop/ops/gemm/info.h
0 → 100644
View file @
e5bda616
#ifndef __GEMM_INFO_H__
#define __GEMM_INFO_H__
#include "../../../utils.h"
#include "../../operator.h"
#include "../../tensor.h"
#include <algorithm>
namespace
op
::
gemm
{
/**
 * View of a 2-D or batched 3-D tensor as a (batch of) matrix.
 *
 * The default constructor is private; instances are produced by create().
 */
class BlasMatrix {
    BlasMatrix() = default;

public:
    /// Number of dimensions of the source tensor (2 or 3).
    size_t ndim;
    /// Number of matrices; 1 for a 2-D tensor.
    size_t batch;
    /// Stride between consecutive matrices; 0 when batch == 1.
    ptrdiff_t stride;
    /// Row count.
    size_t rows;
    /// Column count.
    size_t cols;
    /// Stride between consecutive rows.
    ptrdiff_t row_stride;
    /// Stride between consecutive columns.
    ptrdiff_t col_stride;

    /**
     * Reads the matrix layout from `layout`.
     *
     * @param layout Tensor descriptor; must be 2-D or 3-D.
     * @return The matrix view on success; INFINI_STATUS_BAD_TENSOR_SHAPE for
     *         any other rank; INFINI_STATUS_BAD_TENSOR_STRIDES when neither the
     *         row nor the column stride equals 1.
     */
    static utils::Result<BlasMatrix> create(infiniopTensorDescriptor_t layout) {
        BlasMatrix mat;

        switch (layout->ndim()) {
        case 2:
            mat.ndim = 2;
            mat.batch = 1;
            mat.stride = 0;
            mat.rows = layout->dim(0);
            mat.cols = layout->dim(1);
            mat.row_stride = layout->stride(0);
            mat.col_stride = layout->stride(1);
            break;
        case 3:
            mat.ndim = 3;
            mat.batch = layout->dim(0);
            // A single-matrix batch is given a zero batch stride.
            if (mat.batch == 1) {
                mat.stride = 0;
            } else {
                mat.stride = layout->stride(0);
            }
            mat.rows = layout->dim(1);
            mat.cols = layout->dim(2);
            mat.row_stride = layout->stride(1);
            mat.col_stride = layout->stride(2);
            break;
        default:
            return INFINI_STATUS_BAD_TENSOR_SHAPE;
        }

        // At least one of the two matrix axes must have unit stride.
        const bool has_unit_stride = mat.row_stride == 1 || mat.col_stride == 1;
        if (!has_unit_stride) {
            return INFINI_STATUS_BAD_TENSOR_STRIDES;
        }

        return utils::Result<BlasMatrix>(mat);
    }

    /// True when this matrix has exactly `_batch` matrices or a single one.
    bool match_batch(size_t _batch) const {
        if (batch == _batch) {
            return true;
        }
        return batch == 1;
    }

    /// Swaps the row/column extents and the row/column strides in place.
    void transpose() {
        std::swap(rows, cols);
        std::swap(row_stride, col_stride);
    }

    /// Returns col_stride when row_stride == 1, row_stride otherwise.
    ptrdiff_t ld() const {
        if (row_stride == 1) {
            return col_stride;
        }
        return row_stride;
    }
};
/// Matrix storage order requested by a backend; stored in a single `char`.
enum class MatrixLayout : char {
    COL_MAJOR = 0,
    ROW_MAJOR = 1,
};
/**
 * Layout information for a (batched) matrix multiplication C = A * B.
 *
 * The default constructor is private; instances are produced by create().
 * The data-member order is relied upon by the brace initialization in create().
 */
class MatmulInfo {
    MatmulInfo() = default;

public:
    BlasMatrix a_matrix;
    BlasMatrix b_matrix;
    BlasMatrix c_matrix;

    /// m = c_matrix.rows, n = c_matrix.cols, k = a_matrix.cols (after any transposition).
    size_t m;
    size_t n;
    size_t k;
    /// Batch count taken from the output matrix.
    size_t batch;

    /// True when the operands were transposed and A/B were swapped.
    bool is_transed;

    /**
     * Validates the three operand descriptors and derives the GEMM layout.
     *
     * @param c_desc Descriptor of the output matrix.
     * @param a_desc Descriptor of the left operand.
     * @param b_desc Descriptor of the right operand.
     * @param layout Storage order requested by the caller.
     * @return The populated info on success; the failure of any
     *         BlasMatrix::create() call as propagated by CHECK_RESULT; or
     *         INFINI_STATUS_BAD_TENSOR_SHAPE when the matrix extents or batch
     *         counts are incompatible.
     */
    static utils::Result<MatmulInfo> create(infiniopTensorDescriptor_t c_desc,
                                            infiniopTensorDescriptor_t a_desc,
                                            infiniopTensorDescriptor_t b_desc,
                                            MatrixLayout layout) {
        auto a_matrix = BlasMatrix::create(a_desc);
        CHECK_RESULT(a_matrix);

        auto b_matrix = BlasMatrix::create(b_desc);
        CHECK_RESULT(b_matrix);

        auto c_matrix = BlasMatrix::create(c_desc);
        CHECK_RESULT(c_matrix);

        // (rows_c x cols_c) = (rows_a x cols_a) * (rows_b x cols_b)
        const bool extents_agree = c_matrix->rows == a_matrix->rows
                                && c_matrix->cols == b_matrix->cols
                                && a_matrix->cols == b_matrix->rows;
        if (!extents_agree) {
            return INFINI_STATUS_BAD_TENSOR_SHAPE;
        }

        auto batch = c_matrix->batch;
        const bool batches_agree = a_matrix->match_batch(batch) && b_matrix->match_batch(batch);
        if (!batches_agree) {
            return INFINI_STATUS_BAD_TENSOR_SHAPE;
        }

        // Decide whether to compute the transposed product instead.
        bool is_transed = false;
        switch (layout) {
        case MatrixLayout::COL_MAJOR:
            is_transed = c_matrix->col_stride == 1;
            break;
        case MatrixLayout::ROW_MAJOR:
            is_transed = c_matrix->row_stride == 1;
            break;
        default:
            break;
        }
        if (is_transed) {
            c_matrix->transpose();
            b_matrix->transpose();
            a_matrix->transpose();
            std::swap(a_matrix, b_matrix);
        }

        // Read the final extents before the results are consumed by take().
        auto m = c_matrix->rows;
        auto n = c_matrix->cols;
        auto k = a_matrix->cols;

        return utils::Result<MatmulInfo>(MatmulInfo{
            a_matrix.take(),
            b_matrix.take(),
            c_matrix.take(),
            m,
            n,
            k,
            batch,
            is_transed});
    }
};
}
// namespace op::gemm
#endif // __GEMM_INFO_H__
src/infiniop/ops/gemm/kunlun/gemm_kunlun.cc
View file @
e5bda616
...
...
@@ -27,14 +27,11 @@ infiniStatus_t Descriptor::create(
return
INFINI_STATUS_BAD_TENSOR_DTYPE
;
}
infiniStatus_t
status
;
auto
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
ROW_MAJOR
);
if
(
status
!=
INFINI_STATUS_SUCCESS
)
{
return
status
;
}
auto
result
=
MatmulInfo
::
create
(
c_desc
,
a_desc
,
b_desc
,
MatrixLayout
::
ROW_MAJOR
);
CHECK_RESULT
(
result
);
*
desc_ptr
=
new
Descriptor
(
dtype
,
info
,
0
,
dtype
,
result
.
take
()
,
0
,
new
Opaque
{
handle
->
internal
()},
handle
->
device
,
handle
->
device_id
);
return
INFINI_STATUS_SUCCESS
;
...
...
src/infiniop/ops/gemm/maca/gemm_maca.cc
View file @
e5bda616
...
...
@@ -25,14 +25,11 @@ infiniStatus_t Descriptor::create(
return
INFINI_STATUS_BAD_TENSOR_DTYPE
;
}
infiniStatus_t
status
;
auto
info
=
MatmulInfo
(
c_desc
,
a_desc
,
b_desc
,
&
status
,
MatrixLayout
::
COL_MAJOR
);
if
(
status
!=
INFINI_STATUS_SUCCESS
)
{
return
status
;
}
auto
result
=
MatmulInfo
::
create
(
c_desc
,
a_desc
,
b_desc
,
MatrixLayout
::
COL_MAJOR
);
CHECK_RESULT
(
result
);
*
desc_ptr
=
new
Descriptor
(
dtype
,
info
,
0
,
dtype
,
result
.
take
()
,
0
,
new
Opaque
{
handle
->
internal
()},
handle
->
device
,
handle
->
device_id
);
return
INFINI_STATUS_SUCCESS
;
...
...
src/infiniop/ops/gemm/operator.cc
View file @
e5bda616
...
...
@@ -70,9 +70,9 @@ infiniopGetGemmWorkspaceSize(
infiniopGemmDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
*size = reinterpret_cast<const op::gemm::NAMESPACE::Descriptor *>(desc)->workspace
_s
ize; \
#define GET(CASE, NAMESPACE)
\
case CASE:
\
*size = reinterpret_cast<const op::gemm::NAMESPACE::Descriptor *>(desc)->workspace
S
ize
()
; \
return INFINI_STATUS_SUCCESS
switch
(
desc
->
device_type
)
{
...
...
src/infiniop/ops/rearrange/cpu/rearrange_cpu.cc
View file @
e5bda616
...
...
@@ -27,14 +27,12 @@ infiniStatus_t Descriptor::create(
auto
dst_strides
=
y_desc
->
strides
().
data
();
auto
src_strides
=
x_desc
->
strides
().
data
();
auto
element_size
=
infiniSizeOf
(
dtype
);
auto
meta
=
utils
::
RearrangeMeta
::
create
(
shape
,
dst_strides
,
src_strides
,
ndim
,
element_size
);
if
(
!
meta
)
{
return
INFINI_STATUS_BAD_TENSOR_STRIDES
;
}
auto
result
=
utils
::
RearrangeMeta
::
create
(
shape
,
dst_strides
,
src_strides
,
ndim
,
element_size
);
CHECK_RESULT
(
result
);
*
desc_ptr
=
new
Descriptor
(
std
::
move
(
*
meta
),
result
.
take
(
),
nullptr
,
handle
->
device
,
handle
->
device_id
);
...
...
src/infiniop/ops/rms_norm/cpu/rms_norm_cpu.cc
View file @
e5bda616
...
...
@@ -3,6 +3,7 @@
#include "../../../reduce/cpu/reduce.h"
namespace
op
::
rms_norm
::
cpu
{
Descriptor
::~
Descriptor
()
{}
infiniStatus_t
Descriptor
::
create
(
...
...
@@ -12,9 +13,9 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t
x_desc
,
infiniopTensorDescriptor_t
w_desc
,
float
epsilon
)
{
RMSNormInfo
info
;
CHECK_
STATUS
(
createRMSNormInfo
(
&
info
,
y_desc
,
x_desc
,
w_desc
,
epsilon
)
);
*
desc_ptr
=
new
Descriptor
(
nullptr
,
info
,
0
,
handle
->
device
,
handle
->
device_id
);
auto
result
=
RMSNormInfo
::
create
(
y_desc
,
x_desc
,
w_desc
,
epsilon
)
;
CHECK_
RESULT
(
result
);
*
desc_ptr
=
new
Descriptor
(
nullptr
,
result
.
take
()
,
0
,
handle
->
device
,
handle
->
device_id
);
return
INFINI_STATUS_SUCCESS
;
}
...
...
@@ -68,9 +69,10 @@ infiniStatus_t rmsnormF16(const RMSNormInfo *info, fp16_t *y, const fp16_t *x, c
return
INFINI_STATUS_SUCCESS
;
}
infiniStatus_t
Descriptor
::
calculate
(
void
*
workspace
,
size_t
workspace_size
,
void
*
y
,
const
void
*
x
,
const
void
*
w
,
void
*
stream
)
{
infiniStatus_t
Descriptor
::
calculate
(
void
*
workspace
,
size_t
workspace_size
,
void
*
y
,
const
void
*
x
,
const
void
*
w
,
void
*
stream
)
const
{
if
(
_info
.
atype
==
INFINI_DTYPE_F16
)
{
if
(
_info
.
wtype
==
INFINI_DTYPE_F16
)
{
CHECK_STATUS
(
rmsnormF16
(
&
_info
,
(
fp16_t
*
)
y
,
(
const
fp16_t
*
)
x
,
(
const
fp16_t
*
)
w
));
...
...
src/infiniop/ops/rms_norm/info.h
0 → 100644
View file @
e5bda616
#
ifndef
__RMS_NORM_INFO_H__
#define __RMS_NORM_INFO_H__
#include "../../../utils.h"
#include "../../tensor.h"
#include <vector>
namespace
op
::
rms_norm
{
/**
 * Validated description of the tensors taking part in an RMS-norm operation.
 *
 * The default constructor is private; instances are produced by create().
 * The data-member order is relied upon by the brace initialization in create().
 */
class RMSNormInfo {
    RMSNormInfo() = default;

public:
    /// Element type of the weight tensor.
    infiniDtype_t wtype;
    /// Element type shared by the input and output tensors.
    infiniDtype_t atype;
    /// Epsilon value passed to create().
    float epsilon;
    /// Shape of the output tensor.
    std::vector<size_t> shape;
    /// Strides of the output tensor.
    std::vector<ptrdiff_t> y_strides;
    /// Strides of the input tensor.
    std::vector<ptrdiff_t> x_strides;

    /// Number of dimensions recorded in `shape`.
    size_t ndim() const {
        return shape.size();
    }

    /// Extent of the last dimension; `shape` must not be empty.
    size_t dim() const {
        return shape.back();
    }

    /**
     * Validates the descriptors and builds an RMSNormInfo.
     *
     * @param y_desc  Output tensor descriptor (2-D, unit stride on axis 1).
     * @param x_desc  Input tensor descriptor (same dtype and shape as y).
     * @param w_desc  Weight tensor descriptor (1-D, unit stride, length equal
     *                to y's second extent).
     * @param epsilon Value stored unchanged in the result.
     * @return The populated info on success; INFINI_STATUS_BAD_TENSOR_DTYPE,
     *         INFINI_STATUS_BAD_TENSOR_SHAPE or INFINI_STATUS_BAD_TENSOR_STRIDES
     *         for the first failing check, evaluated in that order.
     */
    static utils::Result<RMSNormInfo> create(infiniopTensorDescriptor_t y_desc,
                                             infiniopTensorDescriptor_t x_desc,
                                             infiniopTensorDescriptor_t w_desc,
                                             float epsilon) {
        const auto act_type = y_desc->dtype();
        const auto weight_type = w_desc->dtype();

        if (x_desc->dtype() != act_type) {
            return INFINI_STATUS_BAD_TENSOR_DTYPE;
        }

        switch (act_type) {
        case INFINI_DTYPE_F16:
            // F16 activations accept F16 or F32 weights.
            if (!(weight_type == INFINI_DTYPE_F16 || weight_type == INFINI_DTYPE_F32)) {
                return INFINI_STATUS_BAD_TENSOR_DTYPE;
            }
            break;
        case INFINI_DTYPE_F32:
        case INFINI_DTYPE_F64:
            // Wider activations require weights of the same type.
            if (act_type != weight_type) {
                return INFINI_STATUS_BAD_TENSOR_DTYPE;
            }
            break;
        default:
            return INFINI_STATUS_BAD_TENSOR_DTYPE;
        }

        const bool ranks_ok = y_desc->ndim() == 2 && x_desc->ndim() == 2 && w_desc->ndim() == 1;
        if (!ranks_ok) {
            return INFINI_STATUS_BAD_TENSOR_SHAPE;
        }

        const size_t batch = y_desc->shape()[0];
        const size_t last_dim = y_desc->shape()[1];
        const bool extents_ok = x_desc->shape()[0] == batch
                             && x_desc->shape()[1] == last_dim
                             && w_desc->shape()[0] == last_dim;
        if (!extents_ok) {
            return INFINI_STATUS_BAD_TENSOR_SHAPE;
        }

        if (w_desc->stride(0) != 1) {
            return INFINI_STATUS_BAD_TENSOR_STRIDES;
        }

        const bool rows_contiguous = x_desc->stride(1) == 1 && y_desc->stride(1) == 1;
        if (!rows_contiguous) {
            return INFINI_STATUS_BAD_TENSOR_STRIDES;
        }

        return utils::Result<RMSNormInfo>(RMSNormInfo{
            weight_type,
            act_type,
            epsilon,
            y_desc->shape(),
            y_desc->strides(),
            x_desc->strides(),
        });
    }
};
}
// namespace op::rms_norm
#endif // __RMS_NORM_INFO_H__
src/infiniop/ops/rms_norm/rms_norm.h
View file @
e5bda616
#ifndef RMS_NORM_H
#define RMS_NORM_H
#include "../../operator.h"
#include "../../tensor.h"
#include <vector>
/**
 * Description of the tensors taking part in an RMS-norm operation.
 *
 * Filled in by createRMSNormInfo(). The accessors are `const` so they can be
 * used through a `const RMSNormInfo *` / `const RMSNormInfo &`.
 */
struct RMSNormInfo {
    /// Element type of the weight tensor.
    infiniDtype_t wtype;
    /// Element type shared by the input and output tensors.
    infiniDtype_t atype;
    /// Epsilon value supplied by the caller.
    float epsilon;
    /// Shape of the output tensor.
    std::vector<size_t> shape;
    /// Strides of the output tensor.
    std::vector<ptrdiff_t> y_strides;
    /// Strides of the input tensor.
    std::vector<ptrdiff_t> x_strides;

    /// Number of dimensions recorded in `shape`.
    size_t ndim() const {
        return shape.size();
    }

    /// Extent of the last dimension; `shape` must not be empty.
    size_t dim() const {
        return shape[ndim() - 1];
    }
};
/**
 * Validates the RMS-norm tensor descriptors and fills `*info`.
 *
 * All checks run before anything is written, so `*info` is left untouched
 * when an error status is returned.
 *
 * @param info    Output structure; populated only on success.
 * @param y_desc  Output tensor descriptor (2-D, unit stride on axis 1).
 * @param x_desc  Input tensor descriptor (same dtype and shape as y).
 * @param w_desc  Weight tensor descriptor (1-D, unit stride, length equal to
 *                y's second extent).
 * @param epsilon Value stored unchanged in `info->epsilon`.
 * @return INFINI_STATUS_SUCCESS, or INFINI_STATUS_BAD_TENSOR_DTYPE /
 *         INFINI_STATUS_BAD_TENSOR_SHAPE / INFINI_STATUS_BAD_TENSOR_STRIDES for
 *         the first failing check, evaluated in that order.
 */
inline infiniStatus_t createRMSNormInfo(RMSNormInfo *info,
                                        infiniopTensorDescriptor_t y_desc,
                                        infiniopTensorDescriptor_t x_desc,
                                        infiniopTensorDescriptor_t w_desc,
                                        float epsilon) {
    const auto atype = y_desc->dtype();
    const auto wtype = w_desc->dtype();

    if (x_desc->dtype() != atype) {
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }

    if (atype == INFINI_DTYPE_F16) {
        // F16 activations accept F16 or F32 weights.
        if (wtype != INFINI_DTYPE_F16 && wtype != INFINI_DTYPE_F32) {
            return INFINI_STATUS_BAD_TENSOR_DTYPE;
        }
    } else if (atype == INFINI_DTYPE_F32 || atype == INFINI_DTYPE_F64) {
        // Wider activations require weights of the same type.
        if (atype != wtype) {
            return INFINI_STATUS_BAD_TENSOR_DTYPE;
        }
    } else {
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }

    if (y_desc->ndim() != 2 || x_desc->ndim() != 2 || w_desc->ndim() != 1) {
        return INFINI_STATUS_BAD_TENSOR_SHAPE;
    }

    const size_t batch = y_desc->shape()[0];
    const size_t dim = y_desc->shape()[1];
    if (x_desc->shape()[0] != batch || x_desc->shape()[1] != dim || w_desc->shape()[0] != dim) {
        return INFINI_STATUS_BAD_TENSOR_SHAPE;
    }

    if (w_desc->stride(0) != 1) {
        return INFINI_STATUS_BAD_TENSOR_STRIDES;
    }

    if (x_desc->stride(1) != 1 || y_desc->stride(1) != 1) {
        return INFINI_STATUS_BAD_TENSOR_STRIDES;
    }

    // Everything validated: commit the result. The shape/stride vectors are
    // copied rather than wrapped in std::move, so the descriptors' own
    // storage is never moved from.
    info->wtype = wtype;
    info->atype = atype;
    info->epsilon = epsilon;
    info->shape = y_desc->shape();
    info->y_strides = y_desc->strides();
    info->x_strides = x_desc->strides();

    return INFINI_STATUS_SUCCESS;
}
#define DESCRIPTOR(NAMESPACE) \
namespace op::rms_norm::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
struct Opaque; \
Opaque *_opaque; \
RMSNormInfo _info; \
size_t _workspace_size; \
\
Descriptor( \
Opaque *opaque, \
RMSNormInfo info, \
size_t workspace_size, \
infiniDevice_t device_type, \
int device_id) : InfiniopDescriptor{device_type, device_id}, \
_opaque(opaque), \
_info(info), \
_workspace_size(workspace_size) {} \
\
public: \
~Descriptor(); \
size_t workspaceSize() const { return _workspace_size; } \
static infiniStatus_t create( \
infiniopHandle_t handle, \
Descriptor **desc_ptr, \
infiniopTensorDescriptor_t y_desc, \
infiniopTensorDescriptor_t x_desc, \
infiniopTensorDescriptor_t w_desc, \
float epsilon); \
infiniStatus_t calculate(void *workspace, size_t workspace_size, \
void *y, const void *x, const void *w, void *stream); \
}; \
#include "../../operator.h"
#include "info.h"
/**
 * DESCRIPTOR(NAMESPACE) declares `op::rms_norm::NAMESPACE::Descriptor`,
 * a final class derived from `InfiniopDescriptor`.
 *
 * The generated class holds:
 *   - `_opaque`: pointer to a nested `Opaque` struct that is only
 *     forward-declared here;
 *   - `_info`: an `RMSNormInfo` stored by value;
 *   - `_workspace_size`: the value returned by `workspaceSize()`.
 *
 * Its constructor is private, so instances are obtained through the static
 * `create()` function. The destructor, `create()` and `calculate()` are only
 * declared by this macro; their definitions must be supplied elsewhere.
 */
#define DESCRIPTOR(NAMESPACE) \
\
namespace op::rms_norm::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
struct Opaque; \
Opaque *_opaque; \
RMSNormInfo _info; \
size_t _workspace_size; \
\
Descriptor( \
Opaque *opaque, \
RMSNormInfo info, \
size_t workspace_size, \
infiniDevice_t device_type, \
int device_id) \
: InfiniopDescriptor{device_type, device_id}, \
_opaque(opaque), \
_info(info), \
_workspace_size(workspace_size) {} \
\
public: \
~Descriptor(); \
\
size_t workspaceSize() const { return _workspace_size; } \
\
static infiniStatus_t create( \
infiniopHandle_t handle, \
Descriptor **desc_ptr, \
infiniopTensorDescriptor_t y_desc, \
infiniopTensorDescriptor_t x_desc, \
infiniopTensorDescriptor_t w_desc, \
float epsilon); \
\
infiniStatus_t calculate( \
void *workspace, size_t workspace_size, \
void *y, \
const void *x, \
const void *w, \
void *stream) const; \
}; \
}
#endif // RMS_NORM_H
src/utils.h
View file @
e5bda616
#ifndef INFINIUTILS_H
#define INFINIUTILS_H
#include "infinicore.h"
#include "utils/check.h"
#include "utils/custom_types.h"
#include "utils/rearrange.h"
...
...
src/utils/rearrange.cc
View file @
e5bda616
...
...
@@ -13,7 +13,7 @@ namespace utils {
RearrangeMeta
::
RearrangeMeta
(
std
::
vector
<
ptrdiff_t
>
meta
)
:
_meta
(
std
::
move
(
meta
))
{}
std
::
optional
<
RearrangeMeta
>
RearrangeMeta
::
create
(
Result
<
RearrangeMeta
>
RearrangeMeta
::
create
(
const
size_t
*
shape
,
const
ptrdiff_t
*
dst_strides_
,
const
ptrdiff_t
*
src_strides_
,
...
...
@@ -32,7 +32,9 @@ std::optional<RearrangeMeta> RearrangeMeta::create(
// 剔除初始的 1 长维度
if
(
shape
[
i
]
!=
1
)
{
auto
sd
=
dst_strides_
[
i
]
*
unit
,
ss
=
src_strides_
[
i
]
*
unit
;
// assert (sd != 0)
if
(
sd
==
0
)
{
return
INFINI_STATUS_BAD_TENSOR_STRIDES
;
}
dims
.
push_back
(
Dim
{
shape
[
i
],
sd
,
ss
});
}
}
...
...
@@ -81,7 +83,7 @@ std::optional<RearrangeMeta> RearrangeMeta::create(
for
(
ptrdiff_t
i
=
ndim
;
i
>
0
;
--
i
)
{
meta
[
1
+
i
-
1
]
*=
meta
[
1
+
i
];
}
return
{
RearrangeMeta
(
std
::
move
(
meta
)
)}
;
return
Result
<
RearrangeMeta
>
(
meta
);
}
size_t
RearrangeMeta
::
ndim
()
const
{
return
(
_meta
.
size
()
-
2
)
/
3
;
}
...
...
src/utils/rearrange.h
View file @
e5bda616
#ifndef __INFINIUTILS_REARRANGE_H__
#define __INFINIUTILS_REARRANGE_H__
#include
<optional>
#include <stddef
.h
>
#include
"result.hpp"
#include <
c
stddef>
#include <vector>
namespace
utils
{
...
...
@@ -12,7 +12,7 @@ class RearrangeMeta {
RearrangeMeta
(
std
::
vector
<
ptrdiff_t
>
);
public:
static
std
::
optional
<
RearrangeMeta
>
create
(
static
Result
<
RearrangeMeta
>
create
(
const
size_t
*
shape
,
const
ptrdiff_t
*
dst_strides
,
const
ptrdiff_t
*
src_strides
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment