Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
35388a54
"tests/data/gpu_hpl_sample.out" did not exist on "88cd18dfaf707551540b22d2d3c8b731fa2d10b7"
Commit
35388a54
authored
Aug 08, 2025
by
zhangyue
Browse files
Merge branch 'main' of
https://github.com/InfiniTensor/InfiniCore
into p800-sub
parents
0fe0aea2
72c4dc7c
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
44 additions
and
34 deletions
+44
-34
src/infiniop/ops/swiglu/kunlun/kernel.h
src/infiniop/ops/swiglu/kunlun/kernel.h
+39
-0
src/infiniop/ops/swiglu/kunlun/swiglu_kunlun.xpu
src/infiniop/ops/swiglu/kunlun/swiglu_kunlun.xpu
+5
-34
No files found.
src/infiniop/ops/swiglu/kunlun/kernel.h
0 → 100644
View file @
35388a54
#ifndef __SWIGLU_KUNLUN_KERNEL_H__
#define __SWIGLU_KUNLUN_KERNEL_H__

namespace op::swiglu::kunlun {

/// @brief SwiGLU op kernel: out = gate * sigmoid(gate) * up.
/// Consumed by the generic elementwise kernel machinery, which hands each
/// element's operands to operator() as an array of `num_inputs` values.
typedef struct SwiGLUOp {
private:
    /// Logistic sigmoid for a generic element type T.
    /// NOTE(review): the arithmetic mixes float literals with T; for
    /// half-precision T the intermediate math may promote — confirm this is
    /// the intended precision on the Kunlun toolchain.
    template <typename T>
    inline __device__ T sigmoid(T x) const {
        return 1.0f / (1.0f + exp(-x));
    }

    /// float-specialized sigmoid, used by the bfloat16 path below.
    inline __device__ float sigmoidf(float x) const {
        // Use the float overload expf(): a bare exp(float) would silently
        // compute in double precision, which is slower on device.
        return 1.0f / (1.0f + expf(-x));
    }

public:
    // Number of input operands this op consumes (up and gate).
    // This static number must be set in other Ops as well.
    static constexpr int num_inputs = 2;

    /// Generic evaluation; inputs[0] = up, inputs[1] = gate.
    template <typename T>
    inline __device__ T operator()(const T *inputs) const {
        T up = inputs[0];
        T gate = inputs[1];
        T out = gate * sigmoid(gate) * up;
        return out;
    }

    /// bfloat16 specialization (computes in float precision): converting to
    /// float for the arithmetic avoids the accuracy loss of doing the
    /// multiply/sigmoid directly in bfloat16.
    inline __device__ bfloat16_t operator()(const bfloat16_t *inputs) const {
        float up_f = __bfloat162float(inputs[0]);
        float gate_f = __bfloat162float(inputs[1]);
        float out_f = gate_f * sigmoidf(gate_f) * up_f;
        return __float2bfloat16(out_f);
    }
} SwiGLUOp;
} // namespace op::swiglu::kunlun

#endif // __SWIGLU_KUNLUN_KERNEL_H__
src/infiniop/ops/swiglu/kunlun/swiglu_kunlun.xpu
View file @
35388a54
#include "../../../elementwise/kunlun/elementwise_kunlun.h"
#include "../../../elementwise/kunlun/elementwise_kunlun.h"
#include "kernel.h"
#include "swiglu_kunlun.h"
#include "swiglu_kunlun.h"
namespace op::elementwise::kunlun {
namespace op::elementwise::kunlun {
/// @brief SwiGLU op kernel
using SwiGLUOp = op::swiglu::kunlun::SwiGLUOp;
// SwiGLU op functor: out = gate * sigmoid(gate) * up, applied per element
// by the generic elementwise kernel machinery.
typedef struct SwiGLUOp {
private:
// Logistic sigmoid for a generic element type T.
// NOTE(review): mixes float literals with T; for half-precision T the
// intermediate math may promote — confirm intended precision on Kunlun.
template <typename T>
inline __device__ T sigmoid(T x) const {
return 1.0f / (1.0f + exp(-x));
}
// float version of sigmoid (used by the bfloat16 path below)
inline __device__ float sigmoidf(float x) const {
return 1.0f / (1.0f + exp(-x));
}
public:
// Number of input operands this op consumes (up and gate).
// This static number must be set in other Ops
static constexpr int num_inputs = 2;
// Generic evaluation; inputs[0] = up, inputs[1] = gate.
template <typename T>
inline __device__ T operator()(const T *inputs) const {
T up = inputs[0];
T gate = inputs[1];
T out = gate * sigmoid(gate) * up;
return out;
}
// bfloat16 specialization (computes in float precision): converting to
// float avoids the accuracy loss of doing the math directly in bfloat16.
inline __device__ bfloat16_t operator()(const bfloat16_t *inputs) const {
float up_f = __bfloat162float(inputs[0]);
float gate_f = __bfloat162float(inputs[1]);
float out_f = gate_f * sigmoidf(gate_f) * up_f;
return __float2bfloat16(out_f);
}
} SwiGLUOp;
// __global__ template function instantiation
// __global__ template function instantiation
INSTANTIATE_ELEMENTWISE_KERNEL(SwiGLUOp::num_inputs, SwiGLUOp, float);
INSTANTIATE_ELEMENTWISE_KERNEL(SwiGLUOp::num_inputs, SwiGLUOp, float);
...
@@ -82,11 +53,11 @@ infiniStatus_t Descriptor::calculate(
...
@@ -82,11 +53,11 @@ infiniStatus_t Descriptor::calculate(
switch (_dtype) {
switch (_dtype) {
case INFINI_DTYPE_F32:
case INFINI_DTYPE_F32:
return _device_info->calculate<8,
op::elementwise::kunlun::
SwiGLUOp, float>(_info, workspace, output, inputs, stream);
return _device_info->calculate<8, SwiGLUOp, float>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F16:
case INFINI_DTYPE_F16:
return _device_info->calculate<8,
op::elementwise::kunlun::
SwiGLUOp, half>(_info, workspace, output, inputs, stream);
return _device_info->calculate<8, SwiGLUOp, half>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_BF16:
case INFINI_DTYPE_BF16:
return _device_info->calculate<8,
op::elementwise::kunlun::
SwiGLUOp, bfloat16_t>(_info, workspace, output, inputs, stream);
return _device_info->calculate<8, SwiGLUOp, bfloat16_t>(_info, workspace, output, inputs, stream);
default:
default:
return INFINI_STATUS_BAD_TENSOR_DTYPE;
return INFINI_STATUS_BAD_TENSOR_DTYPE;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment