Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
46a15a1a
Commit
46a15a1a
authored
Aug 06, 2025
by
zhangyue
Browse files
支持 elementwise float16 bfloat16 数据类型
parent
3a7633ba
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
21 additions
and
1 deletion
+21
-1
src/infiniop/devices/kunlun/kunlun_kernel_common.h
src/infiniop/devices/kunlun/kunlun_kernel_common.h
+1
-0
src/infiniop/ops/swiglu/kunlun/swiglu_kunlun.xpu
src/infiniop/ops/swiglu/kunlun/swiglu_kunlun.xpu
+20
-1
No files found.
src/infiniop/devices/kunlun/kunlun_kernel_common.h
View file @
46a15a1a
...
@@ -4,6 +4,7 @@
...
@@ -4,6 +4,7 @@
// This header file will only be include by .xpu file
// This header file will only be include by .xpu file
#include "xpu/runtime.h"
#include "xpu/runtime.h"
#include <xpu/kernel/xtdk.h>
#include <xpu/kernel/xtdk.h>
#include <xpu/kernel/xtdk_bf16.h>
#include <xpu/kernel/xtdk_math.h>
#include <xpu/kernel/xtdk_math.h>
#include <xpu/kernel/xtdk_simd.h>
#include <xpu/kernel/xtdk_simd.h>
...
...
src/infiniop/ops/swiglu/kunlun/swiglu_kunlun.xpu
View file @
46a15a1a
...
@@ -10,6 +10,10 @@ private:
...
@@ -10,6 +10,10 @@ private:
inline __device__ T sigmoid(T x) const {
inline __device__ T sigmoid(T x) const {
return 1.0f / (1.0f + exp(-x));
return 1.0f / (1.0f + exp(-x));
}
}
// float version of sigmoid
inline __device__ float sigmoidf(float x) const {
return 1.0f / (1.0f + exp(-x));
}
public:
public:
// This static number must be set in other Ops
// This static number must be set in other Ops
...
@@ -21,9 +25,20 @@ public:
...
@@ -21,9 +25,20 @@ public:
T out = gate * sigmoid(gate) * up;
T out = gate * sigmoid(gate) * up;
return out;
return out;
}
}
// bfloat16 特化版本(使用 float 计算精度)
inline __device__ bfloat16_t operator()(const bfloat16_t *inputs) const {
float up_f = __bfloat162float(inputs[0]);
float gate_f = __bfloat162float(inputs[1]);
float out_f = gate_f * sigmoidf(gate_f) * up_f;
return __float2bfloat16(out_f);
}
} SwiGLUOp;
} SwiGLUOp;
// __global__ template function instantiation
INSTANTIATE_ELEMENTWISE_KERNEL(SwiGLUOp::num_inputs, SwiGLUOp, float);
INSTANTIATE_ELEMENTWISE_KERNEL(SwiGLUOp::num_inputs, SwiGLUOp, float);
INSTANTIATE_ELEMENTWISE_KERNEL(SwiGLUOp::num_inputs, SwiGLUOp, half);
INSTANTIATE_ELEMENTWISE_KERNEL(SwiGLUOp::num_inputs, SwiGLUOp, bfloat16_t);
} // namespace op::elementwise::kunlun
} // namespace op::elementwise::kunlun
namespace op::swiglu::kunlun {
namespace op::swiglu::kunlun {
...
@@ -45,7 +60,7 @@ infiniStatus_t Descriptor::create(
...
@@ -45,7 +60,7 @@ infiniStatus_t Descriptor::create(
const auto &up_shape = up_desc->shape();
const auto &up_shape = up_desc->shape();
const auto &gate_shape = gate_desc->shape();
const auto &gate_shape = gate_desc->shape();
CHECK_DTYPE(dtype, INFINI_DTYPE_F32);
CHECK_DTYPE(dtype, INFINI_DTYPE_F32
, INFINI_DTYPE_F16, INFINI_DTYPE_BF16
);
CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape);
CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape);
// create KUNLUN elementwise descriptor
// create KUNLUN elementwise descriptor
...
@@ -68,6 +83,10 @@ infiniStatus_t Descriptor::calculate(
...
@@ -68,6 +83,10 @@ infiniStatus_t Descriptor::calculate(
switch (_dtype) {
switch (_dtype) {
case INFINI_DTYPE_F32:
case INFINI_DTYPE_F32:
return _device_info->calculate<8, op::elementwise::kunlun::SwiGLUOp, float>(_info, workspace, output, inputs, stream);
return _device_info->calculate<8, op::elementwise::kunlun::SwiGLUOp, float>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F16:
return _device_info->calculate<8, op::elementwise::kunlun::SwiGLUOp, half>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_BF16:
return _device_info->calculate<8, op::elementwise::kunlun::SwiGLUOp, bfloat16_t>(_info, workspace, output, inputs, stream);
default:
default:
return INFINI_STATUS_BAD_TENSOR_DTYPE;
return INFINI_STATUS_BAD_TENSOR_DTYPE;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment