Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
23781e21
"...git@developer.sourcefind.cn:yaoyuping/nndetection.git" did not exist on "98eb97f15d9f9d107fac11d27c153e18932ab635"
Commit
23781e21
authored
Oct 22, 2025
by
pengcheng888
Browse files
issue/521 - support topksoftmax on metax GPU.
parent
f5e6d729
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
120 additions
and
7 deletions
+120
-7
src/infiniop/ops/topksoftmax/cuda/kernel.cuh
src/infiniop/ops/topksoftmax/cuda/kernel.cuh
+1
-4
src/infiniop/ops/topksoftmax/metax/topksoftmax_metax.cuh
src/infiniop/ops/topksoftmax/metax/topksoftmax_metax.cuh
+8
-0
src/infiniop/ops/topksoftmax/metax/topksoftmax_metax.maca
src/infiniop/ops/topksoftmax/metax/topksoftmax_metax.maca
+93
-0
src/infiniop/ops/topksoftmax/operator.cc
src/infiniop/ops/topksoftmax/operator.cc
+15
-0
test/infiniop-test/test_generate/testcases/topksoftmax.py
test/infiniop-test/test_generate/testcases/topksoftmax.py
+1
-1
test/infiniop/topksoftmax.py
test/infiniop/topksoftmax.py
+2
-2
No files found.
src/infiniop/ops/topksoftmax/cuda/kernel.cuh
View file @
23781e21
...
...
@@ -5,16 +5,13 @@
#include <cub/block/block_radix_sort.cuh>
#include <cub/block/block_reduce.cuh>
#include <cub/block/block_store.cuh>
#include <cuda_bf16.h>
#include <cuda_fp16.h>
#include <cuda_runtime.h>
template
<
typename
T
>
inline
__device__
float
exp_func
(
T
x
)
{
float
data
;
if
constexpr
(
std
::
is_same_v
<
T
,
float
>
)
{
data
=
x
;
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
__nv
_bfloat16
>
)
{
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
cuda
_bfloat16
>
)
{
data
=
__bfloat162float
(
x
);
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
half
>
)
{
data
=
__half2float
(
x
);
...
...
src/infiniop/ops/topksoftmax/metax/topksoftmax_metax.cuh
0 → 100644
View file @
23781e21
#ifndef __TOPKSOFTMAX_METAX_CUH__
#define __TOPKSOFTMAX_METAX_CUH__

#include "../topksoftmax.h"

// Instantiate the shared topksoftmax Descriptor declaration for the
// metax backend (DESCRIPTOR is declared in ../topksoftmax.h).
DESCRIPTOR(metax)

#endif // __TOPKSOFTMAX_METAX_CUH__
src/infiniop/ops/topksoftmax/metax/topksoftmax_metax.maca
0 → 100644
View file @
23781e21
#include "../../../devices/metax/metax_common.h"
#include "topksoftmax_metax.cuh"
#include "../../../devices/metax/metax_kernel_common.h"
#include <cub/block/block_reduce.cuh>
#include "../../../reduce/cuda/reduce.cuh"
#include "../cuda/kernel.cuh"
namespace op::topksoftmax::metax {

// Device-specific state hidden behind the public Descriptor (pimpl idiom).
struct Descriptor::Opaque {
    std::shared_ptr<device::metax::Handle::Internal> internal;
};

Descriptor::~Descriptor() {
    delete _opaque;
}

// Validates the input tensor descriptor and constructs a metax Descriptor.
//
// Preconditions enforced here: the input's innermost stride must be 1
// (rows contiguous). No scratch workspace is needed, hence workspace
// size 0 is recorded on the descriptor.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t x_desc) {
    auto result = TopksoftmaxInfo::create(x_desc);
    CHECK_RESULT(result);
    auto info = result.take();
    if (info.x_strides[1] != 1) {
        return INFINI_STATUS_BAD_TENSOR_STRIDES;
    }
    *desc_ptr = new Descriptor(
        new Opaque{reinterpret_cast<device::metax::Handle *>(handle)->internal()},
        std::move(info),
        0, // no workspace required
        handle->device, handle->device_id);
    return INFINI_STATUS_SUCCESS;
}

namespace {

// Launches softmax_topk_row_kernel with one block per row (grid = N rows,
// BLOCK_SIZE threads per block) on `stream`, dispatching on the element
// dtype of d_input. Returns INFINI_STATUS_BAD_TENSOR_DTYPE for any dtype
// other than F32/F16/BF16.
template <int BLOCK_SIZE = 128>
infiniStatus_t launch_topksoftmax(float *d_values_out, int *d_indices_out, const void *d_input, const size_t N, const size_t width, const size_t topk, const bool norm, infiniDtype_t xtype, hcStream_t stream) {
    dim3 blocks(static_cast<unsigned int>(N));
    dim3 threads(BLOCK_SIZE);
    switch (xtype) {
    case INFINI_DTYPE_F32:
        softmax_topk_row_kernel<float, BLOCK_SIZE><<<blocks, threads, 0, stream>>>(d_values_out, d_indices_out, (float *)d_input, N, width, topk, norm);
        break;
    case INFINI_DTYPE_F16:
        softmax_topk_row_kernel<half, BLOCK_SIZE><<<blocks, threads, 0, stream>>>(d_values_out, d_indices_out, (half *)d_input, N, width, topk, norm);
        break;
    case INFINI_DTYPE_BF16:
        softmax_topk_row_kernel<__hpcc_bfloat16, BLOCK_SIZE><<<blocks, threads, 0, stream>>>(d_values_out, d_indices_out, (__hpcc_bfloat16 *)d_input, N, width, topk, norm);
        break;
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
    return INFINI_STATUS_SUCCESS;
}

} // anonymous namespace

// Runs top-k softmax over each of the N rows of x, writing the selected
// probabilities to `values` and their column indices to `indices`.
// Block size is chosen from the row width; rows wider than 512 are not
// supported by this backend. The launch is asynchronous on `stream_`.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    float *values,
    int *indices,
    const void *x,
    const size_t topk,
    const bool norm,
    void *stream_) const {
    if (workspace_size < _workspace_size) {
        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
    }
    const size_t N = _info.N;
    const size_t width = _info.width;
    auto stream = reinterpret_cast<hcStream_t>(stream_);
    // Propagate the launch status: a dtype-dispatch failure inside
    // launch_topksoftmax must not be silently reported as success.
    if (width <= 128) {
        return launch_topksoftmax<128>(values, indices, x, N, width, topk, norm, _info.xtype, stream);
    } else if (width <= 256) {
        return launch_topksoftmax<256>(values, indices, x, N, width, topk, norm, _info.xtype, stream);
    } else if (width <= 512) {
        return launch_topksoftmax<512>(values, indices, x, N, width, topk, norm, _info.xtype, stream);
    }
    return INFINI_STATUS_INTERNAL_ERROR;
}

} // namespace op::topksoftmax::metax
src/infiniop/ops/topksoftmax/operator.cc
View file @
23781e21
...
...
@@ -8,6 +8,9 @@
#if defined(ENABLE_NVIDIA_API)
#include "nvidia/topksoftmax_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/topksoftmax_metax.cuh"
#endif
__C
infiniStatus_t
infiniopCreateTopksoftmaxDescriptor
(
infiniopHandle_t
handle
,
infiniopTopksoftmaxDescriptor_t
*
desc_ptr
,
...
...
@@ -24,6 +27,9 @@ __C infiniStatus_t infiniopCreateTopksoftmaxDescriptor(infiniopHandle_t handle,
#endif
#ifdef ENABLE_NVIDIA_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_METAX_API
CREATE
(
INFINI_DEVICE_METAX
,
metax
);
#endif
}
...
...
@@ -45,6 +51,9 @@ __C infiniStatus_t infiniopGetTopksoftmaxWorkspaceSize(infiniopTopksoftmaxDescri
#endif
#ifdef ENABLE_NVIDIA_API
GET
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_METAX_API
GET
(
INFINI_DEVICE_METAX
,
metax
);
#endif
}
...
...
@@ -71,6 +80,9 @@ __C infiniStatus_t infiniopTopksoftmax(infiniopTopksoftmaxDescriptor_t desc, voi
#endif
#ifdef ENABLE_NVIDIA_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_METAX_API
CALCULATE
(
INFINI_DEVICE_METAX
,
metax
);
#endif
}
...
...
@@ -92,6 +104,9 @@ __C infiniStatus_t infiniopDestroyTopksoftmaxDescriptor(infiniopTopksoftmaxDescr
#endif
#ifdef ENABLE_NVIDIA_API
DESTROY
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_METAX_API
DESTROY
(
INFINI_DEVICE_METAX
,
metax
);
#endif
}
...
...
test/infiniop-test/test_generate/testcases/topksoftmax.py
View file @
23781e21
...
...
@@ -115,7 +115,7 @@ if __name__ == "__main__":
# x_shape, x_strides, topk, norm
((
1
,
32
),
None
,
4
,
True
),
((
8
,
20
),
None
,
8
,
False
),
((
2
,
128
),
None
,
10
,
True
)
((
2
,
64
),
None
,
6
,
True
)
]
_TENSOR_DTYPES_
=
[
np
.
float32
,
np
.
float16
]
for
dtype
in
_TENSOR_DTYPES_
:
...
...
test/infiniop/topksoftmax.py
View file @
23781e21
...
...
@@ -28,8 +28,8 @@ from libinfiniop import (
_TEST_CASES_
=
[
# x_shape, x_stride, topk, norm
((
1
,
10
),
None
,
7
,
True
),
((
2
,
20
),
None
,
4
,
True
),
((
1
,
128
),
None
,
10
,
True
),
((
8
,
20
),
None
,
4
,
True
),
((
2
,
64
),
None
,
6
,
True
),
]
# w (weight) types
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment