Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
2f2a74b6
Commit
2f2a74b6
authored
Mar 24, 2025
by
Zimin Li
Browse files
Merge remote-tracking branch 'upstream/main'
parents
1d95ddf3
70806eed
Changes
80
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
378 additions
and
230 deletions
+378
-230
src/infiniop/ops/gemm/blas.h
src/infiniop/ops/gemm/blas.h
+2
-2
src/infiniop/ops/gemm/cpu/gemm_cpu.cc
src/infiniop/ops/gemm/cpu/gemm_cpu.cc
+3
-3
src/infiniop/ops/gemm/cpu/gemm_cpu.h
src/infiniop/ops/gemm/cpu/gemm_cpu.h
+8
-0
src/infiniop/ops/gemm/cuda/gemm_cuda.cu
src/infiniop/ops/gemm/cuda/gemm_cuda.cu
+3
-3
src/infiniop/ops/gemm/cuda/gemm_cuda.cuh
src/infiniop/ops/gemm/cuda/gemm_cuda.cuh
+8
-0
src/infiniop/ops/gemm/gemm.h
src/infiniop/ops/gemm/gemm.h
+4
-4
src/infiniop/ops/gemm/kunlun/gemm_kunlun.cc
src/infiniop/ops/gemm/kunlun/gemm_kunlun.cc
+5
-5
src/infiniop/ops/gemm/kunlun/gemm_kunlun.h
src/infiniop/ops/gemm/kunlun/gemm_kunlun.h
+8
-0
src/infiniop/ops/gemm/maca/gemm_maca.cc
src/infiniop/ops/gemm/maca/gemm_maca.cc
+3
-3
src/infiniop/ops/gemm/maca/gemm_maca.h
src/infiniop/ops/gemm/maca/gemm_maca.h
+8
-0
src/infiniop/ops/gemm/operator.cc
src/infiniop/ops/gemm/operator.cc
+33
-33
src/infiniop/ops/matmul/ascend/matmul_ascend.h
src/infiniop/ops/matmul/ascend/matmul_ascend.h
+0
-8
src/infiniop/ops/matmul/bang/matmul_bang.h
src/infiniop/ops/matmul/bang/matmul_bang.h
+0
-8
src/infiniop/ops/matmul/cpu/matmul_cpu.h
src/infiniop/ops/matmul/cpu/matmul_cpu.h
+0
-8
src/infiniop/ops/matmul/cuda/matmul_cuda.cuh
src/infiniop/ops/matmul/cuda/matmul_cuda.cuh
+0
-8
src/infiniop/ops/matmul/kunlun/matmul_kunlun.h
src/infiniop/ops/matmul/kunlun/matmul_kunlun.h
+0
-8
src/infiniop/ops/matmul/maca/matmul_maca.h
src/infiniop/ops/matmul/maca/matmul_maca.h
+0
-8
src/infiniop/ops/random_sample/cpu/random_sample_cpu.cc
src/infiniop/ops/random_sample/cpu/random_sample_cpu.cc
+197
-0
src/infiniop/ops/random_sample/cpu/random_sample_cpu.h
src/infiniop/ops/random_sample/cpu/random_sample_cpu.h
+8
-0
src/infiniop/ops/random_sample/operator.cc
src/infiniop/ops/random_sample/operator.cc
+88
-129
No files found.
src/infiniop/ops/
matmul
/blas.h
→
src/infiniop/ops/
gemm
/blas.h
View file @
2f2a74b6
...
...
@@ -5,7 +5,7 @@
#include "../../tensor.h"
#include <algorithm>
namespace
op
::
matmul
{
namespace
op
::
gemm
{
struct
BlasMatrix
{
size_t
ndim
;
...
...
@@ -120,6 +120,6 @@ struct MatmulInfo {
}
};
}
// namespace op::
matmul
}
// namespace op::
gemm
#endif // __BLAS_H__
src/infiniop/ops/
matmul/cpu/matmul
_cpu.cc
→
src/infiniop/ops/
gemm/cpu/gemm
_cpu.cc
View file @
2f2a74b6
#include "
matmul
_cpu.h"
#include "
gemm
_cpu.h"
#include "../../../devices/cpu/common_cpu.h"
namespace
op
::
matmul
::
cpu
{
namespace
op
::
gemm
::
cpu
{
Descriptor
::~
Descriptor
()
=
default
;
...
...
@@ -95,4 +95,4 @@ infiniStatus_t Descriptor::calculate(
}
}
}
// namespace op::
matmul
::cpu
}
// namespace op::
gemm
::cpu
src/infiniop/ops/gemm/cpu/gemm_cpu.h
0 → 100644
View file @
2f2a74b6
#ifndef __GEMM_CPU_H__
#define __GEMM_CPU_H__
#include "../gemm.h"
// Expands the DESCRIPTOR macro from ../gemm.h for the `cpu` backend, which
// opens namespace op::gemm::cpu and declares its `Descriptor` class.
DESCRIPTOR
(
cpu
)
#endif // __GEMM_CPU_H__
src/infiniop/ops/
matmul/cuda/matmul
_cuda.cu
→
src/infiniop/ops/
gemm/cuda/gemm
_cuda.cu
View file @
2f2a74b6
#include "../../../devices/cuda/cuda_handle.cuh"
#include "
matmul
_cuda.cuh"
#include "
gemm
_cuda.cuh"
namespace
op
::
matmul
::
cuda
{
namespace
op
::
gemm
::
cuda
{
struct
Descriptor
::
Opaque
{
std
::
shared_ptr
<
device
::
cuda
::
Handle
::
Internal
>
internal
;
...
...
@@ -109,4 +109,4 @@ infiniStatus_t Descriptor::calculate(
return
INFINI_STATUS_SUCCESS
;
}
}
// namespace op::
matmul
::cuda
}
// namespace op::
gemm
::cuda
src/infiniop/ops/gemm/cuda/gemm_cuda.cuh
0 → 100644
View file @
2f2a74b6
#ifndef __GEMM_CUDA_CUH__
#define __GEMM_CUDA_CUH__
#include "../gemm.h"
// Expands the DESCRIPTOR macro from ../gemm.h for the `cuda` backend, which
// opens namespace op::gemm::cuda and declares its `Descriptor` class.
DESCRIPTOR
(
cuda
)
#endif // __GEMM_CUDA_CUH__
src/infiniop/ops/
matmul/matmul
.h
→
src/infiniop/ops/
gemm/gemm
.h
View file @
2f2a74b6
#ifndef __
MATMUL
_H__
#define __
MATMUL
_H__
#ifndef __
GEMM
_H__
#define __
GEMM
_H__
#include "../../operator.h"
#include "blas.h"
...
...
@@ -46,7 +46,7 @@
#define DESCRIPTOR(NAMESPACE) \
\
namespace op::
matmul
::NAMESPACE { \
namespace op::
gemm
::NAMESPACE {
\
class Descriptor final : public InfiniopDescriptor { \
struct Opaque; \
Opaque *_opaque; \
...
...
@@ -90,4 +90,4 @@
}; \
}
#endif // __
MATMUL
_H__
#endif // __
GEMM
_H__
src/infiniop/ops/
matmul
/kunlun/
matmul
_kunlun.cc
→
src/infiniop/ops/
gemm
/kunlun/
gemm
_kunlun.cc
View file @
2f2a74b6
#include "
matmul
_kunlun.h"
#include "
gemm
_kunlun.h"
#include "../../../../utils.h"
#include "../../../devices/kunlun/kunlun_handle.h"
namespace
op
::
matmul
::
kunlun
{
namespace
op
::
gemm
::
kunlun
{
typedef
device
::
kunlun
::
Handle
::
Internal
HandleInternal
;
...
...
@@ -103,12 +103,12 @@ infiniStatus_t Descriptor::calculate(
void
*
stream
)
const
{
switch
(
_dtype
)
{
case
INFINI_DTYPE_F16
:
return
op
::
matmul
::
kunlun
::
calculate
<
float16
>
(
_info
,
_opaque
->
internal
,
_dtype
,
c
,
beta
,
a
,
b
,
alpha
,
(
kunlunStream_t
)
stream
);
return
op
::
gemm
::
kunlun
::
calculate
<
float16
>
(
_info
,
_opaque
->
internal
,
_dtype
,
c
,
beta
,
a
,
b
,
alpha
,
(
kunlunStream_t
)
stream
);
case
INFINI_DTYPE_F32
:
return
op
::
matmul
::
kunlun
::
calculate
<
float
>
(
_info
,
_opaque
->
internal
,
_dtype
,
c
,
beta
,
a
,
b
,
alpha
,
(
kunlunStream_t
)
stream
);
return
op
::
gemm
::
kunlun
::
calculate
<
float
>
(
_info
,
_opaque
->
internal
,
_dtype
,
c
,
beta
,
a
,
b
,
alpha
,
(
kunlunStream_t
)
stream
);
default:
return
INFINI_STATUS_BAD_TENSOR_DTYPE
;
}
}
}
// namespace op::
matmul
::kunlun
}
// namespace op::
gemm
::kunlun
src/infiniop/ops/gemm/kunlun/gemm_kunlun.h
0 → 100644
View file @
2f2a74b6
#ifndef __GEMM_KUNLUN_H__
#define __GEMM_KUNLUN_H__
#include "../gemm.h"
// Expands the DESCRIPTOR macro from ../gemm.h for the `kunlun` backend, which
// opens namespace op::gemm::kunlun and declares its `Descriptor` class.
DESCRIPTOR
(
kunlun
)
#endif // __GEMM_KUNLUN_H__
src/infiniop/ops/
matmul/maca/matmul
_maca.cc
→
src/infiniop/ops/
gemm/maca/gemm
_maca.cc
View file @
2f2a74b6
#include "
matmul
_maca.h"
#include "
gemm
_maca.h"
#include "../../../devices/maca/common_maca.h"
#include "../../../devices/maca/maca_handle.h"
namespace
op
::
matmul
::
maca
{
namespace
op
::
gemm
::
maca
{
struct
Descriptor
::
Opaque
{
std
::
shared_ptr
<
device
::
maca
::
Handle
::
Internal
>
internal
;
...
...
@@ -106,4 +106,4 @@ infiniStatus_t Descriptor::calculate(
return
INFINI_STATUS_SUCCESS
;
}
}
// namespace op::
matmul
::maca
}
// namespace op::
gemm
::maca
src/infiniop/ops/gemm/maca/gemm_maca.h
0 → 100644
View file @
2f2a74b6
#ifndef __GEMM_MACA_H__
#define __GEMM_MACA_H__
#include "../gemm.h"
// Expands the DESCRIPTOR macro from ../gemm.h for the `maca` backend, which
// opens namespace op::gemm::maca and declares its `Descriptor` class.
DESCRIPTOR
(
maca
)
#endif // __GEMM_MACA_H__
src/infiniop/ops/
matmul
/operator.cc
→
src/infiniop/ops/
gemm
/operator.cc
View file @
2f2a74b6
#include "../../operator.h"
#include "../../handle.h"
#include "infiniop/ops/
matmul
.h"
#include "infiniop/ops/
gemm
.h"
#ifdef ENABLE_CPU_API
#include "cpu/
matmul
_cpu.h"
#include "cpu/
gemm
_cpu.h"
#endif
#ifdef ENABLE_CUDA_API
#include "cuda/
matmul
_cuda.cuh"
#include "cuda/
gemm
_cuda.cuh"
#endif
#ifdef ENABLE_CAMBRICON_API
#include "bang/
matmul
_bang.h"
#include "bang/
gemm
_bang.h"
#endif
#ifdef ENABLE_ASCEND_API
#include "ascend/
matmul
_ascend.h"
#include "ascend/
gemm
_ascend.h"
#endif
#ifdef ENABLE_METAX_API
#include "maca/
matmul
_maca.h"
#include "maca/
gemm
_maca.h"
#endif
#ifdef ENABLE_KUNLUN_API
#include "kunlun/
matmul
_kunlun.h"
#include "kunlun/
gemm
_kunlun.h"
#endif
__C
infiniStatus_t
infiniopCreate
Matmul
Descriptor
(
__C
infiniStatus_t
infiniopCreate
Gemm
Descriptor
(
infiniopHandle_t
handle
,
infiniop
Matmul
Descriptor_t
*
desc_ptr
,
infiniop
Gemm
Descriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
c_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
)
{
#define CREATE(CASE, NAMESPACE)
\
case CASE:
\
return op::
matmul
::NAMESPACE::Descriptor::create( \
handle,
\
reinterpret_cast<op::
matmul
::NAMESPACE::Descriptor **>(desc_ptr), \
c_desc,
\
a_desc,
\
#define CREATE(CASE, NAMESPACE) \
case CASE: \
return op::
gemm
::NAMESPACE::Descriptor::create( \
handle, \
reinterpret_cast<op::
gemm
::NAMESPACE::Descriptor **>(desc_ptr), \
c_desc, \
a_desc, \
b_desc)
switch
(
handle
->
device
)
{
...
...
@@ -66,13 +66,13 @@ __C infiniStatus_t infiniopCreateMatmulDescriptor(
}
__C
infiniStatus_t
infiniopGet
Matmul
WorkspaceSize
(
infiniop
Matmul
Descriptor_t
desc
,
infiniopGet
Gemm
WorkspaceSize
(
infiniop
Gemm
Descriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE)
\
case CASE:
\
*size = reinterpret_cast<const op::
matmul
::NAMESPACE::Descriptor *>(desc)->workspace_size; \
#define GET(CASE, NAMESPACE) \
case CASE: \
*size = reinterpret_cast<const op::
gemm
::NAMESPACE::Descriptor *>(desc)->workspace_size; \
return INFINI_STATUS_SUCCESS
switch
(
desc
->
device_type
)
{
...
...
@@ -103,8 +103,8 @@ infiniopGetMatmulWorkspaceSize(
#undef GET
}
__C
infiniStatus_t
infiniop
Matmul
(
infiniop
Matmul
Descriptor_t
desc
,
__C
infiniStatus_t
infiniop
Gemm
(
infiniop
Gemm
Descriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
c
,
const
void
*
a
,
...
...
@@ -113,12 +113,12 @@ __C infiniStatus_t infiniopMatmul(
float
beta
,
void
*
stream
)
{
#define CALCULATE(CASE, NAMESPACE)
\
case CASE:
\
return reinterpret_cast<const op::
matmul
::NAMESPACE::Descriptor *>(desc) \
->calculate(workspace, workspace_size,
\
c, beta,
\
a, b, alpha,
\
#define CALCULATE(CASE, NAMESPACE) \
case CASE: \
return reinterpret_cast<const op::
gemm
::NAMESPACE::Descriptor *>(desc) \
->calculate(workspace, workspace_size, \
c, beta, \
a, b, alpha, \
stream)
switch
(
desc
->
device_type
)
{
...
...
@@ -150,11 +150,11 @@ __C infiniStatus_t infiniopMatmul(
}
__C
infiniStatus_t
infiniopDestroy
Matmul
Descriptor
(
infiniop
Matmul
Descriptor_t
desc
)
{
infiniopDestroy
Gemm
Descriptor
(
infiniop
Gemm
Descriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE)
\
case CASE:
\
delete reinterpret_cast<const op::
matmul
::NAMESPACE::Descriptor *>(desc); \
#define DELETE(CASE, NAMESPACE) \
case CASE: \
delete reinterpret_cast<const op::
gemm
::NAMESPACE::Descriptor *>(desc); \
return INFINI_STATUS_SUCCESS;
switch
(
desc
->
device_type
)
{
...
...
src/infiniop/ops/matmul/ascend/matmul_ascend.h
deleted
100644 → 0
View file @
1d95ddf3
#ifndef __MATMUL_ASCEND_H__
#define __MATMUL_ASCEND_H__
#include "../matmul.h"
DESCRIPTOR
(
ascend
)
#endif // __MATMUL_ASCEND_H__
src/infiniop/ops/matmul/bang/matmul_bang.h
deleted
100644 → 0
View file @
1d95ddf3
#ifndef __MATMUL_BANG_H__
#define __MATMUL_BANG_H__
#include "../matmul.h"
DESCRIPTOR
(
bang
)
#endif // __MATMUL_BANG_H__
src/infiniop/ops/matmul/cpu/matmul_cpu.h
deleted
100644 → 0
View file @
1d95ddf3
#ifndef __MATMUL_CPU_H__
#define __MATMUL_CPU_H__
#include "../matmul.h"
DESCRIPTOR
(
cpu
)
#endif // __MATMUL_CPU_H__
src/infiniop/ops/matmul/cuda/matmul_cuda.cuh
deleted
100644 → 0
View file @
1d95ddf3
#ifndef __MATMUL_CUDA_CUH__
#define __MATMUL_CUDA_CUH__
#include "../matmul.h"
DESCRIPTOR
(
cuda
)
#endif // __MATMUL_CUDA_CUH__
src/infiniop/ops/matmul/kunlun/matmul_kunlun.h
deleted
100644 → 0
View file @
1d95ddf3
#ifndef __MATMUL_KUNLUN_H__
#define __MATMUL_KUNLUN_H__
#include "../matmul.h"
DESCRIPTOR
(
kunlun
)
#endif // __MATMUL_KUNLUN_H__
src/infiniop/ops/matmul/maca/matmul_maca.h
deleted
100644 → 0
View file @
1d95ddf3
#ifndef __MATMUL_MACA_H__
#define __MATMUL_MACA_H__
#include "../matmul.h"
DESCRIPTOR
(
maca
)
#endif // __MATMUL_MACA_H__
src/infiniop/ops/random_sample/cpu/random_sample_cpu.cc
0 → 100644
View file @
2f2a74b6
#include "random_sample_cpu.h"
#include "../../../devices/cpu/common_cpu.h"
#include "../../../devices/cpu/cpu_handle.h"
#include "../../../tensor.h"
#include <algorithm>
namespace
op
::
random_sample
::
cpu
{
// Compiler-generated destructor; no custom cleanup is performed here.
Descriptor
::~
Descriptor
()
=
default
;
/// Validates the tensor descriptors and allocates a CPU random-sample descriptor.
///
/// @param handle_      Generic handle; reinterpreted as device::cpu::Handle.
/// @param desc_ptr     Receives the newly allocated Descriptor on success.
/// @param result_desc  Descriptor of the output index tensor.
/// @param probs_desc   Descriptor of the input score tensor.
/// @return INFINI_STATUS_SUCCESS when this function runs to the end; the
///         CHECK_* macros are expected to return an error status earlier
///         (NOTE(review): macro bodies are not visible here — confirm).
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t result_desc,
    infiniopTensorDescriptor_t probs_desc) {

    auto handle = reinterpret_cast<device::cpu::Handle *>(handle_);

    // Element types of the output index and the input scores.
    auto dt_i = result_desc->dtype();
    auto dt_p = probs_desc->dtype();

    // The index must be one of the listed integer types, the scores one of
    // the listed floating-point types.
    CHECK_DTYPE(dt_i,
                INFINI_DTYPE_U8, INFINI_DTYPE_U16, INFINI_DTYPE_U32, INFINI_DTYPE_U64,
                INFINI_DTYPE_I8, INFINI_DTYPE_I16, INFINI_DTYPE_I32, INFINI_DTYPE_I64);
    CHECK_DTYPE(dt_p, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64);

    // Shape/stride requirements: result.ndim() vs 0, probs.ndim() vs 1 and
    // probs.stride(0) vs 1 (presumably equality checks — confirm against the
    // CHECK_API_OR definition).
    CHECK_API_OR(result_desc->ndim(), 0, return INFINI_STATUS_BAD_TENSOR_SHAPE);
    CHECK_API_OR(probs_desc->ndim(), 1, return INFINI_STATUS_BAD_TENSOR_SHAPE);
    CHECK_API_OR(probs_desc->stride(0), 1, return INFINI_STATUS_BAD_TENSOR_STRIDES);

    // Constructor arguments after the element count are a literal 0 and a
    // null pointer, followed by the device identity taken from the handle.
    *desc_ptr = new Descriptor(
        dt_i,
        dt_p,
        probs_desc->dim(0),
        0,
        nullptr,
        handle->device,
        handle->device_id);

    return INFINI_STATUS_SUCCESS;
}
// Returns the stored _min_workspace_size member unchanged.
size_t
Descriptor
::
minWorkspaceSize
()
const
{
return
_min_workspace_size
;
}
/// Maps a stored element type to the type used for arithmetic on it.
/// The primary template is the identity mapping: values are computed in the
/// same type they are stored in.
template <typename Element>
struct ComputeType {
    using type = Element;
};
// Specialization for fp16_t: arithmetic on half-precision inputs is carried
// out in float.
template
<
>
struct
ComputeType
<
fp16_t
>
{
using
type
=
float
;
};
template
<
class
Tidx
,
class
Tval
>
struct
Scheme
{
using
Tcompute
=
typename
ComputeType
<
Tval
>::
type
;
static
Tcompute
get
(
void
const
*
ptr
,
size_t
i
)
{
return
utils
::
cast
<
Tcompute
,
Tval
>
(
reinterpret_cast
<
Tval
const
*>
(
ptr
)[
i
]);
}
static
void
argmax
(
void
*
result
,
void
const
*
probs
,
size_t
n
)
{
auto
idx
=
reinterpret_cast
<
Tidx
*>
(
result
);
*
idx
=
0
;
auto
max_val
=
get
(
probs
,
0
);
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
if
(
auto
val
=
get
(
probs
,
i
);
val
>
max_val
)
{
max_val
=
val
;
*
idx
=
static_cast
<
Tidx
>
(
i
);
}
}
}
static
void
random
(
void
*
result
,
void
const
*
probs
,
size_t
n
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
)
{
struct
KVPair
{
Tidx
idx
;
Tcompute
val
;
bool
operator
<
(
const
KVPair
&
other
)
const
{
return
val
>
other
.
val
;
}
};
auto
idx
=
reinterpret_cast
<
Tidx
*>
(
result
);
// build & sort
std
::
vector
<
KVPair
>
pairs
(
n
);
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
pairs
[
i
]
=
{
static_cast
<
Tidx
>
(
i
),
get
(
probs
,
i
)};
}
std
::
sort
(
pairs
.
begin
(),
pairs
.
end
());
// softmax & sum
auto
const
max_val
=
pairs
[
0
].
val
;
pairs
[
0
].
val
=
1
;
for
(
size_t
i
=
1
;
i
<
n
;
i
++
)
{
pairs
[
i
].
val
=
pairs
[
i
-
1
].
val
+
std
::
exp
((
pairs
[
i
].
val
-
max_val
)
/
temperature
);
}
// topk & topp & limit
auto
const
pk
=
pairs
[
std
::
min
(
static_cast
<
size_t
>
(
topk
),
n
)
-
1
].
val
,
pp
=
pairs
[
n
-
1
].
val
*
topp
,
plimit
=
random_val
*
std
::
min
(
pk
,
pp
);
// sample
for
(
size_t
i
=
0
;
i
<
n
;
i
++
)
{
if
(
plimit
<=
pairs
[
i
].
val
)
{
*
idx
=
pairs
[
i
].
idx
;
break
;
}
}
}
};
template
<
class
Tidx
,
class
Tval
>
void
switch_f
(
size_t
n
,
void
*
result
,
const
void
*
probs
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
)
{
if
(
random_val
==
0
||
topp
==
0
||
topk
==
1
||
temperature
==
0
)
{
Scheme
<
Tidx
,
Tval
>::
argmax
(
result
,
probs
,
n
);
}
else
{
Scheme
<
Tidx
,
Tval
>::
random
(
result
,
probs
,
n
,
random_val
,
topp
,
topk
,
temperature
);
}
}
/// Dispatches on the score dtype `dt_p` and forwards to switch_f with the
/// matching element type (fp16_t, float or double). Any other dtype aborts
/// the process.
template <class Tidx>
void switch_val(
    infiniDtype_t dt_p,
    size_t n,
    void *result, void const *probs,
    float random_val, float topp, int topk, float temperature) {

    if (dt_p == INFINI_DTYPE_F16) {
        switch_f<Tidx, fp16_t>(n, result, probs, random_val, topp, topk, temperature);
    } else if (dt_p == INFINI_DTYPE_F32) {
        switch_f<Tidx, float>(n, result, probs, random_val, topp, topk, temperature);
    } else if (dt_p == INFINI_DTYPE_F64) {
        switch_f<Tidx, double>(n, result, probs, random_val, topp, topk, temperature);
    } else {
        // unreachable
        std::abort();
    }
}
/// Dispatches on the result dtype `dt_i` and forwards to switch_val with the
/// matching fixed-width integer type. Any other dtype aborts the process.
void switch_idx(
    infiniDtype_t dt_i, infiniDtype_t dt_p,
    size_t n,
    void *result, void const *probs,
    float random_val, float topp, int topk, float temperature) {

    switch (dt_i) {
    case INFINI_DTYPE_I8:
        return switch_val<int8_t>(dt_p, n, result, probs, random_val, topp, topk, temperature);
    case INFINI_DTYPE_I16:
        return switch_val<int16_t>(dt_p, n, result, probs, random_val, topp, topk, temperature);
    case INFINI_DTYPE_I32:
        return switch_val<int32_t>(dt_p, n, result, probs, random_val, topp, topk, temperature);
    case INFINI_DTYPE_I64:
        return switch_val<int64_t>(dt_p, n, result, probs, random_val, topp, topk, temperature);
    case INFINI_DTYPE_U8:
        return switch_val<uint8_t>(dt_p, n, result, probs, random_val, topp, topk, temperature);
    case INFINI_DTYPE_U16:
        return switch_val<uint16_t>(dt_p, n, result, probs, random_val, topp, topk, temperature);
    case INFINI_DTYPE_U32:
        return switch_val<uint32_t>(dt_p, n, result, probs, random_val, topp, topk, temperature);
    case INFINI_DTYPE_U64:
        return switch_val<uint64_t>(dt_p, n, result, probs, random_val, topp, topk, temperature);
    default:
        // unreachable
        std::abort();
    }
}
/// Runs the sampling on the calling thread and always reports success.
///
/// The stored dtypes (_dt_i, _dt_p) and element count (_n) select the
/// implementation via switch_idx. `workspace`, `workspace_size` and `stream`
/// are accepted for interface compatibility but are not used here.
infiniStatus_t Descriptor::calculate(
    void *workspace, size_t workspace_size,
    void *result, const void *probs,
    float random_val, float topp, int topk, float temperature,
    void *stream) const {

    // Intentionally unused in this implementation.
    (void)workspace;
    (void)workspace_size;
    (void)stream;

    switch_idx(_dt_i, _dt_p, _n,
               result, probs,
               random_val, topp, topk, temperature);

    return INFINI_STATUS_SUCCESS;
}
}
// namespace op::random_sample::cpu
src/infiniop/ops/random_sample/cpu/random_sample_cpu.h
0 → 100644
View file @
2f2a74b6
#ifndef __RANDOM_SAMPLE_CPU_H__
#define __RANDOM_SAMPLE_CPU_H__
#include "../random_sample.h"
// Expands the DESCRIPTOR macro from ../random_sample.h for the `cpu` backend.
// NOTE(review): presumably this declares op::random_sample::cpu::Descriptor,
// whose members random_sample_cpu.cc defines — the macro body is not visible
// here, confirm against random_sample.h.
DESCRIPTOR
(
cpu
)
#endif // __RANDOM_SAMPLE_CPU_H__
src/infiniop/ops/random_sample/operator.cc
View file @
2f2a74b6
...
...
@@ -2,152 +2,111 @@
#include "../../handle.h"
#include "infiniop/ops/random_sample.h"
__C
infiniStatus_t
infiniopCreateRandomSampleDescriptor
(
infiniopHandle_t
handle
,
infiniopRandomSampleDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
result
,
infiniopTensorDescriptor_t
probs
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuCreateRandomSampleDescriptor
(
handle
,
(
RandomSampleCpuDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
return
cudaCreateRandomSampleDescriptor
((
CudaHandle_t
)
handle
,
(
RandomSampleCudaDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCreateRandomSampleDescriptor
((
BangHandle_t
)
handle
,
(
RandomSampleBangDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendCreateRandomSampleDescriptor
((
AscendHandle_t
)
handle
,
(
RandomSampleAscendDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaCreateRandomSampleDescriptor
((
MacaHandle_t
)
handle
,
(
RandomSampleMacaDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
}
#ifdef ENABLE_CPU_API
#include "cpu/random_sample_cpu.h"
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
return
musaCreateRandomSampleDescriptor
((
MusaHandle_t
)
handle
,
(
RandomSampleMusaDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
__C
infiniStatus_t
infiniopCreateRandomSampleDescriptor
(
infiniopHandle_t
handle
,
infiniopRandomSampleDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
result
,
infiniopTensorDescriptor_t
probs
)
{
#define CREATE(CASE, NAMESPACE) \
case CASE: \
return op::random_sample::NAMESPACE::Descriptor::create( \
handle, \
reinterpret_cast<op::random_sample::NAMESPACE::Descriptor **>(desc_ptr), \
result, \
probs)
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
#undef CREATE
};
__C
infiniStatus_t
infiniopGetRandomSampleWorkspaceSize
(
infiniopRandomSampleDescriptor_t
desc
,
size_t
*
size
)
{
__C
infiniStatus_t
infiniopGetRandomSampleWorkspaceSize
(
infiniopRandomSampleDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
using Ptr = const op::random_sample::NAMESPACE::Descriptor *; \
*size = reinterpret_cast<Ptr>(desc)->minWorkspaceSize(); \
return INFINI_STATUS_SUCCESS
switch
(
desc
->
device_type
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuGetRandomSampleWorkspaceSize
((
RandomSampleCpuDescriptor_t
)
desc
,
size
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaGetRandomSampleWorkspaceSize
((
RandomSampleCudaDescriptor_t
)
desc
,
size
);
}
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangGetRandomSampleWorkspaceSize
((
RandomSampleBangDescriptor_t
)
desc
,
size
);
// return cnnlGetRandomSampleWorkspaceSize((RandomSampleCnnlDescriptor_t) desc, size);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendGetRandomSampleWorkspaceSize
((
RandomSampleAscendDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaGetRandomSampleWorkspaceSize
((
RandomSampleMacaDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaGetRandomSampleWorkspaceSize
((
RandomSampleMusaDescriptor_t
)
desc
,
size
);
}
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
#undef GET
}
__C
infiniStatus_t
infiniopRandomSample
(
infiniopRandomSampleDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
result
,
const
void
*
probs
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
,
void
*
stream
)
{
__C
infiniStatus_t
infiniopRandomSample
(
infiniopRandomSampleDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
result
,
const
void
*
probs
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
,
void
*
stream
)
{
#define CALCULATE(CASE, NAMESPACE) \
case CASE: \
return reinterpret_cast<const op::random_sample::NAMESPACE::Descriptor *>(desc) \
->calculate(workspace, workspace_size, \
result, probs, \
random_val, \
topp, topk, temperature, \
stream)
switch
(
desc
->
device_type
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuRandomSample
((
RandomSampleCpuDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
return
cudaRandomSample
((
RandomSampleCudaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangRandomSample
((
RandomSampleBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendRandomSample
((
RandomSampleAscendDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaRandomSample
((
RandomSampleMacaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
return
musaRandomSample
((
RandomSampleMusaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
#undef CALCULATE
}
__C
infiniStatus_t
infiniopDestroyRandomSampleDescriptor
(
infiniopRandomSampleDescriptor_t
desc
)
{
__C
infiniStatus_t
infiniopDestroyRandomSampleDescriptor
(
infiniopRandomSampleDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
case CASE: \
delete reinterpret_cast<const op::random_sample::NAMESPACE::Descriptor *>(desc); \
return INFINI_STATUS_SUCCESS;
switch
(
desc
->
device_type
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuDestroyRandomSampleDescriptor
((
RandomSampleCpuDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
return
cudaDestroyRandomSampleDescriptor
((
RandomSampleCudaDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangDestroyRandomSampleDescriptor
((
RandomSampleBangDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendDestroyRandomSampleDescriptor
((
RandomSampleAscendDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaDestroyRandomSampleDescriptor
((
RandomSampleMacaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
return
musaDestroyRandomSampleDescriptor
((
RandomSampleMusaDescriptor_t
)
desc
);
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
#undef DELETE
}
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment