Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
b5ccf30f
Commit
b5ccf30f
authored
Mar 18, 2025
by
YdrMaster
Browse files
issue/50/fix: 添加 random sample/cpu 并修改测例
Signed-off-by:
YdrMaster
<
ydrml@hotmail.com
>
parent
bd8ae651
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
391 additions
and
165 deletions
+391
-165
.github/workflows/build.yml
.github/workflows/build.yml
+1
-0
include/infiniop/ops/random_sample.h
include/infiniop/ops/random_sample.h
+21
-13
src/infiniop/ops/random_sample/cpu/random_sample_cpu.cc
src/infiniop/ops/random_sample/cpu/random_sample_cpu.cc
+193
-0
src/infiniop/ops/random_sample/cpu/random_sample_cpu.h
src/infiniop/ops/random_sample/cpu/random_sample_cpu.h
+8
-0
src/infiniop/ops/random_sample/operator.cc
src/infiniop/ops/random_sample/operator.cc
+87
-129
src/infiniop/ops/random_sample/random_sample.h
src/infiniop/ops/random_sample/random_sample.h
+56
-0
src/utils/check.h
src/utils/check.h
+13
-0
test/infiniop/random_sample.py
test/infiniop/random_sample.py
+12
-23
No files found.
.github/workflows/build.yml
View file @
b5ccf30f
...
...
@@ -47,3 +47,4 @@ jobs:
pip install torch
LD_LIBRARY_PATH=$HOME/.infini/lib python test/infiniop/matmul.py --cpu
LD_LIBRARY_PATH=$HOME/.infini/lib python test/infiniop/rms_norm.py --cpu
LD_LIBRARY_PATH=$HOME/.infini/lib python test/infiniop/random_sample.py --cpu
include/infiniop/ops/random_sample.h
View file @
b5ccf30f
...
...
@@ -5,21 +5,29 @@
typedef InfiniopDescriptor *infiniopRandomSampleDescriptor_t;

// Create a random-sample descriptor for a scalar integer `result`
// sampled from the 1-D probability tensor `probs`.
__C __export infiniStatus_t infiniopCreateRandomSampleDescriptor(
    infiniopHandle_t handle,
    infiniopRandomSampleDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t result,
    infiniopTensorDescriptor_t probs);

// Query the device workspace size (in bytes) required by the descriptor.
__C __export infiniStatus_t infiniopGetRandomSampleWorkspaceSize(
    infiniopRandomSampleDescriptor_t desc,
    size_t *size);

// Perform top-k / top-p sampling (or argmax for degenerate parameters)
// and write the chosen index into `result`.
__C __export infiniStatus_t infiniopRandomSample(
    infiniopRandomSampleDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *result,
    const void *probs,
    float random_val,
    float topp,
    int topk,
    float temperature,
    void *stream);

// Release the descriptor created by infiniopCreateRandomSampleDescriptor.
__C __export infiniStatus_t infiniopDestroyRandomSampleDescriptor(
    infiniopRandomSampleDescriptor_t desc);

#endif
src/infiniop/ops/random_sample/cpu/random_sample_cpu.cc
0 → 100644
View file @
b5ccf30f
#include "random_sample_cpu.h"

#include "../../../devices/cpu/common_cpu.h"
#include "../../../devices/cpu/cpu_handle.h"
#include "../../../tensor.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <vector>
namespace op::random_sample::cpu {

Descriptor::~Descriptor() = default;

// Validate tensor descriptors and build a CPU random-sample descriptor.
// `result` must be a scalar integer tensor; `probs` a contiguous 1-D
// floating-point tensor.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t result_desc,
    infiniopTensorDescriptor_t probs_desc) {
    auto handle = reinterpret_cast<device::cpu::Handle *>(handle_);

    auto dt_i = result_desc->dtype();
    auto dt_p = probs_desc->dtype();

    // The result holds an index: any signed/unsigned integer type works.
    CHECK_DTYPE(dt_i,
                INFINI_DTYPE_U8, INFINI_DTYPE_U16, INFINI_DTYPE_U32, INFINI_DTYPE_U64,
                INFINI_DTYPE_I8, INFINI_DTYPE_I16, INFINI_DTYPE_I32, INFINI_DTYPE_I64);
    // Probabilities must be floating point.
    CHECK_DTYPE(dt_p, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64);

    // Shapes: scalar result, contiguous 1-D probs.
    CHECK_API_OR(result_desc->ndim(), 0, return INFINI_STATUS_BAD_TENSOR_SHAPE);
    CHECK_API_OR(probs_desc->ndim(), 1, return INFINI_STATUS_BAD_TENSOR_SHAPE);
    CHECK_API_OR(probs_desc->stride(0), 1, return INFINI_STATUS_BAD_TENSOR_STRIDES);

    *desc_ptr = new Descriptor(
        dt_i, dt_p,
        probs_desc->dim(0), // vocabulary size
        0,                  // CPU path needs no workspace
        nullptr,            // no opaque backend state
        handle->device, handle->device_id);
    return INFINI_STATUS_SUCCESS;
}
// Maps a storage type to the type used for arithmetic.
// By default compute in the storage type itself; fp16 is widened to float.
template <typename DT>
struct ComputeType {
    using type = DT;
};

template <>
struct ComputeType<fp16_t> {
    using type = float;
};
// Sampling kernels for one (index type, value type) combination.
// Tidx is the integer type written to `result`; Tval the storage type of
// `probs`; Tcompute the widened arithmetic type (float for fp16).
template <class Tidx, class Tval>
struct Scheme {
    using Tcompute = typename ComputeType<Tval>::type;

    // Load probs[i] and widen it to the compute type.
    static Tcompute get(void const *ptr, size_t i) {
        return utils::cast<Tcompute, Tval>(reinterpret_cast<Tval const *>(ptr)[i]);
    }

    // Greedy sampling: write the index of the largest probability.
    static void argmax(void *result, void const *probs, size_t n) {
        auto idx = reinterpret_cast<Tidx *>(result);
        *idx = 0;
        auto max_val = get(probs, 0);
        // start at 1: element 0 already seeds max_val
        for (size_t i = 1; i < n; i++) {
            if (auto val = get(probs, i); val > max_val) {
                max_val = val;
                *idx = static_cast<Tidx>(i);
            }
        }
    }

    // Temperature-scaled top-k / top-p sampling.
    // random_val is expected in [0, 1); topk <= 0 is treated as 1.
    static void random(
        void *result,
        void const *probs,
        size_t n,
        float random_val,
        float topp,
        int topk,
        float temperature) {
        struct KVPair {
            Tidx idx;
            Tcompute val;
            // descending order of probability
            bool operator<(const KVPair &other) const {
                return val > other.val;
            }
        };

        auto idx = reinterpret_cast<Tidx *>(result);

        // build & sort (largest probability first)
        std::vector<KVPair> pairs(n);
        for (size_t i = 0; i < n; i++) {
            pairs[i] = {static_cast<Tidx>(i), get(probs, i)};
        }
        std::sort(pairs.begin(), pairs.end());

        // softmax & prefix sum in place: pairs[i].val becomes the cumulative
        // (unnormalized) softmax mass of the i+1 most probable tokens.
        auto const max_val = pairs[0].val;
        pairs[0].val = 1; // exp((max - max) / temperature) == 1
        for (size_t i = 1; i < n; i++) {
            pairs[i].val = pairs[i - 1].val
                         + std::exp((pairs[i].val - max_val) / temperature);
        }

        // top-k & top-p cutoffs.
        // Clamp topk to [1, n]: topk <= 0 would otherwise underflow the
        // size_t cast and read pairs[-1] (only topk == 1 is routed to argmax).
        auto const k = std::min(static_cast<size_t>(std::max(topk, 1)), n);
        auto const pk = pairs[k - 1].val,
                   pp = pairs[n - 1].val * topp,
                   plimit = random_val * std::min(pk, pp);

        // sample: first token whose cumulative mass reaches the limit.
        // Fallback to the least probable token so *idx is always written,
        // even if floating-point rounding pushes plimit past every value.
        *idx = pairs[n - 1].idx;
        for (size_t i = 0; i < n; i++) {
            if (plimit <= pairs[i].val) {
                *idx = pairs[i].idx;
                break;
            }
        }
    }
};
// Dispatch between greedy argmax and true random sampling.
// Degenerate sampling parameters all collapse to argmax.
template <class Tidx, class Tval>
void switch_f(
    size_t n,
    void *result,
    const void *probs,
    float random_val,
    float topp,
    int topk,
    float temperature) {
    const bool greedy = random_val == 0
                     || topp == 0
                     || topk == 1
                     || temperature == 0;
    if (greedy) {
        Scheme<Tidx, Tval>::argmax(result, probs, n);
    } else {
        Scheme<Tidx, Tval>::random(result, probs, n,
                                   random_val, topp, topk, temperature);
    }
}
// Dispatch on the probability dtype, instantiating switch_f with the
// matching C++ value type.
template <class Tidx>
void switch_val(
    infiniDtype_t dt_p,
    size_t n,
    void *result,
    void const *probs,
    float random_val,
    float topp,
    int topk,
    float temperature) {
    switch (dt_p) {
    case INFINI_DTYPE_F16:
        switch_f<Tidx, fp16_t>(n, result, probs, random_val, topp, topk, temperature);
        break;
    case INFINI_DTYPE_F32:
        switch_f<Tidx, float>(n, result, probs, random_val, topp, topk, temperature);
        break;
    case INFINI_DTYPE_F64:
        switch_f<Tidx, double>(n, result, probs, random_val, topp, topk, temperature);
        break;
    default:
        // unreachable: dtype was validated in Descriptor::create
        std::abort();
    }
}
// Dispatch on the result (index) dtype, then forward to switch_val for
// the probability dtype.
void switch_idx(
    infiniDtype_t dt_i,
    infiniDtype_t dt_p,
    size_t n,
    void *result,
    void const *probs,
    float random_val,
    float topp,
    int topk,
    float temperature) {

#define CASE(DT_VAL, DT_TYP)                                                          \
    case DT_VAL:                                                                      \
        switch_val<DT_TYP>(dt_p, n, result, probs, random_val, topp, topk, temperature); \
        break

    switch (dt_i) {
        CASE(INFINI_DTYPE_I8, int8_t);
        CASE(INFINI_DTYPE_I16, int16_t);
        CASE(INFINI_DTYPE_I32, int32_t);
        CASE(INFINI_DTYPE_I64, int64_t);
        CASE(INFINI_DTYPE_U8, uint8_t);
        CASE(INFINI_DTYPE_U16, uint16_t);
        CASE(INFINI_DTYPE_U32, uint32_t);
        CASE(INFINI_DTYPE_U64, uint64_t);
    default:
        // unreachable: dtype was validated in Descriptor::create
        std::abort();
    }

#undef CASE
}
// Run sampling on the CPU. workspace / workspace_size / stream are part of
// the common operator interface and unused by this backend.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *result,
    const void *probs,
    float random_val,
    float topp,
    int topk,
    float temperature,
    void *stream) const {
    switch_idx(_dt_i, _dt_p, _n,
               result, probs,
               random_val, topp, topk, temperature);
    return INFINI_STATUS_SUCCESS;
}

} // namespace op::random_sample::cpu
src/infiniop/ops/random_sample/cpu/random_sample_cpu.h
0 → 100644
View file @
b5ccf30f
#ifndef __RANDOM_SAMPLE_CPU_H__
#define __RANDOM_SAMPLE_CPU_H__

#include "../random_sample.h"

// Declare the random-sample Descriptor class in namespace
// op::random_sample::cpu (see the DESCRIPTOR macro in random_sample.h).
DESCRIPTOR(cpu)

#endif // __RANDOM_SAMPLE_CPU_H__
src/infiniop/ops/random_sample/operator.cc
View file @
b5ccf30f
...
...
@@ -2,152 +2,110 @@
#include "../../handle.h"
#include "infiniop/ops/random_sample.h"
#ifdef ENABLE_CPU_API
#include "cpu/random_sample_cpu.h"
#endif

// Route descriptor creation to the backend matching handle->device.
__C infiniStatus_t infiniopCreateRandomSampleDescriptor(
    infiniopHandle_t handle,
    infiniopRandomSampleDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t result,
    infiniopTensorDescriptor_t probs) {

#define CREATE(CASE, NAMESPACE)                                                      \
    case CASE:                                                                       \
        return op::random_sample::NAMESPACE::Descriptor::create(                     \
            handle,                                                                  \
            reinterpret_cast<op::random_sample::NAMESPACE::Descriptor **>(desc_ptr), \
            result,                                                                  \
            probs)

    switch (handle->device) {
#ifdef ENABLE_CPU_API
        CREATE(INFINI_DEVICE_CPU, cpu);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }
    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;

#undef CREATE
}
// Report the workspace size recorded in the backend descriptor.
__C infiniStatus_t infiniopGetRandomSampleWorkspaceSize(
    infiniopRandomSampleDescriptor_t desc,
    size_t *size) {

#define GET(CASE, NAMESPACE)                                                                          \
    case CASE:                                                                                        \
        *size = reinterpret_cast<const op::random_sample::NAMESPACE::Descriptor *>(desc)->workspace_size; \
        return INFINI_STATUS_SUCCESS

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        GET(INFINI_DEVICE_CPU, cpu);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }
    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;

#undef GET
}
// Route the sampling call to the backend matching desc->device_type.
__C infiniStatus_t infiniopRandomSample(
    infiniopRandomSampleDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *result,
    const void *probs,
    float random_val,
    float topp,
    int topk,
    float temperature,
    void *stream) {

#define CALCULATE(CASE, NAMESPACE)                                                      \
    case CASE:                                                                          \
        return reinterpret_cast<const op::random_sample::NAMESPACE::Descriptor *>(desc) \
            ->calculate(workspace, workspace_size,                                      \
                        result, probs,                                                  \
                        random_val,                                                     \
                        topp, topk, temperature,                                        \
                        stream)

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }
    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;

#undef CALCULATE
}
// Destroy the backend descriptor matching desc->device_type.
__C infiniStatus_t infiniopDestroyRandomSampleDescriptor(
    infiniopRandomSampleDescriptor_t desc) {

#define DELETE(CASE, NAMESPACE)                                                          \
    case CASE:                                                                           \
        delete reinterpret_cast<const op::random_sample::NAMESPACE::Descriptor *>(desc); \
        return INFINI_STATUS_SUCCESS;

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        DELETE(INFINI_DEVICE_CPU, cpu);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }
    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;

#undef DELETE
}
src/infiniop/ops/random_sample/random_sample.h
0 → 100644
View file @
b5ccf30f
#ifndef __RANDOM_SAMPLE_H__
#define __RANDOM_SAMPLE_H__

#include "../../../utils.h"
#include "../../operator.h"

// Declares the per-backend random-sample Descriptor class inside
// namespace op::random_sample::<NAMESPACE>. Each backend header invokes
// DESCRIPTOR(<backend>) and the backend .cc file defines ~Descriptor(),
// create(), and calculate(). The Opaque struct holds backend-private
// state; _dt_i/_dt_p are the result-index and probability dtypes and
// _n is the vocabulary size (probs length).
#define DESCRIPTOR(NAMESPACE)                                       \
                                                                    \
    namespace op::random_sample::NAMESPACE {                        \
    class Descriptor final : public InfiniopDescriptor {            \
        struct Opaque;                                              \
        Opaque *_opaque;                                            \
                                                                    \
        infiniDtype_t _dt_i, _dt_p;                                 \
        size_t _n;                                                  \
                                                                    \
        Descriptor(                                                 \
            infiniDtype_t dt_i,                                     \
            infiniDtype_t dt_p,                                     \
            size_t n,                                               \
            size_t workspace_size_,                                 \
            Opaque *opaque,                                         \
            infiniDevice_t device_type,                             \
            int device_id)                                          \
            : InfiniopDescriptor{device_type, device_id},           \
              _opaque(opaque),                                      \
              _dt_i(dt_i),                                          \
              _dt_p(dt_p),                                          \
              _n(n),                                                \
              workspace_size(workspace_size_) {}                    \
                                                                    \
    public:                                                         \
        size_t workspace_size;                                      \
                                                                    \
        ~Descriptor();                                              \
                                                                    \
        static infiniStatus_t create(                               \
            infiniopHandle_t handle,                                \
            Descriptor **desc_ptr,                                  \
            infiniopTensorDescriptor_t result_desc,                 \
            infiniopTensorDescriptor_t probs_desc);                 \
                                                                    \
        infiniStatus_t calculate(                                   \
            void *workspace,                                        \
            size_t workspace_size,                                  \
            void *result,                                           \
            const void *probs,                                      \
            float random_val,                                       \
            float topp,                                             \
            int topk,                                               \
            float temperature,                                      \
            void *stream) const;                                    \
    };                                                              \
    }

#endif // __RANDOM_SAMPLE_H__
src/utils/check.h
View file @
b5ccf30f
...
...
@@ -17,4 +17,17 @@
#define CHECK_STATUS(API) CHECK_API_OR(API, INFINI_STATUS_SUCCESS, return api_result_)
// Verify that dtype DT is one of the dtypes listed in __VA_ARGS__;
// otherwise return INFINI_STATUS_BAD_TENSOR_DTYPE from the enclosing
// function (via CHECK_API_OR). Wrapped in do/while(0) so it behaves as
// a single statement.
#define CHECK_DTYPE(DT, ...)                             \
    do {                                                 \
        auto found_supported_dtype = false;              \
        for (auto dt : {__VA_ARGS__}) {                  \
            if (dt == DT) {                              \
                found_supported_dtype = true;            \
                break;                                   \
            }                                            \
        }                                                \
        CHECK_API_OR(found_supported_dtype, true,        \
                     return INFINI_STATUS_BAD_TENSOR_DTYPE); \
    } while (0)
#endif // INFINIUTILS_CHECK_H
test/infiniop/random_sample.py
View file @
b5ccf30f
...
...
@@ -82,25 +82,14 @@ def random_sample(data, random_val, topp, topk, voc, temperature):
globalM
=
dataNp
[
0
]
dataNp
=
(
dataNp
-
globalM
)
/
temperature
dataNp
=
torch
.
softmax
(
dataNp
.
float
(),
dim
=
0
)
sum_s
=
0
for
end
in
range
(
topk
):
sum_s
+=
dataNp
[
end
]
if
sum_s
>=
topp
:
break
if
end
<
topk
-
1
:
end
+=
1
else
:
end
=
topk
sum_s
=
0
for
i
in
range
(
end
):
sum_s
+=
dataNp
[
i
]
random_val
*=
sum_s
sum_s
=
0
for
i
in
range
(
end
):
sum_s
+=
dataNp
[
i
]
if
random_val
<
sum_s
:
for
i
in
range
(
1
,
voc
):
dataNp
[
i
]
+=
dataNp
[
i
-
1
]
limit_k
=
dataNp
[
min
(
topk
,
voc
)
-
1
]
limit_p
=
dataNp
[
voc
-
1
]
*
topp
limit
=
min
(
limit_k
,
limit_p
)
*
random_val
for
i
in
range
(
voc
):
if
limit
<
dataNp
[
i
]:
return
indices
[
i
]
else
:
return
torch
.
argmax
(
data
)
...
...
@@ -129,7 +118,7 @@ def test(
data
,
random_val
,
topp
,
topk
,
voc
,
temperature
)
# 这个函数在device速度可能会很慢,可以通过data.to("cpu")方式加快计算过程
indices
=
torch
.
zeros
([
1
],
dtype
=
torch
.
int64
).
to
(
torch_device
)
indices
=
torch
.
zeros
([],
dtype
=
torch
.
int64
).
to
(
torch_device
)
x_tensor
,
indices_tensor
=
[
to_tensor
(
tensor
,
lib
)
for
tensor
in
[
data
,
indices
]]
...
...
@@ -147,7 +136,7 @@ def test(
# Invalidate the shape and strides in the descriptor to prevent them from being directly used by the kernel
for
tensor
in
[
x_tensor
,
indices_tensor
]:
tensor
.
des
criptor
.
contents
.
invalidate
(
)
tensor
.
des
troyDesc
(
lib
)
workspace_size
=
c_uint64
(
0
)
check_error
(
...
...
@@ -181,13 +170,13 @@ def test(
atol
,
rtol
=
get_tolerance
(
_TOLERANCE_MAP
,
dtype
)
if
DEBUG
:
debug_all
(
(
indices
[
0
]
.
type
(
ans
.
dtype
),
data
[
indices
[
0
]
]),
(
indices
.
type
(
ans
.
dtype
),
data
[
indices
]),
(
ans
,
data
[
ans
]),
"or"
,
atol
=
atol
,
rtol
=
rtol
,
)
assert
indices
[
0
]
.
type
(
ans
.
dtype
)
==
ans
or
data
[
ans
]
==
data
[
indices
[
0
]
]
assert
indices
.
type
(
ans
.
dtype
)
==
ans
or
data
[
ans
]
==
data
[
indices
]
# Profiling workflow
if
PROFILE
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment