Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
3546e737
Unverified
Commit
3546e737
authored
Jun 27, 2025
by
PanZezhong1725
Committed by
GitHub
Jun 27, 2025
Browse files
Merge pull request #62 from InfiniTensor/issue/11-randomsample-ascend
issue/11: add random sample ascend
parents
27b836c9
c1fa267c
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
411 additions
and
7 deletions
+411
-7
src/infiniop/devices/ascend/CMakeLists.txt
src/infiniop/devices/ascend/CMakeLists.txt
+2
-1
src/infiniop/devices/ascend/ascend_kernel_common.h
src/infiniop/devices/ascend/ascend_kernel_common.h
+1
-0
src/infiniop/ops/random_sample/ascend/random_sample_aclnn.h
src/infiniop/ops/random_sample/ascend/random_sample_aclnn.h
+8
-0
src/infiniop/ops/random_sample/ascend/random_sample_kernel.cpp
...nfiniop/ops/random_sample/ascend/random_sample_kernel.cpp
+268
-0
src/infiniop/ops/random_sample/ascend/randomsample_aclnn.cc
src/infiniop/ops/random_sample/ascend/randomsample_aclnn.cc
+110
-0
src/infiniop/ops/random_sample/operator.cc
src/infiniop/ops/random_sample/operator.cc
+17
-1
test/infiniop/random_sample.py
test/infiniop/random_sample.py
+5
-5
No files found.
src/infiniop/devices/ascend/CMakeLists.txt
View file @
3546e737
...
...
@@ -26,6 +26,7 @@ include_directories(
ascendc_library
(
ascend_kernels STATIC
../../ops/swiglu/ascend/swiglu_ascend_kernel.cpp
../../ops/rope/ascend/rope_ascend_kernel.cpp
#
../../ops/random_sample/ascend/random_sample_kernel.cpp
../../ops/random_sample/ascend/random_sample_kernel.cpp
)
target_include_directories
(
ascend_kernels PRIVATE ../../../../include
)
src/infiniop/devices/ascend/ascend_kernel_common.h
View file @
3546e737
...
...
@@ -7,6 +7,7 @@
constexpr
size_t
BLOCK_NUM
=
8
;
constexpr
size_t
BUFFER_NUM
=
2
;
constexpr
size_t
BYTE_ALIGN
=
32
;
constexpr
size_t
BLOCK_LEN
=
256
;
template
<
typename
T
>
__aicore__
inline
size_t
alignTileLen
(
size_t
tile_len
,
size_t
byte_align
)
{
...
...
src/infiniop/ops/random_sample/ascend/random_sample_aclnn.h
0 → 100644
View file @
3546e737
#ifndef __ACLNN_RANDOM_SAMPLE_H__
#define __ACLNN_RANDOM_SAMPLE_H__
#include "../random_sample.h"
DESCRIPTOR
(
ascend
)
#endif // __ACLNN_RANDOM_SAMPLE_H__
src/infiniop/ops/random_sample/ascend/random_sample_kernel.cpp
0 → 100644
View file @
3546e737
#include "../../../devices/ascend/ascend_kernel_common.h"
using
namespace
AscendC
;
template
<
typename
T
>
class
RandomSampleKernel
{
public:
__aicore__
inline
RandomSampleKernel
()
{}
__aicore__
inline
void
init
(
GM_ADDR
probs
,
GM_ADDR
result
,
GM_ADDR
topk_val_addr
,
GM_ADDR
topk_idx_addr
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
,
int32_t
n
);
__aicore__
inline
void
process
();
private:
__aicore__
inline
void
copyIn
();
__aicore__
inline
void
copyOut
();
__aicore__
inline
void
compute
();
__aicore__
inline
void
SoftMax
(
LocalTensor
<
T
>
&
topkValIn
,
LocalTensor
<
T
>
&
softMaxOut
);
__aicore__
inline
void
InclusiveSum
(
LocalTensor
<
T
>
&
topkValIn
,
LocalTensor
<
T
>
&
topkValOut
);
__aicore__
inline
void
RandomSample
(
LocalTensor
<
T
>
&
valIn
,
LocalTensor
<
int64_t
>
&
Index
,
LocalTensor
<
int64_t
>
&
result
);
GlobalTensor
<
T
>
_pGM
,
_topk_valGM
;
GlobalTensor
<
int64_t
>
_topk_idxGM
,
_resGM
;
TPipe
pipe
;
TQue
<
QuePosition
::
VECIN
,
1
>
_topk_valQue
;
TQue
<
QuePosition
::
VECIN
,
1
>
_topk_idxQue
;
TQue
<
QuePosition
::
VECOUT
,
1
>
_resQue
;
TBuf
<
TPosition
::
VECCALC
>
_inBuf
;
TBuf
<
TPosition
::
VECCALC
>
_tmp1Buf
;
TBuf
<
TPosition
::
VECCALC
>
_tmp2Buf
;
TBuf
<
TPosition
::
VECCALC
>
_tmp3Buf
;
TBuf
<
TPosition
::
VECCALC
>
_softmax_OutBuf
;
TBuf
<
TPosition
::
VECCALC
>
_inclusive_sum_OutBuf
;
int32_t
_topk
;
int32_t
_voc
;
float
_random_val
;
float
_topp
;
float
_invTemp
;
float
_negMax
=
0.
f
;
float
_sum
=
0.
f
;
int32_t
_topkAligned
;
int32_t
_topkIdxAligned
;
int32_t
_vocAligned
;
int32_t
_bufferLen
;
};
template
<
typename
T
>
__aicore__
inline
void
RandomSampleKernel
<
T
>::
init
(
GM_ADDR
probs
,
GM_ADDR
result
,
GM_ADDR
topk_val_addr
,
GM_ADDR
topk_idx_addr
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
,
int32_t
n
)
{
_topk
=
topk
;
_voc
=
n
;
_random_val
=
random_val
;
_topp
=
topp
;
_invTemp
=
1.0
f
/
temperature
;
// CumSumInfo
_topkAligned
=
alignTileLen
<
T
>
(
_topk
,
BYTE_ALIGN
);
_vocAligned
=
alignTileLen
<
T
>
(
_voc
,
BYTE_ALIGN
);
_topkIdxAligned
=
(
_topk
+
3
)
/
4
*
4
;
_bufferLen
=
_topkAligned
>
BLOCK_LEN
?
_topkAligned
:
BLOCK_LEN
;
// Set GlobalTensor
_pGM
.
SetGlobalBuffer
(
reinterpret_cast
<
__gm__
T
*>
(
probs
),
_voc
);
_topk_valGM
.
SetGlobalBuffer
(
reinterpret_cast
<
__gm__
T
*>
(
topk_val_addr
),
_topk
);
_topk_idxGM
.
SetGlobalBuffer
(
reinterpret_cast
<
__gm__
int64_t
*>
(
topk_idx_addr
),
_topk
);
_resGM
.
SetGlobalBuffer
(
reinterpret_cast
<
__gm__
int64_t
*>
(
result
),
1
);
// Global input and output
pipe
.
InitBuffer
(
_topk_valQue
,
1
,
_topkAligned
*
sizeof
(
T
));
pipe
.
InitBuffer
(
_topk_idxQue
,
1
,
_topkIdxAligned
*
sizeof
(
int64_t
));
pipe
.
InitBuffer
(
_resQue
,
1
,
BYTE_ALIGN
);
// 32 bytes for aligned
pipe
.
InitBuffer
(
_inBuf
,
BLOCK_LEN
*
sizeof
(
T
));
pipe
.
InitBuffer
(
_tmp1Buf
,
_bufferLen
*
sizeof
(
T
));
pipe
.
InitBuffer
(
_tmp2Buf
,
_bufferLen
*
sizeof
(
T
));
pipe
.
InitBuffer
(
_tmp3Buf
,
_bufferLen
*
sizeof
(
T
));
pipe
.
InitBuffer
(
_softmax_OutBuf
,
_topkAligned
*
sizeof
(
T
));
pipe
.
InitBuffer
(
_inclusive_sum_OutBuf
,
_topkAligned
*
sizeof
(
T
));
}
template
<
typename
T
>
__aicore__
inline
void
RandomSampleKernel
<
T
>::
process
()
{
copyIn
();
compute
();
copyOut
();
}
template
<
typename
T
>
__aicore__
inline
void
RandomSampleKernel
<
T
>::
SoftMax
(
LocalTensor
<
T
>
&
topkValIn
,
LocalTensor
<
T
>
&
softMaxOut
)
{
float
invSum
=
1.0
f
/
_sum
;
LocalTensor
<
T
>
tmpBuffer
=
_tmp1Buf
.
Get
<
T
>
();
LocalTensor
<
T
>
tmpBuffer2
=
_tmp2Buf
.
Get
<
T
>
();
LocalTensor
<
T
>
tmpBuffer3
=
_tmp3Buf
.
Get
<
T
>
();
Adds
(
tmpBuffer
,
topkValIn
,
static_cast
<
T
>
(
_negMax
),
_topk
);
Muls
(
tmpBuffer2
,
tmpBuffer
,
static_cast
<
T
>
(
_invTemp
),
_topk
);
Exp
(
tmpBuffer3
,
tmpBuffer2
,
_topk
);
Muls
(
softMaxOut
,
tmpBuffer3
,
static_cast
<
T
>
(
invSum
),
_topk
);
}
template
<
typename
T
>
__aicore__
inline
void
RandomSampleKernel
<
T
>::
InclusiveSum
(
LocalTensor
<
T
>
&
topkValIn
,
LocalTensor
<
T
>
&
topkValOut
)
{
static
constexpr
CumSumConfig
cumSumConfig
{
true
,
false
,
false
};
LocalTensor
<
T
>
lastRowLocal
;
CumSum
<
T
,
cumSumConfig
>
(
topkValOut
,
lastRowLocal
,
topkValIn
,
{
1
,
static_cast
<
uint32_t
>
(
_topkAligned
)});
}
template
<
typename
T
>
__aicore__
inline
void
RandomSampleKernel
<
T
>::
RandomSample
(
LocalTensor
<
T
>
&
valIn
,
LocalTensor
<
int64_t
>
&
Index
,
LocalTensor
<
int64_t
>
&
result
)
{
int
end
=
0
;
for
(
end
=
0
;
end
<
_topk
;
end
++
)
{
if
(
static_cast
<
float
>
(
valIn
(
end
))
>=
_topp
)
{
break
;
}
}
if
(
end
<
_topk
-
1
)
{
end
+=
1
;
}
else
{
end
=
_topk
;
}
auto
random_val
=
_random_val
*
static_cast
<
float
>
(
valIn
(
end
-
1
));
for
(
int
i
=
0
;
i
<
end
;
i
++
)
{
if
(
random_val
<
static_cast
<
float
>
(
valIn
(
i
)))
{
result
(
0
)
=
Index
(
i
);
return
;
}
}
result
(
0
)
=
Index
(
end
-
1
);
}
template
<
typename
T
>
__aicore__
inline
void
RandomSampleKernel
<
T
>::
copyIn
()
{
LocalTensor
<
T
>
topkValLocal
=
_topk_valQue
.
AllocTensor
<
T
>
();
LocalTensor
<
int64_t
>
topkIdxLocal
=
_topk_idxQue
.
AllocTensor
<
int64_t
>
();
DataCopy
(
topkValLocal
,
_topk_valGM
,
_topkAligned
);
DataCopy
(
topkIdxLocal
,
_topk_idxGM
,
_topkIdxAligned
);
// Get Max val of input
_negMax
=
-
static_cast
<
float
>
(
topkValLocal
(
0
));
// Copy in p and compute _sum
int32_t
repeatTimes
=
_voc
/
BLOCK_LEN
;
int32_t
remainder
=
_voc
%
BLOCK_LEN
;
float
sum_s
=
0.
f
;
LocalTensor
<
T
>
_inBuffer
=
_inBuf
.
Get
<
T
>
();
LocalTensor
<
T
>
tmpBuffer
=
_tmp1Buf
.
Get
<
T
>
();
LocalTensor
<
T
>
tmpBuffer2
=
_tmp2Buf
.
Get
<
T
>
();
LocalTensor
<
T
>
tmpBuffer3
=
_tmp3Buf
.
Get
<
T
>
();
for
(
int32_t
i
=
0
;
i
<
repeatTimes
;
i
++
)
{
DataCopy
(
_inBuffer
,
_pGM
[
i
*
BLOCK_LEN
],
BLOCK_LEN
);
Adds
(
tmpBuffer
,
_inBuffer
,
static_cast
<
T
>
(
_negMax
),
BLOCK_LEN
);
Muls
(
tmpBuffer2
,
tmpBuffer
,
static_cast
<
T
>
(
_invTemp
),
BLOCK_LEN
);
Exp
(
tmpBuffer3
,
tmpBuffer2
,
BLOCK_LEN
);
sum_s
=
0.
f
;
for
(
int
j
=
0
;
j
<
BLOCK_LEN
;
++
j
)
{
sum_s
+=
static_cast
<
float
>
(
tmpBuffer3
(
j
));
}
_sum
+=
sum_s
;
}
if
(
remainder
!=
0
)
{
int32_t
remainderAligned
=
alignTileLen
<
T
>
(
remainder
,
BYTE_ALIGN
);
DataCopy
(
_inBuffer
,
_pGM
[
repeatTimes
*
BLOCK_LEN
],
remainderAligned
);
Adds
(
tmpBuffer
,
_inBuffer
,
static_cast
<
T
>
(
_negMax
),
remainder
);
Muls
(
tmpBuffer2
,
tmpBuffer
,
static_cast
<
T
>
(
_invTemp
),
remainder
);
Exp
(
tmpBuffer3
,
tmpBuffer2
,
remainder
);
sum_s
=
0.
f
;
for
(
int
i
=
0
;
i
<
remainder
;
++
i
)
{
sum_s
+=
static_cast
<
float
>
(
tmpBuffer3
(
i
));
}
_sum
+=
sum_s
;
}
_topk_valQue
.
EnQue
(
topkValLocal
);
_topk_idxQue
.
EnQue
(
topkIdxLocal
);
}
template
<
typename
T
>
__aicore__
inline
void
RandomSampleKernel
<
T
>::
compute
()
{
// Get input data
LocalTensor
<
T
>
topkValLocal
=
_topk_valQue
.
DeQue
<
T
>
();
// SoftMax
LocalTensor
<
T
>
softMaxOutLocal
=
_softmax_OutBuf
.
Get
<
T
>
();
SoftMax
(
topkValLocal
,
softMaxOutLocal
);
// InclusiveSum
LocalTensor
<
T
>
inclusiveOutLocal
=
_inclusive_sum_OutBuf
.
Get
<
T
>
();
InclusiveSum
(
softMaxOutLocal
,
inclusiveOutLocal
);
// randomSample
LocalTensor
<
int64_t
>
topkIdxLocal
=
_topk_idxQue
.
DeQue
<
int64_t
>
();
LocalTensor
<
int64_t
>
resultLocal
=
_resQue
.
AllocTensor
<
int64_t
>
();
RandomSample
(
inclusiveOutLocal
,
topkIdxLocal
,
resultLocal
);
_topk_valQue
.
FreeTensor
(
topkValLocal
);
_topk_idxQue
.
FreeTensor
(
topkIdxLocal
);
_resQue
.
EnQue
(
resultLocal
);
}
template
<
typename
T
>
__aicore__
inline
void
RandomSampleKernel
<
T
>::
copyOut
()
{
LocalTensor
<
int64_t
>
resLocal
=
_resQue
.
DeQue
<
int64_t
>
();
DataCopy
(
_resGM
,
resLocal
,
BYTE_ALIGN
/
sizeof
(
int64_t
));
_resQue
.
FreeTensor
(
resLocal
);
}
extern
"C"
__global__
__aicore__
void
random_sample_kernel_fp16
(
GM_ADDR
probs
,
GM_ADDR
result
,
GM_ADDR
topk_val_addr
,
GM_ADDR
topk_idx_addr
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
,
int32_t
n
)
{
RandomSampleKernel
<
half
>
op
;
op
.
init
(
probs
,
result
,
topk_val_addr
,
topk_idx_addr
,
random_val
,
topp
,
topk
,
temperature
,
n
);
op
.
process
();
}
extern
"C"
__global__
__aicore__
void
random_sample_kernel_fp32
(
GM_ADDR
probs
,
GM_ADDR
result
,
GM_ADDR
topk_val_addr
,
GM_ADDR
topk_idx_addr
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
,
int32_t
n
)
{
RandomSampleKernel
<
float
>
op
;
op
.
init
(
probs
,
result
,
topk_val_addr
,
topk_idx_addr
,
random_val
,
topp
,
topk
,
temperature
,
n
);
op
.
process
();
}
extern
"C"
infiniStatus_t
random_sample_kernel_launch
(
void
*
probs
,
void
*
result
,
void
*
topk_val_addr
,
void
*
topk_idx_addr
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
,
uint64_t
n
,
infiniDtype_t
dt_p
,
void
*
stream
)
{
switch
(
dt_p
)
{
case
INFINI_DTYPE_F16
:
random_sample_kernel_fp16
<<<
1
,
nullptr
,
stream
>>>
(
probs
,
result
,
topk_val_addr
,
topk_idx_addr
,
random_val
,
topp
,
topk
,
temperature
,
n
);
break
;
case
INFINI_DTYPE_F32
:
random_sample_kernel_fp32
<<<
1
,
nullptr
,
stream
>>>
(
probs
,
result
,
topk_val_addr
,
topk_idx_addr
,
random_val
,
topp
,
topk
,
temperature
,
n
);
break
;
default:
return
INFINI_STATUS_BAD_TENSOR_DTYPE
;
}
return
INFINI_STATUS_SUCCESS
;
}
src/infiniop/ops/random_sample/ascend/randomsample_aclnn.cc
0 → 100644
View file @
3546e737
#include "../../../devices/ascend/common_ascend.h"
#include "random_sample_aclnn.h"
#include <aclnnop/aclnn_topk.h>
namespace
op
::
random_sample
::
ascend
{
struct
Descriptor
::
Opaque
{
aclnnTensorDescriptor_t
probs
;
aclnnTensorDescriptor_t
result
;
~
Opaque
()
{
delete
probs
;
delete
result
;
}
};
Descriptor
::~
Descriptor
()
=
default
;
infiniStatus_t
Descriptor
::
create
(
infiniopHandle_t
handle_
,
Descriptor
**
desc_ptr
,
infiniopTensorDescriptor_t
result_desc
,
infiniopTensorDescriptor_t
probs_desc
)
{
auto
handle
=
reinterpret_cast
<
device
::
ascend
::
Handle
*>
(
handle_
);
auto
result
=
RandomSampleInfo
::
create
(
result_desc
,
probs_desc
);
CHECK_RESULT
(
result
);
CHECK_DTYPE
(
result
->
dt_i
,
INFINI_DTYPE_I64
);
auto
workspace_size
=
probs_desc
->
numel
()
*
infiniSizeOf
(
probs_desc
->
dtype
())
+
probs_desc
->
numel
()
*
infiniSizeOf
(
infiniDtype_t
::
INFINI_DTYPE_I64
);
auto
tresult
=
new
aclnnTensorDescriptor
(
result_desc
);
auto
tprobs
=
new
aclnnTensorDescriptor
(
probs_desc
);
*
desc_ptr
=
new
Descriptor
(
result
.
take
(),
workspace_size
,
new
Opaque
{
tprobs
,
tresult
},
handle
->
device
,
handle
->
device_id
);
return
INFINI_STATUS_SUCCESS
;
}
size_t
Descriptor
::
minWorkspaceSize
()
const
{
return
_min_workspace_size
;
}
extern
"C"
infiniStatus_t
random_sample_kernel_launch
(
void
*
probs
,
void
*
result
,
void
*
topk_val_addr
,
void
*
topk_idx_addr
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
,
uint64_t
n
,
infiniDtype_t
dt_p
,
void
*
stream
);
infiniStatus_t
Descriptor
::
calculate
(
void
*
workspace
,
size_t
workspace_size
,
void
*
result
,
const
void
*
probs
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
,
void
*
stream
)
const
{
if
(
workspace_size
<
_min_workspace_size
)
{
return
INFINI_STATUS_INSUFFICIENT_WORKSPACE
;
}
auto
topk_
=
topk
<=
(
int
)
_info
.
n
?
topk
:
(
int
)
_info
.
n
;
bool
dosample
=
topk_
>
1
&&
temperature
!=
0.0
f
&&
topp
!=
0.0
f
&&
random_val
!=
0.0
f
;
auto
topk_shape
=
std
::
vector
<
int64_t
>
{
dosample
?
topk_
:
1
};
auto
topk_stride
=
std
::
vector
<
int64_t
>
{
1
};
auto
topk_idx
=
new
aclnnTensorDescriptor
(
toAclDataType
(
_info
.
dt_i
),
topk_shape
,
topk_stride
);
auto
topk_val
=
new
aclnnTensorDescriptor
(
toAclDataType
(
_info
.
dt_p
),
topk_shape
,
topk_stride
);
auto
topk_val_addr
=
workspace
;
auto
topk_idx_addr
=
(
void
*
)((
uint8_t
*
)
workspace
+
topk_
*
infiniSizeOf
(
_info
.
dt_p
));
uint64_t
topk_workspace_size
=
0
;
aclOpExecutor
*
topk_executor
=
nullptr
;
CHECK_ACL
(
aclnnTopkGetWorkspaceSize
(
_opaque
->
probs
->
tensor
,
topk_shape
[
0
],
0
,
true
,
true
,
topk_val
->
tensor
,
dosample
?
topk_idx
->
tensor
:
_opaque
->
result
->
tensor
,
&
topk_workspace_size
,
&
topk_executor
));
CHECK_ACL
(
aclSetAclOpExecutorRepeatable
(
topk_executor
));
void
*
topk_workspace
;
CHECK_ACL
(
aclrtMalloc
(
&
topk_workspace
,
topk_workspace_size
,
ACL_MEM_MALLOC_HUGE_FIRST
));
AclSetTensorAddr
(
topk_executor
,
0
,
_opaque
->
probs
->
tensor
,
(
void
*
)
probs
);
AclSetTensorAddr
(
topk_executor
,
1
,
topk_val
->
tensor
,
topk_val_addr
);
if
(
!
dosample
)
{
AclSetTensorAddr
(
topk_executor
,
2
,
_opaque
->
result
->
tensor
,
result
);
}
else
{
AclSetTensorAddr
(
topk_executor
,
2
,
topk_idx
->
tensor
,
topk_idx_addr
);
}
CHECK_ACL
(
aclnnTopk
(
topk_workspace
,
topk_workspace_size
,
topk_executor
,
stream
));
CHECK_ACL
(
aclrtFree
(
topk_workspace
));
if
(
dosample
)
{
auto
status
=
random_sample_kernel_launch
((
void
*
)
probs
,
result
,
topk_val_addr
,
topk_idx_addr
,
random_val
,
topp
,
topk_
,
temperature
,
_info
.
n
,
_info
.
dt_p
,
stream
);
CHECK_STATUS
(
status
);
}
return
INFINI_STATUS_SUCCESS
;
}
}
// namespace op::random_sample::ascend
src/infiniop/ops/random_sample/operator.cc
View file @
3546e737
...
...
@@ -11,8 +11,12 @@
#ifdef ENABLE_METAX_API
#include "maca/random_sample_maca.h"
#endif
#ifdef ENABLE_ASCEND_API
#include "ascend/random_sample_aclnn.h"
#endif
__C
infiniStatus_t
infiniopCreateRandomSampleDescriptor
(
__C
infiniStatus_t
infiniopCreateRandomSampleDescriptor
(
infiniopHandle_t
handle
,
infiniopRandomSampleDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
result
,
...
...
@@ -37,6 +41,9 @@ __C infiniStatus_t infiniopCreateRandomSampleDescriptor(
#ifdef ENABLE_METAX_API
CREATE
(
INFINI_DEVICE_METAX
,
maca
);
#endif
#ifdef ENABLE_ASCEND_API
CREATE
(
INFINI_DEVICE_ASCEND
,
ascend
);
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -67,6 +74,9 @@ __C infiniStatus_t infiniopGetRandomSampleWorkspaceSize(
#ifdef ENABLE_METAX_API
GET
(
INFINI_DEVICE_METAX
,
maca
);
#endif
#ifdef ENABLE_ASCEND_API
GET
(
INFINI_DEVICE_ASCEND
,
ascend
);
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -107,6 +117,9 @@ __C infiniStatus_t infiniopRandomSample(
#ifdef ENABLE_METAX_API
CALCULATE
(
INFINI_DEVICE_METAX
,
maca
);
#endif
#ifdef ENABLE_ASCEND_API
CALCULATE
(
INFINI_DEVICE_ASCEND
,
ascend
);
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -134,6 +147,9 @@ __C infiniStatus_t infiniopDestroyRandomSampleDescriptor(
#ifdef ENABLE_METAX_API
DELETE
(
INFINI_DEVICE_METAX
,
maca
);
#endif
#ifdef ENABLE_ASCEND_API
DELETE
(
INFINI_DEVICE_ASCEND
,
ascend
);
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
test/infiniop/random_sample.py
View file @
3546e737
...
...
@@ -60,17 +60,17 @@ infiniopRandomSampleDescriptor_t = POINTER(RandomSampleDescriptor)
def
random_sample
(
data
,
random_val
,
topp
,
topk
,
voc
,
temperature
):
if
topp
>
0
and
topk
>
1
:
sorted_vals
,
sorted_indices
=
torch
.
sort
(
data
,
descending
=
True
)
scaled_vals
=
(
sorted_vals
-
sorted_vals
[
0
])
/
temperature
probs
=
torch
.
softmax
(
scaled_vals
,
dim
=
0
)
cum_probs
=
torch
.
cumsum
(
probs
,
dim
=
0
)
k_index
=
min
(
topk
,
voc
)
-
1
threshold
=
min
(
cum_probs
[
k_index
],
topp
)
*
random_val
idx
=
torch
.
searchsorted
(
cum_probs
,
threshold
)
return
sorted_indices
[
idx
]
return
torch
.
argmax
(
data
)
...
...
@@ -84,7 +84,7 @@ def test(
topk
,
temperature
,
dtype
=
torch
.
float16
,
sync
=
None
sync
=
None
,
):
print
(
f
"Testing RandomSample on
{
torch_device
}
with voc:
{
voc
}
random_val:
{
random_val
}
topp:
{
topp
}
topk:
{
topk
}
temperature:
{
temperature
}
dtype:
{
dtype
}
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment