Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
97f6e1c3
Commit
97f6e1c3
authored
Apr 25, 2025
by
zhanggzh
Browse files
add hcu support code
parent
1661daf1
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
7 additions
and
6 deletions
+7
-6
src/libtorchaudio/cuctc/src/bitonic_topk/bitonic_sort.cuh
src/libtorchaudio/cuctc/src/bitonic_topk/bitonic_sort.cuh
+4
-3
src/libtorchaudio/cuctc/src/bitonic_topk/pow2_utils.cuh
src/libtorchaudio/cuctc/src/bitonic_topk/pow2_utils.cuh
+1
-1
src/libtorchaudio/cuctc/src/bitonic_topk/warpsort_topk.cuh
src/libtorchaudio/cuctc/src/bitonic_topk/warpsort_topk.cuh
+2
-2
No files found.
src/libtorchaudio/cuctc/src/bitonic_topk/bitonic_sort.cuh
View file @
97f6e1c3
...
...
@@ -16,9 +16,10 @@ constexpr inline __host__ __device__ bool isPo2(IntType num) {
}
inline
__device__
int
laneId
()
{
int
id
;
asm
(
"mov.s32 %0, %%laneid;"
:
"=r"
(
id
));
return
id
;
//int id;
//asm("mov.s32 %0, %%laneid;" : "=r"(id));
//return id;
return
__lane_id
();
}
/**
* @brief Shuffle the data inside a warp
...
...
src/libtorchaudio/cuctc/src/bitonic_topk/pow2_utils.cuh
View file @
97f6e1c3
...
...
@@ -12,7 +12,7 @@ namespace cu_ctc {
* @tparam IntType data type (checked only for integers)
*/
template
<
typename
IntType
>
constexpr
__device__
IntType
log2
(
IntType
num
,
IntType
ret
=
IntType
(
0
))
{
constexpr
__host__
__device__
IntType
log2
(
IntType
num
,
IntType
ret
=
IntType
(
0
))
{
return
num
<=
IntType
(
1
)
?
ret
:
log2
(
num
>>
IntType
(
1
),
++
ret
);
}
...
...
src/libtorchaudio/cuctc/src/bitonic_topk/warpsort_topk.cuh
View file @
97f6e1c3
...
...
@@ -313,7 +313,7 @@ class warp_sort_filtered : public warp_sort<Capacity, Ascending, T, IdxT> {
__device__
__forceinline__
void
merge_buf_
()
{
topk
::
bitonic
<
kMaxBufLen
>
(
!
Ascending
,
kWarpWidth
).
sort
(
val_buf_
,
idx_buf_
);
this
->
merge_in
<
kMaxBufLen
>
(
val_buf_
,
idx_buf_
);
this
->
template
merge_in
<
kMaxBufLen
>(
val_buf_
,
idx_buf_
);
buf_len_
=
0
;
set_k_th_
();
// contains warp sync
#pragma unroll
...
...
@@ -421,7 +421,7 @@ constexpr inline __host__ __device__ IntType ceildiv(IntType a, IntType b) {
return
(
a
+
b
-
1
)
/
b
;
}
template
<
typename
IntType
>
constexpr
inline
__device__
IntType
roundUp256
(
IntType
num
)
{
constexpr
inline
__host__
__device__
IntType
roundUp256
(
IntType
num
)
{
// return (num + 255) / 256 * 256;
constexpr
int
MASK
=
255
;
return
(
num
+
MASK
)
&
(
~
MASK
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment