Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
9a232a29
Commit
9a232a29
authored
Sep 03, 2024
by
carlushuang
Browse files
add another test
parent
00a0a07f
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
182 additions
and
1 deletion
+182
-1
test/CMakeLists.txt
test/CMakeLists.txt
+1
-0
test/tile_reduce/CMakeLists.txt
test/tile_reduce/CMakeLists.txt
+2
-0
test/tile_reduce/tile_reduce.cpp
test/tile_reduce/tile_reduce.cpp
+178
-0
test/topk/topk.cpp
test/topk/topk.cpp
+1
-1
No files found.
test/CMakeLists.txt
View file @
9a232a29
...
...
@@ -219,4 +219,5 @@ endif()
add_subdirectory
(
position_embedding
)
add_subdirectory
(
scatter_gather
)
add_subdirectory
(
topk
)
add_subdirectory
(
tile_reduce
)
test/tile_reduce/CMakeLists.txt
0 → 100644
View file @
9a232a29
add_test_executable
(
test_tile_reduce tile_reduce.cpp
)
target_compile_options
(
test_tile_reduce PRIVATE -v --save-temps -Wno-gnu-line-marker
)
\ No newline at end of file
test/tile_reduce/tile_reduce.cpp
0 → 100644
View file @
9a232a29
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#include <vector>
#include <iostream>
#include <numeric>
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <time.h>
#include <unordered_set>
#include "ck_tile/core.hpp"
#include "ck_tile/ops/reduce.hpp"
#ifndef TEST_TILE_REDUCE_VERBOSE
#define TEST_TILE_REDUCE_VERBOSE 0
#endif
#define HIP_CALL(call) \
do \
{ \
hipError_t err = call; \
if(err != hipSuccess) \
{ \
printf("[hiperror](%d) fail to call %s", static_cast<int>(err), #call); \
exit(0); \
} \
} while(0)
#define BLOCK_SIZE 256
template
<
int
Rows
,
int
Cols
,
typename
DataType
>
__global__
void
reduce_row
(
DataType
*
p_src
,
DataType
*
p_dst
)
{
using
namespace
ck_tile
;
// some constexpr vars
constexpr
index_t
vec
=
16
/
sizeof
(
DataType
);
static_assert
(
Cols
%
vec
==
0
);
constexpr
index_t
col_lanes
=
Cols
/
vec
;
constexpr
index_t
warp_size
=
ck_tile
::
get_warp_size
();
static_assert
(
warp_size
%
col_lanes
==
0
);
constexpr
index_t
row_lanes
=
warp_size
/
col_lanes
;
constexpr
index_t
num_warps
=
BLOCK_SIZE
/
warp_size
;
static_assert
(
Rows
%
(
num_warps
*
row_lanes
)
==
0
);
constexpr
index_t
row_repeat
=
Rows
/
(
num_warps
*
row_lanes
);
auto
src_tile
=
[
&
]()
{
constexpr
auto
src_dist
=
make_static_tile_distribution
(
tile_distribution_encoding
<
sequence
<
1
>
,
tuple
<
sequence
<
row_repeat
,
num_warps
,
row_lanes
>
,
sequence
<
col_lanes
,
vec
>>
,
tuple
<
sequence
<
1
>
,
sequence
<
1
,
2
>>
,
tuple
<
sequence
<
1
>
,
sequence
<
2
,
0
>>
,
sequence
<
1
,
2
>
,
sequence
<
0
,
1
>>
{});
auto
src_view
=
make_naive_tensor_view
<
address_space_enum
::
global
>
(
p_src
,
make_tuple
(
Rows
,
Cols
),
make_tuple
(
Cols
,
1
),
number
<
vec
>
{},
// alignement
number
<
1
>
{});
return
make_tile_window
(
src_view
,
make_tuple
(
number
<
Rows
>
{},
number
<
Cols
>
{}),
{
0
,
0
},
src_dist
);
}();
constexpr
auto
dst_dist
=
make_static_tile_distribution
(
tile_distribution_encoding
<
sequence
<
col_lanes
>
,
// -> replicate here, hence we can figure out the offset
tuple
<
sequence
<
row_repeat
,
num_warps
,
row_lanes
>
,
sequence
<
1
>
/* only 1 per row*/
>
,
tuple
<
sequence
<
1
>
,
sequence
<
1
,
0
>>
,
tuple
<
sequence
<
1
>
,
sequence
<
2
,
0
>>
,
sequence
<
1
,
2
>
,
sequence
<
0
,
0
>>
{});
auto
dst_tile
=
[
&
]()
{
auto
dst_view
=
make_naive_tensor_view
<
address_space_enum
::
global
>
(
p_dst
,
make_tuple
(
Rows
,
1
),
make_tuple
(
1
,
1
),
number
<
1
>
{},
// alignement
number
<
1
>
{});
return
make_tile_window
(
dst_view
,
make_tuple
(
number
<
Rows
>
{},
number
<
1
>
{}),
{
0
,
0
},
dst_dist
);
}();
auto
data
=
load_tile
(
src_tile
);
const
auto
f_max
=
[](
auto
e0
,
auto
e1
)
{
return
max
(
e0
,
e1
);
};
// Note: the return type will fill the replicate dim
// usually is 2d. This is for the next block_tile_reduce_sync()
// in order to do further reduce.
auto
r
=
block_tile_reduce
<
DataType
>
(
data
,
sequence
<
1
>
{},
f_max
,
-
numeric
<
DataType
>::
infinity
());
// r.foo();
// further reduce cross thread
block_tile_reduce_sync
(
r
,
f_max
,
bool_constant
<
false
>
{});
if
(
threadIdx
.
x
%
col_lanes
==
0
)
{
auto
o
=
make_static_distributed_tensor
<
DataType
>
(
dst_dist
);
o
.
get_thread_buffer
()
=
r
.
get_thread_buffer
();
store_tile
(
dst_tile
,
o
);
}
}
template
<
int
Rows
,
int
Cols
,
typename
DataType
>
int
test_tile_reduce
()
{
std
::
srand
(
std
::
time
(
nullptr
));
DataType
*
src
=
reinterpret_cast
<
DataType
*>
(
malloc
(
Rows
*
Cols
*
sizeof
(
DataType
)));
DataType
*
dst
=
reinterpret_cast
<
DataType
*>
(
malloc
(
Rows
*
sizeof
(
DataType
)));
// const auto f_max = [](auto e0, auto e1) { return max(e0, e1); };
for
(
auto
i
=
0
;
i
<
Rows
*
Cols
;
i
++
)
{
float
v
=
static_cast
<
float
>
(
std
::
rand
()
%
2000
-
1000
)
/
1000.
f
;
src
[
i
]
=
ck_tile
::
type_convert
<
DataType
>
(
v
);
}
void
*
dev_src
;
void
*
dev_dst
;
HIP_CALL
(
hipMalloc
(
&
dev_src
,
Rows
*
Cols
*
sizeof
(
DataType
)));
HIP_CALL
(
hipMalloc
(
&
dev_dst
,
Rows
*
sizeof
(
DataType
)));
HIP_CALL
(
hipMemcpy
(
dev_src
,
src
,
Rows
*
Cols
*
sizeof
(
DataType
),
hipMemcpyHostToDevice
));
constexpr
int
bdim
=
BLOCK_SIZE
;
int
gdim
=
1
;
reduce_row
<
Rows
,
Cols
,
DataType
><<<
gdim
,
bdim
>>>
(
reinterpret_cast
<
DataType
*>
(
dev_src
),
reinterpret_cast
<
DataType
*>
(
dev_dst
));
HIP_CALL
(
hipMemcpy
(
dst
,
dev_dst
,
Rows
*
sizeof
(
DataType
),
hipMemcpyDeviceToHost
));
int
err_cnt
=
0
;
for
(
int
i_r
=
0
;
i_r
<
Rows
;
i_r
++
)
{
auto
row_max
=
-
ck_tile
::
numeric
<
float
>::
infinity
();
for
(
int
i_c
=
0
;
i_c
<
Cols
;
i_c
++
)
{
int
idx
=
i_r
*
Cols
+
i_c
;
float
v
=
ck_tile
::
type_convert
<
float
>
(
src
[
idx
]);
row_max
=
row_max
>
v
?
row_max
:
v
;
#if TEST_TILE_REDUCE_VERBOSE
printf
(
"%.3f "
,
v
);
#endif
}
{
uint32_t
ref
=
ck_tile
::
bit_cast
<
uint32_t
>
(
row_max
);
uint32_t
out
=
ck_tile
::
bit_cast
<
uint32_t
>
(
dst
[
i_r
]);
if
(
ref
!=
out
)
err_cnt
++
;
}
#if TEST_TILE_REDUCE_VERBOSE
printf
(
" -> %.3f (%.3f)
\n
"
,
dst
[
i_r
],
row_max
);
#endif
}
#if TEST_TILE_REDUCE_VERBOSE
printf
(
"
\n
"
);
#endif
free
(
src
);
free
(
dst
);
return
err_cnt
==
0
?
0
:
-
1
;
}
int
main
()
{
int
r
=
test_tile_reduce
<
32
,
64
,
float
>
();
return
r
;
}
test/topk/topk.cpp
View file @
9a232a29
...
...
@@ -11,7 +11,7 @@
#include "ck_tile/host.hpp"
#ifndef TEST_TOPK_VERBOSE
#define TEST_TOPK_VERBOSE
1
#define TEST_TOPK_VERBOSE
0
#endif
int
test_reference_topk
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment