"...composable_kernel.git" did not exist on "c8f6d5d1f57ab1fef63b80a574174c55a1331a82"
Commit 50ba9c44 authored by carlushuang's avatar carlushuang
Browse files

modify karg

parent d7e0f7e2
...@@ -2,4 +2,4 @@ ...@@ -2,4 +2,4 @@
# to be included in "make all/install/check" # to be included in "make all/install/check"
add_executable(tile_example_elementwise EXCLUDE_FROM_ALL elementwise.cpp elementwise_api.cpp) add_executable(tile_example_elementwise EXCLUDE_FROM_ALL elementwise.cpp elementwise_api.cpp)
target_include_directories(tile_example_elementwise PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) target_include_directories(tile_example_elementwise PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_compile_options(tile_example_elementwise PRIVATE -v --save-temps -Wno-gnu-line-marker) target_compile_options(tile_example_elementwise PRIVATE -v --save-temps -Wno-gnu-line-marker -mllvm --amdgpu-kernarg-preload-count=16)
...@@ -25,7 +25,7 @@ struct Cast ...@@ -25,7 +25,7 @@ struct Cast
constexpr dim3 blocks = kernel::BlockSize(); \ constexpr dim3 blocks = kernel::BlockSize(); \
\ \
float ave_time = ck_tile::launch_kernel( \ float ave_time = ck_tile::launch_kernel( \
s, ck_tile::make_kernel<blocks.x, 1>(kernel{}, grids, blocks, 0, kargs)); \ s, ck_tile::make_kernel<blocks.x, 1>(kernel{}, grids, blocks, 0, kargs.p_input, kargs.p_output, kargs.num_pixels)); \
return ave_time; return ave_time;
float elementwise(elementwise_trait t, elementwise_kargs a, ck_tile::stream_config s) float elementwise(elementwise_trait t, elementwise_kargs a, ck_tile::stream_config s)
...@@ -35,11 +35,11 @@ float elementwise(elementwise_trait t, elementwise_kargs a, ck_tile::stream_conf ...@@ -35,11 +35,11 @@ float elementwise(elementwise_trait t, elementwise_kargs a, ck_tile::stream_conf
{ {
if(t.output_type == "fp32" && t.input_type == "fp16") if(t.output_type == "fp32" && t.input_type == "fp16")
{ {
DISPATCH_ELEMENTWISE_CAST(float, ck_tile::fp16_t, sizeof(ck_tile::fp16_t), 8) DISPATCH_ELEMENTWISE_CAST(float, ck_tile::fp16_t, 8*sizeof(ck_tile::fp16_t), 8)
} }
else if(t.output_type == "fp16" && t.input_type == "fp32") else if(t.output_type == "fp16" && t.input_type == "fp32")
{ {
DISPATCH_ELEMENTWISE_CAST(ck_tile::fp16_t, float, sizeof(float), 8) DISPATCH_ELEMENTWISE_CAST(ck_tile::fp16_t, float, 4*sizeof(float), 8)
} }
} }
return rtn; return rtn;
......
...@@ -57,15 +57,17 @@ struct ElementwiseUnaryKernel ...@@ -57,15 +57,17 @@ struct ElementwiseUnaryKernel
CK_TILE_HOST_DEVICE static constexpr auto BlockSize() { return Problem::BlockSize; } CK_TILE_HOST_DEVICE static constexpr auto BlockSize() { return Problem::BlockSize; }
CK_TILE_DEVICE void operator()(Kargs kargs) const CK_TILE_DEVICE void operator()(const void* p_input_,
void* p_output_,
uint64_t num_pixels_) const
{ {
uint64_t block_base = uint64_t block_base =
static_cast<uint64_t>(blockIdx.x) * Problem::BlockSize * Problem::VectorSize; static_cast<uint64_t>(blockIdx.x) * Problem::BlockSize * Problem::VectorSize;
uint64_t pixels_rem = kargs.num_pixels - block_base; uint64_t pixels_rem = num_pixels_ - block_base;
const auto input_window = [&]() { const auto input_window = [&]() {
const InputType* p_input = const InputType* p_input =
reinterpret_cast<const InputType*>(kargs.p_input) + block_base; reinterpret_cast<const InputType*>(p_input_) + block_base;
auto tmp = make_naive_tensor_view_packed<address_space_enum::global>( auto tmp = make_naive_tensor_view_packed<address_space_enum::global>(
p_input, p_input,
...@@ -78,7 +80,7 @@ struct ElementwiseUnaryKernel ...@@ -78,7 +80,7 @@ struct ElementwiseUnaryKernel
auto output_window = [&]() { auto output_window = [&]() {
OutputType* p_output = OutputType* p_output =
reinterpret_cast<OutputType*>(kargs.p_output) + block_base; reinterpret_cast<OutputType*>(p_output_) + block_base;
auto tmp = make_naive_tensor_view_packed<address_space_enum::global>( auto tmp = make_naive_tensor_view_packed<address_space_enum::global>(
p_output, p_output,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment