Commit 6cdfbf38 authored by aska-0096

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/composable_kernel into wmma_op

parents 463e2aa1 ad541ad6
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

#include "device_conv2d_xdl_int8_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

// int8 conv2d with per-output-channel quantization: conv -> multiply by a
// per-channel float scale (Mul2) -> clamp back to int8. Instances are added
// for the default, 1x1 pad-0, and 1x1 stride-1 pad-0 forward specializations.
void add_device_conv2d_perchannel_quantization_int8_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
                                                              GNHWC,
                                                              GKYXC,
                                                              GK_Tuple,
                                                              GNHWK,
                                                              int8_t,
                                                              int8_t,
                                                              F32_Tuple,
                                                              int8_t,
                                                              PassThrough,
                                                              PassThrough,
                                                              Mul2_Clamp>>>& instances)
{
    add_device_operation_instances(
        instances,
        device_conv2d_int8_32Ds_instances<GK_Tuple, F32_Tuple, Mul2_Clamp, ConvFwdDefault>{});
    add_device_operation_instances(
        instances,
        device_conv2d_int8_32Ds_instances<GK_Tuple, F32_Tuple, Mul2_Clamp, ConvFwd1x1P0>{});
    add_device_operation_instances(
        instances,
        device_conv2d_int8_32Ds_instances<GK_Tuple, F32_Tuple, Mul2_Clamp, ConvFwd1x1S1P0>{});
}

// Same instance set, with a ReLU applied before the per-channel requantization.
void add_device_conv2d_relu_perchannel_quantization_int8_instances(
    std::vector<std::unique_ptr<DeviceGroupedConvFwdMultipleD<NDimSpatial,
                                                              GNHWC,
                                                              GKYXC,
                                                              GK_Tuple,
                                                              GNHWK,
                                                              int8_t,
                                                              int8_t,
                                                              F32_Tuple,
                                                              int8_t,
                                                              PassThrough,
                                                              PassThrough,
                                                              Relu_Mul2_Clamp>>>& instances)
{
    add_device_operation_instances(
        instances,
        device_conv2d_int8_32Ds_instances<GK_Tuple, F32_Tuple, Relu_Mul2_Clamp, ConvFwdDefault>{});
    add_device_operation_instances(
        instances,
        device_conv2d_int8_32Ds_instances<GK_Tuple, F32_Tuple, Relu_Mul2_Clamp, ConvFwd1x1P0>{});
    add_device_operation_instances(
        instances,
        device_conv2d_int8_32Ds_instances<GK_Tuple, F32_Tuple, Relu_Mul2_Clamp, ConvFwd1x1S1P0>{});
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
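As an aside, here is a minimal usage sketch (not part of this commit) of how such a registration function is typically consumed: the caller owns a vector of device-op pointers, the factory above fills it with tuned kernel configurations, and a profiler or client then enumerates them. It mirrors the scope of the file above and assumes the same aliases (NDimSpatial, GNHWC, GKYXC, GK_Tuple, GNHWK, F32_Tuple, PassThrough, Mul2_Clamp) from device_conv2d_xdl_int8_instance.hpp are in scope there.

// Usage sketch only -- not part of this commit; names rely on the
// assumptions stated in the lead-in above.
#include <iostream>
#include <memory>
#include <vector>

#include "device_conv2d_xdl_int8_instance.hpp"

namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {

void list_perchannel_int8_conv2d_instances()
{
    using DeviceOp = DeviceGroupedConvFwdMultipleD<NDimSpatial,
                                                   GNHWC,
                                                   GKYXC,
                                                   GK_Tuple,
                                                   GNHWK,
                                                   int8_t,
                                                   int8_t,
                                                   F32_Tuple,
                                                   int8_t,
                                                   PassThrough,
                                                   PassThrough,
                                                   Mul2_Clamp>;

    std::vector<std::unique_ptr<DeviceOp>> instances;
    // Defined in the translation unit above (or its corresponding library header).
    add_device_conv2d_perchannel_quantization_int8_instances(instances);

    // Each pointer owns one tuned kernel configuration. A real client would
    // build arguments for its problem size, check IsSupportedArgument(), and
    // launch or time the supported ones; here we only print the names.
    for(const auto& op : instances)
        std::cout << op->GetTypeString() << '\n';
}

} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck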
@@ -27,6 +27,7 @@ set(PROFILER_SOURCE
     src/profile_layernorm.cpp
     src/profile_softmax.cpp
     src/profile_batchnorm_fwd.cpp
+    src/profile_batchnorm_bwd.cpp
 )
 add_executable(ckProfiler ${PROFILER_SOURCE})
@@ -25,6 +25,7 @@ int profile_layernorm(int, char*[]);
 int profile_groupnorm(int, char*[]);
 int profile_reduce(int, char*[]);
 int profile_batchnorm_forward(int, char*[]);
+int profile_batchnorm_backward(int, char*[]);
 static void print_helper_message()
 {
@@ -148,6 +149,10 @@ int main(int argc, char* argv[])
     {
         return profile_batchnorm_forward(argc, argv);
     }
+    else if(strcmp(argv[1], "bnorm_bwd") == 0)
+    {
+        return profile_batchnorm_backward(argc, argv);
+    }
     else
     {
         print_helper_message();
@@ -54,6 +54,7 @@ add_subdirectory(normalization)
 add_subdirectory(data_type)
 add_subdirectory(elementwise_normalization)
 add_subdirectory(batchnorm_fwd)
+add_subdirectory(batchnorm)
 if(GPU_TARGETS MATCHES "gfx1100")
     add_subdirectory(wmma_op)
 endif()
 add_gtest_executable(test_batchnorm_fwd_rank_4 batchnorm_fwd_rank_4.cpp)
+add_gtest_executable(test_batchnorm_bwd_rank_4 batchnorm_bwd_rank_4.cpp)
 target_link_libraries(test_batchnorm_fwd_rank_4 PRIVATE utility device_batchnorm_instance)
+target_link_libraries(test_batchnorm_bwd_rank_4 PRIVATE utility device_batchnorm_instance)
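The new test target wraps batchnorm_bwd_rank_4.cpp. As a hypothetical illustration only (not that file), the sketch below shows the host-side reference math a batchnorm backward test typically validates a device kernel against: dbeta is the per-channel sum of dy, and dgamma is the per-channel sum of dy times the normalized input xhat = (x - mean) / sqrt(var + eps).

// Hypothetical sketch only -- not the batchnorm_bwd_rank_4.cpp added by this
// commit. It checks the dgamma/dbeta reductions on a tiny single-channel
// example with eps = 0, the kind of reference a device result is compared to.
#include <cmath>
#include <cstddef>
#include <vector>
#include <gtest/gtest.h>

TEST(BatchNormBwdSketch, DgammaDbetaReference)
{
    const std::vector<double> x{1.0, 2.0, 3.0, 4.0};
    const std::vector<double> dy{0.5, -1.0, 2.0, 1.0};

    // Per-channel mean and (population) variance over the reduced dimension.
    const double mean    = (1.0 + 2.0 + 3.0 + 4.0) / 4.0;     // 2.5
    const double var     = (2.25 + 0.25 + 0.25 + 2.25) / 4.0; // 1.25
    const double inv_std = 1.0 / std::sqrt(var);

    double dbeta = 0.0, dgamma = 0.0;
    for(std::size_t i = 0; i < x.size(); ++i)
    {
        const double xhat = (x[i] - mean) * inv_std;
        dbeta += dy[i];          // dbeta  = sum(dy)
        dgamma += dy[i] * xhat;  // dgamma = sum(dy * xhat)
    }

    EXPECT_NEAR(dbeta, 2.5, 1e-12);
    EXPECT_NEAR(dgamma, 2.25 / std::sqrt(1.25), 1e-12);
}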