Commit 4d93ce0e authored by Chao Liu
Browse files

implemented int8x4 datatype

parent 0ca0103c
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#endif #endif
#include "bfloat16_dev.hpp" #include "bfloat16_dev.hpp"
#if 1 #if 0
#define CK_AMD_GPU_GFX906 1 #define CK_AMD_GPU_GFX906 1
#elif 0 #elif 0
#define CK_AMD_GPU_GFX908 1 #define CK_AMD_GPU_GFX908 1
......
...@@ -56,10 +56,10 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk( ...@@ -56,10 +56,10 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(
#if 0 #if 0
// run-time variables // run-time variables
constexpr auto in_n_hi_wi_c_desc = constexpr auto in_n_hi_wi_c0_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(N, Hi, Wi, C)); make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(N, Hi, Wi, C0));
constexpr auto wei_k_y_x_c_desc = constexpr auto wei_k_y_x_c0_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(K, Y, X, C)); make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(K, Y, X, C0));
constexpr auto out_n_ho_wo_k_desc = constexpr auto out_n_ho_wo_k_desc =
make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(N, Ho, Wo, K)); make_dynamic_naive_tensor_descriptor_packed_v2(make_multi_index(N, Ho, Wo, K));
...@@ -109,9 +109,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk( ...@@ -109,9 +109,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nhwc_kyxc_nhwk(
DeviceMem wei_k_y_x_c_device_buf(sizeof(TInWei) * wei_k_y_x_c.mDesc.GetElementSpace()); DeviceMem wei_k_y_x_c_device_buf(sizeof(TInWei) * wei_k_y_x_c.mDesc.GetElementSpace());
DeviceMem out_n_ho_wo_k_device_buf(sizeof(TOut) * out_n_ho_wo_k.mDesc.GetElementSpace()); DeviceMem out_n_ho_wo_k_device_buf(sizeof(TOut) * out_n_ho_wo_k.mDesc.GetElementSpace());
#if 1
in_n_hi_wi_c_device_buf.ToDevice(in_n_hi_wi_c.mData.data()); in_n_hi_wi_c_device_buf.ToDevice(in_n_hi_wi_c.mData.data());
#endif
wei_k_y_x_c_device_buf.ToDevice(wei_k_y_x_c.mData.data()); wei_k_y_x_c_device_buf.ToDevice(wei_k_y_x_c.mData.data());
out_n_ho_wo_k_device_buf.ToDevice(out_n_ho_wo_k.mData.data()); out_n_ho_wo_k_device_buf.ToDevice(out_n_ho_wo_k.mData.data());
......
...@@ -750,7 +750,6 @@ int main(int argc, char* argv[]) ...@@ -750,7 +750,6 @@ int main(int argc, char* argv[])
if(do_verification) if(do_verification)
{ {
#if 1
host_direct_convolution(in_nchw, host_direct_convolution(in_nchw,
wei_kcyx, wei_kcyx,
out_nkhw_host, out_nkhw_host,
...@@ -758,11 +757,8 @@ int main(int argc, char* argv[]) ...@@ -758,11 +757,8 @@ int main(int argc, char* argv[])
ConvDilations{}, ConvDilations{},
LeftPads{}, LeftPads{},
RightPads{}); RightPads{});
#endif
#if 1
check_error(out_nkhw_host, out_nkhw_device); check_error(out_nkhw_host, out_nkhw_device);
#endif
if(do_log) if(do_log)
{ {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment