"docs/en/vscode:/vscode.git/clone" did not exist on "77a1cc448682e8069de7bd855d3ba7d42c7fbfef"
Commit 157ce4cc authored by Jing Zhang

fixed validation

parent 5b1a9994
@@ -867,9 +867,9 @@ struct GridwiseGemmDlops_km_kn_mn_v3_add
         static_for<0, WoPerThreadx2, 1>{}([&](auto w_i) {
             d_thread_buf(
                 Number<d_k0_k1_n_h0_h1_h2x2_w0_w1_w2x2_thread_desc.CalculateOffset(
-                    make_tuple(0, k_i, 0, 0, 0, h_i, 0, 0, w_i))>{}) = 1;
-            // c_thread_buf[Number<c_k1_n_h2_w2_thread_gemm_desc.CalculateOffset(
-            //     make_tuple(k_i, 0, h_i / 2, w_i / 2))>{}];
+                    make_tuple(0, k_i, 0, 0, 0, h_i, 0, 0, w_i))>{}) +=
+                c_thread_buf[Number<c_k1_n_h2_w2_thread_gemm_desc.CalculateOffset(
+                    make_tuple(k_i, 0, h_i / 2, w_i / 2))>{}];
         });
     });
 });
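For context: d_thread_buf spans a thread tile that is upsampled 2x in both spatial dimensions (hence the WoPerThreadx2 loop bound and the h_i / 2, w_i / 2 source indices), and the fix replaces a leftover debug write (= 1) with accumulation of the GEMM partial results from c_thread_buf. A minimal sketch of that indexing pattern on plain arrays, with tile sizes chosen purely for illustration:

#include <cstdio>

// Illustrative tile sizes; the real kernel derives these from its tensor descriptors.
constexpr int HoPerThread   = 2;
constexpr int WoPerThread   = 2;
constexpr int HoPerThreadx2 = HoPerThread * 2;
constexpr int WoPerThreadx2 = WoPerThread * 2;

int main()
{
    float c[HoPerThread][WoPerThread]     = {{1, 2}, {3, 4}}; // stand-in for c_thread_buf (GEMM results)
    float d[HoPerThreadx2][WoPerThreadx2] = {};               // stand-in for d_thread_buf (2x-upsampled tile)

    // The fixed kernel accumulates rather than overwrites: each GEMM result
    // lands in a 2x2 block of the upsampled tile via the halved indices.
    for(int h_i = 0; h_i < HoPerThreadx2; ++h_i)
        for(int w_i = 0; w_i < WoPerThreadx2; ++w_i)
            d[h_i][w_i] += c[h_i / 2][w_i / 2];

    for(int h_i = 0; h_i < HoPerThreadx2; ++h_i)
    {
        for(int w_i = 0; w_i < WoPerThreadx2; ++w_i)
            printf("%4.0f", d[h_i][w_i]);
        printf("\n");
    }
    return 0;
}

Each c value is broadcast-accumulated into a 2x2 block of d, which is what the descriptor offsets above express in composable_kernel's compile-time indexing.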
@@ -216,7 +216,7 @@ void device_convolution_add_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0
         in_n_c0_hi_wi_c1_device_buf.GetDeviceBuffer()),
     static_cast<TOut*>(add_n_k0_hox2_wox2_k1_device_buf.GetDeviceBuffer()),
     static_cast<TOut*>(out_n_k0_ho_wo_k1_device_buf.GetDeviceBuffer()),
-    1);
+    0);

 add_n_k0_hox2_wox2_k1_device_buf.FromDevice(add_n_k0_hox2_wox2_k1_out.mData.data());
 out_n_k0_ho_wo_k1_device_buf.FromDevice(out_n_k0_ho_wo_k1.mData.data());
@@ -95,7 +95,7 @@ int main(int argc, char* argv[])
     constexpr index_t activ_type = 0;
-#if 0
+#if 1
     constexpr auto N = Number<1>{};
     constexpr auto Hi = Number<1080>{};
     constexpr auto Wi = Number<1920>{};
@@ -125,7 +125,7 @@ int main(int argc, char* argv[])
     constexpr auto C1 = Number<8>{};
     constexpr auto K1 = Number<8>{};
     constexpr auto K0 = Number<8>{};
-#elif 1
+#elif 0
     constexpr auto N = Number<1>{};
     constexpr auto Hi = Number<135>{};
     constexpr auto Wi = Number<240>{};
@@ -135,6 +135,16 @@ int main(int argc, char* argv[])
     constexpr auto C1 = Number<8>{};
     constexpr auto K1 = Number<8>{};
     constexpr auto K0 = Number<8>{};
+#elif 0
+    constexpr auto N = Number<1>{};
+    constexpr auto Hi = Number<32>{};
+    constexpr auto Wi = Number<32>{};
+    constexpr auto Y = Number<3>{};
+    constexpr auto X = Number<3>{};
+    constexpr auto C0 = Number<2>{};
+    constexpr auto C1 = Number<8>{};
+    constexpr auto K1 = Number<8>{};
+    constexpr auto K0 = Number<8>{};
 #endif
     constexpr auto conv_stride_h = I1;
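The #if / #elif chain selects exactly one compile-time problem size; this commit makes the 1080x1920 configuration active and appends a disabled 32x32 case, presumably as a quick smoke test. A minimal sketch of the same selection pattern, assuming for this sketch that composable_kernel's Number<> behaves like std::integral_constant:

#include <type_traits>
#include <cstdio>

// Assumption for this sketch: CK's Number<N> is essentially an integral constant.
template <int N>
using Number = std::integral_constant<int, N>;

int main()
{
#if 1 // full-HD problem size (the block this commit enables)
    constexpr auto Hi = Number<1080>{};
    constexpr auto Wi = Number<1920>{};
#elif 0 // small smoke-test size (the block this commit adds)
    constexpr auto Hi = Number<32>{};
    constexpr auto Wi = Number<32>{};
#endif
    // The values are compile-time constants, so they can parameterize
    // templates and static_asserts as well as ordinary expressions.
    static_assert(Hi.value % 8 == 0 && Wi.value % 8 == 0, "tile-friendly sizes");
    printf("Hi = %d, Wi = %d\n", Hi.value, Wi.value);
    return 0;
}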
@@ -321,14 +331,12 @@ int main(int argc, char* argv[])
         if(do_log)
         {
-            // LogRangeAsType<float>(std::cout << "in : ", in.mData, ",") << std::endl;
-            // LogRangeAsType<float>(std::cout << "wei: ", wei.mData, ",") << std::endl;
-            // LogRangeAsType<float>(std::cout << "out_host : ", out_host.mData, ",") << std::endl;
-            // LogRangeAsType<float>(std::cout << "out_device: ", out_device.mData, ",") <<
-            // std::endl;
-            // LogRangeAsType<float>(std::cout << "add_device: ", add_device.mData, ",") <<
-            // std::endl;
+            LogRangeAsType<float>(std::cout << "in : ", in.mData, ",") << std::endl;
+            LogRangeAsType<float>(std::cout << "wei: ", wei.mData, ",") << std::endl;
+            LogRangeAsType<float>(std::cout << "out_host : ", out_host.mData, ",") << std::endl;
+            LogRangeAsType<float>(std::cout << "out_device: ", out_device.mData, ",") << std::endl;
             LogRangeAsType<float>(std::cout << "add_host: ", add_host.mData, ",") << std::endl;
+            LogRangeAsType<float>(std::cout << "add_device: ", add_device.mData, ",") << std::endl;
         }
     }
 }
@@ -205,11 +205,15 @@ void host_direct_convolution_add_nchwc(const Tensor<TIn>& in,
     v = activ(v, activ_type);

+    const int hox2 = ho * 2;
+    const int wox2 = wo * 2;
+
     out_host(n, k0, ho, wo, k1) = v;

-    add_host(n, k0, ho, wo, k1) = v + add(n, k0, ho, wo, k1);
-    add_host(n, k0, ho, wo + 1, k1) = v + add(n, k0, ho, wo + 1, k1);
-    add_host(n, k0, ho + 1, wo, k1) = v + add(n, k0, ho + 1, wo, k1);
-    add_host(n, k0, ho + 1, wo + 1, k1) = v + add(n, k0, ho + 1, wo + 1, k1);
+    add_host(n, k0, hox2, wox2, k1) = v + add(n, k0, hox2, wox2, k1);
+    add_host(n, k0, hox2, wox2 + 1, k1) = v + add(n, k0, hox2, wox2 + 1, k1);
+    add_host(n, k0, hox2 + 1, wox2, k1) = v + add(n, k0, hox2 + 1, wox2, k1);
+    add_host(n, k0, hox2 + 1, wox2 + 1, k1) = v + add(n, k0, hox2 + 1, wox2 + 1, k1);
 };
 make_ParallelTensorFunctor(f_nchw,
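This host-reference change is the validation fix the commit message refers to: the add tensor has double spatial resolution (note the add_n_k0_hox2_wox2_k1 naming above), so indexing it with ho, ho + 1, wo, wo + 1 both hits the wrong elements and lets neighboring (ho, wo) iterations overwrite each other's results; scaling to hox2 = 2 * ho, wox2 = 2 * wo gives every output value its own 2x2 block. A standalone sketch of the corrected mapping on plain arrays (sizes are illustrative only):

#include <cstdio>

constexpr int Ho = 2, Wo = 2;               // convolution output size (illustrative)
constexpr int Hox2 = Ho * 2, Wox2 = Wo * 2; // the add tensor has double spatial resolution

int main()
{
    float out[Ho][Wo]          = {{10, 20}, {30, 40}}; // stand-in for the per-(ho, wo) conv result v
    float add[Hox2][Wox2]      = {};                   // double-resolution residual input (zero here)
    float add_host[Hox2][Wox2] = {};                   // host-reference result

    for(int ho = 0; ho < Ho; ++ho)
        for(int wo = 0; wo < Wo; ++wo)
        {
            const float v    = out[ho][wo];
            const int   hox2 = ho * 2; // top-left corner of this value's 2x2 block
            const int   wox2 = wo * 2;

            add_host[hox2][wox2]         = v + add[hox2][wox2];
            add_host[hox2][wox2 + 1]     = v + add[hox2][wox2 + 1];
            add_host[hox2 + 1][wox2]     = v + add[hox2 + 1][wox2];
            add_host[hox2 + 1][wox2 + 1] = v + add[hox2 + 1][wox2 + 1];
        }

    for(int h = 0; h < Hox2; ++h)
    {
        for(int w = 0; w < Wox2; ++w)
            printf("%5.0f", add_host[h][w]);
        printf("\n");
    }
    return 0;
}

With a zero add input, the printed result is each out value repeated over a 2x2 block, i.e. nearest-neighbor 2x upsampling, matching the kernel-side h_i / 2, w_i / 2 indexing in the first hunk.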