Modify KBatch value (96 -> 64)

573e1b64 · ltqin · eed64f7e · 573e1b64 · 573e1b64
Commit 573e1b64 authored Sep 08, 2021 by ltqin
2 changed files
--- a/host/driver_offline/include/device_convolution_backward_weight_implicit_gemm_v4r4r2_xdlops_atomic_nchw_kcyx_nkhw.hpp
+++ b/host/driver_offline/include/device_convolution_backward_weight_implicit_gemm_v4r4r2_xdlops_atomic_nchw_kcyx_nkhw.hpp
@@ -76,7 +76,7 @@ void device_convolution_backward_weight_implicit_gemm_v4r4r2_xdlops_atomic_nchw_

    constexpr index_t GemmCThreadTransferDstScalarPerVector = 1;

-    constexpr index_t KBatch = 96;
+    constexpr index_t KBatch = 64;
 #elif 1
    // [M, N, K0, K1] = [128, 128, 4, 8] for fp16
    constexpr index_t BlockSize = 256;

--- a/host/driver_offline/src/conv_wrw_driver_offline.cpp
+++ b/host/driver_offline/src/conv_wrw_driver_offline.cpp
@@ -253,8 +253,7 @@ int main(int argc, char* argv[])
                          in_left_pads_dev,
                          in_right_pads_dev);
    };
-    
-    
+
    // set zero to wei_device
    wei_device.GenerateTensorValue(GeneratorTensor_0{}, num_thread);
 #if USE_CONV_WRW_V4R4R2_XDL_NCHW
@@ -284,7 +283,6 @@ int main(int argc, char* argv[])
    }
 #endif

-
 #if USE_CONV_WRW_V4R4R4_XDL_NHWC
    if(algo == ConvBackwardWeightAlgo::V4R4R4XDLNHWC)
    {