Commit d2a488dd authored by Chao Liu
Browse files

hand tuned params

parent c5877261
......@@ -170,7 +170,7 @@ int main()
int num_thread = std::thread::hardware_concurrency();
#if 1
#if 0
in.GenerateTensorValue(GeneratorTensor<float>{}, num_thread);
wei.GenerateTensorValue(GeneratorTensor<float>{}, num_thread);
#endif
......@@ -180,7 +180,7 @@ int main()
device_convolution(in_desc, in, wei_desc, wei, out_desc, out_device);
}
#if 1
#if 0
host_convolution(in, wei, out_host);
float error = 0;
......
......@@ -27,9 +27,9 @@ void device_convolution(
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned NPerBlock = 2;
constexpr unsigned KPerBlock = 8;
constexpr unsigned KPerBlock = 32;
constexpr unsigned CPerBlock = 2;
constexpr unsigned YPerBlock = 4;
constexpr unsigned YPerBlock = 1;
constexpr unsigned XPerBlock = 16;
constexpr unsigned NPerThread = 2;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment